attachment:rstuff2.r von RstatisTik/RstatisTikPortal/RcourSeSep

   1 ##########################################################
   2 ###################### Exercises Indexing ################
   3 ##########################################################
   4 
   5 ########## part A
   6 
   7 ## run the following commands!
   8 ## try to understand whats going on in each case!
   9 x <- c(2, 7, 0, 9, 10, 23, 11, 4, 7, 8, 6, 0)
  10 x[4]
  11 x[3:5]
  12 x[c(1, 5, 8)]
  13 x[x > 10]
  14 x[(1:6) * 2]
  15 
  16 all(x > 0)
  17 
  18 x == 0
  19 x[x == 0]
  20 
  21 x[x == 0] <- 1
  22 x
  23 
  24 all(x > 0)
  25 
  26 
  27 ifelse(round(x/2) == x/2, "even", "odd")
  28 
  29 sum(x > 4)
  30 
  31 ## Display every third element in x
  32 
  33 x[seq(1,12,by = 3)]
  34 x[seq(1,length(x),by = 3)]
  35 
  36 x[rep(c(F,F,T),length(x)/3)]
  37 x[c(3,6,9,12)]
  38 
  39 ## Display elements that are less than 10,
  40 ## but greater than 4
  41 
  42 x[x > 4 & x < 10]
  43 x
  44 ## Modify the vector x, replacing by 10 all values
  45 ## that are greater than 10
  46 
  47 x[x > 10] <- 10
  48 
  49 ## Modify the vector x, multiplying by 2 all
  50 ## elements that are smaller than 5
  51 
  52 x[x < 5] <- 2 * x[x < 5]
  53 
  54 ## Create a new vector y with elements 0,1,0,1, . . .
  55 ## (12 elements) and a vector z that equals x when y=0
  56 ## and 3x when y=1.
  57 ## (You can do it using ifelse, but there are other possibilities)
  58 
  59 y <- rep(0:1,6)
  60 z <- ifelse(y==0,x,3 * x)
  61 z2 <- x + y * 2 * x
  62 
  63 require(faraway)
  64 help(pima) ## or ?pima
  65 names(pima)
  66 head(pima,3)
  67 
  68 pima[1, "glucose"]
  69 
  70 pima[1, 2]
  71 
  72 pima[2, "diastolic"]
  73 
  74 pima[1:10, "bmi"]
  75 
  76 pima[, "bmi"]
  77 
  78 pima[1, -2]
  79 
  80 -which(names(pima)=="glucose")
  81 pima[1,-which(names(pima) %in% c("glucose","age"))]
  82 
  83 ## Display the data on the variable age for row 7 in the
  84 ## pima data frame.
  85 
  86 
  87 ## Display all data in row 7.
  88 
  89 
  90 ## Display the first 10 rows of the data on the variable triceps
  91 
  92 
  93 
  94 ####################################################
  95 ################### read in data ###################
  96 ####################################################
  97 
  98 read.table("data/fishercats.txt", sep=" ",header=T)
  99 
 100 winer <- read.table( 
 101     "http://socserv.socsci.mcmaster.ca/jfox/Courses/R/ICPSR/Winer.txt",
 102     header=T)
 103 
 104 
 105 xx <- read.table("../logfiles/T0_22029_39_20130503_946-empatom.csv",
 106                  header = T, skip = 3)
 107 
 108 
 109 ## you have to copy some part of a excel file (does not work
 110 ## on every platform and/or software 
 111 mydata <- read.delim("clipboard",na.strings=".")
 112 
 113 
 114 library(XLConnect)
 115 my.wb <- loadWorkbook("data/Duncan.xls")
 116 sheets <- getSheets(my.wb)
 117 content <- readWorksheet(my.wb, sheet=1)
 118 head(content)
 119 
 120 require(readxl)
 121 x <- read_excel("data/Duncan.xls")
 122 head(x)
 123 
 124 ####################################################
 125 ################### read data ex ###################
 126 ####################################################
 127 
 128 pers22029 <- read.table("../logfiles/T0_22029_39_20130503_946-empatom.csv", skip = 3, header = T )
 129 
 130 sum(x$accuracy == 0)
 131 sum(x$accuracy == 1)
 132 
 133 table(x$accuracy, useNA = "ifany")
 134 
 135 ####################################################
 136 ####################### dir() ######################
 137 ####################################################
 138 
 139 dir()
 140 
 141 dir("../logfiles")
 142 
 143 dir("..",recursive = T)
 144 
 145 dir("../logfiles", full.names = T)
 146 
 147 
 148 dir("../logfiles", pattern = "2013")
 149 dir("../logfiles", pattern = "^T2")
 150 dir("../logfiles", pattern = "25502|11944")
 151 
 152 ###
 153 
 154 ## files from persons 25512, 25485, 25461
 155 dir("../logfiles", pattern = "25512|25485|25461")
 156 
 157 ## file list 2014;  how many from 2014
 158 files <- dir("../logfiles", pattern = "2014")
 159 length(files)
 160 
 161 ## how many files from T3
 162 files <- dir("../logfiles", pattern = "^T3")
 163 length(files)
 164 
 165 
 166 files <- dir("../logfiles", pattern = "^T3.+22283")
 167 length(files)
 168 
 169 ####################################################
 170 ####################### apply ######################
 171 ####################################################
 172 
 173 lapply(mtcars, mean)
 174 sapply(mtcars,mean)
 175 
 176 x <- 1:12
 177 dim(x) <- c(3,4)
 178 apply(x,1,mean)
 179 apply(x,2,mean)
 180 
 181 rowMeans(x)
 182 colMeans(x)
 183 
 184 x <- 1:12
 185 dim(x)<-c(2,2,3)
 186 apply(x,3,quantile)
 187 
 188 
 189 tapply(mtcars$mpg,mtcars$cyl,mean)
 190 tapply(mtcars$mpg,rep(0:1,16),mean)
 191 
 192 tapply(mtcars$mpg,list(mtcars$cyl,mtcars$vs),mean)
 193 
 194 tapply(mtcars$mpg,list(mtcars$cyl,mtcars$vs),mean,na.rm = T)
 195 table(mtcars$cyl,mtcars$vs)
 196 
 197 ## Exercises
 198 
 199 ## from the data frame read in above (from the subject 22029, T0)
 200 ## calculate the mean response time response_time for each of
 201 ## the categories of accuracy using tapply()
 202 
 203 tapply(xx$response_time,xx$accuracy,mean)
 204 
 205 ## find also the min, max, and median#
 206 tapply(xx$response_time,xx$accuracy,min)
 207 tapply(xx$response_time,xx$accuracy,max)
 208 tapply(xx$response_time,xx$accuracy,median)
 209 
 210 tapply(xx$response_time,xx$accuracy,summary)
 211 
 212 
 213 
 214 ## use lapply() or sapply() on the data frame in
 215 ## combination with class()
 216 ## to get the column types of the data frame
 217 
 218 lapply(xx,class)
 219 
 220 table(sapply(xx,class))
 221 
 222 files <- dir("../logfiles",full.names = T)
 223 
 224 files <- dir("../logfiles", pattern = "22111", full.names = T)
 225 data.list <- lapply(files,read.table,header = T, skip = 3)
 226 
 227 head(data.list[[2]])
 228 
 229 length(data.list)
 230 
 231 files <- dir("../logfiles", pattern = "22111|25507|25508|25509", full.names = T)
 232 data.list <- lapply(files,read.table,header = T, skip = 3)
 233 
 234 length(data.list)
 235 names(data.list)
 236 
 237 
 238 ####################################################
 239 ##################### reduce #######################
 240 ####################################################
 241 
 242 tmp <- lapply(data.list,names)
 243 lapply(2:length(data.list), function(x) all(tmp[[1]] == tmp[[x]]))
 244 
 245 lapply(data.list,ncol)
 246 
 247 res <- Reduce(rbind,data.list)
 248 
 249 
 250 res <- Reduce(function(x,y){
 251     names(y) <- names(x)
 252     rbind(x,y)
 253 }, data.list)
 254 
 255 
 256 res <- Reduce(function(x,y){
 257     names(x) <- names(y) <- paste0("col",1:ncol(x))
 258     rbind(x,y)
 259 }, data.list)
 260 
 261 
 262 ## read Lines
 263 require(lubridate)
 264 files <- dir("../logfiles", full.names = T)
 265 filename <- files[1]
 266 
 267 xx <- readLines(filename)
 268 head(xx)
 269 
 270 d1 <- read.table(text = xx[4:length(xx)],
 271                  sep = "\t",header=T)
 272 d2 <- read.table(text = xx[1:2],sep = "\t",header=T)
 273 
 274 tmp <- lapply(files,function(filename){
 275        xx <- readLines(filename)
 276        d1 <- read.table(text = xx[4:length(xx)],sep = "\t",fill = T,header=T)
 277        names(d1)[3] <- "trial"
 278        d2 <- read.table(text = xx[1:2],sep = "\t",fill = T,header=T)
 279        d1$subject <- as.character(d2$subject.)
 280        d1$timepoint <- as.character(d2$timepoint.)
 281        d1$datetime <- mdy_hms(d2$date.)
 282        d1$no.trials <- d2$no_trials
 283        return(d1)
 284    })
 285 
 286 system.time(result <- Reduce(rbind,tmp))
 287 
 288 
 289 ####################################################
 290 ###################### dplyr  ######################
 291 ####################################################
 292 
 293 require(dplyr)
 294 ## filter
 295 prob22029 <- filter(result, subject == "22029_39")
 296 table(prob22029$subject)
 297 
 298 prob22029 <- filter(result, subject == "22029_39",
 299                     accuracy == 1)
 300 table(prob22029$subject,prob22029$accuracy)
 301 
 302 
 303 fast <- filter(result, response_time < 5000)
 304 summary(fast$response_time)
 305 
 306 ## select
 307 subframe <- select(result, measurement, first_pulse, subject)
 308 nrow(subframe)
 309 
 310 subframe <- filter(result, response_time < 5000) %>%
 311     select(measurement, first_pulse, subject)
 312 nrow(subframe)
 313 
 314 ## arrange
 315 head(result[,c("name","measurement","timepoint","response_time")])
 316 arr.frame <- arrange(result, response_time)
 317 
 318 head(arr.frame[,c("name","measurement","timepoint","response_time")])
 319 
 320 arr.frame <- arrange(result, response_time) %>%
 321     filter(response_time > 0)
 322 head(arr.frame[,c("name","measurement","timepoint","response_time")])
 323 
 324 ## mutate/summariz(s)e
 325 require(stringr)
 326 subframe <- filter(result,subject == "22074_39") %>%
 327     mutate(video2 = str_replace(video, "\\.avi", ""),
 328            video3 = str_replace(video2, "[0-9]", ""),
 329            concern_time = concern_time_ended - concern_time_started )
 330 
 331 table(subframe$subject)
 332 
 333 summary(subframe$concern_time)
 334 table(subframe$video3)
 335 
 336 ## summariz(s)e
 337 sumframe <- summarise(result,
 338                       right.perc = sum(accuracy == 1)/n(),
 339                       mean.resp.time = mean(response_time, na.rm = T))
 340 
 341 
 342 ## group_by and summarise
 343 sumframe <- group_by(result, subject) %>%
 344     summarise(right.perc = sum(accuracy == 1)/n(),
 345               mean.resp.time = mean(response_time, na.rm = T))
 346 
 347 head(sumframe)
 348 
 349 
 350 sumframe <- group_by(result, subject, timepoint) %>%
 351     summarise(right.perc = sum(accuracy == 1)/n(),
 352               mean.resp.time = mean(response_time, na.rm = T))
 353 
 354 head(sumframe)
 355 
 356 
 357 sumframe <- group_by(result, subject, timepoint) %>%
 358     summarise(right.perc = sum(accuracy == 1)/n(),
 359               mean.resp.time = mean(response_time, na.rm = T)) %>%
 360     arrange(right.perc,desc(mean.resp.time))
 361 head(sumframe)
 362 
 363 
 364 sumframe <- group_by(result, subject, timepoint) %>%
 365     summarise(right.perc = sum(accuracy == 1)/n(),
 366               mean.resp.time = mean(response_time, na.rm = T)) %>%
 367     ungroup() %>% 
 368     arrange(right.perc,desc(mean.resp.time))
 369 head(sumframe)
 370 
 371 
 372 ###########################################################################
 373 #########################  graphics #######################################
 374 ###########################################################################
 375 require(ggplot2)
 376 ## create a new object
 377 po <- ggplot()
 378 summary(po)
 379 
 380 ## show structure of the object
 381 str(po)
 382 
 383 ## example data
 384 x1 <- 1:10; y1 <- 1:10; z1 <- 10:1
 385 l1 <- LETTERS[1:10]
 386 a <- 10; b <- (0:-9)/10:1
 387 ex <- data.frame(x1=x1,y1=y1,z=z1,l=l1,a=a,b=b)
 388 
 389 ## create a new ggplot object containing the data
 390 po <- ggplot(ex,aes(x=x1,y=y1))
 391 summary(po)
 392 
 393 ## scatter plot
 394 p1 <- po + geom_point()
 395 
 396 
 397 ## second example data frame
 398 ex2 <- data.frame(x1=sample(1:20),
 399                   y1=sample(1:10),
 400                   l=letters[1:20])
 401 head(ex2,10)
 402 
 403 ## replace data in po
 404 pn <- p1 %+% ex2 
 405 pn + geom_line()
 406 
 407 
 408 ## add a text layer
 409 my.text <- geom_text(aes(label=l), 
 410                          hjust=1.1, 
 411                          vjust=-0.2)
 412 pn + geom_path() + my.text
 413 
 414 
 415 ### add lines
 416 ## one line
 417 p1 + geom_abline(intercept=10,slope=-1,
 418                 colour=rgb(.5,.5,.9))
 419 ## two lines
 420 p1 + geom_abline(intercept=c(10,1),slope=c(-1,-2),
 421                 colour=rgb(.5,.5,.9))
 422 ## more lines -> takes only the first intercept
 423 p1 + geom_abline(intercept=10:1,slope=-(10:1)/10,
 424                 colour=rgb(.5,.5,.9))
 425 
 426 p1 +
 427   geom_abline(aes(slope=b,intercept=a,colour=x1)) + 
 428   scale_x_continuous(limits=c(0,10))
 429 
 430 p1 + geom_hline(yintercept=1:10)
 431 p1 + geom_hline(yintercept=1:10) + 
 432     geom_vline(xintercept=1:10)
 433
Quick Links

Search Wiki

Page Tools

Dateianhang 'rstuff2.r'

Gespeicherte Dateianhänge