attachment:rstuff2ex.r von RstatisTik/RstatisTikPortal/RcourSeSep

   1 ##########################################################
   2 ###################### Exercises Indexing ################
   3 ##########################################################
   4 
   5 ########## part A
   6 
   7 ## run the following commands!
   8 ## try to understand whats going on in each case!
   9 x <- c(2, 7, 0, 9, 10, 23, 11, 4, 7, 8, 6, 0)
  10 x[4]
  11 x[3:5]
  12 x[c(1, 5, 8)]
  13 x[x > 10]
  14 x[(1:6) * 2]
  15 all(x > 0)
  16 x[x == 0] <- 1
  17 x
  18 
  19 all(x > 0)
  20 ifelse(round(x/2) == x/2, "even", "odd")
  21 
  22 sum(x > 4)
  23 
  24 ## Display every third element in x
  25 
  26 
  27 
  28 ## Display elements that are less than 10,
  29 ## but greater than 4
  30 
  31 
  32 
  33 ## Modify the vector x, replacing by 10 all values
  34 ## that are greater than 10
  35 
  36 
  37 
  38 ## Modify the vector x, multiplying by 2 all
  39 ## elements that are smaller than 5
  40 
  41 
  42 
  43 ## Create a new vector y with elements 0,1,0,1, . . .
  44 ## (12 elements) and a vector z that equals x when y=0
  45 ## and 3x when y=1.
  46 ## (You can do it using ifelse, but there are other possibilities)
  47 
  48 
  49 help(pima) ## or ?pima
  50 names(pima)
  51 head(pima)
  52 
  53 pima[1, "glucose"]
  54 
  55 pima[1, 2]
  56 
  57 pima[2, "diastolic"]
  58 
  59 pima[1:10, "bmi"]
  60 
  61 pima[, "bmi"]
  62 
  63 pima[1, -2]
  64 
  65 ## Display the data on the variable age for row 7 in the
  66 ## pima data frame.
  67 
  68 
  69 ## Display all data in row 7.
  70 
  71 
  72 ## Display the first 10 rows of the data on the variable triceps
  73 
  74 
  75 
  76 ####################################################
  77 ################### read in data ###################
  78 ####################################################
  79 
  80 read.table("data/fishercats.txt", sep=" ",header=T)
  81 
  82 winer <- read.table( 
  83     "http://socserv.socsci.mcmaster.ca/jfox/Courses/R/ICPSR/Winer.txt",
  84     header=T)
  85 
  86 
  87 xx <- read.table("../logfiles/T0_22029_39_20130503_946-empatom.csv",
  88                  header = T, skip = 3)
  89 
  90 
  91 ## you have to copy some part of a excel file (does not work
  92 ## on every platform and/or software 
  93 mydata <- read.delim("clipboard",na.strings=".")
  94 
  95 
  96 library(XLConnect)
  97 my.wb <- loadWorkbook("data/Duncan.xls")
  98 sheets <- getSheets(my.wb)
  99 content <- readWorksheet(my.wb, sheet=1)
 100 head(content)
 101 
 102 require(readxl)
 103 x <- read_excel("data/Duncan.xls")
 104 head(x)
 105 
 106 ## exercises
 107 ## read the file T0_22029_39_20130503_946-empatom.csv using
 108 ## read.table() with the suitable arguments (omit the first
 109 ## three lines, use the third line as names for the columns);
 110 ## do not forget to assign the data to a variable
 111 
 112 
 113 ## how many rows
 114 
 115 
 116 ## how many columns
 117 
 118 
 119 ## how many answers were right and how many wrong
 120 ## accuracy == 1; accuracy == 0 resp)
 121 
 122 
 123 
 124 ####################################################
 125 ####################### dir() ######################
 126 ####################################################
 127 
 128 dir()
 129 
 130 dir("../logfiles")
 131 
 132 dir("..",recursive = T)
 133 
 134 dir("../logfiles", full.names = T)
 135 
 136 
 137 dir("../logfiles", pattern = "2013")
 138 dir("../logfiles", pattern = "^T2")
 139 dir("../logfiles", pattern = "25502|11944")
 140 
 141 
 142 ####################################################
 143 ####################### apply ######################
 144 ####################################################
 145 
 146 lapply(mtcars, mean)
 147 sapply(mtcars,mean)
 148 
 149 x <- 1:12
 150 dim(x)<-c(2,2,3)
 151 apply(x,3,quantile)
 152 
 153 
 154 tapply(mtcars$mpg,mtcars$cyl,mean)
 155 tapply(mtcars$mpg,list(mtcars$cyl,mtcars$vs),mean)
 156 
 157 tapply(mtcars$mpg,list(mtcars$cyl,mtcars$vs),mean,na.rm = T)
 158 table(mtcars$cyl,mtcars$vs)
 159 
 160 ## Exercises
 161 
 162 ## from the data frame read in above (from the subject 22029, T0)
 163 ## calculate the mean response time response_time for each of
 164 ## the categories of accuracy using tapply()
 165 
 166 
 167 
 168 ## find also the min, max, and median#
 169 
 170 
 171 
 172 
 173 
 174 
 175 
 176 ## use lapply() or sapply() on the data frame in
 177 ## combination with class()
 178 ## to get the column types of the data frame
 179 
 180 
 181 
 182 
 183 
 184 
 185 ## dir and apply
 186 
 187 ## now use dir() to get all files from id 22111,
 188 ## set the full.names argument to true; store them
 189 ## in a vector called files
 190 files <- dir("../logfiles", pattern = "22111", full.names = T)
 191 
 192 ## then use \texttt{lapply()} in combination with \texttt{read.table()}
 193 data.list <- lapply(files,read.table,header = T, skip = 3)
 194 
 195 ## repeat but now reading in addition the files
 196 ## from id 25507, 25508, and 25509
 197 
 198 
 199 ## what is the length of the resulting list? how to interpret the length?
 200 
 201 
 202 
 203 ####################################################
 204 ##################### reduce #######################
 205 ####################################################
 206 
 207 tmp <- lapply(data.list,names)
 208 lapply(2:length(data.list), function(x) all(tmp[[1]] == tmp[[x]]))
 209 
 210 lapply(data.list,ncol)
 211 
 212 res <- Reduce(rbind,data.list)
 213 
 214 
 215 res <- Reduce(function(x,y){
 216     names(y) <- names(x)
 217     rbind(x,y)
 218 }, data.list)
 219 
 220 
 221 res <- Reduce(function(x,y){
 222     names(x) <- names(y) <- paste0("col",1:ncol(x))
 223     rbind(x,y)
 224 }, data.list)
 225 
 226 
 227 ## read Lines
 228 require(lubridate)
 229 files <- dir("../logfiles", full.names = T)
 230 filename <- files[1]
 231 
 232 xx <- readLines(filename)
 233 head(xx)
 234 
 235 d1 <- read.table(text = xx[4:length(xx)],
 236                  sep = "\t",header=T)
 237 d2 <- read.table(text = xx[1:2],sep = "\t",header=T)
 238 
 239 tmp <- lapply(files,function(filename){
 240        xx <- readLines(filename)
 241        d1 <- read.table(text = xx[4:length(xx)],sep = "\t",fill = T,header=T)
 242        names(d1)[3] <- "trial"
 243        d2 <- read.table(text = xx[1:2],sep = "\t",fill = T,header=T)
 244        d1$subject <- as.character(d2$subject.)
 245        d1$timepoint <- as.character(d2$timepoint.)
 246        d1$datetime <- mdy_hms(d2$date.)
 247        d1$no.trials <- d2$no_trials
 248        return(d1)
 249    })
 250 
 251 system.time(result <- Reduce(rbind,tmp))
 252 
 253 
 254 ####################################################
 255 ###################### dplyr  ######################
 256 ####################################################
 257 
 258 require(dplyr)
 259 ## filter
 260 prob22029 <- filter(result, subject == "22029_39")
 261 table(prob22029$subject)
 262 
 263 prob22029 <- filter(result, subject == "22029_39",
 264                     accuracy == 1)
 265 table(prob22029$subject,prob22029$accuracy)
 266 
 267 
 268 fast <- filter(result, response_time < 5000)
 269 summary(fast$response_time)
 270 
 271 ## select
 272 subframe <- select(result, measurement, first_pulse, subject)
 273 nrow(subframe)
 274 
 275 subframe <- filter(result, response_time < 5000) %>%
 276     select(measurement, first_pulse, subject)
 277 nrow(subframe)
 278 
 279 ## arrange
 280 head(result[,c("name","measurement","timepoint","response_time")])
 281 arr.frame <- arrange(result, response_time)
 282 
 283 head(arr.frame[,c("name","measurement","timepoint","response_time")])
 284 
 285 arr.frame <- arrange(result, response_time) %>%
 286     filter(response_time > 0)
 287 head(arr.frame[,c("name","measurement","timepoint","response_time")])
 288 
 289 ## mutate/summariz(s)e
 290 require(stringr)
 291 subframe <- filter(result,subject == "22074_39") %>%
 292     mutate(video2 = str_replace(video, "\\.avi", ""),
 293            video3 = str_replace(video2, "[0-9]", ""),
 294            concern_time = concern_time_ended - concern_time_started )
 295 
 296 table(subframe$subject)
 297 
 298 summary(subframe$concern_time)
 299 table(subframe$video3)
 300 
 301 ## summariz(s)e
 302 sumframe <- summarise(result,
 303                       right.perc = sum(accuracy == 1)/n(),
 304                       mean.resp.time = mean(response_time, na.rm = T))
 305 
 306 
 307 ## group_by and summarise
 308 sumframe <- group_by(result, subject) %>%
 309     summarise(right.perc = sum(accuracy == 1)/n(),
 310               mean.resp.time = mean(response_time, na.rm = T))
 311 
 312 head(sumframe)
 313 
 314 
 315 sumframe <- group_by(result, subject, timepoint) %>%
 316     summarise(right.perc = sum(accuracy == 1)/n(),
 317               mean.resp.time = mean(response_time, na.rm = T))
 318 
 319 head(sumframe)
 320 
 321 
 322 sumframe <- group_by(result, subject, timepoint) %>%
 323     summarise(right.perc = sum(accuracy == 1)/n(),
 324               mean.resp.time = mean(response_time, na.rm = T)) %>%
 325     arrange(right.perc,desc(mean.resp.time))
 326 head(sumframe)
 327 
 328 
 329 sumframe <- group_by(result, subject, timepoint) %>%
 330     summarise(right.perc = sum(accuracy == 1)/n(),
 331               mean.resp.time = mean(response_time, na.rm = T)) %>%
 332     ungroup() %>% 
 333     arrange(right.perc,desc(mean.resp.time))
 334 head(sumframe)
 335 
 336 
 337 ## exercises
 338 
 339 ## use select() and filter() in combination (%>%) to
 340 ## select all rows belonging to the T0 or the T3 test,
 341 ## keep subject, timepoint, accuracy, response_button,
 342 ## response_time,  affect_time_ended and concern_pos_started
 343 ## column. Create a new data frame with a appropriate name.
 344 
 345 
 346 
 347 
 348 ## add two new variables containing the counts of each of
 349 ## the possible values of accuracy. Use mutate()
 350 ## and sum(accuracy==1).
 351 
 352 
 353 
 354 ## use group\_by() and summarise() to extract the minimum
 355 ## and maximum response_time per person from the original data frame
 356 
 357 
 358 
 359 ## repeat the last exercise, but now group per person and time point
 360 
 361 ###########################################################################
 362 #########################  graphics #######################################
 363 ###########################################################################
 364 require(ggplot2)
 365 ## create a new object
 366 po <- ggplot()
 367 summary(po)
 368 
 369 ## show structure of the object
 370 str(po)
 371 
 372 ## example data
 373 x1 <- 1:10; y1 <- 1:10; z1 <- 10:1
 374 l1 <- LETTERS[1:10]
 375 a <- 10; b <- (0:-9)/10:1
 376 ex <- data.frame(x1=x1,y1=y1,z=z1,l=l1,a=a,b=b)
 377 
 378 ## create a new ggplot object containing the data
 379 po <- ggplot(ex,aes(x=x1,y=y1))
 380 summary(po)
 381 
 382 ## scatter plot
 383 p1 <- po + geom_point()
 384 
 385 
 386 ## second example data frame
 387 ex2 <- data.frame(x1=sample(1:20),
 388                   y1=sample(1:10),
 389                   l=letters[1:20])
 390 head(ex2,10)
 391 
 392 ## replace data in po
 393 pn <- p1 %+% ex2 
 394 pn + geom_line()
 395 
 396 
 397 ## add a text layer
 398 my.text <- geom_text(aes(label=l), 
 399                          hjust=1.1, 
 400                          vjust=-0.2)
 401 pn + geom_path() + my.text
 402 
 403 
 404 ### add lines
 405 ## one line
 406 p1 + geom_abline(intercept=10,slope=-1,
 407                 colour=rgb(.5,.5,.9))
 408 ## two lines
 409 p1 + geom_abline(intercept=c(10,1),slope=c(-1,-2),
 410                 colour=rgb(.5,.5,.9))
 411 ## more lines -> takes only the first intercept
 412 p1 + geom_abline(intercept=10:1,slope=-(10:1)/10,
 413                 colour=rgb(.5,.5,.9))
 414 
 415 p1 +
 416   geom_abline(aes(slope=b,intercept=a,colour=x1)) + 
 417   scale_x_continuous(limits=c(0,10))
 418 
 419 p1 + geom_hline(yintercept=1:10)
 420 p1 + geom_hline(yintercept=1:10) + 
 421     geom_vline(xintercept=1:10)
 422
Quick Links

Search Wiki

Page Tools

Dateianhang 'rstuff2ex.r'

Gespeicherte Dateianhänge