welcome: please sign in
location: attachment:rstuff3sol.r von RstatisTik/RstatisTikPortal/RcourSeSep

Dateianhang 'rstuff3sol.r'

Herunterladen

   1 #################################################################
   2 ###################### Recap Exercises ##########################
   3 #################################################################
   4 
   5 ## use dir() with the pattern argument to get a list of all file
   6 ## names containing the number 25497; store the list in a variable
   7 ## named files or something similar
   8 
   9 files <- dir("../logfiles/", pattern = "25497", full.names = T)
  10 
  11 ## use lapply() on this list to read in the respective files;
  12 ## again store the result in a appropriately names variable
  13 ## (remember: we had to set the header and skip arguments)
  14 
  15 data.list <- lapply(files, read.table, header = T, skip = 3)
  16 
  17 
  18 ## on the resulting list run simultaneously dim() to get the number
  19 ## of rows and columns (again using lapply()): How many rows
  20 ## and columns we have in these data frames
  21 lapply(data.list, dim)
  22 
  23 
  24 ## now use names() as function to get the column names of each
  25 ## of the data frame
  26 lapply(data.list, names)
  27 
  28 
  29 ####################################################
  30 ##################### reduce #######################
  31 ####################################################
  32 
  33 tmp <- lapply(data.list,names)
  34 lapply(2:length(data.list), function(x) all(tmp[[1]] == tmp[[x]]))
  35 
  36 lapply(data.list,ncol)
  37 
  38 res <- Reduce(rbind,data.list)
  39 
  40 
  41 res <- Reduce(function(x,y) {merge(x,y,all=T)} , data.list, all = T)
  42 
  43 res <- Reduce(function(x,y){
  44     names(y) <- names(x)
  45     rbind(x,y)
  46 }, data.list)
  47 
  48 
  49 res <- Reduce(function(x,y){
  50     names(x) <- names(y) <- paste0("col",1:ncol(x))
  51     rbind(x,y)
  52 }, data.list)
  53 
  54 
  55 ## read Lines
  56 require(lubridate)
  57 files <- dir("../logfiles", full.names = T)
  58 filename <- files[1]
  59 
  60 xx <- readLines(filename)
  61 head(xx)
  62 
  63 d1 <- read.table(text = xx[4:length(xx)],
  64                  sep = "\t",header=T)
  65 d2 <- read.table(text = xx[1:2],sep = "\t",header=T)
  66 
  67 tmp <- lapply(files,function(filename){
  68        xx <- readLines(filename)
  69        d1 <- read.table(text = xx[4:length(xx)],sep = "\t",fill = T,header=T)
  70        names(d1)[3] <- "trial"
  71        d2 <- read.table(text = xx[1:2],sep = "\t",fill = T,header=T)
  72        d1$timepoint <- as.character(d2$timepoint.)
  73        d1$datetime <- mdy_hms(d2$date.)
  74        return(d1)
  75    })
  76 
  77 system.time(result <- Reduce(rbind,tmp))
  78 
  79 ## add the subject and the feedback duration to the resulting data frame
  80 ## save data frame result using the following syntax (change path and
  81 ## name to your needs)
  82 
  83 tmp <- lapply(files,function(filename){
  84        xx <- readLines(filename)
  85        d1 <- read.table(text = xx[4:length(xx)],sep = "\t",fill = T,header=T)
  86        names(d1)[3] <- "trial"
  87        d2 <- read.table(text = xx[1:2],sep = "\t",fill = T,header=T)
  88        d1$timepoint <- as.character(d2$timepoint.)
  89        d1$datetime <- mdy_hms(d2$date.)
  90        d1$subject <- d2$subject.
  91        d1$feedback.dur <- d2$feedback_duration
  92        return(d1)
  93    })
  94 
  95 system.time(result <- Reduce(rbind,tmp))
  96 save(result, file = "20151013result.rdata")
  97 
  98 load("20151013result.rdata")
  99 
 100 persdat <- result %>% group_by(subject) %>%
 101     summarise(
 102         aff = mean(affect_pos_rated),
 103         respt = mean(response_time[response_time != 0])
 104     )
 105 
 106 sample(c("male","female"),
 107        size = nrow(persdat),
 108        prob = result$affect_pos_rated,
 109        replace = T)
 110 
 111 persdat$sex <- apply(persdat,1, function(x){
 112     p <- as.numeric(x["aff"])
 113     sample(factor(c("male","female")),
 114            size = 1,
 115            prob = c(p,570-p) )})
 116 
 117 
 118 persdat$bd <- sample(seq(as.Date("1960-01-01"),as.Date("1995-01-01"),
 119                          by = "week"), size = nrow(persdat))
 120 
 121 persdat$aff <- NULL
 122 persdat$respt <- NULL
 123 
 124 save(persdat,file = "fakepersdat.rdata")
 125 
 126 load("fakepersdat.rdata")
 127 
 128 
 129 result <- merge(persdat,result, by = "subject")
 130 
 131 ####################################################
 132 ###################### dplyr  ######################
 133 ####################################################
 134 
 135 require(dplyr)
 136 ## filter
 137 prob22029 <- filter(result, subject == "22029_39")
 138 table(prob22029$subject)
 139 
 140 prob22029 <- filter(result, subject == "22029_39",
 141                     accuracy == 1)
 142 table(prob22029$subject,prob22029$accuracy)
 143 
 144 
 145 prob22029 <- filter(result, subject == "22029_39",
 146                     accuracy == 1) %>% droplevels
 147 table(prob22029$subject,prob22029$accuracy)
 148 
 149 prob22029 <- filter(result, subject == "22029_39"|accuracy == 1)
 150 table(prob22029$subject,prob22029$accuracy)
 151 
 152 
 153 fast <- filter(result, response_time < 5000)
 154 summary(fast$response_time)
 155 
 156 ## select
 157 subframe <- select(result, measurement, first_pulse, subject)
 158 nrow(subframe)
 159 
 160 subframe <- filter(result, response_time < 5000) %>%
 161     select(measurement, first_pulse, subject)
 162 nrow(subframe)
 163 
 164 ## arrange
 165 head(select(arr.frame,name,measurement,timepoint,response_time))
 166 ## head(arr.frame[,c("name","measurement","timepoint","response_time")])
 167 
 168 arr.frame <- arrange(result, response_time)
 169 
 170 head(select(arr.frame,name,measurement,timepoint,response_time))
 171 
 172 arr.frame <- arrange(result, response_time) %>%
 173     filter(response_time > 0)
 174 head(select(arr.frame,name,measurement,timepoint,response_time))
 175 
 176 
 177 ## mutate/summariz(s)e
 178 require(stringr)
 179 subframe <- filter(result,subject == "22074_39") %>%
 180     mutate(video2 = str_replace(video, "\\.avi", ""),
 181            video3 = str_replace(video2, "[0-9]", ""),
 182            concern_time = concern_time_ended - concern_time_started )
 183 
 184 table(subframe$subject)
 185 
 186 summary(subframe$concern_time)
 187 table(subframe$video3)
 188 
 189 ## summariz(s)e
 190 sumframe <- summarise(result,
 191                       right.perc = sum(accuracy == 1)/n(),
 192                       mean.resp.time = mean(response_time, na.rm = T))
 193 
 194 
 195 ## group_by and summarise
 196 sumframe <- group_by(result, subject) %>%
 197     summarise(right.perc = sum(accuracy == 1)/n(),
 198               mean.resp.time = mean(response_time, na.rm = T))
 199 
 200 head(sumframe)
 201 
 202 
 203 sumframe <- group_by(result, subject, timepoint) %>%
 204     summarise(right.perc = sum(accuracy == 1)/n(),
 205               mean.resp.time = mean(response_time, na.rm = T))
 206 
 207 head(sumframe)
 208 
 209 
 210 sumframe <- group_by(result, subject, timepoint) %>%
 211     summarise(right.perc = sum(accuracy == 1)/n(),
 212               mean.resp.time = mean(response_time, na.rm = T)) %>%
 213     arrange(right.perc,desc(mean.resp.time))
 214 head(sumframe)
 215 
 216 
 217 sumframe <- group_by(result, subject, timepoint) %>%
 218     summarise(right.perc = sum(accuracy == 1)/n(),
 219               mean.resp.time = mean(response_time, na.rm = T)) %>%
 220     ungroup() %>% 
 221     arrange(right.perc,desc(mean.resp.time))
 222 head(sumframe)
 223 
 224 
 225 ###########################################################################
 226 #########################  dplyr exercises ################################
 227 ###########################################################################
 228 require(dplyr)
 229 ## use select() and filter() in combination (%>%) to select
 230 ## all rows belonging to the T0 or the T3 test, keep subject,
 231 ## timepoint, accuracy, response_button, response_time,
 232 ## affect_time_ended and concern_pos_started column.
 233 ## Create a new data frame with a appropriate name.
 234 
 235 res <- result %>% filter(timepoint %in% c("T0","T3")) %>%
 236     select(subject, timepoint, accuracy, response_button, response_time,
 237            affect_time_ended, concern_pos_started)
 238 
 239 res <- filter(result, timepoint %in% c("T0","T3")) %>%
 240     select(subject, timepoint, accuracy, response_button, response_time,
 241            affect_time_ended, concern_pos_started)
 242 
 243 
 244 ## add three new variables containing the counts of each of the possible
 245 ## values of accuracy. Use mutate() and something like sum(accuracy==1).
 246 
 247 
 248 res <- res %>% mutate(
 249     acc1 = sum(accuracy==1),
 250     acc0 = sum(accuracy==0),
 251     accm1 = sum(accuracy==-1)
 252 )
 253 
 254 ## use group_by() and summarise() to extract the minimum and maximum
 255 ## response_time per person from the original data frame
 256 
 257 sum.df <- result %>% group_by(subject) %>%
 258     summarise(
 259         min.resp.time = min(response_time),
 260         max.resp.time = max(response_time)
 261     )
 262 
 263 
 264 sum.df <-  group_by(result,subject) %>%
 265     summarise(
 266         min.resp.time = min(response_time),
 267         max.resp.time = max(response_time)
 268     )
 269 
 270 ## look at the resulting data frame. What is the problem and the
 271 ## possible reason. And what is the solution 
 272 
 273 sum.df <- result %>% group_by(subject) %>%
 274     summarise(
 275         min.resp.time = min(response_time[accuracy!=-1]),
 276         max.resp.time = max(response_time[accuracy!=-1])
 277     )
 278 
 279 
 280 ## repeat the last exercise, but now group per person and time point
 281 sum.df2 <- result %>% group_by(subject,timepoint) %>%
 282     summarise(
 283         min.resp.time = min(response_time[accuracy!=-1]),
 284         max.resp.time = max(response_time[accuracy!=-1])
 285     )
 286 
 287 
 288 save(sum.df2, file = "../20151013sumdf.rdata")
 289 
 290 
 291 ## reshaping
 292 require(reshape2)
 293 sum.df2.wide <- dcast(sum.df2,
 294                       subject ~ timepoint,
 295                       value.var = c("min.resp.time"))
 296 
 297 
 298 sum.df2 <- as.data.frame(sum.df2)
 299 sum.df2.wide <- reshape(sum.df2,
 300                         timevar = "timepoint",
 301                         idvar = "subject",
 302                         direction = "wide")
 303 
 304 
 305 ###########################################################################
 306 #########################  graphics #######################################
 307 ###########################################################################
 308 
 309 ## traditional graphics
 310 
 311 x <- rnorm(40)
 312 y <- factor(sample(c("yes","now"),40,replace=T))
 313 par(mfrow=c(2,2))
 314 boxplot(x~y)
 315 boxplot(x~y,boxwex=0.5)
 316 z <- sample(1:5)
 317 barplot(z)
 318 barplot(z, horiz=T)
 319 
 320 require(ggplot2)
 321 ## create a new object
 322 po <- ggplot()
 323 summary(po)
 324 
 325 ## show structure of the object
 326 str(po)
 327 
 328 ## example data
 329 x1 <- 1:10; y1 <- 1:10; z1 <- 10:1
 330 l1 <- LETTERS[1:10]
 331 a <- 10; b <- (0:-9)/10:1
 332 ex <- data.frame(x1=x1,y1=y1,z=z1,l=l1,a=a,b=b)
 333 
 334 ## create a new ggplot object containing the data
 335 po <- ggplot(ex,aes(x=x1,y=y1))
 336 summary(po)
 337 
 338 ## scatter plot
 339 p1 <- po + geom_point()
 340 
 341 
 342 ## second example data frame
 343 ex2 <- data.frame(x1=sample(1:20),
 344                   y1=sample(1:10),
 345                   l=letters[1:20])
 346 head(ex2,10)
 347 
 348 ## replace data in po
 349 pn <- p1 %+% ex2 
 350 pn + geom_line()
 351 
 352 
 353 ## add a text layer
 354 my.text <- geom_text(aes(label=l), 
 355                          hjust=1.1, 
 356                          vjust=-0.2)
 357 
 358 pn  + my.text + geom_path()
 359 
 360 
 361 ### add lines
 362 ## one line
 363 p1 + geom_abline(intercept=10,slope=-1,
 364                 colour=rgb(.5,.5,.9))
 365 ## two lines
 366 p1 + geom_abline(intercept=c(10,1),slope=c(-1,-2),
 367                 colour=rgb(.5,.5,.9))
 368 ## more lines -> takes only the first intercept
 369 p1 + geom_abline(intercept=10:1,slope=-(10:1)/10,
 370                 colour=rgb(.5,.5,.9))
 371 
 372 p1 +
 373   geom_abline(aes(slope=b,intercept=a,colour=x1)) + 
 374   scale_x_continuous(limits=c(0,10))
 375 
 376 p1 + geom_hline(yintercept=1:10)
 377 p1 + geom_hline(yintercept=1:10) + 
 378     geom_vline(xintercept=1:10)
 379 
 380 
 381 
 382 ###########################################################################
 383 ################## Exercises ggplot2 ######################################
 384 ###########################################################################
 385 require(ggplot2)
 386 load("../20151013sumdf.rdata")
 387 
 388 ## use the last summary data frame from the dplyr exercise (with
 389 ## subject and timepoint as grouping variables) to create a scatter
 390 ## plot (geom\_point()) colouring the points different per timepoint
 391 
 392 
 393 ggplot(sum.df2, aes(x = min.resp.time, y = max.resp.time, color = timepoint)) +
 394     geom_point() +
 395            facet_wrap(~timepoint,nrow = 3) + theme_stata()
 396 
 397 require(ggplot2)
 398 head(result)
 399 
 400 
 401 result$accuracy <- factor(result$accuracy,
 402                           levels = 0:1,
 403                           labels = c("wrong","right"))
 404 
 405 
 406 ## Map the category variable to the x-axis and the
 407 ## affect_pos_rated to the y-axis, use geom_boxplot()
 408 
 409 ggplot(result, aes(x = factor(response_button),
 410                    y = affect_pos_rated,
 411                    fill = factor(category))) +
 412                        geom_boxplot()
 413 
 414 
 415 ## use facet\_wrap as above to facet the plot per timepoint
 416 ggplot(result, aes(x = factor(response_button),
 417                    y = affect_pos_rated,
 418                    fill = factor(category))) +
 419                        geom_boxplot() +
 420                        facet_wrap(~timepoint)
 421 
 422 
 423 ## create a scatter plot with affect\_pos\_started on the x-axis
 424 ## and affect_pos_rated on the y-axis                           
 425 
 426 ggplot(result, aes(x =  affect_pos_started, y = affect_pos_rated)) +
 427     geom_point()
 428 
 429 
 430 ## add a geom_smooth()
 431 ggplot(result, aes(x = affect_pos_started, y = affect_pos_rated)) +
 432     geom_point() +
 433         geom_smooth()
 434 
 435 
 436 ## add a colour aesthetics
 437 ggplot(result, aes(x = affect_pos_started,
 438                    y = affect_pos_rated,
 439                    colour = accuracy)) +
 440     geom_point(alpha = 0.5) +
 441         geom_smooth() 
 442 
 443 
 444 ###################################################################
 445 
 446 
 447 ggplot(result, aes(x = timepoint, y = as.numeric((accuracy == 1)))) +
 448     geom_point(stat = "summary", fun.y = "mean")
 449 
 450 
 451 require(dplyr)
 452 
 453 res.l <- split(result,result$subject)
 454 
 455 res.pl <- lapply(res.l, function(x) {
 456     ggplot(x, aes(x = timepoint, y = as.numeric((accuracy == 1)))) +
 457         geom_point(stat = "summary", fun.y = "mean") +
 458             ggtitle(x$subject[1])
 459 })
 460 
 461 pdf("all.pdf")
 462 lapply(res.pl, print)
 463 dev.off()
 464 
 465 require(gridExtra)
 466 ggsave("arrange2x2.pdf", do.call(marrangeGrob, c(res.pl, list(nrow=2, ncol=2))))
 467 

Gespeicherte Dateianhänge

Um Dateianhänge in eine Seite einzufügen sollte unbedingt eine Angabe wie attachment:dateiname benutzt werden, wie sie auch in der folgenden Liste der Dateien erscheint. Es sollte niemals die URL des Verweises ("laden") kopiert werden, da sich diese jederzeit ändern kann und damit der Verweis auf die Datei brechen würde.
  • [laden | anzeigen] (2015-10-19 19:25:19, 6.2 KB) [[attachment:dataweek4.zip]]
  • [laden | anzeigen] (2015-10-26 10:49:21, 8.2 KB) [[attachment:dataweek5.zip]]
  • [laden | anzeigen] (2015-10-13 05:01:59, 2.9 KB) [[attachment:fakepersdat.rdata]]
  • [laden | anzeigen] (2015-11-10 07:40:26, 1.3 KB) [[attachment:plar.r]]
  • [laden | anzeigen] (2015-10-05 17:35:00, 1.1 KB) [[attachment:readdata.r]]
  • [laden | anzeigen] (2015-09-28 19:42:02, 4.9 KB) [[attachment:rstuff.r]]
  • [laden | anzeigen] (2015-10-13 10:11:57, 10.0 KB) [[attachment:rstuff2.r]]
  • [laden | anzeigen] (2015-10-05 17:37:33, 9.7 KB) [[attachment:rstuff2ex.r]]
  • [laden | anzeigen] (2015-10-13 05:37:06, 9.6 KB) [[attachment:rstuff3ex.r]]
  • [laden | anzeigen] (2015-10-13 10:11:42, 12.9 KB) [[attachment:rstuff3sol.r]]
  • [laden | anzeigen] (2015-10-19 19:25:32, 9.6 KB) [[attachment:rstuff4ex.r]]
  • [laden | anzeigen] (2015-11-03 06:37:52, 11.8 KB) [[attachment:rstuff4sol.r]]
  • [laden | anzeigen] (2015-10-27 08:07:10, 11.1 KB) [[attachment:rstuff5ex.r]]
  • [laden | anzeigen] (2015-11-03 06:34:59, 9.6 KB) [[attachment:rstuff6ex.r]]
  • [laden | anzeigen] (2015-11-03 10:59:26, 12.3 KB) [[attachment:rstuff6sol.r]]
  • [laden | anzeigen] (2015-11-10 07:40:17, 5.6 KB) [[attachment:rstuff7ex.r]]
  • [laden | anzeigen] (2015-09-28 19:41:50, 1167.2 KB) [[attachment:session1.pdf]]
  • [laden | anzeigen] (2015-10-05 17:36:54, 303.0 KB) [[attachment:session2.pdf]]
  • [laden | anzeigen] (2015-10-13 05:37:00, 285.2 KB) [[attachment:session3.pdf]]
  • [laden | anzeigen] (2015-10-19 19:26:08, 1151.7 KB) [[attachment:session4.pdf]]
  • [laden | anzeigen] (2015-10-26 10:50:14, 1867.6 KB) [[attachment:session5.pdf]]
  • [laden | anzeigen] (2015-11-03 06:35:42, 2039.1 KB) [[attachment:session6.pdf]]
  • [laden | anzeigen] (2015-11-10 07:38:04, 929.2 KB) [[attachment:session7.pdf]]
  • [laden | anzeigen] (2015-09-28 19:43:16, 10.2 KB) [[attachment:week1data.zip]]
  • [laden | anzeigen] (2015-10-05 17:36:29, 10.2 KB) [[attachment:week2data.zip]]
 Alle Dateien | Ausgewählte Dateien: löschen verschieben auf Seite copy to page

Sie dürfen keine Anhänge an diese Seite anhängen!