Dateianhang 'rstuff3sol.r'
Herunterladen 1 #################################################################
2 ###################### Recap Exercises ##########################
3 #################################################################
4
5 ## use dir() with the pattern argument to get a list of all file
6 ## names containing the number 25497; store the list in a variable
7 ## named files or something similar
8
9 files <- dir("../logfiles/", pattern = "25497", full.names = T)
10
11 ## use lapply() on this list to read in the respective files;
12 ## again store the result in a appropriately names variable
13 ## (remember: we had to set the header and skip arguments)
14
15 data.list <- lapply(files, read.table, header = T, skip = 3)
16
17
18 ## on the resulting list run simultaneously dim() to get the number
19 ## of rows and columns (again using lapply()): How many rows
20 ## and columns we have in these data frames
21 lapply(data.list, dim)
22
23
24 ## now use names() as function to get the column names of each
25 ## of the data frame
26 lapply(data.list, names)
27
28
29 ####################################################
30 ##################### reduce #######################
31 ####################################################
32
33 tmp <- lapply(data.list,names)
34 lapply(2:length(data.list), function(x) all(tmp[[1]] == tmp[[x]]))
35
36 lapply(data.list,ncol)
37
38 res <- Reduce(rbind,data.list)
39
40
41 res <- Reduce(function(x,y) {merge(x,y,all=T)} , data.list, all = T)
42
43 res <- Reduce(function(x,y){
44 names(y) <- names(x)
45 rbind(x,y)
46 }, data.list)
47
48
49 res <- Reduce(function(x,y){
50 names(x) <- names(y) <- paste0("col",1:ncol(x))
51 rbind(x,y)
52 }, data.list)
53
54
55 ## read Lines
56 require(lubridate)
57 files <- dir("../logfiles", full.names = T)
58 filename <- files[1]
59
60 xx <- readLines(filename)
61 head(xx)
62
63 d1 <- read.table(text = xx[4:length(xx)],
64 sep = "\t",header=T)
65 d2 <- read.table(text = xx[1:2],sep = "\t",header=T)
66
67 tmp <- lapply(files,function(filename){
68 xx <- readLines(filename)
69 d1 <- read.table(text = xx[4:length(xx)],sep = "\t",fill = T,header=T)
70 names(d1)[3] <- "trial"
71 d2 <- read.table(text = xx[1:2],sep = "\t",fill = T,header=T)
72 d1$timepoint <- as.character(d2$timepoint.)
73 d1$datetime <- mdy_hms(d2$date.)
74 return(d1)
75 })
76
77 system.time(result <- Reduce(rbind,tmp))
78
79 ## add the subject and the feedback duration to the resulting data frame
80 ## save data frame result using the following syntax (change path and
81 ## name to your needs)
82
83 tmp <- lapply(files,function(filename){
84 xx <- readLines(filename)
85 d1 <- read.table(text = xx[4:length(xx)],sep = "\t",fill = T,header=T)
86 names(d1)[3] <- "trial"
87 d2 <- read.table(text = xx[1:2],sep = "\t",fill = T,header=T)
88 d1$timepoint <- as.character(d2$timepoint.)
89 d1$datetime <- mdy_hms(d2$date.)
90 d1$subject <- d2$subject.
91 d1$feedback.dur <- d2$feedback_duration
92 return(d1)
93 })
94
95 system.time(result <- Reduce(rbind,tmp))
96 save(result, file = "20151013result.rdata")
97
98 load("20151013result.rdata")
99
100 persdat <- result %>% group_by(subject) %>%
101 summarise(
102 aff = mean(affect_pos_rated),
103 respt = mean(response_time[response_time != 0])
104 )
105
106 sample(c("male","female"),
107 size = nrow(persdat),
108 prob = result$affect_pos_rated,
109 replace = T)
110
111 persdat$sex <- apply(persdat,1, function(x){
112 p <- as.numeric(x["aff"])
113 sample(factor(c("male","female")),
114 size = 1,
115 prob = c(p,570-p) )})
116
117
118 persdat$bd <- sample(seq(as.Date("1960-01-01"),as.Date("1995-01-01"),
119 by = "week"), size = nrow(persdat))
120
121 persdat$aff <- NULL
122 persdat$respt <- NULL
123
124 save(persdat,file = "fakepersdat.rdata")
125
126 load("fakepersdat.rdata")
127
128
129 result <- merge(persdat,result, by = "subject")
130
131 ####################################################
132 ###################### dplyr ######################
133 ####################################################
134
135 require(dplyr)
136 ## filter
137 prob22029 <- filter(result, subject == "22029_39")
138 table(prob22029$subject)
139
140 prob22029 <- filter(result, subject == "22029_39",
141 accuracy == 1)
142 table(prob22029$subject,prob22029$accuracy)
143
144
145 prob22029 <- filter(result, subject == "22029_39",
146 accuracy == 1) %>% droplevels
147 table(prob22029$subject,prob22029$accuracy)
148
149 prob22029 <- filter(result, subject == "22029_39"|accuracy == 1)
150 table(prob22029$subject,prob22029$accuracy)
151
152
153 fast <- filter(result, response_time < 5000)
154 summary(fast$response_time)
155
156 ## select
157 subframe <- select(result, measurement, first_pulse, subject)
158 nrow(subframe)
159
160 subframe <- filter(result, response_time < 5000) %>%
161 select(measurement, first_pulse, subject)
162 nrow(subframe)
163
164 ## arrange
165 head(select(arr.frame,name,measurement,timepoint,response_time))
166 ## head(arr.frame[,c("name","measurement","timepoint","response_time")])
167
168 arr.frame <- arrange(result, response_time)
169
170 head(select(arr.frame,name,measurement,timepoint,response_time))
171
172 arr.frame <- arrange(result, response_time) %>%
173 filter(response_time > 0)
174 head(select(arr.frame,name,measurement,timepoint,response_time))
175
176
177 ## mutate/summariz(s)e
178 require(stringr)
179 subframe <- filter(result,subject == "22074_39") %>%
180 mutate(video2 = str_replace(video, "\\.avi", ""),
181 video3 = str_replace(video2, "[0-9]", ""),
182 concern_time = concern_time_ended - concern_time_started )
183
184 table(subframe$subject)
185
186 summary(subframe$concern_time)
187 table(subframe$video3)
188
189 ## summariz(s)e
190 sumframe <- summarise(result,
191 right.perc = sum(accuracy == 1)/n(),
192 mean.resp.time = mean(response_time, na.rm = T))
193
194
195 ## group_by and summarise
196 sumframe <- group_by(result, subject) %>%
197 summarise(right.perc = sum(accuracy == 1)/n(),
198 mean.resp.time = mean(response_time, na.rm = T))
199
200 head(sumframe)
201
202
203 sumframe <- group_by(result, subject, timepoint) %>%
204 summarise(right.perc = sum(accuracy == 1)/n(),
205 mean.resp.time = mean(response_time, na.rm = T))
206
207 head(sumframe)
208
209
210 sumframe <- group_by(result, subject, timepoint) %>%
211 summarise(right.perc = sum(accuracy == 1)/n(),
212 mean.resp.time = mean(response_time, na.rm = T)) %>%
213 arrange(right.perc,desc(mean.resp.time))
214 head(sumframe)
215
216
217 sumframe <- group_by(result, subject, timepoint) %>%
218 summarise(right.perc = sum(accuracy == 1)/n(),
219 mean.resp.time = mean(response_time, na.rm = T)) %>%
220 ungroup() %>%
221 arrange(right.perc,desc(mean.resp.time))
222 head(sumframe)
223
224
225 ###########################################################################
226 ######################### dplyr exercises ################################
227 ###########################################################################
228 require(dplyr)
229 ## use select() and filter() in combination (%>%) to select
230 ## all rows belonging to the T0 or the T3 test, keep subject,
231 ## timepoint, accuracy, response_button, response_time,
232 ## affect_time_ended and concern_pos_started column.
233 ## Create a new data frame with a appropriate name.
234
235 res <- result %>% filter(timepoint %in% c("T0","T3")) %>%
236 select(subject, timepoint, accuracy, response_button, response_time,
237 affect_time_ended, concern_pos_started)
238
239 res <- filter(result, timepoint %in% c("T0","T3")) %>%
240 select(subject, timepoint, accuracy, response_button, response_time,
241 affect_time_ended, concern_pos_started)
242
243
244 ## add three new variables containing the counts of each of the possible
245 ## values of accuracy. Use mutate() and something like sum(accuracy==1).
246
247
248 res <- res %>% mutate(
249 acc1 = sum(accuracy==1),
250 acc0 = sum(accuracy==0),
251 accm1 = sum(accuracy==-1)
252 )
253
254 ## use group_by() and summarise() to extract the minimum and maximum
255 ## response_time per person from the original data frame
256
257 sum.df <- result %>% group_by(subject) %>%
258 summarise(
259 min.resp.time = min(response_time),
260 max.resp.time = max(response_time)
261 )
262
263
264 sum.df <- group_by(result,subject) %>%
265 summarise(
266 min.resp.time = min(response_time),
267 max.resp.time = max(response_time)
268 )
269
270 ## look at the resulting data frame. What is the problem and the
271 ## possible reason. And what is the solution
272
273 sum.df <- result %>% group_by(subject) %>%
274 summarise(
275 min.resp.time = min(response_time[accuracy!=-1]),
276 max.resp.time = max(response_time[accuracy!=-1])
277 )
278
279
280 ## repeat the last exercise, but now group per person and time point
281 sum.df2 <- result %>% group_by(subject,timepoint) %>%
282 summarise(
283 min.resp.time = min(response_time[accuracy!=-1]),
284 max.resp.time = max(response_time[accuracy!=-1])
285 )
286
287
288 save(sum.df2, file = "../20151013sumdf.rdata")
289
290
291 ## reshaping
292 require(reshape2)
293 sum.df2.wide <- dcast(sum.df2,
294 subject ~ timepoint,
295 value.var = c("min.resp.time"))
296
297
298 sum.df2 <- as.data.frame(sum.df2)
299 sum.df2.wide <- reshape(sum.df2,
300 timevar = "timepoint",
301 idvar = "subject",
302 direction = "wide")
303
304
305 ###########################################################################
306 ######################### graphics #######################################
307 ###########################################################################
308
309 ## traditional graphics
310
311 x <- rnorm(40)
312 y <- factor(sample(c("yes","now"),40,replace=T))
313 par(mfrow=c(2,2))
314 boxplot(x~y)
315 boxplot(x~y,boxwex=0.5)
316 z <- sample(1:5)
317 barplot(z)
318 barplot(z, horiz=T)
319
320 require(ggplot2)
321 ## create a new object
322 po <- ggplot()
323 summary(po)
324
325 ## show structure of the object
326 str(po)
327
328 ## example data
329 x1 <- 1:10; y1 <- 1:10; z1 <- 10:1
330 l1 <- LETTERS[1:10]
331 a <- 10; b <- (0:-9)/10:1
332 ex <- data.frame(x1=x1,y1=y1,z=z1,l=l1,a=a,b=b)
333
334 ## create a new ggplot object containing the data
335 po <- ggplot(ex,aes(x=x1,y=y1))
336 summary(po)
337
338 ## scatter plot
339 p1 <- po + geom_point()
340
341
342 ## second example data frame
343 ex2 <- data.frame(x1=sample(1:20),
344 y1=sample(1:10),
345 l=letters[1:20])
346 head(ex2,10)
347
348 ## replace data in po
349 pn <- p1 %+% ex2
350 pn + geom_line()
351
352
353 ## add a text layer
354 my.text <- geom_text(aes(label=l),
355 hjust=1.1,
356 vjust=-0.2)
357
358 pn + my.text + geom_path()
359
360
361 ### add lines
362 ## one line
363 p1 + geom_abline(intercept=10,slope=-1,
364 colour=rgb(.5,.5,.9))
365 ## two lines
366 p1 + geom_abline(intercept=c(10,1),slope=c(-1,-2),
367 colour=rgb(.5,.5,.9))
368 ## more lines -> takes only the first intercept
369 p1 + geom_abline(intercept=10:1,slope=-(10:1)/10,
370 colour=rgb(.5,.5,.9))
371
372 p1 +
373 geom_abline(aes(slope=b,intercept=a,colour=x1)) +
374 scale_x_continuous(limits=c(0,10))
375
376 p1 + geom_hline(yintercept=1:10)
377 p1 + geom_hline(yintercept=1:10) +
378 geom_vline(xintercept=1:10)
379
380
381
382 ###########################################################################
383 ################## Exercises ggplot2 ######################################
384 ###########################################################################
385 require(ggplot2)
386 load("../20151013sumdf.rdata")
387
388 ## use the last summary data frame from the dplyr exercise (with
389 ## subject and timepoint as grouping variables) to create a scatter
390 ## plot (geom\_point()) colouring the points different per timepoint
391
392
393 ggplot(sum.df2, aes(x = min.resp.time, y = max.resp.time, color = timepoint)) +
394 geom_point() +
395 facet_wrap(~timepoint,nrow = 3) + theme_stata()
396
397 require(ggplot2)
398 head(result)
399
400
401 result$accuracy <- factor(result$accuracy,
402 levels = 0:1,
403 labels = c("wrong","right"))
404
405
406 ## Map the category variable to the x-axis and the
407 ## affect_pos_rated to the y-axis, use geom_boxplot()
408
409 ggplot(result, aes(x = factor(response_button),
410 y = affect_pos_rated,
411 fill = factor(category))) +
412 geom_boxplot()
413
414
415 ## use facet\_wrap as above to facet the plot per timepoint
416 ggplot(result, aes(x = factor(response_button),
417 y = affect_pos_rated,
418 fill = factor(category))) +
419 geom_boxplot() +
420 facet_wrap(~timepoint)
421
422
423 ## create a scatter plot with affect\_pos\_started on the x-axis
424 ## and affect_pos_rated on the y-axis
425
426 ggplot(result, aes(x = affect_pos_started, y = affect_pos_rated)) +
427 geom_point()
428
429
430 ## add a geom_smooth()
431 ggplot(result, aes(x = affect_pos_started, y = affect_pos_rated)) +
432 geom_point() +
433 geom_smooth()
434
435
436 ## add a colour aesthetics
437 ggplot(result, aes(x = affect_pos_started,
438 y = affect_pos_rated,
439 colour = accuracy)) +
440 geom_point(alpha = 0.5) +
441 geom_smooth()
442
443
444 ###################################################################
445
446
447 ggplot(result, aes(x = timepoint, y = as.numeric((accuracy == 1)))) +
448 geom_point(stat = "summary", fun.y = "mean")
449
450
451 require(dplyr)
452
453 res.l <- split(result,result$subject)
454
455 res.pl <- lapply(res.l, function(x) {
456 ggplot(x, aes(x = timepoint, y = as.numeric((accuracy == 1)))) +
457 geom_point(stat = "summary", fun.y = "mean") +
458 ggtitle(x$subject[1])
459 })
460
461 pdf("all.pdf")
462 lapply(res.pl, print)
463 dev.off()
464
465 require(gridExtra)
466 ggsave("arrange2x2.pdf", do.call(marrangeGrob, c(res.pl, list(nrow=2, ncol=2))))
467
Gespeicherte Dateianhänge
Um Dateianhänge in eine Seite einzufügen sollte unbedingt eine Angabe wie attachment:dateiname benutzt werden, wie sie auch in der folgenden Liste der Dateien erscheint. Es sollte niemals die URL des Verweises ("laden") kopiert werden, da sich diese jederzeit ändern kann und damit der Verweis auf die Datei brechen würde.Sie dürfen keine Anhänge an diese Seite anhängen!