Dateianhang 'rstuff3ex.r'
Herunterladen 1 #################################################################
2 ###################### Recap Exercises ##########################
3 #################################################################
4
5 ## use dir() with the pattern argument to get a list of all file
6 ## names containing the number 25497; store the list in a variable
7 ## named files or something similar
8
9
10
11 ## use lapply() on this list to read in the respective files;
12 ## again store the result in a appropriately names variable
13 ## (remember: we had to set the header and skip arguments)
14
15
16
17
18 ## on the resulting list run simultaneously dim() to get the number
19 ## of rows and columns (again using lapply()): How many rows
20 ## and columns we have in these data frames
21
22
23
24 ## now use names() as function to get the column names of each
25 ## of the data frame
26
27
28
29 ####################################################
30 ##################### reduce #######################
31 ####################################################
32
33 tmp <- lapply(data.list,names)
34 lapply(2:length(data.list), function(x) all(tmp[[1]] == tmp[[x]]))
35
36 lapply(data.list,ncol)
37
38 res <- Reduce(rbind,data.list)
39
40
41 res <- Reduce(function(x,y){
42 names(y) <- names(x)
43 rbind(x,y)
44 }, data.list)
45
46
47 res <- Reduce(function(x,y){
48 names(x) <- names(y) <- paste0("col",1:ncol(x))
49 rbind(x,y)
50 }, data.list)
51
52
53 ## read Lines
54 require(lubridate)
55 files <- dir("../logfiles", full.names = T)
56 filename <- files[1]
57
58 xx <- readLines(filename)
59 head(xx)
60
61 d1 <- read.table(text = xx[4:length(xx)],
62 sep = "\t",header=T)
63 d2 <- read.table(text = xx[1:2],sep = "\t",header=T)
64
65 tmp <- lapply(files,function(filename){
66 xx <- readLines(filename)
67 d1 <- read.table(text = xx[4:length(xx)],sep = "\t",fill = T,header=T)
68 names(d1)[3] <- "trial"
69 d2 <- read.table(text = xx[1:2],sep = "\t",fill = T,header=T)
70 d1$timepoint <- as.character(d2$timepoint.)
71 d1$datetime <- mdy_hms(d2$date.)
72 return(d1)
73 })
74
75 system.time(result <- Reduce(rbind,tmp))
76
77 ## add the subject and the feedback duration to the resulting data frame
78 ## save data frame result using the following syntax (change path and
79 ## name to your needs)
80
81
82
83
84
85
86
87 save(result, file = "../20151013result.rdata")
88
89 ## load the fake subject data (fakepersdat.rdata)
90 ## using load(), merge theme to the result data frame
91
92
93
94
95 ####################################################
96 ###################### dplyr ######################
97 ####################################################
98
99 require(dplyr)
100 ## filter
101 prob22029 <- filter(result, subject == "22029_39")
102 table(prob22029$subject)
103
104 prob22029 <- filter(result, subject == "22029_39",
105 accuracy == 1)
106 table(prob22029$subject,prob22029$accuracy)
107
108
109 prob22029 <- filter(result, subject == "22029_39",
110 accuracy == 1) %>% droplevels
111 table(prob22029$subject,prob22029$accuracy)
112
113 prob22029 <- filter(result, subject == "22029_39"|accuracy == 1)
114 table(prob22029$subject,prob22029$accuracy)
115
116
117 fast <- filter(result, response_time < 5000)
118 summary(fast$response_time)
119
120 ## select
121 subframe <- select(result, measurement, first_pulse, subject)
122 nrow(subframe)
123
124 subframe <- filter(result, response_time < 5000) %>%
125 select(measurement, first_pulse, subject)
126 nrow(subframe)
127
128 ## arrange
129 head(select(arr.frame,name,measurement,timepoint,response_time))
130 ## head(arr.frame[,c("name","measurement","timepoint","response_time")])
131
132 arr.frame <- arrange(result, response_time)
133
134 head(select(arr.frame,name,measurement,timepoint,response_time))
135
136 arr.frame <- arrange(result, response_time) %>%
137 filter(response_time > 0)
138 head(select(arr.frame,name,measurement,timepoint,response_time))
139
140
141 ## mutate/summariz(s)e
142 require(stringr)
143 subframe <- filter(result,subject == "22074_39") %>%
144 mutate(video2 = str_replace(video, "\\.avi", ""),
145 video3 = str_replace(video2, "[0-9]", ""),
146 concern_time = concern_time_ended - concern_time_started )
147
148 table(subframe$subject)
149
150 summary(subframe$concern_time)
151 table(subframe$video3)
152
153 ## summariz(s)e
154 sumframe <- summarise(result,
155 right.perc = sum(accuracy == 1)/n(),
156 mean.resp.time = mean(response_time, na.rm = T))
157
158
159 ## group_by and summarise
160 sumframe <- group_by(result, subject) %>%
161 summarise(right.perc = sum(accuracy == 1)/n(),
162 mean.resp.time = mean(response_time, na.rm = T))
163
164 head(sumframe)
165
166
167 sumframe <- group_by(result, subject, timepoint) %>%
168 summarise(right.perc = sum(accuracy == 1)/n(),
169 mean.resp.time = mean(response_time, na.rm = T))
170
171 head(sumframe)
172
173
174 sumframe <- group_by(result, subject, timepoint) %>%
175 summarise(right.perc = sum(accuracy == 1)/n(),
176 mean.resp.time = mean(response_time, na.rm = T)) %>%
177 arrange(right.perc,desc(mean.resp.time))
178 head(sumframe)
179
180
181 sumframe <- group_by(result, subject, timepoint) %>%
182 summarise(right.perc = sum(accuracy == 1)/n(),
183 mean.resp.time = mean(response_time, na.rm = T)) %>%
184 ungroup() %>%
185 arrange(right.perc,desc(mean.resp.time))
186 head(sumframe)
187
188
189 ###########################################################################
190 ######################### dplyr exercises ################################
191 ###########################################################################
192 require(dplyr)
193 ## use select() and filter() in combination (%>%) to select
194 ## all rows belonging to the T0 or the T3 test, keep subject,
195 ## timepoint, accuracy, response_button, response_time,
196 ## affect_time_ended and concern_pos_started column.
197 ## Create a new data frame with a appropriate name.
198
199
200
201
202
203 ## add three new variables containing the counts of each of the possible
204 ## values of accuracy. Use mutate() and something like sum(accuracy==1).
205
206
207
208
209
210
211 ## use group_by() and summarise() to extract the minimum and maximum
212 ## response_time per person from the original data frame
213
214
215
216
217
218
219 ## look at the resulting data frame. What is the problem and the
220 ## possible reason. And what is the solution
221
222
223
224
225
226
227
228 ## repeat the last exercise, but now group per person and time point
229
230
231
232
233
234
235
236
237
238
239 ## reshaping
240 require(reshape2)
241 sum.df2.wide <- dcast(sum.df2,
242 subject ~ timepoint,
243 value.var = c("min.resp.time"))
244
245
246 sum.df2 <- as.data.frame(sum.df2)
247 sum.df2.wide <- reshape(sum.df2,
248 timevar = "timepoint",
249 idvar = "subject",
250 direction = "wide")
251
252
253 ###########################################################################
254 ######################### graphics #######################################
255 ###########################################################################
256
257 ## traditional graphics
258
259 x <- rnorm(40)
260 y <- factor(sample(c("yes","now"),40,replace=T))
261 par(mfrow=c(2,2))
262 boxplot(x~y)
263 boxplot(x~y,boxwex=0.5)
264 z <- sample(1:5)
265 barplot(z)
266 barplot(z, horiz=T)
267
268
269
270 require(ggplot2)
271 ## create a new object
272 po <- ggplot()
273 summary(po)
274
275 ## show structure of the object
276 str(po)
277
278 ## example data
279 x1 <- 1:10; y1 <- 1:10; z1 <- 10:1
280 l1 <- LETTERS[1:10]
281 a <- 10; b <- (0:-9)/10:1
282 ex <- data.frame(x1=x1,y1=y1,z=z1,l=l1,a=a,b=b)
283
284 ## create a new ggplot object containing the data
285 po <- ggplot(ex,aes(x=x1,y=y1))
286 summary(po)
287
288 ## scatter plot
289 p1 <- po + geom_point()
290
291
292 ## second example data frame
293 ex2 <- data.frame(x1=sample(1:20),
294 y1=sample(1:10),
295 l=letters[1:20])
296 head(ex2,10)
297
298 ## replace data in po
299 pn <- p1 %+% ex2
300 pn + geom_line()
301
302
303 ## add a text layer
304 my.text <- geom_text(aes(label=l),
305 hjust=1.1,
306 vjust=-0.2)
307 pn + geom_path() + my.text
308
309
310 ### add lines
311 ## one line
312 p1 + geom_abline(intercept=10,slope=-1,
313 colour=rgb(.5,.5,.9))
314 ## two lines
315 p1 + geom_abline(intercept=c(10,1),slope=c(-1,-2),
316 colour=rgb(.5,.5,.9))
317 ## more lines -> takes only the first intercept
318 p1 + geom_abline(intercept=10:1,slope=-(10:1)/10,
319 colour=rgb(.5,.5,.9))
320
321 p1 +
322 geom_abline(aes(slope=b,intercept=a,colour=x1)) +
323 scale_x_continuous(limits=c(0,10))
324
325 p1 + geom_hline(yintercept=1:10)
326 p1 + geom_hline(yintercept=1:10) +
327 geom_vline(xintercept=1:10)
328
329
330
331 ###########################################################################
332 ################## Exercises ggplot2 ######################################
333 ###########################################################################
334 require(ggplot2)
335 load("../20151013sumdf.rdata")
336
337 ## use the last summary data frame from the dplyr exercise (with
338 ## subject and timepoint as grouping variables) to create a scatter
339 ## plot (geom_point()) colouring the points different per timepoint
340
341
342
343
344
345
346 require(ggplot2)
347 head(result)
348
349
350
351
352
353 ## Map the category variable to the x-axis and the
354 ## affect_pos_rated to the y-axis, use geom_boxplot()
355
356
357
358
359
360
361
362 ## use facet\_wrap as above to facet the plot per timepoint
363
364
365
366
367
368
369
370 ## create a scatter plot with affect\_pos\_started on the x-axis
371 ## and affect_pos_rated on the y-axis
372
373
374
375
376 ## add a geom_smooth()
377
378
379
380
381
382
383 ## add a colour aesthetics
384
385
386
387
388
389
390
391 ###################################################################
392
393
394 ggplot(result, aes(x = timepoint, y = as.numeric((accuracy == 1)))) +
395 geom_point(stat = "summary", fun.y = "mean")
396
397
398 require(dplyr)
399
400 res.l <- split(result,result$subject)
401
402 res.pl <- lapply(res.l, function(x) {
403 ggplot(x, aes(x = timepoint, y = as.numeric((accuracy == 1)))) +
404 geom_point(stat = "summary", fun.y = "mean") +
405 ggtitle(x$subject[1])
406 })
407
408 pdf("all.pdf")
409 lapply(res.pl, print)
410 dev.off()
411
412 require(gridExtra)
413 ggsave("arrange2x2.pdf", do.call(marrangeGrob, c(res.pl, list(nrow=2, ncol=2))))
414
415
416 result$accuracy <- factor(result$accuracy,
417 levels = 0:1,
418 labels = c("wrong","right"))
419
Gespeicherte Dateianhänge
Um Dateianhänge in eine Seite einzufügen sollte unbedingt eine Angabe wie attachment:dateiname benutzt werden, wie sie auch in der folgenden Liste der Dateien erscheint. Es sollte niemals die URL des Verweises ("laden") kopiert werden, da sich diese jederzeit ändern kann und damit der Verweis auf die Datei brechen würde.Sie dürfen keine Anhänge an diese Seite anhängen!