Dateianhang 'rstuff2.r'
Herunterladen 1 ##########################################################
2 ###################### Exercises Indexing ################
3 ##########################################################
4
5 ########## part A
6
7 ## run the following commands!
8 ## try to understand whats going on in each case!
9 x <- c(2, 7, 0, 9, 10, 23, 11, 4, 7, 8, 6, 0)
10 x[4]
11 x[3:5]
12 x[c(1, 5, 8)]
13 x[x > 10]
14 x[(1:6) * 2]
15
16 all(x > 0)
17
18 x == 0
19 x[x == 0]
20
21 x[x == 0] <- 1
22 x
23
24 all(x > 0)
25
26
27 ifelse(round(x/2) == x/2, "even", "odd")
28
29 sum(x > 4)
30
31 ## Display every third element in x
32
33 x[seq(1,12,by = 3)]
34 x[seq(1,length(x),by = 3)]
35
36 x[rep(c(F,F,T),length(x)/3)]
37 x[c(3,6,9,12)]
38
39 ## Display elements that are less than 10,
40 ## but greater than 4
41
42 x[x > 4 & x < 10]
43 x
44 ## Modify the vector x, replacing by 10 all values
45 ## that are greater than 10
46
47 x[x > 10] <- 10
48
49 ## Modify the vector x, multiplying by 2 all
50 ## elements that are smaller than 5
51
52 x[x < 5] <- 2 * x[x < 5]
53
54 ## Create a new vector y with elements 0,1,0,1, . . .
55 ## (12 elements) and a vector z that equals x when y=0
56 ## and 3x when y=1.
57 ## (You can do it using ifelse, but there are other possibilities)
58
59 y <- rep(0:1,6)
60 z <- ifelse(y==0,x,3 * x)
61 z2 <- x + y * 2 * x
62
63 require(faraway)
64 help(pima) ## or ?pima
65 names(pima)
66 head(pima,3)
67
68 pima[1, "glucose"]
69
70 pima[1, 2]
71
72 pima[2, "diastolic"]
73
74 pima[1:10, "bmi"]
75
76 pima[, "bmi"]
77
78 pima[1, -2]
79
80 -which(names(pima)=="glucose")
81 pima[1,-which(names(pima) %in% c("glucose","age"))]
82
83 ## Display the data on the variable age for row 7 in the
84 ## pima data frame.
85
86
87 ## Display all data in row 7.
88
89
90 ## Display the first 10 rows of the data on the variable triceps
91
92
93
94 ####################################################
95 ################### read in data ###################
96 ####################################################
97
98 read.table("data/fishercats.txt", sep=" ",header=T)
99
100 winer <- read.table(
101 "http://socserv.socsci.mcmaster.ca/jfox/Courses/R/ICPSR/Winer.txt",
102 header=T)
103
104
105 xx <- read.table("../logfiles/T0_22029_39_20130503_946-empatom.csv",
106 header = T, skip = 3)
107
108
109 ## you have to copy some part of a excel file (does not work
110 ## on every platform and/or software
111 mydata <- read.delim("clipboard",na.strings=".")
112
113
114 library(XLConnect)
115 my.wb <- loadWorkbook("data/Duncan.xls")
116 sheets <- getSheets(my.wb)
117 content <- readWorksheet(my.wb, sheet=1)
118 head(content)
119
120 require(readxl)
121 x <- read_excel("data/Duncan.xls")
122 head(x)
123
124 ####################################################
125 ################### read data ex ###################
126 ####################################################
127
128 pers22029 <- read.table("../logfiles/T0_22029_39_20130503_946-empatom.csv", skip = 3, header = T )
129
130 sum(x$accuracy == 0)
131 sum(x$accuracy == 1)
132
133 table(x$accuracy, useNA = "ifany")
134
135 ####################################################
136 ####################### dir() ######################
137 ####################################################
138
139 dir()
140
141 dir("../logfiles")
142
143 dir("..",recursive = T)
144
145 dir("../logfiles", full.names = T)
146
147
148 dir("../logfiles", pattern = "2013")
149 dir("../logfiles", pattern = "^T2")
150 dir("../logfiles", pattern = "25502|11944")
151
152 ###
153
154 ## files from persons 25512, 25485, 25461
155 dir("../logfiles", pattern = "25512|25485|25461")
156
157 ## file list 2014; how many from 2014
158 files <- dir("../logfiles", pattern = "2014")
159 length(files)
160
161 ## how many files from T3
162 files <- dir("../logfiles", pattern = "^T3")
163 length(files)
164
165
166 files <- dir("../logfiles", pattern = "^T3.+22283")
167 length(files)
168
169 ####################################################
170 ####################### apply ######################
171 ####################################################
172
173 lapply(mtcars, mean)
174 sapply(mtcars,mean)
175
176 x <- 1:12
177 dim(x) <- c(3,4)
178 apply(x,1,mean)
179 apply(x,2,mean)
180
181 rowMeans(x)
182 colMeans(x)
183
184 x <- 1:12
185 dim(x)<-c(2,2,3)
186 apply(x,3,quantile)
187
188
189 tapply(mtcars$mpg,mtcars$cyl,mean)
190 tapply(mtcars$mpg,rep(0:1,16),mean)
191
192 tapply(mtcars$mpg,list(mtcars$cyl,mtcars$vs),mean)
193
194 tapply(mtcars$mpg,list(mtcars$cyl,mtcars$vs),mean,na.rm = T)
195 table(mtcars$cyl,mtcars$vs)
196
197 ## Exercises
198
199 ## from the data frame read in above (from the subject 22029, T0)
200 ## calculate the mean response time response_time for each of
201 ## the categories of accuracy using tapply()
202
203 tapply(xx$response_time,xx$accuracy,mean)
204
205 ## find also the min, max, and median#
206 tapply(xx$response_time,xx$accuracy,min)
207 tapply(xx$response_time,xx$accuracy,max)
208 tapply(xx$response_time,xx$accuracy,median)
209
210 tapply(xx$response_time,xx$accuracy,summary)
211
212
213
214 ## use lapply() or sapply() on the data frame in
215 ## combination with class()
216 ## to get the column types of the data frame
217
218 lapply(xx,class)
219
220 table(sapply(xx,class))
221
222 files <- dir("../logfiles",full.names = T)
223
224 files <- dir("../logfiles", pattern = "22111", full.names = T)
225 data.list <- lapply(files,read.table,header = T, skip = 3)
226
227 head(data.list[[2]])
228
229 length(data.list)
230
231 files <- dir("../logfiles", pattern = "22111|25507|25508|25509", full.names = T)
232 data.list <- lapply(files,read.table,header = T, skip = 3)
233
234 length(data.list)
235 names(data.list)
236
237
238 ####################################################
239 ##################### reduce #######################
240 ####################################################
241
242 tmp <- lapply(data.list,names)
243 lapply(2:length(data.list), function(x) all(tmp[[1]] == tmp[[x]]))
244
245 lapply(data.list,ncol)
246
247 res <- Reduce(rbind,data.list)
248
249
250 res <- Reduce(function(x,y){
251 names(y) <- names(x)
252 rbind(x,y)
253 }, data.list)
254
255
256 res <- Reduce(function(x,y){
257 names(x) <- names(y) <- paste0("col",1:ncol(x))
258 rbind(x,y)
259 }, data.list)
260
261
262 ## read Lines
263 require(lubridate)
264 files <- dir("../logfiles", full.names = T)
265 filename <- files[1]
266
267 xx <- readLines(filename)
268 head(xx)
269
270 d1 <- read.table(text = xx[4:length(xx)],
271 sep = "\t",header=T)
272 d2 <- read.table(text = xx[1:2],sep = "\t",header=T)
273
274 tmp <- lapply(files,function(filename){
275 xx <- readLines(filename)
276 d1 <- read.table(text = xx[4:length(xx)],sep = "\t",fill = T,header=T)
277 names(d1)[3] <- "trial"
278 d2 <- read.table(text = xx[1:2],sep = "\t",fill = T,header=T)
279 d1$subject <- as.character(d2$subject.)
280 d1$timepoint <- as.character(d2$timepoint.)
281 d1$datetime <- mdy_hms(d2$date.)
282 d1$no.trials <- d2$no_trials
283 return(d1)
284 })
285
286 system.time(result <- Reduce(rbind,tmp))
287
288
289 ####################################################
290 ###################### dplyr ######################
291 ####################################################
292
293 require(dplyr)
294 ## filter
295 prob22029 <- filter(result, subject == "22029_39")
296 table(prob22029$subject)
297
298 prob22029 <- filter(result, subject == "22029_39",
299 accuracy == 1)
300 table(prob22029$subject,prob22029$accuracy)
301
302
303 fast <- filter(result, response_time < 5000)
304 summary(fast$response_time)
305
306 ## select
307 subframe <- select(result, measurement, first_pulse, subject)
308 nrow(subframe)
309
310 subframe <- filter(result, response_time < 5000) %>%
311 select(measurement, first_pulse, subject)
312 nrow(subframe)
313
314 ## arrange
315 head(result[,c("name","measurement","timepoint","response_time")])
316 arr.frame <- arrange(result, response_time)
317
318 head(arr.frame[,c("name","measurement","timepoint","response_time")])
319
320 arr.frame <- arrange(result, response_time) %>%
321 filter(response_time > 0)
322 head(arr.frame[,c("name","measurement","timepoint","response_time")])
323
324 ## mutate/summariz(s)e
325 require(stringr)
326 subframe <- filter(result,subject == "22074_39") %>%
327 mutate(video2 = str_replace(video, "\\.avi", ""),
328 video3 = str_replace(video2, "[0-9]", ""),
329 concern_time = concern_time_ended - concern_time_started )
330
331 table(subframe$subject)
332
333 summary(subframe$concern_time)
334 table(subframe$video3)
335
336 ## summariz(s)e
337 sumframe <- summarise(result,
338 right.perc = sum(accuracy == 1)/n(),
339 mean.resp.time = mean(response_time, na.rm = T))
340
341
342 ## group_by and summarise
343 sumframe <- group_by(result, subject) %>%
344 summarise(right.perc = sum(accuracy == 1)/n(),
345 mean.resp.time = mean(response_time, na.rm = T))
346
347 head(sumframe)
348
349
350 sumframe <- group_by(result, subject, timepoint) %>%
351 summarise(right.perc = sum(accuracy == 1)/n(),
352 mean.resp.time = mean(response_time, na.rm = T))
353
354 head(sumframe)
355
356
357 sumframe <- group_by(result, subject, timepoint) %>%
358 summarise(right.perc = sum(accuracy == 1)/n(),
359 mean.resp.time = mean(response_time, na.rm = T)) %>%
360 arrange(right.perc,desc(mean.resp.time))
361 head(sumframe)
362
363
364 sumframe <- group_by(result, subject, timepoint) %>%
365 summarise(right.perc = sum(accuracy == 1)/n(),
366 mean.resp.time = mean(response_time, na.rm = T)) %>%
367 ungroup() %>%
368 arrange(right.perc,desc(mean.resp.time))
369 head(sumframe)
370
371
372 ###########################################################################
373 ######################### graphics #######################################
374 ###########################################################################
375 require(ggplot2)
376 ## create a new object
377 po <- ggplot()
378 summary(po)
379
380 ## show structure of the object
381 str(po)
382
383 ## example data
384 x1 <- 1:10; y1 <- 1:10; z1 <- 10:1
385 l1 <- LETTERS[1:10]
386 a <- 10; b <- (0:-9)/10:1
387 ex <- data.frame(x1=x1,y1=y1,z=z1,l=l1,a=a,b=b)
388
389 ## create a new ggplot object containing the data
390 po <- ggplot(ex,aes(x=x1,y=y1))
391 summary(po)
392
393 ## scatter plot
394 p1 <- po + geom_point()
395
396
397 ## second example data frame
398 ex2 <- data.frame(x1=sample(1:20),
399 y1=sample(1:10),
400 l=letters[1:20])
401 head(ex2,10)
402
403 ## replace data in po
404 pn <- p1 %+% ex2
405 pn + geom_line()
406
407
408 ## add a text layer
409 my.text <- geom_text(aes(label=l),
410 hjust=1.1,
411 vjust=-0.2)
412 pn + geom_path() + my.text
413
414
415 ### add lines
416 ## one line
417 p1 + geom_abline(intercept=10,slope=-1,
418 colour=rgb(.5,.5,.9))
419 ## two lines
420 p1 + geom_abline(intercept=c(10,1),slope=c(-1,-2),
421 colour=rgb(.5,.5,.9))
422 ## more lines -> takes only the first intercept
423 p1 + geom_abline(intercept=10:1,slope=-(10:1)/10,
424 colour=rgb(.5,.5,.9))
425
426 p1 +
427 geom_abline(aes(slope=b,intercept=a,colour=x1)) +
428 scale_x_continuous(limits=c(0,10))
429
430 p1 + geom_hline(yintercept=1:10)
431 p1 + geom_hline(yintercept=1:10) +
432 geom_vline(xintercept=1:10)
433
Gespeicherte Dateianhänge
Um Dateianhänge in eine Seite einzufügen sollte unbedingt eine Angabe wie attachment:dateiname benutzt werden, wie sie auch in der folgenden Liste der Dateien erscheint. Es sollte niemals die URL des Verweises ("laden") kopiert werden, da sich diese jederzeit ändern kann und damit der Verweis auf die Datei brechen würde.Sie dürfen keine Anhänge an diese Seite anhängen!