Dateianhang 'rstuff2ex.r'
Herunterladen 1 ##########################################################
2 ###################### Exercises Indexing ################
3 ##########################################################
4
5 ########## part A
6
7 ## run the following commands!
8 ## try to understand whats going on in each case!
9 x <- c(2, 7, 0, 9, 10, 23, 11, 4, 7, 8, 6, 0)
10 x[4]
11 x[3:5]
12 x[c(1, 5, 8)]
13 x[x > 10]
14 x[(1:6) * 2]
15 all(x > 0)
16 x[x == 0] <- 1
17 x
18
19 all(x > 0)
20 ifelse(round(x/2) == x/2, "even", "odd")
21
22 sum(x > 4)
23
24 ## Display every third element in x
25
26
27
28 ## Display elements that are less than 10,
29 ## but greater than 4
30
31
32
33 ## Modify the vector x, replacing by 10 all values
34 ## that are greater than 10
35
36
37
38 ## Modify the vector x, multiplying by 2 all
39 ## elements that are smaller than 5
40
41
42
43 ## Create a new vector y with elements 0,1,0,1, . . .
44 ## (12 elements) and a vector z that equals x when y=0
45 ## and 3x when y=1.
46 ## (You can do it using ifelse, but there are other possibilities)
47
48
49 help(pima) ## or ?pima
50 names(pima)
51 head(pima)
52
53 pima[1, "glucose"]
54
55 pima[1, 2]
56
57 pima[2, "diastolic"]
58
59 pima[1:10, "bmi"]
60
61 pima[, "bmi"]
62
63 pima[1, -2]
64
65 ## Display the data on the variable age for row 7 in the
66 ## pima data frame.
67
68
69 ## Display all data in row 7.
70
71
72 ## Display the first 10 rows of the data on the variable triceps
73
74
75
76 ####################################################
77 ################### read in data ###################
78 ####################################################
79
80 read.table("data/fishercats.txt", sep=" ",header=T)
81
82 winer <- read.table(
83 "http://socserv.socsci.mcmaster.ca/jfox/Courses/R/ICPSR/Winer.txt",
84 header=T)
85
86
87 xx <- read.table("../logfiles/T0_22029_39_20130503_946-empatom.csv",
88 header = T, skip = 3)
89
90
91 ## you have to copy some part of a excel file (does not work
92 ## on every platform and/or software
93 mydata <- read.delim("clipboard",na.strings=".")
94
95
96 library(XLConnect)
97 my.wb <- loadWorkbook("data/Duncan.xls")
98 sheets <- getSheets(my.wb)
99 content <- readWorksheet(my.wb, sheet=1)
100 head(content)
101
102 require(readxl)
103 x <- read_excel("data/Duncan.xls")
104 head(x)
105
106 ## exercises
107 ## read the file T0_22029_39_20130503_946-empatom.csv using
108 ## read.table() with the suitable arguments (omit the first
109 ## three lines, use the third line as names for the columns);
110 ## do not forget to assign the data to a variable
111
112
113 ## how many rows
114
115
116 ## how many columns
117
118
119 ## how many answers were right and how many wrong
120 ## accuracy == 1; accuracy == 0 resp)
121
122
123
124 ####################################################
125 ####################### dir() ######################
126 ####################################################
127
128 dir()
129
130 dir("../logfiles")
131
132 dir("..",recursive = T)
133
134 dir("../logfiles", full.names = T)
135
136
137 dir("../logfiles", pattern = "2013")
138 dir("../logfiles", pattern = "^T2")
139 dir("../logfiles", pattern = "25502|11944")
140
141
142 ####################################################
143 ####################### apply ######################
144 ####################################################
145
146 lapply(mtcars, mean)
147 sapply(mtcars,mean)
148
149 x <- 1:12
150 dim(x)<-c(2,2,3)
151 apply(x,3,quantile)
152
153
154 tapply(mtcars$mpg,mtcars$cyl,mean)
155 tapply(mtcars$mpg,list(mtcars$cyl,mtcars$vs),mean)
156
157 tapply(mtcars$mpg,list(mtcars$cyl,mtcars$vs),mean,na.rm = T)
158 table(mtcars$cyl,mtcars$vs)
159
160 ## Exercises
161
162 ## from the data frame read in above (from the subject 22029, T0)
163 ## calculate the mean response time response_time for each of
164 ## the categories of accuracy using tapply()
165
166
167
168 ## find also the min, max, and median#
169
170
171
172
173
174
175
176 ## use lapply() or sapply() on the data frame in
177 ## combination with class()
178 ## to get the column types of the data frame
179
180
181
182
183
184
185 ## dir and apply
186
187 ## now use dir() to get all files from id 22111,
188 ## set the full.names argument to true; store them
189 ## in a vector called files
190 files <- dir("../logfiles", pattern = "22111", full.names = T)
191
192 ## then use \texttt{lapply()} in combination with \texttt{read.table()}
193 data.list <- lapply(files,read.table,header = T, skip = 3)
194
195 ## repeat but now reading in addition the files
196 ## from id 25507, 25508, and 25509
197
198
199 ## what is the length of the resulting list? how to interpret the length?
200
201
202
203 ####################################################
204 ##################### reduce #######################
205 ####################################################
206
207 tmp <- lapply(data.list,names)
208 lapply(2:length(data.list), function(x) all(tmp[[1]] == tmp[[x]]))
209
210 lapply(data.list,ncol)
211
212 res <- Reduce(rbind,data.list)
213
214
215 res <- Reduce(function(x,y){
216 names(y) <- names(x)
217 rbind(x,y)
218 }, data.list)
219
220
221 res <- Reduce(function(x,y){
222 names(x) <- names(y) <- paste0("col",1:ncol(x))
223 rbind(x,y)
224 }, data.list)
225
226
227 ## read Lines
228 require(lubridate)
229 files <- dir("../logfiles", full.names = T)
230 filename <- files[1]
231
232 xx <- readLines(filename)
233 head(xx)
234
235 d1 <- read.table(text = xx[4:length(xx)],
236 sep = "\t",header=T)
237 d2 <- read.table(text = xx[1:2],sep = "\t",header=T)
238
239 tmp <- lapply(files,function(filename){
240 xx <- readLines(filename)
241 d1 <- read.table(text = xx[4:length(xx)],sep = "\t",fill = T,header=T)
242 names(d1)[3] <- "trial"
243 d2 <- read.table(text = xx[1:2],sep = "\t",fill = T,header=T)
244 d1$subject <- as.character(d2$subject.)
245 d1$timepoint <- as.character(d2$timepoint.)
246 d1$datetime <- mdy_hms(d2$date.)
247 d1$no.trials <- d2$no_trials
248 return(d1)
249 })
250
251 system.time(result <- Reduce(rbind,tmp))
252
253
254 ####################################################
255 ###################### dplyr ######################
256 ####################################################
257
258 require(dplyr)
259 ## filter
260 prob22029 <- filter(result, subject == "22029_39")
261 table(prob22029$subject)
262
263 prob22029 <- filter(result, subject == "22029_39",
264 accuracy == 1)
265 table(prob22029$subject,prob22029$accuracy)
266
267
268 fast <- filter(result, response_time < 5000)
269 summary(fast$response_time)
270
271 ## select
272 subframe <- select(result, measurement, first_pulse, subject)
273 nrow(subframe)
274
275 subframe <- filter(result, response_time < 5000) %>%
276 select(measurement, first_pulse, subject)
277 nrow(subframe)
278
279 ## arrange
280 head(result[,c("name","measurement","timepoint","response_time")])
281 arr.frame <- arrange(result, response_time)
282
283 head(arr.frame[,c("name","measurement","timepoint","response_time")])
284
285 arr.frame <- arrange(result, response_time) %>%
286 filter(response_time > 0)
287 head(arr.frame[,c("name","measurement","timepoint","response_time")])
288
289 ## mutate/summariz(s)e
290 require(stringr)
291 subframe <- filter(result,subject == "22074_39") %>%
292 mutate(video2 = str_replace(video, "\\.avi", ""),
293 video3 = str_replace(video2, "[0-9]", ""),
294 concern_time = concern_time_ended - concern_time_started )
295
296 table(subframe$subject)
297
298 summary(subframe$concern_time)
299 table(subframe$video3)
300
301 ## summariz(s)e
302 sumframe <- summarise(result,
303 right.perc = sum(accuracy == 1)/n(),
304 mean.resp.time = mean(response_time, na.rm = T))
305
306
307 ## group_by and summarise
308 sumframe <- group_by(result, subject) %>%
309 summarise(right.perc = sum(accuracy == 1)/n(),
310 mean.resp.time = mean(response_time, na.rm = T))
311
312 head(sumframe)
313
314
315 sumframe <- group_by(result, subject, timepoint) %>%
316 summarise(right.perc = sum(accuracy == 1)/n(),
317 mean.resp.time = mean(response_time, na.rm = T))
318
319 head(sumframe)
320
321
322 sumframe <- group_by(result, subject, timepoint) %>%
323 summarise(right.perc = sum(accuracy == 1)/n(),
324 mean.resp.time = mean(response_time, na.rm = T)) %>%
325 arrange(right.perc,desc(mean.resp.time))
326 head(sumframe)
327
328
329 sumframe <- group_by(result, subject, timepoint) %>%
330 summarise(right.perc = sum(accuracy == 1)/n(),
331 mean.resp.time = mean(response_time, na.rm = T)) %>%
332 ungroup() %>%
333 arrange(right.perc,desc(mean.resp.time))
334 head(sumframe)
335
336
337 ## exercises
338
339 ## use select() and filter() in combination (%>%) to
340 ## select all rows belonging to the T0 or the T3 test,
341 ## keep subject, timepoint, accuracy, response_button,
342 ## response_time, affect_time_ended and concern_pos_started
343 ## column. Create a new data frame with a appropriate name.
344
345
346
347
348 ## add two new variables containing the counts of each of
349 ## the possible values of accuracy. Use mutate()
350 ## and sum(accuracy==1).
351
352
353
354 ## use group\_by() and summarise() to extract the minimum
355 ## and maximum response_time per person from the original data frame
356
357
358
359 ## repeat the last exercise, but now group per person and time point
360
361 ###########################################################################
362 ######################### graphics #######################################
363 ###########################################################################
364 require(ggplot2)
365 ## create a new object
366 po <- ggplot()
367 summary(po)
368
369 ## show structure of the object
370 str(po)
371
372 ## example data
373 x1 <- 1:10; y1 <- 1:10; z1 <- 10:1
374 l1 <- LETTERS[1:10]
375 a <- 10; b <- (0:-9)/10:1
376 ex <- data.frame(x1=x1,y1=y1,z=z1,l=l1,a=a,b=b)
377
378 ## create a new ggplot object containing the data
379 po <- ggplot(ex,aes(x=x1,y=y1))
380 summary(po)
381
382 ## scatter plot
383 p1 <- po + geom_point()
384
385
386 ## second example data frame
387 ex2 <- data.frame(x1=sample(1:20),
388 y1=sample(1:10),
389 l=letters[1:20])
390 head(ex2,10)
391
392 ## replace data in po
393 pn <- p1 %+% ex2
394 pn + geom_line()
395
396
397 ## add a text layer
398 my.text <- geom_text(aes(label=l),
399 hjust=1.1,
400 vjust=-0.2)
401 pn + geom_path() + my.text
402
403
404 ### add lines
405 ## one line
406 p1 + geom_abline(intercept=10,slope=-1,
407 colour=rgb(.5,.5,.9))
408 ## two lines
409 p1 + geom_abline(intercept=c(10,1),slope=c(-1,-2),
410 colour=rgb(.5,.5,.9))
411 ## more lines -> takes only the first intercept
412 p1 + geom_abline(intercept=10:1,slope=-(10:1)/10,
413 colour=rgb(.5,.5,.9))
414
415 p1 +
416 geom_abline(aes(slope=b,intercept=a,colour=x1)) +
417 scale_x_continuous(limits=c(0,10))
418
419 p1 + geom_hline(yintercept=1:10)
420 p1 + geom_hline(yintercept=1:10) +
421 geom_vline(xintercept=1:10)
422
Gespeicherte Dateianhänge
Um Dateianhänge in eine Seite einzufügen sollte unbedingt eine Angabe wie attachment:dateiname benutzt werden, wie sie auch in der folgenden Liste der Dateien erscheint. Es sollte niemals die URL des Verweises ("laden") kopiert werden, da sich diese jederzeit ändern kann und damit der Verweis auf die Datei brechen würde.Sie dürfen keine Anhänge an diese Seite anhängen!