Dateianhang 'rstuff4sol.r'
Herunterladen 1 #########################################################
2 ###################### Recap ###########################
3 #########################################################
4
5 require(dplyr)
6 load("../week3/20151013result.rdata")
7
8 subframe <- select(result, measurement, first_pulse, subject)
9
10 subframe <- filter(result, response_time < 5000) %>%
11 select(measurement, first_pulse, subject)
12
13
14 sumframe <- group_by(result, subject) %>%
15 summarise(right.perc = sum(accuracy == 1)/n(),
16 mean.resp.time = mean(response_time, na.rm = T))
17
18 head(sumframe)
19
20
21 require(ggplot2)
22 p <- ggplot(mtcars, aes(x = wt, y = mpg))
23 p + geom_point()
24
25 #########################################################
26 ################### Exercises ###########################
27 #########################################################
28
29 ## load the data and run the following four lines to
30 ## create a new variable containing the sex of the
31 ## person in the video
32
33 load("../week3/20151013result.rdata")
34
35 require(car)
36 require(stringr)
37
38 result$video2 <- str_replace_all(result$video,"\\.avi|[0-9]", "")
39 result$video.sex <- recode(result$video2,"c('Alexandra','Anahita','Anna','Birgit','Carolin','Charlotte','Franziska','Gabi','Hannelore','Isabel','Jana','Julia','Juliane','Kathrin','Katja','Klara','Lina','Mailin','Marie','Lara','Laura','Lena','Mona','Natalie','Nina','Olga','Sabine','Sabrina','Silke','Stephanie')='female';c('Achim','Anton','Aziz','Ben','Bernhard','Marius','Markus','Matthias','Michael','Nabeel','Paul','Peter','Richard','Roland','Rolf','Stephan','Thomas','Tim','Tobias','Uwe','Christoph','Daniel','David','Felix','Gerd','Hannes','Holger','Ivko','Klaus','Leo') = 'male'")
40
41
42
43
44 ## use dplyr to summarize your data per time point
45 ## and per person:
46 ## calculate the 1. proportion of right answers and
47 ## 2. the mean response time per
48 ## person and time point
49 ## useing group_by() and summarize()
50 require(dplyr)
51 persubject <- result %>% group_by(subject, timepoint) %>%
52 summarize(perc.right = sum(accuracy == 1)/n(),
53 mean.time = mean(response_time[accuracy != - 1]))
54
55
56 ## now visualize the proportion dependent on time:
57 ## use ggplot() and geom_boxplot()
58 ## map time to x and the proportion to y using aes()
59 ## inside of ggplot()
60 require(ggplot2)
61
62 ggplot(persubject, aes(x = timepoint, y = mean.time)) +
63 geom_boxplot()
64
65 ggplot(persubject, aes(x = timepoint, y = perc.right)) +
66 geom_boxplot()
67
68
69 ## repeat the exercise, but this time group additional by
70 ## the sex of the person in the video
71
72 persubject2 <- result %>% group_by(subject, timepoint, video.sex) %>%
73 summarize(perc.right = sum(accuracy == 1)/n(),
74 mean.time = mean(response_time[accuracy != - 1]))
75
76
77 ggplot(persubject2, aes(x = timepoint, y = mean.time, colour = video.sex)) +
78 geom_boxplot()
79
80 ggplot(persubject2, aes(x = timepoint, y = perc.right, colour = video.sex)) +
81 geom_boxplot()
82
83
84 ggplot(persubject2, aes(x = timepoint, y = perc.right, fill = video.sex)) +
85 geom_boxplot()
86
87
88 ## visualize for each of the trials (1-48) the mean time
89 ## and the percentage of right answers
90 ## use facet_wrap to plot separate plots for each time point
91
92
93 pertrial <- result %>% group_by(trial, timepoint) %>%
94 summarize(perc.right = sum(accuracy == 1)/n(),
95 mean.time = mean(response_time[accuracy != - 1]))
96
97
98 ggplot(pertrial) +
99 geom_point( aes(x = trial, y = perc.right)) +
100 facet_wrap(~ timepoint, nrow = 5)
101
102
103
104 #########################################################
105 ################### Modelling ###########################
106 #########################################################
107
108
109 setwd("/media/mandy/Samsung_T1/mpicbs/2015kurs2/week4/")
110
111 require(dplyr)
112 ## Anova
113 oneway <- read.table("data/anovagarden.txt",header = T)
114 names(oneway) <- c("ozone","garden")
115
116 oneway
117
118 t(oneway %>% group_by(garden) %>% summarise(mean=mean(ozone)))
119
120 mean(oneway$ozone)
121 sum((oneway$ozone-mean(oneway$ozone))**2)
122
123 (oneway$ozone-mean(oneway$ozone))**2
124
125
126 ### Anova Grafiken
127 plot(oneway$ozone,pch=19)
128 abline(h=mean(oneway$ozone))
129 for(i in 1:14){lines(c(i,i),c(mean(oneway$ozone),oneway$ozone[i]))}
130
131 plot(oneway$ozone,pch=19,ylim=c(4,12))
132 abline(h=mean(oneway$ozone))
133 for(i in 1:14){lines(c(i,i),c(mean(oneway$ozone),oneway$ozone[i]))}
134 text(x=1:14,y=4,labels=(oneway$ozone-mean(oneway$ozone))**2,cex = 1.8)
135
136 oneway <- oneway %>% group_by(garden) %>% mutate(mean=mean(ozone))
137
138 par("mar")
139 par(mar=c(5,4,0,2)+0.1)
140 plot(oneway$ozone,pch=21,bg=oneway$garden)
141 abline(h=tapply(oneway$ozone,oneway$garden,mean),col=c(1,2))
142 for(i in 1:14){lines(c(i,i),c(oneway$mean[i],oneway$ozone[i]),col=oneway$garden[i])}
143
144
145 plot(oneway$ozone,pch=21,bg=oneway$garden,ylim = c(4,12))
146 abline(h=tapply(oneway$ozone,oneway$garden,mean),col=c(1,2))
147 for(i in 1:14){lines(c(i,i),c(oneway$mean[i],oneway$ozone[i]),col=oneway$garden[i])}
148 text(x=1:14,y=4,labels=(oneway$ozone-oneway$mean)**2,cex = 1.8)
149
150 mm <- aov(ozone ~ garden, data=oneway)
151
152 plot(seq(0.1,20,by=0.1),
153 df(seq(0.1,20,by=0.1),1,12),
154 type="l",
155 xlab = "F",
156 ylab = "density")
157 x <- seq(15.75,20,by=0.1)
158 polygon(c(x,rev(x)),c(df(x,1,12),rep(0,length(x))),col="firebrick2")
159 x <- seq(0.01,15.75,by=0.1)
160 polygon(c(x,rev(x)),c(df(x,1,12),rep(0,length(x))),col="darkgreen")
161
162 abline(v=15.75)
163 x
164 ## anova syntax R
165 mm <- lm(ozone ~ garden, data=oneway)
166 anova(mm)
167
168 m2 <- aov(ozone ~ garden, data=oneway)
169 summary(m2)
170
171 ########################################################################
172 ######################## Exer Anova #################################
173 ########################################################################
174
175
176 ## install and load the granovaGG package (a package for visualization
177 ## of ANOVAs)
178 require(granovaGG)
179
180 ## load the arousal data frame and use the stack()} command to bring
181 ## the data in the long form.
182 data(arousal)
183 datalong <- stack(arousal)
184 datalong$A <- grepl("\\.A",datalong$ind)
185 datalong$B <- grepl("\\.B",datalong$ind)
186
187 ## Do a anova analysis. Is there a difference at least 2 of the gddroups?
188 m1 <- aov(values ~ ind, data = datalong)
189 m1 <- aov(values ~ A * B, data = datalong)
190 summary(m1)
191
192 ## If indicated do a post-hoc test.
193 TukeyHSD(m1)
194 plot(TukeyHSD(m1))
195
196 ## Visualize your results
197 ggplot(datalong,aes(x=ind,y=values)) + geom_boxplot()
198
199 granovagg.1w(datalong$values,group = datalong$ind)
200
201 granovagg.1w(arousal)
202
203
204 #########################################################
205 ###################### lin regression ##################
206 #########################################################
207
208
209 load("data/births.rdata")
210
211
212 # metric response...
213 m.1 <- lm(bweight ~ gestwks, data=births)
214 coef(m.1)
215
216 summary(m.1)
217
218 ## exercise
219
220 ## create a scatter plot using ggplot the independent
221 ## variable on the x-axis and the dependent variable on
222 ## the y-axis
223
224 ## ggplot knows lm
225
226 ggplot(births,aes(x = gestwks, y = bweight)) +
227 geom_point() +
228 geom_smooth(method = "lm", se = F, colour = "red") +
229 geom_smooth( se = F, size = 3, linetype = 4)
230
231
232
233 ggplot(births,aes(x = gestwks, y = bweight)) +
234 geom_point() +
235 geom_smooth(method = "lm")
236
237 ggplot(births,aes(x = gestwks, y = bweight, colour = hyp)) +
238 geom_point() +
239 geom_smooth(method = "lm", formula = y ~ poly(x,2))
240
241
242
243
244 # extractor funcions
245 summary(m.1)
246
247 coef(m.1)
248 confint(m.1)
249
250 coef(summary(m.1))
251
252 m <- m.1
253 ## other useful functions
254
255 ## illustrate meaning of fitted and residuals
256 tmpdf <- data.frame(gestwks=births$gestwks, bweight=births$bweight , fitted=fitted(m), residuals=resid(m.1))
257
258 m.2 <- lm(bweight ~ gestwks, data=births, na.action = na.exclude)
259 tmpdf <- data.frame(gestwks=births$gestwks, bweight=births$bweight , fitted=fitted(m.2), residuals=resid(m.2))
260
261 ggplot(tmpdf, aes( x = gestwks, y = bweight)) +
262 geom_point() +
263 geom_line(aes(y = fitted))
264
265
266
267 ## illustrate meaning of predict
268 ga <- 10:60
269 pred.birth.weight <- predict(m.1,newdata=data.frame(gestwks=ga))
270 tmpdf <- data.frame(gestwks=ga,bweight=pred.birth.weight)
271
272 ## illustrate meaning of deviance
273 m0 <- lm(bweight~1,data=births)
274 deviance(m0) ## total sum of sqares
275 deviance(m.1) ## error sum of squares of our model
276
277 (deviance(m0)-deviance(m))/deviance(m0) ## r-squared: slightly different because there are some missing values
278
279 btmp <- births[complete.cases(births[,c("bweight","gestwks")]),]
280 m0 <- lm(bweight~1,data=btmp) ## repeat with this model (missing values excluded) you get the appropriate r-squared
281
282 (deviance(m0)-deviance(m))/deviance(m0) ## r-squared: slightly different because there are some missing values
283
284 ## ANOVA
285 ## slide explanatory v is factor
286 m.aov <- lm(bweight ~ hyp, data=births)
287 coef(m.aov) ## gives a mean and a difference of means
288
289 by(births$bweight,births$hyp,mean) ## gives the two means
290
291 m.aov2 <- lm(bweight ~ -1 + hyp, data=births) ## gives also the two means
292 coef(m.aov2)
293
294
295 summary(m.aov)
296
297 ## What is the appropriate plot to visualize the effect of hyp?
298
299 ggplot(births, aes(x = hyp, y = bweight)) +
300 geom_boxplot() +
301 stat_summary(fun.data = "mean_cl_boot")
302
303
304 ## What is the most common test to test these effect?
305 t.test(bweight ~ hyp, data = births)
306
307
308
309 ## model with both gestwks and hyp
310 m.3 <- lm(bweight ~ hyp + gestwks, data=births)
311 summary(m.3)
312
313 coef(m.3)
314 confint(m.3)
315
316 m.4 <- lm(bweight ~ hyp + gestwks - 1, data=births)
317 summary(m.4)
318
319 coef(m.4)
320 confint(m.4)
321
322
323 ## interaction models, scaled
324 m.5 <- lm(bweight ~ hyp * gestwks, data=births)
325
326
327 births$gwsc <- births$gestwks-40
328 m.6 <- lm(bweight ~ hyp * gwsc, data=births)
329
330 summary(m)
331
332 ## aov
333 m <- lm(bweight ~ gestwks, data=births)
334 anova(m)
335
336 sum(anova(m)$Sum)
337
338 anova(m)$Sum[1]/sum(anova(m)$Sum)
339
340 summary(m)
341 summary(m)$r.squared
342
343
344
345
346
347 ##################### additional stuff ###########################
348 ### jackknife
349
350 leverage <- function(x){1/length(x)+(x-mean(x))**2/sum((x-mean(x))**2)}
351 jtmp <- numeric()
352 for(i in 1:nrow(births)){
353 m <- lm(bweight ~ hyp + gestwks, data=births[-i,])
354 jtmp[i] <- summary(m)$r.squared
355 }
356
357 m1a <- lm(bweight ~ hyp + gestwks, data=births[-1,])
358
359
360 png("model1.png",width=1000,height=600)
361 par(cex.lab=2,cex.axis=1.5, mar=c(5,5,4,2)+0.1)
362 plot(births$gestwks,births$bweight,col=births$hyp,cex=2,pch=".",xlim=c(30,43),ylim=c(1500,4200))
363 abline(coef(m)[c(1,3)],lwd=2)
364 abline(coef(m)[1]+coef(m)[2],coef(m)[3],col="red",lwd=2)
365 text(x=30,y=1600,"A",cex=2)
366 text(x=32,y=1600,"B",col="red",cex=2)
367 text(36.5,2300,"hyp=hyper",col="red",cex=2)
368 text(34,2600,"hyp=normal",cex=2)
369 dev.off()
370
371 ## slide a multivariable model
372 m <- lm(bweight ~ hyp + gestwks, data=births)
373 summary(m)
374
375 ## slide interaction between
376 m <- lm(bweight ~ hyp * gestwks, data=births)
377
378 png("model2.png",width=1000,height=800)
379 par(cex.lab=2,cex.axis=1.5, mar=c(5,5,4,2)+0.1)
380 plot(births$gestwks,births$bweight,col=births$hyp,cex=2,pch=".",xlim=c(30,43),ylim=c(1500,4200))
381 abline(coef(m)[c(1,3)],lwd=2)
382 abline(coef(m)[1]+coef(m)[2],coef(m)[3]+coef(m)[4],col="red",lwd=2)
383 text(x=30,y=1680,"A",cex=2)
384 text(x=32,y=1500,"B",col="red",cex=2)
385 text(36.5,2300,"hyp=hyper",col="red",cex=2)
386 text(34,2600,"hyp=normal",cex=2)
387 dev.off()
388
389
390
391 #########################################################
392 #################### Exercises #########################
393 #########################################################
394
395 ## gambling
396
397 ## Fit a regression model with the expenditure on gambling
398 ## as the response and the sex, status, income
399 ## and verbal score as predictors. Present the output.
400
401 require(faraway)
402
403 m.tg <- lm(gamble ~ sex + status + income + verbal, data = teengamb)
404
405 summary(m.tg)
406
407 ## What percentage of variation in the response is explained
408 ## by these predictors?
409
410 sum.m.tg <- summary(m.tg)
411 sum.m.tg$r.squared
412
413 ## Which observation has the largest (positive) residual?
414 ## Give the case number.
415
416 which.max(residuals(m.tg))
417
418 residuals(m.tg)
419
420 ## Compute the correlation of the residuals with the fitted values.
421 ## use cor() or cor.test()
422 cor(residuals(m.tg),fitted(m.tg))
423 cor.test(residuals(m.tg),fitted(m.tg))
424
425 ## Compute the correlation of the residuals with the income.
426 cor(residuals(m.tg),teengamb$income)
427 cor.test(residuals(m.tg),teengamb$income)
428
429 ## For all other predictors held constant, what would be the
430 ## difference in predicted expenditure on gambling for a male
431 ## compared to a female?
432
433 summary(m.tg)
434
Gespeicherte Dateianhänge
Um Dateianhänge in eine Seite einzufügen sollte unbedingt eine Angabe wie attachment:dateiname benutzt werden, wie sie auch in der folgenden Liste der Dateien erscheint. Es sollte niemals die URL des Verweises ("laden") kopiert werden, da sich diese jederzeit ändern kann und damit der Verweis auf die Datei brechen würde.Sie dürfen keine Anhänge an diese Seite anhängen!