Dateianhang 'rstuff4ex.r'
Herunterladen 1 #########################################################
2 ###################### Recap ###########################
3 #########################################################
4
5 load("../week3/20151013result.rdata")
6
7 subframe <- select(result, measurement, first_pulse, subject)
8
9 subframe <- filter(result, response_time < 5000) %>%
10 select(measurement, first_pulse, subject)
11
12
13 sumframe <- group_by(result, subject) %>%
14 summarise(right.perc = sum(accuracy == 1)/n(),
15 mean.resp.time = mean(response_time, na.rm = T))
16
17 head(sumframe)
18
19 p <- ggplot(mtcars, aes(wt, mpg))
20 p + geom_point()
21
22 #########################################################
23 ################### Exercises ###########################
24 #########################################################
25
26 ## load the data and run the following four lines to
27 ## create a new variable containing the sex of the
28 ## person in the video
29
30
31
32
33 require(car)
34 require(stringr)
35
36 result$video2 <- str_replace_all(result$video,"\\.avi|[0-9]", "")
37 result$video.sex <- recode(result$video2,"c('Alexandra','Anahita','Anna','Birgit','Carolin','Charlotte','Franziska','Gabi','Hannelore','Isabel','Jana','Julia','Juliane','Kathrin','Katja','Klara','Lina','Mailin','Marie','Lara','Laura','Lena','Mona','Natalie','Nina','Olga','Sabine','Sabrina','Silke','Stephanie')='female';c('Achim','Anton','Aziz','Ben','Bernhard','Marius','Markus','Matthias','Michael','Nabeel','Paul','Peter','Richard','Roland','Rolf','Stephan','Thomas','Tim','Tobias','Uwe','Christoph','Daniel','David','Felix','Gerd','Hannes','Holger','Ivko','Klaus','Leo') = 'male'")
38
39
40
41 ## use dplyr to summarize your data per time point
42 ## and per person:
43 ## calculate the 1. proportion of right answers and
44 ## 2. the mean response time per
45 ## person and time point
46 ## useing group_by() and summarize()
47
48
49
50
51
52 ## now visualize the proportion dependent on time:
53 ## use ggplot() and geom_boxplot()
54 ## map time to x and the proportion to y using aes()
55 ## inside of ggplot()
56
57
58
59
60
61
62
63
64 ## repeat the exercise, but this time group additional by
65 ## the sex of the person in the video
66
67
68
69
70
71
72
73
74
75 ## visualize for each of the trials (1-48) the mean time
76 ## and the percentage of right answers
77 ## use facet_wrap to plot separate plots for each time point
78
79
80
81
82
83
84
85
86
87 #########################################################
88 ################### Modelling ###########################
89 #########################################################
90
91
92 setwd("/media/mandy/Samsung_T1/mpicbs/2015kurs2/week4/")
93
94 require(dplyr)
95 ## Anova
96 oneway <- read.table("data/anovagarden.txt",header = T)
97 names(oneway) <- c("ozone","garden")
98
99 oneway
100
101 t(oneway %>% group_by(garden) %>% summarise(mean=mean(ozone)))
102
103 mean(oneway$ozone)
104 sum((oneway$ozone-mean(oneway$ozone))**2)
105
106 (oneway$ozone-mean(oneway$ozone))**2
107
108
109 ### Anova Grafiken
110 plot(oneway$ozone,pch=19)
111 abline(h=mean(oneway$ozone))
112 for(i in 1:14){lines(c(i,i),c(mean(oneway$ozone),oneway$ozone[i]))}
113
114 plot(oneway$ozone,pch=19,ylim=c(4,12))
115 abline(h=mean(oneway$ozone))
116 for(i in 1:14){lines(c(i,i),c(mean(oneway$ozone),oneway$ozone[i]))}
117 text(x=1:14,y=4,labels=(oneway$ozone-mean(oneway$ozone))**2,cex = 1.8)
118
119 oneway <- oneway %>% group_by(garden) %>% mutate(mean=mean(ozone))
120
121 par("mar")
122 par(mar=c(5,4,0,2)+0.1)
123 plot(oneway$ozone,pch=21,bg=oneway$garden)
124 abline(h=tapply(oneway$ozone,oneway$garden,mean),col=c(1,2))
125 for(i in 1:14){lines(c(i,i),c(oneway$mean[i],oneway$ozone[i]),col=oneway$garden[i])}
126
127
128 plot(oneway$ozone,pch=21,bg=oneway$garden,ylim = c(4,12))
129 abline(h=tapply(oneway$ozone,oneway$garden,mean),col=c(1,2))
130 for(i in 1:14){lines(c(i,i),c(oneway$mean[i],oneway$ozone[i]),col=oneway$garden[i])}
131 text(x=1:14,y=4,labels=(oneway$ozone-oneway$mean)**2,cex = 1.8)
132
133 mm <- aov(ozone ~ garden, data=oneway)
134
135 plot(seq(0.1,20,by=0.1),
136 df(seq(0.1,20,by=0.1),1,12),
137 type="l",
138 xlab = "F",
139 ylab = "density")
140 x <- seq(15.75,20,by=0.1)
141 polygon(c(x,rev(x)),c(df(x,1,12),rep(0,length(x))),col="firebrick2")
142 x <- seq(0.01,15.75,by=0.1)
143 polygon(c(x,rev(x)),c(df(x,1,12),rep(0,length(x))),col="darkgreen")
144
145 abline(v=15.75)
146
147 ## anova syntax R
148 mm <- lm(ozone ~ garden, data=oneway)
149 anova(mm)
150
151 m2 <- aov(ozone ~ garden, data=oneway)
152 summary(m2)
153
154 ########################################################################
155 ######################## Exer Anova #################################
156 ########################################################################
157
158
159 ## install and load the granovaGG package (a package for visualization
160 ## of ANOVAs)
161
162
163 ## load the arousal data frame and use the stack()} command to bring
164 ## the data in the long form.
165
166
167
168
169
170 ## Do a anova analysis. Is there a difference at least 2 of the gddroups?
171
172
173
174
175
176 ## If indicated do a post-hoc test.
177
178
179 ## Visualize your results
180
181
182
183
184
185
186
187 #########################################################
188 ###################### lin regression ##################
189 #########################################################
190
191
192 load("data/births.rdata")
193
194
195 # metric response...
196 m.1 <- lm(bweight ~ gestwks, data=births)
197 coef(m.1)
198
199 summary(m.1)
200
201 ## exercise
202
203 ## create a scatter plot using ggplot the independent
204 ## variable on the x-axis and the dependent variable on
205 ## the y-axis
206
207 ## ggplot knows lm
208
209
210
211
212
213
214
215
216
217
218 # extractor funcions
219 summary(m.1)
220
221 coef(m.1)
222 confint(m.1)
223
224 coef(summary(m.1))
225
226 ## other useful functions
227
228 ## illustrate meaning of fitted and residuals
229 tmpdf <- data.frame(gestwks=births$gestwks, bweight=births$bweight , fitted=fitted(m), residuals=resid(m.1))
230
231 m.2 <- lm(bweight ~ gestwks, data=births, na.action = na.exclude)
232 tmpdf <- data.frame(gestwks=births$gestwks, bweight=births$bweight , fitted=fitted(m.2), residuals=resid(m.2))
233
234 ggplot(tmpdf, aes( x = gestwks, y = bweight)) +
235 geom_point() +
236 geom_line(aes(y = fitted))
237
238
239
240 ## illustrate meaning of predict
241 ga <- 10:60
242 pred.birth.weight <- predict(m.1,newdata=data.frame(gestwks=ga))
243 tmpdf <- data.frame(gestwks=ga,bweight=pred.birth.weight)
244
245 ## illustrate meaning of deviance
246 m0 <- lm(bweight~1,data=births)
247 deviance(m0) ## total sum of sqares
248 deviance(m.1) ## error sum of squares of our model
249
250 (deviance(m0)-deviance(m))/deviance(m0) ## r-squared: slightly different because there are some missing values
251
252 btmp <- births[complete.cases(births[,c("bweight","gestwks")]),]
253 m0 <- lm(bweight~1,data=btmp) ## repeat with this model (missing values excluded) you get the appropriate r-squared
254
255 (deviance(m0)-deviance(m))/deviance(m0) ## r-squared: slightly different because there are some missing values
256
257 ## ANOVA
258 ## slide explanatory v is factor
259 m.aov <- lm(bweight ~ hyp, data=births)
260 coef(m.aov) ## gives a mean and a difference of means
261
262 by(births$bweight,births$hyp,mean) ## gives the two means
263
264 m.aov2 <- lm(bweight ~ -1 + hyp, data=births) ## gives also the two means
265 coef(m.aov2)
266
267
268 summary(m.aov)
269
270 ## What is the appropriate plot to visualize the effect of hyp?
271
272 ggplot(births, aes(x = hyp, y = bweight)) +
273 geom_boxplot() +
274 stat_summary(fun.data = "mean_cl_boot")
275
276
277 ## What is the most common test to test these effect?
278 t.test(bweight ~ hyp, data = births)
279
280
281
282 ## model with both gestwks and hyp
283 m.3 <- lm(bweight ~ hyp + gestwks, data=births)
284 summary(m.3)
285
286 coef(m.3)
287 confint(m.3)
288
289 m.4 <- lm(bweight ~ hyp + gestwks - 1, data=births)
290 summary(m.4)
291
292 coef(m.4)
293 confint(m.4)
294
295
296 ## interaction models, scaled
297 m.5 <- lm(bweight ~ hyp * gestwks, data=births)
298
299
300 births$gwsc <- births$gestwks-40
301 m.6 <- lm(bweight ~ hyp * gwsc, data=births)
302
303 summary(m)
304
305 ## aov
306 m <- lm(bweight ~ gestwks, data=births)
307 anova(m)
308
309 sum(anova(m)$Sum)
310
311 anova(m)$Sum[1]/sum(anova(m)$Sum)
312
313 summary(m)
314 summary(m)$r.squared
315
316 #########################################################
317 #################### Exercises #########################
318 #########################################################
319
320 ## gambling
321
322 ## Fit a regression model with the expenditure on gambling
323 ## as the response and the sex, status, income
324 ## and verbal score as predictors. Present the output.
325
326 require(faraway)
327
328
329
330
331
332 ## What percentage of variation in the response is explained
333 ## by these predictors?
334
335
336
337
338 ## Which observation has the largest (positive) residual?
339 ## Give the case number.
340
341
342
343
344
345 ## Compute the correlation of the residuals with the fitted values.
346 ## use cor() or cor.test()
347
348
349
350 ## Compute the correlation of the residuals with the income.
351
352
353
354 ## For all other predictors held constant, what would be the
355 ## difference in predicted expenditure on gambling for a male
356 ## compared to a female?
357
358
359
360
361
362
363 ##################### additional stuff ###########################
364 ### jackknife
365
366 leverage <- function(x){1/length(x)+(x-mean(x))**2/sum((x-mean(x))**2)}
367 jtmp <- numeric()
368 for(i in 1:nrow(births)){
369 m <- lm(bweight ~ hyp + gestwks, data=births[-i,])
370 jtmp[i] <- summary(m)$r.squared
371 }
372
373 m1a <- lm(bweight ~ hyp + gestwks, data=births[-1,])
374
375
376 png("model1.png",width=1000,height=600)
377 par(cex.lab=2,cex.axis=1.5, mar=c(5,5,4,2)+0.1)
378 plot(births$gestwks,births$bweight,col=births$hyp,cex=2,pch=".",xlim=c(30,43),ylim=c(1500,4200))
379 abline(coef(m)[c(1,3)],lwd=2)
380 abline(coef(m)[1]+coef(m)[2],coef(m)[3],col="red",lwd=2)
381 text(x=30,y=1600,"A",cex=2)
382 text(x=32,y=1600,"B",col="red",cex=2)
383 text(36.5,2300,"hyp=hyper",col="red",cex=2)
384 text(34,2600,"hyp=normal",cex=2)
385 dev.off()
386
387 ## slide a multivariable model
388 m <- lm(bweight ~ hyp + gestwks, data=births)
389 summary(m)
390
391 ## slide interaction between
392 m <- lm(bweight ~ hyp * gestwks, data=births)
393
394 png("model2.png",width=1000,height=800)
395 par(cex.lab=2,cex.axis=1.5, mar=c(5,5,4,2)+0.1)
396 plot(births$gestwks,births$bweight,col=births$hyp,cex=2,pch=".",xlim=c(30,43),ylim=c(1500,4200))
397 abline(coef(m)[c(1,3)],lwd=2)
398 abline(coef(m)[1]+coef(m)[2],coef(m)[3]+coef(m)[4],col="red",lwd=2)
399 text(x=30,y=1680,"A",cex=2)
400 text(x=32,y=1500,"B",col="red",cex=2)
401 text(36.5,2300,"hyp=hyper",col="red",cex=2)
402 text(34,2600,"hyp=normal",cex=2)
403 dev.off()
404
Gespeicherte Dateianhänge
Um Dateianhänge in eine Seite einzufügen sollte unbedingt eine Angabe wie attachment:dateiname benutzt werden, wie sie auch in der folgenden Liste der Dateien erscheint. Es sollte niemals die URL des Verweises ("laden") kopiert werden, da sich diese jederzeit ändern kann und damit der Verweis auf die Datei brechen würde.Sie dürfen keine Anhänge an diese Seite anhängen!