welcome: please sign in
location: attachment:rstuff4sol.r von RstatisTik/RstatisTikPortal/RcourSeSep

Dateianhang 'rstuff4sol.r'

Herunterladen

   1  #########################################################
   2 ###################### Recap  ###########################
   3 #########################################################
   4 
   5 require(dplyr)
   6 load("../week3/20151013result.rdata")
   7 
   8 subframe <- select(result, measurement, first_pulse, subject)
   9 
  10 subframe <- filter(result, response_time < 5000) %>%
  11   select(measurement, first_pulse, subject)
  12 
  13 
  14 sumframe <- group_by(result, subject) %>%
  15   summarise(right.perc = sum(accuracy == 1)/n(),
  16             mean.resp.time = mean(response_time, na.rm = T))
  17 
  18 head(sumframe)
  19 
  20 
  21 require(ggplot2)
  22 p <- ggplot(mtcars, aes(x = wt, y = mpg))
  23 p + geom_point()
  24 
  25 #########################################################
  26 ################### Exercises ###########################
  27 #########################################################
  28 
  29 ## load the data and run the following four lines to
  30 ## create a new variable containing the sex of the
  31 ## person in the video
  32 
  33 load("../week3/20151013result.rdata")
  34 
  35 require(car)
  36 require(stringr)
  37 
  38 result$video2 <- str_replace_all(result$video,"\\.avi|[0-9]", "")
  39 result$video.sex <- recode(result$video2,"c('Alexandra','Anahita','Anna','Birgit','Carolin','Charlotte','Franziska','Gabi','Hannelore','Isabel','Jana','Julia','Juliane','Kathrin','Katja','Klara','Lina','Mailin','Marie','Lara','Laura','Lena','Mona','Natalie','Nina','Olga','Sabine','Sabrina','Silke','Stephanie')='female';c('Achim','Anton','Aziz','Ben','Bernhard','Marius','Markus','Matthias','Michael','Nabeel','Paul','Peter','Richard','Roland','Rolf','Stephan','Thomas','Tim','Tobias','Uwe','Christoph','Daniel','David','Felix','Gerd','Hannes','Holger','Ivko','Klaus','Leo') = 'male'")
  40 
  41 
  42 
  43 
  44 ## use dplyr to summarize your data per time point
  45 ## and per person:
  46 ## calculate the 1. proportion of right answers and
  47 ## 2. the mean response time per
  48 ## person and time point
  49 ## useing group_by() and summarize()
  50 require(dplyr)
  51 persubject <- result %>% group_by(subject, timepoint) %>%
  52   summarize(perc.right = sum(accuracy == 1)/n(),
  53             mean.time = mean(response_time[accuracy != - 1]))
  54 
  55 
  56 ## now visualize the proportion dependent on time:
  57 ## use ggplot() and geom_boxplot()
  58 ## map time to x and the proportion to y using aes()
  59 ## inside of ggplot()
  60 require(ggplot2)
  61 
  62 ggplot(persubject,  aes(x = timepoint, y = mean.time)) +
  63   geom_boxplot()
  64 
  65 ggplot(persubject,  aes(x = timepoint, y = perc.right)) +
  66   geom_boxplot()
  67 
  68 
  69 ## repeat the exercise,  but this time group additional by
  70 ## the sex of the person in the video
  71 
  72 persubject2 <- result %>% group_by(subject, timepoint, video.sex) %>%
  73   summarize(perc.right = sum(accuracy == 1)/n(),
  74             mean.time = mean(response_time[accuracy != - 1]))
  75 
  76 
  77 ggplot(persubject2,  aes(x = timepoint, y = mean.time, colour =  video.sex)) +
  78   geom_boxplot()
  79 
  80 ggplot(persubject2,  aes(x = timepoint, y = perc.right, colour =  video.sex)) +
  81   geom_boxplot()
  82 
  83 
  84 ggplot(persubject2,  aes(x = timepoint, y = perc.right, fill =  video.sex)) +
  85   geom_boxplot()
  86 
  87 
  88 ## visualize for each of the trials (1-48) the mean time
  89 ## and the percentage of right answers
  90 ## use facet_wrap to plot separate plots for each time point
  91 
  92 
  93 pertrial <- result %>% group_by(trial, timepoint) %>%
  94   summarize(perc.right = sum(accuracy == 1)/n(),
  95             mean.time = mean(response_time[accuracy != - 1]))
  96 
  97 
  98 ggplot(pertrial) +
  99   geom_point( aes(x = trial, y = perc.right)) +
 100   facet_wrap(~ timepoint, nrow = 5)
 101 
 102 
 103 
 104 #########################################################
 105 ################### Modelling ###########################
 106 #########################################################
 107 
 108 
 109 setwd("/media/mandy/Samsung_T1/mpicbs/2015kurs2/week4/")
 110 
 111 require(dplyr)
 112 ## Anova
 113 oneway <- read.table("data/anovagarden.txt",header = T)
 114 names(oneway) <- c("ozone","garden")
 115 
 116 oneway
 117 
 118 t(oneway %>% group_by(garden) %>% summarise(mean=mean(ozone)))
 119 
 120 mean(oneway$ozone)
 121 sum((oneway$ozone-mean(oneway$ozone))**2)
 122 
 123 (oneway$ozone-mean(oneway$ozone))**2
 124 
 125 
 126 ### Anova Grafiken
 127 plot(oneway$ozone,pch=19)
 128 abline(h=mean(oneway$ozone))
 129 for(i in 1:14){lines(c(i,i),c(mean(oneway$ozone),oneway$ozone[i]))}
 130 
 131 plot(oneway$ozone,pch=19,ylim=c(4,12))
 132 abline(h=mean(oneway$ozone))
 133 for(i in 1:14){lines(c(i,i),c(mean(oneway$ozone),oneway$ozone[i]))}
 134 text(x=1:14,y=4,labels=(oneway$ozone-mean(oneway$ozone))**2,cex = 1.8)
 135 
 136 oneway <- oneway %>% group_by(garden) %>% mutate(mean=mean(ozone))
 137 
 138 par("mar")
 139 par(mar=c(5,4,0,2)+0.1)
 140 plot(oneway$ozone,pch=21,bg=oneway$garden)
 141 abline(h=tapply(oneway$ozone,oneway$garden,mean),col=c(1,2))
 142 for(i in 1:14){lines(c(i,i),c(oneway$mean[i],oneway$ozone[i]),col=oneway$garden[i])}
 143 
 144 
 145 plot(oneway$ozone,pch=21,bg=oneway$garden,ylim = c(4,12))
 146 abline(h=tapply(oneway$ozone,oneway$garden,mean),col=c(1,2))
 147 for(i in 1:14){lines(c(i,i),c(oneway$mean[i],oneway$ozone[i]),col=oneway$garden[i])}
 148 text(x=1:14,y=4,labels=(oneway$ozone-oneway$mean)**2,cex = 1.8)
 149 
 150 mm <- aov(ozone ~ garden, data=oneway)
 151 
 152 plot(seq(0.1,20,by=0.1),
 153      df(seq(0.1,20,by=0.1),1,12),
 154      type="l",
 155      xlab = "F",
 156      ylab = "density")
 157 x <- seq(15.75,20,by=0.1)
 158 polygon(c(x,rev(x)),c(df(x,1,12),rep(0,length(x))),col="firebrick2")
 159 x <- seq(0.01,15.75,by=0.1)
 160 polygon(c(x,rev(x)),c(df(x,1,12),rep(0,length(x))),col="darkgreen")
 161 
 162 abline(v=15.75)
 163 x
 164 ## anova syntax R
 165 mm <- lm(ozone ~ garden, data=oneway)
 166 anova(mm)
 167 
 168 m2 <- aov(ozone ~ garden, data=oneway)
 169 summary(m2)
 170 
 171 ########################################################################
 172 ######################## Exer Anova    #################################
 173 ########################################################################
 174 
 175 
 176 ## install and load the granovaGG package (a package for visualization
 177 ## of ANOVAs)
 178 require(granovaGG)
 179 
 180 ## load the arousal data frame and use the stack()} command to bring
 181 ## the data in the long form.
 182 data(arousal)
 183 datalong <- stack(arousal)
 184 datalong$A <- grepl("\\.A",datalong$ind)
 185 datalong$B <- grepl("\\.B",datalong$ind)
 186 
 187 ## Do a anova analysis. Is there a difference at least 2 of the gddroups?
 188 m1 <- aov(values ~ ind, data = datalong)
 189 m1 <- aov(values ~ A * B, data = datalong)
 190 summary(m1)
 191 
 192 ## If indicated do a post-hoc test.
 193 TukeyHSD(m1)
 194 plot(TukeyHSD(m1))
 195 
 196 ## Visualize your results
 197 ggplot(datalong,aes(x=ind,y=values)) + geom_boxplot()
 198 
 199 granovagg.1w(datalong$values,group = datalong$ind)
 200 
 201 granovagg.1w(arousal)
 202 
 203 
 204 #########################################################
 205 ###################### lin regression  ##################
 206 #########################################################
 207 
 208 
 209 load("data/births.rdata")
 210 
 211 
 212 # metric response...
 213 m.1 <- lm(bweight ~ gestwks, data=births)
 214 coef(m.1)
 215 
 216 summary(m.1)
 217 
 218 ## exercise
 219 
 220 ## create a scatter plot using ggplot the independent
 221 ## variable on the x-axis and the dependent variable on
 222 ## the y-axis
 223 
 224 ## ggplot knows lm
 225 
 226 ggplot(births,aes(x = gestwks, y = bweight)) +
 227   geom_point() +
 228   geom_smooth(method = "lm", se = F, colour = "red") +
 229   geom_smooth( se = F, size = 3, linetype = 4)
 230 
 231 
 232 
 233 ggplot(births,aes(x = gestwks, y = bweight)) +
 234   geom_point() +
 235   geom_smooth(method = "lm")
 236 
 237 ggplot(births,aes(x = gestwks, y = bweight, colour = hyp)) +
 238   geom_point() +
 239   geom_smooth(method = "lm", formula = y ~ poly(x,2))
 240 
 241 
 242 
 243 
 244 # extractor funcions
 245 summary(m.1)
 246 
 247 coef(m.1)
 248 confint(m.1)
 249 
 250 coef(summary(m.1))
 251 
 252 m <- m.1
 253 ## other useful functions
 254 
 255 ## illustrate meaning of fitted and residuals
 256 tmpdf <- data.frame(gestwks=births$gestwks, bweight=births$bweight , fitted=fitted(m), residuals=resid(m.1))
 257 
 258 m.2 <- lm(bweight ~ gestwks, data=births, na.action = na.exclude)
 259 tmpdf <- data.frame(gestwks=births$gestwks, bweight=births$bweight , fitted=fitted(m.2), residuals=resid(m.2))
 260 
 261 ggplot(tmpdf, aes( x = gestwks, y = bweight)) +
 262   geom_point() +
 263   geom_line(aes(y = fitted))
 264 
 265 
 266 
 267 ## illustrate meaning of predict
 268 ga <- 10:60
 269 pred.birth.weight <- predict(m.1,newdata=data.frame(gestwks=ga))
 270 tmpdf <- data.frame(gestwks=ga,bweight=pred.birth.weight)
 271 
 272 ## illustrate meaning of deviance
 273 m0 <- lm(bweight~1,data=births)
 274 deviance(m0) ## total sum of sqares
 275 deviance(m.1) ## error sum of squares of our model
 276 
 277 (deviance(m0)-deviance(m))/deviance(m0) ## r-squared: slightly different because there are some missing values
 278 
 279 btmp <- births[complete.cases(births[,c("bweight","gestwks")]),]
 280 m0 <- lm(bweight~1,data=btmp) ## repeat with this model (missing values excluded) you get the appropriate r-squared
 281 
 282 (deviance(m0)-deviance(m))/deviance(m0) ## r-squared: slightly different because there are some missing values
 283 
 284 ## ANOVA
 285 ## slide explanatory v is factor
 286 m.aov <- lm(bweight ~ hyp, data=births)
 287 coef(m.aov) ## gives a mean and a difference of means
 288 
 289 by(births$bweight,births$hyp,mean) ## gives the two means
 290 
 291 m.aov2 <- lm(bweight ~ -1 + hyp, data=births) ## gives also the two means
 292 coef(m.aov2)
 293 
 294 
 295 summary(m.aov)
 296 
 297 ## What is the appropriate plot to visualize the effect of hyp?
 298 
 299 ggplot(births, aes(x = hyp, y = bweight)) +
 300   geom_boxplot() +
 301   stat_summary(fun.data = "mean_cl_boot")
 302 
 303 
 304 ## What is the most common test to test these effect?
 305 t.test(bweight ~ hyp, data = births)
 306 
 307 
 308 
 309 ## model with both gestwks and hyp
 310 m.3 <- lm(bweight ~ hyp + gestwks, data=births)
 311 summary(m.3)
 312 
 313 coef(m.3)
 314 confint(m.3)
 315 
 316 m.4 <- lm(bweight ~ hyp + gestwks - 1, data=births)
 317 summary(m.4)
 318 
 319 coef(m.4)
 320 confint(m.4)
 321 
 322 
 323 ## interaction models, scaled
 324 m.5 <- lm(bweight ~ hyp * gestwks, data=births)
 325 
 326 
 327 births$gwsc <- births$gestwks-40
 328 m.6 <- lm(bweight ~ hyp * gwsc, data=births)
 329 
 330 summary(m)
 331 
 332 ## aov
 333 m <- lm(bweight ~ gestwks, data=births)
 334 anova(m)
 335 
 336 sum(anova(m)$Sum)
 337 
 338 anova(m)$Sum[1]/sum(anova(m)$Sum)
 339 
 340 summary(m)
 341 summary(m)$r.squared
 342 
 343 
 344 
 345 
 346 
 347 ##################### additional stuff ###########################
 348 ### jackknife
 349 
 350 leverage <- function(x){1/length(x)+(x-mean(x))**2/sum((x-mean(x))**2)}
 351 jtmp <- numeric()
 352 for(i in 1:nrow(births)){
 353   m <- lm(bweight ~ hyp + gestwks, data=births[-i,])
 354   jtmp[i] <- summary(m)$r.squared
 355 }
 356 
 357 m1a <- lm(bweight ~ hyp + gestwks, data=births[-1,])
 358 
 359 
 360 png("model1.png",width=1000,height=600)
 361 par(cex.lab=2,cex.axis=1.5, mar=c(5,5,4,2)+0.1)
 362 plot(births$gestwks,births$bweight,col=births$hyp,cex=2,pch=".",xlim=c(30,43),ylim=c(1500,4200))
 363 abline(coef(m)[c(1,3)],lwd=2)
 364 abline(coef(m)[1]+coef(m)[2],coef(m)[3],col="red",lwd=2)
 365 text(x=30,y=1600,"A",cex=2)
 366 text(x=32,y=1600,"B",col="red",cex=2)
 367 text(36.5,2300,"hyp=hyper",col="red",cex=2)
 368 text(34,2600,"hyp=normal",cex=2)
 369 dev.off()
 370 
 371 ## slide a multivariable model
 372 m <- lm(bweight ~ hyp + gestwks, data=births)
 373 summary(m)
 374 
 375 ## slide interaction between
 376 m <- lm(bweight ~ hyp * gestwks, data=births)
 377 
 378 png("model2.png",width=1000,height=800)
 379 par(cex.lab=2,cex.axis=1.5, mar=c(5,5,4,2)+0.1)
 380 plot(births$gestwks,births$bweight,col=births$hyp,cex=2,pch=".",xlim=c(30,43),ylim=c(1500,4200))
 381 abline(coef(m)[c(1,3)],lwd=2)
 382 abline(coef(m)[1]+coef(m)[2],coef(m)[3]+coef(m)[4],col="red",lwd=2)
 383 text(x=30,y=1680,"A",cex=2)
 384 text(x=32,y=1500,"B",col="red",cex=2)
 385 text(36.5,2300,"hyp=hyper",col="red",cex=2)
 386 text(34,2600,"hyp=normal",cex=2)
 387 dev.off()
 388 
 389 
 390 
 391 #########################################################
 392 #################### Exercises  #########################
 393 #########################################################
 394 
 395 ## gambling
 396 
 397 ## Fit a regression model with the expenditure on gambling
 398 ## as the response and the sex, status, income
 399 ## and verbal score as predictors. Present the output.
 400 
 401 require(faraway)
 402 
 403 m.tg <- lm(gamble ~ sex + status + income + verbal, data = teengamb)
 404 
 405 summary(m.tg)
 406 
 407 ## What percentage of variation in the response is explained
 408 ## by these predictors?
 409 
 410 sum.m.tg <- summary(m.tg)
 411 sum.m.tg$r.squared
 412 
 413 ## Which observation has the largest (positive) residual?
 414 ## Give the case number.
 415 
 416 which.max(residuals(m.tg))
 417 
 418 residuals(m.tg)
 419 
 420 ## Compute the correlation of the residuals with the fitted values.
 421 ## use cor() or cor.test()
 422 cor(residuals(m.tg),fitted(m.tg))
 423 cor.test(residuals(m.tg),fitted(m.tg))
 424 
 425 ## Compute the correlation of the residuals with the income.
 426 cor(residuals(m.tg),teengamb$income)
 427 cor.test(residuals(m.tg),teengamb$income)
 428 
 429 ## For all other predictors held constant, what would be the
 430 ## difference in predicted expenditure on gambling for a male
 431 ## compared to a female?
 432 
 433 summary(m.tg)
 434 

Gespeicherte Dateianhänge

Um Dateianhänge in eine Seite einzufügen sollte unbedingt eine Angabe wie attachment:dateiname benutzt werden, wie sie auch in der folgenden Liste der Dateien erscheint. Es sollte niemals die URL des Verweises ("laden") kopiert werden, da sich diese jederzeit ändern kann und damit der Verweis auf die Datei brechen würde.
  • [laden | anzeigen] (2015-10-19 19:25:19, 6.2 KB) [[attachment:dataweek4.zip]]
  • [laden | anzeigen] (2015-10-26 10:49:21, 8.2 KB) [[attachment:dataweek5.zip]]
  • [laden | anzeigen] (2015-10-13 05:01:59, 2.9 KB) [[attachment:fakepersdat.rdata]]
  • [laden | anzeigen] (2015-11-10 07:40:26, 1.3 KB) [[attachment:plar.r]]
  • [laden | anzeigen] (2015-10-05 17:35:00, 1.1 KB) [[attachment:readdata.r]]
  • [laden | anzeigen] (2015-09-28 19:42:02, 4.9 KB) [[attachment:rstuff.r]]
  • [laden | anzeigen] (2015-10-13 10:11:57, 10.0 KB) [[attachment:rstuff2.r]]
  • [laden | anzeigen] (2015-10-05 17:37:33, 9.7 KB) [[attachment:rstuff2ex.r]]
  • [laden | anzeigen] (2015-10-13 05:37:06, 9.6 KB) [[attachment:rstuff3ex.r]]
  • [laden | anzeigen] (2015-10-13 10:11:42, 12.9 KB) [[attachment:rstuff3sol.r]]
  • [laden | anzeigen] (2015-10-19 19:25:32, 9.6 KB) [[attachment:rstuff4ex.r]]
  • [laden | anzeigen] (2015-11-03 06:37:52, 11.8 KB) [[attachment:rstuff4sol.r]]
  • [laden | anzeigen] (2015-10-27 08:07:10, 11.1 KB) [[attachment:rstuff5ex.r]]
  • [laden | anzeigen] (2015-11-03 06:34:59, 9.6 KB) [[attachment:rstuff6ex.r]]
  • [laden | anzeigen] (2015-11-03 10:59:26, 12.3 KB) [[attachment:rstuff6sol.r]]
  • [laden | anzeigen] (2015-11-10 07:40:17, 5.6 KB) [[attachment:rstuff7ex.r]]
  • [laden | anzeigen] (2015-09-28 19:41:50, 1167.2 KB) [[attachment:session1.pdf]]
  • [laden | anzeigen] (2015-10-05 17:36:54, 303.0 KB) [[attachment:session2.pdf]]
  • [laden | anzeigen] (2015-10-13 05:37:00, 285.2 KB) [[attachment:session3.pdf]]
  • [laden | anzeigen] (2015-10-19 19:26:08, 1151.7 KB) [[attachment:session4.pdf]]
  • [laden | anzeigen] (2015-10-26 10:50:14, 1867.6 KB) [[attachment:session5.pdf]]
  • [laden | anzeigen] (2015-11-03 06:35:42, 2039.1 KB) [[attachment:session6.pdf]]
  • [laden | anzeigen] (2015-11-10 07:38:04, 929.2 KB) [[attachment:session7.pdf]]
  • [laden | anzeigen] (2015-09-28 19:43:16, 10.2 KB) [[attachment:week1data.zip]]
  • [laden | anzeigen] (2015-10-05 17:36:29, 10.2 KB) [[attachment:week2data.zip]]
 Alle Dateien | Ausgewählte Dateien: löschen verschieben auf Seite copy to page

Sie dürfen keine Anhänge an diese Seite anhängen!