attachment:rstuff7ex.r von RstatisTik/RstatisTikPortal/RcourSeSep

   1 ######################################################################
   2 ########### Exercise: Building Linear Mixed Models ###################
   3 ######################################################################
   4 
   5 ## Machines
   6 
   7 ## load the data (i.e. install/load the resp. package if necessary)
   8 
   9 ## data(Machines)  
  10 
  11 ## look at the data structure (which command?)
  12 
  13 
  14 ## is the design balanced or unbalanced?
  15 
  16 
  17 
  18 
  19 ## if one covariate is fixed and one random,
  20 ## which would you choose for each category and why?
  21 
  22 
  23 
  24 
  25 ## try to visualize the data appropriately
  26 
  27 
  28 
  29 
  30 
  31 
  32 ## What can you get out of the visualization?
  33 
  34 
  35 ## fit the following models!
  36 
  37 m.malm <- lm(score ~ Machine, data = Machines)
  38 m.ma1 <- lmer(score ~ Machine + (1|Worker), data = Machines)
  39 m.ma2 <- lmer(score ~ Machine + (Machine|Worker), data = Machines)
  40 m.ma3 <- lmer(score ~ Machine + (1|Worker) + (1|Machine:Worker), data = Machines)
  41 
  42 ## which is the most complex one?
  43 
  44 
  45 ## examine the fixed effects in the models. Compare them!
  46 
  47 
  48 
  49 ## load the lmerTest package, refit the models,
  50 ## examine the fixed effects including p-values.
  51 
  52 
  53 
  54 
  55 
  56 
  57 
  58 
  59 
  60 ## now use anova() to compare the three models
  61 
  62 
  63 
  64 ## Which model seems to be the most appropriate?
  65 
  66 
  67 
  68 ### ergoStool
  69 str(ergoStool)
  70 
  71 ## fit the following models
  72 m.es1 <- lmer(effort ~ 1 + (1|Subject) + (1|Type), ergoStool, REML=FALSE)
  73 m.es2 <- lmer(effort ~ 1 + (1|Subject), ergoStool, REML=FALSE)
  74 m.es3 <- lmer(effort ~ 1 + Type + (1|Subject), ergoStool, REML=FALSE)
  75 m.es4 <- aov(effort ~ 1 + Type + Subject, ergoStool)
  76 
  77 
  78 ## which 
  79 
  80 
  81 
  82 
  83 
  84 ### sleepstudy
  85 str(sleepstudy)
  86 
  87 ## use ggplot to visualize the reaction times dependent on time
  88 ## add the regression line for a simple linear regression per subject
  89 
  90 
  91 
  92 
  93  
  94 ## is the design balance or unbalanced?
  95 
  96 
  97 
  98 
  99 ## fit the following models? correct errors
 100 m.ss1 <- lmer(Reaction ~ 1 + Days + (1 + Days|Subject), sleepstudy)
 101 m.ss2 <- lmer(Reaction ~ 1 + Days + (1|Subject) + (Days|Subject), sleepstudy)
 102 
 103 
 104 
 105 
 106 ## which model incorporates more estimates
 107 ## (i.e. less degrees of freedom left - if you use lmerTest)
 108 
 109 
 110 
 111 
 112 
 113 
 114 ## which of them would you prefer and why?
 115 
 116 
 117 ## try to understand the following lines of code
 118 
 119 require(broom)
 120 coef.mm <- as.data.frame(coef(m.ss2)[["Subject"]])
 121 coef.mm$model <- "mixed"
 122 coef.mm$Subject <- row.names(coef.mm)
 123 
 124 sleepstudy.l <- split(sleepstudy,sleepstudy$Subject)
 125 tmp <- lapply(sleepstudy.l, function(x){
 126     m <- lm(Reaction ~ Days, data = x)
 127     data.frame(t(coef(m)))
 128 })
 129 coef.sm <- Reduce(rbind,tmp)
 130 coef.sm$model <- "simple lm"
 131 coef.sm$Subject <- names(tmp)
 132 
 133 names(coef.mm)[1:2] <- names(coef.sm)[1:2] <- c("intercept","slope")
 134 coefs <- rbind(coef.mm,coef.sm)
 135 
 136 require(grid)
 137 dev.new()
 138 
 139 p1 <- ggplot(coefs,aes(x = intercept, y = slope, shape = model)) +
 140   geom_point(aes(colour = model), size = 5) +
 141   geom_path(aes(group = Subject), arrow = arrow(ends = "first")) +
 142   annotate(geom = "point",
 143            x = fixef(m.ss2)[1],y = fixef(m.ss2)[2],
 144            size = 6, colour = "darkgreen")
 145 
 146 require(dplyr)
 147 tmp <- coefs %>%
 148   filter(Subject %in% c(330,337,309,370)) %>%
 149   group_by(Subject) %>%
 150   summarise(mean.slope = mean(slope),
 151             mean.int = mean(intercept))
 152 
 153 
 154 p1 +  annotate(geom = "text",
 155                x = tmp$mean.int,
 156                y = tmp$mean.slope,
 157                label = tmp$Subject,
 158                size = 6, colour = "darkgreen")             
 159 
 160 
 161 ## shrinkage
 162 ## Tukey: borrowing strength from each other
 163 
 164 ######################################################################
 165 ########### Exercise: Understanding Hypothesis Testing ###############
 166 ######################################################################
 167 
 168 ## Write a function which takes a vector, the poulation standard deviation
 169 ## and the population mean as arguments
 170 ## and which gives the Z score as result. 
 171 
 172 
 173 
 174 
 175 
 176 
 177 
 178 ## add a line to your function that allows you to also process numeric
 179 ## vectors containing missing values!
 180 
 181 
 182 
 183 
 184 
 185 
 186 
 187 
 188 ## the function pnorm(Z) gives the probability of x <= Z. Change your
 189 ## function so has the p-value as result.
 190 
 191 
 192 
 193 
 194 
 195 
 196 
 197 
 198 ## now let the result be a named vector containing the estimated
 199 ## difference, Z, p and the n.
 200 
 201 
 202 
 203 
 204 
 205 
 206 
 207 
 208 ######################################################################
 209 ####################### Simulation exerc #############################
 210 ######################################################################
 211 
 212 ## Now sample 100 values from a Normal distribution with mean 10
 213 ## and standard deviation 2 and use a z-test to compare it against
 214 ## the population mean 10. What is the p-value?
 215 
 216 
 217 
 218 
 219 
 220 
 221 ## Now do the sampling and the testing 1000 times, what would be the
 222 ## number of statistically significant results? Use replicate()
 223 ## (which is a wrapper of tapply()) or a for() loop! Record at
 224 ## least the p-values and estimated differences! Transform the
 225 ## result into a data frame.
 226 
 227 ### replicate()
 228 
 229 
 230 
 231 
 232 ### replicate(,simplify=F) 
 233 
 234 
 235 
 236 
 237 ## for() loop
 238 res <- matrix(numeric(2000),ncol=2)
 239 for(i in seq.int(1000)){
 240     res[i,] <- ztest(rnorm(100,mean=10,sd=2),x.sd=2,mu=10)[c("pval","diff")] }
 241 res <- as.data.frame(res)
 242 names(res) <- c("pval","diff")
 243 
 244 ## Use table() to count the p-vals below 0.05.
 245 
 246 
 247 ## What is the smallest absolute difference with a p-value below 0.05?
 248 
 249 
 250 
 251 ## Repeat the last exercise, only change the sample size to 1000 in each of the 1000 samples! How many p-value below 0.05? What is now the smallest absolute difference with a p-value below 0.05?
 252 
 253 
 254 
 255 
 256 ## Use table() to count the p-vals below 0.05.
 257 
 258 
 259 ## What is the smallest absolute difference with a p-value below 0.05?
 260 
 261 
 262 ## Concatenate the both resulting data frames from above using rbind()
 263 
 264 
 265 ## Plot the distributions of the pvals and the difference per
 266 ## sample size. Use \texttt{ggplot2} with an appropriate
 267 ## geom (density/histogram)
 268
Quick Links

Search Wiki

Page Tools

Dateianhang 'rstuff7ex.r'

Gespeicherte Dateianhänge