Dateianhang 'rstuff7ex.r'
Herunterladen 1 ######################################################################
2 ########### Exercise: Building Linear Mixed Models ###################
3 ######################################################################
4
5 ## Machines
6
7 ## load the data (i.e. install/load the resp. package if necessary)
8
9 ## data(Machines)
10
11 ## look at the data structure (which command?)
12
13
14 ## is the design balanced or unbalanced?
15
16
17
18
19 ## if one covariate is fixed and one random,
20 ## which would you choose for each category and why?
21
22
23
24
25 ## try to visualize the data appropriately
26
27
28
29
30
31
32 ## What can you get out of the visualization?
33
34
35 ## fit the following models!
36
37 m.malm <- lm(score ~ Machine, data = Machines)
38 m.ma1 <- lmer(score ~ Machine + (1|Worker), data = Machines)
39 m.ma2 <- lmer(score ~ Machine + (Machine|Worker), data = Machines)
40 m.ma3 <- lmer(score ~ Machine + (1|Worker) + (1|Machine:Worker), data = Machines)
41
42 ## which is the most complex one?
43
44
45 ## examine the fixed effects in the models. Compare them!
46
47
48
49 ## load the lmerTest package, refit the models,
50 ## examine the fixed effects including p-values.
51
52
53
54
55
56
57
58
59
60 ## now use anova() to compare the three models
61
62
63
64 ## Which model seems to be the most appropriate?
65
66
67
68 ### ergoStool
69 str(ergoStool)
70
71 ## fit the following models
72 m.es1 <- lmer(effort ~ 1 + (1|Subject) + (1|Type), ergoStool, REML=FALSE)
73 m.es2 <- lmer(effort ~ 1 + (1|Subject), ergoStool, REML=FALSE)
74 m.es3 <- lmer(effort ~ 1 + Type + (1|Subject), ergoStool, REML=FALSE)
75 m.es4 <- aov(effort ~ 1 + Type + Subject, ergoStool)
76
77
78 ## which
79
80
81
82
83
84 ### sleepstudy
85 str(sleepstudy)
86
87 ## use ggplot to visualize the reaction times dependent on time
88 ## add the regression line for a simple linear regression per subject
89
90
91
92
93
94 ## is the design balance or unbalanced?
95
96
97
98
99 ## fit the following models? correct errors
100 m.ss1 <- lmer(Reaction ~ 1 + Days + (1 + Days|Subject), sleepstudy)
101 m.ss2 <- lmer(Reaction ~ 1 + Days + (1|Subject) + (Days|Subject), sleepstudy)
102
103
104
105
106 ## which model incorporates more estimates
107 ## (i.e. less degrees of freedom left - if you use lmerTest)
108
109
110
111
112
113
114 ## which of them would you prefer and why?
115
116
117 ## try to understand the following lines of code
118
119 require(broom)
120 coef.mm <- as.data.frame(coef(m.ss2)[["Subject"]])
121 coef.mm$model <- "mixed"
122 coef.mm$Subject <- row.names(coef.mm)
123
124 sleepstudy.l <- split(sleepstudy,sleepstudy$Subject)
125 tmp <- lapply(sleepstudy.l, function(x){
126 m <- lm(Reaction ~ Days, data = x)
127 data.frame(t(coef(m)))
128 })
129 coef.sm <- Reduce(rbind,tmp)
130 coef.sm$model <- "simple lm"
131 coef.sm$Subject <- names(tmp)
132
133 names(coef.mm)[1:2] <- names(coef.sm)[1:2] <- c("intercept","slope")
134 coefs <- rbind(coef.mm,coef.sm)
135
136 require(grid)
137 dev.new()
138
139 p1 <- ggplot(coefs,aes(x = intercept, y = slope, shape = model)) +
140 geom_point(aes(colour = model), size = 5) +
141 geom_path(aes(group = Subject), arrow = arrow(ends = "first")) +
142 annotate(geom = "point",
143 x = fixef(m.ss2)[1],y = fixef(m.ss2)[2],
144 size = 6, colour = "darkgreen")
145
146 require(dplyr)
147 tmp <- coefs %>%
148 filter(Subject %in% c(330,337,309,370)) %>%
149 group_by(Subject) %>%
150 summarise(mean.slope = mean(slope),
151 mean.int = mean(intercept))
152
153
154 p1 + annotate(geom = "text",
155 x = tmp$mean.int,
156 y = tmp$mean.slope,
157 label = tmp$Subject,
158 size = 6, colour = "darkgreen")
159
160
161 ## shrinkage
162 ## Tukey: borrowing strength from each other
163
164 ######################################################################
165 ########### Exercise: Understanding Hypothesis Testing ###############
166 ######################################################################
167
168 ## Write a function which takes a vector, the poulation standard deviation
169 ## and the population mean as arguments
170 ## and which gives the Z score as result.
171
172
173
174
175
176
177
178 ## add a line to your function that allows you to also process numeric
179 ## vectors containing missing values!
180
181
182
183
184
185
186
187
188 ## the function pnorm(Z) gives the probability of x <= Z. Change your
189 ## function so has the p-value as result.
190
191
192
193
194
195
196
197
198 ## now let the result be a named vector containing the estimated
199 ## difference, Z, p and the n.
200
201
202
203
204
205
206
207
208 ######################################################################
209 ####################### Simulation exerc #############################
210 ######################################################################
211
212 ## Now sample 100 values from a Normal distribution with mean 10
213 ## and standard deviation 2 and use a z-test to compare it against
214 ## the population mean 10. What is the p-value?
215
216
217
218
219
220
221 ## Now do the sampling and the testing 1000 times, what would be the
222 ## number of statistically significant results? Use replicate()
223 ## (which is a wrapper of tapply()) or a for() loop! Record at
224 ## least the p-values and estimated differences! Transform the
225 ## result into a data frame.
226
227 ### replicate()
228
229
230
231
232 ### replicate(,simplify=F)
233
234
235
236
237 ## for() loop
238 res <- matrix(numeric(2000),ncol=2)
239 for(i in seq.int(1000)){
240 res[i,] <- ztest(rnorm(100,mean=10,sd=2),x.sd=2,mu=10)[c("pval","diff")] }
241 res <- as.data.frame(res)
242 names(res) <- c("pval","diff")
243
244 ## Use table() to count the p-vals below 0.05.
245
246
247 ## What is the smallest absolute difference with a p-value below 0.05?
248
249
250
251 ## Repeat the last exercise, only change the sample size to 1000 in each of the 1000 samples! How many p-value below 0.05? What is now the smallest absolute difference with a p-value below 0.05?
252
253
254
255
256 ## Use table() to count the p-vals below 0.05.
257
258
259 ## What is the smallest absolute difference with a p-value below 0.05?
260
261
262 ## Concatenate the both resulting data frames from above using rbind()
263
264
265 ## Plot the distributions of the pvals and the difference per
266 ## sample size. Use \texttt{ggplot2} with an appropriate
267 ## geom (density/histogram)
268
Gespeicherte Dateianhänge
Um Dateianhänge in eine Seite einzufügen sollte unbedingt eine Angabe wie attachment:dateiname benutzt werden, wie sie auch in der folgenden Liste der Dateien erscheint. Es sollte niemals die URL des Verweises ("laden") kopiert werden, da sich diese jederzeit ändern kann und damit der Verweis auf die Datei brechen würde.Sie dürfen keine Anhänge an diese Seite anhängen!