Dateianhang 'rstuff5ex.r'
Herunterladen 1 ###############################################################
2 ######################Colour Scales############################
3 ###############################################################
4
5
6 load("../week4/data/births.rdata")
7
8 require(ggplot2)
9 ggplot(births, aes(x = bweight)) +
10 geom_histogram()
11
12
13
14 ggplot(births, aes(x = bweight, fill = ..count..)) +
15 geom_histogram()
16
17
18 ggplot(births, aes(x = bweight, fill = ..count..)) +
19 geom_histogram() +
20 scale_fill_gradient(low="green",high="red")
21 ggsave("img/ggp10.png")
22
23
24 ggplot(births, aes(x = bweight, fill = ..count..)) +
25 geom_histogram() +
26 scale_fill_gradient(low="green",high="red", guide="legend")
27 ggsave("img/ggp11.png")
28
29
30 births$gest.age.group <- cut(births$gestwks, breaks = c(20,25,30,35,38,44))
31
32 ggplot(births, aes(x = gest.age.group,
33 fill = gest.age.group)) +
34 geom_bar()
35
36 ggsave("img/colorscaledefault.png")
37
38
39 ggplot(births, aes(x = gest.age.group, fill = gest.age.group)) +
40 geom_bar() +
41 scale_fill_brewer(palette = "Dark2", na.value = "grey")
42
43 ggsave("img/colorscalebrewer.png")
44
45
46 ggplot(births, aes(x = gest.age.group, fill = gest.age.group)) +
47 geom_bar(size = 2, colour = "black") +
48 scale_fill_grey(start = 0.7, end = 0, na.value = "white")
49
50 ggsave("img/colorscalegrey.png")
51
52
53 ggplot(births, aes(x = gest.age.group, fill = gest.age.group)) +
54 geom_bar(size = 2, colour = "black") +
55 scale_fill_hue(h = c(0,120), l = 70, c = 150)
56
57 ggsave("img/colorscalehue.png")
58
59 ggplot(births, aes(x = gest.age.group, fill = gest.age.group)) +
60 geom_bar(size = 2, colour = "black") +
61 scale_fill_manual(values = c("white","midnightblue","deeppink","firebrick","forestgreen"))
62
63 ggsave("img/colorscalemanual.png")
64
65
66 require(wesanderson)
67 ggplot(births, aes(x = gest.age.group, fill = gest.age.group)) +
68 geom_bar(size = 2, colour = "black") +
69 scale_fill_manual(values = wes_palette("Darjeeling"))
70
71 ggsave("img/colorscalewesanderson.png")
72
73
74 ###############################################################
75 ######################x and y Scales###########################
76 ###############################################################
77
78 require(ggplot2)
79 require(dplyr)
80 require(scales)
81
82
83 movies$years <- cut(movies$year,
84 breaks = seq(1890,2010,by = 10),
85 labels = paste(
86 seq(1890,2000,by = 10),
87 seq(1900,2010,by = 10),
88 sep = " - "))
89
90 summary(movies$years)
91
92 ## transformations
93 ## first we store the plot object without the scales adjustment
94 p <- ggplot(movies, aes(x=years,y=budget,colour=years)) +
95 geom_boxplot(fill="black") +
96 stat_summary(fun.y="mean",geom="point") +
97 theme(axis.text.x = element_text(angle=270,hjust=0.5,vjust=0.5),
98 legend.position="none")
99
100
101 ## different versions for changing the y-axis
102 p1 <- p + scale_y_continuous(labels=dollar,trans="log")
103 ## compare with
104 p2 <- ggplot(movies, aes(x=years,y=log(budget),colour=years)) +
105 geom_boxplot(fill="black") +
106 stat_summary(fun.y="mean",geom="point") +
107 theme(axis.text.x = element_text(angle=270,hjust=0.5,vjust=0.5),
108 legend.position="none")
109
110
111 library(gridExtra)
112 grid.arrange(p1,p2,ncol=2)
113 savePlot("img/transformaxisvsval.png")
114
115
116 ### formats
117 #### wrap
118 p2 + scale_x_discrete(labels = wrap_format(width = 5)) +
119 theme(axis.text.x = element_text(angle=0))
120
121 #### unit
122 ggplot(births, aes(x = bweight, fill = ..count..)) +
123 geom_histogram() +
124 scale_x_continuous(labels = unit_format(unit = "gr", sep = ""))
125
126
127 #### scientific
128 ggplot(births, aes(x = bweight, y = ..density.., fill = ..density..)) +
129 geom_histogram(binwidth = 50) +
130 scale_fill_gradient(low="green",high="red", guide="legend") +
131 scale_y_continuous(breaks = seq(0,0.001, by = 0.00033),
132 labels = scientific_format(digits = 1))
133
134 #### parse
135 df <- data.frame(x = c("alpha", "beta", "gamma","delta"),
136 y = 1:4)
137
138 ggplot(df, aes(x = x, y = y)) +
139 geom_bar(stat = "identity") +
140 scale_x_discrete(breaks = c("alpha", "beta", "gamma","delta"),
141 labels = unlist(parse_format()(c("alpha", "beta", "gamma","delta"))))
142
143
144 #### math
145 ggplot(births, aes(x = bweight, y = ..density.., fill = ..density..)) +
146 geom_histogram(binwidth = 50) +
147 ggtitle("y labels without meaning!") +
148 scale_fill_gradient(low="green",high="red", guide="legend") +
149 scale_y_continuous(breaks = seq(0,0.001, by = 0.00033),
150 labels = math_format(alpha + frac(1, (.x-1))))
151
152
153
154 ### date
155 df <- data.frame(x = seq(as.Date("1900-01-01"),as.Date("1900-06-01"),"weeks"),
156 y = 1:22)
157
158 ggplot(df, aes(x = x, y = y)) +
159 geom_bar(stat = "identity") +
160 scale_x_date()
161
162 ggplot(df, aes(x = x, y = y)) +
163 geom_bar(stat = "identity") +
164 scale_x_date(labels = date_format("%Y-%m-%d"))
165
166
167 ggplot(df, aes(x = x, y = y)) +
168 geom_bar(stat = "identity") +
169 scale_x_date(labels = date_format("%y-%m-%d"))
170
171 ggplot(df, aes(x = x, y = y)) +
172 geom_bar(stat = "identity") +
173 scale_x_date(labels = date_format("%U"))
174
175 ggplot(df, aes(x = x, y = y)) +
176 geom_bar(stat = "identity") +
177 scale_x_date(labels = date_format("%j"))
178
179 ?strptime
180
181
182 #### ordinal
183
184 ggplot(df, aes(x = x, y = y)) +
185 geom_bar(stat = "identity") +
186 scale_x_date(labels = date_format("%j")) +
187 scale_y_continuous(breaks = 1:22,
188 labels = ordinal_format())
189
190 #### own format
191 ggplot(births, aes(x = bweight, fill = ..count..)) +
192 geom_histogram() +
193 scale_x_continuous(labels = format_format(decimal.mark = ",",
194 big.mark = ".", nsmall = 2))
195
196
197
198
199
200 ####################################################################
201 ####################### Recap lm ##############################
202 ####################################################################
203
204 m.1 <- lm(bweight ~ gestwks, data=births)
205 summary(m.1)
206
207
208 #########################################################
209 #################### Exercises #########################
210 #########################################################
211
212 ## gambling
213
214 ## Fit a regression model with the expenditure on gambling
215 ## as the response and the sex, status, income
216 ## and verbal score as predictors. Present the output.
217
218
219
220
221
222 ## What percentage of variation in the response is explained
223 ## by these predictors?
224
225
226
227
228
229 ## Which observation has the largest (positive) residual?
230 ## Give the case number.
231
232
233
234
235
236 ## Compute the correlation of the residuals with the fitted values.
237 ## use cor() or cor.test()
238
239
240
241 ## Compute the correlation of the residuals with the income.
242
243
244
245 ## For all other predictors held constant, what would be the
246 ## difference in predicted expenditure on gambling for a male
247 ## compared to a female?
248
249
250
251
252 ###############################################################
253 ######################### glms ################################
254 ###############################################################
255
256 m.lm <- lm(bweight ~ hyp, data=births)
257 m.glm <- glm(bweight ~ hyp, family=gaussian, data=births)
258
259
260 m.bin1 <- glm(lowbw ~ hyp, family=binomial, data=births)
261 summary(m.bin1)
262
263
264 #### inverse logit function
265 invlogit <- function(x){ exp(x)/(exp(x) + 1)}
266 invlogit(coef(m.bin1)[1])
267
268 table(births$lowbw,births$hyp)
269 40/(388+40)
270
271 prop.table(table(births$lowbw,births$hyp),2)
272
273
274 invlogit(coef(m)[1]+coef(m)[2])
275
276
277 #### proportion test
278 prop.test(c(20,40),c(72,428))
279
280
281 #### chi sqared test
282 chisq.test(table(births$lowbw,births$hyp))
283
284
285 #### Effects package
286 require(effects)
287 plot(Effect("hyp",m))
288
289 Effect("hyp",m)
290
291 m.bin2 <- glm(lowbw ~ hyp+sex, family=binomial, data=births)
292
293
294 #### Adding an interaction
295 m.bin3 <- glm(lowbw ~ hyp + sex + hyp:sex,
296 family=binomial, data=births) # or shorter
297 m.bin3 <- glm(lowbw ~ hyp*sex, family=binomial,
298 data=births)
299
300
301 summary(m.bin3)
302
303
304 invlogit(coef(m3)[1])
305
306 invlogit(coef(m3)[1] + coef(m3)[2])
307
308 invlogit(coef(m3)[1] + coef(m3)[3])
309
310 invlogit(coef(m3)[1] + coef(m3)[2] + coef(m3)[3] + coef(m3)[4])
311
312
313 #### Anova
314 m.bin2 <- glm(lowbw ~ hyp+sex, family=binomial,
315 data=births)
316 m.bin3 <- glm(lowbw ~ hyp*sex, family=binomial,
317 data=births)
318
319 anova(m.bin2,mbin3)
320
321 #### stratified effects
322 m.bin4 <- glm(lowbw ~ sex + sex:hyp, family=binomial, data=births)
323
324 summary(m.bin4)
325 allEffects(m.bin4)
326
327 ## boys/normal bp
328 invlogit(coef(m.bin4)[1])
329 15/(206+15)
330
331 ## girls/normal bp
332 invlogit(coef(m.bin4)[1] + coef(m.bin4)[2])
333 25/(25+182)
334
335 ## boys/hypertension
336 invlogit(coef(m.bin4)[1] + coef(m.bin4)[3])
337 12/(12+31)
338
339 ## girls/hypertension
340 invlogit(coef(m.bin4)[1] + coef(m.bin4)[2] + coef(m.bin4)[4])
341 8/(8+21)
342
343 prop.table(table(births$lowbw,births$hyp,births$sex),c(2,3))
344
345 ftable(prop.table(table(births$lowbw,births$hyp,births$sex),c(2,3)))
346
347
348 #### shorter model definition
349 m.bin4 <- glm(lowbw ~ sex/hyp, family=binomial, data=births)
350
351 #### Binomial/Logistic Regression
352 m.bin5 <- glm(lowbw ~ gestwks, family=binomial, data=births)
353
354 invlogit(coef(m.bin5)[1])
355
356 exp(coef(m.bin5)[2])
357
358 Effect("gestwks",m.bin5)
359
360 Effect("gestwks",m.bin5,xlevels = list(gestwks = c(20,30,40)))
361
362
363 require(ggplot2)
364 ggplot(births,aes(x = gestwks, y = as.numeric(lowbw)-1)) +
365 geom_smooth(method = "glm", family = "binomial",se = F,size = 2) +
366 geom_point(shape="|") ## adds actual values
367
368
369 #### O-Ring Example
370 library(faraway)
371 data(orings)
372 head(orings)
373
374
375 m.or <- glm(cbind(damage,6-damage) ~ temp,
376 family=binomial, orings)
377
378
379 summary(m.or)
380
381 invlogit(coef(m)[1])
382
383 tf <- 20:100
384 pd <- predict(m,newdata=list(temp=tf), type="response")
385
386 plot(tf,pd,type="l",
387 xlab=expression(paste("temperature (",degree,"F)",sep=" ")),
388 ylab="probability of damage")
389
390
391 ## ggplot
392 orings$trials <- 6
393 ggplot(orings,aes(x=temp,y=damage/trials)) +
394 geom_point() +
395 geom_smooth(method = "glm", family = "binomial", aes(weight = trials)) +
396 xlab(expression(paste("temperature (",degree,"F)"))) +
397 ylab("probability of damage") +
398 scale_y_continuous(labels = percent)
399 ggsave("img/challenger2.png")
400
401 ## Binary Ancova
402 infection <- read.table("data/infection.txt",header=T)
403 summary(infection)
404
405 infection$sex <- factor(infection$sex, labels = c("male","female"))
406 infection$infected <- factor(infection$infected, labels = c("non infected","infected"))
407
408
409 m.inf <- glm(infected~age*sex,family=binomial,
410 data=infection)
411
412 summary(m.inf)
413 invlogit(coef(m.inf)[1])
414
415 invlogit(coef(m.inf)[1]+coef(m.inf)[3])
416
417 solve(0.015657,3.000513)
418 solve(0.02670685,2.883849)
419
420
421 allEffects(m)
422
423 ###############################################################
424 ###################### Exercise #############################
425 ###############################################################
426
427 ## change sex and infected to factors with the labels male and
428 ## female, resp. infected/non infected if not already done
429
430
431
432
433
434 ## Try to reproduce the plot! Hints:
435
436
437 ## set up a ggplot object, think about the aesthetics aes().
438 ## Which quality of the graph you wanna set to which variable?
439
440
441
442 ## begin with the lines (geom_smooth())
443
444
445
446
447 ## add the points (geom\_jitter());
448 ## do not think about the symbols in the first place;
449 ## try to adjust the width and height appropriately
450
451
452
453
454 ## change the colour of the lines and points
455 ## (scale_colour_manual()); I used midnightblue
456 ## for male and deeppink for female
457
458
459
460 ## change the symbols (scale_shape_manual()); use
461 ## values = c("male" = "\u2642","female" = "\u2640")) as values
462
463
464
465 ## set the axes titles
466
467
468 ## change to text of the y axis to percentage, etc
469
Gespeicherte Dateianhänge
Um Dateianhänge in eine Seite einzufügen sollte unbedingt eine Angabe wie attachment:dateiname benutzt werden, wie sie auch in der folgenden Liste der Dateien erscheint. Es sollte niemals die URL des Verweises ("laden") kopiert werden, da sich diese jederzeit ändern kann und damit der Verweis auf die Datei brechen würde.Sie dürfen keine Anhänge an diese Seite anhängen!