welcome: please sign in

Seiteninhalt hochladen

Sie können für die unten genannte Seite Inhalt hochladen. Wenn Sie den Seitennamen ändern, können Sie auch Inhalt für eine andere Seite hochladen. Wenn der Seitenname leer ist, leiten wir den Seitennamen vom Dateinamen ab.

Datei, aus der der Seiteninhalt geladen wird
Seitenname
Kommentar

Revision 1 vom 2015-05-01 13:03:45

location: RstatisTik / RstatisTikPortal / RcourSe / FinalFunction / CombMoreDataFrames

Introduction

Remember: Implicit Loops

A common application of loops is to apply a function to each element of a set of values and collect the results in a single structure. In R this is done by the functions:

Exercises

Given the two R object m and l below use * use lapply() to get the class and the length of each element of l (two steps) * apply() to get the maximum of each column in m

   1 > m <- matrix(1:100, nrow=10)
   2 > l <- list(a=1:10,b=rep(c(T,F),2),c=letters)  

read one file

   1 > file <- "../session1/session1data/pre001.txt"
   2 > pre1 <- read.file(file,skip=3)
   3 [1] "read ../session1/session1data/pre001.txt"
   4 > file <- "data/pretest/pre_001.txt"
   5 > pre1v2 <- read.file(file,skip=0)
   6 [1] "read ../session2/data/pretest/pre_001.txt"

read several files

get files names

   1 > files <- dir("../session2/data",full.names = T, 
   2 +                recursive = T,pattern = "[0-9]{3}\\.txt$")
   3 > files
   4 [1] "../session2/data/posttest/post_001.txt"   
   5 [2] "../session2/data/posttest/post_002.txt"   
   6 [3] "../session2/data/posttest/post_003.txt"   
   7 [4] "../session2/data/posttest/post_004.txt"   
   8 [5] "../session2/data/posttest/post_005.txt"   
   9 [6] "../session2/data/posttest/post_006.txt"   
  10 [7] "../session2/data/posttest/post_007.txt"   

read in files

   1 > source("function.r")
   2 > df.list <- lapply(files,read.file,skip=0)
   3 [1] "read ../session2/data/posttest/post_001.txt"
   4 [1] "read ../session2/data/posttest/post_002.txt"
   5 [1] "read ../session2/data/posttest/post_003.txt"
   6 [1] "read ../session2/data/posttest/post_004.txt"
   7 [1] "read ../session2/data/posttest/post_005.txt"
   8 [1] "read ../session2/data/posttest/post_006.txt"
   9 [1] "read ../session2/data/posttest/post_007.txt"
  10 [1] "read ../session2/data/posttest/post_008.txt"

Result

   1 > length(files)
   2 [1] 195  

   1 > length(df.list)
   2 [1] 195  

   1 > table(sapply(df.list,class))
   2 192          2   

Remember: Combining Data Frames

We learned about three basis functions to combine data frame

Reduce()

Example

   1 > (d1 <- data.frame(id=LETTERS[c(1,2,3)],day1=sample(10,3)))
   2 id day1
   3 1  A    3
   4 2  B    1
   5 3  C    7
   6 > (d2 <- data.frame(id=LETTERS[c(1,3,5,6)],day2=sample(10,4)))
   7 id day2
   8 1  A    8
   9 2  C    2
  10 3  E    5
  11 4  F    3
  12 > (d3 <- data.frame(id=LETTERS[c(2,4:6)],day3=sample(10,4)))
  13 id day3
  14 1  B    8
  15 2  D    3
  16 3  E    4
  17 4  F   10
  18 > (d4 <- data.frame(id=LETTERS[c(1:5)],day4=sample(10,5)))
  19 id day4
  20 1  A    2
  21 2  B    7
  22 3  C    8
  23 4  D    9
  24 5  E    1

   1 > Reduce(merge,list(d1,d2,d3,d4))
   2 [1] id   day1 day2 day3 day4

   1 > Reduce(function(x,y) { merge(x,y, all=T) },
   2 +        list(d1,d2,d3,d4))
   3 id day1 day2 day3 day4
   4 1  A    3    8   NA    2
   5 2  B    1   NA    8    7
   6 3  C    7    2   NA    8
   7 4  E   NA    5    4    1
   8 5  F   NA    3   10   NA
   9 6  D   NA   NA    3    9

   1 > d4$day <- names(d4)[2]
   2 > names(d4)[2] <- "score"
   3 > Reduce(function(x,y) { y$day <- names(y)[2]
   4 +                        names(y)[2] <- "score"
   5 +                        rbind(x,y) } ,
   6 +        list(d1,d2,d3), init = d4)
   7 id score  day
   8 1   A     2 day4
   9 2   B     7 day4
  10 3   C     8 day4
  11 4   D     9 day4

Reduce() Exercise

   1 > ml <- list(vl <- c(TRUE,FALSE),
   2 +            vn <- 1:10,
   3 +            vc <- letters)

Reduce() Exercise

   1 > lapply(ml,class)
   2 [1] "logical"
   3 [1] "integer"
   4 [1] "character"

   1 > rv <- Reduce(c,ml)
   2 > rv
   3 [1] "1"  "0"  "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "a"  "b"  "c" 
   4 [16] "d"  "e"  "f"  "g"  "h"  "i"  "j"  "k"  "l"  "m"  "n"  "o"  "p"  "q"  "r" 
   5 [31] "s"  "t"  "u"  "v"  "w"  "x"  "y"  "z" 
   6 > class(rv)
   7 [1] "character"

Combine all data frames

   1 > df.list <- lapply(files,read.file,skip=0)
   2 [1] "read data/posttest/post_001.txt"
   3 [1] "read data/posttest/post_002.txt"
   4 [1] "read data/posttest/post_003.txt"
   5 [1] "read data/posttest/post_004.txt"
   6 [1] "read data/posttest/post_005.txt"
   7 > table(sapply(df.list,class))
   8 192          3   

Combine all data frames - exercise

Combine all data frames - exercise

   1 > data <- Reduce(rbind,df.list)
   2 > nrow(data)
   3 [1] 12704
   4 > table(data$Subject)
   5 001_test2 002_test2 003_test2 004_test2 005_test2 006_test2 007_test2 008_test2 
   6 93        91        96        93        95        95        93        96 
   7 009_test2 010_test2 011_test2 012_test2 013_test2 014_test2 015_test2 016_test2 
   8 92        94        95        96        96        95        96        94 
   9 017_test2 018_test2 019_test2 020_test2 001_test1 002_test1 003_test1 004_test1 
  10 95        94        96        95        95        95        96        94 
  11 005_test1 006_test1 007_test1 008_test1 009_test1 010_test1 011_test1 012_test1 
  12 96        95        94        90        96        95        91        96 
  13 013_test1 014_test1 015_test1 016_test1 017_test1 018_test1 019_test1 020_test1 
  14 95        96        95        91        96        96        96        96 
  15 001_1     002_1     003_1     004_1     005_1     006_1    CHGU_1    008_1a 
  16 60        59        60        54        60        59        60        60 
  17 009_1     010_1     RMK_1     013_1     014_1     015_1     016_1    IJ2K_1 
  18 60        60        59        59        60        58        59        58 
  19 018_1     019_1     020_1     001_2     002_2     003_2     004_2     005_2 
  20 60        59        60        59        59        57        58        57 
  21 006_2     007_2     008_2     009_2     010_2     011_2     012_2     013_2 
  22 58        58        54        58        58        59        59        56 
  23 014_2     015_2     016_2     017_2     018_2     019_2     020_2     001_3 

The Function no 2

   1 > read.files <- function(filesdir,skip=3,recursive=F,pattern="."){
   2 +     files <- dir(filesdir,
   3 +                  full.names = T,
   4 +                  recursive = recursive,
   5 +                  pattern = pattern)
   6 +     Reduce(rbind,lapply(files,read.file,skip=skip))}
   7 > data <- read.files("data",recursive = T,skip=0,pattern = "\\.txt$")
   8 [1] "read data/posttest/post_001.txt"
   9 [1] "read data/posttest/post_002.txt"
  10 [1] "read data/posttest/post_003.txt"
  11 [1] "read data/posttest/post_004.txt"
  12 [1] "read data/posttest/post_005.txt"

The Function no 2

   1 > sub1 <- read.files("../session2/data",
   2 +                    skip = 0, recursive = T,pattern="\\002\\.txt$")
   3 [1] "read ../session2/data/posttest/post_002.txt"
   4 [1] "read ../session2/data/pretest/pre_002.txt"
   5 [1] "read ../session2/data/training_1/train_002.txt"
   6 [1] "read ../session2/data/training_2/train_002.txt"
   7 [1] "read ../session2/data/training_3/train_002.txt"
   8 > test <- read.files("../session2/data",
   9 +                    skip = 0, recursive = T,pattern="p[ro].+\\.txt$")
  10 [1] "read ../session2/data/posttest/post_001.txt"
  11 [1] "read ../session2/data/posttest/post_002.txt"
  12 [1] "read ../session2/data/pretest/pre_001.txt"
  13 [1] "read ../session2/data/pretest/pre_002.txt"

The Subject column

The Subject column

   1 > table(data$Subject)
   2 001_test2 002_test2 003_test2 004_test2 005_test2 006_test2 007_test2 008_test2 
   3 93        91        96        93        95        95        93        96 
   4 009_test2 010_test2 011_test2 012_test2 013_test2 014_test2 015_test2 016_test2 
   5 92        94        95        96        96        95        96        94 
   6 017_test2 018_test2 019_test2 020_test2 001_test1 002_test1 003_test1 004_test1 
   7 95        94        96        95        95        95        96        94 

The Subject column

   1 > data$persid <- sapply(data$Subject,function(x)
   2 +     str_split(x,pattern = "_")[[1]][1])
   3 > data$testid <- sapply(data$Subject,function(x)
   4 +     str_split(x,pattern = "_")[[1]][2])

The Subject column

   1 > data$testid <- str_replace(data$Subject,"^.+_","")
   2 > data$persid <- str_replace(data$Subject,"_.+$","")
   3 > data$Subject <- NULL

The Subject column

The Subject column

   1 > table(data$persid)
   2 001  002  003  004  005  006  007  008  009  010  011  012  013  014  015  016 
   3 665  662  666  587  608  588  600  654  536  543  600  523  589  669  662  663 
   4 017  018  019  020 CHGU GA3K IJ2K Kj6K  RMK 
   5 604  668  667  656   60   58   58   59   59 
   6 > data$persid[data$persid=="CHGU"] <- "007"

The Subject column Exercises

The Subject column Exercises

   1 > data$persid[data$persid=="RMK"] <- "011"
   2 > data$persid[data$persid=="IJ2K"] <- "017"
   3 > data$persid[data$persid=="GA3K"] <- "004"
   4 > data$persid[data$persid=="Kj6K"] <- "006"

Merging

* now read in the file subjectsdemographics.txt using the appropriate command * join the demographics with our data data frame (there is a little problem left - compare the persid and Subject columns)

The Subject column Exercises

   1 > persdat <- read.table("data/subjectdemographics.txt",
   2 +                       sep="\t",
   3 +                       header=T)
   4 > persdat$Subject
   5 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
   6 > unique(data$persid)
   7 [1] "001" "002" "003" "004" "005" "006" "007" "008" "009" "010" "011" "012"
   8 [13] "013" "014" "015" "016" "017" "018" "019" "020"
   9 > data$persid <- as.numeric(data$persid)
  10 > unique(data$persid)
  11 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
  12 > data <- merge(persdat,data,by.x = "Subject",by.y = "persid",all=T)
  13 > head(data)
  14 > summary(data)
  15 Subject      Sex       Age_PRETEST        Trial          Event.Type   
  16 1st Qu.: 5.00   m:5046   1st Qu.:3.110   1st Qu.:112.0   Response:12704  
  17 Median :11.00            Median :4.400   Median :222.0   Sound   :    0  
  18 Mean   :10.53            Mean   :4.154   Mean   :223.1   Pause   :    0  
  19 3rd Qu.:16.00            3rd Qu.:4.600   3rd Qu.:332.0   Resume  :    0  

Summary Graphics

Just run the code and try to understand it. We will cover the ggplot graphics soon.

   1 > ggplot(data,aes(x=factor(Subject),fill=..count..)) +
   2 +     geom_bar() +
   3 +     facet_wrap(~testid)

attachment:graph1.png

Summary Graphics

   1 > data$testid <- str_replace(data$testid,"[a-z]$","")
   2 > data$testid <- factor(data$testid,
   3 +                       levels=c("test1","1","2","3","4","5","6","7","8","test2"))
   4 > table(data$Subject,data$testid)
   5 test1  1  2  3  4  5  6  7  8 test2
   6 1     95 60 59 60 59 59 60 60 60    93
   7 2     95 59 59 58 60 60 60 60 60    91
   8 3     96 60 57 60 60 60 60 59 58    96
   9 4     94 54 58 60 60 55 53 60 58    93
  10 5     96 60 57 60 60 60 60 60  0    95
  11 6     95 59 58 59 58 59 55 54 55    95
  12 7     94 60 58 60 58 59 60 59 59    93
  13 8     90 60 54 55 60 60 60 59 60    96
  14 9     96 60 58 59 57  0  0 58 56    92

Summary Graphics

   1 > ggplot(data,aes(x=factor(Subject),fill=..count..)) +
   2 +     geom_bar() +
   3 +     facet_wrap(~testid)

attachment:graph2.png

Summary Graphics

And another one.

   1 > ggplot(data,aes(x=testid,fill=Stim.Type)) +
   2 +     geom_bar(position=position_fill()) +
   3 +     facet_wrap(~Subject)

attachment:graph3.png