welcome: please sign in

Seiteninhalt hochladen

Sie können für die unten genannte Seite Inhalt hochladen. Wenn Sie den Seitennamen ändern, können Sie auch Inhalt für eine andere Seite hochladen. Wenn der Seitenname leer ist, leiten wir den Seitennamen vom Dateinamen ab.

Datei, aus der der Seiteninhalt geladen wird
Seitenname
Kommentar

Revision 1 vom 2015-03-15 19:29:14

location: RstatisTik / RstatisTikPortal / RcourSe / FinalFunction / DplyR

dplyr

Introduction

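The dplyr package makes common data manipulation steps fast and easy by providing a small set of verbs, each of which is introduced below: filter(), select(), arrange(), mutate(), transmute() and summarise().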

filter()

filter() example

   1 > require(dplyr)
   2 > sub1 <- filter(data, Subject == 1)
   3 > table(sub1$Subject)
   4 1 
   5 665   
   6 > sub1 <- filter(data, Subject == 1, Stim.Type == "incorrect")
   7 > table(sub1$Subject,sub1$Stim.Type)
   8 hit incorrect other miss
   9 1   0       293     0    0
  10 > subframe <- filter(data, Age_PRETEST < 3.5 | Sex == "m" )
  11 > table(subframe$Age_PRETEST < 3.5, subframe$Sex)
  12 f    m
  13 FALSE    0 3202
  14 TRUE  1333 1844

select()

select() example

   1 > subframe <- select(data, Subject, Sex, Age_PRETEST)
   2 > head(subframe)
   3 Subject Sex Age_PRETEST
   4 1       1   f        3.11
   5 2       1   f        3.11
   6 3       1   f        3.11
   7 4       1   f        3.11
   8 5       1   f        3.11
   9 6       1   f        3.11
  10 > subframe <- select(data, Subject, Sex, Age_PRETEST) %>%
  11 +     filter(Age_PRETEST < 3.2)
  12 > table(subframe$Subject)
  13 1   4   9  16  18 
  14 665 645 536 663 668 

arrange()

   1 > arr.frame <- arrange(data, TTime, Time)
   2 > head(arr.frame)
   3 Subject Sex Age_PRETEST Trial Event.Type Code     Time TTime Uncertainty
   4 1       2   m        4.50   255   Response    1  9250486     2           1
   5 2      15   f        4.11   381   Response    2  7850406    10           1
   6 3      14   m        4.60   297   Response    1 11254989    13           1
   7 4      17   m        4.90   234   Response    2 12267915    13           1
   8 5       9   m        3.11   127   Response    1  1445239    16           1
   9 6       2   m        4.50   332   Response    2  3580014    24           1
  10 Duration Uncertainty.1 ReqTime ReqDur Stim.Type Pair.Index    Type Event.Code
  11 1      200             2       0   next       hit        220 Picture   TO18.jpg
  12 2      200             2       0   next       hit        328 Picture   TO22.jpg
  13 3      200             2       0   next       hit        258 Picture   TS05.jpg
  14 4      200             2       0   next incorrect        202 Picture   TO03.jpg
  15 5      200             2       0   next       hit        126 Picture   RS21.jpg
  16 6      200             2       0   next       hit        333 Picture   RS30.jpg
  17 Time1 testid EC1
  18 1  9250484      1  TO
  19 2  7850396      4  TO
  20 3 11254976      5  TS
  21 4 12267902      6  TO
  22 5  1445223  test2  RS
  23 6  3579990  test2  RS  

mutate()

mutate() example

   1 > subframe <- filter(data, Subject == 1) %>%
   2 +     mutate(Event.Code = str_replace(Event.Code,".jpg",""),
   3 +            TTime.calc = Time - Time1)
   4 > head(subframe)
   5 Subject Sex Age_PRETEST Trial Event.Type Code   Time TTime Uncertainty
   6 1       1   f        3.11     7   Response    2 103745  2575           1
   7 2       1   f        3.11    12   Response    2 156493  2737           1
   8 3       1   f        3.11    17   Response    2 214772  6630           1
   9 4       1   f        3.11    22   Response    1 262086  5957           1
  10 5       1   f        3.11    27   Response    2 302589   272           1
  11 6       1   f        3.11    32   Response    1 352703  7197           1
  12 Duration Uncertainty.1 ReqTime ReqDur Stim.Type Pair.Index    Type Event.Code
  13 1     2599             3       0   next       hit          7 Picture       RO26
  14 2     2800             2       0   next incorrect         12 Picture       RO19
  15 3     6798             2       0   next       hit         17 Picture       RS23
  16 4     5999             2       0   next incorrect         22 Picture       OF22
  17 5      400             2       0   next       hit         27 Picture       AT08
  18 6     7398             2       0   next       hit         32 Picture       AT30
  19 Time1 testid EC1 TTime.calc
  20 1 101170  test2  RO       2575
  21 2 153756  test2  RO       2737
  22 3 208142  test2  RS       6630
  23 4 256129  test2  OF       5957
  24 5 302317  test2  AT        272
  25 6 345506  test2  AT       7197
  26 > table(subframe$Subject)
  27 1 
  28 665   

transmute()

   1 > mut.frame <- transmute(data,
   2 +                     Event.Code = str_replace(Event.Code,".jpg",""),
   3 +                     TTime.calc = Time - Time1)
   4 > head(mut.frame)
   5 1       RO26       2575
   6 2       RO19       2737
   7 3       RS23       6630
   8 4       OF22       5957
   9 5       AT08        272
  10 6       AT30       7197

summarise()

   1 > sum.frame <- summarise(data, mean.ttime=mean(TTime), sd.ttime = sd(TTime))
   2 > sum.frame
   3 1   18393.74 17876.12  

summarise() example

   1 > sum.frame <- group_by(data, Subject) %>%
   2 +     summarise(mean.ttime=mean(TTime), sd.ttime = sd(TTime))
   3 > sum.frame
   4 Subject mean.ttime sd.ttime
   5 1        1  11717.854 13035.85
   6 2        2  13100.568 13607.71
   7 3        3  15709.598 16464.09
   8 4        4  24778.592 20205.91
   9 5        5  14759.785 14863.84
  10 6        6  14081.377 14834.64
  11 7        7  11551.482 12814.57
  12 8        8  22739.310 18215.68
  13 9        9  20490.722 19399.49

summarise() example

   1 > sum.frame <- group_by(data, Subject, testid) %>%
   2 +     summarise(mean.ttime=mean(TTime), sd.ttime = sd(TTime))
   3 > head(sum.frame)
   4 Subject testid mean.ttime  sd.ttime
   5 1       1  test1   8621.674  7571.462
   6 2       1      1   9256.367  8682.833
   7 3       1      2   9704.712 10479.788
   8 4       1      3  14189.550 13707.021
   9 5       1      4  13049.831 11344.656
  10 6       1      5  14673.525 15575.355

dplyr Exercises

* Use select() and filter() in combination (%>%) to select all rows belonging to the post or the pre test, and keep the Subject, Sex, Age_PRETEST and Stim.Type columns. Store the result in a new data frame, named data2 for example.
* Add two new variables containing the counts of hit and incorrect. Use mutate() and sum(Stim.Type=='hit').
* Use group_by() and summarise() to extract the minimum and maximum TTime per person from the original data frame.
* Repeat the last exercise, but now group per person and EC1.

dplyr Exercise 1 Solution

   1 > data2 <- filter(data, testid %in% c("test1","test2") )%>%
   2 +     select(Subject,Sex,Age_PRETEST,Stim.Type)
   3 > head(data2)
   4 Subject Sex Age_PRETEST Stim.Type
   5 1       1   f        3.11       hit
   6 2       1   f        3.11 incorrect
   7 3       1   f        3.11       hit
   8 4       1   f        3.11 incorrect
   9 5       1   f        3.11       hit
  10 6       1   f        3.11       hit

dplyr Exercise 2 Solution

   1 > data2 <- mutate(data2,n.hit=sum(Stim.Type=='hit'),
   2 +                 n.incorrect=sum(Stim.Type=='incorrect'))
   3 > head(data2)
   4 Subject Sex Age_PRETEST Stim.Type n.hit n.incorrect
   5 1       1   f        3.11       hit  2561        1223
   6 2       1   f        3.11 incorrect  2561        1223
   7 3       1   f        3.11       hit  2561        1223
   8 4       1   f        3.11 incorrect  2561        1223
   9 5       1   f        3.11       hit  2561        1223
  10 6       1   f        3.11       hit  2561        1223

dplyr Exercise 3 Solution

   1 > sum.frame <- group_by(data, Subject) %>% 
   2 +     mutate(min.ttime = min(TTime), max.ttime=max(TTime))
   3 > head(sum.frame[,c(1:3,20:22)])
   4 Subject Sex Age_PRETEST EC1 min.ttime max.ttime
   5 1       1   f        3.11  RO        46     96434
   6 2       1   f        3.11  RO        46     96434
   7 3       1   f        3.11  RS        46     96434
   8 4       1   f        3.11  OF        46     96434
   9 5       1   f        3.11  AT        46     96434
  10 6       1   f        3.11  AT        46     96434

dplyr Exercise 4 Solution

   1 > sum.frame <- group_by(data, Subject, EC1) %>% 
   2 +     mutate(min.ttime = min(TTime), max.ttime=max(TTime))
   3 > head(sum.frame[,c(1:3,20:22)])
   4 Subject Sex Age_PRETEST EC1 min.ttime max.ttime
   5 1       1   f        3.11  RO       365     30510
   6 2       1   f        3.11  RO       365     30510
   7 3       1   f        3.11  RS       423     54085
   8 4       1   f        3.11  OF       298     58939
   9 5       1   f        3.11  AT       272     17344
  10 6       1   f        3.11  AT       272     17344