welcome: please sign in

Revision 1 vom 2015-03-15 19:29:14

Nachricht löschen
location: RstatisTik / RstatisTikPortal / RcourSe / FinalFunction / DplyR

dplyr

Introduction

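The dplyr package makes each step of data manipulation as fast and easy as possible by providing a small set of verbs, each of which is demonstrated below: filter(), select(), arrange(), mutate(), transmute() and summarise().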

filter()

filter() example

   1 > require(dplyr)
   2 > sub1 <- filter(data, Subject == 1)
   3 > table(sub1$Subject)
   4 1 
   5 665   
   6 > sub1 <- filter(data, Subject == 1, Stim.Type == "incorrect")
   7 > table(sub1$Subject,sub1$Stim.Type)
   8 hit incorrect other miss
   9 1   0       293     0    0
  10 > subframe <- filter(data, Age_PRETEST < 3.5 | Sex == "m" )
  11 > table(subframe$Age_PRETEST < 3.5, subframe$Sex)
  12 f    m
  13 FALSE    0 3202
  14 TRUE  1333 1844

select()

select() example

   1 > subframe <- select(data, Subject, Sex, Age_PRETEST)
   2 > head(subframe)
   3 Subject Sex Age_PRETEST
   4 1       1   f        3.11
   5 2       1   f        3.11
   6 3       1   f        3.11
   7 4       1   f        3.11
   8 5       1   f        3.11
   9 6       1   f        3.11
  10 > subframe <- select(data, Subject, Sex, Age_PRETEST) %>%
  11 +     filter(Age_PRETEST < 3.2)
  12 > table(subframe$Subject)
  13 1   4   9  16  18 
  14 665 645 536 663 668 

arrange()

   1 > arr.frame <- arrange(data, TTime, Time)
   2 > head(arr.frame)
   3 Subject Sex Age_PRETEST Trial Event.Type Code     Time TTime Uncertainty
   4 1       2   m        4.50   255   Response    1  9250486     2           1
   5 2      15   f        4.11   381   Response    2  7850406    10           1
   6 3      14   m        4.60   297   Response    1 11254989    13           1
   7 4      17   m        4.90   234   Response    2 12267915    13           1
   8 5       9   m        3.11   127   Response    1  1445239    16           1
   9 6       2   m        4.50   332   Response    2  3580014    24           1
  10 Duration Uncertainty.1 ReqTime ReqDur Stim.Type Pair.Index    Type Event.Code
  11 1      200             2       0   next       hit        220 Picture   TO18.jpg
  12 2      200             2       0   next       hit        328 Picture   TO22.jpg
  13 3      200             2       0   next       hit        258 Picture   TS05.jpg
  14 4      200             2       0   next incorrect        202 Picture   TO03.jpg
  15 5      200             2       0   next       hit        126 Picture   RS21.jpg
  16 6      200             2       0   next       hit        333 Picture   RS30.jpg
  17 Time1 testid EC1
  18 1  9250484      1  TO
  19 2  7850396      4  TO
  20 3 11254976      5  TS
  21 4 12267902      6  TO
  22 5  1445223  test2  RS
  23 6  3579990  test2  RS  

mutate()

mutate() example

   1 > subframe <- filter(data, Subject == 1) %>%
   2 +     mutate(Event.Code = str_replace(Event.Code,".jpg",""),
   3 +            TTime.calc = Time - Time1)
   4 > head(subframe)
   5 Subject Sex Age_PRETEST Trial Event.Type Code   Time TTime Uncertainty
   6 1       1   f        3.11     7   Response    2 103745  2575           1
   7 2       1   f        3.11    12   Response    2 156493  2737           1
   8 3       1   f        3.11    17   Response    2 214772  6630           1
   9 4       1   f        3.11    22   Response    1 262086  5957           1
  10 5       1   f        3.11    27   Response    2 302589   272           1
  11 6       1   f        3.11    32   Response    1 352703  7197           1
  12 Duration Uncertainty.1 ReqTime ReqDur Stim.Type Pair.Index    Type Event.Code
  13 1     2599             3       0   next       hit          7 Picture       RO26
  14 2     2800             2       0   next incorrect         12 Picture       RO19
  15 3     6798             2       0   next       hit         17 Picture       RS23
  16 4     5999             2       0   next incorrect         22 Picture       OF22
  17 5      400             2       0   next       hit         27 Picture       AT08
  18 6     7398             2       0   next       hit         32 Picture       AT30
  19 Time1 testid EC1 TTime.calc
  20 1 101170  test2  RO       2575
  21 2 153756  test2  RO       2737
  22 3 208142  test2  RS       6630
  23 4 256129  test2  OF       5957
  24 5 302317  test2  AT        272
  25 6 345506  test2  AT       7197
  26 > table(subframe$Subject)
  27 1 
  28 665   

transmute()

   1 > mut.frame <- transmute(data,
   2 +                     Event.Code = str_replace(Event.Code,".jpg",""),
   3 +                     TTime.calc = Time - Time1)
   4 > head(mut.frame)
   5 1       RO26       2575
   6 2       RO19       2737
   7 3       RS23       6630
   8 4       OF22       5957
   9 5       AT08        272
  10 6       AT30       7197

summarise()

   1 > sum.frame <- summarise(data, mean.ttime=mean(TTime), sd.ttime = sd(TTime))
   2 > sum.frame
   3 1   18393.74 17876.12  

summarise() example

   1 > sum.frame <- group_by(data, Subject) %>%
   2 +     summarise(mean.ttime=mean(TTime), sd.ttime = sd(TTime))
   3 > sum.frame
   4 Subject mean.ttime sd.ttime
   5 1        1  11717.854 13035.85
   6 2        2  13100.568 13607.71
   7 3        3  15709.598 16464.09
   8 4        4  24778.592 20205.91
   9 5        5  14759.785 14863.84
  10 6        6  14081.377 14834.64
  11 7        7  11551.482 12814.57
  12 8        8  22739.310 18215.68
  13 9        9  20490.722 19399.49

summarise() example

   1 > sum.frame <- group_by(data, Subject, testid) %>%
   2 +     summarise(mean.ttime=mean(TTime), sd.ttime = sd(TTime))
   3 > head(sum.frame)
   4 Subject testid mean.ttime  sd.ttime
   5 1       1  test1   8621.674  7571.462
   6 2       1      1   9256.367  8682.833
   7 3       1      2   9704.712 10479.788
   8 4       1      3  14189.550 13707.021
   9 5       1      4  13049.831 11344.656
  10 6       1      5  14673.525 15575.355

dplyr Exercises

* Use select() and filter() in combination (chained with the pipe operator %>%) to select all rows belonging to the post test or the pre test, keeping the Subject, Sex, Age_PRETEST and Stim.Type columns. Store the result in a new data frame named data2 (or a similar name).
* Add two new variables containing the counts of hit and incorrect. Use mutate() and sum(Stim.Type=='hit').
* Use group_by() and summarise() to extract the minimum and maximum TTime per person from the original data frame.
* Repeat the last exercise, but now group per person and EC1.

dplyr Exercise 1 Solution

   1 > data2 <- filter(data, testid %in% c("test1","test2") )%>%
   2 +     select(Subject,Sex,Age_PRETEST,Stim.Type)
   3 > head(data2)
   4 Subject Sex Age_PRETEST Stim.Type
   5 1       1   f        3.11       hit
   6 2       1   f        3.11 incorrect
   7 3       1   f        3.11       hit
   8 4       1   f        3.11 incorrect
   9 5       1   f        3.11       hit
  10 6       1   f        3.11       hit

dplyr Exercise 2 Solution

   1 > data2 <- mutate(data2,n.hit=sum(Stim.Type=='hit'),
   2 +                 n.incorrect=sum(Stim.Type=='incorrect'))
   3 > head(data2)
   4 Subject Sex Age_PRETEST Stim.Type n.hit n.incorrect
   5 1       1   f        3.11       hit  2561        1223
   6 2       1   f        3.11 incorrect  2561        1223
   7 3       1   f        3.11       hit  2561        1223
   8 4       1   f        3.11 incorrect  2561        1223
   9 5       1   f        3.11       hit  2561        1223
  10 6       1   f        3.11       hit  2561        1223

dplyr Exercise 3 Solution

   1 > sum.frame <- group_by(data, Subject) %>% 
   2 +     mutate(min.ttime = min(TTime), max.ttime=max(TTime))
   3 > head(sum.frame[,c(1:3,20:22)])
   4 Subject Sex Age_PRETEST EC1 min.ttime max.ttime
   5 1       1   f        3.11  RO        46     96434
   6 2       1   f        3.11  RO        46     96434
   7 3       1   f        3.11  RS        46     96434
   8 4       1   f        3.11  OF        46     96434
   9 5       1   f        3.11  AT        46     96434
  10 6       1   f        3.11  AT        46     96434

dplyr Exercise 4 Solution

   1 > sum.frame <- group_by(data, Subject, EC1) %>% 
   2 +     mutate(min.ttime = min(TTime), max.ttime=max(TTime))
   3 > head(sum.frame[,c(1:3,20:22)])
   4 Subject Sex Age_PRETEST EC1 min.ttime max.ttime
   5 1       1   f        3.11  RO       365     30510
   6 2       1   f        3.11  RO       365     30510
   7 3       1   f        3.11  RS       423     54085
   8 4       1   f        3.11  OF       298     58939
   9 5       1   f        3.11  AT       272     17344
  10 6       1   f        3.11  AT       272     17344