= dplyr =

<<TableOfContents(3)>>

== Introduction ==
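Working with data involves three steps: figuring out what you want to do, expressing that as code, and executing the code. The dplyr package makes each of these steps as fast and easy as possible by: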
   *  Elucidating the most common data manipulation operations, so that your options are helpfully constrained when thinking about how to tackle a problem.
   *  Providing simple functions that correspond to the most common data manipulation verbs, so that you can easily translate your thoughts into code.
   * Using efficient data storage backends, so that you spend as little time waiting for the computer as possible.
== filter() ==
   *  Base R approach to filtering forces you to repeat the data frame’s name
   * dplyr approach is simpler to write and read
   * Command structure (for all dplyr verbs):
      * first argument is a data frame
      * return value is a data frame
      * nothing is modified in place
   * Note: dplyr generally does not preserve row names
=== filter() example ===
{{{#!highlight r
> require(dplyr)
> sub1 <- filter(data, Subject == 1)
> table(sub1$Subject)
1 
665   
> sub1 <- filter(data, Subject == 1, Stim.Type == "incorrect")
> table(sub1$Subject,sub1$Stim.Type)
hit incorrect other miss
1   0       293     0    0
> subframe <- filter(data, Age_PRETEST < 3.5 | Sex == "m" )
> table(subframe$Age_PRETEST < 3.5, subframe$Sex)
f    m
FALSE    0 3202
TRUE  1333 1844
}}}
== select() ==
   * select() selects columns
   * it can be used in combination with filter()
   * combining can be done by the infix %>%
=== select() example ===
{{{#!highlight r
> subframe <- select(data, Subject, Sex, Age_PRETEST)
> head(subframe)
Subject Sex Age_PRETEST
1       1   f        3.11
2       1   f        3.11
3       1   f        3.11
4       1   f        3.11
5       1   f        3.11
6       1   f        3.11
> subframe <- select(data, Subject, Sex, Age_PRETEST) %>%
+     filter(Age_PRETEST < 3.2)
> table(subframe$Subject)
1   4   9  16  18 
665 645 536 663 668 
}}}
== arrange() ==
   * arrange() can be used to change the order of rows
{{{#!highlight r
> arr.frame <- arrange(data, TTime, Time)
> head(arr.frame)
Subject Sex Age_PRETEST Trial Event.Type Code     Time TTime Uncertainty
1       2   m        4.50   255   Response    1  9250486     2           1
2      15   f        4.11   381   Response    2  7850406    10           1
3      14   m        4.60   297   Response    1 11254989    13           1
4      17   m        4.90   234   Response    2 12267915    13           1
5       9   m        3.11   127   Response    1  1445239    16           1
6       2   m        4.50   332   Response    2  3580014    24           1
Duration Uncertainty.1 ReqTime ReqDur Stim.Type Pair.Index    Type Event.Code
1      200             2       0   next       hit        220 Picture   TO18.jpg
2      200             2       0   next       hit        328 Picture   TO22.jpg
3      200             2       0   next       hit        258 Picture   TS05.jpg
4      200             2       0   next incorrect        202 Picture   TO03.jpg
5      200             2       0   next       hit        126 Picture   RS21.jpg
6      200             2       0   next       hit        333 Picture   RS30.jpg
Time1 testid EC1
1  9250484      1  TO
2  7850396      4  TO
3 11254976      5  TS
4 12267902      6  TO
5  1445223  test2  RS
6  3579990  test2  RS  
}}}
== mutate() ==
   * mutate() can be used to transform or add columns
   * you can add/change more than one column at once
=== mutate() example ===
{{{#!highlight r
> subframe <- filter(data, Subject == 1) %>%
+     mutate(Event.Code = str_replace(Event.Code,".jpg",""),
+            TTime.calc = Time - Time1)
> head(subframe)
Subject Sex Age_PRETEST Trial Event.Type Code   Time TTime Uncertainty
1       1   f        3.11     7   Response    2 103745  2575           1
2       1   f        3.11    12   Response    2 156493  2737           1
3       1   f        3.11    17   Response    2 214772  6630           1
4       1   f        3.11    22   Response    1 262086  5957           1
5       1   f        3.11    27   Response    2 302589   272           1
6       1   f        3.11    32   Response    1 352703  7197           1
Duration Uncertainty.1 ReqTime ReqDur Stim.Type Pair.Index    Type Event.Code
1     2599             3       0   next       hit          7 Picture       RO26
2     2800             2       0   next incorrect         12 Picture       RO19
3     6798             2       0   next       hit         17 Picture       RS23
4     5999             2       0   next incorrect         22 Picture       OF22
5      400             2       0   next       hit         27 Picture       AT08
6     7398             2       0   next       hit         32 Picture       AT30
Time1 testid EC1 TTime.calc
1 101170  test2  RO       2575
2 153756  test2  RO       2737
3 208142  test2  RS       6630
4 256129  test2  OF       5957
5 302317  test2  AT        272
6 345506  test2  AT       7197
> table(subframe$Subject)
1 
665   
}}}
== transmute() ==
   * transmute() does the same as mutate() but keeps only the new columns
{{{#!highlight r
> mut.frame <- transmute(data,
+                     Event.Code = str_replace(Event.Code,".jpg",""),
+                     TTime.calc = Time - Time1)
> head(mut.frame)
1       RO26       2575
2       RO19       2737
3       RS23       6630
4       OF22       5957
5       AT08        272
6       AT30       7197
}}}
== summarise() ==
   * summarise() makes summary statistics
{{{#!highlight r
> sum.frame <- summarise(data, mean.ttime=mean(TTime), sd.ttime = sd(TTime))
> sum.frame
1   18393.74 17876.12  
}}}
=== summarise() example ===
   * summarise() gets really interesting in combination with group_by(), which is also included in the dplyr package
{{{#!highlight r
> sum.frame <- group_by(data, Subject) %>%
+     summarise(mean.ttime=mean(TTime), sd.ttime = sd(TTime))
> sum.frame
Subject mean.ttime sd.ttime
1        1  11717.854 13035.85
2        2  13100.568 13607.71
3        3  15709.598 16464.09
4        4  24778.592 20205.91
5        5  14759.785 14863.84
6        6  14081.377 14834.64
7        7  11551.482 12814.57
8        8  22739.310 18215.68
9        9  20490.722 19399.49
}}}
=== summarise() example ===
{{{#!highlight r
> sum.frame <- group_by(data, Subject, testid) %>%
+     summarise(mean.ttime=mean(TTime), sd.ttime = sd(TTime))
> head(sum.frame)
Subject testid mean.ttime  sd.ttime
1       1  test1   8621.674  7571.462
2       1      1   9256.367  8682.833
3       1      2   9704.712 10479.788
4       1      3  14189.550 13707.021
5       1      4  13049.831 11344.656
6       1      5  14673.525 15575.355
}}}
== dplyr Exercises ==
   * use select() and filter() in combination (%>%) to select all rows belonging to the post or the pre test, keep the Subject, Sex, Age_PRETEST and Stim.Type column. Create a new data frame named data2 or something like this.
   * add two new variables containing the counts of hit and incorrect. Use mutate() and sum(Stim.Type=='hit').
   * use group_by() and summarise() to extract the minimum and maximum TTime per person from the original data frame
   * repeat the last exercise, but now group per person and EC1
=== dplyr Exercise 1 Solution ===
   * use select() and filter() in combination (%>%) to select all rows belonging to the post or the pre test, keep the Subject, Sex, Age_PRETEST and Stim.Type column. Create a new data frame named data2 or something like this.
{{{#!highlight r
> data2 <- filter(data, testid %in% c("test1","test2") )%>%
+     select(Subject,Sex,Age_PRETEST,Stim.Type)
> head(data2)
Subject Sex Age_PRETEST Stim.Type
1       1   f        3.11       hit
2       1   f        3.11 incorrect
3       1   f        3.11       hit
4       1   f        3.11 incorrect
5       1   f        3.11       hit
6       1   f        3.11       hit
}}}
=== dplyr Exercise 2 Solution ===
   * add two new variables containing the counts of hit and incorrect. Use mutate() and sum(Stim.Type=='hit')
{{{#!highlight r
> data2 <- mutate(data2,n.hit=sum(Stim.Type=='hit'),
+                 n.incorrect=sum(Stim.Type=='incorrect'))
> head(data2)
Subject Sex Age_PRETEST Stim.Type n.hit n.incorrect
1       1   f        3.11       hit  2561        1223
2       1   f        3.11 incorrect  2561        1223
3       1   f        3.11       hit  2561        1223
4       1   f        3.11 incorrect  2561        1223
5       1   f        3.11       hit  2561        1223
6       1   f        3.11       hit  2561        1223
}}}
=== dplyr Exercise 3 Solution ===
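   * use group_by() and summarise() to extract the minimum and maximum TTime per person from the original data frame
   * Note: the solution below uses mutate() instead of summarise(), so every original row is kept and the per-subject minimum and maximum are repeated on each row; summarise() would return one row per Subject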
{{{#!highlight r
> sum.frame <- group_by(data, Subject) %>% 
+     mutate(min.ttime = min(TTime), max.ttime=max(TTime))
> head(sum.frame[,c(1:3,20:22)])
Subject Sex Age_PRETEST EC1 min.ttime max.ttime
1       1   f        3.11  RO        46     96434
2       1   f        3.11  RO        46     96434
3       1   f        3.11  RS        46     96434
4       1   f        3.11  OF        46     96434
5       1   f        3.11  AT        46     96434
6       1   f        3.11  AT        46     96434
}}}
=== dplyr Exercise 4 Solution ===
   * repeat the last exercise, but now group per person and EC1
{{{#!highlight r
> sum.frame <- group_by(data, Subject, EC1) %>% 
+     mutate(min.ttime = min(TTime), max.ttime=max(TTime))
> head(sum.frame[,c(1:3,20:22)])
Subject Sex Age_PRETEST EC1 min.ttime max.ttime
1       1   f        3.11  RO       365     30510
2       1   f        3.11  RO       365     30510
3       1   f        3.11  RS       423     54085
4       1   f        3.11  OF       298     58939
5       1   f        3.11  AT       272     17344
6       1   f        3.11  AT       272     17344
}}}