Unterschiede zwischen den Revisionen 1 und 5 (über 4 Versionen hinweg)

Final Functions

Function: Reading File

Here is the function as a whole; below we go through it line by line.

   1 read.file <- function(file,skip,verbose=T){ # file: passed to read.table; skip: lines skipped at the top; verbose: print the file name
   2     if(verbose) print(paste("read", file)) # progress message
   3     tmp <- read.table(file,skip = skip,sep = "\t", # tab-separated input
   4                       header=T,na.strings = c(" +",""), # NOTE(review): na.strings entries are compared literally, so " +" is not a pattern for "only spaces" - confirm intent
   5                       fill=T) # fill=T pads rows that have fewer fields
   6     # keep only the rows that have a Subject value
   7     tmp <- tmp[!is.na(tmp$Subject),] 
   8     # if any first-column entry matches one of these codes, print "id" plus the first-column value of row 3 (str_detect is not defined here; presumably stringr - confirm)
   9     if(sum(str_detect(tmp[,1],"CH|GA|IJ|Kj|RMK"))) print(paste("id",tmp[3,1]))
  10     # give up (NULL) when no row has Stim.Type "hit" or "incorrect"
  11     if(sum(tmp$Stim.Type %in% c("hit","incorrect"))==0) return(NULL)
  12     # remove all spaces from character/factor columns and store them as factors; other columns pass through unchanged
  13     tmp <- lapply(tmp,function(x) {
  14         if( class(x) %in% c("character","factor") ){
  15             x <- factor(gsub(" ","",as.character(x)))
  16             return(x)}else{ return(x) }})
  17     # lapply returned a plain list; turn it back into a data frame
  18     tmp <- as.data.frame(tmp)
  19     # pause handling: find a Picture row with Code "Pause" and the first two Code==3 rows after it
  20     pause <- which(tmp$Event.Type=="Picture" & tmp$Code=="Pause")
  21     if(length(pause)>0){
  22         drei <- which(tmp$Code==3 & !is.na(tmp$Code))
  23         drei <- drei[drei > pause][1:2] # NOTE(review): treats pause as a single row index - confirm at most one Pause row occurs
  24         if(pause + 1 < drei[1]){ # the first Code==3 row does not directly follow the pause row
  25             tmp <- tmp[-(pause:drei[2]),] # drop everything from the pause row through the second Code==3 row
  26         }}
  27     
  28     # drop rows whose Event.Type is "Pause" or "Resume"
  29     tmp <- tmp[!(tmp$Event.Type %in% c("Pause","Resume")), ]
  30     # drop all rows before the first Picture row
  31     first.pic <- min(which(tmp$Event.Type=="Picture" & !is.na(tmp$Event.Type) )) - 1
  32     tmp <- tmp[-(1:first.pic),] # NOTE(review): if row 1 is already a Picture, first.pic is 0 and -(1:0) removes row 1 - confirm this cannot happen
  33     # drop the first Picture row with Code "Fertig!" and every row after it
  34     last.pic <- min(which(tmp$Event.Type=="Picture" & !is.na(tmp$Event.Type) &
  35                               tmp$Code=="Fertig!" & !is.na(tmp$Code))) # NOTE(review): min() of an empty which() is Inf - presumably every file has a "Fertig!" row; confirm
  36     tmp <- tmp[-(last.pic:nrow(tmp)),]
  37     # keep every Response row together with the row directly before it
  38     zeilen <- which(tmp$Event.Type %in% c("Response"))
  39     zeilen <- sort(unique(c(zeilen,zeilen-1)))
  40     zeilen <- zeilen[zeilen>0] # a Response in row 1 has no preceding row
  41     tmp <- tmp[zeilen,]
  42     # rows with Code 1 or 2 are the responses; "events" are the rows directly before them
  43     responses <- which(tmp$Code %in% c(1,2))
  44     events <- responses-1
  45     tmp$Type <- NA
  46     tmp$Type[responses] <- as.character(tmp$Event.Type[events]) # Type = Event.Type of the preceding row
  47     # print the file name if the two selections differ in length
  48     if(length(tmp$Type[responses])!=length(tmp$Event.Type[events])) { print(file)}
  49     tmp$Event.Code <- NA
  50     tmp$Event.Code[responses] <- as.character(tmp$Code[events]) # Event.Code = Code of the preceding row
  51     tmp$Time1 <- NA
  52     tmp$Time1[responses] <- tmp$Time[events] # Time1 = Time of the preceding row
  53     tmp$Stim.Type[responses] <- as.character(tmp$Stim.Type[events]) # lines 53-58: overwrite these fields of the response row with the preceding row's values
  54     tmp$Duration[responses] <- as.character(tmp$Duration[events])
  55     tmp$Uncertainty.1[responses] <- as.character(tmp$Uncertainty.1[events])
  56     tmp$ReqTime[responses] <- as.character(tmp$ReqTime[events])
  57     tmp$ReqDur[responses] <- as.character(tmp$ReqDur[events])
  58     tmp$Pair.Index[responses] <- as.character(tmp$Pair.Index[events])
  59     
  60 
  61     tmp$Stim.Type[responses] <- as.character(tmp$Stim.Type[events]) # NOTE(review): same assignment as line 53 - looks redundant
  62     tmp <- tmp[tmp$Event.Type=="Response" & !is.na(tmp$Type),] # keep Response rows that received a Type
  63     tmp <- tmp[tmp$Type=="Picture" & !is.na(tmp$Type),] # of those, keep the ones whose preceding row was a Picture
  64     return(tmp)
  65 }

line 1

line 1 gives the function its name including arguments and their default values
the file argument will take the file name and is without a default
skip takes a number which indicates how many lines will be skipped at the beginning of the file
verbose indicates if the file name will be printed out while reading

   1  read.file <- function(file,skip=3,verbose=T){

line 2

   1  if(verbose) print(paste("read", file))

this line just prints out the name of the file while reading it, unless verbose is set to FALSE

Line 3-5

here we have the command to read in the text file
it takes the skip argument from above
we are setting sep which indicates the field separator to tab
set header to T because the file contains the column names
by setting na.strings to the empty string or any string containing only spaces, we indicate that these fields should be coded as missing values
more on reading files

   1  tmp <- read.table(file,skip = skip,sep = "\t",
   2                           header=T,na.strings = c(" +",""),
   3                           fill=T)

Line 7

here we remove all rows with a missing Subject field
for this we need indexing
is.na(x) gives back a logical vector, containing TRUE for missings in x and FALSE for any existing value
read more on indexing/subscripting

   1     tmp <- tmp[!is.na(tmp$Subject),]

Line 9

Line 9 prints the content of Subject to stdout if one of the strings "CH|GA|IJ|Kj|RMK" is detected in this field, because such values are not in standard form

   1 if(sum(str_detect(tmp[,1],"CH|GA|IJ|Kj|RMK"))) print(paste("id",tmp[3,1]))

Line 18:

Line 22:

Function: Reading All Files from a DIRECTORY

   # Read every file in a directory with read.file() and combine the results.
   #
   # filesdir: path of the directory; every entry listed by dir(filesdir) is read.
   # skip:     number of leading lines to skip in each file, passed to read.file().
   # ...:      further arguments passed on to read.file().
   #
   # Returns the read.file() results combined with rbind(); NULL results
   # contribute no rows, and an empty directory yields NULL.
   read.files <- function(filesdir,skip=3,...){
       # file.path() gives character(0) for an empty directory, so nothing is
       # read in that case.
       files <- file.path(filesdir,dir(filesdir))
       Reduce(rbind,lapply(files,read.file,skip=skip,...))
   }

RstatisTik/RstatisTikPortal/RcourSe/FinalFunction (zuletzt geändert am 2015-03-16 17:23:05 durch mandy.vogel@googlemail.com)

-  ⇤ ← Revision 1 vom 2015-03-15 08:06:07 → 
  Größe: 2352
  Autor: mandy.vogel@googlemail.com
  Kommentar:
+   ← Revision 5 vom 2015-03-15 14:39:56 → ⇥
  Größe: 5105
  Autor: mandy.vogel@googlemail.com
  Kommentar:
-Gelöschter Text ist auf diese Art markiert.
+Hinzugefügter Text ist auf diese Art markiert.
 Zeile 4:
+Here is the function as whole, below we go through it line by line.
-Zeile 5:
+Zeile 7:
-read.file <- function(file,skip=3,verbose=T){
+read.file <- function(file,skip,verbose=T){
-Zeile 7:
+Zeile 9:
-    tmp <- read.table(file,skip = skip,sep = "\t",header=T,na.strings = c(" +",""))
    tmp <- tmp[-1,]
+    tmp <- read.table(file,skip = skip,sep = "\t",
                      header=T,na.strings = c(" +",""),
                      fill=T)
    
    tmp <- tmp[!is.na(tmp$Subject),] 
    
    if(sum(str_detect(tmp[,1],"CH|GA|IJ|Kj|RMK"))) print(paste("id",tmp[3,1]))

    if(sum(tmp$Stim.Type %in% c("hit","incorrect"))==0) return(NULL)
-Zeile 13:
+Zeile 23:
-Zeile 14:
+Zeile 25:
+         pause <- which(tmp$Event.Type=="Picture" & tmp$Code=="Pause")
    if(length(pause)>0){
        drei <- which(tmp$Code==3 & !is.na(tmp$Code))
        drei <- drei[drei > pause][1:2]
        if(pause + 1 < drei[1]){
            tmp <- tmp[-(pause:drei[2]),]
        }}
-Zeile 16:
+Zeile 37:
-    first3 <- min(which(tmp$Code==3 & !is.na(tmp$Code)))
    equalsnot3 <- which(!(tmp$Code==3 & !is.na(tmp$Code)))
    first <- min(equalsnot3[equalsnot3 > first3])
    tmp <- tmp[-c(1:first),]
+    first.pic <- min(which(tmp$Event.Type=="Picture" & !is.na(tmp$Event.Type) )) - 1
    tmp <- tmp[-(1:first.pic),]
-Zeile 22:
+Zeile 40:
-    letzte <- max(which(tmp$Code==3 & !is.na(tmp$Code)))
    tmp <- tmp[-c(letzte:nrow(tmp)),]
+    last.pic <- min(which(tmp$Event.Type=="Picture" & !is.na(tmp$Event.Type) &
                              tmp$Code=="Fertig!" & !is.na(tmp$Code)))
    tmp <- tmp[-(last.pic:nrow(tmp)),]
-Zeile 38:
+Zeile 57:
+    tmp$Time1 <- NA
    tmp$Time1[responses] <- tmp$Time[events]
-Zeile 44:
+Zeile 65:
-Zeile 52:
+Zeile 74:
-[[#CA-efbf8a93fbd22f17bfbdcb535a155bbb9c389093_18|Line 18]]:
+=== line 1 ===
 * [[#CA-a8692f97d06bd15e4a572260cf8b72ef3c9d984e_1|line 1]] gives the function its name including arguments and their default values
 * the file argument will take the file name and is without a default
 * skip takes a number which indicates how many lines will be skipped at the beginning of the file
 * verbose indicates if the file name will be printed out while reading

{{{#!highlight r
 read.file <- function(file,skip=3,verbose=T){
}}}

=== line 2 ===
{{{#!highlight r
 if(verbose) print(paste("read", file))
}}}

 * [[#CA-a8692f97d06bd15e4a572260cf8b72ef3c9d984e_2|this line]] just prints out the name of the file while reading it unless verbose is set to wrong

=== Line 3-5 ===
 * [[#CA-a8692f97d06bd15e4a572260cf8b72ef3c9d984e_3|here]] we have the command to read in the text file
 * it takes the skip argument from above
 * we are setting sep which indicates the field separator to tab
 * set header to T because the file contains the columns names
 * with setting na.strings to the empty string or any string containing only spaces to we indicate to code this fields as missings
 * [[/ReadingFiles|more on reading files]] 
{{{#!highlight r
 tmp <- read.table(file,skip = skip,sep = "\t",
                          header=T,na.strings = c(" +",""),
                          fill=T)
}}}

=== Line 7 ===
 * [[#CA-a8692f97d06bd15e4a572260cf8b72ef3c9d984e_7|here]] we remove all rows with a missing Subject field
 * therefore we need indexing
 * is.na(x) gives back a logical vector, containing TRUE for missings in x and FALSE for any existing value
 * [[/indeXing|read more on indexing/subscripting]]
{{{#!highlight r
    tmp <- tmp[!is.na(tmp$Subject),] 

}}}

=== Line 8 ===
 * [[#CA-a8692f97d06bd15e4a572260cf8b72ef3c9d984e_8|Line 8]] prints the content of Subject to stdout if one of those strings "CH|GA|IJ|Kj|RMK" were detected in this fields because they are not in standard form


{{{#!highlight r
if(sum(str_detect(tmp[,1],"CH|GA|IJ|Kj|RMK"))) print(paste("id",tmp[3,1]))
}}}


[[#CA-a8692f97d06bd15e4a572260cf8b72ef3c9d984e_18|Line 18]]:

Quick Links

Search Wiki

Page Tools