Größe: 2352
Kommentar:
|
← Revision 16 vom 2015-03-16 17:23:05 ⇥
Größe: 6105
Kommentar:
|
Gelöschter Text ist auf diese Art markiert. | Hinzugefügter Text ist auf diese Art markiert. |
Zeile 2: | Zeile 2: |
== Funtion: Reading File == | == Reading File() - going through the function line by line == Here is the function as whole, below we go through it line by line. |
Zeile 5: | Zeile 7: |
read.file <- function(file,skip=3,verbose=T){ | read.file <- function(file,skip,verbose=T){ |
Zeile 7: | Zeile 9: |
tmp <- read.table(file,skip = skip,sep = "\t",header=T,na.strings = c(" +","")) tmp <- tmp[-1,] |
tmp <- read.table(file,skip = skip,sep = "\t", header=T,na.strings = c(" +",""), fill=T) tmp <- tmp[!is.na(tmp$Subject),] if(sum(!str_detect(tmp[,1],"^0[012][0-9]_[1-8]$|^0[012][0-9]_test[12]$"))) print(paste("id",tmp$Subject[1])) if(sum(tmp$Stim.Type %in% c("hit","incorrect"))==0) return(NULL) |
Zeile 12: | Zeile 23: |
return(x)}else{ return(x) }}) | return(x) } else { return(x) } }) |
Zeile 14: | Zeile 30: |
pause <- which(tmp$Event.Type=="Picture" & tmp$Code=="Pause") if(length(pause)>0){ drei <- which(tmp$Code==3 & !is.na(tmp$Code)) drei <- drei[drei > pause][1:2] if(pause + 1 < drei[1]){ tmp <- tmp[-(pause:drei[2]),] }} |
|
Zeile 16: | Zeile 42: |
first3 <- min(which(tmp$Code==3 & !is.na(tmp$Code))) equalsnot3 <- which(!(tmp$Code==3 & !is.na(tmp$Code))) first <- min(equalsnot3[equalsnot3 > first3]) tmp <- tmp[-c(1:first),] |
first.pic <- min(which(tmp$Event.Type=="Picture" & !is.na(tmp$Event.Type) )) - 1 tmp <- tmp[-(1:first.pic),] |
Zeile 22: | Zeile 45: |
letzte <- max(which(tmp$Code==3 & !is.na(tmp$Code))) tmp <- tmp[-c(letzte:nrow(tmp)),] |
last.pic <- min(which(tmp$Event.Type=="Picture" & !is.na(tmp$Event.Type) & tmp$Code=="Fertig!" & !is.na(tmp$Code))) tmp <- tmp[-(last.pic:nrow(tmp)),] |
Zeile 38: | Zeile 62: |
tmp$Time1 <- NA tmp$Time1[responses] <- tmp$Time[events] |
|
Zeile 44: | Zeile 70: |
Zeile 52: | Zeile 79: |
[[#CA-efbf8a93fbd22f17bfbdcb535a155bbb9c389093_18|Line 18]]: | === line 1 === * [[#CA-a8692f97d06bd15e4a572260cf8b72ef3c9d984e_1|line 1]] gives the function its name including arguments and their default values * the file argument will take the file name and is without a default * skip takes a number which indicates how many lines will be skipped at the beginning of the file * verbose indicates if the file name will be printed out while reading {{{#!highlight r read.file <- function(file,skip=3,verbose=T){ }}} === line 2 === {{{#!highlight r if(verbose) print(paste("read", file)) }}} * [[#CA-a8692f97d06bd15e4a572260cf8b72ef3c9d984e_2|this line]] just prints out the name of the file while reading it unless verbose is set to wrong === Line 3-5 === * [[#CA-a8692f97d06bd15e4a572260cf8b72ef3c9d984e_3|here]] we have the command to read in the text file * it takes the skip argument from above * we are setting sep which indicates the field separator to tab * set header to T because the file contains the columns names * with setting na.strings to the empty string or any string containing only spaces to we indicate to code this fields as missings * [[/ReadingFiles|more on reading files]] {{{#!highlight r tmp <- read.table(file,skip = skip,sep = "\t", header=T,na.strings = c(" +",""), fill=T) }}} === Line 7 === * [[#CA-a8692f97d06bd15e4a572260cf8b72ef3c9d984e_7|here]] we remove all rows with a missing Subject field * therefore we need indexing * is.na(x) gives back a logical vector, containing TRUE for missings in x and FALSE for any existing value * [[/indeXing|read more on indexing/subscripting]] {{{#!highlight r tmp <- tmp[!is.na(tmp$Subject),] }}} === Line 9-10 === * [[#CA-a8692f97d06bd15e4a572260cf8b72ef3c9d984e_9|Line 9 and 10]] print the content of Subject to stdout if the content is not in standard form * str_detect() is a R function and part of the stringr package it gives back a logical value dependend on if the pattern is contained in the given string * the pattern 
is a regular expression which is more flexible than to use absolut strings * so we check every entry of Subject, take the negation and sum the resulting logical vector - this sum is zero if no deviant Subject coding is found, otherwise the print command is executed * [[/StringOperations|some basic information about strings and regular expression can be found here]] {{{#!highlight r if(sum(!str_detect(tmp$Subject,"^0[012][0-9]_[1-8]$|^0[012][0-9]_test[12]$"))) print(paste("id",tmp$Subject[1])) }}} === Line 12 === * [[#CA-a8692f97d06bd15e4a572260cf8b72ef3c9d984e_12|Line 12]] checks if there is at least one line containing the code for a correct (hit) or incorrect (incorrect) answer * if there is no such line the function gives back a NULL value {{{#!highlight r if(sum(tmp$Stim.Type %in% c("hit","incorrect"))==0) return(NULL) }}} [[#CA-a8692f97d06bd15e4a572260cf8b72ef3c9d984e_18|Line 18]]: |
Final Functions
Reading File() - going through the function line by line
Here is the function as a whole; below we go through it line by line.
1 read.file <- function(file,skip,verbose=T){ # read one tab-separated log file; returns the picture-response rows, or NULL
2 if(verbose) print(paste("read", file)) # progress message, suppressed when verbose is FALSE
3 tmp <- read.table(file,skip = skip,sep = "\t", # skip the leading lines, split fields on tabs
4 header=T,na.strings = c(" +",""), # NOTE(review): na.strings are matched literally, not as regex - confirm the first entry is intended
5 fill=T) # pad rows that have too few fields
6 # drop every row without a Subject value
7 tmp <- tmp[!is.na(tmp$Subject),]
8 # if any entry of column 1 deviates from the expected id pattern, print the first Subject (str_detect comes from stringr)
9 if(sum(!str_detect(tmp[,1],"^0[012][0-9]_[1-8]$|^0[012][0-9]_test[12]$")))
10 print(paste("id",tmp$Subject[1]))
11 # give up early when no row has Stim.Type hit or incorrect
12 if(sum(tmp$Stim.Type %in% c("hit","incorrect"))==0) return(NULL)
13 # remove blanks from every character/factor column and store it as a factor; other columns pass through
14 tmp <- lapply(tmp,function(x) {
15 if( class(x) %in% c("character","factor") ){
16 x <- factor(gsub(" ","",as.character(x)))
17 return(x)
18 } else {
19 return(x)
20 }
21 })
22 # lapply() returned a plain list, so rebuild the data frame
23 tmp <- as.data.frame(tmp)
24 # pause handling: locate the Picture row coded Pause
25 pause <- which(tmp$Event.Type=="Picture" & tmp$Code=="Pause")
26 if(length(pause)>0){ # NOTE(review): pause is used as a scalar below - assumes at most one such row, confirm
27 drei <- which(tmp$Code==3 & !is.na(tmp$Code)) # rows with Code 3
28 drei <- drei[drei > pause][1:2] # the first two of them after the pause row
29 if(pause + 1 < drei[1]){ # only when the first Code 3 does not directly follow the pause row
30 tmp <- tmp[-(pause:drei[2]),] # drop the pause row through the second Code 3
31 }}
32
33 # remove rows whose Event.Type is Pause or Resume
34 tmp <- tmp[!(tmp$Event.Type %in% c("Pause","Resume")), ]
35 # cut away everything before the first Picture event
36 first.pic <- min(which(tmp$Event.Type=="Picture" & !is.na(tmp$Event.Type) )) - 1
37 tmp <- tmp[-(1:first.pic),] # NOTE(review): if first.pic is 0, 1:0 is c(1, 0) and row 1 is dropped - confirm
38 # cut from the first Picture row coded Fertig! through the end of the table
39 last.pic <- min(which(tmp$Event.Type=="Picture" & !is.na(tmp$Event.Type) &
40 tmp$Code=="Fertig!" & !is.na(tmp$Code)))
41 tmp <- tmp[-(last.pic:nrow(tmp)),]
42 # keep each Response row together with the row directly before it
43 zeilen <- which(tmp$Event.Type %in% c("Response"))
44 zeilen <- sort(unique(c(zeilen,zeilen-1)))
45 zeilen <- zeilen[zeilen>0] # a Response in row 1 has no predecessor
46 tmp <- tmp[zeilen,]
47 # rows with Code 1 or 2 are the responses; the row before each one is treated as its event
48 responses <- which(tmp$Code %in% c(1,2))
49 events <- responses-1
50 tmp$Type <- NA
51 tmp$Type[responses] <- as.character(tmp$Event.Type[events]) # event type of the preceding row
52 # diagnostic: print the file name if the two selections differ in length
53 if(length(tmp$Type[responses])!=length(tmp$Event.Type[events])) { print(file)}
54 tmp$Event.Code <- NA
55 tmp$Event.Code[responses] <- as.character(tmp$Code[events]) # code of the preceding row
56 tmp$Time1 <- NA
57 tmp$Time1[responses] <- tmp$Time[events] # time stamp of the preceding row
58 tmp$Stim.Type[responses] <- as.character(tmp$Stim.Type[events]) # lines 58-63: overwrite the response row with the preceding row's values
59 tmp$Duration[responses] <- as.character(tmp$Duration[events])
60 tmp$Uncertainty.1[responses] <- as.character(tmp$Uncertainty.1[events])
61 tmp$ReqTime[responses] <- as.character(tmp$ReqTime[events])
62 tmp$ReqDur[responses] <- as.character(tmp$ReqDur[events])
63 tmp$Pair.Index[responses] <- as.character(tmp$Pair.Index[events])
64
65
66 tmp$Stim.Type[responses] <- as.character(tmp$Stim.Type[events]) # same assignment as line 58
67 tmp <- tmp[tmp$Event.Type=="Response" & !is.na(tmp$Type),] # keep Response rows that received a Type
68 tmp <- tmp[tmp$Type=="Picture" & !is.na(tmp$Type),] # of those, keep the ones whose preceding row was a Picture
69 return(tmp)
70 }
line 1
line 1 gives the function its name including arguments and their default values
- the file argument will take the file name and is without a default
- skip takes a number which indicates how many lines will be skipped at the beginning of the file
- verbose indicates if the file name will be printed out while reading
1 read.file <- function(file,skip=3,verbose=T){
line 2
1 if(verbose) print(paste("read", file))
this line just prints out the name of the file while reading it, unless verbose is set to FALSE
Line 3-5
here we have the command to read in the text file
- it takes the skip argument from above
- we are setting sep which indicates the field separator to tab
- set header to T because the file contains the column names
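- by setting na.strings to c(" +","") we tell read.table to code fields that equal one of these strings as missing values; note that na.strings entries are compared literally, not as regular expressions, so " +" matches only a field consisting of a space followed by a plus sign, not any run of spaces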
Line 7
here we remove all rows with a missing Subject field
- for this we use indexing
- is.na(x) gives back a logical vector, containing TRUE for missings in x and FALSE for any existing value
1 tmp <- tmp[!is.na(tmp$Subject),]
Line 9-10
Lines 9 and 10 print the first entry of Subject to stdout if any entry is not in the standard form
- str_detect() is an R function from the stringr package; it returns a logical value depending on whether the pattern is contained in the given string
- the pattern is a regular expression, which is more flexible than using fixed strings
- so we check every entry of Subject, take the negation and sum the resulting logical vector - this sum is zero if no deviant Subject coding is found, otherwise the print command is executed
some basic information about strings and regular expression can be found here
Line 12
Line 12 checks if there is at least one line containing the code for a correct (hit) or incorrect (incorrect) answer
- if there is no such line the function gives back a NULL value
1 if(sum(tmp$Stim.Type %in% c("hit","incorrect"))==0) return(NULL)