welcome: please sign in
location: Änderungen von "RstatisTik/RstatisTikPortal/RcourSe/DataWrangling"
Unterschiede zwischen den Revisionen 1 und 3 (über 2 Versionen hinweg)
Revision 1 vom 2015-03-04 16:34:22
Größe: 127
Kommentar:
Revision 3 vom 2015-03-05 07:54:42
Größe: 2352
Kommentar:
Gelöschter Text ist auf diese Art markiert. Hinzugefügter Text ist auf diese Art markiert.
Zeile 1: Zeile 1:
= Final Functions =
== Function: Reading File ==
Zeile 2: Zeile 5:
x <- x %>% group_by(group) %>%
           mutate(sum=sum(var))
mean(x)
mm <- lm(y ~ x, data = data)
#' Read one tab-separated log file and return its responses to pictures
#'
#' The file is read with `read.table()`, trimmed to the stretch between the
#' leading and the trailing rows whose `Code` is 3, and reduced to rows with
#' `Event.Type == "Response"`. Every response whose `Code` is 1 or 2 is
#' annotated with the values of the row directly before it.
#'
#' The input must provide the columns `Event.Type`, `Code`, `Stim.Type`,
#' `Duration`, `Uncertainty.1`, `ReqTime`, `ReqDur` and `Pair.Index`.
#'
#' @param file Path of the log file.
#' @param skip Number of lines skipped before the header line; handed to
#'   `read.table()`.
#' @param verbose If `TRUE`, print the name of the file being read.
#' @return A data frame holding the response rows whose preceding row has
#'   `Event.Type == "Picture"`. Two columns are added: `Type` (event type of
#'   the preceding row) and `Event.Code` (code of the preceding row). The
#'   columns `Stim.Type`, `Duration`, `Uncertainty.1`, `ReqTime`, `ReqDur`
#'   and `Pair.Index` of a response carry the preceding row's values,
#'   converted through `as.character()`.
read.file <- function(file, skip = 3, verbose = TRUE) {
    if (verbose) print(paste("read", file))
    tmp <- read.table(file, skip = skip, sep = "\t", header = TRUE,
                      na.strings = c(" +", ""))
    # The first data row below the header is discarded.
    tmp <- tmp[-1, ]
    # Text columns: remove blanks and store as factor; other columns pass
    # through unchanged.
    tmp <- lapply(tmp, function(x) {
        if (is.character(x) || is.factor(x)) {
            x <- factor(gsub(" ", "", as.character(x)))
        }
        x
    })
    tmp <- as.data.frame(tmp)
    tmp <- tmp[!(tmp$Event.Type %in% c("Pause", "Resume")), ]

    # TRUE for rows whose Code is 3 (NA codes count as "not 3").
    is_code3 <- function(d) !is.na(d$Code) & d$Code == 3

    # Cut everything up to and including the first non-3 row that follows
    # the first Code-3 row.
    leading <- which(is_code3(tmp))
    if (length(leading) == 0L) {
        stop("no row with Code 3 found in ", file, call. = FALSE)
    }
    first3 <- leading[1L]
    equalsnot3 <- which(!is_code3(tmp))
    equalsnot3 <- equalsnot3[equalsnot3 > first3]
    if (length(equalsnot3) == 0L) {
        stop("no row follows the leading Code 3 rows in ", file,
             call. = FALSE)
    }
    first <- equalsnot3[1L]
    tmp <- tmp[-seq_len(first), ]

    # Cut from the last Code-3 row to the end of the table.
    trailing <- which(is_code3(tmp))
    if (length(trailing) == 0L) {
        stop("no closing row with Code 3 found in ", file, call. = FALSE)
    }
    letzte <- max(trailing)
    tmp <- tmp[seq_len(letzte - 1L), ]

    # Keep each response together with the row directly before it.
    zeilen <- which(tmp$Event.Type %in% "Response")
    zeilen <- sort(unique(c(zeilen, zeilen - 1)))
    zeilen <- zeilen[zeilen > 0]
    tmp <- tmp[zeilen, ]

    responses <- which(tmp$Code %in% c(1, 2))
    # A response in row 1 has no preceding row. Indexing with 0 would shorten
    # the right-hand sides below and R would recycle them onto the wrong
    # rows, so such a response is left unannotated (and filtered out later).
    orphan <- responses < 2
    if (any(orphan)) {
        message("response without preceding event ignored in ", file)
        responses <- responses[!orphan]
    }
    events <- responses - 1

    tmp$Type <- NA
    tmp$Type[responses] <- as.character(tmp$Event.Type[events])
    tmp$Event.Code <- NA
    tmp$Event.Code[responses] <- as.character(tmp$Code[events])
    # Each column is copied exactly once, so the right-hand side always
    # reads values that have not been overwritten yet.
    carried <- c("Stim.Type", "Duration", "Uncertainty.1",
                 "ReqTime", "ReqDur", "Pair.Index")
    for (column in carried) {
        tmp[[column]][responses] <- as.character(tmp[[column]][events])
    }

    tmp <- tmp[tmp$Event.Type == "Response" & !is.na(tmp$Type), ]
    tmp[tmp$Type == "Picture" & !is.na(tmp$Type), ]
}
Zeile 7: Zeile 51:

[[#CA-efbf8a93fbd22f17bfbdcb535a155bbb9c389093_18|Line 18]]:


[[#CA-efbf8a93fbd22f17bfbdcb535a155bbb9c389093_22|Line 22]]:



== Function: Reading All Files from a DIRECTORY ==

{{{#!highlight r
#' Read every file of a directory with `read.file()` and stack the results
#'
#' @param filesdir Path of the directory whose files are read.
#' @param skip Number of lines to skip in each file; passed to `read.file()`.
#' @param ... Further arguments passed on to `read.file()` (for example
#'   `verbose = FALSE`).
#' @return The row-bound result of `read.file()` over all files, or `NULL`
#'   when the directory contains no files.
read.files <- function(filesdir,skip=3,...){
    # full.names = TRUE yields ready-to-use paths and, unlike paste(), gives
    # an empty vector (not "dir/") for an empty directory.
    files <- dir(filesdir, full.names = TRUE)
    do.call(rbind, lapply(files, read.file, skip = skip, ...))}
}}}

Final Functions

Function: Reading File

   1 read.file <- function(file,skip=3,verbose=T){
   2     if(verbose) print(paste("read", file))
   3     tmp <- read.table(file,skip = skip,sep = "\t",header=T,na.strings = c(" +",""))
   4     tmp <- tmp[-1,]
   5     tmp <- lapply(tmp,function(x) {
   6         if( class(x) %in% c("character","factor") ){
   7             x <- factor(gsub(" ","",as.character(x)))
   8             return(x)}else{ return(x) }})
   9     tmp <- as.data.frame(tmp)
  10     tmp <- tmp[!(tmp$Event.Type %in% c("Pause","Resume")), ]
  11 
  12     first3 <- min(which(tmp$Code==3 & !is.na(tmp$Code)))
  13     equalsnot3 <- which(!(tmp$Code==3 & !is.na(tmp$Code)))
  14     first <- min(equalsnot3[equalsnot3 > first3])
  15     tmp <- tmp[-c(1:first),]
  16     
  17 
  18     letzte <- max(which(tmp$Code==3 & !is.na(tmp$Code)))
  19     tmp <- tmp[-c(letzte:nrow(tmp)),]
  20 
  21     zeilen <- which(tmp$Event.Type %in% c("Response"))
  22     zeilen <- sort(unique(c(zeilen,zeilen-1)))
  23     zeilen <- zeilen[zeilen>0]
  24     tmp <- tmp[zeilen,]
  25     
  26     responses <- which(tmp$Code %in% c(1,2))
  27     events <- responses-1
  28     tmp$Type <- NA
  29     tmp$Type[responses] <- as.character(tmp$Event.Type[events])
  30 
  31     if(length(tmp$Type[responses])!=length(tmp$Event.Type[events])) { print(file)}
  32     tmp$Event.Code <- NA
  33     tmp$Event.Code[responses] <- as.character(tmp$Code[events])
  34     tmp$Stim.Type[responses] <- as.character(tmp$Stim.Type[events])
  35     tmp$Duration[responses] <- as.character(tmp$Duration[events])
  36     tmp$Uncertainty.1[responses] <- as.character(tmp$Uncertainty.1[events])
  37     tmp$ReqTime[responses] <- as.character(tmp$ReqTime[events])
  38     tmp$ReqDur[responses] <- as.character(tmp$ReqDur[events])
  39     tmp$Pair.Index[responses] <- as.character(tmp$Pair.Index[events])
  40 
  41     tmp$Stim.Type[responses] <- as.character(tmp$Stim.Type[events])
  42     tmp <- tmp[tmp$Event.Type=="Response" & !is.na(tmp$Type),]
  43     tmp <- tmp[tmp$Type=="Picture" & !is.na(tmp$Type),]
  44     return(tmp)
  45 }

Line 18:

Line 22:

Function: Reading All Files from a DIRECTORY

   1 read.files <- function(filesdir,skip=3,...){
   2     files <- paste(filedir,dir(filedir),sep="/")
   3     Reduce(rbind,lapply(files,read.file,skip=skip))}

RstatisTik/RstatisTikPortal/RcourSe/DataWrangling (zuletzt geändert am 2015-03-05 07:54:42 durch hayd@cbs.mpg.de)