R:Wide to tall for multiple variables

6/14/2016

  • R
  • data management
  • reshape

A snippet! This is a walk through of a logical way of going from wide to tall in a longitudinal data set, beginning with manual specification and culminating in a function that can handle multiple variables.

Basic R knowledge is assumed, this technique is built for readability over efficiency (e.g. use of 'for' loops rather than apply).

This will be most useful as a learning tool if you copy the below code into R, the run through each section one at a time.

# Make some dummy 'wide' data: 10 participants x three measurement occasions for three variables.
      
      dummy<-data.frame(paste0("person_",seq(1:10)),
                        replicate(8, rnorm(200)),
                        sample( LETTERS[1:4], 200, replace=TRUE),
                        sample((0:1), 200, replace=TRUE,prob=c(.2,.8)))
      names(dummy)<-c("ID",paste0("a_",seq(1:3)),paste0("b_",seq(1:3)),paste0("c_",seq(1:3)),"missing")
      dummy<-dummy[1:10,]
      
      # 1) Wide to long for one variable, manual
      
      dummy_tall_manual<-data.frame(rbind(cbind(dummy$ID,dummy$a_1,rep(1,nrow(dummy))),
                                          cbind(dummy$ID,dummy$a_2,rep(2,nrow(dummy))),
                                          cbind(dummy$ID,dummy$a_3,rep(3,nrow(dummy)))))
      names(dummy_tall_manual)<-c("ID","a","occasion")
      # Check: 10 participants x 3 occasions
      table(dummy_tall_manual$ID,dummy_tall_manual$occasion) # Yes: one response per cell.


# 2) Wide to long for one variable, functional (assumes consistent suffix of underscore and number to denote occasion).
      
      wide_to_tall_one<-function(variable, data,maxocs=10){
        # Tests for maximum possible occasions, defaults to 10 but you can choose more.
        possible_ocs<-paste0(variable,"_",seq(1:maxocs))
        test_ocs<-possible_ocs %in% names(data)
        confirmed_ocs<-which(test_ocs)
        to_reshape<-paste0(variable,"_",confirmed_ocs)
        # Loops for each occasion found, binds together as tall.
        accum<-data.frame(ID=as.character(),occasion=as.character(),variable=as.character())
        
        for(i in 1:length(to_reshape)){tallafied<-data.frame(data[,"ID"],paste0("occasion_",i),data[,to_reshape[i]])
        names(tallafied)<-c("ID","occasion",variable)
        accum<-rbind(accum,tallafied)}
        names(accum)<-c("ID","occasion",variable)
        accum<-data.frame(accum)
        return(accum)}
      
      wlo<-wide_to_tall_one("a",dummy) # Reproduces manual result
      # Check: 10 participants x 3 occasions
      table(wlo$ID,wlo$occasion) # Yes: one response per cell. 
      
      wide_to_tall_one("c",dummy) # Beware mixing numeric/string input, check variable type when done.

# 3) Run the above for arbitrary number of variables.

      wide_to_tall_multi<-function(data, maxocs=10, ...){go_through<-c(...)
      # Run first
      accum<-wide_to_tall_one(variable=go_through[1], data=data,maxocs=maxocs)
      for(i in 2:length(go_through)){add<-wide_to_tall_one(variable=go_through[i], data=data,maxocs=maxocs)
      accum<-merge(accum,add,by=c("ID","occasion"),all=T)}
      return(accum)} 
      test<-wide_to_tall_multi(dummy,10,"a","b")
      
      # Check: 10 participants x 3 occasions
      table(test$ID,test$occasion)
      
      # Spot check: correspondence with initial dataset
      dummy[which(dummy$ID=="person_1"),]$a_2
      test[which(test$ID=="person_1" & test$occasion=="occasion_2"),]$a
      
      dummy[which(dummy$ID=="person_6"),]$b_1
      test[which(test$ID=="person_6" & test$occasion=="occasion_1"),]$b
      

(pretty R code courtesy of http://hilite.me/).

Return to blog index

Comments

Leave a Reply



(Your email will not be publicly displayed.)


Captcha Code

Click the image to see another captcha.