简体   繁体   中英

Easiest way to re-arrange data frame in R

There are countless sites that teach data wrangling and organization in R, but I'm not sure which approach would be the most efficient for my problem. I know how to do this easily in Python, but what's an equivalent easy way to do this in R?

Say, for example, I have a data frame that looks like this:

# Example data: six ROI labels ("a_01" ... "b_03") and eight columns of
# uniform random draws in [0, 1], one summer/winter pair for each of 4 years.
# No seed is set, so the values differ from run to run.
ROI <- sprintf("%s_%02d", rep(c("a", "b"), each = 3), 1:3)
summer_1 <- runif(6)
winter_1 <- runif(6)
summer_2 <- runif(6)
winter_2 <- runif(6)
summer_3 <- runif(6)
winter_3 <- runif(6)
summer_4 <- runif(6)
winter_4 <- runif(6)
# Column names are taken from the variable names.
df <- data.frame(
  ROI,
  summer_1, winter_1,
  summer_2, winter_2,
  summer_3, winter_3,
  summer_4, winter_4
)

> head(df)

ROI   summer_1 winter_1 summer_2 winter_2 summer_3 winter_3 summer_4 winter_4
a_01  0.29930  0.65683  0.37349  0.88818  0.35568 0.95592 0.08095 0.07626
a_02  0.20637  0.91795  0.32142  0.81373  0.31344 0.92150 0.05090 0.04731
a_03  0.20925  0.92048  0.32336  0.155956 0.60364 0.155893 0.06320 0.05835
b_01  0.23676  0.108526 0.63557 0.92560  0.46017 0.76339 0.06265 0.05079

But I want to re-arrange the columns so that it looks like this:

ROI   no  season value
a     1   summer 81.33328 
a     2   summer 15.34663
...

and so forth

So far I have this:

library(stringr)
# Split each ROI label (e.g. "a_01") at the underscore into exactly two pieces.
# str_split_fixed() returns a character matrix with two columns, so `new` is
# stored as a matrix column of df. The input must be df$ROI: there is no
# object called `dat` in this example.
df$new <- str_split_fixed(df$ROI, "_", 2)

How else can I best approach this?

We can do this with tidyverse

library(tidyverse)
# Split 'ROI' (e.g. "a_01") into two columns: the group label and its number.
# convert = TRUE turns the numeric piece ("01") into an integer (1).
res <- df %>%
  separate(ROI, into = c("ROI", "no"), convert = TRUE) %>%
  # Reshape from wide to long format. Every column except the two id columns
  # is gathered, so this works for any number of season_year columns rather
  # than only summer_1 ... winter_2.
  gather(season, value, -ROI, -no) %>%
  # Split the gathered column name (e.g. "summer_1") into season and suffix
  separate(season, into = c("season", "n")) %>%
  # Drop the numeric suffix, which is not needed in the result
  select(-n)

head(res)
#  ROI no season    value
#1   a  1 summer 29.25740
#2   a  2 summer 22.48911
#3   a  3 summer 70.42230
#4   b  1 summer 51.88971
#5   b  2 summer 66.26196
#6   b  3 summer 92.04438

Another option is to split the column with cSplit and then use melt from data.table to convert the result to 'long' format:

library(splitstackshape)
# NOTE(review): melt(), setnames() and `:=` are data.table functions. If
# library(splitstackshape) does not attach data.table in your installation,
# attach it explicitly with library(data.table) - confirm before running.

# Split 'ROI' at the underscore; cSplit() replaces it with ROI_1 and ROI_2.
res2 <- cSplit(df, "ROI", sep = "_")
# Wide to long: keep the two split columns as ids; the former column names
# (e.g. "summer_1") go into 'season'. `id.vars` is spelled out in full rather
# than relying on partial argument matching.
res2 <- melt(res2, id.vars = c("ROI_1", "ROI_2"), variable.name = "season")
# The id columns come first after melt(); give them their final names.
setnames(res2, 1:2, c("ROI", "no"))
# Strip the "_<digits>" suffix so only the season name remains (by reference).
res2[, season := sub("_\\d+", "", season)]
head(res2)
#   ROI no season    value
#1:   a  1 summer 29.25740
#2:   a  2 summer 22.48911
#3:   a  3 summer 70.42230
#4:   b  1 summer 51.88971
#5:   b  2 summer 66.26196
#6:   b  3 summer 92.04438

data

# Reproducible example data used for the output shown above: six ROI labels
# and four columns of uniform random draws in [0, 100].
set.seed(24)
ROI <- sprintf("%s_%02d", rep(c("a", "b"), each = 3), 1:3)
# The draw order matters for reproducibility: summer_1, winter_1, summer_2,
# winter_2.
summer_1 <- runif(6, 0, 100)
winter_1 <- runif(6, 0, 100)
summer_2 <- runif(6, 0, 100)
winter_2 <- runif(6, 0, 100)
df <- data.frame(
  ROI,
  summer_1, winter_1,
  summer_2, winter_2
)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM