简体   繁体   中英

Bring wide data into long format

I have a file like below and wanted to transform it into the R data frame like given in the output

        A    B    C     D   E   
2010    25   74   85    88  89
2011    27   86   97    99  
2012    37   115  131   
2013    47   146            
2014    56  

Output:

Year  Year_No   Division   Amount 
2010    1       A          25
2010    1       B          74
2010    1       C          85
2010    1       D          88
2010    1       E          89
2011    2       A          27
2011    2       B          86
2011    2       C          97
2011    2       D          99
2012    3       A          37
2012    3       B          115
2012    3       C          131
2013    4       A          47
2013    4       B          146
2014    5       A          56

I would really appreciate If anyone could help me in solve this issue

We can use functions from the tidyverse .

library(tidyverse)

dt2 <- dt %>%
  rownames_to_column("Year") %>%
  rowid_to_column("Year_No") %>%
  gather(Division, Amount, -Year, -Year_No, na.rm = TRUE) %>%
  arrange(Year_No, Division) %>% 
  select(Year_No, Year, Division, Amount)
dt2
   Year_No Year Division Amount
1        1 2010        A     25
2        1 2010        B     74
3        1 2010        C     85
4        1 2010        D     88
5        1 2010        E     89
6        2 2011        A     27
7        2 2011        B     86
8        2 2011        C     97
9        2 2011        D     99
10       3 2012        A     37
11       3 2012        B    115
12       3 2012        C    131
13       4 2013        A     47
14       4 2013        B    146
15       5 2014        A     56

Data

dt <- read.table(text = "        A    B    C     D   E   
2010    25   74   85    88  89
                 2011    27   86   97    99  NA 
                 2012    37   115  131   NA  NA
                 2013    47   146  NA    NA  NA           
                 2014    56  NA  NA  NA  NA",
                 header = TRUE)

We could convert to matrix and then melt it

library(reshape2)
library(data.table)
dt <- setDT(melt(as.matrix(df1), na.rm = TRUE))[ , YearNo := .GRP, Var1][order(Var1)]
setnames(dt, c("Var1", "Var2", "value"), c("Year", "Division", "Amount"))[]
#     Year Division Amount YearNo
# 1: 2010        A     25      1
# 2: 2010        B     74      1
# 3: 2010        C     85      1
# 4: 2010        D     88      1
# 5: 2010        E     89      1
# 6: 2011        A     27      2
# 7: 2011        B     86      2
# 8: 2011        C     97      2
# 9: 2011        D     99      2
#10: 2012        A     37      3
#11: 2012        B    115      3
#12: 2012        C    131      3
#13: 2013        A     47      4
#14: 2013        B    146      4
#15: 2014        A     56      5

NOTE: The assumption is that the missing values are NA as it seems to be numeric columns ie 'A' to 'E'

data

df1 <- structure(list(A = c(25L, 27L, 37L, 47L, 56L), B = c(74L, 86L, 
115L, 146L, NA), C = c(85L, 97L, 131L, NA, NA), D = c(88L, 99L, 
NA, NA, NA), E = c(89L, NA, NA, NA, NA)), .Names = c("A", "B", 
"C", "D", "E"), class = "data.frame", row.names = c("2010", "2011", 
"2012", "2013", "2014"))

A simple solution in base

# create the data
mat <- matrix(1:30,6,5)
for(z in 1:nrow(mat)){
  a <- (1:(5-z+1))*-1
  mat[z,a] <- NA
}
rownames(mat) <- 2012:2017
colnames(mat) <- LETTERS[1:5]
mat

# start the task
col <- rep(colnames(mat), each=nrow(mat))
value <- as.vector(mat)
row <- rownames(mat)
table <- data.frame(col,row,value)
table <- table[!is.na(table$value),]
table

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM