繁体   English   中英

R 复杂数据框重塑

[英]R complex data frame reshape

我需要把下面这个数据框:

ID  Var1    Var2    Var3
1   c1      a       0,227067762
1   c1      b       0,241427375
2   c2      c       0,134629052
2   c2      j       0,23696906
3   c1      l       0,352996679
3   c1      j       0,646452101
3   c1      c       0,041323564
3   c1      k       0,191328019
4   c2      t       0,490643088
4   c2      m       0,271466925
4   c2      a       0,923731785
4   c2      r       0,105746835

重塑成这样:

ID  Var1    a           b           c           …   l           m           …
1   c1      0,227067762 0,241427375 0           …   0           0           …
2   c2      0           0           0,134629052 …   0           0           …
3   c1      0           0           0,041323564 …   0,352996679 0           …
4   c2      0,923731785 0           0           …   0           0,271466925 …

我尝试了 reshape 的几种用法,但没有得到想要的结果。
你能帮助我吗?

使用 reshape2 包中的 dcast():

library(reshape2)
# Code
# Cast from long to wide: one row per ID/Var1 pair, one column per Var2 level.
# value.var is named explicitly so dcast() does not have to guess the value
# column (and print a message about it). Combinations that do not occur in
# the data are filled with 0.
new <- dcast(df, ID + Var1 ~ Var2, value.var = "Var3", fill = 0)

Output:

  ID Var1           a           b           c           j           k           l           m           r           t
1  1   c1 0,227067762 0,241427375           0           0           0           0           0           0           0
2  2   c2           0           0 0,134629052  0,23696906           0           0           0           0           0
3  3   c1           0           0 0,041323564 0,646452101 0,191328019 0,352996679           0           0           0
4  4   c2 0,923731785           0           0           0           0           0 0,271466925 0,105746835 0,490643088

使用的一些数据:

# Data
# Long-format sample: one row per (ID, Var2) observation. Var3 holds
# decimal-comma numbers stored as character strings.
df <- data.frame(
  ID = rep(1:4, times = c(2L, 2L, 4L, 4L)),
  Var1 = rep(c("c1", "c2", "c1", "c2"), times = c(2L, 2L, 4L, 4L)),
  Var2 = c("a", "b", "c", "j", "l", "j", "c", "k", "t", "m", "a", "r"),
  Var3 = c(
    "0,227067762", "0,241427375",
    "0,134629052", "0,23696906",
    "0,352996679", "0,646452101", "0,041323564", "0,191328019",
    "0,490643088", "0,271466925", "0,923731785", "0,105746835"
  ),
  stringsAsFactors = FALSE
)

或者使用 base R 的 reshape() 函数,但需要额外的一步(把 NA 替换为 0):

# Code2
# Base-R alternative: widen df so each Var2 level becomes its own Var3.<level>
# column, keyed by the ID/Var1 pair.
new <- reshape(
  data = df,
  direction = "wide",
  idvar = c("ID", "Var1"),
  timevar = "Var2"
)
# reshape() leaves NA where an ID/Var1 pair has no value for a Var2 level;
# swap those for 0.
new <- replace(new, is.na(new), 0)

Output:

  ID Var1      Var3.a      Var3.b      Var3.c      Var3.j      Var3.l      Var3.k      Var3.t      Var3.m      Var3.r
1  1   c1 0,227067762 0,241427375           0           0           0           0           0           0           0
3  2   c2           0           0 0,134629052  0,23696906           0           0           0           0           0
5  3   c1           0           0 0,041323564 0,646452101 0,352996679 0,191328019           0           0           0
9  4   c2 0,923731785           0           0           0           0           0 0,490643088 0,271466925 0,105746835

我们可以使用 tidyr 中的 pivot_wider:

library(tidyr)
library(dplyr)
# Widen df1: Var2 values become column names, Var3 supplies the cell values,
# and cells with no matching row are filled with the string "0" (Var3 is
# character, so the fill value is a string as well).
pivot_wider(
  df1,
  names_from = Var2,
  values_from = Var3,
  values_fill = "0"
)

-输出

# A tibble: 4 x 11
#     ID Var1  a           b           c           j           l           k           t           m           r          
#  <int> <chr> <chr>       <chr>       <chr>       <chr>       <chr>       <chr>       <chr>       <chr>       <chr>      
#1     1 c1    0,227067762 0,241427375 0           0           0           0           0           0           0          
#2     2 c2    0           0           0,134629052 0,23696906  0           0           0           0           0          
#3     3 c1    0           0           0,041323564 0,646452101 0,352996679 0,191328019 0           0           0          
#4     4 c2    0,923731785 0           0           0           0           0           0,490643088 0,271466925 0,105746835

或者使用 data.table 中的 dcast:

library(data.table)
# setDT() converts df1 to a data.table by reference (df1 itself is changed).
setDT(df1)
# Cast to wide: rows keyed by ID + Var1, one column per Var2 level holding
# Var3; missing combinations are filled with 0.
dcast(
  df1,
  ID + Var1 ~ Var2,
  value.var = "Var3",
  fill = 0
)

-输出

#   ID Var1           a           b           c           j           k           l           m           r           t
#1:  1   c1 0,227067762 0,241427375           0           0           0           0           0           0           0
#2:  2   c2           0           0 0,134629052  0,23696906           0           0           0           0           0
#3:  3   c1           0           0 0,041323564 0,646452101 0,191328019 0,352996679           0           0           0
#4:  4   c2 0,923731785           0           0           0           0           0 0,271466925 0,105746835 0,490643088

数据

# Long-format sample data used by the tidyr and data.table examples: one row
# per (ID, Var2) observation, with Var3 stored as decimal-comma strings.
df1 <- data.frame(
  ID = c(
    1L, 1L,
    2L, 2L,
    3L, 3L, 3L, 3L,
    4L, 4L, 4L, 4L
  ),
  Var1 = c(
    "c1", "c1",
    "c2", "c2",
    "c1", "c1", "c1", "c1",
    "c2", "c2", "c2", "c2"
  ),
  Var2 = c(
    "a", "b",
    "c", "j",
    "l", "j", "c", "k",
    "t", "m", "a", "r"
  ),
  Var3 = c(
    "0,227067762", "0,241427375",
    "0,134629052", "0,23696906",
    "0,352996679", "0,646452101", "0,041323564", "0,191328019",
    "0,490643088", "0,271466925", "0,923731785", "0,105746835"
  ),
  stringsAsFactors = FALSE
)

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM