[英]R complex data frame reshape
我需要把下面这个数据框:
ID Var1 Var2 Var3
1 c1 a 0,227067762
1 c1 b 0,241427375
2 c2 c 0,134629052
2 c2 j 0,23696906
3 c1 l 0,352996679
3 c1 j 0,646452101
3 c1 c 0,041323564
3 c1 k 0,191328019
4 c2 t 0,490643088
4 c2 m 0,271466925
4 c2 a 0,923731785
4 c2 r 0,105746835
重塑为如下形式:
ID Var1 a b c … l m …
1 c1 0,227067762 0,241427375 0 … 0 0 …
2 c2 0 0 0,134629052 … 0 0 …
3 c1 0 0 0 … 0,352996679 0 …
4 c2 0,923731785 0 0 … 0 0,271466925 …
我尝试了 reshape 的各种用法,但没有得到想要的结果。
你能帮助我吗?
使用 reshape2 包中的 dcast():
library(reshape2)
# Code
# Cast from long to wide: one row per ID/Var1 pair, one column per Var2 level.
# value.var is named explicitly so dcast() does not have to guess the value
# column (and print a message about it); missing combinations are filled with 0.
new <- dcast(df, ID + Var1 ~ Var2, value.var = "Var3", fill = 0)
Output:
ID Var1 a b c j k l m r t
1 1 c1 0,227067762 0,241427375 0 0 0 0 0 0 0
2 2 c2 0 0 0,134629052 0,23696906 0 0 0 0 0
3 3 c1 0 0 0,041323564 0,646452101 0,191328019 0,352996679 0 0 0
4 4 c2 0,923731785 0 0 0 0 0 0,271466925 0,105746835 0,490643088
所用的示例数据:
#Data
# Example input: 12 long-format rows, one per (ID, Var2) pair.
# Var3 is stored as character because the values use a decimal comma.
df <- data.frame(
  ID = rep(1:4, times = c(2L, 2L, 4L, 4L)),
  Var1 = rep(c("c1", "c2", "c1", "c2"), times = c(2L, 2L, 4L, 4L)),
  Var2 = c("a", "b", "c", "j", "l", "j", "c", "k", "t", "m", "a", "r"),
  Var3 = c(
    "0,227067762", "0,241427375", "0,134629052", "0,23696906",
    "0,352996679", "0,646452101", "0,041323564", "0,191328019",
    "0,490643088", "0,271466925", "0,923731785", "0,105746835"
  ),
  stringsAsFactors = FALSE
)
或者使用 base R 的 reshape() 函数,但需要额外的步骤:
# Code2
# Base-R route: spread Var3 into one column per Var2 level (named Var3.<level>),
# keeping ID and Var1 as the row identifiers.
new <- reshape(
  df,
  idvar = c("ID", "Var1"),
  timevar = "Var2",
  direction = "wide"
)
# reshape() leaves NA where an ID has no value for a level; replace those by 0.
new <- replace(new, is.na(new), 0)
Output:
ID Var1 Var3.a Var3.b Var3.c Var3.j Var3.l Var3.k Var3.t Var3.m Var3.r
1 1 c1 0,227067762 0,241427375 0 0 0 0 0 0 0
3 2 c2 0 0 0,134629052 0,23696906 0 0 0 0 0
5 3 c1 0 0 0,041323564 0,646452101 0,352996679 0,191328019 0 0 0
9 4 c2 0,923731785 0 0 0 0 0 0,490643088 0,271466925 0,105746835
我们可以使用 tidyr 包中的 pivot_wider()
library(tidyr)
library(dplyr)
# Widen df1: column names come from Var2, cell values from Var3.
# Combinations that do not occur are filled with the string "0", since Var3
# is a character column.
pivot_wider(
  df1,
  names_from = Var2,
  values_from = Var3,
  values_fill = "0"
)
-输出
# A tibble: 4 x 11
# ID Var1 a b c j l k t m r
# <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#1 1 c1 0,227067762 0,241427375 0 0 0 0 0 0 0
#2 2 c2 0 0 0,134629052 0,23696906 0 0 0 0 0
#3 3 c1 0 0 0,041323564 0,646452101 0,352996679 0,191328019 0 0 0
#4 4 c2 0,923731785 0 0 0 0 0 0,490643088 0,271466925 0,105746835
或者使用 data.table 包中的 dcast()
library(data.table)
# Cast long to wide with data.table: one row per ID/Var1 pair, one column per
# Var2 level, values taken from Var3, absent combinations filled with 0.
# as.data.table() works on a copy; setDT() would convert the caller's df1 to a
# data.table by reference as a hidden side effect.
dcast(as.data.table(df1), ID + Var1 ~ Var2, value.var = "Var3", fill = 0)
-输出
# ID Var1 a b c j k l m r t
#1: 1 c1 0,227067762 0,241427375 0 0 0 0 0 0 0
#2: 2 c2 0 0 0,134629052 0,23696906 0 0 0 0 0
#3: 3 c1 0 0 0,041323564 0,646452101 0,191328019 0,352996679 0 0 0
#4: 4 c2 0,923731785 0 0 0 0 0 0,271466925 0,105746835 0,490643088
# Example input: 12 long-format rows, one per (ID, Var2) pair.
# Var3 is stored as character because the values use a decimal comma.
df1 <- data.frame(
  ID = rep(1:4, times = c(2L, 2L, 4L, 4L)),
  Var1 = rep(c("c1", "c2", "c1", "c2"), times = c(2L, 2L, 4L, 4L)),
  Var2 = c("a", "b", "c", "j", "l", "j", "c", "k", "t", "m", "a", "r"),
  Var3 = c(
    "0,227067762", "0,241427375", "0,134629052", "0,23696906",
    "0,352996679", "0,646452101", "0,041323564", "0,191328019",
    "0,490643088", "0,271466925", "0,923731785", "0,105746835"
  ),
  stringsAsFactors = FALSE
)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.