I need to reshape a data frame from this:
ID Var1 Var2 Var3
1 c1 a 0,227067762
1 c1 b 0,241427375
2 c2 c 0,134629052
2 c2 j 0,23696906
3 c1 l 0,352996679
3 c1 j 0,646452101
3 c1 c 0,041323564
3 c1 k 0,191328019
4 c2 t 0,490643088
4 c2 m 0,271466925
4 c2 a 0,923731785
4 c2 r 0,105746835
to this:
ID Var1 a b c … l m …
1 c1 0,227067762 0,241427375 0 … 0 0 …
2 c2 0 0 0,134629052 … 0 0 …
3 c1 0 0 0 … 0,352996679 0 …
4 c2 0,923731785 0 0 … 0 0,271466925 …
I tried using variations of reshape but I don't get the result I'm looking for.
Can you help me?
Using dcast() from reshape2:
library(reshape2)
# Cast to wide format: one row per ID/Var1 pair, one column per Var2 level.
# value.var is named explicitly so dcast() does not have to guess the value
# column from the column order (and print a message about the guess).
# ID/Var1/Var2 combinations that do not occur in df are filled with 0.
new <- dcast(df, ID + Var1 ~ Var2, value.var = "Var3", fill = 0)
Output:
ID Var1 a b c j k l m r t
1 1 c1 0,227067762 0,241427375 0 0 0 0 0 0 0
2 2 c2 0 0 0,134629052 0,23696906 0 0 0 0 0
3 3 c1 0 0 0,041323564 0,646452101 0,191328019 0,352996679 0 0 0
4 4 c2 0,923731785 0 0 0 0 0 0,271466925 0,105746835 0,490643088
Some data used:
# Example data: 12 rows in long format. Var3 is stored as character because
# the values use a decimal comma.
df <- data.frame(
  ID = rep(1:4, times = c(2L, 2L, 4L, 4L)),
  Var1 = rep(c("c1", "c2", "c1", "c2"), times = c(2L, 2L, 4L, 4L)),
  Var2 = c("a", "b", "c", "j", "l", "j", "c", "k", "t", "m", "a", "r"),
  Var3 = c(
    "0,227067762", "0,241427375", "0,134629052", "0,23696906",
    "0,352996679", "0,646452101", "0,041323564", "0,191328019",
    "0,490643088", "0,271466925", "0,923731785", "0,105746835"
  ),
  stringsAsFactors = FALSE
)
Or use the base R reshape() function, which requires an additional step to replace the resulting NA values with 0:
# Base R alternative: spread the Var2 levels into columns, keeping one row
# per ID/Var1 pair.
new <- reshape(
  df,
  direction = "wide",
  idvar = c("ID", "Var1"),
  timevar = "Var2"
)
# reshape() leaves NA where an ID/Var1 pair has no value for a Var2 level;
# replace those with 0.
new <- replace(new, is.na(new), 0)
Output:
ID Var1 Var3.a Var3.b Var3.c Var3.j Var3.l Var3.k Var3.t Var3.m Var3.r
1 1 c1 0,227067762 0,241427375 0 0 0 0 0 0 0
3 2 c2 0 0 0,134629052 0,23696906 0 0 0 0 0
5 3 c1 0 0 0,041323564 0,646452101 0,352996679 0,191328019 0 0 0
9 4 c2 0,923731785 0 0 0 0 0 0,490643088 0,271466925 0,105746835
We can use pivot_wider() from tidyr:
library(tidyr)
library(dplyr)
# Spread the Var2 levels into columns holding Var3. Var3 is character, so the
# filler for missing combinations is the string "0".
pivot_wider(
  df1,
  names_from = Var2,
  values_from = Var3,
  values_fill = "0"
)
-output
# A tibble: 4 x 11
# ID Var1 a b c j l k t m r
# <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#1 1 c1 0,227067762 0,241427375 0 0 0 0 0 0 0
#2 2 c2 0 0 0,134629052 0,23696906 0 0 0 0 0
#3 3 c1 0 0 0,041323564 0,646452101 0,352996679 0,191328019 0 0 0
#4 4 c2 0,923731785 0 0 0 0 0 0,490643088 0,271466925 0,105746835
Or using dcast() from data.table:
library(data.table)
# setDT() converts df1 to a data.table by reference before casting; missing
# ID/Var1/Var2 combinations are filled with 0.
dcast(
  setDT(df1),
  formula = ID + Var1 ~ Var2,
  value.var = "Var3",
  fill = 0
)
-output
# ID Var1 a b c j k l m r t
#1: 1 c1 0,227067762 0,241427375 0 0 0 0 0 0 0
#2: 2 c2 0 0 0,134629052 0,23696906 0 0 0 0 0
#3: 3 c1 0 0 0,041323564 0,646452101 0,191328019 0,352996679 0 0 0
#4: 4 c2 0,923731785 0 0 0 0 0 0,271466925 0,105746835 0,490643088
# Example data: 12 rows in long format. Var3 is stored as character because
# the values use a decimal comma.
df1 <- data.frame(
  ID = rep(1:4, times = c(2L, 2L, 4L, 4L)),
  Var1 = rep(c("c1", "c2", "c1", "c2"), times = c(2L, 2L, 4L, 4L)),
  Var2 = c("a", "b", "c", "j", "l", "j", "c", "k", "t", "m", "a", "r"),
  Var3 = c(
    "0,227067762", "0,241427375", "0,134629052", "0,23696906",
    "0,352996679", "0,646452101", "0,041323564", "0,191328019",
    "0,490643088", "0,271466925", "0,923731785", "0,105746835"
  ),
  stringsAsFactors = FALSE
)
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint this content, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.