简体   繁体   中英

Dataframe long to wide format

Given the following dataframe

df <-  structure(list(Group = c("A", "A", "B", "B", "C", "C", "D", "D", 
"E", "E"), TEST_A = c("neg", "neg", "neg", "neg", "neg", "neg", 
"neg", "pos", "pos", "neg"), TEST_B = c("neg", "neg", "neg", 
"neg", "neg", "neg", "neg", "neg", "neg", "neg"), RESULT_NOW = c(0L, 
0L, 0L, NA, 0L, 0L, 0L, 0L, 0L, 0L), RESULTS_BEFORE = c(0L, 0L, 
0L, NA, 0L, 0L, 0L, 0L, 0L, 0L)), row.names = c(NA, 10L), class = "data.frame")

Looks as follow:

> df

    Group TEST_A TEST_B RESULT_NOW RESULTS_BEFORE
1      A    neg    neg          0              0
2      A    neg    neg          0              0
3      B    neg    neg          0              0
4      B    neg    neg         NA             NA
5      C    neg    neg          0              0
6      C    neg    neg          0              0
7      D    neg    neg          0              0
8      D    pos    neg          0              0
9      E    pos    neg          0              0
10     E    neg    neg          0              0

I would like to create the follwing dataframe:

Group TEST_A_1 TEST_A_2 TEST_B_1 TEST_B_1 RESULT_NOW_1  RESULT_NOW_2 RESULTS_BEFORE_1 RESULTS_BEFORE_2
    A   neg        neg     neg     neg       0              0            0         0
    B   neg        neg     neg     neg       0              NA           0        NA
    C   neg        neg     neg     neg       0              0            0         0
    ...
    E   pos        neg     neg     neg       0              0            0         0
   

So in fact is to move dataframe to wide format using the Group column. Note that the group column can contains up to 4 rows per group. Inn this exemple there are only two rows per group Thanks for your help.

You can try this:

library(tidyverse)
library(reshape2)
#Melt
df2 <- reshape2::melt(df,id.vars = "Group")
df2 %>% group_by(Group,variable) %>%mutate(index=1:length(variable),variable=paste0(variable,'.',index)) %>%
  select(-index) -> df2
#Pivot
df2 %>%
  group_by(variable) %>% 
  mutate(row = row_number()) %>%
  pivot_wider(id_cols = c(row, Group),
              names_from = variable, 
              values_from = c(value)) %>%
  select(-row)

# A tibble: 5 x 9
  Group TEST_A.1 TEST_A.2 TEST_B.1 TEST_B.2 RESULT_NOW.1 RESULT_NOW.2 RESULTS_BEFORE.1 RESULTS_BEFORE.2
  <chr> <chr>    <chr>    <chr>    <chr>    <chr>        <chr>        <chr>            <chr>           
1 A     neg      neg      neg      neg      0            0            0                0               
2 B     neg      neg      neg      neg      0            <NA>         0                <NA>            
3 C     neg      neg      neg      neg      0            0            0                0               
4 D     neg      pos      neg      neg      0            0            0                0               
5 E     pos      neg      neg      neg      0            0            0                0 

I hope this can help you.

The trick is to first add an id column. In the second step we can simply apply tidyr::pivot_wider to achieve the desired result:

df <-  structure(list(Group = c("A", "A", "B", "B", "C", "C", "D", "D", 
                                "E", "E"), TEST_A = c("neg", "neg", "neg", "neg", "neg", "neg", 
                                                      "neg", "pos", "pos", "neg"), TEST_B = c("neg", "neg", "neg", 
                                                                                              "neg", "neg", "neg", "neg", "neg", "neg", "neg"), RESULT_NOW = c(0L, 
                                                                                                                                                               0L, 0L, NA, 0L, 0L, 0L, 0L, 0L, 0L), RESULTS_BEFORE = c(0L, 0L, 
                                                                                                                                                                                                                       0L, NA, 0L, 0L, 0L, 0L, 0L, 0L)), row.names = c(NA, 10L), class = "data.frame")

library(dplyr)
library(tidyr)

df %>% 
  group_by(Group) %>% 
  mutate(id = row_number(Group)) %>%
  ungroup() %>% 
  pivot_wider(names_from = id, values_from = c(TEST_A, TEST_B, RESULT_NOW, RESULTS_BEFORE))
#> # A tibble: 5 x 9
#>   Group TEST_A_1 TEST_A_2 TEST_B_1 TEST_B_2 RESULT_NOW_1 RESULT_NOW_2
#>   <chr> <chr>    <chr>    <chr>    <chr>           <int>        <int>
#> 1 A     neg      neg      neg      neg                 0            0
#> 2 B     neg      neg      neg      neg                 0           NA
#> 3 C     neg      neg      neg      neg                 0            0
#> 4 D     neg      pos      neg      neg                 0            0
#> 5 E     pos      neg      neg      neg                 0            0
#> # ... with 2 more variables: RESULTS_BEFORE_1 <int>, RESULTS_BEFORE_2 <int>

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM