简体   繁体   中英

Tidyverse: Reduce variables by group

I have a data frame that looks like this:

ID  pick1      pick2     pick3
1   NA         21/11/29  21/11/30
2   21/11/28   21/11/29  NA
3   21/11/28   NA        21/11/30   
4   NA         21/11/29  21/11/30

Each participant (ID) could pick 2 dates out of 3 options. Now I want to summarize the selected dates to get a tibble like this:

ID  date1      date2
1   21/11/29   21/11/30
2   21/11/28   21/11/29
3   21/11/28   21/11/30   
4   21/11/29   21/11/30

However, I can't get it working using tidyverse functions only. I have started to use this library but couldn't find a solution for my issue online

One option is with rowwise - group by rows, do the sort with na.last as TRUE, keep the sorted output in a list , unnest to multiple columns, and select only columns having at least one non-NA elements

library(dplyr)
library(tidyr)
library(stringr)
 df1 %>% 
   rowwise %>% 
   transmute(ID, date = list(sort(c_across(starts_with('pick')), 
       na.last = TRUE))) %>% 
   ungroup %>%
   unnest_wider(date) %>%
   rename_with(~ str_c('date', seq_along(.)), -ID) %>%
   select(where(~ any(!is.na(.))))

-output

# A tibble: 4 × 3
     ID date1    date2   
  <int> <chr>    <chr>   
1     1 21/11/29 21/11/30
2     2 21/11/28 21/11/29
3     3 21/11/28 21/11/30
4     4 21/11/29 21/11/30

or reshape to 'long' format with pivot_longer remove the NA s and reshape it back to 'wide' format

library(stringr)
df1 %>% 
   pivot_longer(cols = -ID, values_drop_na = TRUE) %>%
   group_by(ID) %>% 
   mutate(name = str_c('date', row_number())) %>%
   ungroup %>% 
   pivot_wider(names_from = name, values_from = value)

-output

# A tibble: 4 × 3
     ID date1    date2   
  <int> <chr>    <chr>   
1     1 21/11/29 21/11/30
2     2 21/11/28 21/11/29
3     3 21/11/28 21/11/30
4     4 21/11/29 21/11/30

data

df1 <- structure(list(ID = 1:4, pick1 = c(NA, "21/11/28", "21/11/28", 
NA), pick2 = c("21/11/29", "21/11/29", NA, "21/11/29"), pick3 = c("21/11/30", 
NA, "21/11/30", "21/11/30")), class = "data.frame",
 row.names = c(NA, 
-4L))

You can do the pivot long then back to wide method from @akrun's answer with data.table. The syntax is a little more concise

df1 <- structure(list(ID = 1:4, pick1 = c(NA, "21/11/28", "21/11/28", 
NA), pick2 = c("21/11/29", "21/11/29", NA, "21/11/29"), pick3 = c("21/11/30", 
NA, "21/11/30", "21/11/30")), class = "data.frame",
 row.names = c(NA, 
-4L))

library(data.table)
setDT(df1)

dcast(
  melt(df1, 'ID', na.rm = TRUE),
  ID ~ paste0('pick', rowid(ID)))

#>    ID    pick1    pick2
#> 1:  1 21/11/29 21/11/30
#> 2:  2 21/11/28 21/11/29
#> 3:  3 21/11/28 21/11/30
#> 4:  4 21/11/29 21/11/30

Created on 2021-11-29 by the reprex package (v2.0.1)

Yet another solution:

library(tidyverse)

df <- data.frame(
  stringsAsFactors = FALSE,
  ID = c(1L, 2L, 3L, 4L),
  pick1 = c(NA, "21/11/28", "21/11/28", NA),
  pick2 = c("21/11/29", "21/11/29", NA, "21/11/29"),
  pick3 = c("21/11/30", NA, "21/11/30", "21/11/30")
)

df %>% 
  pivot_longer(cols = str_c("pick",1:3), values_drop_na = T) %>% 
  mutate(name = rep(c("date1","date2"), n()/2)) %>% 
  pivot_wider(ID)

#> # A tibble: 4 × 3
#>      ID date1    date2   
#>   <int> <chr>    <chr>   
#> 1     1 21/11/29 21/11/30
#> 2     2 21/11/28 21/11/29
#> 3     3 21/11/28 21/11/30
#> 4     4 21/11/29 21/11/30

And with tidyr::unnest_wider :

library(tidyverse)

df %>% 
  pivot_longer(cols = str_c("pick",1:3),values_drop_na = T) %>% 
  mutate(name = "date") %>% 
  pivot_wider(ID, values_fn = list) %>% 
  unnest_wider(col="date", names_sep = "")

#> # A tibble: 4 × 3
#>      ID date1    date2   
#>   <int> <chr>    <chr>   
#> 1     1 21/11/29 21/11/30
#> 2     2 21/11/28 21/11/29
#> 3     3 21/11/28 21/11/30
#> 4     4 21/11/29 21/11/30

What about base R?

df <- read.table(text = "ID  pick1      pick2     pick3
1   NA         21/11/29  21/11/30
2   21/11/28   21/11/29  NA
3   21/11/28   NA        21/11/30   
4   NA         21/11/29  21/11/30", header = TRUE)

data.frame(t(apply(df, 1, function(x) x[!is.na(x)])))
#>   X1       X2       X3
#> 1  1 21/11/29 21/11/30
#> 2  2 21/11/28 21/11/29
#> 3  3 21/11/28 21/11/30
#> 4  4 21/11/29 21/11/30

Created on 2021-11-29 by the reprex package (v2.0.1)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM