简体   繁体   中英

How to summarize by sets of grouping variables in R and dplyr?

I want to group a data frame using different sets of grouping variables. For each group I want to count the number of observations (or summarize in any other way) and then collect all results in one data frame.

Important: I want to define the sets of grouping variables programmatically, for example as a list.

How do I achieve this in the tidyverse?

Here is my attempt:

library(tidyverse)

# Count the rows of `mtcars` for each combination of the grouping columns
# given in `...` and label every result row.
#
# Arguments:
#   ...  Bare column names of `mtcars` (ensyms() only accepts symbols).
#
# Returns a data frame with the columns
#   grouping_variable  the column names joined by "."
#   group              the column values of that row joined by "."
#   n                  the number of rows in the group
count_by_group <- function(...) {
  tallies <- count(mtcars, ...)

  # Both labels are built inside mutate() so that the captured column
  # names are evaluated against the counted data.
  labelled <- mutate(
    tallies,
    grouping_variable = paste(ensyms(...), collapse = "."),
    group = paste(!!!enquos(...), sep = ".")
  )

  select(labelled, grouping_variable, group, n)
}

# I want this ...
# One count table per set of grouping columns, stacked into one data frame.
bind_rows(list(
  count_by_group(cyl),
  count_by_group(gear),
  count_by_group(cyl, gear)
))
#>    grouping_variable group  n
#> 1                cyl     4 11
#> 2                cyl     6  7
#> 3                cyl     8 14
#> 4               gear     3 15
#> 5               gear     4 12
#> 6               gear     5  5
#> 7           cyl.gear   4.3  1
#> 8           cyl.gear   4.4  8
#> 9           cyl.gear   4.5  2
#> 10          cyl.gear   6.3  2
#> 11          cyl.gear   6.4  4
#> 12          cyl.gear   6.5  1
#> 13          cyl.gear   8.3 12
#> 14          cyl.gear   8.5  2

# ... but without the repetition of "count_by_group(var)".
# The following does not work:
# list() evaluates its arguments immediately, so R looks for objects named
# `cyl` and `gear` in the calling environment. They are columns of `mtcars`,
# not variables, hence the "object 'cyl' not found" error below is raised
# before count_by_group() is ever called.
map_dfr(
  list(
    cyl,
    gear,
    c(cyl, gear)
  ),
  count_by_group
)
#> Error in map(.x, .f, ...): object 'cyl' not found

Created on 2020-09-17 by the reprex package (v0.3.0)

Update (2020-10-12): More transparent solution (thanks to @LionelHenry)

library(tidyverse)

# Count the rows of `mtcars` for each combination of the grouping
# expressions given in `...` and label every result row.
#
# Arguments:
#   ...  Grouping columns or expressions, forwarded to count(). A quoted
#        set can be spliced in, e.g. count_by_group(!!!vars(cyl, gear)).
#
# Returns a data frame with the columns
#   grouping_variable  names of the grouping expressions joined by "."
#   group              values of the grouping columns joined by "."
#   n                  the number of rows in the group
count_by_group <- function(...) {
  # .named = TRUE gives every expression a name, which count() then uses as
  # the name of the corresponding output column.
  dots <- enquos(..., .named = TRUE)
  group_names <- names(dots)

  counted <- count(mtcars, !!!dots)

  # Paste the grouping columns element-wise. Each column is converted to
  # character on its own, so columns of different types (e.g. a logical next
  # to a number) keep their own printed form instead of being coerced to a
  # common type first. unname() keeps a column called "sep" or "collapse"
  # from being matched to paste()'s arguments of the same name.
  group_labels <- if (length(group_names) > 0) {
    do.call(paste, c(unname(as.list(counted[group_names])), sep = "."))
  } else {
    # No grouping columns: a single total row with an empty label.
    rep("", nrow(counted))
  }

  # # Equivalently, when at least one grouping column is given:
  # group_labels <- pmap_chr(
  #   unname(as.list(counted[group_names])), paste, sep = "."
  # )

  counted %>%
    mutate(
      grouping_variable = paste(group_names, collapse = "."),
      group = group_labels
    ) %>%
    select(grouping_variable, group, n)
}

# Each element is one set of grouping columns. vars() quotes the column
# names so they are not evaluated until count_by_group() receives them.
grouping_variables <- list(vars(cyl), vars(gear), vars(cyl, gear))

# Splice every set into count_by_group() and row-bind the results.
map_dfr(grouping_variables, function(group_set) count_by_group(!!!group_set))
#>    grouping_variable group  n
#> 1                cyl     4 11
#> 2                cyl     6  7
#> 3                cyl     8 14
#> 4               gear     3 15
#> 5               gear     4 12
#> 6               gear     5  5
#> 7           cyl.gear   4.3  1
#> 8           cyl.gear   4.4  8
#> 9           cyl.gear   4.5  2
#> 10          cyl.gear   6.3  2
#> 11          cyl.gear   6.4  4
#> 12          cyl.gear   6.5  1
#> 13          cyl.gear   8.3 12
#> 14          cyl.gear   8.5  2

Created on 2020-10-12 by the reprex package (v0.3.0)


I just found that this works!

library(tidyverse)

# Tabulate `mtcars` by the grouping columns given in `...`.
#
# Arguments:
#   ...  Bare column names of `mtcars` (ensyms() only accepts symbols).
#        A quoted set can be spliced in with `!!!`.
#
# Returns a data frame with the columns
#   grouping_variable  the column names joined by "."
#   group              the column values of that row joined by "."
#   n                  the number of rows in the group
count_by_group <- function(...) {
  per_group <- count(mtcars, ...)

  # The labels are computed inside mutate() so that the captured column
  # names resolve against the counted data.
  with_labels <- mutate(
    per_group,
    grouping_variable = paste(ensyms(...), collapse = "."),
    group = paste(!!!enquos(...), sep = ".")
  )

  select(with_labels, grouping_variable, group, n)
}

# Sets of grouping columns, quoted with vars() so that the column names are
# only resolved once they reach count_by_group().
grouping_variables <- list(vars(cyl), vars(gear), vars(cyl, gear))

# Count once per set, then stack the per-set tables.
grouping_variables %>%
  map(function(group_set) count_by_group(!!!group_set)) %>%
  bind_rows()
#>    grouping_variable group  n
#> 1                cyl     4 11
#> 2                cyl     6  7
#> 3                cyl     8 14
#> 4               gear     3 15
#> 5               gear     4 12
#> 6               gear     5  5
#> 7           cyl.gear   4.3  1
#> 8           cyl.gear   4.4  8
#> 9           cyl.gear   4.5  2
#> 10          cyl.gear   6.3  2
#> 11          cyl.gear   6.4  4
#> 12          cyl.gear   6.5  1
#> 13          cyl.gear   8.3 12
#> 14          cyl.gear   8.5  2

Created on 2020-10-12 by the reprex package (v0.3.0)

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM