rlang：从NSE函数中的冒号快捷方式获取名称

Question

我正在编写一套用于制作人口统计数据表的函数。 我有一个函数，缩写如下，我需要在几列（ ... ）中gather数据框。 诀窍是我想保留这些列的名称，因为我需要在收集后按顺序放置一列。 在这种情况下，这些列是estimate ， moe ， share ， sharemoe 。

library(tidyverse)
library(rlang)

race <- structure(list(region = c("New Haven", "New Haven", "New Haven", "New Haven", "Outer Ring", "Outer Ring", "Outer Ring", "Outer Ring"), 
    variable = c("white", "black", "asian", "latino", "white", "black", "asian", "latino"), 
    estimate = c(40164, 42970, 6042, 37231, 164150, 3471, 9565, 8518), 
    moe = c(1395, 1383, 697, 1688, 1603, 677, 896, 1052), 
    share = c(0.308, 0.33, 0.046, 0.286, 0.87, 0.018, 0.051, 0.045), 
    sharemoe = c(0.011, 0.011, 0.005, 0.013, 0.008, 0.004, 0.005, 0.006)), 
    class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -8L))

race
#> # A tibble: 8 x 6
#>   region     variable estimate   moe share sharemoe
#>   <chr>      <chr>       <dbl> <dbl> <dbl>    <dbl>
#> 1 New Haven  white       40164  1395 0.308    0.011
#> 2 New Haven  black       42970  1383 0.33     0.011
#> 3 New Haven  asian        6042   697 0.046    0.005
#> 4 New Haven  latino      37231  1688 0.286    0.013
#> 5 Outer Ring white      164150  1603 0.87     0.008
#> 6 Outer Ring black        3471   677 0.018    0.004
#> 7 Outer Ring asian        9565   896 0.051    0.005
#> 8 Outer Ring latino       8518  1052 0.045    0.006

在函数gather_arrange ，我通过映射rlang::exprs(...)并转换为字符来获取...列的名称。 将这些列的名称提取为字符串是很困难的，所以这可能是改进或重写的地方。 但这符合我的要求，将列type作为具有级别estimate ， moe ， share ， sharemoe的因子。

gather_arrange <- function(df, ..., group = variable) {
  gather_cols <- rlang::quos(...)
  grp_var <- rlang::enquo(group)
  gather_names <- purrr::map_chr(rlang::exprs(...), as.character)

  df %>%
    tidyr::gather(key = type, value = value, !!!gather_cols) %>%
    dplyr::mutate(!!rlang::quo_name(grp_var) := !!grp_var %>% 
                  forcats::fct_inorder() %>% forcats::fct_rev()) %>%
    dplyr::mutate(type = as.factor(type) %>% forcats::fct_relevel(gather_names)) %>%
    arrange(type)
}

race %>% gather_arrange(estimate, moe, share, sharemoe)
#> # A tibble: 32 x 4
#>    region     variable type      value
#>    <chr>      <fct>    <fct>     <dbl>
#>  1 New Haven  white    estimate  40164
#>  2 New Haven  black    estimate  42970
#>  3 New Haven  asian    estimate   6042
#>  4 New Haven  latino   estimate  37231
#>  5 Outer Ring white    estimate 164150
#>  6 Outer Ring black    estimate   3471
#>  7 Outer Ring asian    estimate   9565
#>  8 Outer Ring latino   estimate   8518
#>  9 New Haven  white    moe        1395
#> 10 New Haven  black    moe        1383
#> # ... with 22 more rows

但我想选择使用冒号表示法来选择列，即estimate:sharemoe相当于输入所有这些列名。

race %>% gather_arrange(estimate:sharemoe)
#> Error: Result 1 is not a length 1 atomic vector

这失败了，因为它无法从rlang::exprs(...)提取列名。 如何使用此表示法获取列名？ 提前致谢！

Answer 1

我认为您正在寻找的函数是tidyselect::vars_select() ，它由select和rename在内部使用来完成此任务。 它返回变量名称的字符向量。 例如：

> tidyselect::vars_select(letters, g:j)
  g   h   i   j 
"g" "h" "i" "j"

这允许您使用对dplyr::select有效的所有相同语法。

Answer 2

我们可以使用以下方法为这些情况创建if条件:从select获取列名（'gather_names'）以在fct_relevel

gather_arrange <- function(df, group = variable, ...) {

    gather_cols <-  quos(...)
     grp_var <-  enquo(group)
    if(length(gather_cols)==1 && grepl(":", quo_name(gather_cols[[1]]))) {
         gather_cols <- parse_expr(quo_name(gather_cols[[1]]))
    }

    gather_names <- df %>%
                     select(!!! gather_cols) %>% 
                     names
    df %>%
         gather(key = type, value = value, !!!gather_cols)  %>%
         mutate(!!rlang::quo_name(grp_var) := !!grp_var %>% 
         fct_inorder() %>% 
         fct_rev()) %>%
         mutate(type = as.factor(type) %>%
                       fct_relevel(gather_names)) %>%
         arrange(type)             
    }

-检查

out1 <- gather_arrange(df = race, group = variable,
                     estimate, moe, share, sharemoe)
out1
# A tibble: 32 x 4
#   region     variable type      value
#   <chr>      <fct>    <fct>     <dbl>
# 1 New Haven  white    estimate  40164
# 2 New Haven  black    estimate  42970
# 3 New Haven  asian    estimate   6042
# 4 New Haven  latino   estimate  37231
# 5 Outer Ring white    estimate 164150
# 6 Outer Ring black    estimate   3471
# 7 Outer Ring asian    estimate   9565
# 8 Outer Ring latino   estimate   8518
# 9 New Haven  white    moe        1395
#10 New Haven  black    moe        1383
# ... with 22 more rows



out2 <- gather_arrange(df = race, group = variable, estimate:sharemoe)
identical(out1, out2)
#[1] TRUE

更新

如果我们在...中传递多组列

gather_arrange2 <- function(df, group = variable, ...) {

    gather_cols <-  quos(...)
    grp_var <-  enquo(group)

    gather_names <- df %>%
                     select(!!! gather_cols) %>% 
                     names
    gather_colsN <- lapply(gather_cols, function(x) parse_expr(quo_name(x)))

    df %>%
         gather(key = type, value = value, !!!gather_colsN)  %>%
         mutate(!!rlang::quo_name(grp_var) := !!grp_var %>% 
         fct_inorder() %>% 
         fct_rev()) %>%
         mutate(type = as.factor(type) %>%
                       fct_relevel(gather_names)) %>%
         arrange(type)             
    }

-检查

out1 <- gather_arrange2(df = race, group = variable,
                 estimate, moe, share, sharemoe, region)
out2 <- gather_arrange2(df = race, group = variable, estimate:sharemoe, region)

identical(out1, out2)
#[1] TRUE

或者只检查一组列

out1 <- gather_arrange2(df = race, group = variable,
                      estimate, moe, share, sharemoe)
out2 <- gather_arrange2(df = race, group = variable, estimate:sharemoe)
identical(out1, out2)
#[1] TRUE

Answer 3

fun <- function(df, ...){
  as.character(substitute(list(...)))[-1] %>% 
    lapply(function(x)
      if(!grepl(':', x)) x
      else strsplit(x, ':')[[1]] %>%
            lapply(match, names(df)) %>%
            {names(df)[do.call(seq, .)]})%>% 
    unlist
}
names(race)
# [1] "region"   "variable" "estimate" "moe"      "share"    "sharemoe"    

fun(race, estimate:sharemoe, region)
# [1] "estimate" "moe"      "share"    "sharemoe" "region"  

fun(race, estimate, moe, share, sharemoe, region)
# [1] "estimate" "moe"      "share"    "sharemoe" "region" 

fun(race, moe, region:variable)
 # [1] "moe"      "region"   "variable"

这涉及两个:符号表达式和其他列名称作为参数，例如fun(race, estimate:sharemoe, region) 。

有趣的是，这种hacky解决方案似乎比tidyselect选择更快（并不是变量选择可能是整体速度的痛点）

fun <- function(y, ...){
  as.character(substitute(list(...)))[-1] %>% 
    lapply(function(x)
      if(!grepl(':', x)) x
      else strsplit(x, ':')[[1]] %>%
            lapply(match, y) %>%
            {y[do.call(seq, .)]})%>% 
    unlist
}
library(microbenchmark)
microbenchmark(
  tidy = tidyselect::vars_select(letters, b, g:j, a),
  fun  = fun(letters, b, g:j, a), 
  unit = 'relative')
# Unit: relative
#  expr      min       lq     mean   median       uq      max neval
#  tidy 19.90837 18.10964 15.32737 14.28823 13.86212 14.44013   100
#   fun  1.00000  1.00000  1.00000  1.00000  1.00000  1.00000   100

原始功能

gather_arrange <- function(df, ..., group = variable) {
  gather_cols <- rlang::quos(...)
  grp_var <- rlang::enquo(group)
  gather_names <- purrr::map_chr(rlang::exprs(...), as.character)

  df %>%
    tidyr::gather(key = type, value = value, !!!gather_cols) %>%
    dplyr::mutate(!!rlang::quo_name(grp_var) := !!grp_var %>% 
                    forcats::fct_inorder() %>% forcats::fct_rev()) %>%
    dplyr::mutate(type = as.factor(type) %>% forcats::fct_relevel(gather_names)) %>%
    arrange(type)
}

功能使用上面定义的fun ：

my_gather_arrange <- function(df, ..., group = variable) {
  gather_cols <- gather_names <- 
    as.character(substitute(list(...)))[-1] %>% 
      lapply(function(x){
        if(grepl(':', x)){
          strsplit(x, ':')[[1]] %>%
            lapply(match, names(df)) %>%
            {names(df)[do.call(seq, .)]}}
        else x}) %>% 
      unlist
  grp_var <- rlang::enquo(group)

  df %>%
    tidyr::gather(key = type, value = value, !!!gather_cols) %>%
    dplyr::mutate(!!rlang::quo_name(grp_var) := !!grp_var %>% 
                    forcats::fct_inorder() %>% forcats::fct_rev()) %>%
    dplyr::mutate(type = as.factor(type) %>% forcats::fct_relevel(gather_names)) %>%
    arrange(type)
}

out1 <- gather_arrange(race, estimate, moe, share, sharemoe, region)
out2 <- my_gather_arrange(race, estimate:sharemoe, region)
#   
identical(out1, out2)
# [1] TRUE

rlang：从NSE函数中的冒号快捷方式获取名称

问题描述

3 个解决方案

解决方案1
4 已采纳 2018-05-28 23:03:33

解决方案2
2 2018-05-28 00:12:56

更新

解决方案3
1 2018-05-27 19:19:30

rlang：从NSE函数中的冒号快捷方式获取名称

问题描述

3 个解决方案

解决方案1 4 已采纳 2018-05-28 23:03:33

解决方案2 2 2018-05-28 00:12:56

更新

解决方案3 1 2018-05-27 19:19:30

解决方案1
4 已采纳 2018-05-28 23:03:33

解决方案2
2 2018-05-28 00:12:56

解决方案3
1 2018-05-27 19:19:30