group_by dplyr within a function NSE

I am having problems using dplyr and group_by within a piped function call.

Reproducible Example:

Using the following data:

# Example data in dput() form: a 50-row tibble (class "tbl_df") with columns
#   word1        - character: one of "no", "not", "without", "never"
#   word2        - character: the second word of the pair
#   score        - integer score attached to word2
#   n            - integer count
#   contribution - integer; in the rows shown it equals score * n
#                  (e.g. -1L * 102L = -102L)
ex_data<- structure(list(word1 = c("no", "not", "not", "no", "not", "not", 
"not", "not", "no", "not", "no", "not", "not", "not", "no", "not", 
"no", "no", "not", "not", "not", "no", "not", "without", "never", 
"no", "not", "no", "no", "not", "not", "not", "no", "no", "no", 
"not", "not", "without", "never", "no", "not", "not", "not", 
"not", "not", "never", "no", "no", "not", "not"), word2 = c("doubt", 
"like", "help", "no", "want", "wish", "allow", "care", "harm", 
"sorry", "great", "leave", "pretend", "worth", "pleasure", "love", 
"danger", "want", "afraid", "doubt", "fail", "good", "forget", 
"feeling", "forget", "matter", "avoid", "chance", "hope", "forgotten", 
"miss", "perfectly", "bad", "better", "opportunity", "admit", 
"fair", "delay", "failed", "wish", "dislike", "distress", "refuse", 
"regret", "trust", "want", "evil", "greater", "better", "blame"
), score = c(-1L, 2L, 2L, -1L, 1L, 1L, 1L, 2L, -2L, -1L, 3L, 
-1L, -1L, 2L, 3L, 3L, -2L, 1L, -2L, -1L, -2L, 3L, -1L, 1L, -1L, 
1L, -1L, 2L, 2L, -1L, -2L, 3L, -3L, 2L, 2L, -1L, 2L, -1L, -2L, 
1L, -2L, -2L, -2L, -2L, 1L, 1L, -3L, 3L, 2L, -2L), n = c(102L, 
99L, 82L, 60L, 45L, 39L, 36L, 23L, 22L, 21L, 19L, 18L, 18L, 17L, 
16L, 16L, 15L, 15L, 15L, 14L, 14L, 13L, 13L, 13L, 12L, 12L, 12L, 
11L, 11L, 10L, 10L, 10L, 9L, 9L, 9L, 9L, 9L, 9L, 8L, 8L, 8L, 
8L, 8L, 8L, 8L, 7L, 7L, 7L, 7L, 7L), contribution = c(-102L, 
198L, 164L, -60L, 45L, 39L, 36L, 46L, -44L, -21L, 57L, -18L, 
-18L, 34L, 48L, 48L, -30L, 15L, -30L, -14L, -28L, 39L, -13L, 
13L, -12L, 12L, -12L, 22L, 22L, -10L, -20L, 30L, -27L, 18L, 18L, 
-9L, 18L, -9L, -16L, 8L, -16L, -16L, -16L, -16L, 8L, 7L, -21L, 
21L, 14L, -14L)), .Names = c("word1", "word2", "score", "n", 
"contribution"), row.names = c(NA, -50L), class = c("tbl_df", 
"tbl", "data.frame"))

The usual piping operation, written directly against the column names, works as expected:

# Reference pipeline written directly against the column names of `ex_data`:
#   1. reorder `word2` by `contribution`,
#   2. keep the 10 rows with the largest |contribution| within each `word1`,
#   3. sort by descending `contribution`,
#   4. relabel `word2` as "<word2>__<word1>" so every facet gets its own levels.
# The snippet was cut off after the first argument of `factor()`; the
# `levels = rev(...)` tail is restored to match the function versions of this
# pipeline shown later in the post.
outside_result <- ex_data %>%
  mutate(word2 = reorder(word2, contribution)) %>%
  group_by(word1) %>%
  top_n(10, abs(contribution)) %>%
  group_by(word1, word2) %>%
  arrange(desc(contribution)) %>%
  ungroup() %>%
  mutate(word2 = factor(paste(word2, word1, sep = "__"),
                        levels = rev(paste(word2, word1, sep = "__"))))

I have implemented the above into a function as given below:

# First attempt at wrapping the pipeline in a function.
#
# NOTE: this is the version that does NOT work. Inside the dplyr verbs the
# names `facetPanel`, `barCategory` and `value` are looked up as columns of
# `df` rather than being replaced by the arguments the caller supplied, which
# is why `group_by(facetPanel)` stops with
# "unknown variable to group by : facetPanel".
#
# The snippet was cut off after the first argument of `factor()`; the
# `levels = rev(...)` tail and the closing brace are restored so that it parses.
order_bars <- function(df, facetPanel, barCategory, value) {
  df %>%
    mutate(barCategory = reorder(barCategory, value)) %>%
    group_by(facetPanel) %>%
    top_n(10, abs(value)) %>%
    group_by(facetPanel, barCategory) %>%
    arrange(desc(value)) %>%
    ungroup() %>%
    mutate(barCategory = factor(paste(barCategory, facetPanel, sep = "__"),
                                levels = rev(paste(barCategory, facetPanel,
                                                   sep = "__"))))
}

Taking the advice from this post, I also used `$` notation when referencing variables of a data.frame during mutate operations within a function.


R throws the following error:

Error: unknown variable to group by : facetPanel
Called from: resolve_vars(new_groups, tbl_vars(.data))

I suspect group_by needs to be tweaked to take either named variables, or I have to use .dot notation to reference columns, although I'm just throwing this out into the wind...

You'll need to learn how to use 1) the SE versions of dplyr verbs such as group_by_ and mutate_, and 2) the enigmatic lazyeval::interp. Please read vignette("nse") carefully.

Then we can come to:

#' Order bars within facets (standard-evaluation version)
#'
#' Column names are passed as character strings and spliced into the
#' expressions with `lazyeval::interp()`; the underscored dplyr verbs
#' (`mutate_()`, `group_by_()`, `filter_()`, `arrange_()`) then evaluate them.
#' dplyr and lazyeval must be attached. Note that the underscored verbs are
#' deprecated in later dplyr releases in favour of tidy evaluation.
#'
#' @param df A data frame.
#' @param facetPanel String naming the facetting column.
#' @param barCategory String naming the column that labels the bars.
#' @param value String naming the numeric column the bars are ordered by.
#' @return `df` reduced to the rows ranked in the top 10 by `abs(value)` within
#'   each `facetPanel` group (rows tied at the cut-off are kept), sorted by
#'   descending `value`, with an extra factor column literally named
#'   `barCategory` holding "<barCategory>__<facetPanel>" labels whose levels
#'   are in reverse row order.
order_bars <- function(df, facetPanel, barCategory, value) {
  df %>%
    mutate_(barCategory = interp(~reorder(x, y), x = as.name(barCategory),
                                 y = as.name(value))) %>%
    group_by_(facetPanel) %>%
    # No `top_n_()` exists, so the ranking filter is spelled out explicitly.
    filter_(interp(~min_rank(desc(abs(x))) <= 10, x = as.name(value))) %>%
    group_by_(facetPanel, barCategory) %>%
    arrange_(interp(~desc(x), x = as.name(value))) %>%
    ungroup() %>%
    mutate_(barCategory = interp(
      ~factor(paste(x, y, sep = "__"), levels = rev(paste(x, y, sep = "__"))),
      x = as.name(barCategory), y = as.name(facetPanel)))
}

order_bars(ex_data, "word1", "word2", "contribution")
#> # A tibble: 25 × 6
#>    word1 word2    score     n contribution barCategory
#>    <chr> <chr>    <int> <int>        <int> <fctr>
#>  1 not   like         2    99          198 like__not
#>  2 not   help         2    82          164 help__not
#>  3 no    great        3    19           57 great__no
#>  4 no    pleasure     3    16           48 pleasure__no
#>  5 not   love         3    16           48 love__not
#>  6 not   care         2    23           46 care__not
#>  7 not   want         1    45           45 want__not
#>  8 not   wish         1    39           39 wish__not
#>  9 no    good         3    13           39 good__no
#> 10 not   allow        1    36           36 allow__not

Note that we need to replace top_n with a filter_ statement, since no top_n_ exists. Looking at the source of top_n makes it obvious how the filter_ statement should be constructed.

Or, if you want to get fancy, you can write an NSE version of order_bars:

#' Order bars within facets (non-standard-evaluation version)
#'
#' Same pipeline as the string-based version, but the column arguments are
#' given as bare names: each one is captured unevaluated with `substitute()`
#' and the resulting symbol is spliced into the expressions with
#' `lazyeval::interp()`. dplyr and lazyeval must be attached.
#'
#' @param df A data frame.
#' @param facetPanel Bare name of the facetting column.
#' @param barCategory Bare name of the column that labels the bars.
#' @param value Bare name of the numeric column the bars are ordered by.
#' @return `df` reduced to the rows ranked in the top 10 by `abs(value)` within
#'   each `facetPanel` group, sorted by descending `value`, with an extra
#'   factor column literally named `barCategory` holding
#'   "<barCategory>__<facetPanel>" labels whose levels are in reverse row order.
order_bars <- function(df, facetPanel, barCategory, value) {
  # Capture the caller's expressions before anything forces the arguments.
  facetPanel <- substitute(facetPanel)
  barCategory <- substitute(barCategory)
  value <- substitute(value)

  df %>%
    mutate_(barCategory = interp(~reorder(x, y), x = barCategory, y = value)) %>%
    group_by_(facetPanel) %>%
    filter_(interp(~min_rank(desc(abs(x))) <= 10, x = value)) %>%
    group_by_(facetPanel, barCategory) %>%
    arrange_(interp(~desc(x), x = value)) %>%
    ungroup() %>%
    mutate_(barCategory = interp(
      ~factor(paste(x, y, sep = "__"), levels = rev(paste(x, y, sep = "__"))),
      x = barCategory, y = facetPanel))
}

order_bars(ex_data, word1, word2, contribution)

Ideally, you would write only the SE version in full, and link the NSE version to the SE version with lazyeval. I'll leave that as an exercise to the reader.

With rlang 0.4.0 and dplyr 0.8.2, we can use the tidy-evaluation "curly-curly" operator ({{ }}), which abstracts quote-and-unquote into a single interpolation step.

#' Order bars within facets (tidy-evaluation, curly-curly version)
#'
#' The column arguments are given as bare names and forwarded into the dplyr
#' verbs with the `{{ }}` operator. Uses dplyr and `stringr::str_c()`, which
#' must be attached.
#'
#' @param df A data frame.
#' @param facetPanel Bare name of the facetting column.
#' @param barCategory Bare name of the column that labels the bars.
#' @param value Bare name of the numeric column the bars are ordered by.
#' @return `df` reduced to the rows ranked in the top 10 by `abs(value)` within
#'   each `facetPanel` group, sorted by descending `value`, with an extra
#'   factor column literally named `barCategory` holding
#'   "<barCategory>__<facetPanel>" labels whose levels are in reverse row order.
order_barsN <- function(df, facetPanel, barCategory, value) {
  df %>%
    mutate(barCategory = reorder({{ barCategory }}, {{ value }})) %>%
    group_by({{ facetPanel }}) %>%
    filter(min_rank(desc(abs({{ value }}))) <= 10) %>%
    group_by({{ facetPanel }}, {{ barCategory }}) %>%
    arrange(desc({{ value }})) %>%
    ungroup() %>%
    mutate(barCategory = factor(
      str_c({{ barCategory }}, {{ facetPanel }}, sep = "__"),
      levels = rev(str_c({{ barCategory }}, {{ facetPanel }}, sep = "__"))
    ))
}

out2 <- order_barsN(ex_data, word1, word2, contribution)

Checking the result against the previous answer's NSE version:

# Run the lazyeval-based NSE `order_bars()` on the same input and compare its
# result with `out2` from the curly-curly implementation.
out1 <- order_bars(
  df = ex_data,
  facetPanel = word1,
  barCategory = word2,
  value = contribution
)
identical(out1, out2)
#[1] TRUE

