[英]using rlang to list variables inside purrr::pmap
I am trying to write a function called grouped_lm,
which basically runs linear regression models for each level of the combination of grouping variables ( grouping.vars
) for multiple criterion/dependent ( crit.vars
) and predictor/independent variables ( pred.vars
). 我试图写一个调用的函数
grouped_lm,
它基本上运行线性回归模型为分组变量的组合,每个级别( grouping.vars
)多标准/依赖( crit.vars
)和预测/自变量( pred.vars
) 。
This is done in such a way that first entry into crit.vars
is regressed onto pred.vars
. 这样做是为了使第一次进入
crit.vars
内容回归到pred.vars
。 For example, if I enter grouping.vars = am
, crit.vars = c(mpg, drat)
, and crit.vars = c(wt, disp)
(in the context of mtcars
dataset), the function will run two regression models ( mpg ~ wt
and drat ~ disp
) for each level of grouping variable am
( am = 0
and am = 1
). 例如,如果我输入
grouping.vars = am
, crit.vars = c(mpg, drat)
crit.vars = c(wt, disp)
crit.vars = c(mpg, drat)
和crit.vars = c(wt, disp)
(在mtcars
数据集的上下文中),该函数将运行两个回归模型( mpg ~ wt
和drat ~ disp
)对应每个级别的分组变量am
( am = 0
和am = 1
)。
I have managed to create a dataframe from entered variables, write a custom function that runs linear regression models, but can't seem to figure out how to use rlang
to get entered variables into the list elements that will be entered into purrr::pmap
. 我已经设法从输入的变量创建一个数据框,编写一个运行线性回归模型的自定义函数,但是似乎无法弄清楚如何使用
rlang
将输入的变量获取将要输入到purrr::pmap
的列表元素中。
Apologies for a lengthy question and thanks in advance for any help offered. 对于冗长的问题,我们深表歉意,并在此先感谢您提供的任何帮助。
# libraries needed
library(tidyverse)
library(plyr)
# function definition
grouped_lm <- function(data,
grouping.vars,
crit.vars,
pred.vars) {
#================== preparing dataframe ==================
#
# check how many variables were entered for criterion variables vector
crit.vars <-
as.list(rlang::quo_squash(rlang::enquo(crit.vars)))
crit.vars <-
if (length(crit.vars) == 1) {
crit.vars
} else {
crit.vars[-1]
}
# check how many variables were entered for predictor variables vector
pred.vars <-
as.list(rlang::quo_squash(rlang::enquo(pred.vars)))
pred.vars <-
if (length(pred.vars) == 1) {
pred.vars
} else {
pred.vars[-1]
}
# check how many variables were entered for grouping variable vector
grouping.vars <-
as.list(rlang::quo_squash(rlang::enquo(grouping.vars)))
grouping.vars <-
if (length(grouping.vars) == 1) {
grouping.vars
} else {
grouping.vars[-1]
}
# getting the dataframe ready
df <- dplyr::select(.data = data,
!!!grouping.vars,
!!!crit.vars,
!!!pred.vars) %>%
dplyr::group_by(.data = ., !!!grouping.vars) %>%
tidyr::nest(data = .)
# checking if the nested dataframe looks okay
cat(paste("the entire nested dataframe: \n"))
print(df) # the entire nested dataframe
cat(paste("first element of the list column from nested dataframe: \n"))
print(df$data[[1]]) # first element of the list column
#============== custom function ================
# custom function to run linear regression for every element of a list for two variables
lm_listed <- function(list.col, x_name, y_name) {
fx <- glue::glue("scale({y_name}) ~ scale({x_name})")
# this tags any names that are not predictor variables (used to remove intercept terms)
filter_name <- glue::glue("scale({x_name})")
# dataframe with results from lm
results_df <-
list.col %>% # running linear regression on each individual group with purrr
purrr::map(.x = .,
.f = ~ stats::lm(formula = as.formula(fx),
data = (.))) %>% # tidying up the output with broom
purrr::map_dfr(.x = .,
.f = ~ broom::tidy(x = .),
.id = "group") %>% # remove intercept terms
dplyr::filter(.data = ., term == !!filter_name) %>% # add formula as a character
dplyr::mutate(.data = ., formula = as.character(fx)) %>% # rearrange the dataframe
dplyr::select(
.data = .,
group,
formula,
term,
estimate,
std.error,
t = statistic,
p.value
) %>% # convert to a tibble dataframe
tibble::as_data_frame(x = .)
# return the dataframe
return(results_df)
}
# check if the function works
group_mtcars <- split(mtcars, mtcars$am)
fn_results <- purrr::pmap(.l = list(
l = list(group_mtcars),
x_name = list('wt', 'disp'),
y_name = list('mpg', 'drat')
),
.f = lm_listed) %>%
dplyr::bind_rows()
# seems to be working!
cat(paste("the custom function seems to be working!: \n"))
print(fn_results)
#========= using custom function on entered dataframe =================
cat(paste("running the custom function on the entered dataframe: \n"))
# running custom function for each element of the created list column
df_lm <- purrr::pmap(.l = list(
l = list(df$data),
x_name = list(!!!pred.vars),
y_name = list(!!!crit.vars)
),
.f = lm_listed) %>%
dplyr::bind_rows()
#============================== output ========================
print(df_lm)
# return the final dataframe with results
return(df_lm)
}
# example usage of the function
grouped_lm(
data = iris,
crit.vars = c(Sepal.Length, Petal.Length),
pred.vars = c(Sepal.Width, Petal.Width),
grouping.vars = Species
)
#> the entire nested dataframe:
#> # A tibble: 3 x 2
#> Species data
#> <fct> <list>
#> 1 setosa <tibble [50 x 4]>
#> 2 versicolor <tibble [50 x 4]>
#> 3 virginica <tibble [50 x 4]>
#> first element of the list column from nested dataframe:
#> # A tibble: 50 x 4
#> Sepal.Length Petal.Length Sepal.Width Petal.Width
#> <dbl> <dbl> <dbl> <dbl>
#> 1 5.10 1.40 3.50 0.200
#> 2 4.90 1.40 3.00 0.200
#> 3 4.70 1.30 3.20 0.200
#> 4 4.60 1.50 3.10 0.200
#> 5 5.00 1.40 3.60 0.200
#> 6 5.40 1.70 3.90 0.400
#> 7 4.60 1.40 3.40 0.300
#> 8 5.00 1.50 3.40 0.200
#> 9 4.40 1.40 2.90 0.200
#> 10 4.90 1.50 3.10 0.100
#> # ... with 40 more rows
#> the custom function seems to be working!:
#> # A tibble: 4 x 7
#> group formula term estimate std.error t p.value
#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 0 scale(mpg) ~ scale(wt) scale(~ -0.768 0.155 -4.94 1.25e-4
#> 2 1 scale(mpg) ~ scale(wt) scale(~ -0.909 0.126 -7.23 1.69e-5
#> 3 0 scale(drat) ~ scale(disp) scale(~ -0.614 0.192 -3.20 5.20e-3
#> 4 1 scale(drat) ~ scale(disp) scale(~ -0.305 0.287 -1.06 3.12e-1
#> running the custom function on the entered dataframe:
#> Error in !pred.vars: invalid argument type
Created on 2018-03-23 by the reprex package (v0.2.0). 由reprex包 (v0.2.0)创建于2018-03-23。
Edits after the answer was provided 提供答案后进行编辑
I was also wondering how I can get separate columns in the ouput for every grouping variable. 我还想知道如何在输出中为每个分组变量获取单独的列。 So, with the answer provided below, if I run-
因此,使用下面提供的答案,如果我运行-
grouped_lm(
data = mtcars,
crit.vars = c(wt, mpg),
pred.vars = c(drat, disp),
grouping.vars = c(am, cyl)
)
It works, but the output looks something like this: 它可以工作,但是输出看起来像这样:
As can be appreciated from the picture, it is not at all clear what do the values 1 to 6 represent. 从图片中可以看出,值1到6代表什么完全不清楚。 So it will be nice to also get a separate column for each grouping variable provided so that, in this example, there will be two columns
am
and cyl
with their respective levels for each lm
model. 因此,最好为每个提供的分组变量都获得一个单独的列,以便在此示例中,将有两列
am
和cyl
及其每个lm
模型的各自级别。
(I have manually created this dataframe. This is not how the grouping is happening, but this is just to show what the desired output looks like.) (我已经手动创建了此数据框。这不是分组的过程,但这只是为了显示所需的输出是什么样。)
If we need to replicate the same behavior as in the example usage of 'mtcars' where the x_name
and y_name
are strings rather than symbols
(which is the case of 'pred.vars' and 'crit.vars'), convert those to strings with quo_name
ie 如果我们需要复制与“ mtcars”示例用法中相同的行为,其中
x_name
和y_name
是字符串而不是symbols
(“ pred.vars”和“ crit.vars”的情况),请将其转换为字符串与quo_name
即
df_lm <- purrr::pmap(.l = list(
l = list(df$data),
x_name = map(pred.vars, quo_name),
y_name = map(crit.vars, quo_name)
),
.f = lm_listed) %>%
dplyr::bind_rows()
#============================== output ========================
print(df_lm)
# return the final dataframe with results
return(df_lm)
}
Or pass as symbol
without any evaluation ie with !!
或者作为
symbol
通过而没有任何评估,即带有!!
df_lm <- purrr::pmap(.l = list(
l = list(df$data),
x_name = pred.vars, ###
y_name = crit.vars ###
),
.f = lm_listed) %>%
dplyr::bind_rows()
#============================== output ========================
print(df_lm)
# return the final dataframe with results
return(df_lm)
}
This has to do with how the lm_listed
function is taking the arguments. 这与
lm_listed
函数如何接受参数有关。 Consider the objects as strings 将对象视为字符串
sl <- "Sepal.Length"
sw <- "Sepal.Width"
and glue
returns it correctly glue
正确返回
glue::glue("scale({sl}) ~ scale({sw})")
#scale(Sepal.Length) ~ scale(Sepal.Width)
Now, we change this to symbol
, it also works 现在,我们将其更改为
symbol
,它也可以工作
sl <- rlang::sym("Sepal.Length")
sw <- rlang::sym("Sepal.Width")
glue::glue("scale({sl}) ~ scale({sw})")
#scale(Sepal.Length) ~ scale(Sepal.Width)
But, the problem is in using !!
但是,问题出在使用
!!
for evaluating that is passed as input argument 用于评估作为输入参数传递的
sl <- !!rlang::sym("Sepal.Length")
Error in !rlang::sym("Sepal.Length") : invalid argument type
!rlang :: sym(“ Sepal.Length”)中的错误:无效的参数类型
The !!
!!
is evaluated outside the environment of tidyverse
functions, which result in the error 在
tidyverse
函数的环境之外评估,从而导致错误
-full code -完整的代码
grouped_lm <- function(data,
grouping.vars,
crit.vars,
pred.vars) {
#================== preparing dataframe ==================
#
# check how many variables were entered for criterion variables vector
crit.vars <-
as.list(rlang::quo_squash(rlang::enquo(crit.vars)))
crit.vars <-
if (length(crit.vars) == 1) {
crit.vars
} else {
crit.vars[-1]
}
# check how many variables were entered for predictor variables vector
pred.vars <-
as.list(rlang::quo_squash(rlang::enquo(pred.vars)))
pred.vars <-
if (length(pred.vars) == 1) {
pred.vars
} else {
pred.vars[-1]
}
# check how many variables were entered for grouping variable vector
grouping.vars <-
as.list(rlang::quo_squash(rlang::enquo(grouping.vars)))
grouping.vars <-
if (length(grouping.vars) == 1) {
grouping.vars
} else {
grouping.vars[-1]
}
# getting the dataframe ready
df <- dplyr::select(.data = data,
!!!grouping.vars,
!!!crit.vars,
!!!pred.vars) %>%
dplyr::group_by(.data = ., !!!grouping.vars) %>%
tidyr::nest(data = .)
# checking if the nested dataframe looks okay
cat(paste("the entire nested dataframe: \n"))
print(df) # the entire nested dataframe
cat(paste("first element of the list column from nested dataframe: \n"))
print(df$data[[1]]) # first element of the list column
#============== custom function ================
# custom function to run linear regression for every element of a list for two variables
lm_listed <- function(list.col, x_name, y_name) {
fx <- glue::glue("scale({y_name}) ~ scale({x_name})")
# this tags any names that are not predictor variables (used to remove intercept terms)
filter_name <- glue::glue("scale({x_name})")
# dataframe with results from lm
results_df <-
list.col %>% # running linear regression on each individual group with purrr
purrr::map(.x = .,
.f = ~ stats::lm(formula = as.formula(fx),
data = (.))) %>% # tidying up the output with broom
purrr::map_dfr(.x = .,
.f = ~ broom::tidy(x = .),
.id = "group") %>% # remove intercept terms
dplyr::filter(.data = ., term == !!filter_name) %>% # add formula as a character
dplyr::mutate(.data = ., formula = as.character(fx)) %>% # rearrange the dataframe
dplyr::select(
.data = .,
group,
formula,
term,
estimate,
std.error,
t = statistic,
p.value
) %>% # convert to a tibble dataframe
tibble::as_data_frame(x = .)
# return the dataframe
return(results_df)
}
# check if the function works
group_mtcars <- split(mtcars, mtcars$am)
fn_results <- purrr::pmap(.l = list(
l = list(group_mtcars),
x_name = list('wt', 'disp'),
y_name = list('mpg', 'drat')
),
.f = lm_listed) %>%
dplyr::bind_rows()
# seems to be working!
cat(paste("the custom function seems to be working!: \n"))
print(fn_results)
#========= using custom function on entered dataframe =================
cat(paste("running the custom function on the entered dataframe: \n"))
# running custom function for each element of the created list column
df_lm <- purrr::pmap(.l = list(
l = list(df$data),
x_name = pred.vars,
y_name = crit.vars
),
.f = lm_listed) %>%
dplyr::bind_rows()
#============================== output ========================
print(df_lm)
# return the final dataframe with results
return(df_lm)
}
-run the function -运行功能
res <- grouped_lm(
data = iris,
crit.vars = c(Sepal.Length, Petal.Length),
pred.vars = c(Sepal.Width, Petal.Width),
grouping.vars = Species
)
-output printed 输出打印
#the entire nested dataframe:
# A tibble: 3 x 2
# Species data
# <fctr> <list>
#1 setosa <tibble [50 x 4]>
#2 versicolor <tibble [50 x 4]>
#3 virginica <tibble [50 x 4]>
#first element of the list column from nested dataframe:
# A tibble: 50 x 4
# Sepal.Length Petal.Length Sepal.Width Petal.Width
# <dbl> <dbl> <dbl> <dbl>
# 1 5.10 1.40 3.50 0.200
# 2 4.90 1.40 3.00 0.200
# 3 4.70 1.30 3.20 0.200
# 4 4.60 1.50 3.10 0.200
# 5 5.00 1.40 3.60 0.200
# 6 5.40 1.70 3.90 0.400
# 7 4.60 1.40 3.40 0.300
# 8 5.00 1.50 3.40 0.200
# 9 4.40 1.40 2.90 0.200
#10 4.90 1.50 3.10 0.100
# ... with 40 more rows
#the custom function seems to be working!:
# A tibble: 4 x 7
# group formula term estimate std.error t p.value
# <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#1 0 scale(mpg) ~ scale(wt) scale(wt) -0.768 0.155 -4.94 0.000125
#2 1 scale(mpg) ~ scale(wt) scale(wt) -0.909 0.126 -7.23 0.0000169
#3 0 scale(drat) ~ scale(disp) scale(disp) -0.614 0.192 -3.20 0.00520
#4 1 scale(drat) ~ scale(disp) scale(disp) -0.305 0.287 -1.06 0.312
#running the custom function on the entered dataframe:
# A tibble: 6 x 7
# group formula term estimate std.error t p.value
# <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#1 1 scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width) 0.743 0.0967 7.68 0.000000000671
#2 2 scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width) 0.526 0.123 4.28 0.0000877
#3 3 scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width) 0.457 0.128 3.56 0.000843
#4 1 scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width) 0.332 0.136 2.44 0.0186
#5 2 scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width) 0.787 0.0891 8.83 0.0000000000127
#6 3 scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width) 0.322 0.137 2.36 0.0225
-result output 结果输出
res
# A tibble: 6 x 7
# group formula term estimate std.error t p.value
# <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#1 1 scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width) 0.743 0.0967 7.68 0.000000000671
#2 2 scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width) 0.526 0.123 4.28 0.0000877
#3 3 scale(Sepal.Length) ~ scale(Sepal.Width) scale(Sepal.Width) 0.457 0.128 3.56 0.000843
#4 1 scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width) 0.332 0.136 2.44 0.0186
#5 2 scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width) 0.787 0.0891 8.83 0.0000000000127
#6 3 scale(Petal.Length) ~ scale(Petal.Width) scale(Petal.Width) 0.322 0.137 2.36 0.0225
If we need to have the 'grouping.vars' also in the output 如果我们需要在输出中也有'grouping.vars'
grouped_lm <- function(data,
grouping.vars,
crit.vars,
pred.vars) {
#================== preparing dataframe ==================
#
# check how many variables were entered for criterion variables vector
crit.vars <-
as.list(rlang::quo_squash(rlang::enquo(crit.vars)))
crit.vars <-
if (length(crit.vars) == 1) {
crit.vars
} else {
crit.vars[-1]
}
# check how many variables were entered for predictor variables vector
pred.vars <-
as.list(rlang::quo_squash(rlang::enquo(pred.vars)))
pred.vars <-
if (length(pred.vars) == 1) {
pred.vars
} else {
pred.vars[-1]
}
# check how many variables were entered for grouping variable vector
grouping.vars <-
as.list(rlang::quo_squash(rlang::enquo(grouping.vars)))
grouping.vars <-
if (length(grouping.vars) == 1) {
grouping.vars
} else {
grouping.vars[-1]
}
# getting the dataframe ready
df <- dplyr::select(.data = data,
!!!grouping.vars,
!!!crit.vars,
!!!pred.vars) %>%
dplyr::group_by(.data = ., !!!grouping.vars) %>%
tidyr::nest(data = .)
# checking if the nested dataframe looks okay
cat(paste("the entire nested dataframe: \n"))
print(df) # the entire nested dataframe
cat(paste("first element of the list column from nested dataframe: \n"))
print(df$data[[1]]) # first element of the list column
#============== custom function ================
# custom function to run linear regression for every element of a list for two variables
lm_listed <- function(list.col, x_name, y_name) {
fx <- glue::glue("scale({y_name}) ~ scale({x_name})")
# this tags any names that are not predictor variables (used to remove intercept terms)
filter_name <- glue::glue("scale({x_name})")
# dataframe with results from lm
results_df <-
list.col %>% # running linear regression on each individual group with purrr
purrr::map(.x = .,
.f = ~ stats::lm(formula = as.formula(fx),
data = (.))) %>% # tidying up the output with broom
purrr::map_dfr(.x = .,
.f = ~ broom::tidy(x = .),
.id = "group") %>% # remove intercept terms
dplyr::filter(.data = ., term == !!filter_name) %>% # add formula as a character
dplyr::mutate(.data = ., formula = as.character(fx)) %>% # rearrange the dataframe
dplyr::select(
.data = .,
group,
formula,
term,
estimate,
std.error,
t = statistic,
p.value
) %>% # convert to a tibble dataframe
tibble::as_data_frame(x = .)
# return the dataframe
return(results_df)
}
# check if the function works
group_mtcars <- split(mtcars, mtcars$am)
fn_results <- purrr::pmap(.l = list(
l = list(group_mtcars),
x_name = list('wt', 'disp'),
y_name = list('mpg', 'drat')
),
.f = lm_listed) %>%
dplyr::bind_rows()
# seems to be working!
cat(paste("the custom function seems to be working!: \n"))
print(fn_results)
#========= using custom function on entered dataframe =================
cat(paste("running the custom function on the entered dataframe: \n"))
# running custom function for each element of the created list column
df <- df %>%
tibble::rownames_to_column('group')
df_lm <- purrr::pmap(.l = list(
l = list(df$data),
x_name = pred.vars,
y_name = crit.vars
),
.f = lm_listed) %>%
dplyr::bind_rows() %>%
left_join(df) %>%
select(!!!grouping.vars, everything()) %>%
select(-group, -data)
#============================== output ========================
print(df_lm)
# return the final dataframe with results
return(df_lm)
}
-run function 运行功能
r1 <- grouped_lm(
data = mtcars,
crit.vars = c(wt, mpg),
pred.vars = c(drat, disp),
grouping.vars = c(am, cyl)
)
-output - 输出
r1
# A tibble: 12 x 8
# am cyl formula term estimate std.error t p.value
# <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
# 1 1.00 6.00 scale(wt) ~ scale(drat) scale(drat) -0.101 0.995 - 0.102 0.935
# 2 1.00 4.00 scale(wt) ~ scale(drat) scale(drat) -0.226 0.398 - 0.568 0.591
# 3 0 6.00 scale(wt) ~ scale(drat) scale(drat) 0.307 0.673 0.456 0.693
# 4 0 8.00 scale(wt) ~ scale(drat) scale(drat) -0.119 0.314 - 0.379 0.713
# 5 0 4.00 scale(wt) ~ scale(drat) scale(drat) 0.422 0.906 0.466 0.722
# 6 1.00 8.00 scale(wt) ~ scale(drat) scale(drat) -1.00 NaN NaN NaN
# 7 1.00 6.00 scale(mpg) ~ scale(disp) scale(disp) 1.00 0 Inf 0
# 8 1.00 4.00 scale(mpg) ~ scale(disp) scale(disp) -0.835 0.225 - 3.72 0.00991
# 9 0 6.00 scale(mpg) ~ scale(disp) scale(disp) 0.670 0.525 1.28 0.330
#10 0 8.00 scale(mpg) ~ scale(disp) scale(disp) -0.535 0.267 - 2.00 0.0729
#11 0 4.00 scale(mpg) ~ scale(disp) scale(disp) 0.932 0.362 2.57 0.236
#12 1.00 8.00 scale(mpg) ~ scale(disp) scale(disp) 1.00 NaN NaN NaN
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.