[英]How to apply a function to mutate a specific combination of columns? (purrr:: use preferred)
假设我有以下数据:
data = tibble::tribble(
~id, ~year_1, ~year_2, ~cod_1, ~cod_2, ~cod_3, ~cod_4, ~var_x,
1, 0, 1, 5, 5, 3, 6, "x",
1, 0, 1, 3, 6, 14, 5, "x",
1, 0, 1, 2, 8, 5, 4, "x",
2, 1, 0, 10, 8, 2, 3, "x",
2, 1, 0, 3, 9, 1, 2, "x",
2, 1, 0, 1, 12, 0, 1, "x"
)
我想通过所有列“cod_”创建所有列“year_”组合的所有可能产品。 我的意思是这样的:
data.new = data %>%
mutate(year_1_cod_1 = year_1 * cod_1) %>%
mutate(year_1_cod_2 = year_1 * cod_2) %>%
mutate(year_1_cod_3 = year_1 * cod_3) %>%
mutate(year_1_cod_4 = year_1 * cod_4) %>%
mutate(year_2_cod_1 = year_2 * cod_1) %>%
mutate(year_2_cod_2 = year_2 * cod_2) %>%
mutate(year_2_cod_3 = year_2 * cod_3) %>%
mutate(year_2_cod_4 = year_2 * cod_4)
我可以使用以下方法获得所有可能的组合:
year.var = colnames(data[, grepl("year", names(data))])
cod.var = colnames(data[, grepl("cod", names(data))])
com = crossing(year.var, cod.var)
> com
# A tibble: 8 x 2
year.var cod.var
<chr> <chr>
1 year_1 cod_1
2 year_1 cod_2
3 year_1 cod_3
4 year_1 cod_4
5 year_2 cod_1
6 year_2 cod_2
7 year_2 cod_3
8 year_2 cod_4
我可以使用 for 循环来移动com
数据框并创建每个新列。 但是我想在dplyr::
环境中执行此dplyr::
。 我想我可以使用purrr::
对所有组合进行mutate
,但我不确定如何。
事实上,在我的真实数据中,我有超过 1k 种可能的组合(即超过 1k 个要变异的变量)。
您可以使用map2
遍历com
的组合,并使用transmute
通过使用非标准评估将这些列相乘并最终将其绑定到原始数据帧来创建新列。
library(dplyr)
library(purrr)
data %>%
bind_cols(map2_dfc(com$year.var, com$cod.var,
~data %>% transmute(!!paste(.x, .y, sep = "_") := !!sym(.x) * !!sym(.y))))
# A tibble: 6 x 16
# id year_1 year_2 cod_1 cod_2 cod_3 cod_4 var_x year_1_cod_1 year_1_cod_2
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
#1 1 0 1 5 5 3 6 x 0 0
#2 1 0 1 3 6 14 5 x 0 0
#3 1 0 1 2 8 5 4 x 0 0
#4 2 1 0 10 8 2 3 x 10 8
#5 2 1 0 3 9 1 2 x 3 9
#6 2 1 0 1 12 0 1 x 1 12
# … with 6 more variables: year_1_cod_3 <dbl>, year_1_cod_4 <dbl>,
# year_2_cod_1 <dbl>, year_2_cod_2 <dbl>, year_2_cod_3 <dbl>,
# year_2_cod_4 <dbl>
library(dplyr)
library(tidyr)
data %>%
pivot_longer(starts_with("year"), names_to = "year", values_to = "year_val") %>%
pivot_longer(starts_with("cod"), names_to = "cod", values_to = "cod_val") %>%
mutate(year_cod = paste(year, cod, sep = "_"),
val = year_val * cod_val) %>%
pivot_wider(
id_cols = c(id, var_x),
names_from = year_cod,
values_from = val,
values_fn = list(val = list)
) %>%
unnest(cols = c(-id, -var_x))
#> # A tibble: 6 x 10
#> id var_x year_1_cod_1 year_1_cod_2 year_1_cod_3 year_1_cod_4 year_2_cod_1
#> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 x 0 0 0 0 5
#> 2 1 x 0 0 0 0 3
#> 3 1 x 0 0 0 0 2
#> 4 2 x 10 8 2 3 0
#> 5 2 x 3 9 1 2 0
#> 6 2 x 1 12 0 1 0
#> # … with 3 more variables: year_2_cod_2 <dbl>, year_2_cod_3 <dbl>,
#> # year_2_cod_4 <dbl>
由reprex 包(v0.3.0) 于 2020 年 2 月 26 日创建
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.