![](/img/trans.png)
[英]How to set_names after using pull on a list column in the tidyverse
[英]How to set_names on a list column using the tidyverse: tibble, purrr, dplyr
简短版本,我希望能够在从summarise()
函数返回的“列表列”上set_names()
。 因此,如果我有一个使用range()
函数的列表列,我希望能够将名称设置为“min”和“max”。
以下是详细信息和可重现的示例。
library(tidyverse)
# Starting point: for every `vore` group, keep the result of range() on
# `sleep_total` in a list column (each cell holds a length-2 double vector).
msleep %>%
  group_by(vore) %>%
  summarise(sleep_total_range = list(range(sleep_total)))
#> # A tibble: 5 x 2
#> vore sleep_total_range
#> <chr> <list>
#> 1 carni <dbl [2]>
#> 2 herbi <dbl [2]>
#> 3 insecti <dbl [2]>
#> 4 omni <dbl [2]>
#> 5 <NA> <dbl [2]>
# Goal: identify and label (i.e., set_names()) the min and max values that
# are stored in the list column.
# Fail 1: No Column, No Labels
# Unnesting the bare range() vectors stacks both values into one column, so
# nothing records which row is the min and which is the max.
# `cols` is given explicitly: tidyr >= 1.0.0 warns when unnest() is called
# without it.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(range(sleep_total))
  ) %>%
  unnest(cols = sleep_total_range)
#> # A tibble: 10 x 2
#> vore sleep_total_range
#> <chr> <dbl>
#> 1 carni 2.7
#> 2 carni 19.4
#> 3 herbi 1.9
#> 4 herbi 16.6
#> 5 insecti 8.4
#> 6 insecti 19.9
#> 7 omni 8
#> 8 omni 18
#> 9 <NA> 5.4
#> 10 <NA> 13.7
# Fail 2: Column, but labels are not correct
# enframe() on the unnamed range() vector produces a label column
# (`range_col`), but it only contains the positions 1 and 2.
# `cols` is given explicitly: tidyr >= 1.0.0 warns when unnest() is called
# without it.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(range(sleep_total) %>% enframe(name = "range_col"))
  ) %>%
  unnest(cols = sleep_total_range)
#> # A tibble: 10 x 3
#> vore range_col value
#> <chr> <int> <dbl>
#> 1 carni 1 2.7
#> 2 carni 2 19.4
#> 3 herbi 1 1.9
#> 4 herbi 2 16.6
#> 5 insecti 1 8.4
#> 6 insecti 2 19.9
#> 7 omni 1 8
#> 8 omni 2 18
#> 9 <NA> 1 5.4
#> 10 <NA> 2 13.7
预期结果
# Success: This is my desired result/output, but it feels verbose,
# and not very "tidyverse / purrr"
# The positional labels from enframe() (1 and 2) are recoded to "min" and
# "max" after unnesting.
# `cols` is given explicitly: tidyr >= 1.0.0 warns when unnest() is called
# without it.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(range(sleep_total) %>% enframe(name = "range_col"))
  ) %>%
  unnest(cols = sleep_total_range) %>%
  mutate(
    range_col = ifelse(range_col == 1, "min", "max")
  )
#> # A tibble: 10 x 3
#> vore range_col value
#> <chr> <chr> <dbl>
#> 1 carni min 2.7
#> 2 carni max 19.4
#> 3 herbi min 1.9
#> 4 herbi max 16.6
#> 5 insecti min 8.4
#> 6 insecti max 19.9
#> 7 omni min 8
#> 8 omni max 18
#> 9 <NA> min 5.4
#> 10 <NA> max 13.7
很接近,但还差一点……
# An attempt that looked close: first keep the per-group ranges ...
temp <- msleep %>%
  group_by(vore) %>%
  summarise(sleep_total_range = list(range(sleep_total)))

# ... then name one element of the list column and turn it into a tibble.
temp$sleep_total_range[[1]] %>%
  set_names(c("min", "max")) %>%
  enframe()
#> # A tibble: 2 x 2
#> name value
#> <chr> <dbl>
#> 1 min 2.7
#> 2 max 19.4
# But this obviously does not work...
# Here set_names() receives list(range(sleep_total)), which is a list of
# length one (the whole range vector is its single element), while two names
# are supplied -- hence the length-mismatch error shown below.
msleep %>%
group_by(vore) %>%
summarise(
sleep_total_range = list(range(sleep_total)) %>%
set_names(c("min", "max")) %>%
enframe()
)
#> `nm` must be `NULL` or a character vector the same length as `x`
由reprex 包(v0.3.0) 于 2019 年 7 月 18 日创建
最简单的选择是group_by
vore
并计算每个组的min
和max
。
不过,如果你想继续使用 range,一种选择是先 unnest,再为每个 vore 分组重复添加 c("min", "max")。
library(tidyverse)
# Unnest the range() values, then label the two rows of every `vore` group.
# range() always returns two values, so each group has exactly two rows for
# c("min", "max") to fill.
# `cols` is given explicitly (tidyr >= 1.0.0 warns without it), and the
# result is ungrouped so later steps do not inherit the grouping.
msleep %>%
  group_by(vore) %>%
  summarise(sleep_total_range = list(range(sleep_total))) %>%
  unnest(cols = sleep_total_range) %>%
  group_by(vore) %>%
  mutate(column = c("min", "max")) %>%
  ungroup()
# vore sleep_total_range column
# <chr> <dbl> <chr>
# 1 NA 5.4 min
# 2 NA 13.7 max
# 3 carni 2.7 min
# 4 carni 19.4 max
# 5 herbi 1.9 min
# 6 herbi 16.6 max
# 7 insecti 8.4 min
# 8 insecti 19.9 max
# 9 omni 8 min
#10 omni 18 max
或者在 unnest 之前添加第二个列表:
# Add a second list column holding the labels, then unnest both list columns
# together so each value sits next to its label.
# Both columns are named in `cols`: tidyr >= 1.0.0 warns when unnest() is
# called without it.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(range(sleep_total))
  ) %>%
  mutate(column = list(c("min", "max"))) %>%
  unnest(cols = c(sleep_total_range, column))
如果我们先创建一个 tibble,就可以把 min 和 max 展开为两列:
library(tidyverse)
# Wrap each group's range in a one-row tibble with columns `min` and `max`,
# so that unnesting spreads it across two columns.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(
      as_tibble(setNames(as.list(range(sleep_total)), c("min", "max")))
    )
  ) %>%
  unnest()
使用开发版 dplyr(packageVersion('dplyr') 返回 '0.8.99.9000')时:
# Same idea as a pipeline: range -> named list -> one-row tibble per group.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(
      range(sleep_total) %>%
        as.list() %>%
        setNames(c("min", "max")) %>%
        as_tibble()
    )
  ) %>%
  # The column to unnest is named because of the changed unnest() behaviour;
  # omitting it produces a warning.
  unnest(c(sleep_total_range))
# A tibble: 5 x 3
# vore min max
# <chr> <dbl> <dbl>
#1 carni 2.7 19.4
#2 herbi 1.9 16.6
#3 insecti 8.4 19.9
#4 omni 8 18
#5 <NA> 5.4 13.7
虽然在这种情况下用 mutate 添加 c("min","max") 是更好的选择,但如果你想避免这样做,可以这样:
library(tidyverse)
# Store a named c(min = , max = ) vector per group, convert each vector into
# a data frame that carries its names in `dcol`, then unnest.
# `cols` is given explicitly: tidyr >= 1.0.0 warns when unnest() is called
# without it.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(
      c(min = min(sleep_total), max = max(sleep_total))
    )
  ) %>%
  mutate(
    sleep_total_range = map(
      sleep_total_range,
      ~ data.frame(sleep_total_range = .x, dcol = names(.x))
    )
  ) %>%
  unnest(cols = sleep_total_range)
#> # A tibble: 10 x 3
#> vore sleep_total_range dcol
#> <chr> <dbl> <fct>
#> 1 <NA> 5.4 min
#> 2 <NA> 13.7 max
#> 3 carni 2.7 min
#> 4 carni 19.4 max
#> 5 herbi 1.9 min
#> 6 herbi 16.6 max
#> 7 insecti 8.4 min
#> 8 insecti 19.9 max
#> 9 omni 8 min
#> 10 omni 18 max
受@akrun 的启发,您还可以在此处采用一种有点非常规的双重 unnest 方法:
# Double unnest: enframe() on a *named list* yields a tibble whose `value`
# column is itself a list column. The first unnest() expands the per-group
# tibbles, the second flattens `value` into plain numbers.
# `cols` is given explicitly in both calls: tidyr >= 1.0.0 warns when
# unnest() is called without it.
msleep %>%
  group_by(vore) %>%
  summarise(
    sleep_total_range = list(
      as.list(range(sleep_total)) %>%
        set_names(c("min", "max")) %>%
        enframe()
    )
  ) %>%
  unnest(cols = sleep_total_range) %>%
  unnest(cols = value)
# A tibble: 10 x 3
# vore name value
# <chr> <chr> <dbl>
# 1 carni min 2.7
# 2 carni max 19.4
# 3 herbi min 1.9
# 4 herbi max 16.6
# 5 insecti min 8.4
# 6 insecti max 19.9
# 7 omni min 8
# 8 omni max 18
# 9 NA min 5.4
# 10 NA max 13.7
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.