简体   繁体   中英

Delete incomplete cases in nested dataframe using map function from purrr

I would like to delete incomplete cases from each data frame of a nested tibble. I tried to use the map function (purrr package), but I received the following error message: "Error in parent.env(x) : argument is not an environment". I do not understand what the problem is.

Here is a reproducible example.

library(tidyverse)

# Fix the RNG seed so the randomly injected NAs (and dummy_var) are identical
# on every run; without it the example is not actually reproducible.
set.seed(42)

# Download the gapminder five-year data set used by the example.
gapminder_orig <- read.csv("https://raw.githubusercontent.com/swcarpentry/r-novice-gapminder/gh-pages/_episodes_rmd/data/gapminder-FiveYearData.csv")

# Fix the column order; the positional index 4:6 below relies on it
# (columns 4 to 6 are pop, lifeExp and gdpPercap).
gapminder_orig <- gapminder_orig %>%
  dplyr::select(continent, country, year, pop, lifeExp, gdpPercap)

# For each of the three numeric columns, index with a random TRUE/NA mask:
# positions drawn as NA (probability 0.2) become NA, the others keep their value.
data_with_NA <- map_df(gapminder_orig[, 4:6], function(x) {
  x[sample(c(TRUE, NA), prob = c(0.8, 0.2), size = length(x), replace = TRUE)]
})

# Replace the original numeric columns with their NA-injected versions.
gapminder_orig_with_NA <- gapminder_orig %>%
  mutate(
    pop = data_with_NA$pop,
    lifeExp = data_with_NA$lifeExp,
    gdpPercap = data_with_NA$gdpPercap
  )

# Nest the rows of each continent into a `data` list-column, then attach two
# hard-coded label columns.
# NOTE(review): the Type/Sector vectors have exactly five entries, so this
# assumes the data contains five continent groups -- confirm if the data changes.
gapminder_nested <- gapminder_orig_with_NA %>%
  mutate(dummy_var = sample(1:3, nrow(gapminder_orig_with_NA), replace = TRUE)) %>%
  group_by(continent) %>%
  nest() %>%
  add_column(Type = c("Full", "Full", "Subset", "Subset", "Subset")) %>%
  add_column(Sector = c("Agriculture", "Banking", "Agriculture", "Banking", "Agriculture"))
gapminder_nested

# Drop every row of `x` that contains at least one missing value.
#
# x: a data frame (or tibble).
# Returns `x` restricted to its complete rows, always as a data frame.
remove_NA <- function(x) {
  # drop = FALSE keeps the result a data frame even when `x` has a single
  # column; without it a one-column base data.frame collapses to a vector.
  x[complete.cases(x), , drop = FALSE]
}

# Drop every row of `x` that contains at least one missing value.
#
# x: a data frame (or tibble).
# z: accepted so the function can be called from map2(); it is not used.
# Returns `x` restricted to its complete rows, always as a data frame.
remove_NAz <- function(x, z) {
  # drop = FALSE keeps the result a data frame even when `x` has a single
  # column; without it a one-column base data.frame collapses to a vector.
  x[complete.cases(x), , drop = FALSE]
}

# Attempts at adding a `data2` list-column that holds each nested data frame
# with its incomplete rows removed. Only the last, uncommented call is run.
test<-gapminder_nested  %>%
  # Attempt 1: `.f` is given the result of calling filter(), not a function or formula.
  #mutate(data2 = map(.x=data, .f=filter(complete.cases(.x))))  # Reported as not working
  # Attempt 2: na.omit passed directly as the mapped function.
  #mutate(data2 = map(.x=data, .f=na.omit)) # Reported as not working
  # Attempt 3: na.omit applied to each column separately through map_dfc.
  #mutate(data2 = map(data, ~ map_dfc(., na.omit))) # Reported as not working
  # Attempt 4: remove_NA() wrapped in an anonymous function.
  #mutate(data2 = map(data, function(.x) remove_NA(.x))) # Reported as not working
  # Attempt 5: map2 over `data` and `Type`; remove_NAz() ignores its second argument.
  mutate(data2= map2(data, Type, function(.x, .z) remove_NAz(.x, .z))) # Works, but is not elegant

Any idea what is going wrong with the calls to the map function? Why does it work with map2?

Thanks!

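As far as I can tell, at least your second approach (.f = na.omit) works fine. Also, to make the first approach work, use .f = ~ filter(.x, complete.cases(.x)): .f has to be a function or a formula, whereas filter(complete.cases(.x)) is a call that is evaluated before map ever runs.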

Both approaches give me the same result as your final approach using map2

library(dplyr)
library(purrr)
library(tidyr)
library(tibble)

# Seed the RNG so the sample() calls below give the same result on every run.
set.seed(42)
# Use the data set shipped with the gapminder package.
gapminder_orig <- gapminder::gapminder

# Fix the column order; the positional index 4:6 below relies on it.
gapminder_orig <- gapminder_orig %>%
  dplyr::select(continent, country, year, pop, lifeExp, gdpPercap)

# For columns 4 to 6, index each column with a random TRUE/NA mask so that
# positions drawn as NA (probability 0.2) become NA.
data_with_NA<-map_df(gapminder_orig[,4:6], function(x) {x[sample(c(TRUE, NA), prob = c(0.8, 0.2), size = length(x), replace = TRUE)]})

# Replace the original numeric columns with their NA-injected versions.
gapminder_orig_with_NA<-gapminder_orig %>%
  mutate(pop=data_with_NA$pop, lifeExp=data_with_NA$lifeExp, gdpPercap=data_with_NA$gdpPercap)

# Nest the rows of each continent into a `data` list-column and attach two
# hard-coded, five-element label columns (one entry per nested row).
gapminder_nested <- gapminder_orig_with_NA %>% 
  mutate(dummy_var= sample(1:3, nrow(gapminder_orig_with_NA), replace=TRUE)) %>%
  group_by(continent) %>% 
  nest() %>%
  add_column(Type=c("Full", "Full", "Subset","Subset","Subset")) %>%
  add_column(Sector=c("Agriculture", "Banking", "Agriculture", "Banking", "Agriculture"))

# Drop every row of `x` that contains at least one missing value.
#
# x: a data frame (or tibble).
# z: accepted so the function can be called from map2(); it is not used.
# Returns `x` restricted to its complete rows, always as a data frame.
remove_NAz <- function(x, z) {
  # drop = FALSE keeps the result a data frame even when `x` has a single
  # column; without it a one-column base data.frame collapses to a vector.
  x[complete.cases(x), , drop = FALSE]
}

# Approach 1 (fixed): a formula lambda that filters each nested data frame
# down to its complete rows. The commented lines are the captured output.
gapminder_nested  %>%
  mutate(data2 = map(data, ~ filter(.x, complete.cases(.x))))
#> # A tibble: 5 x 5
#> # Groups:   continent [5]
#>   continent data               Type   Sector      data2             
#>   <fct>     <list>             <chr>  <chr>       <list>            
#> 1 Asia      <tibble [396 x 6]> Full   Agriculture <tibble [185 x 6]>
#> 2 Europe    <tibble [360 x 6]> Full   Banking     <tibble [195 x 6]>
#> 3 Africa    <tibble [624 x 6]> Subset Agriculture <tibble [311 x 6]>
#> 4 Americas  <tibble [300 x 6]> Subset Banking     <tibble [150 x 6]>
#> 5 Oceania   <tibble [24 x 6]>  Subset Agriculture <tibble [10 x 6]>

# Approach 2: pass na.omit directly as the function mapped over `data`.
# The commented lines are the captured output.
gapminder_nested  %>%
  mutate(data2 = map(.x=data, .f=na.omit)) 
#> # A tibble: 5 x 5
#> # Groups:   continent [5]
#>   continent data               Type   Sector      data2             
#>   <fct>     <list>             <chr>  <chr>       <list>            
#> 1 Asia      <tibble [396 x 6]> Full   Agriculture <tibble [185 x 6]>
#> 2 Europe    <tibble [360 x 6]> Full   Banking     <tibble [195 x 6]>
#> 3 Africa    <tibble [624 x 6]> Subset Agriculture <tibble [311 x 6]>
#> 4 Americas  <tibble [300 x 6]> Subset Banking     <tibble [150 x 6]>
#> 5 Oceania   <tibble [24 x 6]>  Subset Agriculture <tibble [10 x 6]>

# Original map2 approach from the question, shown for comparison; the second
# mapped argument (Type) is passed to remove_NAz(), which does not use it.
# The commented lines are the captured output.
gapminder_nested %>% 
  mutate(data2= map2(data, Type, function(.x, .z) remove_NAz(.x, .z)))
#> # A tibble: 5 x 5
#> # Groups:   continent [5]
#>   continent data               Type   Sector      data2             
#>   <fct>     <list>             <chr>  <chr>       <list>            
#> 1 Asia      <tibble [396 x 6]> Full   Agriculture <tibble [185 x 6]>
#> 2 Europe    <tibble [360 x 6]> Full   Banking     <tibble [195 x 6]>
#> 3 Africa    <tibble [624 x 6]> Subset Agriculture <tibble [311 x 6]>
#> 4 Americas  <tibble [300 x 6]> Subset Banking     <tibble [150 x 6]>
#> 5 Oceania   <tibble [24 x 6]>  Subset Agriculture <tibble [10 x 6]>

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM