使用 purrr::map2() 或 purrr::imap() 提取數據集的名稱

Question

我正在嘗試使用purrr::map2()或purrr::imap()，從大量數據集中找出包含給定變量的數據集。 本質上，我將遍歷數據集列表，並僅打印含有感興趣變量的數據集的名稱。 當我使用purrr::map()執行此操作時，返回的數據集名稱是“.x[[i]]”，而不是實際的名稱（例如“df3”）。 任何幫助將不勝感激。 謝謝

#load packages
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(purrr)

#create fictitious datasets
df1 <- tibble(score_a=1:20,
              sex_a=rep(c("M", "F"), 10))
df2 <- tibble(score_b=1:20,
              sex_b=rep(c("M", "F"), 10))
df3 <- tibble(score_c=1:20,
              sex_c=rep(c("M", "F"), 10))

# Report the name under which a dataset was passed in, but only when
# that dataset has a column called `contains`.
#
# data:     the data frame / tibble to inspect
# contains: the column name to look for
#
# Returns the text of the expression supplied for `data` when the column
# is present; otherwise returns NULL invisibly.
get_dataset_name <- function(data, contains) {
  # Capture the caller's expression for `data` as text; this is whatever
  # the caller wrote, not necessarily a variable name.
  arg_label <- deparse(substitute(data))

  has_column <- contains %in% colnames(data)
  if (!has_column) {
    return(invisible(NULL))
  }
  arg_label
}

#testing the function
get_dataset_name(data=df3, contains="score_c")
#> [1] "df3"


#creating a list of the all datasets
data_list <- list(df1, df2, df3)

#looping through a list of the dataset to find the dataset
#that includes the given variable

map(data_list, get_dataset_name, contains="score_c")
#> [[1]]
#> NULL
#> 
#> [[2]]
#> NULL
#> 
#> [[3]]
#> [1] ".x[[i]]"

#I was hoping to obtain "df3" instead of  ".x[[i]]"
#I read that purrr::map2() or purrr::imap could solve
#the issue but I am not sure how to set it up
#Any help would be appreciated it



# map2(.x=data_list, 
#      .y=names(data_list), 
#      ~get_dataset_name(data=.x, contains="score_c"),
#      nest(.x, name=.y)
# )


#imap(data_list, get_dataset_name, contains="score_c")

^{由 reprex package (v2.0.0) 於 2022-09-26 創建}

Answer 1

嘗試獲取數據集名稱的索引，而不是使用 deparse/substitute。 索引可以是邏輯的或數字的。 如果列表是命名列表，則名稱可用於索引列表，這些值將由第二個函數 iget_dataset_name 返回。

# load packages
suppressPackageStartupMessages({
  library(dplyr)
  library(purrr)
})

# create fictitious datasets
df1 <- tibble(score_a=1:20,
              sex_a=rep(c("M", "F"), 10))
df2 <- tibble(score_b=1:20,
              sex_b=rep(c("M", "F"), 10))
df3 <- tibble(score_c=1:20,
              sex_c=rep(c("M", "F"), 10))

# create two functions that return indices to the
# data sets that contain the wanted name
get_dataset_name <- function(data, contains) {
  # Membership test: one logical per element of `contains`, TRUE when
  # that element is among the column names of `data`.
  match(contains, colnames(data), nomatch = 0L) > 0L
}
# Return the list index or name of a dataset when it contains the wanted
# column(s); otherwise return an empty vector of the same type.
#
# The argument order (data, col_names) matches what purrr::imap() passes
# to its function: the list element first, then its name or position.
#
# data:      the data frame / tibble to inspect
# col_names: the name (named list) or integer position (unnamed list)
#            of `data` within the list being iterated over
# contains:  one or more column names to look for; the dataset matches
#            when at least one of them is a column of `data`
#
# Returns `col_names` on a match, `col_names[0L]` (e.g. character(0) or
# integer(0)) otherwise.
iget_dataset_name <- function(data, col_names, contains) {
  # Decide match / no match with any(). Indexing `col_names` by
  # which(contains %in% ...) would use positions within `contains`,
  # giving NA whenever the matching element is not the first one.
  has_match <- any(contains %in% colnames(data))
  if (has_match) col_names else col_names[0L]
}

# testing the function
get_dataset_name(data=df3, contains="score_c")
#> [1] TRUE


# creating a list of the all datasets
data_list <- list(df1, df2, df3)

# looping through a list of the dataset to find the dataset
# that includes the given variable

map(data_list, get_dataset_name, contains="score_c")
#> [[1]]
#> [1] FALSE
#> 
#> [[2]]
#> [1] FALSE
#> 
#> [[3]]
#> [1] TRUE

imap(data_list, iget_dataset_name, contains="score_c")
#> [[1]]
#> integer(0)
#> 
#> [[2]]
#> integer(0)
#> 
#> [[3]]
#> [1] 3

# give the list a names attribute
names(data_list) <- c("df1", "df2", "df3")
# retest the functions
map(data_list, get_dataset_name, contains="score_c")
#> $df1
#> [1] FALSE
#> 
#> $df2
#> [1] FALSE
#> 
#> $df3
#> [1] TRUE

imap(data_list, iget_dataset_name, contains="score_c")
#> $df1
#> character(0)
#> 
#> $df2
#> character(0)
#> 
#> $df3
#> [1] "df3"

^{使用reprex v2.0.2創建於 2022-09-27}

使用 purrr::map2() 或 purrr::imap() 提取數據集的名稱

問題描述

1 個解決方案

解決方案1
0 2022-09-27 05:34:10

使用 purrr::map2() 或 purrr::imap() 提取數據集的名稱

問題描述

1 個解決方案

解決方案1 0 2022-09-27 05:34:10

解決方案1
0 2022-09-27 05:34:10