繁体   English   中英

使用列表通过一组代码运行多个统计测试

[英]Using lists to run multiple statistics test with one set of code

我有兴趣使用列表通过一组代码运行多个统计测试。

例如,我想根据数据框/列表中的行运行在 DV、IV、数据和族方面有所不同的 glm() 测试。 我可以长期这样做,我可以使用 lapply() 来做到这一点“中等方式”,这样我就可以更改测试中使用的 DV。 但我想知道是否有一种方法{最好使用 lapply()} 以更少的代码和更自动化/迭代的方式完成这项任务。

对于示例数据,我使用 ggplot2::diamonds 数据和以下代码创建了 2 个数据集:

### for dataset with top 300 rows
# ---- NOTE: selects only the top 300 rows of the dataset
diamonds_top300 <- data.frame(dplyr::top_n(diamonds, 300, table))
# ---- NOTE: gives dataset info
head(diamonds_top300)
str(diamonds_top300)
colnames(diamonds_top300)
nrow(diamonds_top300)
# ---- NOTE: gives unique values of Fixed and Random effects, and dvs
unique(diamonds_top300$price)
unique(diamonds_top300$y)
unique(diamonds_top300$cut)
unique(diamonds_top300$color)
unique(diamonds_top300$carat)
unique(diamonds_top300$clarity)
unique(diamonds_top300$depth)
unique(diamonds_top300$table)

### for dataset with bottom 300 rows
# ---- NOTE: selects only the bottom 300 rows of the dataset
diamonds_bottom300 <- data.frame(dplyr::top_n(diamonds, -300, table))
# ---- NOTE: gives dataset info
head(diamonds_bottom300)
str(diamonds_bottom300)
colnames(diamonds_bottom300)
nrow(diamonds_bottom300)
# ---- NOTE: gives unique values of Fixed and Random effects, and dvs
unique(diamonds_bottom300$price)
unique(diamonds_bottom300$y)
unique(diamonds_bottom300$cut)
unique(diamonds_bottom300$color)
unique(diamonds_bottom300$carat)
unique(diamonds_bottom300$clarity)
unique(diamonds_bottom300$depth)
unique(diamonds_bottom300$table)

然后我使用这些数据创建了一个包含列表信息的数据框,并得到了以下结果:

## creates df with variable info
model_variable_df <-
  data.frame(
    cbind(
      DV_name = c("carat", "depth", "price"),
      DV_label = c("carat size", "depth size", "diamond price"),
      dataset_name = c("diamonds_bottom300", "diamonds_bottom300", "diamonds_top300"),
      IV_name = c("x + y + color", "x + y + clarity", "x + z + color"),
      family = c("poisson", "poisson", "gaussian")
    )
  )

> model_variable_df
  DV_name      DV_label       dataset_name         IV_name   family
1   carat    carat size diamonds_bottom300   x + y + color  poisson
2   depth    depth size diamonds_bottom300 x + y + clarity  poisson
3   price diamond price    diamonds_top300   x + z + color gaussian

我可以使用 long 方法完成我的任务:

## long form of 3 models

### creates first model
freq_glm_poisson_carat <- 
  (
  glm(
    carat ~ x + y + color,
             data = diamonds_bottom300, 
             family = poisson()
           )
  )

### creates 2nd model
freq_glm_binomial_depth <- 
  glm(
    depth ~ x + y + clarity,
    data = diamonds_bottom300, 
    family= poisson()
  )

### creates 3rd model
freq_glm_gaussian_price <- 
  glm(
    price ~ x + z + color,
    data = diamonds_top300, 
    family= gaussian()
  )

我还可以将 medium 方法用于更具体和有限的基于DV_name的任务。

## model that uses lapply, and just values DV
# ---- NOTE: DV_name is the only thing that changes
# ---- NOTE: IVs / effects (fixed and random) are the same as x + y + color, or model 1
# ---- NOTE: data = diamonds_top300 only
# ---- NOTE: family = poisson() only
# ---- NOTE: creates list object
freq_checking_mlm_poisson_Effects_x_y_color_1z_model <- 
  lapply(model_variable_df$DV_name,
         function(DV_list) wrapr::let(
           c(DV_col = DV_list, 
             dataset_obj = "diamonds_top300"),
           glm(
             DV_col ~ x + y + color, 
             data = dataset_obj,
             family = poisson()
             )
         )
  )
# ---- NOTE: changes list object name
freq_checking_mlm_poisson_Effects_x_y_color_1z_model <- 
  setNames(freq_checking_mlm_poisson_Effects_x_y_color_1z_model, paste("freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter", 
                                                                       model_variable_df$DV_name,
                                                                       sep = "__")
  )
# ---- NOTE: creates unique objects for each part list object
list2env(freq_checking_mlm_poisson_Effects_x_y_color_1z_model, .GlobalEnv)
# ---- NOTE: gathers objects with prefix
apropos("freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter")

有没有什么方法可以使用(1)更少的代码和(2)更多的迭代/自动化来完成这项任务? 非常感谢任何和所有帮助。

仅供参考,我在 2013 Intel Macbook Pro 上使用 RStudio。

谢谢。



用于练习的代码:

# sets up data

## Loads packages
# ---- NOTE: making plots and diamonds dataset
if(!require(ggplot2)){install.packages("ggplot2")}
# ---- NOTE: run mixed effects models
if(!require(lme4)){install.packages("lme4")}
# ---- NOTE: for data wrangling
if(!require(dplyr)){install.packages("dplyr")}
# ---- NOTE: for iteration
if(!require(wrapr)){install.packages("wrapr")}

## dataset creation

### for dataset with top 300 rows
# ---- NOTE: selects only the top 300 rows of the dataset
diamonds_top300 <- data.frame(dplyr::top_n(diamonds, 300, table))
# ---- NOTE: gives dataset info
head(diamonds_top300)
str(diamonds_top300)
colnames(diamonds_top300)
nrow(diamonds_top300)
# ---- NOTE: gives unique values of Fixed and Random effects, and dvs
unique(diamonds_top300$price)
unique(diamonds_top300$y)
unique(diamonds_top300$cut)
unique(diamonds_top300$color)
unique(diamonds_top300$carat)
unique(diamonds_top300$clarity)
unique(diamonds_top300$depth)
unique(diamonds_top300$table)

### for dataset with bottom 300 rows
# ---- NOTE: selects only the bottom 300 rows of the dataset
diamonds_bottom300 <- data.frame(dplyr::top_n(diamonds, -300, table))
# ---- NOTE: gives dataset info
head(diamonds_bottom300)
str(diamonds_bottom300)
colnames(diamonds_bottom300)
nrow(diamonds_bottom300)
# ---- NOTE: gives unique values of Fixed and Random effects, and dvs
unique(diamonds_bottom300$price)
unique(diamonds_bottom300$y)
unique(diamonds_bottom300$cut)
unique(diamonds_bottom300$color)
unique(diamonds_bottom300$carat)
unique(diamonds_bottom300$clarity)
unique(diamonds_bottom300$depth)
unique(diamonds_bottom300$table)

## creates df with variable info
model_variable_df <-
  data.frame(
    cbind(
      DV_name = c("carat", "depth", "price"),
      DV_label = c("carat size", "depth size", "diamond price"),
      dataset_name = c("diamonds_bottom300", "diamonds_bottom300", "diamonds_top300"),
      IV_name = c("x + y + color", "x + y + clarity", "x + z + color"),
      family = c("poisson", "poisson", "gaussian")
    )
  )

## long for of 3 models

### creates first model
freq_glm_poisson_carat <- 
  (
  glm(
    carat ~ x + y + color,
             data = diamonds_bottom300, 
             family = poisson()
           )
  )

### creates 2nd model
freq_glm_binomial_depth <- 
  glm(
    depth ~ x + y + clarity,
    data = diamonds_bottom300, 
    family= poisson()
  )

### creates 3rd model
freq_glm_gaussian_price <- 
  glm(
    price ~ x + z + color,
    data = diamonds_top300, 
    family= gaussian()
  )

## model that uses lapply, and just values DV
# ---- NOTE: DV_name is the only thing that changes
# ---- NOTE: IVs / effects (fixed and random) are the same as x + y + color, or model 1
# ---- NOTE: data = diamonds_top300 only
# ---- NOTE: family = poisson() only
# ---- NOTE: creates list object
freq_checking_mlm_poisson_Effects_x_y_color_1z_model <- 
  lapply(model_variable_df$DV_name,
         function(DV_list) wrapr::let(
           c(DV_col = DV_list, 
             dataset_obj = "diamonds_top300"),
           glm(
             DV_col ~ x + y + color, 
             data = dataset_obj,
             family = poisson()
             )
         )
  )
# ---- NOTE: changes list object name
freq_checking_mlm_poisson_Effects_x_y_color_1z_model <- 
  setNames(freq_checking_mlm_poisson_Effects_x_y_color_1z_model, paste("freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter", 
                                                                       model_variable_df$DV_name,
                                                                       sep = "__")
  )
# ---- NOTE: creates unique objects for each part list object
list2env(freq_checking_mlm_poisson_Effects_x_y_color_1z_model, .GlobalEnv)
# ---- NOTE: gathers objects with prefix
apropos("freq_checking_mlm_poisson_Effects_x_y_color_1z_model_contracts_filter")

您可以使用lapply执行此操作:

lapply(seq(nrow(model_variable_df)), function(i) {
  val <- model_variable_df[i, ]
  glm(as.formula(paste(val$DV_name, val$IV_name, sep = '~')), 
      data = get(val$dataset_name), family = val$family)
}) -> model_list

as.formula用于将字符串转换为公式, get用于从字符串值中获取数据集。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM