简体   繁体   中英

R Loop Regressions

data=mtcars
data$group = rep(seq(from=1, to=4, by=1), 8)


model1 <- glm(vs ~ mpg + cyl + disp + hp, data = subset(data, group == 1), family = "binomial")
model2 <- glm(vs ~ mpg + cyl + disp + hp, data = subset(data, group == 2), family = "binomial")
model3 <- glm(vs ~ mpg + cyl + disp + hp, data = subset(data, group == 3), family = "binomial")
model4 <- glm(vs ~ mpg + cyl + disp + hp, data = subset(data, group == 4), family = "binomial")

model5 <- glm(am ~ mpg + cyl + disp + hp, data = subset(data, group == 1), family = "binomial")
model6 <- glm(am ~ mpg + cyl + disp + hp, data = subset(data, group == 2), family = "binomial")
model7 <- glm(am ~ mpg + cyl + disp + hp, data = subset(data, group == 3), family = "binomial")
model8 <- glm(am ~ mpg + cyl + disp + hp, data = subset(data, group == 4), family = "binomial")

Say you want to estimate a bunch of stratified models that are identical in every way except the stratified group (models 1-4) and also that you want to repeat this series of models for different outcomes (models 5-8).

That is what I have for the code above. However, is there a more efficient way to run this in terms of it not taking up as many lines of code? For example to specify the covariates, outcomes, and groups, and then loop over them?

You can for instance use data.table to run the model fitting by group, eg:

library(data.table)
dt = as.data.table(data)

models = dt[, .(fit_vs = list(glm(vs ~ mpg + cyl + disp + hp, family = "binomial")),
                fit_am = list(glm(am ~ mpg + cyl + disp + hp, family = "binomial"))), 
            by = .(group)]

The result is then:

print(models)
#    group fit_vs fit_am
# 1:     2  <glm>  <glm>
# 2:     1  <glm>  <glm>
# 3:     3  <glm>  <glm>
# 4:     4  <glm>  <glm>

You can access the fit for vs and group 3 using:

models[group == "3", fit_vs]
# [[1]]
# 
# Call:  glm(formula = vs ~ mpg + cyl + disp + hp, family = "binomial")
# 
# Coefficients:
#   (Intercept)          mpg          cyl         disp           hp  
# 180.970664    -0.384760   -24.366394    -0.008435    -0.010799  
# 
# Degrees of Freedom: 9 Total (i.e. Null);  5 Residual
# Null Deviance:        13.46 
# Residual Deviance: 3.967e-10  AIC: 10

First of all, seq(from=1, to=4, length=T) returns 1 , so your code only creates 1 group. I thus modified your code as follows.

data=mtcars
data$group = rep(1:4, each = 8)

We can use the functions to apply glm to each combination as follows.

library(tidyverse)

data2 <- data %>%
  gather(Y, Value, vs, am) %>%
  group_split(Y, group) %>%
  set_names(nm = map_chr(., ~str_c(unique(.x$Y), unique(.x$group), sep = "-"))) %>%
  map(~glm(Value ~ mpg + cyl + disp + hp, data = .x, family = "binomial"))

We can access the result by names

data2[["am-1"]]
# Call:  glm(formula = Value ~ mpg + cyl + disp + hp, family = "binomial", 
#            data = .x)
# 
# Coefficients:
# (Intercept)          mpg          cyl         disp           hp  
#     4.9180      -0.5335      17.2521      -0.7975       0.5192  
# 
# Degrees of Freedom: 7 Total (i.e. Null);  3 Residual
# Null Deviance:        10.59 
# Residual Deviance: 2.266e-10  AIC: 10

data3 <- data %>%
  gather(Y, Value, vs, am) %>%
  group_by(Y, group) %>%
  nest() %>%
  mutate(Model = map(data, ~glm(Value ~ mpg + cyl + disp + hp, data = .x, family = "binomial")))
data3
# # A tibble: 8 x 4
# # Groups:   group, Y [8]
#   group Y                data Model 
#   <int> <chr> <list<df[,10]>> <list>
# 1     1 vs           [8 x 10] <glm> 
# 2     2 vs           [8 x 10] <glm> 
# 3     3 vs           [8 x 10] <glm> 
# 4     4 vs           [8 x 10] <glm> 
# 5     1 am           [8 x 10] <glm> 
# 6     2 am           [8 x 10] <glm> 
# 7     3 am           [8 x 10] <glm> 
# 8     4 am           [8 x 10] <glm> 

data3 %>%
  filter(group == 1, Y == "am") %>%
  pull(Model)
# [[1]]
# 
# Call:  glm(formula = Value ~ mpg + cyl + disp + hp, family = "binomial", 
#            data = .x)
# 
# Coefficients:
# (Intercept)          mpg          cyl         disp           hp  
#      4.9180      -0.5335      17.2521      -0.7975       0.5192  
# 
# Degrees of Freedom: 7 Total (i.e. Null);  3 Residual
# Null Deviance:        10.59 
# Residual Deviance: 2.266e-10  AIC: 10

You can extract the information with mutate and map , like below.

data4 <- data3 %>% mutate(Coef = map(Model, coef))

data4 %>%
  filter(group == 1, Y == "am") %>%
  pull(Coef)
# [[1]]
# (Intercept)         mpg         cyl        disp          hp 
#   4.9179574  -0.5334823  17.2520829  -0.7974839   0.5191961 

Or use the functions from the broom package.

library(broom)

data5 <- data3 %>%
  mutate(Info = map(Model, tidy)) %>%
  select(-Model, -data) %>%
  unnest(cols = "Info")
data5
# # A tibble: 40 x 7
# # Groups:   group, Y [8]
#   group Y     term        estimate std.error  statistic p.value
#    <int> <chr> <chr>          <dbl>     <dbl>      <dbl>   <dbl>
#  1     1 vs    (Intercept)  397.     4682905.  0.0000849   1.000
#  2     1 vs    mpg           -8.95    176775. -0.0000507   1.000
#  3     1 vs    cyl          -41.9     141996. -0.000295    1.000
#  4     1 vs    disp           0.525     1510.  0.000348    1.000
#  5     1 vs    hp            -0.610     8647. -0.0000705   1.000
#  6     2 vs    (Intercept)  126.     2034044.  0.0000619   1.000
#  7     2 vs    mpg           -0.965    69501. -0.0000139   1.000
#  8     2 vs    cyl           25.6     398854.  0.0000642   1.000
#  9     2 vs    disp           0.266     3917.  0.0000680   1.000
# 10     2 vs    hp            -2.29     19162. -0.000120    1.000
# # ... with 30 more rows

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM