简体   繁体   中英

Nested loop for regression over several columns in R

I've got two data frames. One with my data:

# Sample panel data: 10 rows, 5 yearly observations (year 1-5) for each of the
# two companies (IDcount 1 and 2). Otminus1 is the response used in the
# regressions; N.1, N.2 and N.3 are the candidate regressors (N.2 has one
# leading NA and N.3 two leading NAs for company 1).
# The object is a dplyr grouped tibble (class "grouped_df"), grouped by
# IDcount, with rows 1:5 and 6:10 forming the two groups.
test <- structure(list(IDcount = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2), year = c(1, 
2, 3, 4, 5, 1, 2, 3, 4, 5), Otminus1 = c(-0.28, -0.28, -0.44, 
-0.27, 0.23, -0.03, -0.06, -0.04, 0, 0.02), N.1 = c(-0.76, -0.1, 
0.01, 0.1, -0.04, -0.04, -0.04, -0.04, -0.05, -0.05), N.2 = c(NA, 
-0.86, -0.09, 0.11, 0.06, -0.05, -0.08, -0.08, -0.09, -0.09), 
    N.3 = c(NA, NA, -0.85, 0.01, 0.07, -0.04, -0.09, -0.12, -0.13, 
    -0.13)), row.names = c(NA, -10L), groups = structure(list(
    IDcount = c(1, 2), .rows = structure(list(1:5, 6:10), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = 1:2, class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

and one to catch my regression results:

# Container for the regression slopes: one row per company (IDcount 1 and 2)
# and one column per regressor N.1 ... N.5, all initialised to NA.
results <- data.frame(
  IDcount = c(1, 2),
  N.1 = NA,
  N.2 = NA,
  N.3 = NA,
  N.4 = NA,
  N.5 = NA
)

I'm running a regression across my data where every company is being assigned a coefficient with the following code:

# Fit Otminus1 ~ N.1 separately for companies 1 and 2; each fit contributes
# one row holding c(intercept, slope).
company_fits <- lapply(1:2, function(id) {
  coef(lm(Otminus1 ~ N.1, data = test[test$IDcount == id, ]))
})

# Stack the per-company coefficients into a two-column data frame.
betas <- data.frame(do.call(rbind, company_fits))
colnames(betas) <- c("Intercept", "beta")

# Only the slope is stored in the results table.
results$N.1 <- betas$beta

I would now like to nest this loop inside another loop, so that the column used in the regression and the column filled in the results data frame advance together from N.1 to N.3. Running the loop should fill in the columns N.1 to N.5 of the data frame results. Here is my failed attempt:

# Attempted nested loop (does NOT work as intended): the outer loop is meant
# to step through the regressor columns, the inner loop through the companies.
for (j in 1:5) {
    
    for (i in 1:2) {
    # `N.j` inside the formula is a literal variable name; the loop variable
    # `j` is never substituted into it, so lm() looks for a column called
    # "N.j" rather than "N.1", "N.2", ...
    betas[i,] <- coef(lm(Otminus1~N.j, test[test$IDcount==i,]))
    }
  
    betas <- data.frame(betas)
   
    # Same problem here: `$` takes the name literally, so this refers to a
    # column named "N.j" instead of the j-th N column.
    results$N.j <- betas$beta
  }

But inside this loop R does not treat the j in N.j as the loop variable; it looks for a column that is literally named N.j.

Try this code, which iterates j over the names of the N.* columns themselves, so the column can be selected by its name as a string:

library(dplyr)
library(stringr)

# Regressor columns are those whose name starts with a literal "N.".
# The pattern is anchored and the dot escaped; a bare "N." is a regex that
# would match any name containing "N" followed by any character.
predictors <- str_subset(colnames(test), "^N\\.")

# Fit one regression per company listed in `results`, so the slopes line up
# with the rows of `results` instead of relying on a hard-coded 1:2.
ids <- results$IDcount

for (j in predictors) {
  # `j` is a column name (string), so select it with `[` / `[[` rather than
  # `$`. `Otminus1 ~ .` regresses on the single remaining column.
  results[[j]] <- vapply(ids, function(id) {
    fit <- lm(Otminus1 ~ ., data = test[test$IDcount == id, c("Otminus1", j)])
    coef(fit)[[2]] # element 1 is the intercept, element 2 the slope
  }, numeric(1))
}

You could use dplyr and tidyr and get rid of for loops.

library(dplyr) # dplyr >= 1.0.0 (for the `.groups` argument)
library(tidyr)

# Per-company slope of Otminus1 on each N.* column, without explicit loops.
test %>%
  # Stack the regressor columns: `name` holds the column name and `value`
  # the observations. starts_with() matches the prefix literally.
  pivot_longer(cols = starts_with("N.")) %>%
  group_by(IDcount, name) %>%
  # coef() returns c(intercept, slope). Extracting the slope here gives
  # exactly one row per group, which avoids the deprecated multi-row
  # summarise() / cur_data() combination and the positional slice(2L).
  # lm() resolves Otminus1 and value from the current group's columns.
  summarise(value = coef(lm(Otminus1 ~ value))[[2]], .groups = "drop") %>%
  # Back to one row per company with one column per regressor.
  pivot_wider()

#  IDcount     N.1    N.2    N.3
#    <dbl>   <dbl>  <dbl>  <dbl>
#1       1  0.0756  0.190  0.499
#2       2 -5.33   -0.815 -0.412

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. If you have any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM