簡體   English   中英

通過R中的CSV文件列表循環或批處理

[英]Loop or batch process through a list of CSV files in R

我想遍歷CSV文件列表:

Macro <- read.csv("P:/R/R_Input/JWN_Input.csv")
Macro <- read.csv("P:/R/R_Input/BBY_Input.csv")
...

這也將輸出到相應的CSV文件:

write.csv(a, "P:/Model_Output/JWN.csv", row.names = F, na="")
write.csv(a, "P:/Model_Output/BBY.csv", row.names = F, na="")
...

以上兩項是唯一的唯一輸入/輸出。 代碼的主體在下面。 我試圖使用下面的代碼主體來批量處理輸入/輸出CSV文件。

Macro <- read.csv("P:/R/R_Input/JWN_Input.csv")

# train set up
ctrl <- caret::trainControl(method = "timeslice", initialWindow = 8, horizon = 1, 
                            fixedWindow = FALSE, savePredictions = TRUE)

# Loads all variable names from Macro and Macro2
vars_macro = names(Macro)[!names(Macro) %in% c("qtrs", "y", "s1", "s2", "s3")] 
vars_macro2 = names(Macro2)[!names(Macro2) %in% c("y", "s1", "s2", "s3")]
vars_macro3 = names(Macro3)[!names(Macro3) %in% c("y", "s1", "s2", "s3")]
vars = c(vars_macro, vars_macro2, vars_macro3)

# run lm
lst = foreach(var = vars) %dopar% {

  if (var %in% vars_macro)  
    foo <- function(start, mod_formula) {
      myfit <- caret::train(mod_formula, data = Macro[start:14, ,drop = FALSE],
                            method = "lm", trControl = ctrl)
      c(myfit$pred)  ## return; drop dimension as a vector
    }
  if (var %in% vars_macro2)
    foo <- function(start, mod_formula) {
      myfit <- caret::train(mod_formula, data = Macro2[start:14, ,drop = FALSE],
                            method = "lm", trControl = ctrl)
      c(myfit$pred)  ## return; drop dimension as a vector
    }
  if (var %in% vars_macro3)
    foo <- function(start, mod_formula) {
      myfit <- caret::train(mod_formula, data = Macro3[start:14, ,drop = FALSE],
                            method = "lm", trControl = ctrl)
      c(myfit$pred)  ## return; drop dimension as a vector
    } 
  f = formula(paste0("y ~ ", var,  "+ s1 + s2 + s3"))
  Forecast <- sapply(1:6, foo, mod_formula = f)  

  F9 <- c(Forecast[[1,1]][1])
  F10 <- c(Forecast[[1,1]][2], Forecast[[1,2]][1])
  F11 <- c(Forecast[[1,1]][3], Forecast[[1,2]][2], Forecast[[1,3]][1])
  F12 <- c(Forecast[[1,1]][4], Forecast[[1,2]][3], Forecast[[1,3]][2],
           Forecast[[1,4]][1])
  F13 <- c(Forecast[[1,1]][5], Forecast[[1,2]][4], Forecast[[1,3]][3],
           Forecast[[1,4]][2], Forecast[[1,5]][1])
  F14 <- c(Forecast[[1,1]][6], Forecast[[1,2]][5], Forecast[[1,3]][4],
           Forecast[[1,4]][3], Forecast[[1,5]][2], Forecast[[1,6]][1])

  A <-c((mean(F9)/Macro[9:9,2:2]-1), (mean(F10)/Macro[10:10,2:2]-1), 
        (mean(F11)/Macro[11:11,2:2]-1), (mean(F12)/Macro[12:12,2:2]-1), 
        (mean(F13)/Macro[13:13,2:2]-1),(mean(F14)/Macro[14:14,2:2]-1))
  Temp <- mean(abs(A[0:5]))
  P <-c((mean(F9)/Macro[9:9,2:2]-1), (mean(F10)/Macro[10:10,2:2]-1), 
        (mean(F11)/Macro[11:11,2:2]-1), (mean(F12)/Macro[12:12,2:2]-1), 
        (mean(F13)/Macro[13:13,2:2]-1),(mean(F14)/Macro[14:14,2:2]-1),
        Temp,(mean(F14)/(1+mean(A[3:5])))/Macro[14:14,2:2]-1)
  #E <- scales::percent(P)
  C <- c(mean(F9),mean(F10),mean(F11), mean(F12), mean(F13), mean(F14),
         "abs error",mean(F14)/(1+mean(P[3:5])))
  data.frame(C, P)
}

# Summary            
model_error = as.character(sapply(lst, function(elt) elt$P[7]))
forecasts = as.numeric(as.character(sapply(lst, function(elt) elt$C[8])))
delta = as.character(sapply(lst, function(elt) elt$P[8]))
df = data.frame(Card = vars, Model_Avg_Error = model_error, 
                Forecast = forecasts,  Delta = delta)
df$blankVar = NA

df_macro1 = df[df$Card %in% vars_macro,]
df_macro1$blankVar = NA
df_macro2 = df[df$Card %in% vars_macro2,]
df_macro2 = df_macro2[order(df_macro2$Model_Avg_Error),]
df_macro2$blankVar = NA
df_macro3 = df[df$Card %in% vars_macro3,]
df_macro3 = df_macro3[order(df_macro3$Model_Avg_Error),]
df_macro3$blankVar = NA
df_macro4 = df[df$Card %in% names(Macro4),]
df_macro4 = df_macro4[order(df_macro4$Model_Avg_Error),]

df = df[order(df$Model_Avg_Error),]
a = cbind.fill(df_macro1, df_macro2, df_macro3, df, df_macro4)
# save
write.csv(a, "P:/Model_Output/JWN.csv", row.names = F, na="")

只需創建文件名列表並像這樣遍歷它們即可:

Files = c("JWN", "BBY")

for(f in Files) {
    InFile  = paste("P:/R/R_Input/", f, "_Input.csv",  sep="")
    OutFile = paste("P:/R/R_Input/", f, "_Output.csv", sep="")
    Macro <- read.csv(InFile)

      ## All of that other code 

      write.csv(a, OutFile, row.names = F, na="")
}

基於評論的附錄:原始海報出現錯誤:

文件錯誤(con,“ w”):所有連接都在使用中”

通過在write.csv語句之后立即添加closeAllConnections()解決此問題。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM