简体   繁体   中英

R - How can I make this loop run faster?

The for loop below iterates over nodes in an igraph graph. There are 2048 of these, so it is very slow. I've tried to code as efficiently as possible (for example, by not growing vectors). How can I make the loop run faster?

Edit: I've also thought about writing this in C++ via Rcpp. I just don't know how I would use igraph in that case.

Edit 2: compatible_models actually depends on child_node. What I gave here is an example of what it could be for a particular value of child_node.

library(igraph)
library(Metrics)

set.seed(1234)
N <- 10000
A <- rnorm(N, 10, 2)
B <- rnorm(N, 9, 2)
C <- rnorm(N, 12, 1)
D <- rnorm(N, 7, 3)
Y <- A + B + A*B + D + A^2 + rnorm(N)
data <- data.frame(Y = Y, A = A, B = B, C = C, D = D)
partition <- sort(sample(N, 0.7*N))
data_train <- data[partition, ]
data_test <- data[-partition, ]

g <- make_empty_graph()
g <- g + vertices(1:2049)

generate_edges <- function(start_vertex, end_vertices) {
  edges <- c()
  for (i in 1:length(end_vertices)) {
    edges <- c(edges, start_vertex, end_vertices[i])
  }
  return(edges)
}

outward_edges <- generate_edges(V(g)[1], V(g)[2:vcount(g)])
g <- g + edges(outward_edges, attr1 = rep(0, length(outward_edges) / 2), attr2 = rep(0, length(outward_edges) / 2))


successors <- matrix(nrow = length(g[[1, ]][[1]]), ncol = 9) # nrow = number of successors of node 1
i <- 1

for (child_node in 2:2049) {
   # compatible_models <- lapply(...) # suppose this is a list of "formula" objects
   # like: 
   compatible_models <- list(Y ~ A + B + C, Y ~ I(A^2) + B + C + D, Y ~ B + D)
   compatible_models <- lapply(compatible_models, lm, data = data_train)
   predictions <- sapply(compatible_models, predict, newdata = data_test)
   successors[i, 1:3] <- c(edge_attr(g, name = "attr1", g[[1, V(g)[child_node], edges = TRUE]]), edge_attr(g, name = "attr2", g[[1, V(g)[child_node], edges = TRUE]]), 
                                     sum(apply(predictions, 2, rmse, actual = data_test$Y))/length(compatible_models))

   i <- i + 1

  }




Correct me if i am wrong but i think you could evaluate the first three lines (or any lines that build model objects, but do not evaluate anything) outside of the loop, which ~ triples the performance of the code on my machine:

successors <- matrix(nrow = length(g[[1, ]][[1]]), ncol = 9) # nrow = number of successors of node 1

i <- 1

start_time <- Sys.time()
for (child_node in 2:2049) {
# build models inside loop:
  compatible_models <- list(Y ~ A + B + C, Y ~ I(A^2) + B + C + D, Y ~ B + D)
  compatible_models <- lapply(compatible_models, lm, data = data_train)
  predictions <- sapply(compatible_models, predict, newdata = data_test)
  successors[i, 1:3] <- c(edge_attr(g, name = "attr1", g[[1, V(g)[child_node], edges = TRUE]]), edge_attr(g, name = "attr2", g[[1, V(g)[child_node], edges = TRUE]]), 
                          sum(apply(predictions, 2, rmse, actual = data_test$Y))/length(compatible_models))
  
  i <- i + 1
  
}
Sys.time()-start_time
#Time difference of 26.69914 secs

Optimized code with model creation outside of loop:



## model building:
compatible_models <- list(Y ~ A + B + C, Y ~ I(A^2) + B + C + D, Y ~ B + D)
compatible_models <- lapply(compatible_models, lm, data = data_train)
predictions <- sapply(compatible_models, predict, newdata = data_test)
## initialisation:
successors2 <- matrix(nrow = length(g[[1, ]][[1]]), ncol = 9) 
i <- 1

start_time <- Sys.time()
for (child_node in 2:2049) {

  successors2[i, 1:3] <- c(edge_attr(g, name = "attr1", g[[1, V(g)[child_node], edges = TRUE]]), edge_attr(g, name = "attr2", g[[1, V(g)[child_node], edges = TRUE]]), 
                          sum(apply(predictions, 2, rmse, actual = data_test$Y))/length(compatible_models))
  
  i <- i + 1
  
}
Sys.time()-start_time
#Time difference of 8.885826 secs

all.equal(successors,successors2)
# [1] TRUE

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM