简体   繁体   English

如何在 R 中为这个嵌套的 for 循环编写更有效的代码?

[英]how to write a more efficient code for this nested for-loop in R?

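Trying to calculate the combinations of p1 (event probability in the experiment group) and p0 (event probability in the control group) that give an or (odds ratio) of 1.5. 尝试计算 p1(实验组中的事件概率)和 p0(对照组中的事件概率)的组合,使 or(优势比)为 1.5。 nnt = number needed to treat: 100/(p1 - p0) when p1 and p0 are percentages, i.e. 1/(p1 - p0) for the proportions used in the code below. nnt = 需要治疗的数量:当 p1 和 p0 为百分比时为 100/(p1 - p0),对于下面代码中使用的比例即为 1/(p1 - p0)。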

library(tidyverse)

# Candidate event probabilities for the experiment group (p1) and the control
# group (p0): every value from 0 to 1 in steps of 0.0001.
p1 <- seq(0,1, 0.0001)
p0 <- seq(0,1,0.0001)
# Target odds ratio that a (p1, p0) pair must match after rounding.
or <- 1.5

# Empty result table; one row is appended for every matching pair.
df <- tibble(p1 = as.numeric(), p0 = as.numeric(), nnt = as.numeric())

# Exhaustive search: the outer loop walks p1, the inner loop walks p0.
for (i in p1) {
for (j in p0) {
  # Odds ratio of i relative to j, rounded to 3 decimals before comparison.
  or_formula <- round((i/(1-i))/(j/(1-j)),3)
  
  # 0/0 and Inf/Inf at the ends of the grid give NaN; the is.na() term turns
  # the condition into FALSE for those pairs instead of NA.
  if (or_formula == or & !is.na(or_formula)) {
    # Append the matching pair together with its rounded 1/(i - j) value.
    df <- df %>% add_row(p1 = i, p0 = j, nnt = round(1/(i-j), digits = 0))
  }
  
}
}

We could use outer我们可以使用outer

# Odds ratio of probability `i` relative to probability `j`, rounded to three
# decimals. Works element-wise on vectors, so it can be passed to outer().
or_formula <- function(i, j) {
  odds_i <- i / (1 - i)
  odds_j <- j / (1 - j)
  round(odds_i / odds_j, digits = 3)
}
# Evaluate or_formula for every (p1, p0) pair in one call; rows of m1 follow
# p1 and columns follow p0.
m1 <- outer(p1, p0, FUN = or_formula)
dim(m1)
#[1] 10001 10001
# TRUE where the rounded odds ratio equals the target and is not NA/NaN.
i1 <- m1 == or & !is.na(m1)
# Row/column positions of the matching cells.
i2 <- which(i1, arr.ind = TRUE)
# Translate the positions back into the corresponding probabilities.
p1new <- p1[i2[,1]]
p0new <- p0[i2[,2]]
# Assemble the result with the same columns as the loop version.
df1 <- tibble(p1 = p1new, p0 = p0new, nnt = round(1/(p1new-p0new), digits = 0))

Benchmarks基准

- using outer - 使用 outer

# Time the vectorised outer() approach end to end, from building the
# odds-ratio matrix to assembling the result tibble df1.
system.time({
  m1 <- outer(p1, p0, FUN = or_formula)
  i1 <- m1 == or & !is.na(m1)
  i2 <- which(i1, arr.ind = TRUE)
  p1new <- p1[i2[,1]]
  p0new <- p0[i2[,2]]
  df1 <- tibble(p1 = p1new, p0 = p0new, nnt = round(1/(p1new-p0new), digits = 0))
 
 
 })
# Timing reported by the answer's author (seconds):
#   user  system elapsed 
#  5.038   1.288   6.319 

-using OP's for loop - 使用 OP 的 for 循环

# Time the original nested for loop, which appends matching rows to df.
system.time({
  df <- tibble(p1 = as.numeric(), p0 = as.numeric(), nnt = as.numeric())
  
  for (i in p1) {
   for (j in p0) {
   # NOTE(review): this rebinds the name or_formula to a number; if a function
   # called or_formula was defined earlier in the session it is replaced.
   or_formula <- round((i/(1-i))/(j/(1-j)),3)
   
   if (or_formula == or & !is.na(or_formula)) {
     df <- df %>% add_row(p1 = i, p0 = j, nnt = round(1/(i-j), digits = 0))
   }
   
 }
 }
 
 
 })
# Timing reported by the answer's author (seconds):
#   user  system elapsed 
#122.391   0.748 123.128 

-testing the equality - 测试相等性

# Check that the loop result (df) and the outer() result (df1) are exactly
# the same object-wise.
identical(df, df1)
#[1] TRUE

Here is another base R option with expand.grid + subset (but not as fast as @akurn's outer solution). 这是另一个使用 expand.grid + subset 的 base R 方案(但不如 @akurn 的 outer 解决方案快)。

# Base R alternative: build every (p1, p0) pair, attach nnt to each pair, and
# keep only the pairs whose rounded odds ratio equals `or`.
na.omit(
  subset(
    transform(
      expand.grid(p1 = p1, p0 = p0),
      # No trailing comma after the last argument: an empty argument makes
      # transform() fail with an "argument is empty" error.
      nnt = round(1 / (p1 - p0), 0)
    ),
    # subset() treats an NA/NaN condition as FALSE, so pairs with an undefined
    # odds ratio (0/0 or Inf/Inf at the ends of the grid) are dropped here.
    round((p1 / (1 - p1)) / (p0 / (1 - p0)), 3) == or
  )
)

A dplyr/purrr approach: dplyr/purrr 方法:

library(tidyverse)

# Same search grid and target odds ratio as in the question.
p1 <- seq(0,1, 0.0001)
p0 <- seq(0,1,0.0001)
or <- 1.5


# For each p1 value, build a list holding the full p0 vector, that p1 value,
# and the unrounded odds ratio against every p0; map_df() binds the pieces
# into one data frame.
# NOTE(review): p_1 is a single value per piece and is presumably recycled to
# the length of p0 when the rows are bound -- confirm against map_df/bind_rows.
test <- map_df(p1, function(p1_element){
            # Inside the ~ lambda, `.` is the current p1 value (p1_element).
            map( p1_element,
                 ~list( 
                     p_0 = p0,
                     p_1 = p1_element,
                     or_formula = (. / (1-.)) / (p0 / (1 - p0)) ) ) }) %>%
        # Round to 3 decimals, then keep the rows that hit the target exactly.
        mutate(or_formula = round(or_formula, 3)) %>%
        filter(!is.na(or_formula) & or_formula == or) %>% 
        mutate(nnt = round(1 / (p_1 - p_0), digits =  0))

# Drop the helper column before showing the result.
test %>% select(-or_formula)    
 

## 6.531 sec elapsed 
## original for loop : 131.454 sec elapsed

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM