简体   繁体   English

我可以使用 Rstudio 中的循环有效地创建此数据框吗?

[英]Can I create this dataframe efficiently with a loop in Rstudio?

I want to optimize my code.我想优化我的代码。 I have a lot of repetitions and I'm sure this can be done using a loop.我有很多重复,我相信这可以使用循环来完成。

# Build one summary row per examination strategy from the data frame `d`
# (defined elsewhere). For the baseline column (suffix 1) and the latest column
# (suffix 2) each row holds:
#   *_cardinv_n : sum of the column, ignoring NA
#   *_total_n   : count of the first two levels reported by table(); table()
#                 leaves NA out by default
#   *_pct       : mean of the column, ignoring NA, times 100
# NOTE(review): presumably the columns are 0/1 indicators, so the sum is the
# number of positives and the mean a proportion -- confirm against `d`.

# Row 1: ECG only.
ciecg <- data.frame(baseline_cardinv_n = sum(d$cardinv_ecg1,na.rm=T),
                    baseline_total_n = table(d$cardinv_ecg1)[1]+table(d$cardinv_ecg1)[2],
                    baseline_pct = mean(d$cardinv_ecg1,na.rm=T)*100,
                    latest_cardinv_n = sum(d$cardinv_ecg2,na.rm=T),
                    latest_total_n = table(d$cardinv_ecg2)[1]+table(d$cardinv_ecg2)[2],
                    latest_pct = mean(d$cardinv_ecg2,na.rm=T)*100); rownames(ciecg) <- "ECG only"; ciecg

# Row 2: ECG + HOL (same six statistics, different pair of columns).
ciecghol <- data.frame(baseline_cardinv_n = sum(d$cardinv_ecg1_hol1,na.rm=T),
                       baseline_total_n = table(d$cardinv_ecg1_hol1)[1]+table(d$cardinv_ecg1_hol1)[2],
                       baseline_pct = mean(d$cardinv_ecg1_hol1,na.rm=T)*100,
                       latest_cardinv_n = sum(d$cardinv_ecg2_hol2,na.rm=T),
                       latest_total_n = table(d$cardinv_ecg2_hol2)[1]+table(d$cardinv_ecg2_hol2)[2],
                       latest_pct = mean(d$cardinv_ecg2_hol2,na.rm=T)*100); rownames(ciecghol) <- "ECG + HOL"; ciecghol

# Row 3: ECG + HOL + TTE (same six statistics, different pair of columns).
ciecgholec <- data.frame(baseline_cardinv_n = sum(d$cardinv_ecg1_hol1_ec1,na.rm=T),
                         baseline_total_n = table(d$cardinv_ecg1_hol1_ec1)[1]+table(d$cardinv_ecg1_hol1_ec1)[2],
                         baseline_pct = mean(d$cardinv_ecg1_hol1_ec1,na.rm=T)*100,
                         latest_cardinv_n = sum(d$cardinv_ecg2_hol2_ec2,na.rm=T),
                         latest_total_n = table(d$cardinv_ecg2_hol2_ec2)[1]+table(d$cardinv_ecg2_hol2_ec2)[2],
                         latest_pct = mean(d$cardinv_ecg2_hol2_ec2,na.rm=T)*100); rownames(ciecgholec) <- "ECG + HOL + TTE"; ciecgholec


# Stack the three one-row data frames and round every value to one decimal.
round(rbind(ciecg,ciecghol,ciecgholec),1)

If I print the last line in the console I get this:如果我在控制台中打印最后一行,我会得到:

> round(rbind(ciecg,ciecghol,ciecgholec),1)
                baseline_cardinv_n baseline_total_n baseline_pct latest_cardinv_n latest_total_n latest_pct
ECG only                        47              194         24.2               83            169       49.1
ECG + HOL                       61              183         33.3               59            120       49.2
ECG + HOL + TTE                 73              180         40.6               65            113       57.5

I would like to make that same code with a loop to shorten my code.我想用一个循环来制作相同的代码来缩短我的代码。 Here is what I have tried (I only tried to make the ciecg to make it simpler for myself):这是我尝试过的方法(我只是想让 ciecg 对我自己来说更简单):

# Loop attempt (this is the code that produces the error and warnings quoted
# below it). `ci_exam` starts as a data frame with 3 rows and no columns.
ci_exam <- data.frame(matrix(nrow = 3,ncol = 0))
# Column names to summarise (baseline / latest) and the label for each result.
exam_1 <- c("cardinv_ecg1","cardinv_hol1","cardinv_ec1")
exam_2 <- c("cardinv_ecg2","cardinv_hol2","cardinv_ec2")
exam_name <- c("ECG only","HOL only","EC only")
for (i in 1:3) {
  # Why this fails:
  # 1. `d$exam_1[i]` looks for a column literally named "exam_1" in `d`; `$`
  #    does not use the string stored in `exam_1[i]`. Use `d[[exam_1[i]]]` to
  #    look a column up by a name held in a variable. This is what triggers
  #    the "argument is not numeric or logical" warnings from mean().
  # 2. The vector built by c(...) has 6 values, but it is assigned as a new
  #    column of `ci_exam`, which has only 3 rows -- hence "replacement has
  #    6 rows, data has 3".
  ci_exam[exam_name[i]] <- c(sum(d$exam_1[i],na.rm=T),
                             table(d$exam_1[i])[1]+table(d$exam_1[i])[2],
                             mean(d$exam_1[i],na.rm=T)*100,
                             sum(d$exam_2[i],na.rm=T),
                             table(d$exam_2[i])[1]+table(d$exam_2[i])[2],
                             mean(d$exam_2[i],na.rm=T)*100)
}

But it gives me this error:但是它给了我这个错误:

Error in `[<-.data.frame`(`*tmp*`, exam_name[i], value = c(0, NA, NA,  : 
  erstatning har 6 rækker, data har 3
In addition: Warning messages:
1: In mean.default(d$exam_1[i], na.rm = T) :
  argument er ikke numerisk eller logisk: returnerer NA
2: In mean.default(d$exam_2[i], na.rm = T) :
  argument er ikke numerisk eller logisk: returnerer NA

Tried to make a loop.试图做一个循环。 I was expecting to make the entire dataframe in one loop (all ciecg, ciecghol, ciecgholec combined in one, as shown from the command round(rbind(ciecg,ciecghol,ciecgholec),1))我期待在一个循环中制作整个数据帧(所有 ciecg、ciecghol、ciecgholec 合并为一个,如命令 round(rbind(ciecg,ciecghol,ciecgholec),1) 所示)

Step 1: turn your repeated code into a function.步骤1:把你重复的代码变成一个函数。

#' Summarise a baseline/latest pair of columns as a one-row data frame
#'
#' For each of the two columns the row holds the sum of the values, the number
#' of non-missing observations, and the mean times 100 (all ignoring `NA`).
#'
#' @param x Name (character string) of the baseline column.
#' @param y Name (character string) of the latest column.
#' @param z Row name to give the resulting one-row data frame.
#' @param data Data frame containing columns `x` and `y`. Defaults to the
#'   object `d` visible from where this function is defined, so existing
#'   three-argument calls keep working.
#' @return A one-row data frame with columns `baseline_cardinv_n`,
#'   `baseline_total_n`, `baseline_pct`, `latest_cardinv_n`, `latest_total_n`
#'   and `latest_pct`, and row name `z`.
do_thing <- function(x, y, z, data = d) {
  # Fail with a clear message instead of an obscure table()/mean() problem
  # when a column name is misspelled.
  unknown <- setdiff(c(x, y), names(data))
  if (length(unknown) > 0) {
    stop(
      "Column(s) not found in data: ", paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }

  summarise_column <- function(column) {
    values <- data[[column]]
    list(
      n = sum(values, na.rm = TRUE),
      # Count non-missing observations directly. Adding the first two cells of
      # table() yields NA whenever only one distinct value is present.
      total = sum(!is.na(values)),
      pct = mean(values, na.rm = TRUE) * 100
    )
  }

  baseline <- summarise_column(x)
  latest <- summarise_column(y)

  out <- data.frame(
    baseline_cardinv_n = baseline$n,
    baseline_total_n = baseline$total,
    baseline_pct = baseline$pct,
    latest_cardinv_n = latest$n,
    latest_total_n = latest$total,
    latest_pct = latest$pct
  )
  rownames(out) <- z
  out
}
# Example of use: build the "ECG only" row from its baseline and latest columns.
ciecg <- do_thing(x = "cardinv_ecg1", y = "cardinv_ecg2", z = "ECG only")

Step 2: iterate over the parameters and apply your function to each row.第2步:遍历参数,并对每一行调用你的函数。

# Note: to align with your first set of examples, the column names below are
# the ones used there (cardinv_ecg*, cardinv_ecg*_hol*, cardinv_ecg*_hol*_ec*),
# not the ones from your loop attempt.
# One row per output row: baseline column, latest column, and row label.
params <- data.frame(
  exam_1 = c("cardinv_ecg1", "cardinv_ecg1_hol1", "cardinv_ecg1_hol1_ec1"),
  exam_2 = c("cardinv_ecg2", "cardinv_ecg2_hol2", "cardinv_ecg2_hol2_ec2"),
  exam_name = c("ECG only", "ECG + HOL", "ECG + HOL + TTE")
)

library(dplyr)

# rowwise() makes mutate() evaluate do_thing() once per row of `params`; each
# one-row result is wrapped in list() so it can be stored in a list column.
# The outer parentheses print the result as well as assigning it.
(results_from_params <- params |>
  rowwise() |>
  mutate(res = list(do_thing(x = exam_1, y = exam_2, z = exam_name))))

# Stack the one-row results into a single data frame.
bind_rows(results_from_params$res)

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM