简体   繁体   中英

Can I create this dataframe efficiently with a loop in Rstudio?

I want to optimize my code. I have a lot of repetitions and I'm sure this can be done using a loop.

# Summary row "ECG only": for d$cardinv_ecg1 (baseline_*) and d$cardinv_ecg2
# (latest_*), the sum, the combined count of the first two table() cells, and
# the mean expressed as a percentage. NA values are ignored throughout.
ciecg <- data.frame(
  baseline_cardinv_n = sum(d$cardinv_ecg1, na.rm = TRUE),
  baseline_total_n = table(d$cardinv_ecg1)[1] + table(d$cardinv_ecg1)[2],
  baseline_pct = mean(d$cardinv_ecg1, na.rm = TRUE) * 100,
  latest_cardinv_n = sum(d$cardinv_ecg2, na.rm = TRUE),
  latest_total_n = table(d$cardinv_ecg2)[1] + table(d$cardinv_ecg2)[2],
  latest_pct = mean(d$cardinv_ecg2, na.rm = TRUE) * 100
)
rownames(ciecg) <- "ECG only"
ciecg

# Summary row "ECG + HOL": same statistics as the other rows, computed from
# d$cardinv_ecg1_hol1 (baseline_*) and d$cardinv_ecg2_hol2 (latest_*).
# NA values are ignored throughout.
ciecghol <- data.frame(
  baseline_cardinv_n = sum(d$cardinv_ecg1_hol1, na.rm = TRUE),
  baseline_total_n = table(d$cardinv_ecg1_hol1)[1] +
    table(d$cardinv_ecg1_hol1)[2],
  baseline_pct = mean(d$cardinv_ecg1_hol1, na.rm = TRUE) * 100,
  latest_cardinv_n = sum(d$cardinv_ecg2_hol2, na.rm = TRUE),
  latest_total_n = table(d$cardinv_ecg2_hol2)[1] +
    table(d$cardinv_ecg2_hol2)[2],
  latest_pct = mean(d$cardinv_ecg2_hol2, na.rm = TRUE) * 100
)
rownames(ciecghol) <- "ECG + HOL"
ciecghol

# Summary row "ECG + HOL + TTE": same statistics as the other rows, computed
# from d$cardinv_ecg1_hol1_ec1 (baseline_*) and d$cardinv_ecg2_hol2_ec2
# (latest_*). NA values are ignored throughout.
ciecgholec <- data.frame(
  baseline_cardinv_n = sum(d$cardinv_ecg1_hol1_ec1, na.rm = TRUE),
  baseline_total_n = table(d$cardinv_ecg1_hol1_ec1)[1] +
    table(d$cardinv_ecg1_hol1_ec1)[2],
  baseline_pct = mean(d$cardinv_ecg1_hol1_ec1, na.rm = TRUE) * 100,
  latest_cardinv_n = sum(d$cardinv_ecg2_hol2_ec2, na.rm = TRUE),
  latest_total_n = table(d$cardinv_ecg2_hol2_ec2)[1] +
    table(d$cardinv_ecg2_hol2_ec2)[2],
  latest_pct = mean(d$cardinv_ecg2_hol2_ec2, na.rm = TRUE) * 100
)
rownames(ciecgholec) <- "ECG + HOL + TTE"
ciecgholec


# Stack the three one-row data frames and round every value to one decimal.
round(rbind(ciecg,ciecghol,ciecgholec),1)

If I print the last line in the console I get this:

> round(rbind(ciecg,ciecghol,ciecgholec),1)
                baseline_cardinv_n baseline_total_n baseline_pct latest_cardinv_n latest_total_n latest_pct
ECG only                        47              194         24.2               83            169       49.1
ECG + HOL                       61              183         33.3               59            120       49.2
ECG + HOL + TTE                 73              180         40.6               65            113       57.5

I would like to make that same code with a loop to shorten my code. Here is what I have tried (I only tried to make the ciecg to make it simpler for myself):

# Attempted loop version: tries to add one column per exam to ci_exam.
# It fails for the two reasons noted inside the loop.
ci_exam <- data.frame(matrix(nrow = 3,ncol = 0))
exam_1 <- c("cardinv_ecg1","cardinv_hol1","cardinv_ec1")
exam_2 <- c("cardinv_ecg2","cardinv_hol2","cardinv_ec2")
exam_name <- c("ECG only","HOL only","EC only")
for (i in 1:3) {
  # NOTE(review): `d$exam_1[i]` looks up a column literally named "exam_1" in
  # `d`, not the column whose name is stored in exam_1[i]; `d[[exam_1[i]]]`
  # would select by the stored name. Presumably `d` has no "exam_1" column, so
  # the lookup yields NULL, which is why mean() warns and returns NA.
  # NOTE(review): the vector built below has 6 values while ci_exam was
  # created with 3 rows, so the column assignment cannot succeed.
  ci_exam[exam_name[i]] <- c(sum(d$exam_1[i],na.rm=T),
                             table(d$exam_1[i])[1]+table(d$exam_1[i])[2],
                             mean(d$exam_1[i],na.rm=T)*100,
                             sum(d$exam_2[i],na.rm=T),
                             table(d$exam_2[i])[1]+table(d$exam_2[i])[2],
                             mean(d$exam_2[i],na.rm=T)*100)
}

But gives me this error

Error in `[<-.data.frame`(`*tmp*`, exam_name[i], value = c(0, NA, NA,  : 
  erstatning har 6 rækker, data har 3
In addition: Warning messages:
1: In mean.default(d$exam_1[i], na.rm = T) :
  argument er ikke numerisk eller logisk: returnerer NA
2: In mean.default(d$exam_2[i], na.rm = T) :
  argument er ikke numerisk eller logisk: returnerer NA

I tried to write a loop. I expected to build the entire data frame in one loop, with ciecg, ciecghol and ciecgholec combined into one, as produced by the command round(rbind(ciecg,ciecghol,ciecgholec),1).

Step 1: turn your repeated code into a function

#' Summarise a baseline/latest pair of columns as a one-row data frame
#'
#' @param x Name (string) of the baseline column in `data`.
#' @param y Name (string) of the latest column in `data`.
#' @param z Row name assigned to the returned row.
#' @param data Data frame holding the columns. Defaults to the object `d`
#'   found from the environment where this function is defined.
#' @return A one-row data frame with columns `baseline_cardinv_n`,
#'   `baseline_total_n`, `baseline_pct`, `latest_cardinv_n`, `latest_total_n`
#'   and `latest_pct`: for each input column the sum, the number of
#'   non-missing values, and the mean as a percentage (NAs ignored).
do_thing <- function(x, y, z, data = d) {
  # Fail early with a clear message instead of an obscure downstream error.
  missing_cols <- setdiff(c(x, y), names(data))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in data: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  baseline <- data[[x]]
  latest <- data[[y]]

  this_ <- data.frame(
    baseline_cardinv_n = sum(baseline, na.rm = TRUE),
    # Count non-missing values directly: table(v)[1] + table(v)[2] is NA
    # whenever only one distinct value is present in the column.
    baseline_total_n = sum(!is.na(baseline)),
    baseline_pct = mean(baseline, na.rm = TRUE) * 100,
    latest_cardinv_n = sum(latest, na.rm = TRUE),
    latest_total_n = sum(!is.na(latest)),
    latest_pct = mean(latest, na.rm = TRUE) * 100
  )
  rownames(this_) <- z
  this_
}
# Example of use: build the "ECG only" row with a single call.
ciecg <- do_thing(x = "cardinv_ecg1", y = "cardinv_ecg2", z = "ECG only")

Step 2: iterate over the parameters, calling your function for each row

# One row of parameters per summary line: baseline column, latest column and
# row label. Note: to align with the first set of examples in the question,
# the column names used here are the ones from those examples rather than the
# ones from the loop attempt.
params <- data.frame(
  exam_1 = c("cardinv_ecg1", "cardinv_ecg1_hol1", "cardinv_ecg1_hol1_ec1"),
  exam_2 = c("cardinv_ecg2", "cardinv_ecg2_hol2", "cardinv_ecg2_hol2_ec2"),
  exam_name = c("ECG only", "ECG + HOL", "ECG + HOL + TTE")
)

library(dplyr)

# Call do_thing() once per row of params and keep each one-row data frame in
# the list-column `res`; then print the intermediate result.
results_from_params <- mutate(
  rowwise(params),
  res = list(do_thing(x = exam_1, y = exam_2, z = exam_name))
)
results_from_params

# Stack the per-row data frames into a single table.
bind_rows(results_from_params$res)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM