简体   繁体   中英

Efficiently looping through rows and columns with if statements

I have two vectors of column names. Both vectors have the same length, and corresponding names are identical except for a single number in the prefix (q1h10_ versus q1h4_).

# Names of the columns that are scanned for the value 5: q1h10_1 ... q1h10_9.
columns <- sprintf("q1h10_%d", seq_len(9))
# Names of the paired columns that hold the number: q1h4_1 ... q1h4_9.
columns2 <- sprintf("q1h4_%d", seq_len(9))

I'd like to loop through the rows of a data frame and through the columns named in the first vector and, whenever a cell contains a 5, look up the equivalent column in the second vector and retrieve the number in it. There are further conditions for choosing the result, which you can see in the code below.

# Row-by-row scan over the paired columns in `columns` / `columns2`.
# `value` is always a character vector: it starts as "0" for every row and is
# only overwritten for rows in which some column of `columns` holds a 5.
# (Starting from a numeric vector would make the result type depend on whether
# any row ever matches.)
value <- rep("0", nrow(psid))

for (i in seq_len(nrow(psid))) {
  for (x in seq_along(columns)) {
    # Only a column holding a 5 in this row can change value[i].
    if (!(5 %in% psid[i, columns[x]])) {
      next
    }
    # TRUE when the paired column holds 97, 98 or 99. `%in%` never returns NA,
    # so an NA in the paired column counts as "not 97:99" instead of making
    # the `if` below fail.
    flagged <- psid[i, columns2[x]] %in% 97:99
    if (!flagged) {
      # Characters 7-8 of the column name are its numeric suffix.
      value[i] <- substr(columns[x], 7, 8)
    } else if (x != 1) {
      # Paired value is 97:99: use the suffix of the previous column instead.
      value[i] <- substr(columns[x - 1], 7, 8)
    } else {
      # Paired value is 97:99 in the first column: there is no previous column.
      value[i] <- "0"
    }
    # No break: a later column holding a 5 overwrites an earlier one.
  }
}
value
[1] "1" "2" "2" "3" "4" "0" "0" "0" "1" "0" "3" "1" "3" "2" "3" "0" "1" "3" "1" "1" "2"
[22] "2" "2" "1" "2" "3" "1" "1" "0" "1"

I managed to get my desired result, but I find this a slow way of achieving it. I've tried translating this to the apply family, but I'm not an expert and I can't manage to do it correctly. If anyone has a faster, perhaps more readable solution, any help would be appreciated.

Data :

  # Example data as produced by dput(): a data.frame with 30 rows and 18
  # columns. q1h10_1 ... q1h10_9 are integer columns; q1h4_1 ... q1h4_9 are
  # double columns.
  psid <- structure(list(q1h10_1 = c(5L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 5L, 
0L, 1L, 5L, 1L, 1L, 1L, 0L, 5L, 1L, 5L, 5L, 1L, 1L, 1L, 5L, 1L, 
1L, 5L, 1L, 0L, 5L), q1h10_2 = c(0L, 5L, 5L, 1L, 1L, 0L, 0L, 
0L, 0L, 0L, 1L, 0L, 1L, 5L, 1L, 0L, 0L, 1L, 0L, 0L, 5L, 5L, 5L, 
0L, 5L, 1L, 0L, 5L, 0L, 0L), q1h10_3 = c(0L, 0L, 0L, 5L, 1L, 
0L, 0L, 0L, 0L, 0L, 5L, 0L, 5L, 0L, 5L, 0L, 0L, 5L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 5L, 0L, 0L, 0L, 0L), q1h10_4 = c(0L, 0L, 0L, 
0L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), q1h10_5 = c(0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), q1h10_6 = c(0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), q1h10_7 = c(0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), q1h10_8 = c(0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), q1h10_9 = c(0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), q1h4_1 = c(1, 
2, 6, 4, 4, 0, 0, 0, 4, 0, 3, 4, 4, 4, 1, 0, 4, 4, 3, 1, 4, 6, 
4, 4, 4, 4, 4, 3, 0, 6), q1h4_2 = c(0, 3, 4, 3, 3, 0, 0, 0, 0, 
0, 2, 0, 6, 4, 6, 0, 0, 4, 0, 0, 6, 4, 4, 0, 6, 6, 0, 97, 0, 
0), q1h4_3 = c(0, 0, 0, 6, 2, 0, 0, 0, 0, 0, 6, 0, 6, 0, 6, 0, 
0, 6, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0), q1h4_4 = c(0, 0, 0, 
0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0), q1h4_5 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), q1h4_6 = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0), q1h4_7 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
), q1h4_8 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), q1h4_9 = c(0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0)), class = "data.frame", .Names = c("q1h10_1", 
"q1h10_2", "q1h10_3", "q1h10_4", "q1h10_5", "q1h10_6", "q1h10_7", 
"q1h10_8", "q1h10_9", "q1h4_1", "q1h4_2", "q1h4_3", "q1h4_4", 
"q1h4_5", "q1h4_6", "q1h4_7", "q1h4_8", "q1h4_9"), row.names = c(NA, 
-30L))

We can execute your logic more efficiently using:

# Vectorized counterpart of the row/column loop. Uses `psid`, `columns` and
# `columns2` as defined earlier.
# Every row starts at "0"; only rows that contain a 5 are overwritten below.
value <- rep("0", nrow(psid))

# cond1[i, x] is TRUE when psid[i, columns[x]] equals 5.
# drop = FALSE keeps a data frame even if `columns` has a single name.
cond1 <- 5 == psid[, columns, drop = FALSE]
# cond2[i, x] is TRUE when psid[i, columns2[x]] is 97, 98 or 99. `%in%` drops
# the dimensions, so the result is reshaped with explicit dimensions.
cond2 <- matrix(
  as.matrix(psid[, columns2, drop = FALSE]) %in% 97:99,
  nrow = nrow(psid), ncol = length(columns2)
)

# First `if` of the loop: a 5 whose paired value is not 97:99.
ind1 <- which(cond1 & !cond2, arr.ind = TRUE)
if (length(ind1) > 0) value[ind1[, 1]] <- substr(columns[ind1[, 2]], 7, 8)

# Rightmost column per row that satisfied the first `if` (0 when there is
# none). which() reports hits column by column, so for a row with several hits
# the last write is the rightmost column.
last_ok <- integer(nrow(psid))
last_ok[ind1[, 1]] <- ind1[, 2]

# First `else if` of the loop: a 5 whose paired value is 97:99, not in column 1.
ind2 <- which(cond1 & cond2, arr.ind = TRUE)
ind2 <- ind2[ind2[, 2] != 1, , drop = FALSE]
# In the loop a later column overwrites an earlier one. Keep a 97:99 hit only
# when it lies to the right of every hit already written from ind1; otherwise
# it would wrongly replace the result of a later column.
ind2 <- ind2[ind2[, 2] > last_ok[ind2[, 1]], , drop = FALSE]
if (length(ind2) > 0) value[ind2[, 1]] <- substr(columns[ind2[, 2] - 1], 7, 8)

We first compute two bool "matrices":

  1. cond1 is TRUE at row i and column x if psid[i, columns[x]] == 5 ; FALSE otherwise. This computation is performed vectorized over all rows and all columns in columns of psid .
  2. cond2 is TRUE at row i and column x if psid[i, columns2[x]] is either 97 , 98 , or 99 . Here, we convert psid[,columns2] to a matrix and then evaluate whether its elements are %in% (97:99) . The result is then reshaped back to a matrix with the original dimensions.

The condition cond1 & !cond2 is the condition in your first if . Once we have this evaluated for all rows and all columns in psid , we can use which with arr.ind=TRUE to retrieve the row and column indices for which this condition is TRUE . We then subset using these rows and columns to set value accordingly.

For the condition in your first elseif , we further subset the result from which(cond1 & cond2, arr.ind=TRUE) by keeping only those columns (or x ) which are not 1 . Again, we then subset using these rows and columns to set value accordingly.

For the complement of these two conditions, we leave value as "0" . Note that which can return an empty result (i.e., a result of length 0), and we must check for that as an edge case.

The result using your data is as expected:

# Display the vectorized result; the lines starting with ## reproduce the
# console output.
print(value)
## [1] "1" "2" "2" "3" "4" "0" "0" "0" "1" "0" "3" "1" "3" "2" "3" "0" "1" "3" "1" "1" "2" "2"
##[23] "2" "1" "2" "3" "1" "1" "0" "1"

I think the following code works for your first condition (you can easily adapt it for the other two):

# First condition only: for each row, find the column of `columns` that holds
# a 5 while the paired column of `columns2` is not 97, 98 or 99.
# The paired test is done column by column with `%in%`; comparing the whole
# row against 97:99 with `!=` would recycle 97:99 positionally and miss
# matches.
vals <- as.matrix(psid[, c(columns, columns2), drop = FALSE])
ind <- vapply(seq_len(nrow(vals)), function(i) {
  hit <- which(vals[i, columns] %in% 5 & !(vals[i, columns2] %in% 97:99))
  # 0 marks "no matching column"; with several matches the rightmost is used.
  if (length(hit) > 0) hit[length(hit)] else 0L
}, integer(1))
values2 <- rep("0", nrow(psid))
# Characters 7-8 of the column name are its numeric suffix.
values2[ind > 0] <- substr(columns[ind[ind > 0]], 7, 8)
values2
##  [1] "1" "2" "2" "3" "4" "0" "0" "0" "1" "0" "3" "1" "3" "2" "3"
## [16] "0" "1" "3" "1" "1" "2" "2" "2" "1" "2" "3" "1" "0" "0" "1"

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM