简体   繁体   中英

R conditional max function with NA values

I would like to write an R code to loop through a dataframe, populating a new column "Winner" based on the maximum value from 5 columns in a row. The code runs successfully when all NA values are changed to 0s, but not with the NAs. I tried na.rm = TRUE but to no avail.

  # NOTE(review): the enclosing `for (row in ...) {` header is not shown in this
  # excerpt; only the loop body and its closing brace are visible.
  # Pull the five candidates' percentages for the current row.
  Can1 <- df[row, "Candidate1_percent"]
  Can2  <- df[row, "Candidate2_percent"]
  Can3 <- df[row, "Candidate3_percent"]
  Can4 <- df[row, "Candidate4_percent"]
  Can5  <- df[row, "Candidate5_percent"]

  # Copy the name of the first candidate whose percentage equals the row maximum.
  # Without na.rm = TRUE, max() returns NA as soon as any argument is NA, so the
  # comparison is NA and `if` stops with an error. Even with na.rm = TRUE, a
  # comparison against an NA percentage is still NA and fails the same way.
  if(max(Can1,Can2,Can3,Can4,Can5) == Can1) {
    df[row, "Winner"] = df[row,"Candidate1_name"]
  } else if(max(Can1,Can2,Can3,Can4,Can5) == Can2) {
    df[row, "Winner"] = df[row,"Candidate2_name"]
  } else if(max(Can1,Can2,Can3,Can4,Can5) == Can3) {
    df[row, "Winner"] = df[row,"Candidate3_name"]
  } else if(max(Can1,Can2,Can3,Can4,Can5) == Can4) {
    df[row, "Winner"] = df[row,"Candidate4_name"]
  } else if(max(Can1,Can2,Can3,Can4,Can5) == Can5) {
    df[row, "Winner"] = df[row,"Candidate5_name"]
  }
}

Can anyone help?

Hard to reproduce this without more information. However, does the following work for you?

## Example data: one "<candidate>_percent" and one "<candidate>_name" column
## per candidate; NA marks a missing percentage.
test <- data.frame(
  Candidate1_percent = c(10, 20, 30, NA, 50),
  Candidate1_name = rep("One", 5),
  Candidate2_percent = c(5, 50, NA, 10, 30),
  Candidate2_name = rep("Two", 5),
  Candidate3_percent = c(40, 40, NA, 30, 25),
  Candidate3_name = rep("Three", 5),
  Candidate4_percent = c(90, 10, 1, 10, 80),
  Candidate4_name = rep("Four", 5),
  Candidate5_percent = c(44, 13, 82, 27, 12),
  Candidate5_name = rep("Five", 5)
)

## Locate the percent columns, and derive each matching name column from the
## column name itself so the lookup does not depend on column order.
pccols <- grep("percent", names(test), fixed = TRUE)
namecols <- sub("percent", "name", names(test)[pccols], fixed = TRUE)

## Row-wise maximum ignoring NA. pmax() works column-wise on the numeric
## vectors (no matrix coercion) and yields NA, not -Inf, for an all-NA row.
maximum <- do.call(pmax, c(unname(as.list(test[pccols])), na.rm = TRUE))

## Find who holds the maximum in each row and record their name.
test$Winner <- NA_character_ # prefill; stays NA when a row has no scores
for (r in seq_len(nrow(test))) {
  scores <- unlist(test[r, pccols], use.names = FALSE)
  # which.max() skips NA, returns only the first index on ties, and returns
  # integer(0) when every score is NA.
  best <- which.max(scores)
  if (length(best) == 1L) {
    test$Winner[r] <- test[r, namecols[best]]
  }
}

test

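If you must keep the loop you presented, then you need to check each value for NA before comparing it. Comparing anything with NA yields NA, and `if (NA)` stops with the error "missing value where TRUE/FALSE needed":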

# Example data; the sixth row has no percentages at all (every value is NA).
df <- data.frame(
  Candidate1_percent = c(10, 20, 30, NA, 50, NA),
  Candidate1_name = rep("One", 6),
  Candidate2_percent = c(5, 50, NA, 10, 30, NA),
  Candidate2_name = rep("Two", 6),
  Candidate3_percent = c(40, 40, NA, 30, 25, NA),
  Candidate3_name = rep("Three", 6),
  Candidate4_percent = c(90, 10, 1, 10, 80, NA),
  Candidate4_name = rep("Four", 6),
  Candidate5_percent = c(44, 13, 82, 27, 12, NA),
  Candidate5_name = rep("Five", 6)
)

# Create the result column up front so rows without a winner are an explicit NA.
df$Winner <- NA_character_

for (row in seq_len(nrow(df))) {
  Can1 <- df[row, "Candidate1_percent"]
  Can2 <- df[row, "Candidate2_percent"]
  Can3 <- df[row, "Candidate3_percent"]
  Can4 <- df[row, "Candidate4_percent"]
  Can5 <- df[row, "Candidate5_percent"]

  scores <- c(Can1, Can2, Can3, Can4, Can5)
  # With every score missing, max(..., na.rm = TRUE) would warn and return
  # -Inf; skip the row and leave Winner as NA.
  if (all(is.na(scores))) {
    next
  }

  tempMax <- max(scores, na.rm = TRUE)
  # `&&` short-circuits, so an NA percentage is never compared with tempMax
  # and the `if` condition is always a single TRUE or FALSE.
  if (!is.na(Can1) && tempMax == Can1) {
    df[row, "Winner"] <- df[row, "Candidate1_name"]
  } else if (!is.na(Can2) && tempMax == Can2) {
    df[row, "Winner"] <- df[row, "Candidate2_name"]
  } else if (!is.na(Can3) && tempMax == Can3) {
    df[row, "Winner"] <- df[row, "Candidate3_name"]
  } else if (!is.na(Can4) && tempMax == Can4) {
    df[row, "Winner"] <- df[row, "Candidate4_name"]
  } else if (!is.na(Can5) && tempMax == Can5) {
    df[row, "Winner"] <- df[row, "Candidate5_name"]
  }
}
df

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. If you have any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM