简体   繁体   中英

Mutate using ifelse with a condition in R

I want to create a data frame of daily temperatures.

Time frame : 2020-01-01 ~ 2020-12-31
Temperature :
2020-01-01 ~ 2020-02-29 and 2020-10-01 ~ 2020-12-31 : random integer between 10 ~ 24
2020-03-01 ~ 2020-05-31 : random integer between 5 ~ 17
2020-06-01 ~ 2020-09-30 : random integer between 1 ~ 8

my code is

library(tidyverse)
library(lubridate)
library(summarytools)
library(dplyr)

# Build a data frame with one row per calendar day of 2020.
start <- as.POSIXct("2020-01-01", "%Y-%m-%d", tz = "UTC")
end   <- as.POSIXct("2020-12-31", "%Y-%m-%d", tz = "UTC")

remT <- seq(start, end, by = "1 day") 
date_df <- as.data.frame(remT)
date_df <- setNames(date_df, c("Date")) 
date_df <- date_df %>% arrange(Date)

# NOTE: a `for` loop evaluates to NULL in R, so cond_1 and cond_2 are both
# NULL rather than logical vectors. The comparisons in the loop bodies are
# computed once per element and then discarded. `&&` / `||` are also the
# scalar operators; element-wise masks need `&` / `|`.
cond_1 <- for(i in date_df$Date){(i >= as.Date('2020-01-01') && i <= as.Date('2020-02-29')) || (i >= as.Date('2020-10-01') && i <= as.Date('2020-12-31'))}
cond_2 <- for(j in date_df$Date)(j >= as.Date('2020-06-01') && j <= as.Date('2020-09-30'))

# Pre-drawn temperatures, one vector per period. The lengths match the number
# of days in each period (152 + 122 + 92 = 366), but each vector on its own is
# shorter than the data frame.
x <- sample(10:24, 152, replace=TRUE)
y <- sample(1:8, 122, replace=TRUE)
z <- sample(5:17, 92, replace=TRUE)


# ifelse() returns a result shaped like its test argument. With cond_1 being
# NULL the result has length 0, which mutate() rejects because a new column
# must have either one value or one value per row.
date_df <- date_df %>%
  mutate(Test = 
           ifelse(cond_1, x,
                     ifelse(cond_2, y , z)
                  )
         )

Unfortunately, this code returned the following error message:

Error: Problem with mutate() column Test . Test = ifelse(cond_1, x, ifelse(cond_2, y, z)) . Test must be size 366 or 1, not 0.

How to solve this error?

mutate() needs every new column to have either length 1 or one value per row. case_when(), ifelse() and if_else() then pick, row by row, which candidate value to keep according to the given condition. So sample 366 values (one per row) for every condition.

The best way is to use n() as the sample size instead of a hard-coded number. Moreover, you can do all of this in a single pipe:

library(tidyverse)
library(lubridate, warn.conflicts = FALSE)

# Fix the RNG state so the sampled temperatures are reproducible.
set.seed(123)

# One row per day of 2020. Every case_when() branch draws n() values (one per
# row), so each right-hand side already has the full column length;
# case_when() then keeps, for each row, the draw from the first branch whose
# condition is TRUE.
df <- data.frame(
  Date = seq.Date(as.Date("2020-01-01"), as.Date("2020-12-31"), by = 1)
) %>%
  mutate(
    temp = case_when(
      month(Date) %in% c(1:2, 10:12) ~ sample(10:24, n(), replace = TRUE),
      month(Date) %in% 3:5 ~ sample(5:17, n(), replace = TRUE),
      TRUE ~ sample(1:8, n(), replace = TRUE)
    )
  )

head(df, 5)
#>         Date temp
#> 1 2020-01-01   24
#> 2 2020-01-02   24
#> 3 2020-01-03   12
#> 4 2020-01-04   23
#> 5 2020-01-05   12

tail(df, 5)
#>           Date temp
#> 362 2020-12-27   15
#> 363 2020-12-28   19
#> 364 2020-12-29   19
#> 365 2020-12-30   15
#> 366 2020-12-31   21

Created on 2021-06-17 by the reprex package (v2.0.0)

You can use sapply with switch and sample :

library(lubridate)

# Daily timestamps covering all of 2020 (UTC).
start <- as.POSIXct("2020-01-01", format = "%Y-%m-%d", tz = "UTC")
end <- as.POSIXct("2020-12-31", format = "%Y-%m-%d", tz = "UTC")
remT <- seq(start, end, by = "1 day")

# Draw one temperature for a month given as a character key ("1" .. "12").
# Empty switch() arms fall through to the next arm that has a value, so each
# group of months shares a single sample() call.
draw_temp <- function(m) {
  switch(m,
    "1" = , "2" = , "10" = , "11" = , "12" = sample(10:24, 1),
    "3" = , "4" = , "5" = sample(5:17, 1),
    "6" = , "7" = , "8" = , "9" = sample(1:8, 1)
  )
}

set.seed(1)
month_keys <- as.character(month(remT))
temp <- sapply(month_keys, draw_temp)

# Check that each period only contains values from its intended range.
sort(unique(temp[month(remT) %in% c(1:2, 10:12)]))
#R>  [1] 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
sort(unique(temp[month(remT) %in% 3:5]))
#R>  [1]  5  6  7  8  9 10 11 12 13 14 15 16 17
sort(unique(temp[month(remT) %in% 6:9]))
#R> [1] 1 2 3 4 5 6 7 8

A slightly faster solution may be to use vapply , switch and sample.int :

# Draw one integer temperature for a month given as a character key.
# sample.int(k, 1) yields a value in 1..k; the integer offset shifts it into
# the target range (e.g. 1..15 + 9 gives 10..24).
draw_temp_int <- function(m) {
  switch(m,
    "1" = , "2" = , "10" = , "11" = , "12" = sample.int(15, 1) + 9L,
    "3" = , "4" = , "5" = sample.int(13, 1) + 4L,
    "6" = , "7" = , "8" = , "9" = sample.int(8, 1)
  )
}

set.seed(1)
# vapply() checks that every draw is exactly one integer.
temp <- vapply(
  as.character(month(remT)),
  draw_temp_int,
  FUN.VALUE = integer(1)
)

# Check that each period only contains values from its intended range.
sort(unique(temp[month(remT) %in% c(1:2, 10:12)]))
#R>  [1] 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
sort(unique(temp[month(remT) %in% 3:5]))
#R>  [1]  5  6  7  8  9 10 11 12 13 14 15 16 17
sort(unique(temp[month(remT) %in% 6:9]))
#R> [1] 1 2 3 4 5 6 7 8

Alternatively, as AnilGoyal points out, ifelse() needs its yes and no values to be the same length as its first (test) argument. Thus, the following works:

set.seed(1)

n_days <- length(remT)
mon <- month(remT)

# Each temperature band gets a full-length draw (one candidate per day) so
# that ifelse() can choose element-wise between vectors of equal length.
cold_draw <- sample(10:24, n_days, TRUE)
mild_draw <- sample(5:17, n_days, TRUE)
warm_draw <- sample(1:8, n_days, TRUE)

is_cold <- mon %in% c(1:2, 10:12)
is_mild <- mon %in% 3:5

temp <- ifelse(is_cold, cold_draw, ifelse(is_mild, mild_draw, warm_draw))

# Check that each period only contains values from its intended range.
sort(unique(temp[month(remT) %in% c(1:2, 10:12)]))
#R>  [1] 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
sort(unique(temp[month(remT) %in% 3:5]))
#R>  [1]  5  6  7  8  9 10 11 12 13 14 15 16 17
sort(unique(temp[month(remT) %in% 6:9]))
#R> [1] 1 2 3 4 5 6 7 8

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM