# Sample data: 16 visits across 10 records, laid out one record per line.
# Note that missing entries are the literal string "NA", not R's NA value.

# Identifier of the record each visit belongs to.
record_id <- c(
  1, 1, 1,
  2,
  3,
  4, 4,
  5,
  6,
  7,
  8, 8,
  9,
  10, 10, 10
)

# Visit dates as "YYYY-MM-DD" character strings.
visit_date <- c(
  "2018-09-24", "2018-12-05", "2019-03-01",
  "2018-10-03",
  "2018-10-01",
  "2018-10-05", "NA",
  "2018-08-25",
  "2018-09-19",
  "2018-10-01",
  "2018-09-27", "2021-09-07",
  "2018-10-03",
  "2018-10-08", "2019-03-22", "2019-07-12"
)

# Zero-based visit counter that restarts for every record.
repeat_instance <- c(
  0, 1, 2,
  0,
  0,
  0, 1,
  0,
  0,
  0,
  0, 1,
  0,
  0, 1, 2
)

# Time since the previous visit, as printed Period strings.
Time_Since_Appointment <- c(
  "NA", "72d 1H 0M 0S", "86d 0H 0M 0S",
  "NA",
  "NA",
  "NA", "NA",
  "NA",
  "NA",
  "NA",
  "NA", "1076d 0H 0M 0S",
  "NA",
  "NA", "165d 0H 0M 0S", "112d 0H 0M 0S"
)
# Question's attempt, step 1: within each record_id, subtract the previous
# row's Visit_Date (via lag()) from the current row's Visit_Date. The first row
# of every group has no previous row, so its difference is NA.
# NOTE(review): `data` is not constructed in this snippet, and the sample
# vector above is spelled `visit_date` (lower case) -- confirm that the data
# frame really has a `Visit_Date` column.
# NOTE(review): there is no ungroup(), so data1 stays grouped by record_id.
data1 <- data %>%
group_by(record_id) %>%
mutate(Time_Since_Appointment = Visit_Date - lag(Visit_Date))
# Turn the differences into a lubridate Period for display.
# NOTE(review): seconds_to_period() treats its input as a number of seconds;
# presumably the difference above is expressed in seconds -- verify its units.
data1$Time_Since_Appointment <- seconds_to_period(data1$Time_Since_Appointment)
# Question's attempt, step 2: derive a Retention label from
# Time_Since_Appointment by comparing it against 90, 91 and 180.
# NOTE(review): `test` is not defined in this snippet -- presumably it is the
# data frame carrying the Time_Since_Appointment column; confirm.
# NOTE(review): if Time_Since_Appointment is a lubridate Period, its numeric
# value is in seconds (as.numeric(period(1, "day")) is 86400), so these
# thresholds do not act as day counts.
# NOTE(review): values in (90, 91] match none of the three conditions, and
# rows where Time_Since_Appointment is NA match none either; case_when()
# returns NA for both.
test1 <- test %>%
mutate(Retention =
case_when(Time_Since_Appointment <= 90 ~ "Retained within 3 months",
Time_Since_Appointment > 91 & Time_Since_Appointment <= 180 ~ "Retained within 6 months",
Time_Since_Appointment > 180 ~ "Not Retained"))
I am trying to create a variable that assigns a category based on the time since the previous appointment; if there was no follow-up appointment, it should instead use the time between the first appointment and today.
These calculated times will then be used to create 3 categories: Retained within 3 months (<90 days), Retained within 6 months (90 - 180 days), and Not retained (>180 days).
I have included the code I have used so far with some success up until the point in which I used dplyr and mutate to try and create a new variable called Retention.
The problem appears to be that you are assuming a period
object is comparable with the number of days, but in fact it is converted to the number of seconds when treated as a number, as you can confirm by doing
# A one-day Period compares as greater than 1000 because its numeric value is
# expressed in seconds (86400 per day), not in days.
period(1, "day") > 1000
#> [1] TRUE
as.numeric(period(1, "day"))
#> [1] 86400
So you need to divide the number of seconds by 86400 to get the number of days. I would also tend to use cut
rather than case_when
for dealing with numeric data:
library(dplyr)
library(lubridate)
# For every record, compute the time since the previous visit and bin it into
# a retention category.
#
# Expects `data` to have the columns `record_id` and `visit_date`; the first
# visit of each record has no predecessor, so both derived columns are NA there.
# The result stays grouped by record_id; add ungroup() if later steps should
# not operate per record.
data %>%
  group_by(record_id) %>%
  mutate(
    # Work on calendar dates so the gap is a whole number of days.
    visit_date = as.Date(visit_date),
    # Ask for seconds explicitly: seconds_to_period() reads its input as
    # seconds, while a bare `-` on dates picks the difftime units itself.
    Time_Since_Appointment = seconds_to_period(
      as.numeric(difftime(visit_date, lag(visit_date), units = "secs"))
    ),
    # as.numeric() on a Period yields seconds; 86400 seconds make one day.
    # include.lowest = TRUE keeps a same-day follow-up (0 days) in the first
    # bin instead of turning it into NA.
    Retention = cut(
      as.numeric(Time_Since_Appointment) / 86400,
      breaks = c(0, 90, 180, Inf),
      labels = c(
        "Retained within 3 months",
        "Retained within 6 months",
        "Not retained"
      ),
      include.lowest = TRUE
    )
  )
#> # A tibble: 16 x 5
#> # Groups: record_id [10]
#> record_id visit_date repeat_instance Time_Since_Appointment Retention
#> <dbl> <date> <dbl> <Period> <fct>
#> 1 1 2018-09-24 0 NA NA
#> 2 1 2018-12-05 1 72d 0H 0M 0S Retained within 3 months
#> 3 1 2019-03-01 2 86d 0H 0M 0S Retained within 3 months
#> 4 2 2018-10-03 0 NA NA
#> 5 3 2018-10-01 0 NA NA
#> 6 4 2018-10-05 0 NA NA
#> 7 4 NA 1 NA NA
#> 8 5 2018-08-25 0 NA NA
#> 9 6 2018-09-19 0 NA NA
#> 10 7 2018-10-01 0 NA NA
#> 11 8 2018-09-27 0 NA NA
#> 12 8 2021-09-07 1 1076d 0H 0M 0S Not retained
#> 13 9 2018-10-03 0 NA NA
#> 14 10 2018-10-08 0 NA NA
#> 15 10 2019-03-22 1 165d 0H 0M 0S Retained within 6 months
#> 16 10 2019-07-12 2 112d 0H 0M 0S Retained within 6 months
Created on 2022-08-26 with reprex v2.0.2
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.