简体   繁体   中英

How can i split one row of a time period into multiple rows based on hourly base on date and time format, In R

How to do below task in R?

df <- tribble(
    ~ID,         ~StartTime,              ~EndTime    
    , 01, "2018-05-14 09:30:00", "2018-05-14 12:10:00"
    , 02, "2018-05-14 21:30:00", "2018-05-15 02:00:00"
    , 03, "2018-05-15 07:00:00", "2018-05-16 22:30:00"
    , 04, "2018-05-16 23:00:00", "2018-05-16 23:40:00"
    , 05, "2018-05-17 01:00:00", "2018-05-19 15:00:00"
)

df$StartDate <- as.POSIXlt(df$StartDate, tryFormats = "%Y-%m-%d %H:%M:%S")
df$EndDate   <- as.POSIXlt(df$EndDate, tryFormats = "%Y-%m-%d %H:%M:%S")

Note: Multiple duplicate rows needs to be created from single row,

For example Original Single row:

01, "2018-05-14 09:30:00", "2018-05-14 12:10:00"

Post processing Multiple row:

01, "2018-05-14 09:30:00", "2018-05-14 10:00:00"
01, "2018-05-14 10:00:00", "2018-05-14 11:00:00"
01, "2018-05-14 11:00:00", "2018-05-14 12:00:00"
01, "2018-05-14 12:00:00", "2018-05-14 12:10:00"

Hoping my request is clear.

We can write a function which generates an hourly sequence between two timestamps. Using map2 we call that function for every pair of StartTime and EndTime and expand the dataframe.

library(dplyr)
library(lubridate)

generate_hourly_time <- function(x, y) {
   EndTime <- ceiling_date(x, 'hour')
   EndTime2 <- seq(EndTime, floor_date(y, 'hour'), 'hour')
   tibble(StartTime = c(x, EndTime2), EndTime = c(EndTime2, y))
}


df %>%
  mutate(across(-1, ymd_hms)) %>%
  #For dplyr < 1.0.0
  #mutate_at(-1, ymd_hms) %>%
  mutate(time = purrr::map2(StartTime, EndTime, generate_hourly_time)) %>%
  select(ID, time) %>%
  tidyr::unnest(time)


# A tibble: 117 x 3
#      ID StartTime           EndTime           
#   <dbl> <dttm>              <dttm>             
# 1     1 2018-05-14 09:30:00 2018-05-14 10:00:00
# 2     1 2018-05-14 10:00:00 2018-05-14 11:00:00
# 3     1 2018-05-14 11:00:00 2018-05-14 12:00:00
# 4     1 2018-05-14 12:00:00 2018-05-14 12:10:00
# 5     2 2018-05-14 21:30:00 2018-05-14 22:00:00
# 6     2 2018-05-14 22:00:00 2018-05-14 23:00:00
# 7     2 2018-05-14 23:00:00 2018-05-15 00:00:00
# 8     2 2018-05-15 00:00:00 2018-05-15 01:00:00
# 9     2 2018-05-15 01:00:00 2018-05-15 02:00:00
#10     2 2018-05-15 02:00:00 2018-05-15 02:00:00
# … with 107 more rows

I hope it's useful:

df <- tribble(
  ~ID,         ~StartTime,              ~EndTime    
  , 01, "2018-05-14 09:30:00", "2018-05-14 12:10:00"
  , 01, "2018-05-14 09:30:00", "2018-05-14 12:10:00"
  , 01, "2018-05-14 09:30:00", "2018-05-14 12:10:00"
  , 01, "2018-05-14 09:30:00", "2018-05-14 12:10:00"
  , 01, "2018-05-14 09:30:00", "2018-05-14 12:10:00"
  , 02, "2018-05-14 21:30:00", "2018-05-15 02:00:00"
  , 03, "2018-05-15 07:00:00", "2018-05-16 22:30:00"
  , 04, "2018-05-16 23:00:00", "2018-05-16 23:40:00"
  , 05, "2018-05-17 01:00:00", "2018-05-19 15:00:00"
)

nrow(df)

id.unique <- unique(df[,'ID'])
id.unique.numeric <- as.numeric(unlist(id.unique))

id.i <- id.unique.numeric

for (i in id.i) {
  out.pre <- subset(df, ID==i)
  name.out <- paste('df', i, '<-out.pre', sep = '')
  eval(parse(text=name.out))
}

df1

FM

You could also do:

library(tidyverse)
df %>%
  pivot_longer(-ID)%>%
  group_by(ID)%>%
  mutate(start = list(unique(c(value[1],seq(strptime(value[1],"%F %H"),
                                     value[2],"1 hour")[-1],value[2]))),
         name = NULL, value = NULL)%>%
  slice(1)%>%
  unnest(start)%>%
  mutate(end = lead(start,1,last(start)))
# A tibble: 117 x 3
# Groups:   ID [5]
      ID start               end                
   <dbl> <dttm>              <dttm>             
 1     1 2018-05-14 09:30:00 2018-05-14 10:00:00
 2     1 2018-05-14 10:00:00 2018-05-14 11:00:00
 3     1 2018-05-14 11:00:00 2018-05-14 12:00:00
 4     1 2018-05-14 12:00:00 2018-05-14 12:10:00
 5     1 2018-05-14 12:10:00 2018-05-14 12:10:00
 6     2 2018-05-14 21:30:00 2018-05-14 22:00:00
 7     2 2018-05-14 22:00:00 2018-05-14 23:00:00
 8     2 2018-05-14 23:00:00 2018-05-15 00:00:00
 9     2 2018-05-15 00:00:00 2018-05-15 01:00:00
10     2 2018-05-15 01:00:00 2018-05-15 02:00:00
# ... with 107 more rows

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM