繁体   English   中英

后续行两列之间的因子变量的时差

[英]Time Difference for a factor variable between two columns on subsequent rows

我有一个包含 Entry Time 和 Exit Time 的数据框。我想得到一列 Stay Duration,即某一行的退出时间与下一行的进入时间之间的差。数据框已按日期和进入时间排序。

对于第一行,停留时间是从第一行的退出时间 6:54:50 到下一行的进入时间 7:34:36 之间的间隔,即 0:39:46。

我遇到的问题出现在以下情况:当 Exit_Time 为 22:15:05,而下一行的 Entry_Time 为 6:02:46(即跨越了午夜)时,我需要 Stay Duration 为 7:47:41。

dput(df)
structure(list(JRNY_ID_NUM = c(115492027250, 115492027250, 115523231209, 
115523231209, 115526742250, 115526742250, 115509240124, 115509240124, 
115539253765, 115539253765, 115570245886, 115567046025, 115562452408, 
115562452408, 115574565032, 115574565032), BIZ_DT = structure(c(1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("2017-01-01", 
"2017-01-02", "2017-01-03", "2017-01-04", "2017-01-05", "2017-01-06", 
"2017-01-07", "2017-01-09", "2017-01-10", "2017-01-11", "2017-01-12", 
"2017-01-13", "2017-01-14", "2017-01-15", "2017-01-16", "2017-01-17", 
"2017-01-18", "2017-01-19", "2017-01-20", "2017-01-21", "2017-01-22", 
"2017-01-23", "2017-01-24", "2017-01-25", "2017-01-26", "2017-01-27", 
"2017-01-31"), class = "factor"), ENTRY_TM = structure(c(37L, 
41L, 45L, 46L, 8L, 25L, 52L, 73L, 5L, 15L, 56L, 89L, 29L, 33L, 
63L, 77L), .Label = c("05:30:39", "05:32:07", "05:32:33", "05:32:38", 
"05:32:50", "05:32:59", "05:33:06", "05:37:14", "05:37:58", "05:38:34", 
"05:38:38", "05:40:22", "05:40:49", "05:41:16", "05:42:27", "05:47:17", 
"05:48:03", "05:48:13", "05:48:54", "05:49:15", "05:50:17", "05:51:42", 
"05:52:30", "05:53:20", "05:54:40", "05:56:24", "05:57:59", "06:00:11", 
"06:02:46", "06:03:28", "06:05:44", "06:32:18", "06:40:32", "06:40:40", 
"06:42:35", "06:45:51", "06:45:55", "06:52:49", "06:57:25", "07:03:49", 
"07:34:36", "08:26:43", "09:16:34", "10:16:10", "12:21:51", "13:36:40", 
"15:29:30", "16:07:03", "16:10:49", "16:13:51", "16:15:04", "16:29:20", 
"16:47:49", "16:48:42", "16:55:50", "16:56:27", "16:58:53", "17:01:02", 
"17:03:31", "17:06:19", "17:09:03", "17:11:22", "17:12:15", "17:12:57", 
"17:15:11", "17:16:56", "17:21:07", "17:22:18", "17:22:22", "17:23:53", 
"17:28:37", "17:30:17", "17:30:24", "17:31:21", "17:32:22", "17:59:07", 
"18:16:25", "18:17:13", "18:23:36", "18:27:40", "18:44:43", "18:46:36", 
"18:53:21", "20:55:32", "21:06:00", "21:07:08", "21:18:10", "21:18:21", 
"21:42:25", "21:43:45", "23:31:38"), class = "factor"), EXIT_TM = structure(c(34L, 
37L, 45L, 46L, 9L, 27L, 54L, 60L, 7L, 20L, 71L, 88L, 25L, 40L, 
68L, 72L), .Label = c("?", "05:37:56", "05:39:50", "05:39:51", 
"05:39:53", "05:40:03", "05:40:51", "05:41:01", "05:43:02", "05:44:51", 
"05:45:04", "05:45:45", "05:56:35", "05:57:45", "05:58:58", "06:03:06", 
"06:21:39", "06:23:35", "06:24:30", "06:24:58", "06:28:36", "06:29:17", 
"06:29:25", "06:32:11", "06:32:15", "06:34:01", "06:36:28", "06:41:02", 
"06:41:05", "06:43:31", "06:44:44", "06:51:05", "06:51:46", "06:54:50", 
"07:07:55", "07:39:39", "07:43:21", "07:48:49", "08:05:22", "08:17:58", 
"08:18:15", "08:32:12", "09:34:23", "10:31:51", "13:17:38", "13:46:19", 
"16:08:29", "16:26:21", "16:26:50", "16:37:08", "17:09:13", "17:25:49", 
"17:26:31", "17:26:50", "17:27:35", "17:28:20", "17:31:16", "17:34:43", 
"17:35:16", "17:36:37", "17:42:09", "17:42:23", "18:00:27", "18:06:53", 
"18:08:38", "18:09:18", "18:13:02", "18:14:35", "18:20:15", "18:22:06", 
"18:23:17", "18:25:18", "18:25:30", "18:28:47", "18:30:11", "18:30:54", 
"18:33:31", "18:38:49", "18:41:19", "18:52:25", "19:05:37", "19:27:49", 
"21:06:21", "21:41:28", "21:47:13", "21:53:35", "21:54:29", "22:15:05", 
"22:25:41", "23:59:35"), class = "factor")), .Names = c("JRNY_ID_NUM", 
"BIZ_DT", "ENTRY_TM", "EXIT_TM"), row.names = c(160L, 73L, 51L, 
145L, 111L, 56L, 119L, 157L, 168L, 131L, 81L, 78L, 135L, 35L, 
165L, 25L), class = "data.frame")

我想得到的输出是:

JRNY_ID_NUM     BIZ_DT      ENTRY_TM    EXIT_TM     Stay_Duration
115492027250    1/1/2017    6:45:55     6:54:50     0:39:46
115492027250    1/1/2017    7:34:36     7:43:21     4:38:30
115523231209    1/1/2017    12:21:51    13:17:38    0:19:02
115523231209    1/1/2017    13:36:40    13:46:19    15:50:55
115526742250    1/2/2017    5:37:14     5:43:02     0:11:38
115526742250    1/2/2017    5:54:40     6:36:28     9:52:52
115509240124    1/2/2017    16:29:20    17:26:50    0:03:34
115509240124    1/2/2017    17:30:24    17:36:37    11:56:13
115539253765    1/3/2017    5:32:50     5:40:51     0:01:36
115539253765    1/3/2017    5:42:27     6:24:58     10:31:29
115570245886    1/3/2017    16:56:27    18:23:17    3:19:08
115567046025    1/3/2017    21:42:25    22:15:05    7:47:41
115562452408    1/4/2017    6:02:46     6:32:15     0:08:17
115562452408    1/4/2017    6:40:32     8:17:58     8:54:17
115574565032    1/4/2017    17:12:15    18:14:35    0:01:50
115574565032    1/4/2017    18:16:25    18:25:18    

更新

尝试:

require(dplyr)

#' Format durations given in seconds as signed "HH:MM:SS" strings.
#'
#' @param x Numeric vector of durations in seconds. May contain `NA` and
#'   negative values; fractional seconds are truncated.
#' @return Character vector the same length as `x`. Negative durations get a
#'   leading "-", `NA` input yields `NA_character_`, and the hour field is the
#'   total number of hours (it is not wrapped at 24, so no whole days are
#'   silently dropped).
diff_to_hms <- function(x) {
  secs <- abs(x)
  # Guard the comparison so an NA duration does not produce an NA sign string.
  sign <- ifelse(!is.na(x) & x < 0, "-", "")
  # "%d" needs integer input; converting explicitly avoids depending on
  # sprintf()'s coercion of whole-valued doubles.
  out <- sprintf(
    "%s%02d:%02d:%02d",
    sign,
    as.integer(secs %/% 3600),
    as.integer(secs %% 3600 %/% 60),
    as.integer(secs %% 60)
  )
  # sprintf() renders missing values as the text "NA"; restore a real NA.
  out[is.na(x)] <- NA_character_
  out
}

# Combine the business date with each clock time to get full timestamps, then
# measure the gap from a row's exit to the following row's entry.
# NOTE(review): `dat` is presumably the question's data frame (called `df`
# there) -- confirm before running.
dat %>%
  mutate(
    ENTRY_TM = as.POSIXct(
      strptime(paste(BIZ_DT, ENTRY_TM), format = "%Y-%m-%d %H:%M:%S")
    ),
    EXIT_TM = as.POSIXct(
      strptime(paste(BIZ_DT, EXIT_TM), format = "%Y-%m-%d %H:%M:%S")
    )
  ) %>%
  mutate(
    # Next entry minus this exit, in seconds; `units` is spelled out in full
    # rather than relying on partial argument matching. The last row has no
    # following entry, so lead() leaves it NA.
    Stay_Duration = as.numeric(
      difftime(lead(ENTRY_TM), EXIT_TM, units = "secs")
    ),
    Stay_Duration2 = diff_to_hms(Stay_Duration)
  )

尝试这个:

# library() fails loudly if a package is missing; require() only returns FALSE.
library(data.table)
library(Hmisc) # provides Cs() and Lag()

setDT(df)
# Rename the four columns to short names: id, date, entry time, exit time.
setnames(df, Cs(ID, da, en, ex))

# Date-aware timestamps. The explicit format keeps parsing deterministic:
# values that do not match (EXIT_TM has a "?" level) become NA instead of
# depending on as.POSIXct()'s format guessing.
df[, en1 := as.POSIXct(paste(da, en), format = "%Y-%m-%d %H:%M:%S")]
df[, ex1 := as.POSIXct(paste(da, ex), format = "%Y-%m-%d %H:%M:%S")]

# Shift the entry timestamps up by one row so `s` is the next row's entry.
df[, s := Lag(en1, -1)]

# Pin the unit to minutes; plain subtraction lets difftime choose the unit
# from the data, so the column's meaning could change between data sets.
df[, Stay_Duration := difftime(s, ex1, units = "mins")]

# Drop the helper columns.
df[, Cs(en1, ex1, s) := NULL]
df
# ID         da       en       ex   Stay_Duration
# 1: 115492027250 2017-01-01 06:45:55 06:54:50  39.766667 mins
# 2: 115492027250 2017-01-01 07:34:36 07:43:21 278.500000 mins
# 3: 115523231209 2017-01-01 12:21:51 13:17:38  19.033333 mins
# 4: 115523231209 2017-01-01 13:36:40 13:46:19 950.916667 mins
# 5: 115526742250 2017-01-02 05:37:14 05:43:02  11.633333 mins
# 6: 115526742250 2017-01-02 05:54:40 06:36:28 592.866667 mins
# 7: 115509240124 2017-01-02 16:29:20 17:26:50   3.566667 mins
# 8: 115509240124 2017-01-02 17:30:24 17:36:37 716.216667 mins
# 9: 115539253765 2017-01-03 05:32:50 05:40:51   1.600000 mins
# 10: 115539253765 2017-01-03 05:42:27 06:24:58 631.483333 mins
# 11: 115570245886 2017-01-03 16:56:27 18:23:17 199.133333 mins
# 12: 115567046025 2017-01-03 21:42:25 22:15:05 467.683333 mins
# 13: 115562452408 2017-01-04 06:02:46 06:32:15   8.283333 mins
# 14: 115562452408 2017-01-04 06:40:32 08:17:58 534.283333 mins
# 15: 115574565032 2017-01-04 17:12:15 18:14:35   1.833333 mins
# 16: 115574565032 2017-01-04 18:16:25 18:25:18         NA mins

格式:

# Render Stay_Duration as "HH:MM:SS".
# The duration is converted to seconds through the difftime `units` argument,
# so the result is right whatever unit the column is stored in (the previous
# `* 60` was only correct for minutes). round() protects against
# floating-point error pushing a value just below a whole second, which
# formatting would then truncate.
# Caveat: "%H" is an hour of day, so stays of 24 hours or more wrap around.
df[, stay2 := strftime(
  as.POSIXct(
    round(as.numeric(Stay_Duration, units = "secs")),
    origin = "1970-01-01",
    tz = "GMT"
  ),
  format = "%H:%M:%S",
  tz = "GMT"
)]
df
#              ID         da       en       ex   Stay_Duration    stay2
# 1: 115492027250 2017-01-01 06:45:55 06:54:50  39.766667 mins 00:39:46
# 2: 115492027250 2017-01-01 07:34:36 07:43:21 278.500000 mins 04:38:30
# 3: 115523231209 2017-01-01 12:21:51 13:17:38  19.033333 mins 00:19:02
# 4: 115523231209 2017-01-01 13:36:40 13:46:19 950.916667 mins 15:50:55
# 5: 115526742250 2017-01-02 05:37:14 05:43:02  11.633333 mins 00:11:38
# 6: 115526742250 2017-01-02 05:54:40 06:36:28 592.866667 mins 09:52:52
# 7: 115509240124 2017-01-02 16:29:20 17:26:50   3.566667 mins 00:03:34
# 8: 115509240124 2017-01-02 17:30:24 17:36:37 716.216667 mins 11:56:13
# 9: 115539253765 2017-01-03 05:32:50 05:40:51   1.600000 mins 00:01:36
# 10: 115539253765 2017-01-03 05:42:27 06:24:58 631.483333 mins 10:31:29
# 11: 115570245886 2017-01-03 16:56:27 18:23:17 199.133333 mins 03:19:08
# 12: 115567046025 2017-01-03 21:42:25 22:15:05 467.683333 mins 07:47:41
# 13: 115562452408 2017-01-04 06:02:46 06:32:15   8.283333 mins 00:08:17
# 14: 115562452408 2017-01-04 06:40:32 08:17:58 534.283333 mins 08:54:17
# 15: 115574565032 2017-01-04 17:12:15 18:14:35   1.833333 mins 00:01:50
# 16: 115574565032 2017-01-04 18:16:25 18:25:18         NA mins       NA

您需要使时间知道日期,并移动一列以匹配相应的条目。 例如:

library(dplyr) # Needed for lead function

# Consider 'x' as your DF

# Make entry and exit date-aware by combining them with the business date.
# The explicit format keeps parsing deterministic: values that do not match
# (EXIT_TM has a "?" level) become NA instead of depending on as.POSIXct()'s
# format guessing.
x$entDate <- as.POSIXct(
  paste(x$BIZ_DT, x$ENTRY_TM),
  format = "%Y-%m-%d %H:%M:%S"
)
x$extDate <- as.POSIXct(
  paste(x$BIZ_DT, x$EXIT_TM),
  format = "%Y-%m-%d %H:%M:%S"
)

# Gap between this row's exit and the next row's entry, in seconds (could
# also be "mins" etc.). The last row has no next entry, so it is NA.
x$Stay_Duration <- as.numeric(
  lead(x$entDate, 1) - x$extDate,
  units = "secs"
)

#     JRNY_ID_NUM  BIZ_DT     ENTRY_TM EXIT_TM  entDate             extDate                     Stay_Duration
# 160 115492027250 2017-01-01 06:45:55 06:54:50 2017-01-01 06:45:55 2017-01-01 06:54:50          2386
# 73  115492027250 2017-01-01 07:34:36 07:43:21 2017-01-01 07:34:36 2017-01-01 07:43:21         16710
# 51  115523231209 2017-01-01 12:21:51 13:17:38 2017-01-01 12:21:51 2017-01-01 13:17:38          1142
# 145 115523231209 2017-01-01 13:36:40 13:46:19 2017-01-01 13:36:40 2017-01-01 13:46:19         57055
# 111 115526742250 2017-01-02 05:37:14 05:43:02 2017-01-02 05:37:14 2017-01-02 05:43:02           698
# 56  115526742250 2017-01-02 05:54:40 06:36:28 2017-01-02 05:54:40 2017-01-02 06:36:28         35572
# 119 115509240124 2017-01-02 16:29:20 17:26:50 2017-01-02 16:29:20 2017-01-02 17:26:50           214
# 157 115509240124 2017-01-02 17:30:24 17:36:37 2017-01-02 17:30:24 2017-01-02 17:36:37         42973
# 168 115539253765 2017-01-03 05:32:50 05:40:51 2017-01-03 05:32:50 2017-01-03 05:40:51            96
# 131 115539253765 2017-01-03 05:42:27 06:24:58 2017-01-03 05:42:27 2017-01-03 06:24:58         37889
# 81  115570245886 2017-01-03 16:56:27 18:23:17 2017-01-03 16:56:27 2017-01-03 18:23:17         11948
# 78  115567046025 2017-01-03 21:42:25 22:15:05 2017-01-03 21:42:25 2017-01-03 22:15:05         28061
# 135 115562452408 2017-01-04 06:02:46 06:32:15 2017-01-04 06:02:46 2017-01-04 06:32:15           497
# 35  115562452408 2017-01-04 06:40:32 08:17:58 2017-01-04 06:40:32 2017-01-04 08:17:58         32057
# 165 115574565032 2017-01-04 17:12:15 18:14:35 2017-01-04 17:12:15 2017-01-04 18:14:35           110
# 25  115574565032 2017-01-04 18:16:25 18:25:18 2017-01-04 18:16:25 2017-01-04 18:25:18            NA

但请注意,如果同一行中的进入和退出实际上发生在不同的日期,此解决方案将不适用(这也是我之前在评论中提出该问题的原因)。

下面是一种朴素(naive)的纯 base R 做法——之所以说它朴素,是因为它不考虑日期,只使用时间:

res <- df

# Parse only the clock times (no dates). Parsing in UTC keeps the result
# independent of the machine's time zone: without `tz`, strptime() uses the
# local zone on today's date, so a DST transition could shift or invalidate
# some times.
next_entry <- strptime(tail(res$ENTRY_TM, -1), "%H:%M:%S", tz = "UTC")
this_exit <- strptime(head(res$EXIT_TM, -1), "%H:%M:%S", tz = "UTC")

# A negative difference means the next entry falls after midnight; the modulo
# by one day (in seconds) wraps it into the 0-24h range.
gap_secs <- as.integer(
  difftime(next_entry, this_exit, units = "secs")
) %% (24 * 3600)

# The last row has no following entry, hence the trailing NA.
res$Stay_Duration <- as.difftime(c(gap_secs, NA), units = "secs")

#      JRNY_ID_NUM     BIZ_DT ENTRY_TM  EXIT_TM Stay_Duration
# 160 115492027250 2017-01-01 06:45:55 06:54:50     2386 secs
# 73  115492027250 2017-01-01 07:34:36 07:43:21    16710 secs
# 51  115523231209 2017-01-01 12:21:51 13:17:38     1142 secs
# 145 115523231209 2017-01-01 13:36:40 13:46:19    57055 secs
# 111 115526742250 2017-01-02 05:37:14 05:43:02      698 secs
# 56  115526742250 2017-01-02 05:54:40 06:36:28    35572 secs
# 119 115509240124 2017-01-02 16:29:20 17:26:50      214 secs
# 157 115509240124 2017-01-02 17:30:24 17:36:37    42973 secs
# 168 115539253765 2017-01-03 05:32:50 05:40:51       96 secs
# 131 115539253765 2017-01-03 05:42:27 06:24:58    37889 secs
# 81  115570245886 2017-01-03 16:56:27 18:23:17    11948 secs
# 78  115567046025 2017-01-03 21:42:25 22:15:05    28061 secs
# 135 115562452408 2017-01-04 06:02:46 06:32:15      497 secs
# 35  115562452408 2017-01-04 06:40:32 08:17:58    32057 secs
# 165 115574565032 2017-01-04 17:12:15 18:14:35      110 secs
# 25  115574565032 2017-01-04 18:16:25 18:25:18       NA secs

它只是比较时间,并进行模运算。

正如其他人已经说过的那样,如果出现日期间隔,您可能会遇到不同类型的问题。

与所需输出的一致性检查:

# Expected result from the question, read from inline text. The last row has
# no following entry, so its Stay_Duration holds a non-time placeholder.
desired_output <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = "JRNY_ID_NUM     BIZ_DT      ENTRY_TM    EXIT_TM     Stay_Duration
115492027250    1/1/2017    6:45:55     6:54:50     0:39:46
115492027250    1/1/2017    7:34:36     7:43:21     4:38:30
115523231209    1/1/2017    12:21:51    13:17:38    0:19:02
115523231209    1/1/2017    13:36:40    13:46:19    15:50:55
115526742250    1/2/2017    5:37:14     5:43:02     0:11:38
115526742250    1/2/2017    5:54:40     6:36:28     9:52:52
115509240124    1/2/2017    16:29:20    17:26:50    0:03:34
115509240124    1/2/2017    17:30:24    17:36:37    11:56:13
115539253765    1/3/2017    5:32:50     5:40:51     0:01:36
115539253765    1/3/2017    5:42:27     6:24:58     10:31:29
115570245886    1/3/2017    16:56:27    18:23:17    3:19:08
115567046025    1/3/2017    21:42:25    22:15:05    7:47:41
115562452408    1/4/2017    6:02:46     6:32:15     0:08:17
115562452408    1/4/2017    6:40:32     8:17:58     8:54:17
115574565032    1/4/2017    17:12:15    18:14:35    0:01:50
115574565032    1/4/2017    18:16:25    18:25:18    place_holder"
)

# Compare the expected durations (parsed from H:M:S text into seconds) with
# the computed column; the placeholder in the last row does not parse as a
# time.
all.equal(
  target = as.difftime(
    desired_output$Stay_Duration,
    format = "%H:%M:%S",
    units = "secs"
  ),
  current = res$Stay_Duration
)
# [1] TRUE

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM