简体   繁体   中英

Time Series Analysis function in r - how long an object was in range

Do you know of a time series analysis function in R that works on a data frame like this one?

dput(h)

structure(list(id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 6986L, 6986L, 6986L, 6986L, 
6986L, 6986L, 6986L, 6986L, 6986L, 6986L, 6986L, 6986L, 6986L, 
6986L, 6986L, 6986L, 6986L, 6986L, 6986L, 6986L, 6986L, 6986L, 
6986L, 6986L, 6986L, 627L, 627L, 627L, 627L, 627L, 627L, 627L, 
627L, 627L, 627L, 627L, 627L, 627L, 627L, 627L, 627L, 627L, 627L, 
627L, 627L, 627L, 627L, 627L, 6271L, 6271L, 6271L, 6271L, 6271L, 
6271L, 6271L, 6271L, 6271L, 6271L, 6271L, 6271L, 6271L, 6271L, 
6271L, 6271L, 6271L, 6271L, 6271L), value = c(134, 60, 63, 69, 
63, 66, 58, 63, 60, 65, 65, 48, 56, 50, 60, 60, 58, 60, 68, 58, 
60, 75, 64, 71, 73, 71, 67, 68, 66, 67, 63, 62, 68, 72, 74, 79, 
69, 76, 70, 72, 72, 60, 66, 67, 99, 107, 104, 106, 100, 91, 90, 
94, 95, 93, 108, 87, 93, 90, 100, 100, 104, 92, 102, 97, 93, 
84, 55, 86, 86, 80, 95, 98, 82, 85, 91, 83, 92, 86, 90, 93, 97, 
103, 94, 103, 99, 113), Time = structure(c(1273691520, 1273695180, 
1273698780, 1273702320, 1273705980, 1273709580, 1273713180, 1273716780, 
1273720380, 1273723980, 1273727580, 1273731180, 1273734780, 1273744080, 
1273745580, 1273749180, 1273752780, 1273756380, 1273759980, 1154541540, 
1154542260, 1154545860, 1154549460, 1154553060, 1154556000, 1154560260, 
1154563860, 1154567460, 1154571060, 1154574660, 1154578260, 1154581860, 
1154585460, 1154589060, 1154592660, 1154596260, 1154599860, 1154603460, 
1154607060, 1154610660, 1154614260, 1154617860, 1154621460, 1154625060, 
1189450860, 1189454520, 1189458060, 1189461660, 1189465260, 1189468860, 
1189472460, 1189476120, 1189479720, 1189483320, 1189486860, 1189490460, 
1189494060, 1189497720, 1189501260, 1189504860, 1189508520, 1189512120, 
1189515720, 1189519320, 1189522920, 1189526520, 1189530060, 1105998780, 
1105999440, 1106003040, 1106006700, 1106010060, 1106013840, 1106017440, 
1106021040, 1106024640, 1106028240, 1106031900, 1106035500, 1106039100, 
1106042700, 1106046300, 1106049900, 1106057100, 1106060700, 1106064300
), tzone = "UTC", class = c("POSIXct", "POSIXt"))), .Names = c("id", 
"value", "Time"), row.names = c(NA, -86L), class = c("data.table", 
"data.frame"), .internal.selfref = <pointer: 0x0000000000200788>)
> 

my code:

setDT(h)[(value)>=55 & (value) <=85, Time[.N]- Time[1L], id] 

The output is:

     id         V1
1:    1 18.0 hours
2: 6986 23.2 hours
3:  627 59.0 hours
4: 6271  6.0 hours

or:

setDT(h)[(value)>=55 & (value) <=85,.N, id]

     id  N
1:    1 16
2: 6986 25
3:  627  2
4: 6271  4

But the expected result for id 1 is 16; for id 627 it is a single value, so 0; for id 6986 it is 23.2; and for id 6271 it is 1. (For every id the time series covers one day, so the result is supposed to be between 0 and 24 hours.)

what is the problem?

We can use data.table

library(data.table)
setDT(list)[value>55 & value <85, .N, id]

Or if it is the difference in 'Time'

setDT(list)[value>55 & value <85, Time[.N] - Time[1L], id]

Based on the updated 'p' dataset in the OP's post

p[value>55 & value < 85, Time[.N]- Time[1], by = id]
#     id         V1
#1: 6986 23.2 hours

Update

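The reason is that there are 'id's where the 'Time' difference is less than an hour. For those groups `-` returns the difference in a smaller unit (minutes), but when the per-group results are combined, the values are all labelled with the 'hours' unit without being converted. That is why the 59 minutes for id 627 are printed as "59.0 hours". One option is to use difftime and specify the unit of our preference, so that every group is computed in the same unit.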

setDT(h)[value>=55 & value <=85, difftime(Time[.N], Time[1L], unit = "hour"), id] 
#     id               V1
#1:    1 18.0000000 hours
#2: 6986 23.2000000 hours
#3:  627  0.9833333 hours
#4: 6271  6.0000000 hours

If we need a plain numeric result, wrap the difftime call in as.numeric.

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM