in R
I have a data-frame with 24400 observations (rows). In this data-frame I have a column df$hours containing hours index (as shown below). Basically each hour is repeated over around ~ 60 times before passing to the next hour until reaching the 23rd hour then it goes back again to "o" (example below
df$hours
1] "00" "00" "00" "00" "00" "00" "00" "00" "00" "00" "00"
[12] "00" "00" "00" "00" "00" "00" "00" "00" "00" "00" "00"
[23] "00" "00" "00" "00" "00" "00" "00" "00" "00" "00" "00"
[34] "00" "00" "00" "00" "00" "00" "00" "00" "00" "00" "00"
[45] "00" "00" "00" "00" "00" "00" "00" "00" "00" "00" "00"
[56] "00" "00" "00" "00" "00" "01" "01" "01" "01" "01" "01"
[67] "01" "01" "01" "01" "01" "01" "01" "01" "01" "01" "01"
[78] "01" "01" "01" "01" "01" "01" "01" "01" "01" "01" "01"
[89] "01" "01" "01" "01" "01" "01" "01" "01" "01" "01" "01"
[100] "01" "01" "01" "01" "01" "01" "01" "01" "01" "01" "01"
[111] "01" "01" "01" "01" "01" "01" "01" "01" "01" "01" "02"
[122] "02" "02" "02" "02" "02" "02" "02" "02" "02" "02" "02"
[133] "02" "02" "02" "02" "02" "02" "02" "02" "02" "02" "02"
[144] "02" "02" "02" "02" "02" "02" "02" "02" "02" "02" "02"
[155] "02" "02" "02" "02" "02" "02" "02" "02" "02" "02" "02"
[166] "02" "02" "02" "02" "02" "02" "02" "02" "02" "02" "02"
[177] "02" "02" "02" "02" "03" "03" "03" "03" "03" "03" "03"
[188] "03" "03" "03" "03" "03" "03" "03" "03" "03" "03" "03"
[199] "03" "03" "03" "03" "03" "03" "03" "03" "03" "03" "03"
[210] "03" "03" "03" "03" "03" "03" "03" "03" "03" "03" "03"
[221] "03" "03" "03" "03" "03" "03" "03" "03" "03" "03" "03"
[232] "03" "03" "03" "03" "03" "03" "03" "03" "03" "04" "04"
[243] "04" "04" "04" "04" "04" "04" "04" "04" "04" "04" "04"
[254] "04" "04" "04" "04" "04" "04" "04" "04" "04" "04" "04"
[265] "04" "04" "04" "04" "04" "04" "04" "04" "04" "04" "04"
[276] "04" "04" "04" "04" "04" "04" "04" "04" "04" "04" "04"
[287] "04" "04" "04" "04" "04" "04" "04" "04" "04" "04" "04"
[298] "04" "04" "04" "05" "05" "05" "05" "05" "05" "05" "05"
[309] "05" "05" "05" "05" "05" "05" "05" "05" "05" "05" "05"
[320] "05" "05" "05" "05" "05" "05" "05" "05" "05" "05" "05"
[331] "05" "05" "05" "05" "05" "05" "05" "05" "05" "05" "05"
[342] "05" "05" "05" "05" "05" "05" "05" "05" "05" "05" "05"
[353] "05" "05" "05" "05" "05" "05" "05" "05" "06" "06" "06"
[364] "06" "06" "06" "06" "06" "06" "06" "06" "06" "06" "06"
[375] "06" "06" "06" "06" "06" "06" "06" "06" "06" "06" "06"
[386] "06" "06" "06" "06" "06" "06" "06" "06" "06" "06" "06"
[397] "06" "06" "06" "06" "06" "06" "06" "06" "06" "06" "06"
[408] "06" "06" "06" "06" "06" "06" "06" "06" "06" "06" "06"
[419] "06" "06" "07" "07" "07" "07" "07" "07" "07" "07" "07"
[430] "07" "07" "07" "07" "07" "07" "07" "07" "07" "07" "07"
[441] "07" "07" "07" "07" "07" "07" "07" "07" "07" "07" "07"
[452] "07" "07" "07" "07" "07" "07" "07" "07" "07" "07" "07"
[463] "07" "07" "07" "07" "07" "07" "07" "07" "07" "07" "07"
[474] "07" "07" "07" "07" "07" "07" "07" "08" "08" "08" "08"
[485] "08" "08" "08" "08" "08" "08" "08" "08" "08" "08" "08"
[496] "08" "08" "08" "08" "08" "08" "08" "08" "08" "08" "08"
[507] "08" "08" "08" "08" "08" "08" "08" "08" "08" "08" "08"
[518] "08" "08" "08" "08" "08" "08" "08" "08" "08" "08" "08"
[529] "08" "08" "08" "08" "08" "08" "08" "08" "08" "08" "08"
[540] "18" "18" "18" "18" "18" "18" "18" "18" "18" "18" "18"
[551] "18" "18" "18" "18" "18" "18" "18" "18" "18" "18" "18"
[562] "18" "18" "18" "18" "18" "18" "18" "18" "18" "18" "18"
[573] "18" "18" "18" "18" "18" "18" "18" "18" "18" "18" "18"
[584] "18" "18" "18" "18" "18" "18" "18" "18" "18" "18" "18"
[595] "18" "18" "18" "18" "18" "19" "19" "19" "19" "19" "19"
[606] "19" "19" "19" "19" "19" "19" "19" "19" "19" "19" "19"
[617] "19" "19" "19" "19" "19" "19" "19" "19" "19" "19" "19"
[628] "19" "19" "19" "19" "19" "19" "19" "19" "19" "19" "19"
[639] "19" "19" "19" "19" "19" "19" "19" "19" "19" "19" "19"
[650] "19" "19" "19" "19" "19" "19" "19" "19" "19" "19" "20"
[661] "20" "20" "20" "20" "20" "20" "20" "20" "20" "20" "20"
[672] "20" "20" "20" "20" "20" "20" "20" "20" "20" "20" "20"
[683] "20" "20" "20" "20" "20" "20" "20" "20" "20" "20" "20"
[694] "20" "20" "20" "20" "20" "20" "20" "20" "20" "20" "20"
[705] "20" "20" "20" "20" "20" "20" "20" "20" "20" "20" "20"
[716] "20" "20" "20" "20" "21" "21" "21" "21" "21" "21" "21"
[727] "21" "21" "21" "21" "21" "21" "21" "21" "21" "21" "21"
[738] "21" "21" "21" "21" "21" "21" "21" "21" "21" "21" "21"
[749] "21" "21" "21" "21" "21" "21" "21" "21" "21" "21" "21"
[760] "21" "21" "21" "21" "21" "21" "21" "21" "21" "21" "21"
[771] "21" "21" "21" "21" "21" "21" "21" "21" "21" "22" "22"
[782] "22" "22" "22" "22" "22" "22" "22" "22" "22" "22" "22"
[793] "22" "22" "22" "22" "22" "22" "22" "22" "22" "22" "22"
[804] "22" "22" "22" "22" "22" "22" "22" "22" "22" "22" "22"
[815] "22" "22" "22" "22" "22" "22" "22" "22" "22" "22" "22"
[826] "22" "22" "22" "22" "22" "22" "22" "22" "22" "22" "22"
[837] "22" "22" "22" "23" "23" "23" "23" "23" "23" "23" "23"
[848] "23" "23" "23" "23" "23" "23" "23" "23" "23" "23" "23"
[859] "23" "23" "23" "23" "23" "23" "23" "23" "23" "23" "23"
[870] "23" "23" "23" "23" "23" "23" "23" "23" "23" "23" "23"
[881] "23" "23" "23" "23" "23" "23" "23" "23" "23" "23" "23"
[892] "23" "23" "23" "23" "23" "23" "00" "00" "00" "00" "00"
[903] "00" "00" "00" "00" "00" "00" "00" "00" "00" "00" "00"
[914] "00" "00" "00" "00" "00" "00" "00" "00" "00" "00" "00"
[925] "00" "00" "00" "00" "00" "00" "00" "00" "00" "00" "00"
[936] "00" "00" "00" "00" "00" "00" "00" "00" "00" "00" "00"
[947] "00" "00" "00" "00" "00" "00" "00" "00" "00" "00" "00"
[958] "01" "01" "01" "01" "01" "01" "01" "01" "01" "01" "01"
[969] "01" "01" "01" "01" "01" "01" "01" "01" "01" "01" "01"
[980] "01" "01" "01" "01" "01" "01" "01" "01" "01" "01" "01"
[991] "01" "01" "01" "01" "01" "01" "01" "01" "01" "01"
[ reached getOption("max.print") -- omitted 23459 entries ]
my aim is to generate another column df$minutes that shows the minutes within each hour. for example if we start following the order I gave it's gonna be
Column A | Column B |
---|---|
0 | 1 |
0 | 2 |
0 | 3 |
. | . |
. | . |
. | . |
1 | 61 |
1 | 62 |
. | . |
. | . |
. | . |
23 | 1380 |
0 | 1 |
0 | 1 |
0 | 2 |
0 | 3 |
. | . |
. | . |
. | . |
1 | 61 |
1 | 62 |
. | . |
. | . |
. | . |
23 | 1380 |
0 | 1 |
please not 2 things: 1- I don't have the same exact number of minutes in each hours (sometimes it is 60 sometimes it is less) 2- I don't have all hours of the day (there is a gap between 8-18)
I tried this but this is i think suggest that we have the same number of minutes for each hour (which is 60)
`timepf <- as.numeric(df$hours)
n <- length(timepf)
xtime <- numeric()
start <- 1
while (start <= n) {
end <- min(start + 59, n)
part <- timepf[start:end]
xtime_part <- part * 60 + (1:length(part))
xtime <- c(xtime, xtime_part)
start <- end + 1
}
xtime
length(xtime)
`
xtime is the minutes column we're looking to create.
I'll demonstrate using much smaller data, just 3 hours a day for only 4 minutes per hour.
df <- data.frame(hour = rep(sprintf("%02i", rep(0:2, each=4)), 3))
Note that my code will assume full 60 minutes, though, so it should work as-is for your data.
The first thing we need to do, though, is account for "day": each time hour
increments to "00"
from something other than "00"
, increment the day. After that, this is simply summarize by group .
library(dplyr)
df %>%
mutate(day = cumsum(hour == "00" & lag(hour != "00", default = FALSE))) %>%
group_by(day, hour) %>%
mutate(minute = as.integer(hour) * 60 + row_number()) %>%
ungroup()
# # A tibble: 36 × 3
# hour day minute
# <chr> <int> <dbl>
# 1 00 0 1
# 2 00 0 2
# 3 00 0 3
# 4 00 0 4
# 5 01 0 61
# 6 01 0 62
# 7 01 0 63
# 8 01 0 64
# 9 02 0 121
# 10 02 0 122
# # … with 26 more rows
# # ℹ Use `print(n = ...)` to see more rows
Take my word that on day = 1 (the second day) the minute
resets to 1
. (Or look in the base-R code below, it's the values.)
df$day <- cumsum(df$hour == "00" & c(FALSE, df$hour[-nrow(df)] != "00"))
df$day <- cumsum(df$hour == "00" & c(FALSE, df$hour[-nrow(df)] != "00"))
I'll note that if you think you may have a day restart lacking any of the "00"
hours, you can use an alternate method to determine day
by assuming that any decrease in hour
means a new day:
df$day2 <- cumsum(diff(as.integer(df$hour)) < 0)
From here, we'll use ave
:
df$minute <- ave(as.integer(df$hour), df[c("day","hour")], FUN = function(z) as.integer(z) * 60 + seq_along(z))
df
# hour day minute
# 1 00 0 1
# 2 00 0 2
# 3 00 0 3
# 4 00 0 4
# 5 01 0 61
# 6 01 0 62
# 7 01 0 63
# 8 01 0 64
# 9 02 0 121
# 10 02 0 122
# 11 02 0 123
# 12 02 0 124
# 13 00 1 1
# 14 00 1 2
# 15 00 1 3
# 16 00 1 4
# 17 01 1 61
# 18 01 1 62
# 19 01 1 63
# 20 01 1 64
# 21 02 1 121
# 22 02 1 122
# 23 02 1 123
# 24 02 1 124
# 25 00 2 1
# 26 00 2 2
# 27 00 2 3
# 28 00 2 4
# 29 01 2 61
# 30 01 2 62
# 31 01 2 63
# 32 01 2 64
# 33 02 2 121
# 34 02 2 122
# 35 02 2 123
# 36 02 2 124
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.