If I have a data frame with entries stamped with date/time in "POSIXct" "POSIXt" form (as shown in the example data), is there a way to group by day/hour %>% summarize(sum) to obtain the sum of all the observations by day and by hour, or do I have to extract the day and time, create a new column for each, and group by those?
The issue I am having is that when I use the second approach with multi-year data, the plots group the month variables together instead of showing a consecutive time series.
Thanks in advance
# Example data (looks like dput() output): a 10-row tibble with three columns:
#   id       - integer identifier
#   day_time - POSIXct timestamp stored as seconds since the epoch, tzone "UTC"
#   value    - numeric observation
# The `groups` attribute marks it as a grouped_df grouped by `id`, with each
# of the 10 rows forming its own group.
structure(list(id = c(1L, 2L, 4L, 7L, 11L, 13L, 14L, 15L, 20L,
23L), day_time = structure(c(1623290208, 1623289673, 1622607095,
1622778322, 1623268019, 1622843074, 1622842999, 1623289383, 1622851630,
1623051043), tzone = "UTC", class = c("POSIXct", "POSIXt")),
value = c(6, 0, -2, 6, -2, 0, -6, -2, 8, -1)), row.names = c(NA,
-10L), groups = structure(list(id = c(1L, 2L, 4L, 7L, 11L, 13L,
14L, 15L, 20L, 23L), .rows = structure(list(1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
If you want to group by each day separately, extract only the date from `day_time`. If you want to group by each hour of each day separately, extract the date along with the hour using `format`, so that 8 AM today is in a different group from 8 AM on any other day.
library(dplyr)
# Add two helper columns derived from the `day_time` timestamp:
#   date - the calendar day, as a Date
#   hour - a character label "YYYY MM DD HH", so the same clock hour on
#          different days gets a different label
mutate(
  df,
  date = as.Date(day_time),
  hour = format(day_time, "%Y %m %d %H")
)
# id day_time value date hour
# <int> <dttm> <dbl> <date> <chr>
# 1 1 2021-06-10 01:56:48 6 2021-06-10 2021 06 10 01
# 2 2 2021-06-10 01:47:53 0 2021-06-10 2021 06 10 01
# 3 4 2021-06-02 04:11:35 -2 2021-06-02 2021 06 02 04
# 4 7 2021-06-04 03:45:22 6 2021-06-04 2021 06 04 03
# 5 11 2021-06-09 19:46:59 -2 2021-06-09 2021 06 09 19
# 6 13 2021-06-04 21:44:34 0 2021-06-04 2021 06 04 21
# 7 14 2021-06-04 21:43:19 -6 2021-06-04 2021 06 04 21
# 8 15 2021-06-10 01:43:03 -2 2021-06-10 2021 06 10 01
# 9 20 2021-06-05 00:07:10 8 2021-06-05 2021 06 05 00
#10 23 2021-06-07 07:30:43 -1 2021-06-07 2021 06 07 07
You can use this column in `group_by` and then call `summarise` as usual.
# Total `value` per calendar hour.
# Step 1 builds a character key "YYYY-MM-DD HH" from the timestamp,
# step 2 groups on that key, step 3 collapses each group to its sum.
df %>%
  mutate(hour = format(day_time, "%Y-%m-%d %H")) %>%
  group_by(hour) %>%
  summarise(value = sum(value))
# hour value
# <chr> <dbl>
#1 2021-06-02 04 -2
#2 2021-06-04 03 6
#3 2021-06-04 21 -6
#4 2021-06-05 00 8
#5 2021-06-07 07 -1
#6 2021-06-09 19 -2
#7 2021-06-10 01 4
An alternative way:
library(tidyverse)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
# Example input stored as `data`: a 10-row tibble with three columns:
#   id       - integer identifier
#   day_time - POSIXct timestamp stored as seconds since the epoch, tzone "UTC"
#   value    - numeric observation
# The `groups` attribute marks it as a grouped_df grouped by `id`, with each
# of the 10 rows forming its own group.
data <- structure(list(id = c(1L, 2L, 4L, 7L, 11L, 13L, 14L, 15L, 20L,
23L), day_time = structure(c(1623290208, 1623289673, 1622607095,
1622778322, 1623268019, 1622843074, 1622842999, 1623289383, 1622851630,
1623051043), tzone = "UTC", class = c("POSIXct", "POSIXt")),
value = c(6, 0, -2, 6, -2, 0, -6, -2, 8, -1)), row.names = c(NA,
-10L), groups = structure(list(id = c(1L, 2L, 4L, 7L, 11L, 13L,
14L, 15L, 20L, 23L), .rows = structure(list(1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
# Derive two character columns from the POSIXct `day_time` column:
#   hour - the clock time as "HH:MM:SS"
#   date - the calendar day as "YYYY-MM-DD"
# format() is used with explicit patterns instead of splitting the implicitly
# coerced timestamp on " ". The split relies on the coerced string always
# containing both a date and a time part; when the time part is omitted
# (as can happen for midnight timestamps, notably in R >= 4.3) there is no
# second piece and `.x[[2]]` stops with a subscript error.
df <- mutate(
  data,
  hour = format(day_time, "%H:%M:%S"),
  date = format(day_time, "%Y-%m-%d")
)
df
#> # A tibble: 10 x 5
#> # Groups: id [10]
#> id day_time value hour date
#> <int> <dttm> <dbl> <chr> <chr>
#> 1 1 2021-06-10 01:56:48 6 01:56:48 2021-06-10
#> 2 2 2021-06-10 01:47:53 0 01:47:53 2021-06-10
#> 3 4 2021-06-02 04:11:35 -2 04:11:35 2021-06-02
#> 4 7 2021-06-04 03:45:22 6 03:45:22 2021-06-04
#> 5 11 2021-06-09 19:46:59 -2 19:46:59 2021-06-09
#> 6 13 2021-06-04 21:44:34 0 21:44:34 2021-06-04
#> 7 14 2021-06-04 21:43:19 -6 21:43:19 2021-06-04
#> 8 15 2021-06-10 01:43:03 -2 01:43:03 2021-06-10
#> 9 20 2021-06-05 00:07:10 8 00:07:10 2021-06-05
#> 10 23 2021-06-07 07:30:43 -1 07:30:43 2021-06-07
# Sum `value` for every (date, hour-of-day) pair.
# The "HH:MM:SS" string in `hour` is cut down to its first two characters
# (the hour) while grouping, so all rows in the same clock hour of the same
# date land in one group.
df %>%
  group_by(date, hour = str_sub(hour, 1, 2)) %>%
  summarise(sum(value))
#> `summarise()` has grouped output by 'date'. You can override using the `.groups` argument.
#> # A tibble: 7 x 3
#> # Groups: date [6]
#> date hour `sum(value)`
#> <chr> <chr> <dbl>
#> 1 2021-06-02 04 -2
#> 2 2021-06-04 03 6
#> 3 2021-06-04 21 -6
#> 4 2021-06-05 00 8
#> 5 2021-06-07 07 -1
#> 6 2021-06-09 19 -2
#> 7 2021-06-10 01 4
Created on 2021-06-10 by the reprex package (v2.0.0)
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.