简体   繁体   中英

Summarizing values and plotting using POSIXct

I have a data frame whose entries are stamped with a date/time in "POSIXct" "POSIXt" form (as shown in the example data). Is there a way to group by day and hour and then summarize (something like group_by(day/hour) %>% summarize(sum)) to obtain all the observations by day by hour, or do I have to extract the day and the time, create a new column for each, and group by those?

The issue I am having is that when I use the second approach with multi-year data, the plots group the month variables together instead of showing a consecutive time series.

Thanks in advance

# Example data as produced by dput(): a tibble grouped by `id` (class
# "grouped_df", one row per group) with three columns:
#   id       - integer identifier
#   day_time - POSIXct timestamps (seconds since the epoch, tzone = "UTC")
#   value    - numeric observation
structure(list(id = c(1L, 2L, 4L, 7L, 11L, 13L, 14L, 15L, 20L, 
23L), day_time = structure(c(1623290208, 1623289673, 1622607095, 
1622778322, 1623268019, 1622843074, 1622842999, 1623289383, 1622851630, 
1623051043), tzone = "UTC", class = c("POSIXct", "POSIXt")), 
    value = c(6, 0, -2, 6, -2, 0, -6, -2, 8, -1)), row.names = c(NA, 
-10L), groups = structure(list(id = c(1L, 2L, 4L, 7L, 11L, 13L, 
14L, 15L, 20L, 23L), .rows = structure(list(1L, 2L, 3L, 4L, 5L, 
    6L, 7L, 8L, 9L, 10L), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -10L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

If you want to group by each day separately, extract only the date from day_time. If you want to group by each hour of each day separately, extract the date along with the hour using format, so that 8 AM today is in a different group from 8 AM on any other day.

library(dplyr)

# Add two helper columns derived from the timestamp:
#   date - the calendar day as a Date
#   hour - a "year month day hour" character key, so the same clock hour on
#          different days ends up in different groups
mutate(
  df,
  date = as.Date(day_time),
  hour = format(day_time, "%Y %m %d %H")
)

#      id day_time            value date       hour         
#   <int> <dttm>              <dbl> <date>     <chr>        
# 1     1 2021-06-10 01:56:48     6 2021-06-10 2021 06 10 01
# 2     2 2021-06-10 01:47:53     0 2021-06-10 2021 06 10 01
# 3     4 2021-06-02 04:11:35    -2 2021-06-02 2021 06 02 04
# 4     7 2021-06-04 03:45:22     6 2021-06-04 2021 06 04 03
# 5    11 2021-06-09 19:46:59    -2 2021-06-09 2021 06 09 19
# 6    13 2021-06-04 21:44:34     0 2021-06-04 2021 06 04 21
# 7    14 2021-06-04 21:43:19    -6 2021-06-04 2021 06 04 21
# 8    15 2021-06-10 01:43:03    -2 2021-06-10 2021 06 10 01
# 9    20 2021-06-05 00:07:10     8 2021-06-05 2021 06 05 00
#10    23 2021-06-07 07:30:43    -1 2021-06-07 2021 06 07 07

You can use this column in group_by and use summarise as usual.

# Sum `value` for every distinct day-and-hour combination.
# The key is built first as a "YYYY-MM-DD HH" character column, then used as
# the sole grouping variable (group_by() replaces any existing grouping).
df %>%
  mutate(hour = format(day_time, "%Y-%m-%d %H")) %>%
  group_by(hour) %>%
  summarise(value = sum(value))

#   hour          value
#  <chr>         <dbl>
#1 2021-06-02 04    -2
#2 2021-06-04 03     6
#3 2021-06-04 21    -6
#4 2021-06-05 00     8
#5 2021-06-07 07    -1
#6 2021-06-09 19    -2
#7 2021-06-10 01     4

An alternative way:

library(tidyverse)
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union

# Rebuild the example tibble (grouped by `id`, one row per group) by hand.
# local() keeps the helper vector out of the global environment so that only
# `data` is created, exactly as with the original dput() expression.
data <- local({
  ids <- c(1L, 2L, 4L, 7L, 11L, 13L, 14L, 15L, 20L, 23L)

  # Timestamps are seconds since the epoch, displayed in UTC.
  stamps <- .POSIXct(
    c(
      1623290208, 1623289673, 1622607095, 1622778322, 1623268019,
      1622843074, 1622842999, 1623289383, 1622851630, 1623051043
    ),
    tz = "UTC"
  )

  # Grouping metadata: one group per id, each pointing at its own row.
  group_rows <- structure(
    as.list(seq_along(ids)),
    ptype = integer(0),
    class = c("vctrs_list_of", "vctrs_vctr", "list")
  )
  group_table <- structure(
    list(id = ids, .rows = group_rows),
    row.names = c(NA, -10L),
    class = c("tbl_df", "tbl", "data.frame"),
    .drop = TRUE
  )

  structure(
    list(
      id = ids,
      day_time = stamps,
      value = c(6, 0, -2, 6, -2, 0, -6, -2, 8, -1)
    ),
    row.names = c(NA, -10L),
    groups = group_table,
    class = c("grouped_df", "tbl_df", "tbl", "data.frame")
  )
})




# Derive the clock time ("HH:MM:SS") and the calendar date ("YYYY-MM-DD") as
# character columns directly from the POSIXct column.
# format() always emits the requested fields, in the column's own time zone.
# Splitting the coerced string on a space and taking the second piece is
# fragile: a timestamp can be rendered without a time-of-day part (e.g.
# exactly midnight), in which case there is no second piece to extract.
df <- mutate(
  data,
  hour = format(day_time, "%H:%M:%S"),
  date = format(day_time, "%Y-%m-%d")
)

df
#> # A tibble: 10 x 5
#> # Groups:   id [10]
#>       id day_time            value hour     date      
#>    <int> <dttm>              <dbl> <chr>    <chr>     
#>  1     1 2021-06-10 01:56:48     6 01:56:48 2021-06-10
#>  2     2 2021-06-10 01:47:53     0 01:47:53 2021-06-10
#>  3     4 2021-06-02 04:11:35    -2 04:11:35 2021-06-02
#>  4     7 2021-06-04 03:45:22     6 03:45:22 2021-06-04
#>  5    11 2021-06-09 19:46:59    -2 19:46:59 2021-06-09
#>  6    13 2021-06-04 21:44:34     0 21:44:34 2021-06-04
#>  7    14 2021-06-04 21:43:19    -6 21:43:19 2021-06-04
#>  8    15 2021-06-10 01:43:03    -2 01:43:03 2021-06-10
#>  9    20 2021-06-05 00:07:10     8 00:07:10 2021-06-05
#> 10    23 2021-06-07 07:30:43    -1 07:30:43 2021-06-07


# Total `value` per calendar date and hour of day.
# The first two characters of the "HH:MM:SS" string are the hour; the
# truncation is done inline while defining the grouping variables.
df %>%
  group_by(date, hour = str_sub(hour, 1, 2)) %>%
  summarise(sum(value))
#> `summarise()` has grouped output by 'date'. You can override using the `.groups` argument.
#> # A tibble: 7 x 3
#> # Groups:   date [6]
#>   date       hour  `sum(value)`
#>   <chr>      <chr>        <dbl>
#> 1 2021-06-02 04              -2
#> 2 2021-06-04 03               6
#> 3 2021-06-04 21              -6
#> 4 2021-06-05 00               8
#> 5 2021-06-07 07              -1
#> 6 2021-06-09 19              -2
#> 7 2021-06-10 01               4

Created on 2021-06-10 by the reprex package (v2.0.0)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM