简体   繁体   中英

How to use rollmean on dplyr grouped data

I hope my example data doesn't seem too large

df <- structure(list(date = structure(c(17532, 17563, 17591, 17622, 
17652, 17683, 17713, 17744, 17775, 17805, 17836, 17866, 17897, 
17928, 17956, 17987, 18017, 18048, 18078, 18109, 18140, 17532, 
17563, 17591, 17622, 17652, 17683, 17713, 17744, 17775, 17805, 
17836, 17866, 17897, 17928, 17956, 17987, 18017, 18048, 18078, 
18109, 18140, 17532, 17563, 17591, 17622, 17652, 17683, 17713, 
17744, 17775, 17805, 17836, 17866, 17897, 17928, 17956, 17987, 
18017, 18048, 18078, 18109, 18140, 17532, 17563, 17591, 17622, 
17652, 17683, 17713, 17744, 17775, 17805, 17836, 17866, 17897, 
17928, 17956, 17987, 18017, 18048, 18078, 18109, 18140, 17532, 
17563, 17591, 17622, 17652, 17683, 17713, 17744, 17775, 17805, 
17836, 17866, 17897, 17928, 17956, 17987, 18017, 18048, 18078, 
18109, 18140, 17532, 17563, 17591, 17622, 17652, 17683, 17713, 
17744, 17775, 17805, 17836, 17866, 17897, 17928, 17956, 17987, 
18017, 18048, 18078, 18109, 18140, 17532, 17563, 17591, 17622, 
17652, 17683, 17713, 17744, 17775, 17805, 17836, 17866, 17897, 
17928, 17956, 17987, 18017, 18048, 18078, 18109, 18140, 17532, 
17563, 17591, 17622, 17652, 17683, 17713, 17744, 17775, 17805, 
17836, 17866, 17897, 17928, 17956, 17987, 18017, 18048, 18078, 
18109, 18140), class = "Date"), Gender = c("Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male"), Age = c("Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger"), attribute = c("Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B"), measure_1 = c(0.33, 0.31, 0.31, 0.16, 0.37, 0.29, 
0.27, 0.26, 0.24, 0.38, 0.47, 0.21, 0.32, 0.24, 0.26, 0.38, 0.38, 
0.39, 0.37, 0.3, 0.29, 0.48, 0.45, 0.45, 0.35, 0.49, 0.44, 0.41, 
0.44, 0.35, 0.38, 0.39, 0.55, 0.45, 0.43, 0.38, 0.38, 0.57, 0.47, 
0.51, 0.48, 0.32, 0.27, 0.22, 0.13, 0.02, 0.12, 0.16, 0.15, 0.17, 
0.23, 0.12, 0.31, 0.12, 0.16, 0.16, 0.16, 0.24, 0.06, 0.06, 0.17, 
0.15, 0.14, 0.37, 0.35, 0.2, 0.17, 0.25, 0.2, 0.3, 0.23, 0.26, 
0.14, 0.29, 0.35, 0.14, 0.32, 0.14, 0.14, 0.24, 0.18, 0.24, 0.24, 
0.17, 0.4, 0.3, 0.36, 0.41, 0.38, 0.31, 0.33, 0.43, 0.27, 0.31, 
0.26, 0.29, 0.25, 0.23, 0.38, 0.2, 0.29, 0.26, 0.22, 0.41, 0.25, 
0.45, 0.4, 0.54, 0.51, 0.48, 0.46, 0.4, 0.48, 0.29, 0.33, 0.36, 
0.48, 0.5, 0.32, 0.42, 0.43, 0.35, 0.35, 0.49, 0.44, 0.42, 0.48, 
0.34, 0.44, 0.38, 0.49, 0.27, 0.33, 0.42, 0.31, 0.32, 0.31, 0.38, 
0.46, 0.35, 0.4, 0.36, 0.38, 0.51, 0.41, 0.44, 0.36, 0.7, 0.57, 
0.66, 0.65, 0.57, 0.62, 0.53, 0.52, 0.43, 0.52, 0.53, 0.61, 0.67, 
0.59, 0.57, 0.55, 0.54, 0.67, 0.54, 0.57, 0.57), measure_2 = c(0.5, 
0.47, 0.48, 0.31, 0.54, 0.45, 0.43, 0.42, 0.4, 0.55, 0.66, 0.37, 
0.49, 0.4, 0.42, 0.56, 0.55, 0.57, 0.54, 0.47, 0.45, 0.66, 0.63, 
0.63, 0.52, 0.67, 0.62, 0.58, 0.61, 0.52, 0.55, 0.57, 0.74, 0.63, 
0.61, 0.56, 0.56, 0.77, 0.66, 0.7, 0.67, 0.49, 0.38, 0.32, 0.23, 
0.12, 0.22, 0.26, 0.25, 0.27, 0.34, 0.22, 0.41, 0.21, 0.26, 0.26, 
0.26, 0.34, 0.16, 0.16, 0.27, 0.25, 0.24, 0.48, 0.45, 0.31, 0.27, 
0.36, 0.3, 0.4, 0.34, 0.36, 0.24, 0.39, 0.45, 0.24, 0.43, 0.24, 
0.24, 0.35, 0.28, 0.34, 0.35, 0.27, 0.51, 0.43, 0.48, 0.52, 0.49, 
0.44, 0.46, 0.54, 0.4, 0.44, 0.4, 0.42, 0.39, 0.37, 0.49, 0.34, 
0.42, 0.39, 0.36, 0.52, 0.39, 0.56, 0.51, 0.63, 0.6, 0.58, 0.56, 
0.51, 0.58, 0.42, 0.46, 0.48, 0.58, 0.59, 0.45, 0.52, 0.54, 0.47, 
0.47, 0.58, 0.54, 0.53, 0.7, 0.62, 0.68, 0.64, 0.7, 0.59, 0.62, 
0.67, 0.61, 0.61, 0.61, 0.65, 0.69, 0.63, 0.65, 0.64, 0.64, 0.71, 
0.66, 0.68, 0.63, 0.81, 0.75, 0.8, 0.79, 0.75, 0.77, 0.72, 0.72, 
0.67, 0.72, 0.72, 0.77, 0.8, 0.76, 0.75, 0.73, 0.73, 0.8, 0.73, 
0.75, 0.74)), class = "data.frame", row.names = c(NA, -168L), na.action = structure(169:176, .Names = c("169", 
"170", "171", "172", "173", "174", "175", "176"), class = "omit"))

I'd like to find a tidyverse-style (%>%) solution for computing rolling statistics on grouped data over a 12-month window. That is, I would like to group by several categorical variables (such as age, gender and attribute) and compute the rolling 12-month average of every associated numeric variable.

This seems to work but the code is not easy to interpret

# Trailing 12-period moving average of every numeric column, computed
# separately for each Gender / Age / attribute combination (requires dplyr).
# The steps must sit on separate lines: when the pipeline is flattened onto a
# single line, everything after the first `#` becomes a comment and the
# statement reduces to `df1 <- df`.
df1 <- df %>%
  # `date` is already of class Date and is not numeric, so mutate_if() leaves
  # it alone; no as.Date() conversion or select(-date) step is needed.
  group_by(Gender, Age, attribute) %>%
  mutate_if(is.numeric, function(x, n = 12) {
    # sides = 1 uses only the current and the previous n - 1 observations, so
    # the first n - 1 results in each group are NA.
    stats::filter(x, rep(1 / n, n), sides = 1)
  })

I've read many posts on rollmean and rollmeanr but can't get it working with grouped data. How can I write a one or two line solution using such a straightforward function?

1) Using a smaller example (please provide minimal data in the future)

# Minimal example: two groups of three rows each, with two integer value columns.
DF <- data.frame(
  group = rep(c(1, 2), each = 3),
  value1 = 1:6,
  value2 = 7:12
)

library(dplyr)
library(zoo)

# Per group, append a right-aligned rolling mean (window of 2) of every column
# whose name contains "value"; the new columns get the suffix "_roll".
DF %>%
  group_by(group) %>%
  mutate_at(
    vars(contains("value")),
    list(roll = function(x) rollmeanr(x, k = 2, fill = NA))
  ) %>%
  ungroup()

giving:

# A tibble: 6 x 5
  group value1 value2 value1_roll value2_roll
  <dbl>  <int>  <int>       <dbl>       <dbl>
1     1      1      7        NA          NA  
2     1      2      8         1.5         7.5
3     1      3      9         2.5         8.5
4     2      4     10        NA          NA  
5     2      5     11         4.5        10.5
6     2      6     12         5.5        11.5

2) Or, if you don't need the original value1 and value2 columns:

# Per group, overwrite every column whose name contains "value" with its
# right-aligned rolling mean (window of 2, leading positions filled with NA).
DF %>%
  group_by(group) %>%
  mutate_at(
    vars(contains("value")),
    function(x) rollmeanr(x, k = 2, fill = NA)
  ) %>%
  ungroup()

giving:

# A tibble: 6 x 3
  group value1 value2
  <dbl>  <dbl>  <dbl>
1     1   NA     NA  
2     1    1.5    7.5
3     1    2.5    8.5
4     2   NA     NA  
5     2    4.5   10.5
6     2    5.5   11.5

3) Another approach is

# Per group, roll all columns except the first (the grouping column) at once
# and bind the result next to the original columns under the prefix "roll".
DF %>%
  group_by(group) %>%
  do({
    rolled <- rollmeanr(.[-1], k = 2, fill = NA)
    cbind(., roll = rolled)
  }) %>%
  ungroup()

or without the original value variables:

# Per group, keep only the rolled values: roll every column except the first
# (the grouping column) and convert the result to a data frame for do().
DF %>%
  group_by(group) %>%
  do(as.data.frame(rollmeanr(.[-1], k = 2, fill = NA))) %>%
  ungroup()

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM