简体   繁体   English

如何在 dplyr 分组数据上使用 rollmean

[英]How to use rollmean on dplyr grouped data

I hope my example data doesn't seem too large我希望我的示例数据看起来不会太大

df <- structure(list(date = structure(c(17532, 17563, 17591, 17622, 
17652, 17683, 17713, 17744, 17775, 17805, 17836, 17866, 17897, 
17928, 17956, 17987, 18017, 18048, 18078, 18109, 18140, 17532, 
17563, 17591, 17622, 17652, 17683, 17713, 17744, 17775, 17805, 
17836, 17866, 17897, 17928, 17956, 17987, 18017, 18048, 18078, 
18109, 18140, 17532, 17563, 17591, 17622, 17652, 17683, 17713, 
17744, 17775, 17805, 17836, 17866, 17897, 17928, 17956, 17987, 
18017, 18048, 18078, 18109, 18140, 17532, 17563, 17591, 17622, 
17652, 17683, 17713, 17744, 17775, 17805, 17836, 17866, 17897, 
17928, 17956, 17987, 18017, 18048, 18078, 18109, 18140, 17532, 
17563, 17591, 17622, 17652, 17683, 17713, 17744, 17775, 17805, 
17836, 17866, 17897, 17928, 17956, 17987, 18017, 18048, 18078, 
18109, 18140, 17532, 17563, 17591, 17622, 17652, 17683, 17713, 
17744, 17775, 17805, 17836, 17866, 17897, 17928, 17956, 17987, 
18017, 18048, 18078, 18109, 18140, 17532, 17563, 17591, 17622, 
17652, 17683, 17713, 17744, 17775, 17805, 17836, 17866, 17897, 
17928, 17956, 17987, 18017, 18048, 18078, 18109, 18140, 17532, 
17563, 17591, 17622, 17652, 17683, 17713, 17744, 17775, 17805, 
17836, 17866, 17897, 17928, 17956, 17987, 18017, 18048, 18078, 
18109, 18140), class = "Date"), Gender = c("Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male"), Age = c("Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Older", "Older", "Older", "Older", "Older", 
"Older", "Older", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger", "Younger", "Younger", "Younger", "Younger", 
"Younger", "Younger"), attribute = c("Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling A", "Feeling A", "Feeling A", "Feeling A", "Feeling A", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B", "Feeling B", "Feeling B", "Feeling B", "Feeling B", 
"Feeling B"), measure_1 = c(0.33, 0.31, 0.31, 0.16, 0.37, 0.29, 
0.27, 0.26, 0.24, 0.38, 0.47, 0.21, 0.32, 0.24, 0.26, 0.38, 0.38, 
0.39, 0.37, 0.3, 0.29, 0.48, 0.45, 0.45, 0.35, 0.49, 0.44, 0.41, 
0.44, 0.35, 0.38, 0.39, 0.55, 0.45, 0.43, 0.38, 0.38, 0.57, 0.47, 
0.51, 0.48, 0.32, 0.27, 0.22, 0.13, 0.02, 0.12, 0.16, 0.15, 0.17, 
0.23, 0.12, 0.31, 0.12, 0.16, 0.16, 0.16, 0.24, 0.06, 0.06, 0.17, 
0.15, 0.14, 0.37, 0.35, 0.2, 0.17, 0.25, 0.2, 0.3, 0.23, 0.26, 
0.14, 0.29, 0.35, 0.14, 0.32, 0.14, 0.14, 0.24, 0.18, 0.24, 0.24, 
0.17, 0.4, 0.3, 0.36, 0.41, 0.38, 0.31, 0.33, 0.43, 0.27, 0.31, 
0.26, 0.29, 0.25, 0.23, 0.38, 0.2, 0.29, 0.26, 0.22, 0.41, 0.25, 
0.45, 0.4, 0.54, 0.51, 0.48, 0.46, 0.4, 0.48, 0.29, 0.33, 0.36, 
0.48, 0.5, 0.32, 0.42, 0.43, 0.35, 0.35, 0.49, 0.44, 0.42, 0.48, 
0.34, 0.44, 0.38, 0.49, 0.27, 0.33, 0.42, 0.31, 0.32, 0.31, 0.38, 
0.46, 0.35, 0.4, 0.36, 0.38, 0.51, 0.41, 0.44, 0.36, 0.7, 0.57, 
0.66, 0.65, 0.57, 0.62, 0.53, 0.52, 0.43, 0.52, 0.53, 0.61, 0.67, 
0.59, 0.57, 0.55, 0.54, 0.67, 0.54, 0.57, 0.57), measure_2 = c(0.5, 
0.47, 0.48, 0.31, 0.54, 0.45, 0.43, 0.42, 0.4, 0.55, 0.66, 0.37, 
0.49, 0.4, 0.42, 0.56, 0.55, 0.57, 0.54, 0.47, 0.45, 0.66, 0.63, 
0.63, 0.52, 0.67, 0.62, 0.58, 0.61, 0.52, 0.55, 0.57, 0.74, 0.63, 
0.61, 0.56, 0.56, 0.77, 0.66, 0.7, 0.67, 0.49, 0.38, 0.32, 0.23, 
0.12, 0.22, 0.26, 0.25, 0.27, 0.34, 0.22, 0.41, 0.21, 0.26, 0.26, 
0.26, 0.34, 0.16, 0.16, 0.27, 0.25, 0.24, 0.48, 0.45, 0.31, 0.27, 
0.36, 0.3, 0.4, 0.34, 0.36, 0.24, 0.39, 0.45, 0.24, 0.43, 0.24, 
0.24, 0.35, 0.28, 0.34, 0.35, 0.27, 0.51, 0.43, 0.48, 0.52, 0.49, 
0.44, 0.46, 0.54, 0.4, 0.44, 0.4, 0.42, 0.39, 0.37, 0.49, 0.34, 
0.42, 0.39, 0.36, 0.52, 0.39, 0.56, 0.51, 0.63, 0.6, 0.58, 0.56, 
0.51, 0.58, 0.42, 0.46, 0.48, 0.58, 0.59, 0.45, 0.52, 0.54, 0.47, 
0.47, 0.58, 0.54, 0.53, 0.7, 0.62, 0.68, 0.64, 0.7, 0.59, 0.62, 
0.67, 0.61, 0.61, 0.61, 0.65, 0.69, 0.63, 0.65, 0.64, 0.64, 0.71, 
0.66, 0.68, 0.63, 0.81, 0.75, 0.8, 0.79, 0.75, 0.77, 0.72, 0.72, 
0.67, 0.72, 0.72, 0.77, 0.8, 0.76, 0.75, 0.73, 0.73, 0.8, 0.73, 
0.75, 0.74)), class = "data.frame", row.names = c(NA, -168L), na.action = structure(169:176, .Names = c("169", 
"170", "171", "172", "173", "174", "175", "176"), class = "omit"))

I'd like to find a tidy type %>% solution to rolling grouped data on a 12 month basis.我想找到一个整洁的%>%解决方案来滚动分组数据 12 个月。 That is, I would like to group multiple categorical variables (such as age and gender and measurement classes) and find the rolling 12 month average for any associated numeric variables也就是说,我想对多个分类变量(例如年龄、性别和测量类别)进行分组,并找到任何相关数字变量的 12 个月滚动平均值

This seems to work but the code is not easy to interpret这似乎有效,但代码不容易解释

# Trailing 12-month moving average of every numeric column, computed
# separately within each Gender x Age x attribute group.
# NOTE(review): this pipeline was reconstructed from a one-line paste in which
# everything after `df` had ended up behind a `#`; confirm that dropping
# `date` is intended.
df1 <- df %>%
  # mutate(date = as.Date(date)) %>%  # left disabled: `date` is already a Date
  select(-date) %>%
  group_by(Gender, Age, attribute) %>%
  mutate_if(is.numeric, function(x, n = 12) {
    # Equal weights 1 / n give a plain mean; sides = 1 uses only the current
    # and previous n - 1 values, so the first n - 1 rows of each group are NA.
    stats::filter(x, rep(1 / n, n), sides = 1)
  })

I've read many posts on rollmean and rollmeanr but can't get it working with grouped data.我已经阅读了许多关于 rollmean 和 rollmeanr 的帖子,但无法使用分组数据。 How can I write a one or two line solution using such a straightforward function?如何使用如此简单的函数编写一两行解决方案?

1) Using a smaller example (please provide minimal data in the future) 1)使用较小的例子(请在未来提供最少的数据)

# Minimal example data: two groups of three rows each, with two integer
# value columns.
DF <- data.frame(
  group = rep(c(1, 2), each = 3),
  value1 = 1:6,
  value2 = 7:12
)

library(dplyr)
library(zoo)

# Within each `group`, append a right-aligned rolling mean of width 2 for every
# column whose name contains "value". The new columns are named <column>_roll;
# rows without a full window are filled with NA.
DF %>%
  group_by(group) %>%
  mutate(across(
    contains("value"),
    list(roll = ~ rollmeanr(.x, k = 2, fill = NA))
  )) %>%
  ungroup()

giving:结果如下:

# A tibble: 6 x 5
  group value1 value2 value1_roll value2_roll
  <dbl>  <int>  <int>       <dbl>       <dbl>
1     1      1      7        NA          NA  
2     1      2      8         1.5         7.5
3     1      3      9         2.5         8.5
4     2      4     10        NA          NA  
5     2      5     11         4.5        10.5
6     2      6     12         5.5        11.5

2) Or, if you don't need the original value1 and value2 columns: 2)或者,如果您不需要原始的 value1 和 value2 列:

# Within each `group`, overwrite every column whose name contains "value" with
# its right-aligned rolling mean of width 2; rows without a full window are
# filled with NA.
DF %>%
  group_by(group) %>%
  mutate(across(contains("value"), ~ rollmeanr(.x, k = 2, fill = NA))) %>%
  ungroup()

giving:结果如下:

# A tibble: 6 x 3
  group value1 value2
  <dbl>  <dbl>  <dbl>
1     1   NA     NA  
2     1    1.5    7.5
3     1    2.5    8.5
4     2   NA     NA  
5     2    4.5   10.5
6     2    5.5   11.5

3) Another approach is 3)另一种方法是

# Apply rollmeanr() to all non-grouping columns of each group at once and bind
# the result next to the original columns. Inside group_modify(), `.x` already
# excludes the grouping column, so no positional drop is needed.
# NOTE(review): the bound columns are expected to carry a `roll.` prefix
# (e.g. roll.value1) - confirm against actual output.
DF %>%
  group_by(group) %>%
  group_modify(~ cbind(.x, roll = rollmeanr(.x, k = 2, fill = NA))) %>%
  ungroup()

or without the original value variables:或没有原始value变量:

# Replace the non-grouping columns of each group with their right-aligned
# rolling means of width 2. Inside group_modify(), `.x` already excludes the
# grouping column; the rollmeanr() result is converted to a data frame because
# group_modify() requires one.
DF %>%
  group_by(group) %>%
  group_modify(~ as.data.frame(rollmeanr(.x, k = 2, fill = NA))) %>%
  ungroup()

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM