[英]Year on Year Comparison in R
我有以下 dataframe,我想得到一個同比比較,就像最后的圖像一樣。
structure(list(Var1 = structure(1:46, .Label = c("Sep 2015",
"Apr 2018", "May 2018", "Dec 2018", "Apr 2019", "May 2019", "Jun 2019",
"Jul 2019", "Sep 2019", "Nov 2019", "Dec 2019", "Jan 2020", "Feb 2020",
"Mar 2020", "Apr 2020", "May 2020", "Jun 2020", "Jul 2020", "Aug 2020",
"Sep 2020", "Oct 2020", "Nov 2020", "Dec 2020", "Jan 2021", "Feb 2021",
"Mar 2021", "Apr 2021", "May 2021", "Jun 2021", "Jul 2021", "Aug 2021",
"Sep 2021", "Oct 2021", "Nov 2021", "Dec 2021", "Jan 2022", "Feb 2022",
"Mar 2022", "Apr 2022", "May 2022", "Jun 2022", "Jul 2022", "Aug 2022",
"Sep 2022", "Oct 2022", "Nov 2022"), class = "factor"), Freq = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 3L, 4L, 2L,
2L, 1L, 3L, 6L, 7L, 5L, 6L, 10L, 11L, 7L, 7L, 7L, 4L, 4L, 4L,
2L, 3L, 8L, 10L, 4L, 6L, 4L, 6L, 6L, 5L, 3L, 1L, 1L, 1L)), class = "data.frame", row.names = c(NA,
-46L))
第 2 步(評論后):我創建了 3 個數據框,每年一個
2020年
structure(list(Var1 = structure(1:3, .Label = c("Oct 2020", "Nov 2020",
"Dec 2020"), class = "factor"), Freq = 4:6, MonthInd = c("January",
"February", "March"), Year = c("2020", "2020", "2020")), row.names = c(NA,
-3L), class = "data.frame")
2021年
structure(list(Var1 = structure(1:12, .Label = c("Jan 2021",
"Feb 2021", "Mar 2021", "Apr 2021", "May 2021", "Jun 2021", "Jul 2021",
"Aug 2021", "Sep 2021", "Oct 2021", "Nov 2021", "Dec 2021"), class = "factor"),
Freq = c(9L, 7L, 4L, 4L, 1L, 5L, 10L, 12L, 10L, 8L, 4L, 4L
), MonthInd = c("January", "February", "March", "April",
"May", "June", "July", "August", "September", "October",
"November", "December"), Year = c("2021", "2021", "2021",
"2021", "2021", "2021", "2021", "2021", "2021", "2021", "2021",
"2021")), row.names = c(NA, -12L), class = "data.frame")
2022年
structure(list(Var1 = structure(1:11, .Label = c("Jan 2022",
"Feb 2022", "Mar 2022", "Apr 2022", "May 2022", "Jun 2022", "Jul 2022",
"Aug 2022", "Sep 2022", "Oct 2022", "Nov 2022"), class = "factor"),
Freq = c(6L, 9L, 14L, 7L, 2L, 11L, 13L, 4L, 5L, 4L, 2L),
MonthInd = c("January", "February", "March", "April", "May",
"June", "July", "August", "September", "October", "November"
), Year = c("2022", "2022", "2022", "2022", "2022", "2022",
"2022", "2022", "2022", "2022", "2022")), row.names = c(NA,
-11L), class = "data.frame")
現在,我想將它們合並為以下格式的數據框:Month - Freq.CNO_2020_Monthly - Freq.CNO_2021_Monthly - Freq.CNO_2022_Monthly
我嘗試使用 list() 然后 reduce():
# Gather the three yearly frames, then fold them into one frame with
# successive full joins keyed on the month name. Non-key columns that share
# a name across frames (Var1, Freq, Year) receive .x / .y suffixes.
ClosedYoY_List <- list(CNO_2020_Monthly, CNO_2021_Monthly, CNO_2022_Monthly)
Final <- reduce(ClosedYoY_List, full_join, by = "MonthInd")
但最終的結果是錯誤的:十月、十一月和十二月的數據沒有對齊到正確的月份。
structure(list(Var1.x = structure(c(1L, 2L, 3L, NA, NA, NA, NA,
NA, NA, NA, NA, NA), .Label = c("Oct 2020", "Nov 2020", "Dec 2020"
), class = "factor"), Freq.x = c(4L, 5L, 6L, NA, NA, NA, NA,
NA, NA, NA, NA, NA), MonthInd = c("January", "February", "March",
"April", "May", "June", "July", "August", "September", "October",
"November", "December"), Year.x = c("2020", "2020", "2020", NA,
NA, NA, NA, NA, NA, NA, NA, NA), Var1.y = structure(1:12, .Label = c("Jan 2021",
"Feb 2021", "Mar 2021", "Apr 2021", "May 2021", "Jun 2021", "Jul 2021",
"Aug 2021", "Sep 2021", "Oct 2021", "Nov 2021", "Dec 2021"), class = "factor"),
Freq.y = c(9L, 7L, 4L, 4L, 1L, 5L, 10L, 12L, 10L, 8L, 4L,
4L), Year.y = c("2021", "2021", "2021", "2021", "2021", "2021",
"2021", "2021", "2021", "2021", "2021", "2021"), Var1 = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, NA), .Label = c("Jan 2022",
"Feb 2022", "Mar 2022", "Apr 2022", "May 2022", "Jun 2022",
"Jul 2022", "Aug 2022", "Sep 2022", "Oct 2022", "Nov 2022"
), class = "factor"), Freq = c(6L, 9L, 14L, 7L, 2L, 11L,
13L, 4L, 5L, 4L, 2L, NA), Year = c("2022", "2022", "2022",
"2022", "2022", "2022", "2022", "2022", "2022", "2022", "2022",
NA)), row.names = c(NA, -12L), class = "data.frame")
有沒有辦法以整潔的方式把這些數據框並排合併?任何幫助都非常感謝。(我知道之后我需要重命名列,所以現在的重點是將它們按月份對齊成一行。)
這是一種做法。請注意,您的測試數據與您預期的輸出並不一致。
library(tidyverse)
# Build a Year x Month table of `Freq` with a per-year total column and a
# grand-total row. `df` is the question's data frame: `Var1` holds
# "Mon YYYY" labels and `Freq` holds the integer value for that month.
yearly_counts <- df %>%
  # Split e.g. "Sep 2015" into Month = "Sep" and Year = "2015"
  separate(Var1, into = c("Month", "Year")) %>%
  # One row per year, one column per month; absent months are filled with 0
  pivot_wider(
    id_cols = Year,
    names_from = Month,
    values_from = Freq,
    values_fill = 0
  ) %>%
  # Put the month columns in calendar order (month.abb is "Jan" ... "Dec")
  select(Year, all_of(month.abb)) %>%
  # Per-year total across the twelve month columns
  rowwise() %>%
  mutate(Total = sum(c_across(-Year))) %>%
  # Remove the effects of rowwise()
  ungroup()

# Append the grand-total row. The column sums are computed explicitly rather
# than by adding an all-NA row and overwriting every NA, so a genuine NA in a
# data row is never silently replaced by the column total.
yearly_counts %>%
  bind_rows(
    summarise(
      yearly_counts,
      across(-Year, function(x) sum(x, na.rm = TRUE)),
      Year = "Total"
    )
  )
# A tibble: 7 × 14
Year Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec Total
<chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 2015 0 0 0 0 0 0 0 0 1 0 0 0 1
2 2018 0 0 0 1 1 0 0 0 0 0 0 1 3
3 2019 0 0 0 1 1 1 1 0 2 0 1 1 8
4 2020 1 2 2 3 4 2 2 1 3 6 7 5 38
5 2021 6 10 11 7 7 7 4 4 4 2 3 8 73
6 2022 10 4 6 4 6 6 5 3 1 1 1 0 47
7 Total 17 16 19 16 19 16 12 8 11 9 12 15 170
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.