簡體   English   中英

R 中的年度同比比較

[英]Year on Year Comparison in R

我有以下 dataframe,我想得到一個同比比較表,就像最後的圖像那樣。

# Sample input (dput output): a 46-row data.frame of counts per month label.
#   Var1 - factor whose levels are "Mon YYYY" labels, from "Sep 2015" to
#          "Nov 2022" (not every calendar month is present)
#   Freq - integer count recorded for that label
structure(list(Var1 = structure(1:46, .Label = c("Sep 2015", 
"Apr 2018", "May 2018", "Dec 2018", "Apr 2019", "May 2019", "Jun 2019", 
"Jul 2019", "Sep 2019", "Nov 2019", "Dec 2019", "Jan 2020", "Feb 2020", 
"Mar 2020", "Apr 2020", "May 2020", "Jun 2020", "Jul 2020", "Aug 2020", 
"Sep 2020", "Oct 2020", "Nov 2020", "Dec 2020", "Jan 2021", "Feb 2021", 
"Mar 2021", "Apr 2021", "May 2021", "Jun 2021", "Jul 2021", "Aug 2021", 
"Sep 2021", "Oct 2021", "Nov 2021", "Dec 2021", "Jan 2022", "Feb 2022", 
"Mar 2022", "Apr 2022", "May 2022", "Jun 2022", "Jul 2022", "Aug 2022", 
"Sep 2022", "Oct 2022", "Nov 2022"), class = "factor"), Freq = c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 3L, 4L, 2L, 
2L, 1L, 3L, 6L, 7L, 5L, 6L, 10L, 11L, 7L, 7L, 7L, 4L, 4L, 4L, 
2L, 3L, 8L, 10L, 4L, 6L, 4L, 6L, 6L, 5L, 3L, 1L, 1L, 1L)), class = "data.frame", row.names = c(NA, 
-46L))

在此處輸入圖像描述

第 2 步(根據評論補充):我創建了 3 個數據框,每年一個

2020年

# 2020 frame (dput output): 3 rows with columns Var1, Freq, MonthInd, Year.
# NOTE(review): Var1 holds "Oct 2020".."Dec 2020" while MonthInd holds
# "January".."March" - the two columns disagree, so any join keyed on
# MonthInd will place these rows under January-March. Confirm which labels
# are intended before joining.
structure(list(Var1 = structure(1:3, .Label = c("Oct 2020", "Nov 2020", 
"Dec 2020"), class = "factor"), Freq = 4:6, MonthInd = c("January", 
"February", "March"), Year = c("2020", "2020", "2020")), row.names = c(NA, 
-3L), class = "data.frame")

2021年

# 2021 frame (dput output): 12 rows, "Jan 2021" through "Dec 2021".
#   Var1     - factor of "Mon YYYY" labels
#   Freq     - integer count for the month
#   MonthInd - full month name ("January".."December"), in the same order as Var1
#   Year     - the year as a character string ("2021")
structure(list(Var1 = structure(1:12, .Label = c("Jan 2021", 
"Feb 2021", "Mar 2021", "Apr 2021", "May 2021", "Jun 2021", "Jul 2021", 
"Aug 2021", "Sep 2021", "Oct 2021", "Nov 2021", "Dec 2021"), class = "factor"), 
    Freq = c(9L, 7L, 4L, 4L, 1L, 5L, 10L, 12L, 10L, 8L, 4L, 4L
    ), MonthInd = c("January", "February", "March", "April", 
    "May", "June", "July", "August", "September", "October", 
    "November", "December"), Year = c("2021", "2021", "2021", 
    "2021", "2021", "2021", "2021", "2021", "2021", "2021", "2021", 
    "2021")), row.names = c(NA, -12L), class = "data.frame")

2022年

# 2022 frame (dput output): 11 rows, "Jan 2022" through "Nov 2022"
# (there is no December row). Same columns as the other yearly frames:
# Var1 (factor label), Freq (integer count), MonthInd (full month name),
# Year (character "2022").
structure(list(Var1 = structure(1:11, .Label = c("Jan 2022", 
"Feb 2022", "Mar 2022", "Apr 2022", "May 2022", "Jun 2022", "Jul 2022", 
"Aug 2022", "Sep 2022", "Oct 2022", "Nov 2022"), class = "factor"), 
    Freq = c(6L, 9L, 14L, 7L, 2L, 11L, 13L, 4L, 5L, 4L, 2L), 
    MonthInd = c("January", "February", "March", "April", "May", 
    "June", "July", "August", "September", "October", "November"
    ), Year = c("2022", "2022", "2022", "2022", "2022", "2022", 
    "2022", "2022", "2022", "2022", "2022")), row.names = c(NA, 
-11L), class = "data.frame")

現在,我想將它們合並為以下格式的數據框:Month - Freq.CNO_2020_Monthly - Freq.CNO_2021_Monthly - Freq.CNO_2022_Monthly

我嘗試先使用 list(),然後使用 reduce():

# Collect the three per-year frames, oldest first.
ClosedYoY_List <- list(CNO_2020_Monthly, CNO_2021_Monthly, CNO_2022_Monthly)

# Fold the list into one frame with successive full joins keyed on MonthInd.
Final <- reduce(ClosedYoY_List, full_join, by = "MonthInd")

但最終的結果是錯誤的:十月、十一月和十二月的數據沒有出現在正確的位置。

# Result of the reduce/full_join above (dput output): 12 rows keyed by MonthInd.
# Columns suffixed .x carry the 2020 labels, .y the 2021 labels, and the
# unsuffixed Var1/Freq/Year carry the 2022 labels.
# The "Oct 2020".."Dec 2020" values sit on the January-March rows, and the
# .x columns are NA for every other month, including October-December.
structure(list(Var1.x = structure(c(1L, 2L, 3L, NA, NA, NA, NA, 
NA, NA, NA, NA, NA), .Label = c("Oct 2020", "Nov 2020", "Dec 2020"
), class = "factor"), Freq.x = c(4L, 5L, 6L, NA, NA, NA, NA, 
NA, NA, NA, NA, NA), MonthInd = c("January", "February", "March", 
"April", "May", "June", "July", "August", "September", "October", 
"November", "December"), Year.x = c("2020", "2020", "2020", NA, 
NA, NA, NA, NA, NA, NA, NA, NA), Var1.y = structure(1:12, .Label = c("Jan 2021", 
"Feb 2021", "Mar 2021", "Apr 2021", "May 2021", "Jun 2021", "Jul 2021", 
"Aug 2021", "Sep 2021", "Oct 2021", "Nov 2021", "Dec 2021"), class = "factor"), 
    Freq.y = c(9L, 7L, 4L, 4L, 1L, 5L, 10L, 12L, 10L, 8L, 4L, 
    4L), Year.y = c("2021", "2021", "2021", "2021", "2021", "2021", 
    "2021", "2021", "2021", "2021", "2021", "2021"), Var1 = structure(c(1L, 
    2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, NA), .Label = c("Jan 2022", 
    "Feb 2022", "Mar 2022", "Apr 2022", "May 2022", "Jun 2022", 
    "Jul 2022", "Aug 2022", "Sep 2022", "Oct 2022", "Nov 2022"
    ), class = "factor"), Freq = c(6L, 9L, 14L, 7L, 2L, 11L, 
    13L, 4L, 5L, 4L, 2L, NA), Year = c("2022", "2022", "2022", 
    "2022", "2022", "2022", "2022", "2022", "2022", "2022", "2022", 
    NA)), row.names = c(NA, -12L), class = "data.frame")

有沒有辦法以整潔的方式把這些數據框並排放在一起? (我知道之後還需要重命名列,所以目前的重點只是讓各月份對齊在同一行)。

這是一種做法。請注意,您的測試數據與您預期的輸出並不一致。

library(tidyverse)

df %>%
  # Split each "Mon YYYY" label into its month and year parts
  separate(Var1, into = c("Month", "Year")) %>%
  # One row per year, one column per month; months with no data become 0
  pivot_wider(
    id_cols = Year,
    names_from = Month,
    values_from = Freq,
    values_fill = 0
  ) %>%
  # Calendar order for the month columns (month.abb is "Jan", ..., "Dec")
  select(Year, all_of(month.abb)) %>%
  # Per-year total across the twelve month columns
  rowwise() %>%
  mutate(Total = sum(c_across(!Year))) %>%
  # Leave row-wise mode so the next steps work column-wise again
  ungroup() %>%
  # Append a "Total" row (all NA), then fill each NA with its column's sum
  add_row(Year = "Total") %>%
  mutate(
    across(
      !Year,
      function(col) ifelse(is.na(col), sum(col, na.rm = TRUE), col)
    )
  )
# A tibble: 7 × 14
  Year    Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov   Dec Total
  <chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 2015      0     0     0     0     0     0     0     0     1     0     0     0     1
2 2018      0     0     0     1     1     0     0     0     0     0     0     1     3
3 2019      0     0     0     1     1     1     1     0     2     0     1     1     8
4 2020      1     2     2     3     4     2     2     1     3     6     7     5    38
5 2021      6    10    11     7     7     7     4     4     4     2     3     8    73
6 2022     10     4     6     4     6     6     5     3     1     1     1     0    47
7 Total    17    16    19    16    19    16    12     8    11     9    12    15   170 

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM