简体   繁体   中英

Year on Year Comparison in R

I have the following dataframe and I would like to get a year-on-year comparison just like the image at the end.

structure(list(Var1 = structure(1:46, .Label = c("Sep 2015", 
"Apr 2018", "May 2018", "Dec 2018", "Apr 2019", "May 2019", "Jun 2019", 
"Jul 2019", "Sep 2019", "Nov 2019", "Dec 2019", "Jan 2020", "Feb 2020", 
"Mar 2020", "Apr 2020", "May 2020", "Jun 2020", "Jul 2020", "Aug 2020", 
"Sep 2020", "Oct 2020", "Nov 2020", "Dec 2020", "Jan 2021", "Feb 2021", 
"Mar 2021", "Apr 2021", "May 2021", "Jun 2021", "Jul 2021", "Aug 2021", 
"Sep 2021", "Oct 2021", "Nov 2021", "Dec 2021", "Jan 2022", "Feb 2022", 
"Mar 2022", "Apr 2022", "May 2022", "Jun 2022", "Jul 2022", "Aug 2022", 
"Sep 2022", "Oct 2022", "Nov 2022"), class = "factor"), Freq = c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 3L, 4L, 2L, 
2L, 1L, 3L, 6L, 7L, 5L, 6L, 10L, 11L, 7L, 7L, 7L, 4L, 4L, 4L, 
2L, 3L, 8L, 10L, 4L, 6L, 4L, 6L, 6L, 5L, 3L, 1L, 1L, 1L)), class = "data.frame", row.names = c(NA, 
-46L))

[Image: enter image description here]

Step 2 (After Comments): I created 3 data frames, one for each year

2020

structure(list(Var1 = structure(1:3, .Label = c("Oct 2020", "Nov 2020", 
"Dec 2020"), class = "factor"), Freq = 4:6, MonthInd = c("January", 
"February", "March"), Year = c("2020", "2020", "2020")), row.names = c(NA, 
-3L), class = "data.frame")

2021

structure(list(Var1 = structure(1:12, .Label = c("Jan 2021", 
"Feb 2021", "Mar 2021", "Apr 2021", "May 2021", "Jun 2021", "Jul 2021", 
"Aug 2021", "Sep 2021", "Oct 2021", "Nov 2021", "Dec 2021"), class = "factor"), 
    Freq = c(9L, 7L, 4L, 4L, 1L, 5L, 10L, 12L, 10L, 8L, 4L, 4L
    ), MonthInd = c("January", "February", "March", "April", 
    "May", "June", "July", "August", "September", "October", 
    "November", "December"), Year = c("2021", "2021", "2021", 
    "2021", "2021", "2021", "2021", "2021", "2021", "2021", "2021", 
    "2021")), row.names = c(NA, -12L), class = "data.frame")

2022

structure(list(Var1 = structure(1:11, .Label = c("Jan 2022", 
"Feb 2022", "Mar 2022", "Apr 2022", "May 2022", "Jun 2022", "Jul 2022", 
"Aug 2022", "Sep 2022", "Oct 2022", "Nov 2022"), class = "factor"), 
    Freq = c(6L, 9L, 14L, 7L, 2L, 11L, 13L, 4L, 5L, 4L, 2L), 
    MonthInd = c("January", "February", "March", "April", "May", 
    "June", "July", "August", "September", "October", "November"
    ), Year = c("2022", "2022", "2022", "2022", "2022", "2022", 
    "2022", "2022", "2022", "2022", "2022")), row.names = c(NA, 
-11L), class = "data.frame")

Now I want to merge them into a single data frame in the format below: Month - Freq.CNO_2020_Monthly - Freq.CNO_2021_Monthly - Freq.CNO_2022_Monthly

I tried it using list() then reduce():

# Gather the three per-year monthly data frames, oldest first.
ClosedYoY_List <- list(
  CNO_2020_Monthly,
  CNO_2021_Monthly,
  CNO_2022_Monthly
)

# Fold full_join() pairwise across the list, matching rows on MonthInd.
Final <- reduce(ClosedYoY_List, full_join, by = "MonthInd")

But the final result is faulty: October, November, and December are out of place.

structure(list(Var1.x = structure(c(1L, 2L, 3L, NA, NA, NA, NA, 
NA, NA, NA, NA, NA), .Label = c("Oct 2020", "Nov 2020", "Dec 2020"
), class = "factor"), Freq.x = c(4L, 5L, 6L, NA, NA, NA, NA, 
NA, NA, NA, NA, NA), MonthInd = c("January", "February", "March", 
"April", "May", "June", "July", "August", "September", "October", 
"November", "December"), Year.x = c("2020", "2020", "2020", NA, 
NA, NA, NA, NA, NA, NA, NA, NA), Var1.y = structure(1:12, .Label = c("Jan 2021", 
"Feb 2021", "Mar 2021", "Apr 2021", "May 2021", "Jun 2021", "Jul 2021", 
"Aug 2021", "Sep 2021", "Oct 2021", "Nov 2021", "Dec 2021"), class = "factor"), 
    Freq.y = c(9L, 7L, 4L, 4L, 1L, 5L, 10L, 12L, 10L, 8L, 4L, 
    4L), Year.y = c("2021", "2021", "2021", "2021", "2021", "2021", 
    "2021", "2021", "2021", "2021", "2021", "2021"), Var1 = structure(c(1L, 
    2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, NA), .Label = c("Jan 2022", 
    "Feb 2022", "Mar 2022", "Apr 2022", "May 2022", "Jun 2022", 
    "Jul 2022", "Aug 2022", "Sep 2022", "Oct 2022", "Nov 2022"
    ), class = "factor"), Freq = c(6L, 9L, 14L, 7L, 2L, 11L, 
    13L, 4L, 5L, 4L, 2L, NA), Year = c("2022", "2022", "2022", 
    "2022", "2022", "2022", "2022", "2022", "2022", "2022", "2022", 
    NA)), row.names = c(NA, -12L), class = "data.frame")

Any help on how to put those lists side by side in a neat way? (I am aware that I will need to rename columns afterwards, so for now the focus is on lining them up.)

Here's one option. Note that your test data does not correspond to your expected output.

library(tidyverse)

# Build a Year x Month table of frequencies with a per-year "Total" column
# and a final "Total" row holding the column sums.
# `df` is assumed to be the Var1 / Freq data frame posted in the question,
# where Var1 holds labels such as "Sep 2015".
df %>%
  # Split "Sep 2015" into a Month column ("Sep") and a Year column ("2015")
  separate(Var1, into = c("Month", "Year")) %>%
  # One row per year, one column per month; absent year/month pairs become 0
  pivot_wider(
    id_cols = Year,
    names_from = Month,
    values_from = Freq,
    values_fill = 0
  ) %>%
  # Put the month columns in calendar order. any_of() skips months that never
  # occur in the data instead of raising an error for a missing column.
  select(Year, any_of(month.abb)) %>%
  # Calculate yearly totals across the month columns
  rowwise() %>%
  mutate(Total = sum(c_across(-Year))) %>%
  # Remove the effects of rowwise()
  ungroup() %>%
  # Append the grand-total row. The piped table is still passed as the first
  # argument of bind_rows(); the nested `.` feeds the same table to summarise().
  # Summing explicitly (rather than adding an all-NA row and overwriting NAs)
  # leaves any genuine NA in the data untouched.
  bind_rows(summarise(., across(-Year, sum), Year = "Total"))
# A tibble: 7 × 14
  Year    Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov   Dec Total
  <chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 2015      0     0     0     0     0     0     0     0     1     0     0     0     1
2 2018      0     0     0     1     1     0     0     0     0     0     0     1     3
3 2019      0     0     0     1     1     1     1     0     2     0     1     1     8
4 2020      1     2     2     3     4     2     2     1     3     6     7     5    38
5 2021      6    10    11     7     7     7     4     4     4     2     3     8    73
6 2022     10     4     6     4     6     6     5     3     1     1     1     0    47
7 Total    17    16    19    16    19    16    12     8    11     9    12    15   170 

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM