I have the following dataframe and I would like to get a year-on-year comparison just like the image at the end.
structure(list(Var1 = structure(1:46, .Label = c("Sep 2015",
"Apr 2018", "May 2018", "Dec 2018", "Apr 2019", "May 2019", "Jun 2019",
"Jul 2019", "Sep 2019", "Nov 2019", "Dec 2019", "Jan 2020", "Feb 2020",
"Mar 2020", "Apr 2020", "May 2020", "Jun 2020", "Jul 2020", "Aug 2020",
"Sep 2020", "Oct 2020", "Nov 2020", "Dec 2020", "Jan 2021", "Feb 2021",
"Mar 2021", "Apr 2021", "May 2021", "Jun 2021", "Jul 2021", "Aug 2021",
"Sep 2021", "Oct 2021", "Nov 2021", "Dec 2021", "Jan 2022", "Feb 2022",
"Mar 2022", "Apr 2022", "May 2022", "Jun 2022", "Jul 2022", "Aug 2022",
"Sep 2022", "Oct 2022", "Nov 2022"), class = "factor"), Freq = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 3L, 4L, 2L,
2L, 1L, 3L, 6L, 7L, 5L, 6L, 10L, 11L, 7L, 7L, 7L, 4L, 4L, 4L,
2L, 3L, 8L, 10L, 4L, 6L, 4L, 6L, 6L, 5L, 3L, 1L, 1L, 1L)), class = "data.frame", row.names = c(NA,
-46L))
Step 2 (After Comments): I created 3 data frames, one for each year
2020
structure(list(Var1 = structure(1:3, .Label = c("Oct 2020", "Nov 2020",
"Dec 2020"), class = "factor"), Freq = 4:6, MonthInd = c("January",
"February", "March"), Year = c("2020", "2020", "2020")), row.names = c(NA,
-3L), class = "data.frame")
2021
structure(list(Var1 = structure(1:12, .Label = c("Jan 2021",
"Feb 2021", "Mar 2021", "Apr 2021", "May 2021", "Jun 2021", "Jul 2021",
"Aug 2021", "Sep 2021", "Oct 2021", "Nov 2021", "Dec 2021"), class = "factor"),
Freq = c(9L, 7L, 4L, 4L, 1L, 5L, 10L, 12L, 10L, 8L, 4L, 4L
), MonthInd = c("January", "February", "March", "April",
"May", "June", "July", "August", "September", "October",
"November", "December"), Year = c("2021", "2021", "2021",
"2021", "2021", "2021", "2021", "2021", "2021", "2021", "2021",
"2021")), row.names = c(NA, -12L), class = "data.frame")
2022
structure(list(Var1 = structure(1:11, .Label = c("Jan 2022",
"Feb 2022", "Mar 2022", "Apr 2022", "May 2022", "Jun 2022", "Jul 2022",
"Aug 2022", "Sep 2022", "Oct 2022", "Nov 2022"), class = "factor"),
Freq = c(6L, 9L, 14L, 7L, 2L, 11L, 13L, 4L, 5L, 4L, 2L),
MonthInd = c("January", "February", "March", "April", "May",
"June", "July", "August", "September", "October", "November"
), Year = c("2022", "2022", "2022", "2022", "2022", "2022",
"2022", "2022", "2022", "2022", "2022")), row.names = c(NA,
-11L), class = "data.frame")
Now, I want to merge them into a single data frame in the format below: Month - Freq.CNO_2020_Monthly - Freq.CNO_2021_Monthly - Freq.CNO_2022_Monthly
I tried it using list() then reduce():
# Gather the three per-year frames into one list, then fold them together
# with successive full joins. Rows are matched purely on the MonthInd key,
# so the result is only as good as each frame's MonthInd labels.
ClosedYoY_List <- list(CNO_2020_Monthly, CNO_2021_Monthly, CNO_2022_Monthly)
Final <- reduce(ClosedYoY_List, full_join, by = "MonthInd")
But the final result is faulty: October, November, and December are out of place.
structure(list(Var1.x = structure(c(1L, 2L, 3L, NA, NA, NA, NA,
NA, NA, NA, NA, NA), .Label = c("Oct 2020", "Nov 2020", "Dec 2020"
), class = "factor"), Freq.x = c(4L, 5L, 6L, NA, NA, NA, NA,
NA, NA, NA, NA, NA), MonthInd = c("January", "February", "March",
"April", "May", "June", "July", "August", "September", "October",
"November", "December"), Year.x = c("2020", "2020", "2020", NA,
NA, NA, NA, NA, NA, NA, NA, NA), Var1.y = structure(1:12, .Label = c("Jan 2021",
"Feb 2021", "Mar 2021", "Apr 2021", "May 2021", "Jun 2021", "Jul 2021",
"Aug 2021", "Sep 2021", "Oct 2021", "Nov 2021", "Dec 2021"), class = "factor"),
Freq.y = c(9L, 7L, 4L, 4L, 1L, 5L, 10L, 12L, 10L, 8L, 4L,
4L), Year.y = c("2021", "2021", "2021", "2021", "2021", "2021",
"2021", "2021", "2021", "2021", "2021", "2021"), Var1 = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, NA), .Label = c("Jan 2022",
"Feb 2022", "Mar 2022", "Apr 2022", "May 2022", "Jun 2022",
"Jul 2022", "Aug 2022", "Sep 2022", "Oct 2022", "Nov 2022"
), class = "factor"), Freq = c(6L, 9L, 14L, 7L, 2L, 11L,
13L, 4L, 5L, 4L, 2L, NA), Year = c("2022", "2022", "2022",
"2022", "2022", "2022", "2022", "2022", "2022", "2022", "2022",
NA)), row.names = c(NA, -12L), class = "data.frame")
Any help on how to put those lists side by side in a neat way? (I am aware that I will need to rename columns afterwards, so for now the focus is on lining them up.)
Here's one option. Note that your test data does not correspond to your expected output.
library(tidyverse)
# Build a Year x Month table of counts from `df` (columns `Var1`, holding
# labels such as "Sep 2015", and `Freq`), with a per-year `Total` column and
# a final grand-total row.
df %>%
  # Split the "Mon YYYY" label into separate Month and Year columns
  separate(Var1, into = c("Month", "Year")) %>%
  # Pivot to wide format: one row per year, one column per month.
  # Month/year combinations that are absent from the data become 0.
  pivot_wider(
    id_cols = Year,
    names_from = Month,
    values_from = Freq,
    values_fill = 0
  ) %>%
  # Put the month columns in calendar order. any_of() skips a month that
  # never occurs in the data instead of failing on a missing column.
  select(Year, any_of(month.abb)) %>%
  # Calculate yearly totals
  rowwise() %>%
  mutate(Total = sum(c_across(-Year))) %>%
  # Remove the effects of rowwise()
  ungroup() %>%
  # Append the grand-total row. The column sums are computed explicitly
  # (`.` is the table built so far) rather than by adding an all-NA row and
  # overwriting NAs, which would also clobber any genuine NA in the data.
  bind_rows(
    summarise(
      .,
      across(-Year, function(x) sum(x, na.rm = TRUE)),
      Year = "Total"
    )
  )
# A tibble: 7 × 14
Year Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec Total
<chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 2015 0 0 0 0 0 0 0 0 1 0 0 0 1
2 2018 0 0 0 1 1 0 0 0 0 0 0 1 3
3 2019 0 0 0 1 1 1 1 0 2 0 1 1 8
4 2020 1 2 2 3 4 2 2 1 3 6 7 5 38
5 2021 6 10 11 7 7 7 4 4 4 2 3 8 73
6 2022 10 4 6 4 6 6 5 3 1 1 1 0 47
7 Total 17 16 19 16 19 16 12 8 11 9 12 15 170
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.