I've got a dataset that looks like this
#Determine the M, M-1 and M-2 values for every site
df = data.frame(Site=c(rep('x',5),rep('y',5),rep('z',5)),
Date=c(as.Date('2020/09/11'),as.Date('2020/09/11'),
as.Date('2020/08/02'),as.Date('2020/07/09'),
as.Date('2020/05/15'),as.Date('2020/08/02'),
as.Date('2020/07/10'),as.Date('2020/07/19'),
as.Date('2020/06/27'),as.Date('2020/03/01'),
as.Date('2020/04/07'),as.Date('2020/01/01'),
as.Date('2019/11/10'),as.Date('2019/11/11'),
as.Date('2019/11/07')))
I'm trying to transform the dataset such that
I pick the top 3 most recent months from the Date column for each Site
Spread the Date column such that it is in 'M' 'M-1' and 'M-2' format
'M' being most recent month, 'M-1' being second most recent month, 'M-2' being third most recent month
Convert Date from YYYY/MM/DD format to Month-Year format eg 2020/01/01 = JAN 2020
My attempt
For step 1.
transformed = df %>%
group_by(Site) %>%
arrange(desc(Date)) %>%
slice(1:3)
This works fine up till here. I've been trying various ways of spreading the dates to the desired format, but I haven't succeeded. I'm a bit new to this so any inputs would help a lot.
Desired output
I'd like my desired output to look like this
#Output table
output = data.frame(Site=c('x','y','z'),
M=c('SEP 2020','AUG 2020','APR 2020'),
`M-1`=c('AUG 2020','JUL 2020','JAN 2020'),
`M-2`=c('JUL 2020','JUN 2020','NOV 2019'))
colnames(output)=c('Site','M','M-1','M-2')
Modification: September 21, 2020
I've slightly modified the original dataframe df
to factor in cases where we have more than one date in a given month. In such a situation, how can the solutions you've proposed be modified to get the same output
?
Try:
df %>%
group_by(Site) %>%
arrange(desc(Date)) %>%
slice(1:3) %>%
mutate(
id_within = str_c("M_", 1:3),
Date = format(Date, "%b %Y")
) %>%
pivot_wider(
id_cols = Site,
names_from = id_within,
values_from = Date
)
You can also try this solution which is close to what you want:
library(tidyverse)
#Code
df %>%
arrange(Site,desc(Date)) %>%
group_by(Site) %>%
mutate(Month=format(Date,'%b %Y')) %>%
#Index for top month
mutate(index=1:n()) %>%
#Filter
filter(index<=3) %>%
#Labels
mutate(Label=paste0('M-',(1:n())-1)) %>%
ungroup() %>%
select(-c(Date,index)) %>%
pivot_wider(names_from=Label,values_from=Month)
Output:
# A tibble: 3 x 4
Site `M-0` `M-1` `M-2`
<fct> <chr> <chr> <chr>
1 x Sep 2020 Aug 2020 Jul 2020
2 y Aug 2020 Jul 2020 Jun 2020
3 z Apr 2020 Jan 2020 Nov 2019
Update: You can try this alternative for you new data:
#Code 2
df %>%
arrange(Site,desc(Date)) %>%
group_by(Site) %>%
mutate(Month=format(Date,'%b %Y')) %>%
#Remove duplicated dates
filter(!duplicated(Month)) %>%
#Index for top month
mutate(index=1:n()) %>%
#Filter
filter(index<=3) %>%
#Labels
mutate(Label=paste0('M-',(1:n())-1)) %>%
ungroup() %>%
select(-c(Date,index)) %>%
pivot_wider(names_from=Label,values_from=Month)
Output:
# A tibble: 3 x 4
Site `M-0` `M-1` `M-2`
<fct> <chr> <chr> <chr>
1 x Sep 2020 Aug 2020 Jul 2020
2 y Aug 2020 Jul 2020 Jun 2020
3 z Apr 2020 Jan 2020 Nov 2019
Here is one way with dplyr
:
library(dplyr)
df %>%
#Get month and year information from date
mutate(month = toupper(format(Date, '%b %Y'))) %>%
#arrange data in descending order
arrange(Site, desc(Date)) %>%
#for each site
group_by(Site) %>%
#Create a unique id number
mutate(id = match(month, unique(month))) %>%
#select only 3 unique id's for each site
filter(id <= 3) %>%
#Create column name
mutate(col = c('M', 'M-1', 'M-2')[id]) %>%
#remove columns which are not required
select(-Date, -id) %>%
#get data in wide format
tidyr::pivot_wider(names_from = col, values_from = month, values_fn = unique)
# Site M `M-1` `M-2`
# <chr> <chr> <chr> <chr>
#1 x SEP 2020 AUG 2020 JUL 2020
#2 y AUG 2020 JUL 2020 JUN 2020
#3 z APR 2020 JAN 2020 NOV 2019
Use slice_max
:
library(tidyverse)
tibble(df) %>%
group_by(Site) %>%
slice_max(order_by = Date, n = 3) %>%
mutate(Date = format(Date, '%b %Y') %>% toupper(),
m = paste0("M-", 0:(n()-1)) %>% str_replace(., "-0", "")) %>%
pivot_wider(names_from = m, values_from = Date) %>% ungroup()
Output:
# A tibble: 3 x 4
Site M `M-1` `M-2`
<fct> <chr> <chr> <chr>
1 x SEP 2020 AUG 2020 JUL 2020
2 y AUG 2020 JUL 2020 JUN 2020
3 z APR 2020 JAN 2020 NOV 2019
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.