I have data like the following:
ID Flower Season
1 tulip Spring
3 rose Summer
5 rose Summer
9 tulip Spring
2 daisy Spring
12 violet Summer
I want to get a table like the following, with ranks and percentages:
Season
Spring Summer
Freq % Rank Freq % Rank Total
Flower
Tulip 2 66.7 1 0 0 3 2
Daisy 1 33.3 2 0 0 3 1
Rose 0 0 3 2 66.7 1 2
Violet 0 0 3 1 33.3 2 1
This solution works to get the base table, it will take additional work to format similar to the desire format,
df <- structure(list(iID = c(1L, 3L, 5L, 9L, 2L, 12L),
Flower = c("tulip", "rose", "rose", "tulip", "daisy", "violet"),
Season = c("Spring", "Summer", "Summer", "Spring", "Spring", "Summer")),
class = "data.frame", row.names = c(NA, -6L))
#create the contingency tabls
basetable <- table(df[,2:3]) %>%
as.data.frame() %>%
pivot_wider( names_from = "Season", values_from = "Freq")
#calculate the frequncy columns
freq <- apply(basetable[,2:3], MARGIN=2, FUN=function(col) {
col/sum(col)
})
dimnames(freq)[[2]] <- paste0(dimnames(freq)[[2]], "_freq")
#calculate the ranking columns
ranking <- apply(basetable[,2:3], MARGIN=2, FUN=function(col) {
ranking<-rank(-col, ties="min")
ranking
})
dimnames(ranking)[[2]] <- paste0(dimnames(ranking)[[2]], "_rank")
#make final answer
answer<-cbind(basetable, freq, ranking)
answer
Flower Spring Summer Spring_freq Summer_freq Spring_rank Summer_rank
1 daisy 1 0 0.3333333 0.0000000 2 3
2 rose 0 2 0.0000000 0.6666667 3 1
3 tulip 2 0 0.6666667 0.0000000 1 3
4 violet 0 1 0.0000000 0.3333333 3 2
I leave it up the reader to rearrange the columns.
Please find below one possible solution using dplyr
and tidyr
libraries
Reprex
df <- read.table(text="ID Flower Season
1 tulip Spring
3 rose Summer
5 rose Summer
9 tulip Spring
2 daisy Spring
12 violet Summer", header = TRUE)
library(dplyr)
library(tidyr)
df <- df %>%
mutate(dummy = 1) %>%
xtabs(dummy ~ Flower + Season, .) %>%
as.data.frame() %>%
pivot_wider(., names_from = Season, values_from = Freq) %>%
rename(Spring_Freq = Spring, Summer_Freq = Summer) %>%
mutate(Spring_Perc = round(Spring_Freq/sum(Spring_Freq)*100,1),
Summer_Perc = round(Summer_Freq/sum(Summer_Freq)*100,1),
Spring_Rank = dense_rank(desc(Spring_Freq)),
Summer_Rank = dense_rank(desc(Summer_Freq)),
Total = Spring_Freq + Summer_Freq) %>%
arrange(., desc(Spring_Freq)) %>%
select(order(colnames(.)))
df
#> A tibble: 4 x 8
#> Flower Spring_Freq Spring_Perc Spring_Rank Summer_Freq Summer_Perc Summer_Rank Total
#> <fct> <dbl> <dbl> <int> <dbl> <dbl> <int> <dbl>
#> 1 tulip 2 66.7 1 0 0 3 2
#> 2 daisy 1 33.3 2 0 0 3 1
#> 3 rose 0 0 3 2 66.7 1 2
#> 4 violet 0 0 3 1 33.3 2 1
>
And, please find one possible solution to format the table using the flextable
library
library(flextable)
flextable(df) %>%
set_header_labels(., Spring_Freq = "Freq", Spring_Perc = "%", Spring_Rank = "Rank",
Summer_Freq = "Freq", Summer_Perc = "%", Summer_Rank ="Rank") %>%
add_header_row(., values = c("","Spring", "Summer",""), colwidths = c(1, 2, 3, 2))
Created on 2022-01-04 by the reprex package (v2.0.1)
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.