I have a dataset like this:
ID color_1 color_2 shape_1 shape_2 size class
55 red blue circle triangle small A
83 blue yellow circle NA large B
78 red yellow square circle large B
43 green NA square circle small C
29 yellow green circle triangle medium A
Before I asked a question about how to make a frequency table of my data and received this answer:
pat_words <- c("color", "shape", "size")
out <- df %>%
pivot_longer(
-ID,
names_to = "Question",
values_to = "Response"
) %>% mutate(Question = str_extract(Question, str_c(pat_words, collapse="|"))) %>% group_by(Question, Response) %>%
count(Response, name = "Freq") %>%
na.omit() %>%
group_by(Question) %>%
mutate(Percent = round(Freq/sum(Freq)*100, 2)) %>%
group_split() %>%
adorn_totals() %>%
bind_rows() %>%
mutate(Response = ifelse(Response == last(Response), last(Question), Response)) %>%
mutate(Question = ifelse(duplicated(Question) |
Question == "Total", NA, Question)) %>%
as.data.frame
And was able to get the following output:
Variable Level Freq Percent
color blue 2 22.22
red 2 22.22
yellow 3 33.33
green 2 22.22
total 9 100.00
shape circle 5 50.0
triangle 3 30.0
square 2 20.0
total 10 100.0
size small 2 33.3
medium 2 33.3
large 2 33.3
total 6 100.0
However now I want to group by a new variable "class":
Class A B C
Variable Level Freq Percent Freq Percent Freq Percent
color blue 1 25 1 25 0 0
red 1 25 1 25 0 0
yellow 1 25 2 50 0 0
green 1 25 0 0 1 100
total 4 100.00 4 100.00 1 100
shape circle 2 50 2 66.7 1 50
triangle 2 50 0 0 0 0
square 0 0 1 33.3 1 50
total 4 100.0 3 100.0 2 100
size small 1 50 0 0 1 100
medium 1 50 0 0 0 0
large 0 0 2 100 0 0
total 2 100.0 2 100 1 100
I have tried to just add class to the group by statement and I am getting an error. Any help is greatly appreciated!
We can modify the OP's code in the grouping ie grouped by 'class, 'Variable', calculate the 'Percent', and create the 'Total' row with adorn_totals
before reshaping back to 'wide' with pivot_wider
library(dplyr)
library(tidyr)
library(janitor)
library(stringr)
out <- df %>%
pivot_longer(cols = -c(ID, class), names_to = 'Variable',
values_to = 'Level', values_drop_na = TRUE) %>%
mutate(Variable = str_remove(Variable, "_\\d+")) %>%
count(class, Variable, Level, name = "Freq") %>%
group_by(class, Variable) %>%
mutate(Percent = round(100 * Freq/sum(Freq))) %>%
group_modify(~ .x %>%
adorn_totals()) %>%
ungroup %>%
pivot_wider(names_from = class, values_from = c(Freq, Percent),
values_fill = 0) %>%
select(Variable, Level, ends_with('A'), ends_with('B'), ends_with('C')) %>%
arrange(Variable)
-output
out
# A tibble: 13 × 8
Variable Level Freq_A Percent_A Freq_B Percent_B Freq_C Percent_C
<chr> <chr> <int> <dbl> <int> <dbl> <int> <dbl>
1 color blue 1 25 1 25 0 0
2 color green 1 25 0 0 1 100
3 color red 1 25 1 25 0 0
4 color yellow 1 25 2 50 0 0
5 color Total 4 100 4 100 1 100
6 shape circle 2 50 2 67 1 50
7 shape triangle 2 50 0 0 0 0
8 shape Total 4 100 3 100 2 100
9 shape square 0 0 1 33 1 50
10 size medium 1 50 0 0 0 0
11 size small 1 50 0 0 1 100
12 size Total 2 100 2 100 1 100
13 size large 0 0 2 100 0 0
The above output can be converted to a flextable
library(flextable)
out %>%
mutate(Variable = replace(Variable, duplicated(Variable), NA)) %>%
flextable()
-output
df <- structure(list(ID = c(55L, 83L, 78L, 43L, 29L), color_1 = c("red",
"blue", "red", "green", "yellow"), color_2 = c("blue", "yellow",
"yellow", NA, "green"), shape_1 = c("circle", "circle", "square",
"square", "circle"), shape_2 = c("triangle", NA, "circle", "circle",
"triangle"), size = c("small", "large", "large", "small", "medium"
), class = c("A", "B", "B", "C", "A")), class = "data.frame", row.names = c(NA,
-5L))
You could use the datasummary
function from the modelsummary
package. (Disclaimer: I am the maintainer of this package.)
Load libraries and simulate data:
library(tidyverse)
library(modelsummary)
N <- 100
colors <- c(NA, "red", "blue", "yellow")
shapes <- c(NA, "circle", "triangle", "square")
sizes <- c("small", "medium", "large")
classes <- c("A", "B", "C")
dat <- data.frame(
ID = 1:N,
color_1 = sample(colors, N, replace = TRUE),
color_2 = sample(colors, N, replace = TRUE),
shape_1 = sample(shapes, N, replace = TRUE),
shape_2 = sample(shapes, N, replace = TRUE),
size = sample(sizes, N, replace = TRUE),
class = sample(classes, N, replace = TRUE))
head(dat)
## ID color_1 color_2 shape_1 shape_2 size class
## 1 1 yellow red square circle medium B
## 2 2 blue <NA> triangle circle large B
## 3 3 <NA> yellow <NA> square small C
## 4 4 yellow blue <NA> <NA> small C
## 5 5 red blue triangle <NA> large B
## 6 6 <NA> blue <NA> circle small C
Reshape data:
dat <- dat %>%
pivot_longer(cols = matches("color|shape"),
names_sep = "_",
names_to = c("feature", "entry")) %>%
pivot_wider(names_from = "feature", values_from = "value")
Create table:
datasummary(color + shape + size ~
class * ((Freq = 1) + Percent(denom = "col")),
fmt = 0,
data = dat)
Another possible solution, based on purrr::reduce
and dplyr::full_join
:
library(tidyverse)
df %>%
pivot_longer(cols = !contains(c("ID", "class")), names_to = "Variable",
values_to = "Level") %>% drop_na %>%
mutate(Variable = str_remove(Variable,"_\\d$")) %>%
add_count(class, Variable) %>%
group_by(class, Variable, Level) %>%
summarise(count = n(), perc = 100*count/n, .groups = "drop") %>%
group_split(class) %>%
reduce(full_join, by=c("Variable", "Level"), suffix=c(".A",".B")) %>%
select(-starts_with("class")) %>% distinct %>%
arrange(Variable) %>%
rename("count.C" = "count", "perc.C" = "perc") %>%
mutate(across(contains("."), ~ replace_na(.x,0)))
#> # A tibble: 10 × 8
#> Variable Level count.A perc.A count.B perc.B count.C perc.C
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 color blue 1 25 1 25 0 0
#> 2 color green 1 25 0 0 1 100
#> 3 color red 1 25 1 25 0 0
#> 4 color yellow 1 25 2 50 0 0
#> 5 shape circle 2 50 2 66.7 1 50
#> 6 shape triangle 2 50 0 0 0 0
#> 7 shape square 0 0 1 33.3 1 50
#> 8 size medium 1 50 0 0 0 0
#> 9 size small 1 50 0 0 1 100
#> 10 size large 0 0 2 100 0 0
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.