I have this df
:
> df <- data.frame(Adults = sample(0:5, 10, replace = TRUE),
+ Children = sample(0:2, 10, replace = TRUE),
+ Teens = sample(1:3, 10, replace = TRUE),
+ stringsAsFactors = FALSE)
> df
Adults Children Teens
1 5 0 1
2 5 1 2
3 5 2 3
4 5 2 2
5 0 1 2
6 5 1 3
7 0 2 3
8 4 2 1
9 4 0 1
10 1 2 1
We can see that Children
doesn't have 3,4,5
values and Teens
doesn't have 0,4,5
values. However, we know that Adults
, Children
, and Teens
could have from 0 to 5
.
When I use group_by()
with summarise()
, summarise
drops the columns I'm not grouping. The code:
df %>%
group_by(Adults) %>% mutate(n_Adults = n()) %>%
group_by(Teens) %>% mutate(n_Teens = n()) %>%
group_by(Children) %>% mutate(n_Children = n())
And when I group by c(0,1,2,3,4,5)
(in order to have all the possible values) it gives me this error:
Error in mutate_impl(.data, dots) : Column `c(0, 1, 2, 3, 4, 5)` must be length 10 (the number of rows) or one, not 6
I'm looking for this output:
Values n_Adults n_Children n_Teens p_Adults p_Children p_Teens
0 2 2 0 0.2 0.2 0
1 1 3 4 0.1 0.1 0.4
2 0 5 3 0 0 0.3
3 0 0 3 0 0 0.3
4 2 0 0 0.2 0.2 0
5 5 0 0 0.5 0.5 0
Where n_
is the count of the respective column and p_
is the percentage of the respective column.
We can gather
the data into 'long' format, get the frequency with count
after converting the 'value' to factor
with levels
specified as 0:5, spread
to 'wide' format and create the 'p' columns by dividing with the sum
of each column and if needed change the column name (with rename_at
)
library(tidyverse)
gather(df) %>%
count(key, value = factor(value, levels = 0:5)) %>%
spread(key, n, fill = 0) %>%
mutate_at(2:4, list(p = ~./sum(.)))%>%
rename_at(2:4, ~ paste0(.x, "_n"))
df <- structure(list(Adults = c(1L, 1L, 4L, 3L, 3L, 5L, 1L, 4L, 4L,
1L), Children = c(1L, 1L, 2L, 2L, 0L, 2L, 0L, 0L, 1L, 0L), Teens = c(1L,
2L, 3L, 1L, 1L, 3L, 1L, 2L, 2L, 1L)), class = "data.frame", row.names = c(NA,
-10L))
library(reprex)
library(tidyverse)
set.seed(20)
df <- data.frame(Adults = sample(0:5, 10, replace = TRUE),
Children = sample(0:2, 10, replace = TRUE),
Teens = sample(1:3, 10, replace = TRUE),
stringsAsFactors = FALSE)
df
#> Adults Children Teens
#> 1 5 2 2
#> 2 4 2 1
#> 3 1 0 2
#> 4 3 2 1
#> 5 5 0 1
#> 6 5 1 1
#> 7 0 0 3
#> 8 0 0 3
#> 9 1 0 1
#> 10 2 2 3
df_adults <- df %>%
count(Adults) %>%
rename( n_Adults = n)
df_childred <- df %>%
count(Children) %>%
rename( n_Children = n)
df_teens <- df %>%
count(Teens) %>%
rename( n_Teens = n)
df_new <- data.frame(unique_id = 0:5)
df_new <- left_join(df_new,df_adults, by = c("unique_id"="Adults"))
df_new <- left_join(df_new,df_childred, by = c("unique_id"="Children"))
df_new <- left_join(df_new,df_teens, by = c("unique_id"="Teens"))
df_new <- df_new %>%
replace_na(list( n_Adults=0, n_Children=0, n_Teens=0))
df_new %>%
mutate(p_Adults = n_Adults/sum(n_Adults),p_Children = n_Children/sum(n_Children), p_Teens = n_Teens/sum(n_Teens))
#> unique_id n_Adults n_Children n_Teens p_Adults p_Children p_Teens
#> 1 0 2 5 0 0.2 0.5 0.0
#> 2 1 2 1 5 0.2 0.1 0.5
#> 3 2 1 4 2 0.1 0.4 0.2
#> 4 3 1 0 3 0.1 0.0 0.3
#> 5 4 1 0 0 0.1 0.0 0.0
#> 6 5 3 0 0 0.3 0.0 0.0
Created on 2019-02-25 by the reprex package (v0.2.1)
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.