根据分组变量计算百分比

Question

I would like to calculate the percentage of people reported doing some work per day.我想计算每天报告做一些工作的人的百分比。 For example I would like to know the percentage of people reported doing some work on Monday from the entire sample.例如，我想知道从整个样本中报告周一做一些工作的人的百分比。

I used the following code to calculate this, but I am not sure about my result.我使用下面的代码来计算这个，但我不确定我的结果。

df1 <- structure(list(id = c(12L, 123L, 10L), t1_1 = c(0L, 0L, 1L), 
      t1_2 = c(1L, 0L, 1L), t1_3 = c(1L, 0L, 1L), t2_1 = c(0L, 
      1L, 1L), t2_2 = c(1L, 1L, 1L), t2_3 = c(0L, 1L, 1L), t3_1 = c(1L, 
      0L, 1L), t3_2 = c(0L, 0L, 1L), t3_3 = c(1L, 0L, 1L), t4_1 = c(0L, 
      1L, 1L), t4_2 = c(1L, 1L, 1L), t4_3 = c(0L, 1L, 1L), t5_1 = c(0L, 
      1L, 1L), t5_2 = c(1L, 1L, 1L), t5_3 = c(0L, 1L, 1L), t6_1 = c(1L, 
      0L, 1L), t6_2 = c(1L, 0L, 1L), t6_3 = c(1L, 0L, 1L), t7_1 = c(0L, 
      1L, 1L), t7_2 = c(0L, 1L, 1L), t7_3 = c(1L, 1L, 1L)), 
      class = "data.frame", row.names = c(NA, -3L))

Variable description t1 - Monday (t1_1, t1_2, t1_3 - are time steps that measured if work was done during Monday);变量描述 t1 - 星期一（t1_1、t1_2、t1_3 - 是衡量工作是否在星期一完成的时间步长）； t2 - Tuesday; t2 - 星期二； t3 - Wednesday; t3 - 星期三； t4 - Thursday; t4 - 星期四； t5 - Friday; t5 - 星期五； t6 - Saturda and t7- Sunday; t6 - 星期六和 t7 - 星期日； id is an identification number id 是一个标识号

df2 <- reshape2::melt(df1, id.vars = "id")
df2$variable <- as.character(df2$variable)
df2$day <- sapply(strsplit(df2$variable, "_"), `[`, 1)
df2$day <- factor(df2$day, levels = variable)

df3<-df2 %>%
  group_by (day)  %>%
  mutate (percent = (value/sum(value) *100))

ggplot(df3, aes(day, group = value)) + 
  geom_bar(aes(y = ..prop.., fill = factor(..x..)), stat="count") +
  scale_fill_discrete(name="Days", labels=c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")) +
  scale_y_continuous(labels=scales::percent, limits=c(0,1)) +
  ylab("relative frequencies") + 
  theme_bw()

Result:结果：

Answer 1

library(dplyr)
df1 <- structure(
  list(id = c(12L, 123L, 10L),
       t1_1 = c(0L, 0L, 1L), t1_2 = c(1L, 0L, 1L), t1_3 = c(1L, 0L, 1L),
       t2_1 = c(0L, 1L, 1L), t2_2 = c(1L, 1L, 1L), t2_3 = c(0L, 1L, 1L),
       t3_1 = c(1L, 0L, 1L), t3_2 = c(0L, 0L, 1L), t3_3 = c(1L, 0L, 1L),
       t4_1 = c(0L, 1L, 1L), t4_2 = c(1L, 1L, 1L), t4_3 = c(0L, 1L, 1L),
       t5_1 = c(0L, 1L, 1L), t5_2 = c(1L, 1L, 1L), t5_3 = c(0L, 1L, 1L),
       t6_1 = c(1L, 0L, 1L), t6_2 = c(1L, 0L, 1L), t6_3 = c(1L, 0L, 1L),
       t7_1 = c(0L, 1L, 1L), t7_2 = c(0L, 1L, 1L), t7_3 = c(1L, 1L, 1L)), 
  class = "data.frame", row.names = c(NA, -3L))

df2 <- reshape2::melt(df1, id.vars = "id")
df2$variable <- as.character(df2$variable)
df2$day <- sapply(strsplit(df2$variable, "_"), `[`, 1)

df3 <- df2 %>%
  group_by(id, day)  %>%
  summarize(count = sum(value)) %>%
  group_by(id)  %>%
  mutate(percent = count / sum(count)) %>%
  arrange(day, id)

> df3
# A tibble: 21 x 4
# Groups:   id [3]
      id day   count percent
   <int> <chr> <int>   <dbl>
 1    10 t1        3  0.143 
 2    12 t1        2  0.182 
 3   123 t1        0  0     
 4    10 t2        3  0.143 
 5    12 t2        1  0.0909
 6   123 t2        3  0.25
 ...

Is it something you are looking for?这是你要找的东西吗？

根据分组变量计算百分比

问题描述

1 个解决方案

解决方案1
1 已采纳 2020-04-03 14:14:13

根据分组变量计算百分比

问题描述

1 个解决方案

解决方案1 1 已采纳 2020-04-03 14:14:13

解决方案1
1 已采纳 2020-04-03 14:14:13