根據分組變量計算百分比

Question

我想計算每天報告做一些工作的人的百分比。 例如，我想知道從整個樣本中報告周一做一些工作的人的百分比。

我使用下面的代碼來計算這個，但我不確定我的結果。

# Example data: one row per person (id); columns tD_S hold a 0/1 flag for
# time step S (1-3) of day D (t1 = Monday, ..., t7 = Sunday).
df1 <- structure(
  list(
    id = c(12L, 123L, 10L),
    # Monday
    t1_1 = c(0L, 0L, 1L), t1_2 = c(1L, 0L, 1L), t1_3 = c(1L, 0L, 1L),
    # Tuesday
    t2_1 = c(0L, 1L, 1L), t2_2 = c(1L, 1L, 1L), t2_3 = c(0L, 1L, 1L),
    # Wednesday
    t3_1 = c(1L, 0L, 1L), t3_2 = c(0L, 0L, 1L), t3_3 = c(1L, 0L, 1L),
    # Thursday
    t4_1 = c(0L, 1L, 1L), t4_2 = c(1L, 1L, 1L), t4_3 = c(0L, 1L, 1L),
    # Friday
    t5_1 = c(0L, 1L, 1L), t5_2 = c(1L, 1L, 1L), t5_3 = c(0L, 1L, 1L),
    # Saturday
    t6_1 = c(1L, 0L, 1L), t6_2 = c(1L, 0L, 1L), t6_3 = c(1L, 0L, 1L),
    # Sunday
    t7_1 = c(0L, 1L, 1L), t7_2 = c(0L, 1L, 1L), t7_3 = c(1L, 1L, 1L)
  ),
  class = "data.frame",
  row.names = c(NA, -3L)
)

變量描述：t1 - 星期一（t1_1、t1_2、t1_3 是星期一的三個時間段，各以 0/1 記錄該時間段內是否有工作）； t2 - 星期二； t3 - 星期三； t4 - 星期四； t5 - 星期五； t6 - 星期六； t7 - 星期日； id 是每個人的標識號

# Reshape to long format: one row per (id, time-step column), with the 0/1
# flag in `value` and the originating column name in `variable`.
df2 <- reshape2::melt(df1, id.vars = "id")
df2$variable <- as.character(df2$variable)
# The day code is the part of the column name before "_" ("t1_2" -> "t1").
# vapply() guarantees a character vector regardless of input length.
df2$day <- vapply(strsplit(df2$variable, "_"), `[`, character(1), 1)
# Fix: the original passed `levels = variable`, but no object of that name
# exists, so factor() failed with "object 'variable' not found". Use the day
# codes in order of first appearance instead; melt() stacks the measured
# columns in their original order, so this yields t1, t2, ..., t7.
df2$day <- factor(df2$day, levels = unique(df2$day))

# Percentage of people who reported doing some work on each day.
# (Uses dplyr verbs and `%>%`, so dplyr must be attached.)
#
# Fix: the original `value / sum(value) * 100` gave every row its share of the
# day's total of reported time steps, not the share of people who worked that
# day. A person counts as having worked on a day when any of that day's time
# steps is flagged. All original rows and columns are kept; `worked` and
# `percent` are added, with `percent` constant within a day.
df3 <- df2 %>%
  group_by(id, day) %>%
  mutate(worked = any(value > 0)) %>%
  group_by(day) %>%
  mutate(percent = 100 * n_distinct(id[worked]) / n_distinct(id)) %>%
  ungroup()

# Bar chart of relative frequencies by day, one fill colour per day.
# Fix: the `..prop..` / `..x..` notation is deprecated in ggplot2; computed
# statistics are now referenced with after_stat() (needs ggplot2 >= 3.3.0).
# NOTE(review): with `group = value`, `prop` is the groupwise proportion, i.e.
# each segment is the share of that value-group's rows that fall on the day.
# It is not the share of people who worked that day - confirm this is the
# quantity you want to show.
ggplot(df3, aes(day, group = value)) +
  geom_bar(
    aes(y = after_stat(prop), fill = after_stat(factor(x))),
    stat = "count"
  ) +
  scale_fill_discrete(
    name = "Days",
    labels = c(
      "Monday", "Tuesday", "Wednesday", "Thursday",
      "Friday", "Saturday", "Sunday"
    )
  ) +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
  ylab("relative frequencies") +
  theme_bw()

結果：

Answer 1

library(dplyr)
# Same example data as in the question: one row per person (id), and three
# 0/1 time-step flags for each day (t1 = Monday, ..., t7 = Sunday).
df1 <- data.frame(
  id = c(12L, 123L, 10L),
  t1_1 = c(0L, 0L, 1L), t1_2 = c(1L, 0L, 1L), t1_3 = c(1L, 0L, 1L),
  t2_1 = c(0L, 1L, 1L), t2_2 = c(1L, 1L, 1L), t2_3 = c(0L, 1L, 1L),
  t3_1 = c(1L, 0L, 1L), t3_2 = c(0L, 0L, 1L), t3_3 = c(1L, 0L, 1L),
  t4_1 = c(0L, 1L, 1L), t4_2 = c(1L, 1L, 1L), t4_3 = c(0L, 1L, 1L),
  t5_1 = c(0L, 1L, 1L), t5_2 = c(1L, 1L, 1L), t5_3 = c(0L, 1L, 1L),
  t6_1 = c(1L, 0L, 1L), t6_2 = c(1L, 0L, 1L), t6_3 = c(1L, 0L, 1L),
  t7_1 = c(0L, 1L, 1L), t7_2 = c(0L, 1L, 1L), t7_3 = c(1L, 1L, 1L)
)

# Long format: one row per (id, time-step column); the 0/1 flag ends up in
# `value` and the source column name in `variable`.
df2 <- reshape2::melt(df1, id.vars = "id")
df2[["variable"]] <- as.character(df2[["variable"]])
# Day code = text before the underscore, e.g. "t3_2" -> "t3".
df2[["day"]] <- vapply(
  strsplit(df2[["variable"]], "_"),
  function(parts) parts[1L],
  character(1)
)

# For every person (id) and day, count the time steps with work reported,
# then express each day's count as a share of that person's weekly total.
# `percent` is a proportion in [0, 1]; it is not multiplied by 100.
df3 <- df2 %>%
  group_by(id, day) %>%
  summarise(count = sum(value)) %>%
  group_by(id) %>%                         # shares are computed per person
  mutate(percent = prop.table(count)) %>%  # count / sum(count) within id
  arrange(day, id)                         # result remains grouped by id

> df3
# A tibble: 21 x 4
# Groups:   id [3]
      id day   count percent
   <int> <chr> <int>   <dbl>
 1    10 t1        3  0.143 
 2    12 t1        2  0.182 
 3   123 t1        0  0     
 4    10 t2        3  0.143 
 5    12 t2        1  0.0909
 6   123 t2        3  0.25
 ...

這是你要找的東西嗎？

根據分組變量計算百分比

問題描述

1 個解決方案

解決方案1
1 已采納 2020-04-03 14:14:13

根據分組變量計算百分比

問題描述

1 個解決方案

解決方案1 1 已采納 2020-04-03 14:14:13

解決方案1
1 已采納 2020-04-03 14:14:13