[英]How to display wide table with specific order (month year) even when data points are missing?
> df_1
# A tibble: 47 x 3
# Groups: therapy_class [9]
therapy_class Year_month count
<ord> <yearmon> <int>
1 ALK Inhibitors Dec 2019 16
2 ALK Inhibitors Jan 2020 14
3 ALK Inhibitors Feb 2020 14
4 ALK Inhibitors Mar 2020 22
5 ALK Inhibitors Apr 2020 13
6 ALK Inhibitors May 2020 17
7 Anti-VEGF-based therapies Dec 2019 33
8 Anti-VEGF-based therapies Jan 2020 35
9 Anti-VEGF-based therapies Feb 2020 36
10 Anti-VEGF-based therapies Mar 2020 20
# … with 37 more rows
A tibble: 10 x 7
therapy_class `Dec 2019` `Jan 2020` `Feb 2020` `Mar 2020` `Apr 2020` `May 2020`
<ord> <chr> <chr> <chr> <chr> <chr> <chr>
1 ALK Inhibitors "16 <br>[2.7%]" "14 <br>[2.0%]" "14 <br>[2.2%]" "22 <br>[3.3%]" "13 <br>[2.1%]" "17 <br>[3.4%]"
2 Anti-VEGF-based therapies "33 <br>[5.6%]" "35 <br>[4.9%]" "36 <br>[5.7%]" "20 <br>[3.0%]" "21 <br>[3.4%]" "20 <br>[4.0%]"
3 EGFR TKIs "52 <br>[8.8%]" "57 <br>[8.0%]" "60 <br>[9.5%]" "52 <br>[7.8%]" "56 <br>[9.2%]" "49 <br>[9.8%]"
4 EGFR-antibody based therapies "" "" "" "" "" ""
5 Non-platinum-based chemotherapy… "1 <br>[0.2%]" "4 <br>[0.6%]" "4 <br>[0.6%]" "" "1 <br>[0.2%]" ""
6 IO-based therapies "308 <br>[52.0%]" "385 <br>[54.0%]" "330 <br>[52.3%]" "379 <br>[56.7%]" "345 <br>[56.4%]" "265 <br>[52.9%]"
7 Platinum-based chemotherapy com… "123 <br>[20.8%]" "147 <br>[20.6%]" "128 <br>[20.3%]" "134 <br>[20.1%]" "120 <br>[19.6%]" "107 <br>[21.4%]"
8 Single agent chemotherapies "29 <br>[4.9%]" "33 <br>[4.6%]" "17 <br>[2.7%]" "28 <br>[4.2%]" "25 <br>[4.1%]" "22 <br>[4.4%]"
9 Other "30 <br>[5.1%]" "38 <br>[5.3%]" "42 <br>[6.7%]" "33 <br>[4.9%]" "31 <br>[5.1%]" "21 <br>[4.2%]"
10 <strong>Total</strong> "<strong>592</str… "<strong>713</str… "<strong>631</st… "<strong>668</st… "<strong>612</st… "<strong>501</st…
> df_2
# A tibble: 46 x 3
# Groups: therapy_class [9]
therapy_class Year_month count
<ord> <yearmon> <int>
1 ALK Inhibitors Dec 2019 16
2 ALK Inhibitors Feb 2020 14
3 ALK Inhibitors Mar 2020 22
4 ALK Inhibitors Apr 2020 13
5 ALK Inhibitors May 2020 17
6 Anti-VEGF-based therapies Dec 2019 33
7 Anti-VEGF-based therapies Jan 2020 35
8 Anti-VEGF-based therapies Feb 2020 36
9 Anti-VEGF-based therapies Mar 2020 20
10 Anti-VEGF-based therapies Apr 2020 21
# … with 36 more rows
> t2
# A tibble: 10 x 7
therapy_class `Dec 2019` `Feb 2020` `Mar 2020` `Apr 2020` `May 2020` `Jan 2020`
<ord> <chr> <chr> <chr> <chr> <chr> <chr>
1 ALK Inhibitors "16 <br>[2.7%]" "14 <br>[2.2%]" "22 <br>[3.3%]" "13 <br>[2.1%]" "17 <br>[3.4%]" ""
2 Anti-VEGF-based therapies "33 <br>[5.6%]" "36 <br>[5.7%]" "20 <br>[3.0%]" "21 <br>[3.4%]" "20 <br>[4.0%]" "35 <br>[5.0%]"
3 EGFR TKIs "52 <br>[8.8%]" "60 <br>[9.5%]" "52 <br>[7.8%]" "56 <br>[9.2%]" "49 <br>[9.8%]" "57 <br>[8.2%]"
4 EGFR-antibody based therapies "" "" "" "" "" ""
5 Non-platinum-based chemotherapy… "1 <br>[0.2%]" "4 <br>[0.6%]" "" "1 <br>[0.2%]" "" "4 <br>[0.6%]"
6 IO-based therapies "308 <br>[52.0%]" "330 <br>[52.3%]" "379 <br>[56.7%]" "345 <br>[56.4%]" "265 <br>[52.9%]" "385 <br>[55.1%]"
7 Platinum-based chemotherapy com… "123 <br>[20.8%]" "128 <br>[20.3%]" "134 <br>[20.1%]" "120 <br>[19.6%]" "107 <br>[21.4%]" "147 <br>[21.0%]"
8 Single agent chemotherapies "29 <br>[4.9%]" "17 <br>[2.7%]" "28 <br>[4.2%]" "25 <br>[4.1%]" "22 <br>[4.4%]" "33 <br>[4.7%]"
9 Other "30 <br>[5.1%]" "42 <br>[6.7%]" "33 <br>[4.9%]" "31 <br>[5.1%]" "21 <br>[4.2%]" "38 <br>[5.4%]"
10 <strong>Total</strong> "<strong>592</str… "<strong>631</str… "<strong>668</st… "<strong>612</st… "<strong>501</st… "<strong>699</st…
>
我正在嘗試從長表中創建一個包含計數和百分比的寬表。 列是“月年”,需要按順序排列。 我的問題是,當第一組(ALK 抑制劑)的某個“月年”缺少行時,列的順序就會被打亂。 缺少的“月年”放在最后。 長桌也不是固定桌。 它是由用戶可以選擇月份年份范圍的功能生成的。 所以 Year_month 列可以有任何范圍。
在這個例子中,我使用了 2019 年 12 月到 2020 年 5 月的 6 個月范圍。 “df_1”有全部 6 個月,因此生成的寬表符合預期。 “df_2”缺少 2020 年 1 月的 ALK 抑制劑。 因此,結果表的末尾是“Jan 2020”。
這是我用來生成寬表的代碼:
df_2 %>%
pivot_wider(names_from = Year_month, values_from = count) %>%
ungroup() %>%
mutate_at(.vars = vars(contains("20")), list(
~ ifelse(is.na(.), "", paste(., sprintf("<br>[%1.1f%%]", 100 * (. / sum(., na.rm = TRUE)))))
))
這是示例數據 df_2
structure(list(therapy_class = structure(c(1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L,
8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L), .Label = c("ALK Inhibitors",
"Anti-VEGF-based therapies", "EGFR TKIs", "EGFR-antibody based therapies",
"Non-platinum-based chemotherapy combinations", "IO-based therapies",
"Platinum-based chemotherapy combinations", "Single agent chemotherapies",
"Other"), class = c("ordered", "factor")), Year_month = structure(c(2019.91666666667,
2020.08333333333, 2020.16666666667, 2020.25, 2020.33333333333,
2019.91666666667, 2020, 2020.08333333333, 2020.16666666667, 2020.25,
2020.33333333333, 2019.91666666667, 2020, 2020.08333333333, 2020.16666666667,
2020.25, 2020.33333333333, NA, 2019.91666666667, 2020, 2020.08333333333,
2020.25, 2019.91666666667, 2020, 2020.08333333333, 2020.16666666667,
2020.25, 2020.33333333333, 2019.91666666667, 2020, 2020.08333333333,
2020.16666666667, 2020.25, 2020.33333333333, 2019.91666666667,
2020, 2020.08333333333, 2020.16666666667, 2020.25, 2020.33333333333,
2019.91666666667, 2020, 2020.08333333333, 2020.16666666667, 2020.25,
2020.33333333333), class = "yearmon"), count = c(16L, 14L, 22L,
13L, 17L, 33L, 35L, 36L, 20L, 21L, 20L, 52L, 57L, 60L, 52L, 56L,
49L, NA, 1L, 4L, 4L, 1L, 308L, 385L, 330L, 379L, 345L, 265L,
123L, 147L, 128L, 134L, 120L, 107L, 29L, 33L, 17L, 28L, 25L,
22L, 30L, 38L, 42L, 33L, 31L, 21L)), row.names = c(NA, -46L), groups = structure(list(
therapy_class = structure(1:9, .Label = c("ALK Inhibitors",
"Anti-VEGF-based therapies", "EGFR TKIs", "EGFR-antibody based therapies",
"Non-platinum-based chemotherapy combinations", "IO-based therapies",
"Platinum-based chemotherapy combinations", "Single agent chemotherapies",
"Other"), class = c("ordered", "factor")), .rows = structure(list(
1:5, 6:11, 12:17, 18L, 19:22, 23:28, 29:34, 35:40, 41:46), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -9L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
一個選項是在執行pivot_wider
之前用complete
創建缺失的年份月份。 使用pivot_wider
,默認順序基於訂單中的唯一值外觀
library(dplyr)
library(tidyr)
library(zoo)
df_2 %>%
ungroup %>%
mutate(Year_month = as.Date(Year_month)) %>%
complete(therapy_class, Year_month = seq(from = min(Year_month,
na.rm = TRUE), to = max(Year_month, na.rm = TRUE),
by = '1 month')) %>%
mutate(Year_month = as.yearmon(Year_month)) %>%
pivot_wider(names_from = Year_month, values_from = count) %>%
ungroup() %>%
mutate_at(.vars = vars(contains("20")),
list(
~ ifelse(is.na(.), "", paste(., sprintf("<br>[%1.1f%%]",
100 * (. / sum(., na.rm = TRUE)))))
))
-輸出
# A tibble: 9 × 8
therapy_class `Dec 2019` `Jan 2020` `Feb 2020` `Mar 2020` `Apr 2020` `May 2020` `NA`
<ord> <chr> <chr> <chr> <chr> <chr> <chr> <int>
1 ALK Inhibitors "16 <br>[2.7%]" "" "14 <br>[2.2%]" "22 <br>[3.3%]" "13 <br>[2.1%… "17 <br>[3.4… NA
2 Anti-VEGF-based therapies "33 <br>[5.6%]" "35 <br>[5.0%]" "36 <br>[5.7%]" "20 <br>[3.0%]" "21 <br>[3.4%… "20 <br>[4.0… NA
3 EGFR TKIs "52 <br>[8.8%]" "57 <br>[8.2%]" "60 <br>[9.5%]" "52 <br>[7.8%]" "56 <br>[9.2%… "49 <br>[9.8… NA
4 EGFR-antibody based therapies "" "" "" "" "" "" NA
5 Non-platinum-based chemotherapy combinations "1 <br>[0.2%]" "4 <br>[0.6%]" "4 <br>[0.6%]" "" "1 <br>[0.2%]" "" NA
6 IO-based therapies "308 <br>[52.0%]" "385 <br>[55.1%]" "330 <br>[52.3%]" "379 <br>[56.7%]" "345 <br>[56.… "265 <br>[52… NA
7 Platinum-based chemotherapy combinations "123 <br>[20.8%]" "147 <br>[21.0%]" "128 <br>[20.3%]" "134 <br>[20.1%]" "120 <br>[19.… "107 <br>[21… NA
8 Single agent chemotherapies "29 <br>[4.9%]" "33 <br>[4.7%]" "17 <br>[2.7%]" "28 <br>[4.2%]" "25 <br>[4.1%… "22 <br>[4.4… NA
9 Other "30 <br>[5.1%]" "38 <br>[5.4%]" "42 <br>[6.7%]" "33 <br>[4.9%]" "31 <br>[5.1%… "21 <br>[4.2… NA
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.