[英]Tidying “Benchmark” columns
我有一个基本整洁的数据框,但其中有2列存放的是基准值,而没有把基准值作为观察结果(行)来存放。 我该如何整理它,使“Facility mean”和“TTP_score”这两个列名作为观察值添加到“Facility_label”列下,并且每个唯一的FYQ和Metric组合各出现一次?
输入数据:
library(zoo)
# Sample input (dput output): a 30-row tibble with one row per
# Facility_label / FYQ / Metric observation.
#   Facility_label  - factor; only levels "A".."D" are used ("Nashville" is an
#                     unused level).
#   FYQ             - zoo "yearqtr" values (requires library(zoo) to print).
#   Metric          - factor; only "Safety Recall" and "Turnaround days" are
#                     used, the remaining levels are unused.
#   Facility_score  - numeric score of the facility itself.
#   `Facility mean`, TTP_score - numeric benchmark columns that repeat the same
#                     value for every row sharing an FYQ + Metric combination.
dd <- structure(list(Facility_label = structure(c(1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("A", "B", "C",
"D", "Nashville"), class = "factor"), FYQ = structure(c(2017.75,
2018, 2018.25, 2018.5, 2017.75, 2018, 2018.25, 2018.5, 2018.75,
2017.75, 2018, 2018.25, 2018.5, 2018.75, 2017.75, 2018, 2018.25,
2018.5, 2018.75, 2017.75, 2018, 2018.25, 2018.5, 2018.75, 2017.75,
2018, 2018.25, 2018.5, 2018.75, 2017.75), class = "yearqtr"),
Metric = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 2L), .Label = c("Safety Recall", "Turnaround days",
"Consult Active <= 30d", "Consult Pending <- 7d", "Consult Scheduling <- 90d",
"ICB Compliance Rate", "FCA Assessment", "Minor construction execution",
"NRM funding execution", "Deficincies", "%Deficienceis corrected among corrected or action plan",
"%Deficienceis corrected or action plan", "Ratio of Hospital Staff to HR FTE",
"Turnover Rate", "GEMS no Action", "Lost time claims", "RTMS risk score",
"DOC Control", "Loaner deficiencies", "Pretreatment", "RME rate",
"SPS staff vacany rate", "Stock Inactive", "MSPV-NG", "Days to close prosthetis consult",
"%Prosthetic PO using national contracts"), class = "factor"),
Facility_score = c(84.78802993, 95.59659091, 100, 100, 77.61732852,
57.87671233, 81.28898129, 33.33333333, 31.57894737, 10.2,
7.902356902, 8.62, 11.71, 13.15, 30.98236776, 33.26086957,
31.19584055, 54.54545455, 27.27272727, 11, 17.19132653, 26.02008197,
22.29, 30.41, 89.09090909, 93.47826087, 82.10735586, 91.66666667,
87.5, 3.2), `Facility mean` = c(85.35550152, 87.31899147,
93.11498231, 100, 85.35550152, 87.31899147, 93.11498231,
100, 100, 12, 13.06073298, 12.2, 11.51, 10.56, 85.35550152,
87.31899147, 93.11498231, 100, 100, 12, 13.06073298, 12.2,
11.51, 10.56, 85.35550152, 87.31899147, 93.11498231, 100,
100, 12), TTP_score = c(100, 100, 100, 100, 100, 100, 100,
100, 100, 5.65, 5.063953488, 4.779310345, 4.47, 4.545, 100,
100, 100, 100, 100, 5.65, 5.063953488, 4.779310345, 4.47,
4.545, 100, 100, 100, 100, 100, 5.65)), row.names = c(NA,
-30L), class = c("tbl_df", "tbl", "data.frame"))
期望的输出:
# Expected result (dput output): a 48-row tibble in long form. The benchmark
# values appear as extra rows whose Facility_label is "Facility mean" or
# "TTP score", listed once per FYQ + Metric combination right after the first
# facility row of that combination. All of Facility_label, FYQ and Metric are
# character columns here; the object also carries a readr column spec
# ("spec_tbl_df" class plus `spec` attribute).
# Fix: the third Metric entry read "safety Recall" (lowercase "s"), which
# introduced a spurious third Metric value; it is now "Safety Recall".
# NOTE(review): no benchmark rows are listed for the 2018 Q4 / "Safety Recall"
# combination, and the benchmark label is "TTP score" (with a space) while the
# input column is named TTP_score - confirm both are intended.
dd_output <- structure(list(Facility_label = c("A", "Facility mean", "TTP score",
"A", "Facility mean", "TTP score", "A", "Facility mean", "TTP score",
"A", "Facility mean", "TTP score", "B", "B", "B", "B", "B", "B",
"Facility mean", "TTP score", "B", "Facility mean", "TTP score",
"B", "Facility mean", "TTP score", "B", "Facility mean", "TTP score",
"B", "Facility mean", "TTP score", "C", "C", "C", "C", "C", "C",
"C", "C", "C", "C", "D", "D", "D", "D", "D", "D"), FYQ = c("2017 Q4",
"2017 Q4", "2017 Q4", "2018 Q1", "2018 Q1", "2018 Q1", "2018 Q2",
"2018 Q2", "2018 Q2", "2018 Q3", "2018 Q3", "2018 Q3", "2017 Q4",
"2018 Q1", "2018 Q2", "2018 Q3", "2018 Q4", "2017 Q4", "2017 Q4",
"2017 Q4", "2018 Q1", "2018 Q1", "2018 Q1", "2018 Q2", "2018 Q2",
"2018 Q2", "2018 Q3", "2018 Q3", "2018 Q3", "2018 Q4", "2018 Q4",
"2018 Q4", "2017 Q4", "2018 Q1", "2018 Q2", "2018 Q3", "2018 Q4",
"2017 Q4", "2018 Q1", "2018 Q2", "2018 Q3", "2018 Q4", "2017 Q4",
"2018 Q1", "2018 Q2", "2018 Q3", "2018 Q4", "2017 Q4"), Metric = c("Safety Recall",
"Safety Recall", "Safety Recall", "Safety Recall", "Safety Recall",
"Safety Recall", "Safety Recall", "Safety Recall", "Safety Recall",
"Safety Recall", "Safety Recall", "Safety Recall", "Safety Recall",
"Safety Recall", "Safety Recall", "Safety Recall", "Safety Recall",
"Turnaround days", "Turnaround days", "Turnaround days", "Turnaround days",
"Turnaround days", "Turnaround days", "Turnaround days", "Turnaround days",
"Turnaround days", "Turnaround days", "Turnaround days", "Turnaround days",
"Turnaround days", "Turnaround days", "Turnaround days", "Safety Recall",
"Safety Recall", "Safety Recall", "Safety Recall", "Safety Recall",
"Turnaround days", "Turnaround days", "Turnaround days", "Turnaround days",
"Turnaround days", "Safety Recall", "Safety Recall", "Safety Recall",
"Safety Recall", "Safety Recall", "Turnaround days"), Facility_score = c(84.78802993,
85.35550152, 100, 95.59659091, 87.31899147, 100, 100, 93.11498231,
100, 100, 100, 100, 77.61732852, 57.87671233, 81.28898129, 33.33333333,
31.57894737, 10.2, 12, 5.65, 7.902356902, 13.06073298, 5.063953488,
8.62, 12.2, 4.779310345, 11.71, 11.51, 4.47, 13.15, 10.56, 4.545,
30.98236776, 33.26086957, 31.19584055, 54.54545455, 27.27272727,
11, 17.19132653, 26.02008197, 22.29, 30.41, 89.09090909, 93.47826087,
82.10735586, 91.66666667, 87.5, 3.2)), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -48L), spec = structure(list(
cols = list(X1 = structure(list(), class = c("collector_skip",
"collector")), Facility_label = structure(list(), class = c("collector_character",
"collector")), FYQ = structure(list(), class = c("collector_character",
"collector")), Metric = structure(list(), class = c("collector_character",
"collector")), Facility_score = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1), class = "col_spec"))
我们可以用tidyr::gather再加上几个dplyr操作(如mutate)来做到这一点:
library(tidyverse)
# Reshape dd to long form: the three score columns are stacked into a single
# Facility_score column, and the two benchmark columns become their own rows,
# kept once per FYQ + Metric combination.
dd %>%
  # Remember the original row position so the stacked rows can be re-ordered.
  mutate(ID = row_number()) %>%
  # Wide -> long; `var` temporarily holds the name of the source column.
  gather(key = var, value = Facility_score, Facility_score:TTP_score) %>%
  # Helper key: 0 = the facility's own score, 1 = "Facility mean",
  # 2 = "TTP_score". It is used both for grouping and for the final ordering.
  mutate(
    temp_ID = case_when(
      var == "Facility mean" ~ 1,
      var == "TTP_score" ~ 2,
      TRUE ~ 0
    )
  ) %>%
  group_by(FYQ, Metric, temp_ID) %>%
  # Facility-score groups keep every row; benchmark groups keep only the first.
  slice(if (any(temp_ID == 0)) row_number() else 1) %>%
  # Benchmark rows take the benchmark name as their label.
  mutate(
    Facility_label = if_else(
      var != "Facility_score",
      var,
      as.character(Facility_label)
    )
  ) %>%
  ungroup() %>%
  arrange(ID, temp_ID) %>%
  # Put ID first and drop the two helper columns.
  select(ID, everything(), -var, -temp_ID)
请注意,我添加了ID列来标示原始行号。 这样,当同一行的各个分数被合并到同一列之后,仍然可以分辨它们来自哪一行,不至于混淆。
笔记:
gather把数据从宽格式转换为长格式,将Facility_score、Facility mean和TTP_score这三列的条目合并为新的Facility_score列。 同时创建变量var,用来临时存储每个值原先所在的列名(值标签)。
然后,我们按FYQ、Metric和一个临时ID变量temp_ID进行group_by。 temp_ID的取值规则是:var为Facility mean时取1,为TTP_score时取2,其余情况取0。
基于temp_ID,我们使用slice来选取行:如果temp_ID为0,则保留该组的所有行,否则只保留第一行。 这实际上返回了与Facility_score对应的所有行,而在每个FYQ + Metric组合中,Facility mean和TTP_score各只返回一行。
接下来,对于基准行,我们将Facility_label替换为var中相应的标签(Facility_score对应的行保留原来的Facility_label)。
最后,我们ungroup,按ID和temp_ID进行arrange,并重新排列列的顺序,同时删除var和temp_ID(当我们想把一个或多个变量移到最前面、同时保持其他变量不变时,everything非常有用)。
输出:
# A tibble: 50 x 5
ID Facility_label FYQ Metric Facility_score
<int> <chr> <S3: yearqtr> <fct> <dbl>
1 1 A 2017 Q4 Safety Recall 84.8
2 1 Facility mean 2017 Q4 Safety Recall 85.4
3 1 TTP_score 2017 Q4 Safety Recall 100
4 2 A 2018 Q1 Safety Recall 95.6
5 2 Facility mean 2018 Q1 Safety Recall 87.3
6 2 TTP_score 2018 Q1 Safety Recall 100
7 3 A 2018 Q2 Safety Recall 100
8 3 Facility mean 2018 Q2 Safety Recall 93.1
9 3 TTP_score 2018 Q2 Safety Recall 100
10 4 A 2018 Q3 Safety Recall 100
11 4 Facility mean 2018 Q3 Safety Recall 100
12 4 TTP_score 2018 Q3 Safety Recall 100
13 5 B 2017 Q4 Safety Recall 77.6
14 6 B 2018 Q1 Safety Recall 57.9
15 7 B 2018 Q2 Safety Recall 81.3
16 8 B 2018 Q3 Safety Recall 33.3
17 9 B 2018 Q4 Safety Recall 31.6
18 9 Facility mean 2018 Q4 Safety Recall 100
19 9 TTP_score 2018 Q4 Safety Recall 100
20 10 B 2017 Q4 Turnaround days 10.2
# ... with 30 more rows
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.