在 R 中收集和变异多个列

Question

I have three measurements, each with two time points.我有三个测量值，每个测量值都有两个时间点。

library(tidyverse)
library(tableone)

# Raw scores: three tests, each recorded at two time points ("pre" and
# "post"), four observations per vector.
test_1_pre <- c(1,5,8,2)
test_1_post <- c(2,7,3,6)
test_2_pre <- c(6,3,6,5)
test_2_post <- c(9,8,9,1)
test_3_pre <- c(12,2,4,6)
test_3_post <- c(4,7,6,6)

# Wide data frame: one column per test/time-point combination.
df <- data.frame(test_1_pre, test_1_post, test_2_pre,
                 test_2_post, test_3_pre, test_3_post)

# Reshape with three independent gather() calls, one per test.
# NOTE(review): each gather() stacks two columns and therefore doubles the
# row count (4 -> 8 -> 16 -> 32), so a single row can combine a test_1 value
# from one time point with test_2 / test_3 values from the other.
df_2 <- df %>%
  gather(test_1_pre, test_1_post,
         key="test_1_old", value="test_1") %>%
  gather(test_2_pre, test_2_post,
         key="test_2_old", value="test_2") %>%
  gather(test_3_pre, test_3_post,
         key="test_3_old", value="test_3") %>%
  # pre_post is derived from test_1_old alone. That column only ever holds
  # "test_1_pre" or "test_1_post", so the four test_2_* / test_3_* branches
  # below can never match, and the label does not describe the time point of
  # the test_2 and test_3 values sitting in the same row.
  mutate(pre_post = case_when(test_1_old == "test_1_pre" ~ 'pre',
                              test_1_old == "test_1_post" ~'post',
                              test_1_old == "test_2_pre" ~ 'pre',
                              test_1_old == "test_2_post" ~ 'post',
                              test_1_old == "test_3_pre" ~ 'pre',
                              test_1_old == "test_3_post" ~'post'))

# Columns to summarise in the table.
vars_df <- c("test_1", "test_2", "test_3")


# Summary table of the three tests, stratified by the pre_post label.
table_df <- CreateTableOne(vars = vars_df,
                                 data = df_2,
                                 strata = "pre_post")

# print() displays the table; whatever it returns is kept in tabelle.table_df.
tabelle.table_df<-print(table_df)

My aim is to get a table which compares the two time points "pre" and "post" of the different tests.我的目标是获得一个表格，比较不同测试的“前”和“后”两个时间点。

It works for test 1, but not the following ones.它适用于测试 1，但不适用于以下测试。

Could anyone help? It would be much appreciated!谁能帮忙，将不胜感激！

Result:

                    Stratified by pre_post
                     post        pre         p      test
  n                    16          16                   
  test_1 (mean (sd)) 4.50 (2.13) 4.00 (2.83)  0.576     
  test_2 (mean (sd)) 5.88 (2.75) 5.88 (2.75)  1.000     
  test_3 (mean (sd)) 5.88 (2.85) 5.88 (2.85)  1.000

Answer 1

# Row identifier, so every original row stays distinguishable while the data
# are reshaped.
nr <- as.character(1:4)

# Scores per test and time point, one value per row of the final data frame.
test_1_pre  <- c(1, 5, 8, 2)
test_1_post <- c(2, 7, 3, 6)
test_2_pre  <- c(6, 3, 6, 5)
test_2_post <- c(9, 8, 9, 1)
test_3_pre  <- c(12, 2, 4, 6)
test_3_post <- c(4, 7, 6, 6)

df <- data.frame(
  nr,
  test_1_pre, test_1_post,
  test_2_pre, test_2_post,
  test_3_pre, test_3_post
)

# 1. Stack all six score columns into one score/value pair.
# 2. Label each row "pre" or "post" from the suffix of its source column.
# 3. Spread the scores out again: one row per nr and time point, with the
#    columns of the other time point left as NA.
# 4. Gather each test's pre/post columns separately into test_1 .. test_3.
# Every gather() in step 4 doubles the row count, which is why the resulting
# table reports n = 32 per stratum.
df_2 <- df %>%
  gather(key = "score", value = "value", starts_with("test_")) %>%
  mutate(pre_post = if_else(endsWith(score, "_pre"), "pre", "post")) %>%
  pivot_wider(names_from = "score", values_from = "value") %>%
  gather(key = "test_1_old", value = "test_1", test_1_pre, test_1_post) %>%
  gather(key = "test_2_old", value = "test_2", test_2_pre, test_2_post) %>%
  gather(key = "test_3_old", value = "test_3", test_3_pre, test_3_post)

# Columns to summarise.
vars_df <- paste0("test_", 1:3)

# Summary of each test, split by time point.
table_df <- CreateTableOne(
  data = df_2,
  vars = vars_df,
  strata = "pre_post"
)

# Show the table and keep what print() returns.
tabelle.table_df <- print(table_df)

Found the solution: first gather everything into one column, mutate the new column, spread the columns again and gather them separately.找到解决方案：首先将所有内容收集到一个列中，改变新列，再次展开列并分别收集它们。

                   Stratified by pre_post
                     post        pre         p      test
  n                    32          32                   
  test_1 (mean (SD)) 4.50 (2.13) 4.00 (2.83)  0.576     
  test_2 (mean (SD)) 6.75 (3.45) 5.00 (1.26)  0.067     
  test_3 (mean (SD)) 5.75 (1.13) 6.00 (3.86)  0.805

Answer 2

The problem arises because the pre_post column you create is incorrect for tests 2 and 3, which you can verify by inspecting the df_2 dataframe.出现问题是因为您创建的pre_post列对于测试 2 和 3 不正确，您可以通过检查df_2 dataframe 来验证。 For example row 5 from your output is:例如 output 中的第 5 行是：

test_1_post      2  test_2_pre      6  test_3_pre     12     post

Whilst a single data pipeline is appropriate in many circumstances, this problem is more easily solved by splitting up the tables and using union_all, as per the following:虽然在许多情况下单个数据管道是合适的，但通过拆分表并使用union_all更容易解决此问题，如下所示：

library(tidyverse)
library(tableone)

# Scores for three tests, each measured before ("pre") and after ("post");
# four observations per vector.
test_1_pre  <- c(1, 5, 8, 2)
test_1_post <- c(2, 7, 3, 6)
test_2_pre  <- c(6, 3, 6, 5)
test_2_post <- c(9, 8, 9, 1)
test_3_pre  <- c(12, 2, 4, 6)
test_3_post <- c(4, 7, 6, 6)

# Wide layout: one column per test/time-point pair.
df <- data.frame(
  test_1_pre = test_1_pre, test_1_post = test_1_post,
  test_2_pre = test_2_pre, test_2_post = test_2_post,
  test_3_pre = test_3_pre, test_3_post = test_3_post
)

#' Extract one time point from the wide score table
#'
#' Keeps the columns whose names end in `_<suffix>`, adds a `pre_post` column
#' holding `suffix`, and strips the trailing `_<suffix>` from the test column
#' names so that the pieces for different time points share column names.
#'
#' @param df Wide data frame with columns named like `test_1_pre`.
#' @param suffix Time-point label without the leading underscore, e.g.
#'   `"pre"` or `"post"`.
#' @return A data frame with the de-suffixed test columns followed by
#'   `pre_post`.
get_dfs <- function(df, suffix) {
  suffix_tag <- paste0("_", suffix)
  df %>%
    # Match on "_<suffix>" rather than "<suffix>" so that a column which
    # merely happens to end in the same letters is not picked up.
    select(ends_with(suffix_tag)) %>%
    mutate(pre_post = suffix) %>%
    # Drop only the trailing _pre / _post ahead of the union; the "$" anchor
    # keeps an earlier "_<suffix>" inside a name (e.g. "test_pre_2_pre")
    # intact.
    rename_with(.fn = function(x) sub(paste0(suffix_tag, "$"), "", x),
                .cols = starts_with("test"))
}

# Build the "pre" and "post" pieces and stack them into one table with a
# single column per test plus the pre_post label.
df_2 <- Reduce(
  union_all,
  lapply(c("pre", "post"), function(time_point) get_dfs(df, time_point))
)

# Columns to summarise.
vars_df <- paste0("test_", 1:3)

# Compare the two time points for each test.
table_df <- CreateTableOne(
  data = df_2,
  vars = vars_df,
  strata = "pre_post"
)

table_df

The output given is:给出的 output 是：

# Stratified by pre_post
# post        pre         p      test
# n                     4           4                   
# test_1 (mean (SD)) 4.50 (2.38) 4.00 (3.16)  0.809     
# test_2 (mean (SD)) 6.75 (3.86) 5.00 (1.41)  0.427     
# test_3 (mean (SD)) 5.75 (1.26) 6.00 (4.32)  0.915

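This contains more conservative p-values arising from the different estimated standard errors, as compared with @Frank's answer (Answer 1 above). There, the repeated gather() calls duplicate every observation, so the table reports n = 32 per group instead of 4 and the standard deviations are understated.与@Frank 的答案（上面的答案 1）相比，这包含由不同估计标准误差产生的更保守的 p 值。在该答案中，重复的 gather() 调用会复制每个观测值，因此表格报告每组 n = 32 而不是 4，标准差被低估。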

Moreover, the structure of df_2 is cleaner:此外， df_2的结构更简洁：

# test_1 test_2 test_3 pre_post
#      1      6     12      pre
#      5      3      2      pre
#      8      6      4      pre
#      2      5      6      pre
#      2      9      4     post
#      7      8      7     post
#      3      9      6     post
#      6      1      6     post

在 R 中收集和变异多个列

问题描述

2 个解决方案

解决方案1
1 2022-01-14 14:57:17

解决方案2
1 已采纳 2022-01-14 16:16:05

在 R 中收集和变异多个列

问题描述

2 个解决方案

解决方案1 1 2022-01-14 14:57:17

解决方案2 1 已采纳 2022-01-14 16:16:05

解决方案1
1 2022-01-14 14:57:17

解决方案2
1 已采纳 2022-01-14 16:16:05