繁体   English   中英

使用 R 进行数据转换

[英]data transformation using R

我得到了一些这样的数据

# Example input (dput output): a 12-row data.frame with one row per event.
# Columns: id, dead, futime, event ("hosp"/"out") and event_time.
# In this sample dead and futime are repeated on every row of the same id.
structure(list(id = c(1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3), dead = c(1, 
1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0), futime = c(2062, 2062, 2062, 
2062, 2151, 2151, 388, 388, 388, 388, 388, 388), event = c("hosp", 
"out", "hosp", "out", "hosp", "out", "hosp", "out", "hosp", "out", 
"hosp", "out"), event_time = c(36, 52, 775, 776, 1268, 1283, 
178, 192, 271, 272, 387, 377.9)), class = "data.frame", row.names = c(NA, 
-12L))

我想让它看起来像这样

# Desired output (dput output): a 15-row data.frame, i.e. the 12 input rows
# plus one extra final row per id whose event2 is "death" or "censored" and
# whose event_time2 equals that id's futime.
# dead2 and futime2 are NA on every row except those final rows.
structure(list(id2 = c(1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 
3, 3), dead2 = c(NA, NA, NA, NA, 1, NA, NA, 1, NA, NA, NA, NA, 
NA, NA, NA), futime2 = c(NA, NA, NA, NA, 2062, NA, NA, 2151, 
NA, NA, NA, NA, NA, NA, 388), event2 = c("hosp", "out", "hosp", 
"out", "death", "hosp", "out", "death", "hosp", "out", "hosp", 
"out", "hosp", "out", "censored"), event_time2 = c(36, 52, 775, 
776, 2062, 1268, 1283, 2151, 178, 192, 271, 272, 387, 377.9, 
388)), class = "data.frame", row.names = c(NA, -15L))

所以基本上,我希望 dead == 1 和 futime 列中的值出现在每个 id 的最后一次观察中。 并创建一个新列,其中所有事件都按顺序输入。 谢谢

我没有将“2”放在结果的列名中,但如果需要,您可以轻松地进行更改。注意:结果按 id 和 event_time 排序,因此 id 3 中的 377.9 会排在 387 之前;另外,“censored”行的 dead 保留为 0,而不是 NA。

library(dplyr)
# Build one terminal row per id: the first row of each id supplies dead and
# futime (assumes both are constant within an id -- confirm for real data).
# The terminal event is "death" when dead == 1, otherwise "censored", and it
# is timestamped at the follow-up time.
last_rows <- df %>%
  select(id, dead, futime) %>%
  group_by(id) %>%
  slice(1) %>%
  # Drop the grouping so it does not leak into later steps.
  ungroup() %>%
  mutate(
    event = if_else(dead == 1, "death", "censored"),
    event_time = futime
  )

# Blank dead/futime on the original event rows (typed NA keeps the columns
# numeric), append the terminal rows, then order each id's rows by event time.
result <- df %>%
  mutate(
    dead = NA_real_,
    futime = NA_real_
  ) %>%
  bind_rows(last_rows) %>%
  arrange(id, event_time)

result
#    id dead futime    event event_time
# 1   1   NA     NA     hosp       36.0
# 2   1   NA     NA      out       52.0
# 3   1   NA     NA     hosp      775.0
# 4   1   NA     NA      out      776.0
# 5   1    1   2062    death     2062.0
# 6   2   NA     NA     hosp     1268.0
# 7   2   NA     NA      out     1283.0
# 8   2    1   2151    death     2151.0
# 9   3   NA     NA     hosp      178.0
# 10  3   NA     NA      out      192.0
# 11  3   NA     NA     hosp      271.0
# 12  3   NA     NA      out      272.0
# 13  3   NA     NA      out      377.9
# 14  3   NA     NA     hosp      387.0
# 15  3    0    388 censored      388.0

这是使用 group_modify 和 add_row 的一种方法

library(dplyr)
library(tibble)
# Append a terminal "death"/"censored" row to every (id, futime) group, then
# keep dead and futime only on that terminal row.
df1 %>%
  group_by(id, futime) %>%
  group_modify(function(rows, key) {
    final_status <- last(rows$dead)
    add_row(
      rows,
      # NA^FALSE is 1 and NA^TRUE is NA, so this yields 1 for a non-zero
      # status and NA for a zero status.
      dead = NA^!final_status,
      # The group key carries futime; use it as the terminal event time.
      event_time = last(key$futime),
      event = if (final_status == 1) "death" else "censored"
    )
  }) %>%
  # Still grouped by (id, futime): blank dead everywhere except the last row.
  mutate(dead = replace(dead, row_number() != n(), NA)) %>%
  group_by(id) %>%
  # Within each id keep only the last occurrence of each futime value.
  mutate(
    futime = replace(futime, duplicated(futime, fromLast = TRUE), NA)
  ) %>%
  ungroup()

-输出

# A tibble: 15 × 5
      id futime  dead event    event_time
   <dbl>  <dbl> <dbl> <chr>         <dbl>
 1     1     NA    NA hosp            36 
 2     1     NA    NA out             52 
 3     1     NA    NA hosp           775 
 4     1     NA    NA out            776 
 5     1   2062     1 death         2062 
 6     2     NA    NA hosp          1268 
 7     2     NA    NA out           1283 
 8     2   2151     1 death         2151 
 9     3     NA    NA hosp           178 
10     3     NA    NA out            192 
11     3     NA    NA hosp           271 
12     3     NA    NA out            272 
13     3     NA    NA hosp           387 
14     3     NA    NA out            378.
15     3    388    NA censored       388 

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM