如果数据集已经包含时间、生存概率和上/下 95% 置信区间，则绘制 kaplan Meier 生存数据

Question

I have a Kaplan-Meier survival dataset that already contains the time, the survival probability values, and the lower and upper 95% CI bounds of the survival probability.我有一个 Kaplan-Meier 生存数据集，其中已经包含时间、生存概率值以及生存概率的 95% 置信区间下限和上限。 I have posted an excerpt of my dataset below.我在下面贴出了数据集的一部分。 I was hoping someone knows how to merge my two plots, normalize them, and make the lines continuous despite the missing values.我希望有人知道如何合并我的两个图，将它们标准化，并在存在缺失值的情况下使曲线保持连续。 I would like my final graph to look like this [2].我希望我的最终图表看起来像这样 [2]。

kmcurvetest_2[1:20, ] %>% dput()

structure(list(Time = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 15, 16, 17, 18, 19, 20), Cohort1 = c(0.904255319148936, 
0.898936170212766, 0.887769261266023, 0.887769261266023, 0.887769261266023, 
0.87631417402388, 0.87631417402388, NA, NA, 0.87631417402388, 
0.864551567661143, 0.858629981581273, 0.852708395501402, NA, 
0.852708395501402, 0.846745399728665, 0.846745399728665, 0.840740113205766, 
NA, 0.840740113205766), C1Lower95 = c(0.852338104650895, 0.846140749965675, 
0.833054851312184, 0.833054851312184, 0.833054851312184, 0.819696863257612, 
0.819696863257612, NA, NA, 0.819696863257612, 0.806043967960357, 
0.799218079053227, 0.792429563598159, NA, 0.792429563598159, 
0.785616930383783, 0.785616930383783, 0.778778500012501, NA, 
0.778778500012501), C1Upper95 = c(0.938570469008423, 0.934312293965728, 
0.92534844712446, 0.92534844712446, 0.92534844712446, 0.916056348120451, 
0.916056348120451, NA, NA, 0.916056348120451, 0.906427391600421, 
0.901537491012523, 0.8966168920045, NA, 0.8966168920045, 0.891638921203334, 
0.891638921203334, 0.886603579837755, NA, 0.886603579837755), 
    Cohort2 = c(0.707462686567164, 0.692537313432835, 0.683384837924912, 
    0.674232362416989, 0.674232362416989, 0.668074989244231, 
    NA, 0.664996302657852, 0.664996302657852, 0.658781383941424, 
    0.652507275522934, 0.649370221313689, 0.646217938685953, 
    0.643065656058216, 0.630394411603867, 0.62722660049028, 0.624058789376693, 
    0.620890978263105, 0.617723167149518, 0.614539027112665), 
    C2Lower95 = c(0.655564487332025, 0.640091667602195, 0.630607727619003, 
    0.62114710952213, 0.62114710952213, 0.614788099004335, NA, 
    0.611612499799214, 0.611612499799214, 0.605202384226936, 
    0.598734349944198, 0.595504428845739, 0.592259587632446, 
    0.589017489398546, 0.576004700295779, 0.572758317180272, 
    0.569514623188025, 0.566273601091399, 0.56303523423295, 0.5597807789553
    ), C2Upper95 = c(0.753046097156017, 0.738936670959587, 0.730275198102735, 
    0.721591223004285, 0.721591223004285, 0.715742377703966, 
    NA, 0.712814219355565, 0.712814219355565, 0.706901638437748, 
    0.700928732359048, 0.697938428282602, 0.694932646561064, 
    0.691924293962202, 0.679812432812405, 0.67677809121533, 0.673741229385084, 
    0.670701861632804, 0.667660001811057, 0.664601682804447)), row.names = c(NA, 
-20L), class = c("tbl_df", "tbl", "data.frame"))

My data set contains missing values, and I attempted to make my geom_line continuous despite the missing values by using ggplot(data = kmcurvetest_2[!is.na(kmcurvetest_2$Cohort2),] , mapping = aes(x = Time, y = Cohort2)).我的数据集包含缺失值，我尝试使用 ggplot(data = kmcurvetest_2[!is.na(kmcurvetest_2$Cohort2),] , mapping = aes(x = Time, y = Cohort2)) 使我的 geom_line 在存在缺失值的情况下保持连续。

My code for the two plots is:我绘制这两个图的代码是：

# Plot cohort 1: survival estimate with its 95% confidence band.
# Rows where Cohort1 is NA are dropped before plotting so that geom_line()
# joins the remaining points instead of leaving a gap at each missing value.
ggplot(
  data = kmcurvetest_2[!is.na(kmcurvetest_2$Cohort1), ],
  mapping = aes(x = Time, y = Cohort1)
) +
  geom_point(size = 1) +
  # The colour is mapped inside aes() so the "Cohort" legend is actually
  # drawn; a constant colour set outside aes() never reaches the colour scale.
  geom_line(aes(color = "Cohort1")) +
  # x is inherited from the plot-level mapping; only the band limits are new.
  geom_ribbon(
    aes(ymin = C1Lower95, ymax = C1Upper95),
    fill = "blue", alpha = 0.2
  ) +
  labs(
    title = "Inpatient Hospitalization",
    x = "Time [Days]",
    y = "Survival [%]"
  ) +
  # Survival is stored as a proportion in [0, 1]; show it as a percentage.
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  # Pin the legend entry to the same blue that the ribbon uses.
  scale_color_manual(name = "Cohort", values = c(Cohort1 = "blue"))

# Plot cohort 2: survival estimate with its 95% confidence band.
# Rows where Cohort2 is NA are dropped before plotting so that geom_line()
# joins the remaining points instead of leaving a gap at each missing value.
ggplot(
  data = kmcurvetest_2[!is.na(kmcurvetest_2$Cohort2), ],
  mapping = aes(x = Time, y = Cohort2)
) +
  geom_point(size = 1) +
  # The colour is mapped inside aes() so the "Cohort" legend is actually
  # drawn; a constant colour set outside aes() never reaches the colour scale.
  geom_line(aes(color = "Cohort2")) +
  # x is inherited from the plot-level mapping; only the band limits are new.
  geom_ribbon(
    aes(ymin = C2Lower95, ymax = C2Upper95),
    fill = "red", alpha = 0.2
  ) +
  labs(
    title = "Inpatient Hospitalization",
    x = "Time [Days]",
    y = "Survival [%]"
  ) +
  # Survival is stored as a proportion in [0, 1]; show it as a percentage.
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  # The legend entry is labelled "Cohort2" (the original label said
  # "Cohort1", copied from the first plot) and pinned to the ribbon's red.
  scale_color_manual(name = "Cohort", values = c(Cohort2 = "red"))

Thank you I really appreciate it - I have attached the images in question for reference above!谢谢你，我真的很感激 - 我附上了有问题的图片供上面参考！

Answer 1

Something like this?像这样的东西？

library(tidyverse)
# Build the plotting data: prepend a baseline row in which every survival and
# confidence-bound column equals 1 (100 %), and shift all observed times by
# one unit so that both curves start at 100 %.
df1 <- df %>%
  slice(1) %>%
  mutate(across(-time, ~ 1)) %>%
  bind_rows(df %>% mutate(time = time + 1))

# Draw both cohorts in one plot. Every layer reads its columns from df1
# directly, so no per-layer column selection is needed.
# The colour aesthetic carries the cohort NAME, and scale_color_manual() maps
# each name to its colour. Mapping the strings "red"/"blue" and relabelling
# them afterwards attached the labels in alphabetical order of the strings
# ("blue" before "red"), which swapped the two legend entries.
# NOTE: ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line widths;
# `size` is kept here so the code also runs on older ggplot2 versions.
ggplot(df1, aes(x = factor(time), group = 1)) +
  geom_line(aes(y = C1survival, color = "Cohort1"), size = 1) +
  geom_point(aes(y = C1survival), shape = 3, color = "black") +
  geom_ribbon(aes(ymin = C1lower95.CI, ymax = C1upper95.CI), alpha = 0.2) +
  labs(title = "Survival cohort1", x = "Time [days]", y = "Survival [%]") +
  geom_line(aes(y = C2survival, color = "Cohort2"), size = 1) +
  geom_point(aes(y = C2survival), shape = 3, color = "black") +
  geom_ribbon(aes(ymin = C2lower95.CI, ymax = C2upper95.CI), alpha = 0.2) +
  # Survival is stored as a proportion in [0, 1]; show it as a percentage.
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  scale_color_manual(
    name = "Cohort",
    values = c(Cohort1 = "red", Cohort2 = "blue")
  ) +
  theme_classic() +
  theme(
    axis.line = element_line(colour = "black", size = 0.24),
    aspect.ratio = 4 / 5,
    legend.position = "bottom",
    legend.box = "horizontal"
  )

data:数据：

structure(list(time = 0:8, C1survival = c(0.904255319, 0.89893617, 
0.887769261, 0.887769261, 0.887769261, 0.876314174, 0.876314174, 
0.664996303, 0.664996303), C1lower95.CI = c(0.852338105, 0.84614075, 
0.833054851, 0.833054851, 0.833054851, 0.819696863, 0.819696863, 
0.6116125, 0.6116125), C1upper95.CI = c(0.938570469, 0.934312294, 
0.925348447, 0.925348447, 0.925348447, 0.916056348, 0.916056348, 
0.712814219, 0.712814219), C2survival = c(0.707462687, 0.692537313, 
0.683384838, 0.674232362, 0.674232362, 0.668074989, NA, NA, NA
), C2lower95.CI = c(0.655564487, 0.640091668, 0.630607728, 0.62114711, 
0.62114711, 0.614788099, NA, NA, NA), C2upper95.CI = c(0.753046097, 
0.738936671, 0.730275198, 0.721591223, 0.721591223, 0.715742378, 
NA, NA, NA)), class = "data.frame", row.names = c(NA, -9L))

如果数据集已经包含时间、生存概率和上/下 95% 置信区间，则绘制 kaplan Meier 生存数据

问题描述

1 个解决方案

解决方案1
0 2022-06-22 16:28:19

如果数据集已经包含时间、生存概率和上/下 95% 置信区间，则绘制 kaplan Meier 生存数据

问题描述

1 个解决方案

解决方案1 0 2022-06-22 16:28:19

解决方案1
0 2022-06-22 16:28:19