[英]Plotting Kaplan-Meier survival data when the dataset already contains time, survival probability, and upper/lower 95% confidence intervals
I have a Kaplan-Meier survival dataset that already contains time, survival probability values, and the lower and upper 95% CI bounds of the survival probability.我有一个 Kaplan-Meier 生存数据集,其中已经包含时间、生存概率值,以及生存概率的 95% 置信区间下限和上限。 I have posted an excerpt of my dataset below.我在下面贴出了数据集的一部分。 I was hoping someone could show me how to merge my two plots, normalize them, and make the lines continuous despite the missing values.我希望有人能告诉我如何合并我的两个图、将它们标准化,并在存在缺失值的情况下使曲线保持连续。 I would like my final graph to look like this [2].我希望我的最终图表看起来像这样 [2]。
kmcurvetest_2[1:20, ] %>% dput()
structure(list(Time = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 15, 16, 17, 18, 19, 20), Cohort1 = c(0.904255319148936,
0.898936170212766, 0.887769261266023, 0.887769261266023, 0.887769261266023,
0.87631417402388, 0.87631417402388, NA, NA, 0.87631417402388,
0.864551567661143, 0.858629981581273, 0.852708395501402, NA,
0.852708395501402, 0.846745399728665, 0.846745399728665, 0.840740113205766,
NA, 0.840740113205766), C1Lower95 = c(0.852338104650895, 0.846140749965675,
0.833054851312184, 0.833054851312184, 0.833054851312184, 0.819696863257612,
0.819696863257612, NA, NA, 0.819696863257612, 0.806043967960357,
0.799218079053227, 0.792429563598159, NA, 0.792429563598159,
0.785616930383783, 0.785616930383783, 0.778778500012501, NA,
0.778778500012501), C1Upper95 = c(0.938570469008423, 0.934312293965728,
0.92534844712446, 0.92534844712446, 0.92534844712446, 0.916056348120451,
0.916056348120451, NA, NA, 0.916056348120451, 0.906427391600421,
0.901537491012523, 0.8966168920045, NA, 0.8966168920045, 0.891638921203334,
0.891638921203334, 0.886603579837755, NA, 0.886603579837755),
Cohort2 = c(0.707462686567164, 0.692537313432835, 0.683384837924912,
0.674232362416989, 0.674232362416989, 0.668074989244231,
NA, 0.664996302657852, 0.664996302657852, 0.658781383941424,
0.652507275522934, 0.649370221313689, 0.646217938685953,
0.643065656058216, 0.630394411603867, 0.62722660049028, 0.624058789376693,
0.620890978263105, 0.617723167149518, 0.614539027112665),
C2Lower95 = c(0.655564487332025, 0.640091667602195, 0.630607727619003,
0.62114710952213, 0.62114710952213, 0.614788099004335, NA,
0.611612499799214, 0.611612499799214, 0.605202384226936,
0.598734349944198, 0.595504428845739, 0.592259587632446,
0.589017489398546, 0.576004700295779, 0.572758317180272,
0.569514623188025, 0.566273601091399, 0.56303523423295, 0.5597807789553
), C2Upper95 = c(0.753046097156017, 0.738936670959587, 0.730275198102735,
0.721591223004285, 0.721591223004285, 0.715742377703966,
NA, 0.712814219355565, 0.712814219355565, 0.706901638437748,
0.700928732359048, 0.697938428282602, 0.694932646561064,
0.691924293962202, 0.679812432812405, 0.67677809121533, 0.673741229385084,
0.670701861632804, 0.667660001811057, 0.664601682804447)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
My data set contains missing values, and I attempted to make my geom_line continuous despite the missing values by using ggplot(data = kmcurvetest_2[!is.na(kmcurvetest_2$Cohort2),] , mapping = aes(x = Time, y = Cohort2)).我的数据集包含缺失值,我尝试使用 ggplot(data = kmcurvetest_2[!is.na(kmcurvetest_2$Cohort2),] , mapping = aes(x = Time, y = Cohort2)) 使我的 geom_line 在存在缺失值的情况下保持连续。
My code for the two plots is:我绘制这两个图的代码是:
# Plot cohort 1: survival curve with its 95% confidence band.
# Rows where Cohort1 is NA are dropped up front so that geom_line() joins the
# remaining points instead of leaving gaps at the missing time points.
ggplot(
  data = kmcurvetest_2[!is.na(kmcurvetest_2$Cohort1), ],
  mapping = aes(x = Time, y = Cohort1)
) +
  geom_point(size = 1) +
  # Colour is mapped inside aes() (not set as a constant) so that the colour
  # scale below actually produces a "Cohort" legend entry.
  geom_line(aes(color = "Cohort1")) +
  # x is inherited from the plot-level mapping.
  geom_ribbon(
    aes(ymin = C1Lower95, ymax = C1Upper95),
    fill = "blue", alpha = 0.2
  ) +
  labs(
    title = "Inpatient Hospitalization",
    x = "Time [Days]",
    y = "Survival [%]"
  ) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  # The named vector ties the legend key "Cohort1" to the blue line.
  scale_color_manual(name = "Cohort", values = c(Cohort1 = "blue"))
# Plot cohort 2: survival curve with its 95% confidence band.
# Rows where Cohort2 is NA are dropped up front so that geom_line() joins the
# remaining points instead of leaving gaps at the missing time points.
ggplot(
  data = kmcurvetest_2[!is.na(kmcurvetest_2$Cohort2), ],
  mapping = aes(x = Time, y = Cohort2)
) +
  geom_point(size = 1) +
  # Colour is mapped inside aes() (not set as a constant) so that the colour
  # scale below actually produces a "Cohort" legend entry.
  geom_line(aes(color = "Cohort2")) +
  # x is inherited from the plot-level mapping.
  geom_ribbon(
    aes(ymin = C2Lower95, ymax = C2Upper95),
    fill = "red", alpha = 0.2
  ) +
  labs(
    title = "Inpatient Hospitalization",
    x = "Time [Days]",
    y = "Survival [%]"
  ) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  # Legend key is "Cohort2" (the original label said "Cohort1" here).
  scale_color_manual(name = "Cohort", values = c(Cohort2 = "red"))
Thank you I really appreciate it - I have attached the images in question for reference above!谢谢你,我真的很感激 - 我附上了有问题的图片供上面参考!
Something like this?像这样的东西?
library(tidyverse)
# Build the plotting data from `df` (the data frame listed under "data:").
# A baseline row is prepended at time 0 in which every survival / CI column
# equals 1 (i.e. 100%), and the observed rows are shifted one time unit to the
# right so the curves start at 100%.
df1 <- df %>%
  slice(1) %>%
  mutate(across(-time, ~ 1)) %>%
  bind_rows(df %>% mutate(time = time + 1))

# Both cohorts are drawn on one panel. Every layer uses df1 directly, so no
# per-layer column subsetting is needed.
# NOTE: `size` for line widths is kept for compatibility with ggplot2 < 3.4;
# newer releases prefer `linewidth`.
ggplot(df1, aes(x = factor(time), group = 1)) +
  # Cohort 1: curve, observation marks and 95% confidence band.
  # The colour aesthetic carries the cohort name, not a colour name: the
  # legend keys are the mapped values, and the actual colours are assigned in
  # scale_color_manual() below. Mapping "red"/"blue" here and relabelling
  # positionally would swap the legend labels, because discrete levels are
  # ordered alphabetically ("blue" before "red").
  geom_line(aes(y = C1survival, color = "Cohort1"), size = 1) +
  geom_point(aes(y = C1survival), shape = 3, color = "black") +
  geom_ribbon(
    aes(ymin = C1lower95.CI, ymax = C1upper95.CI),
    alpha = 0.2
  ) +
  labs(
    title = "Survival cohort1",
    x = "Time [days]",
    y = "Survival [%]"
  ) +
  # Cohort 2: curve, observation marks and 95% confidence band.
  geom_line(aes(y = C2survival, color = "Cohort2"), size = 1) +
  geom_point(aes(y = C2survival), shape = 3, color = "black") +
  geom_ribbon(
    aes(ymin = C2lower95.CI, ymax = C2upper95.CI),
    alpha = 0.2
  ) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  scale_color_manual(
    name = "Cohort",
    values = c(Cohort1 = "red", Cohort2 = "blue")
  ) +
  theme_classic() +
  theme(
    axis.line = element_line(colour = "black", size = 0.24),
    aspect.ratio = 4 / 5,
    legend.position = "bottom",
    legend.box = "horizontal"
  )
data:数据:
# Example data for the plotting code: one row per time point, holding the
# survival value and the lower/upper 95% CI columns for each cohort.
# Assigned to `df` because the plotting code refers to the data by that name.
df <- structure(list(time = 0:8, C1survival = c(0.904255319, 0.89893617,
0.887769261, 0.887769261, 0.887769261, 0.876314174, 0.876314174,
0.664996303, 0.664996303), C1lower95.CI = c(0.852338105, 0.84614075,
0.833054851, 0.833054851, 0.833054851, 0.819696863, 0.819696863,
0.6116125, 0.6116125), C1upper95.CI = c(0.938570469, 0.934312294,
0.925348447, 0.925348447, 0.925348447, 0.916056348, 0.916056348,
0.712814219, 0.712814219), C2survival = c(0.707462687, 0.692537313,
0.683384838, 0.674232362, 0.674232362, 0.668074989, NA, NA, NA
), C2lower95.CI = c(0.655564487, 0.640091668, 0.630607728, 0.62114711,
0.62114711, 0.614788099, NA, NA, NA), C2upper95.CI = c(0.753046097,
0.738936671, 0.730275198, 0.721591223, 0.721591223, 0.715742378,
NA, NA, NA)), class = "data.frame", row.names = c(NA, -9L))
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.