繁体   English   中英

在R中用ggplot的facet_wrap为各分面叠加不同的vline

[英]Overlaying different vlines in R with ggplot facet_wrap

我正在尝试生成一组密度图,以显示四种细胞类型中两组基因表达水平分布的差异。 除了密度图外,我还希望将两组的中位表达水平叠加在每个图上。 参考一些类似问题的答案,我已经能够得到正确的图,或者得到正确的中位数,但无法同时得到两者。 我已经没有思路了,希望有人能帮我指出正确的做法。 谢谢!

示例数据可在此处获取: https://github.com/adadiehl/sample_data/blob/master/sample.data

第一次尝试。 产生正确的图,但在所有四个图上绘制相同的中位数:

# First attempt: density of positive FPKM values per class, one panel per
# `source`, with one dashed median line per class.
dat = read.table("sample.data")

# Only rows with FPKM > 0 are plotted.
g = ggplot(dat[which(dat$FPKM > 0),], aes(x = FPKM))
g = g + geom_density(aes(y = ..density.., group = class, color = class, fill = class), alpha=0.2)
# Each median below is computed from the whole `dat`, filtered by class and
# FPKM > 0 but NOT by source, so it is a single value that is identical in
# every facet -- this is the "same medians on all four plots" symptom.
g = g + geom_vline(data=dat, aes(xintercept = median(dat$FPKM[ which(dat$FPKM > 0 & dat$class == "Other") ]) ), colour="turquoise3", linetype="longdash")
g = g + geom_vline(data=dat, aes(xintercept = median(dat$FPKM[ which(dat$FPKM > 0 & dat$class == "a_MCKG") ]) ), colour="tomato1", linetype="longdash")
# Two-column grid of panels, one per value of `source`, with free axis scales.
g = g + facet_wrap(~source, ncol=2, scales="free")
g = g + ggtitle("Distribution of FPKM, MCKG vs. Other")
g = g + xlab("FPKM > 0")

第二次尝试:图是正确的,但所有中位数都被画在了每一个图上:

# Second attempt: precompute per-source medians in a helper table and draw
# them with geom_vline.
dat = read.table("sample.data")
# vline.dat columns:
#   z  - levels of dat$source (assumes dat$source is a factor; read.table's
#        default for strings depends on the R version -- TODO confirm)
#   vl - median FPKM per source for rows with class != "a_MCKG" and FPKM > 0
#   vm - median FPKM per source for rows with class == "a_MCKG" and FPKM > 0
vline.dat = data.frame(z=levels(dat$source), vl=tapply(dat$FPKM[which(dat$class != "a_MCKG" & dat$FPKM > 0)], dat$source[which(dat$class != "a_MCKG" & dat$FPKM > 0)], median), vm=tapply(dat$FPKM[which(dat$class == "a_MCKG" & dat$FPKM > 0)], dat$source[which(dat$class == "a_MCKG" & dat$FPKM > 0)], median))

g = ggplot(dat[which(dat$FPKM > 0),], aes(x = FPKM))
g = g + geom_density(aes(y = ..density.., group = class, color = class, fill = class), alpha=0.2)
g = g + facet_wrap(~source, ncol=2, scales="free")
# NOTE(review): vline.dat has no column named `source` (its key column is
# `z`). Presumably ggplot2 therefore repeats both vline layers in every
# panel, which matches the reported symptom -- confirm against facet_wrap docs.
g = g + geom_vline(data=vline.dat, aes(xintercept = vl), colour="turquoise3", linetype="longdash")
g = g + geom_vline(data=vline.dat, aes(xintercept = vm), colour="tomato1", linetype="longdash")
# Same facet_wrap call as three lines above, added a second time.
g = g + facet_wrap(~source, ncol=2, scales="free")
g = g + ggtitle("Distribution of FPKM, MCKG vs. Other")
g = g + xlab("FPKM > 0")

第三次尝试:曲线图都一样,但中位数正确。

# Third attempt: same helper table as before, but the final facet
# specification is switched from `source` to `z`.
dat = read.table("sample.data")
# vline.dat columns:
#   z  - levels of dat$source (assumes dat$source is a factor -- TODO confirm)
#   vl - median FPKM per source for rows with class != "a_MCKG" and FPKM > 0
#   vm - median FPKM per source for rows with class == "a_MCKG" and FPKM > 0
vline.dat = data.frame(z=levels(dat$source), vl=tapply(dat$FPKM[which(dat$class != "a_MCKG" & dat$FPKM > 0)], dat$source[which(dat$class != "a_MCKG" & dat$FPKM > 0)], median), vm=tapply(dat$FPKM[which(dat$class == "a_MCKG" & dat$FPKM > 0)], dat$source[which(dat$class == "a_MCKG" & dat$FPKM > 0)], median))

g = ggplot(dat[which(dat$FPKM > 0),], aes(x = FPKM))
g = g + geom_density(aes(y = ..density.., group = class, color = class, fill = class), alpha=0.2)
g = g + facet_wrap(~source, ncol=2, scales="free")
g = g + geom_vline(data=vline.dat, aes(xintercept = vl), colour="turquoise3", linetype="longdash")
g = g + geom_vline(data=vline.dat, aes(xintercept = vm), colour="tomato1", linetype="longdash")
# NOTE(review): this second facet_wrap facets on `z`, which exists only in
# vline.dat and not in the density data. Presumably that is why the medians
# land in the right panels while the density curves are repeated in every
# panel -- confirm against facet_wrap docs.
g = g + facet_wrap(~z, ncol=2, scales="free")
g = g + ggtitle("Distribution of FPKM, MCKG vs. Other")
g = g + xlab("FPKM > 0")

传递预先汇总的数据是一种方法:

library(plyr)

# Name the columns as used below and keep only rows with positive FPKM.
names(dat) <- c("FPKM", "class", "source")
dat2 <- dat[which(dat$FPKM > 0), ]

# One median per (source, class) pair. Because this summary carries a
# `source` column, facet_wrap(~ source) can assign each line to its own panel.
group_medians <- ddply(dat2, .(source, class), summarize, mmed = median(FPKM))

density_layer <- geom_density(
  aes(y = ..density.., group = class, color = class, fill = class),
  alpha = 0.2
)
median_layer <- geom_vline(
  data = group_medians,
  mapping = aes(xintercept = mmed, color = class)
)

ggplot(dat2, aes(x = FPKM)) +
  density_layer +
  median_layer +
  facet_wrap(~ source, ncol = 2, scales = "free") +
  labs(title = "Distribution of FPKM, MCKG vs. Other", x = "FPKM > 0")

或者,您可以使用基础R(base R)实现相同的目的:

# Base-R alternative: median FPKM for every source/class combination.
# The summary keeps the column name `FPKM`, which now holds the medians.
dat3 <- aggregate(
  FPKM ~ source + class,
  data = dat2,
  FUN = median
)

density_layer <- geom_density(
  aes(y = ..density.., group = class, color = class, fill = class),
  alpha = 0.2
)
median_layer <- geom_vline(
  data = dat3,
  mapping = aes(xintercept = FPKM, color = class)
)

ggplot(dat2, aes(x = FPKM)) +
  density_layer +
  median_layer +
  facet_wrap(~ source, ncol = 2, scales = "free") +
  labs(title = "Distribution of FPKM, MCKG vs. Other", x = "FPKM > 0")

注意:您可能要避免使用诸如 source 和 class 这样的列名,因为它们与R的内置函数同名。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM