繁体   English   中英

在 R 中绘制相关性 dataframe 和置信区间

[英]plotting correlation dataframe with confidence intervals in R

有没有办法将 plot 和 dataframe 的上下置信区间作为ggplot()中的相关矩阵?

我可以像这样使用ggplot()来强制执行各种相关“矩阵”:

指定 dataframe:

phen1<-c("Activity", "Aggression", "PC1", "PC2", "Activity", "Aggression")
phen2<-c("Aggression",  "PC1",         "PC2", "Activity", "PC1",     "PC2")
cors<-c(0.06,            -0.003,        -0.04, -0.001,   -0.003,      0.004)
upper<-c(0.10,          0.01,       0.002, 0.02,        0.02,       0.02)
lower<-c(0.03,          -0.01,      -0.08, -0.02,       -0.01,  -0.02)
data<- data.frame(phen1, phen2, cors, upper, lower)

> data
       phen1      phen2   cors upper lower
1   Activity Aggression  0.060 0.100  0.03
2 Aggression        PC1 -0.003 0.010 -0.01
3        PC1        PC2 -0.040 0.002 -0.08
4        PC2   Activity -0.001 0.020 -0.02
5   Activity        PC1 -0.003 0.020 -0.01
6 Aggression        PC2  0.004 0.020 -0.02

将其转换为矩阵:

corrdata<-data %>%
 select(-c(upper, lower)) %>% #exclude the CIs here because they cause problems when using spread()
 spread(phen1, cors) %>%
 rename(phen = "phen2") %>%
 bind_rows(data %>%
         select(-c(upper, lower)) %>%
            spread(phen2, cors) %>%
            rename(phen = "phen1")) %>%
 group_by(phen) %>%
 summarise_all(~ ifelse(all(is.na(.)), 1, first(na.omit(.))))

> corrdata
# A tibble: 4 x 5
  phen       Activity Aggression    PC1    PC2
  <fct>         <dbl>      <dbl>  <dbl>  <dbl>
1 Activity      1          0.06  -0.003 -0.001
2 Aggression    0.06       1     -0.003  0.004
3 PC1          -0.003     -0.003  1     -0.04 
4 PC2          -0.001      0.004 -0.04   1    

创建一个 function 来提取相关矩阵的下半部分:

get_lower_tri<-function(corrdata){
  corrdata[upper.tri(corrdata)] <- NA
  return(corrdata)
}

lower_tri <- get_lower_tri(corrdata)
melted_corr <- melt(lower_tri, na.rm = TRUE)

Plot 数据:

ggplot(data = melted_corr, aes(x=phen, y=variable, fill=value)) + 
  geom_tile(color = "white")+
#add a colour gradient to specify which values are larger
  scale_fill_gradient2(low = "gray40", high = "gray40", mid = "white", 
                       midpoint = 0, limit = c(-0.10,0.10), 
                       name="Robust\ncorrelation") + 
  theme_minimal()+ 
  coord_fixed()+
  scale_y_discrete(position = "right")+
  geom_text(aes(phen, variable, label = value), color = "black", size = 7) +
  labs(y="", x="")+
  theme(axis.line = element_line(colour = "black"),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          panel.background = element_blank(),
          axis.text=element_text(size = 15), #changes size of axes #s 
          axis.title=element_text(size= 15), #changes size of axes labels 
          text = element_text(size = 17), 
          legend.position = c(0.15,0.8), #move legend into plot
          legend.title=element_blank())+
#add CI values manually
  annotate("text", x = 1, y = 0.75, label = "(0.03, 0.10)", size = 5)+
  annotate("text", x = 2, y = 0.75, label = "(-0.01, 0.02)", size = 5)+
  annotate("text", x = 2, y = 1.75, label = "(-0.01, 0.01)", size = 5)+
  annotate("text", x = 3, y = 0.75, label = "(-0.02, 0.02)", size = 5)+
  annotate("text", x = 3, y = 1.75, label = "(-0.02, 0.02)", size = 5)+
  annotate("text", x = 3, y = 2.75, label = "(-0.08, 0.002)", size = 5)+
#add symbols to specify significance manually
  annotate("text", x = 1.2, y = 1, label = "*", size = 7)+
  annotate("text", x = 3.22, y = 3, label = "*", size = 7)

这给了我我需要的东西,但它不是一个非常优雅的解决方案并且涉及很多annotate()

在此处输入图像描述

有没有人对我如何将 plot 我的 dataframe 的上下置信区间作为ggplot()中的相关矩阵提出建议?

我认为你可以以不同的方式重塑你的原始数据,因为它让你在绘图阶段有太多的工作要做。 您可以这样做,而不是所有的扩散和融化:

# Make a copy of data but with the first two columns switched
data2 <- data[c(2:1, 3:5)]
names(data2) <- names(data)

# Stick the two data frames together. 
bigdata <- rbind(data, data2)

# Create the confidence intervals using paste
bigdata$CI <- paste0("(", bigdata$lower, ", ", bigdata$upper, ")")

# Since bigdata contains each possible pair apart from diagonals,
# we can get just the lower triangle by selecting only those
# entries where the factor level in column 2 is lower than the
# factor level in column 3:
bigdata <- bigdata[which(as.numeric(as.factor(bigdata$phen2)) <
                         as.numeric(as.factor(bigdata$phen1))),]

这将您的 plot 简化为:

library(ggplot2)

ggplot(data = bigdata, aes(x = phen1, y = phen2, fill = cors)) + 
  geom_tile(color = "white")+
  geom_text(aes(label = cors), size = 7, position = position_nudge(y = 0.1)) +
  geom_text(aes(label = CI), size = 5, position = position_nudge(y = -0.1)) +
  scale_fill_gradient2(low = "gray40", high = "gray40", mid = "white", 
                       midpoint = 0, limit = c(-0.10,0.10), name = "") +
  scale_y_discrete(position = "right", name = "") +
  labs(x = "") +
  coord_fixed() +
  theme_classic() + 
  theme(axis.text       = element_text(size = 15),
        axis.title      = element_text(size = 15), 
        text            = element_text(size = 17), 
        legend.position = c(0.15 ,0.8))

reprex package (v0.3.0) 创建于 2020-11-13

也许您可以将置信区间字符串添加到melted_corr数据 object 并在第二个geom_text行中使用它们,同时还使用vjust美学调整 ci 字符串的垂直 position?

melted_corr$ci <- c("(0.03, 0.10)","(-0.01, 0.02)","(-0.02, 0.02)","(-0.01, 0.01)","(-0.02, 0.02)", "(-0.08, 0.002)")

ggplot(data = melted_corr, aes(x=phen, y=variable, fill=value)) + 
  geom_tile(color = "white")+
  #add a colour gradient to specify which values are larger
  scale_fill_gradient2(low = "gray40", high = "gray40", mid = "white", 
                       midpoint = 0, limit = c(-0.10,0.10), 
                       name="Robust\ncorrelation") + 
  theme_minimal()+ 
  coord_fixed()+
  scale_y_discrete(position = "right")+
  geom_text(aes(phen, variable, label = value), color = "black", size = 7) +
  geom_text(aes(phen, variable, label = ci), color = "black", size = 5,
            vjust = 2.5) + # ci labels added here
  labs(y="", x="")+
  theme(axis.line = element_line(colour = "black"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank(),
        axis.text=element_text(size = 15), #changes size of axes #s 
        axis.title=element_text(size= 15), #changes size of axes labels 
        text = element_text(size = 17), 
        legend.position = c(0.15,0.8), #move legend into plot
        legend.title=element_blank())+
  #add symbols to specify significance manually
  annotate("text", x = 1.2, y = 1, label = "*", size = 7)+
  annotate("text", x = 3.22, y = 3, label = "*", size = 7)

输出2

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM