简体   繁体   中英

plotting correlation dataframe with confidence intervals in R

Is there a way to plot a dataframe with upper and lower confidence intervals as a correlation matrix in ggplot()?

I am able to force a correlation "matrix" of sorts using ggplot() like so:

Specify the dataframe:

# Pairwise robust correlations in long format: one row per phenotype pair,
# holding the point estimate together with its confidence bounds.
phen1 <- c("Activity", "Aggression", "PC1", "PC2", "Activity", "Aggression")
phen2 <- c("Aggression", "PC1", "PC2", "Activity", "PC1", "PC2")
cors  <- c(0.06, -0.003, -0.04, -0.001, -0.003, 0.004)  # point estimates
upper <- c(0.10, 0.01, 0.002, 0.02, 0.02, 0.02)         # upper CI bounds
lower <- c(0.03, -0.01, -0.08, -0.02, -0.01, -0.02)     # lower CI bounds
data  <- data.frame(phen1, phen2, cors, upper, lower)

> data
       phen1      phen2   cors upper lower
1   Activity Aggression  0.060 0.100  0.03
2 Aggression        PC1 -0.003 0.010 -0.01
3        PC1        PC2 -0.040 0.002 -0.08
4        PC2   Activity -0.001 0.020 -0.02
5   Activity        PC1 -0.003 0.020 -0.01
6 Aggression        PC2  0.004 0.020 -0.02

Convert this to a matrix:

# Build a symmetric wide table of correlations from the long-format `data`:
# one row per phenotype (column `phen`) and one column per phenotype.
# Each pair is stored only once in `data`, so the table is assembled from two
# wide views (one keyed by phen2, one keyed by phen1) that are stacked and then
# collapsed to a single row per phenotype.
# NOTE(review): spread() and summarise_all() are superseded in current
# tidyr/dplyr (pivot_wider(), across()); left as written here.
corrdata<-data %>%
 select(-c(upper, lower)) %>% # drop the CI columns first: the author reports they cause problems with spread()
 # wide view 1: rows are phen2 values, columns are phen1 values
 spread(phen1, cors) %>%
 rename(phen = "phen2") %>%
 # wide view 2 (rows are phen1 values, columns are phen2 values), appended below view 1
 bind_rows(data %>%
         select(-c(upper, lower)) %>%
            spread(phen2, cors) %>%
            rename(phen = "phen1")) %>%
 group_by(phen) %>%
 # per phenotype and column: keep the first non-missing value; a column with
 # no value at all for that phenotype is filled with 1
 summarise_all(~ ifelse(all(is.na(.)), 1, first(na.omit(.))))

> corrdata
# A tibble: 4 x 5
  phen       Activity Aggression    PC1    PC2
  <fct>         <dbl>      <dbl>  <dbl>  <dbl>
1 Activity      1          0.06  -0.003 -0.001
2 Aggression    0.06       1     -0.003  0.004
3 PC1          -0.003     -0.003  1     -0.04 
4 PC2          -0.001      0.004 -0.04   1    

Create a function to extract lower half of correlation matrix:

# Set every cell above the main diagonal (row index < column index, as
# reported by upper.tri()) to NA and return the result; all other cells are
# left unchanged.
# NOTE(review): the mask is computed over all columns of `corrdata`, so a
# leading label column (such as `phen`) counts as column 1 -- confirm this is
# the alignment the caller intends.
get_lower_tri <- function(corrdata) {
  above_diagonal <- upper.tri(corrdata)
  corrdata[above_diagonal] <- NA
  corrdata
}

# Apply the NA mask to the wide correlation table.
lower_tri <- get_lower_tri(corrdata)
# Reshape to long format and drop the NA cells. The plotting code reads the
# resulting `phen`, `variable` and `value` columns.
# NOTE(review): melt() is presumably reshape2::melt -- no library() call is shown.
melted_corr <- melt(lower_tri, na.rm = TRUE)

Plot the data:

# Tile plot of the melted correlations: one tile per phenotype pair, filled by
# the correlation value and labelled with it. Confidence intervals and
# significance stars are added as hard-coded annotations at the end.
ggplot(data = melted_corr, aes(x=phen, y=variable, fill=value)) + 
  geom_tile(color = "white")+
# Diverging fill scale with the same colour at both ends (gray40) and white at
# the midpoint 0, so shading reflects the magnitude of a value, not its sign
  scale_fill_gradient2(low = "gray40", high = "gray40", mid = "white", 
                       midpoint = 0, limit = c(-0.10,0.10), 
                       name="Robust\ncorrelation") + 
  theme_minimal()+ 
  coord_fixed()+
  scale_y_discrete(position = "right")+
  # print the correlation value on each tile
  geom_text(aes(phen, variable, label = value), color = "black", size = 7) +
  labs(y="", x="")+
  theme(axis.line = element_line(colour = "black"),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.border = element_blank(),
          panel.background = element_blank(),
          axis.text=element_text(size = 15), # size of the axis tick labels
          axis.title=element_text(size= 15), # size of the axis titles
          text = element_text(size = 17), 
          legend.position = c(0.15,0.8), # place the legend inside the plot area
          legend.title=element_blank())+
# CI labels, typed in by hand as "(lower, upper)". x and y are numeric
# positions on the discrete axes; the y values ending in .75 sit a quarter
# unit below the integer position of the corresponding tile.
# These strings are not derived from `data` and must be kept in sync manually.
  annotate("text", x = 1, y = 0.75, label = "(0.03, 0.10)", size = 5)+
  annotate("text", x = 2, y = 0.75, label = "(-0.01, 0.02)", size = 5)+
  annotate("text", x = 2, y = 1.75, label = "(-0.01, 0.01)", size = 5)+
  annotate("text", x = 3, y = 0.75, label = "(-0.02, 0.02)", size = 5)+
  annotate("text", x = 3, y = 1.75, label = "(-0.02, 0.02)", size = 5)+
  annotate("text", x = 3, y = 2.75, label = "(-0.08, 0.002)", size = 5)+
# Significance markers, also positioned by hand next to the tile values
  annotate("text", x = 1.2, y = 1, label = "*", size = 7)+
  annotate("text", x = 3.22, y = 3, label = "*", size = 7)

This gives me what I need, but it isn't a very elegant solution and involves a lot of annotate() calls.

在此处输入图像描述

Does anyone have suggestions for how I could plot my dataframe with upper and lower confidence intervals as a correlation matrix in ggplot()?

I think you can reshape your original data differently, because the current shape leaves you too much work to do at the plotting stage. Instead of all the spreading and melting, you can do:

# Make a copy of data but with the first two columns switched
# Make a copy of data but with the first two columns switched, so that every
# pair also appears in the opposite (phen2, phen1) orientation
data2 <- data[c(2:1, 3:5)]
names(data2) <- names(data)

# Stick the two data frames together.
bigdata <- rbind(data, data2)

# Create the confidence interval labels, e.g. "(0.03, 0.1)", using paste0
bigdata$CI <- paste0("(", bigdata$lower, ", ", bigdata$upper, ")")

# Since bigdata contains each possible pair apart from diagonals,
# we can get just the lower triangle by selecting only those
# entries where phen2 sorts before phen1.
# Both columns are ranked against one shared, sorted set of labels:
# coding each column with its own as.factor() call is only comparable
# when the two columns happen to have the same level order, which is
# not guaranteed when they are already factors (for example when the
# data frame was built with stringsAsFactors = TRUE).
phen_levels <- sort(unique(c(as.character(bigdata$phen1),
                             as.character(bigdata$phen2))))
bigdata <- bigdata[which(match(as.character(bigdata$phen2), phen_levels) <
                         match(as.character(bigdata$phen1), phen_levels)), ]

Which simplifies your plot to:

library(ggplot2)

# Tile plot built directly from bigdata: one tile per (phen1, phen2) pair,
# filled by the correlation. Both text labels come from columns of bigdata,
# so no hand-placed annotations are needed.
ggplot(data = bigdata, aes(x = phen1, y = phen2, fill = cors)) + 
  geom_tile(color = "white")+
  # correlation value, nudged up by 0.1 from the tile position
  geom_text(aes(label = cors), size = 7, position = position_nudge(y = 0.1)) +
  # CI string, nudged down by 0.1 so it sits below the correlation value
  geom_text(aes(label = CI), size = 5, position = position_nudge(y = -0.1)) +
  # same colour (gray40) at both ends and white at 0: shading reflects the
  # magnitude of the correlation, not its sign
  scale_fill_gradient2(low = "gray40", high = "gray40", mid = "white", 
                       midpoint = 0, limit = c(-0.10,0.10), name = "") +
  scale_y_discrete(position = "right", name = "") +
  labs(x = "") +
  coord_fixed() +
  theme_classic() + 
  theme(axis.text       = element_text(size = 15),
        axis.title      = element_text(size = 15), 
        text            = element_text(size = 17), 
        # place the legend inside the plot area
        legend.position = c(0.15 ,0.8))

Created on 2020-11-13 by the reprex package (v0.3.0)

Perhaps you could add the confidence interval strings to the melted_corr data object and use them in a second geom_text() layer, adjusting the vertical position of the CI strings with the vjust argument?

# Attach the CI strings to the melted data, one per row.
# NOTE(review): the strings are typed in by hand and matched to rows purely by
# position, so they rely on the current row order of melted_corr -- verify the
# order if the input data or the reshaping changes.
melted_corr$ci <- c("(0.03, 0.10)","(-0.01, 0.02)","(-0.02, 0.02)","(-0.01, 0.01)","(-0.02, 0.02)", "(-0.08, 0.002)")

# Same tile plot as in the question, with the CI strings drawn by a second
# geom_text() layer instead of individual annotate() calls.
ggplot(data = melted_corr, aes(x=phen, y=variable, fill=value)) + 
  geom_tile(color = "white")+
  # same colour (gray40) at both ends and white at 0: shading reflects the
  # magnitude of a value, not its sign
  scale_fill_gradient2(low = "gray40", high = "gray40", mid = "white", 
                       midpoint = 0, limit = c(-0.10,0.10), 
                       name="Robust\ncorrelation") + 
  theme_minimal()+ 
  coord_fixed()+
  scale_y_discrete(position = "right")+
  # correlation value on each tile
  geom_text(aes(phen, variable, label = value), color = "black", size = 7) +
  geom_text(aes(phen, variable, label = ci), color = "black", size = 5,
            vjust = 2.5) + # CI labels; vjust shifts them below the correlation value
  labs(y="", x="")+
  theme(axis.line = element_line(colour = "black"),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank(),
        axis.text=element_text(size = 15), # size of the axis tick labels
        axis.title=element_text(size= 15), # size of the axis titles
        text = element_text(size = 17), 
        legend.position = c(0.15,0.8), # place the legend inside the plot area
        legend.title=element_blank())+
  # significance markers are still positioned by hand next to the tile values
  annotate("text", x = 1.2, y = 1, label = "*", size = 7)+
  annotate("text", x = 3.22, y = 3, label = "*", size = 7)

输出2

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM