Missing values in heatmap

Question

I am working to generate a heatmap of the distribution of biological functional classes by tissue type for an analysis that I'm working on. I've successfully generated the heatmap using geom_tile, but would like to maintain the grid within the white space that is generated in the heatmap.

This white space appears because there are no rows for those Tissue/GO.ID combinations (they are not NAs or zeros, but completely absent from the data frame). Is it possible to either 1) edit the graphics to include the grid over the white space, or 2) edit the data frame to include NAs or zeros where those data are currently absent?

Here are my data (`dput()` output for `All_Tissues_BP_Head`, the data frame plotted below):

structure(list(Tissue = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("FB", 
"SOG", "MG", "HG", "MT"), class = "factor"), Transcript_Count = c(64, 
36, 35, 42, 66, 122, 62, 40, 34, 46, 40, 36, 41, 37, 36, 37, 
40, 35, 38, 40, 53, 37, 36, 36, 68, 40, 40, 116, 84, 149, 45, 
72, 42, 65, 78, 37, 62, 35, 35, 43, 38, 152, 37, 60, 36, 66, 
40, 60, 45, 35, 36, 35, 129, 193, 153, 420, 247, 357, 237, 343, 
199, 484, 112, 464, 244, 150, 127, 151, 247, 152, 238, 246, 127, 
127, 120, 182, 245, 128, 388, 279, 246, 139, 120, 120, 120, 146, 
119, 143, 144, 133, 126, 133, 143, 143, 218, 131, 121, 120, 119, 
124, 127, 119, 124, 124, 119, 224, 306, 387, 102, 108, 122, 136, 
186, 373, 85, 151, 156, 83, 161, 127, 286, 135, 82, 180, 150, 
158, 157, 76, 142, 95, 79, 81, 78, 79, 77, 183, 88, 99, 189, 
356, 162, 150, 125, 110, 96, 98, 88, 91, 100, 93, 101, 150, 90, 
88, 193, 96, 100, 336, 275, 410, 108, 225, 103, 187, 237, 90, 
163, 131, 100, 92, 427, 90, 171, 88, 190, 102, 175, 109, 107, 
80, 97, 87, 72, 256, 185, 144, 266, 233, 150, 83, 106, 133, 133, 
133, 69, 217, 70, 134, 131, 101, 121, 58, 67, 65, 61, 58, 64, 
64, 64, 65, 58, 57), GO.ID = structure(c(1L, 2L, 3L, 4L, 5L, 
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 
45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 19L, 9L, 10L, 6L, 37L, 
35L, 8L, 29L, 39L, 42L, 53L, 30L, 34L, 31L, 22L, 49L, 25L, 21L, 
1L, 46L, 43L, 36L, 12L, 48L, 5L, 41L, 28L, 32L, 7L, 40L, 23L, 
15L, 18L, 33L, 38L, 20L, 47L, 26L, 54L, 11L, 27L, 17L, 44L, 13L, 
14L, 51L, 3L, 24L, 16L, 52L, 2L, 45L, 50L, 29L, 6L, 42L, 9L, 
39L, 8L, 37L, 35L, 30L, 10L, 1L, 34L, 49L, 25L, 21L, 28L, 7L, 
31L, 32L, 48L, 46L, 5L, 27L, 44L, 4L, 47L, 40L, 17L, 33L, 20L, 
1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L, 14L, 16L, 17L, 
19L, 20L, 21L, 22L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 
33L, 34L, 35L, 36L, 37L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 
47L, 48L, 49L, 37L, 9L, 8L, 39L, 10L, 30L, 29L, 35L, 42L, 6L, 
32L, 21L, 7L, 5L, 25L, 34L, 31L, 28L, 49L, 46L, 1L, 48L, 44L, 
11L, 40L, 47L, 55L, 26L, 27L, 17L, 20L, 33L, 13L, 16L), .Label = c("GO:0006139", 
"GO:0006351", "GO:0006355", "GO:0006508", "GO:0006725", "GO:0006807", 
"GO:0006810", "GO:0007154", "GO:0007165", "GO:0009058", "GO:0009059", 
"GO:0009889", "GO:0010467", "GO:0010468", "GO:0010556", "GO:0016070", 
"GO:0018130", "GO:0019219", "GO:0019222", "GO:0019438", "GO:0019538", 
"GO:0031323", "GO:0031326", "GO:0032774", "GO:0034641", "GO:0034645", 
"GO:0034654", "GO:0043170", "GO:0044237", "GO:0044238", "GO:0044249", 
"GO:0044260", "GO:0044271", "GO:0046483", "GO:0050794", "GO:0051171", 
"GO:0051234", "GO:0051252", "GO:0051716", "GO:0055085", "GO:0060255", 
"GO:0071704", "GO:0080090", "GO:0090304", "GO:0097659", "GO:1901360", 
"GO:1901362", "GO:1901564", "GO:1901576", "GO:1903506", "GO:2000112", 
"GO:2001141", "GO:0003008", "GO:0006811", "GO:0006259"), class = "factor")), row.names = c(NA, 
-212L), class = "data.frame")

And my code to generate the heatmap:

# Heatmap of transcript counts per tissue (x axis) and GO term (y axis).
# Tissue/GO.ID pairs that have no row in the data frame get no tile at all,
# which is what leaves the un-gridded white gaps described above.
# NOTE: `Alex_Theme` is a custom theme object that is not defined in this post.
ggplot(All_Tissues_BP_Head, aes(Tissue, GO.ID)) +
    Alex_Theme +
    geom_tile(aes(fill = Transcript_Count), color = "black") +
    scale_fill_gradient2(
        low = "white", mid = "blue", high = "black",
        # Centre the colour scale on the mean of the data actually plotted.
        midpoint = mean(All_Tissues_BP_Head$Transcript_Count)
    ) +
    scale_x_discrete(expand = c(0, 0)) +
    ggtitle(expression(atop(bold("Biological Processes")))) +
    theme(
        legend.title = element_text(size = 12),
        legend.text = element_text(size = 12),
        axis.text = element_text(size = 12),
        axis.title.y = element_blank(),
        axis.title.x = element_text(size = 12)
    ) +
    labs(fill = "Transcript \n count")

Answer 1

Use the `complete()` function from tidyr to fill in the missing factor combinations in your data frame with `NA`.

Then you can use the `na.value` parameter of the fill scale to set the color of those otherwise-empty tiles.

library(ggplot2)
library(dplyr)
library(tidyr)

# complete() adds a row for every Tissue x GO.ID combination that is absent
# from the data, with Transcript_Count set to NA, so geom_tile() draws a
# bordered tile for those cells as well.
heatmap_data <- complete(All_Tissues_BP_Head, Tissue, GO.ID)

ggplot(heatmap_data, aes(Tissue, GO.ID)) +
    geom_tile(aes(fill = Transcript_Count), color = "black") +
    scale_fill_gradient2(
        low = "white", mid = "blue", high = "black",
        # The completed data contains NAs, so drop them when taking the mean.
        midpoint = mean(heatmap_data$Transcript_Count, na.rm = TRUE),
        # Use a colour that is not on the gradient: "black" would make the
        # empty cells look identical to the highest counts.
        na.value = "grey85"
    ) +
    scale_x_discrete(expand = c(0, 0)) +
    ggtitle(expression(atop(bold("Biological Processes")))) +
    theme(
        legend.title = element_text(size = 12),
        legend.text = element_text(size = 12),
        axis.text = element_text(size = 12),
        axis.title.y = element_blank(),
        axis.title.x = element_text(size = 12)
    ) +
    labs(fill = "Transcript \n count")

Missing values in heatmap

Question

1 answer

solution1
0 ACCEPTED 2019-05-03 16:32:53

Missing values in heatmap

Question

1 answer

solution1 0 ACCEPTED 2019-05-03 16:32:53

solution1
0 ACCEPTED 2019-05-03 16:32:53