I am working to generate a heatmap of the distribution of biological functional classes by tissue type for an analysis that I'm working on. I've successfully generated the heatmap using geom_tile, but would like to maintain the grid within the white space that is generated in the heatmap.
This white space appears because there are no data for those comparisons (not NAs or zeros; the rows are completely absent). Is it possible to either 1) edit the graphics so the grid is drawn over the white space, or 2) edit the data frame to include NAs or zeros where those data are currently absent?
Here are my data:
# dput() output of a data frame with 212 rows and three columns:
#   Tissue           - factor with levels FB, SOG, MG, HG, MT
#   Transcript_Count - numeric transcript counts
#   GO.ID            - factor with 55 GO term levels
# 5 tissues x 55 GO terms would be 275 combinations, so some Tissue/GO.ID
# pairs have no row at all (they are absent, not NA or zero).
# NOTE(review): the expression is not assigned to a name here; presumably it
# is the value of All_Tissues_BP_Head used by the plotting code - confirm.
structure(list(Tissue = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("FB",
"SOG", "MG", "HG", "MT"), class = "factor"), Transcript_Count = c(64,
36, 35, 42, 66, 122, 62, 40, 34, 46, 40, 36, 41, 37, 36, 37,
40, 35, 38, 40, 53, 37, 36, 36, 68, 40, 40, 116, 84, 149, 45,
72, 42, 65, 78, 37, 62, 35, 35, 43, 38, 152, 37, 60, 36, 66,
40, 60, 45, 35, 36, 35, 129, 193, 153, 420, 247, 357, 237, 343,
199, 484, 112, 464, 244, 150, 127, 151, 247, 152, 238, 246, 127,
127, 120, 182, 245, 128, 388, 279, 246, 139, 120, 120, 120, 146,
119, 143, 144, 133, 126, 133, 143, 143, 218, 131, 121, 120, 119,
124, 127, 119, 124, 124, 119, 224, 306, 387, 102, 108, 122, 136,
186, 373, 85, 151, 156, 83, 161, 127, 286, 135, 82, 180, 150,
158, 157, 76, 142, 95, 79, 81, 78, 79, 77, 183, 88, 99, 189,
356, 162, 150, 125, 110, 96, 98, 88, 91, 100, 93, 101, 150, 90,
88, 193, 96, 100, 336, 275, 410, 108, 225, 103, 187, 237, 90,
163, 131, 100, 92, 427, 90, 171, 88, 190, 102, 175, 109, 107,
80, 97, 87, 72, 256, 185, 144, 266, 233, 150, 83, 106, 133, 133,
133, 69, 217, 70, 134, 131, 101, 121, 58, 67, 65, 61, 58, 64,
64, 64, 65, 58, 57), GO.ID = structure(c(1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 19L, 9L, 10L, 6L, 37L,
35L, 8L, 29L, 39L, 42L, 53L, 30L, 34L, 31L, 22L, 49L, 25L, 21L,
1L, 46L, 43L, 36L, 12L, 48L, 5L, 41L, 28L, 32L, 7L, 40L, 23L,
15L, 18L, 33L, 38L, 20L, 47L, 26L, 54L, 11L, 27L, 17L, 44L, 13L,
14L, 51L, 3L, 24L, 16L, 52L, 2L, 45L, 50L, 29L, 6L, 42L, 9L,
39L, 8L, 37L, 35L, 30L, 10L, 1L, 34L, 49L, 25L, 21L, 28L, 7L,
31L, 32L, 48L, 46L, 5L, 27L, 44L, 4L, 47L, 40L, 17L, 33L, 20L,
1L, 2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 13L, 14L, 16L, 17L,
19L, 20L, 21L, 22L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L,
33L, 34L, 35L, 36L, 37L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L,
47L, 48L, 49L, 37L, 9L, 8L, 39L, 10L, 30L, 29L, 35L, 42L, 6L,
32L, 21L, 7L, 5L, 25L, 34L, 31L, 28L, 49L, 46L, 1L, 48L, 44L,
11L, 40L, 47L, 55L, 26L, 27L, 17L, 20L, 33L, 13L, 16L), .Label = c("GO:0006139",
"GO:0006351", "GO:0006355", "GO:0006508", "GO:0006725", "GO:0006807",
"GO:0006810", "GO:0007154", "GO:0007165", "GO:0009058", "GO:0009059",
"GO:0009889", "GO:0010467", "GO:0010468", "GO:0010556", "GO:0016070",
"GO:0018130", "GO:0019219", "GO:0019222", "GO:0019438", "GO:0019538",
"GO:0031323", "GO:0031326", "GO:0032774", "GO:0034641", "GO:0034645",
"GO:0034654", "GO:0043170", "GO:0044237", "GO:0044238", "GO:0044249",
"GO:0044260", "GO:0044271", "GO:0046483", "GO:0050794", "GO:0051171",
"GO:0051234", "GO:0051252", "GO:0051716", "GO:0055085", "GO:0060255",
"GO:0071704", "GO:0080090", "GO:0090304", "GO:0097659", "GO:1901360",
"GO:1901362", "GO:1901564", "GO:1901576", "GO:1903506", "GO:2000112",
"GO:2001141", "GO:0003008", "GO:0006811", "GO:0006259"), class = "factor")), row.names = c(NA,
-212L), class = "data.frame")
And my code to generate the heatmap:
# Heatmap of Transcript_Count with Tissue on the x axis and GO.ID on the y axis.
# geom_tile() only draws a (black-bordered) tile for Tissue/GO.ID combinations
# that have a row in the data, so absent combinations are left as bare panel
# background with no tile border.
# NOTE(review): Alex_Theme is defined outside this snippet; presumably a
# ggplot2 theme object - confirm.
(ggplot(All_Tissues_BP_Head, aes(Tissue, GO.ID)) +
  Alex_Theme +
  geom_tile(aes(fill = Transcript_Count), color = "black") +
  scale_fill_gradient2(
    low = "white", mid = "blue", high = "black",
    # Centre the diverging scale on the mean of the data that is actually
    # plotted. The original referenced All_Tissues_BP, a different object
    # that is not defined alongside this code.
    midpoint = mean(All_Tissues_BP_Head$Transcript_Count)
  ) +
  scale_x_discrete(expand = c(0, 0)) +
  ggtitle(expression(atop(bold("Biological Processes")))) +
  # These overrides must come after Alex_Theme so they take precedence.
  theme(
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 12),
    axis.text = element_text(size = 12),
    axis.title.y = element_blank(),
    axis.title.x = element_text(size = 12)
  ) +
  labs(fill = "Transcript \n count"))
Use the `complete()` function from tidyr to fill in the missing factor combinations in your data.frame with `NA`. Then you can use the `na.value` parameter of the color gradient scale to set the color of those tiles.
library(ggplot2)
library(dplyr)
library(tidyr)
# z <- complete(All_Tissues_BP_Head, Tissue, GO.ID)
# Same heatmap, but on data expanded to every Tissue x GO.ID combination.
# complete() adds a row for each combination that was absent, with
# Transcript_Count set to NA, so geom_tile() draws a bordered tile there too.
ggplot(complete(All_Tissues_BP_Head, Tissue, GO.ID), aes(Tissue, GO.ID)) +
  geom_tile(aes(fill = Transcript_Count), color = "black") +
  scale_fill_gradient2(
    low = "white", mid = "blue", high = "black",
    # The midpoint is taken from the original (un-completed) data, which has
    # no NA counts, so mean() needs no na.rm here.
    midpoint = mean(All_Tissues_BP_Head$Transcript_Count),
    # Fill for the NA tiles. Use a neutral colour that is not on the
    # white-blue-black gradient: "black" would make missing combinations
    # indistinguishable from the highest counts.
    na.value = "grey90"
  ) +
  scale_x_discrete(expand = c(0, 0)) +
  ggtitle(expression(atop(bold("Biological Processes")))) +
  theme(
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 12),
    axis.text = element_text(size = 12),
    axis.title.y = element_blank(),
    axis.title.x = element_text(size = 12)
  ) +
  labs(fill = "Transcript \n count")
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint this content, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.