简体   繁体   中英

Is it possible to extend the intervals of the x-axis in R?

I have two plots: a barplot, and a ggplot(geom_jitter bubbleplot). Ultimately, I am using a photo editing app to line up these two plots. As you can see, the intervals in the bottom of these two plots do not match up, which is my problem here. I would like to make it so I can just change the bottom x-axis of both plots to 400 (lowest common interval to cover x-axis of both plots). I do not want to change the data values, just the axis values.

Barplot Code

# Read the domain-length table from a CSV file chosen interactively.
GYPCdomain <- read.csv(file.choose(), header = TRUE)

# Draw the table as a horizontal bar chart. The data frame is converted to a
# matrix first; barplot() stacks the rows of each matrix column into one bar.
# `legend.text` is spelled out in full rather than relying on partial
# matching of `legend =`. barplot() returns the bar midpoints, which are
# stored in GYPCbarplot.
GYPCbarplot <- barplot(
  as.matrix(GYPCdomain),
  horiz = TRUE,
  xlab = "Length (Protein Domains Shown)",
  col = c("azure", "plum1", "skyblue"),
  legend.text = c("Cytoplasmic", "Helical Membrane", "Extracellular")
)

sample data: 

structure(list(GYPC = c(0L, 0L, 171L, 0L, 72L, 0L, 141L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L)), class = "data.frame", row.names = c(NA, -42L))

Bubbleplot Code

library(ggplot2)
library(scales)
# Read the polymorphism table from a CSV file chosen interactively.
# (The former `data(GYPC, package = "ggplot2")` call was dropped: ggplot2 has
# no dataset of that name, and GYPC is assigned from the CSV here anyway.)
GYPC <- read.csv(file.choose(), header = TRUE)

# Bubble plot: position on the gene (x) against log10 of the frequency (y).
# Points are coloured by geographical location and sized by p-value.
# Note that log10() of a Frequency of 0 is -Inf.
GYPCggplot <- ggplot(GYPC, aes(Position, log10(Frequency))) +
  geom_jitter(aes(col = Geographical.Location, size = p.value)) +
  labs(
    title = "GYPC Gene",
    subtitle = "Frequency of Various Polymorphisms",
    x = "Position of Polymorphism on GYPC Gene",
    color = "Geographical Location",
    size = "p-value"
  ) +
  # Reversed size scale: smaller p-values get larger points.
  scale_size_continuous(range = c(1, 4.5), trans = "reverse") +
  guides(size = guide_legend(reverse = TRUE))


sample data:
structure(list(Variant = structure(c(4L, 4L, 4L, 4L, 4L, 8L, 
8L, 8L, 8L, 8L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 12L, 
12L, 12L, 12L, 12L, 14L, 14L, 14L, 14L, 14L, 2L, 2L, 2L, 2L, 
2L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 
9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 
11L, 13L, 13L, 13L, 13L, 13L, 15L, 15L, 15L, 15L, 15L), .Label = c("rs111631066", 
"rs114199197", "rs115178969", "rs115201071", "rs139780142", "rs139816143", 
"rs143080607", "rs143216051", "rs199797395", "rs531807314", "rs545780841", 
"rs551011574", "rs560942282", "rs567759380", "rs571586275"), class = "factor"), 
    Position = c(213L, 213L, 213L, 213L, 213L, 60L, 60L, 60L, 
    60L, 60L, 249L, 249L, 249L, 249L, 249L, 183L, 183L, 183L, 
    183L, 183L, 282L, 282L, 282L, 282L, 282L, 294L, 294L, 294L, 
    294L, 294L, 150L, 150L, 150L, 150L, 150L, 135L, 135L, 135L, 
    135L, 135L, 258L, 258L, 258L, 258L, 258L, 255L, 255L, 255L, 
    255L, 255L, 138L, 138L, 138L, 138L, 138L, 159L, 159L, 159L, 
    159L, 159L, 141L, 141L, 141L, 141L, 141L, 198L, 198L, 198L, 
    198L, 198L, 258L, 258L, 258L, 258L, 258L), Geographical.Location = structure(c(1L, 
    2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 
    2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 
    2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 
    2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 
    2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L), .Label = c("AFR", 
    "AMR", "EAS", "EUR", "SAS"), class = "factor"), Frequency = c(0.023, 
    0.001, 0, 0, 0, 0.017, 0.001, 0, 0, 0, 0.012, 0, 0, 0, 0, 
    0.002, 0.003, 0.002, 0.023, 0.016, 0.001, 0, 0, 0, 0, 0, 
    0, 0, 0, 0.004, 0, 0, 0, 0.001, 0, 0, 0, 0, 0, 0.001, 0, 
    0, 0.001, 0, 0, 0.001, 0, 0, 0, 0, 0, 0.001, 0, 0, 0, 0, 
    0, 0, 0, 0.002, 0, 0, 0.001, 0, 0, 0, 0, 0, 0, 0.001, 0, 
    0, 0.001, 0, 0), pre.p.value = c(6.32e-17, 0.113, 0.00126, 
    0.00126, 0.00211, 2.51e-12, 0.356, 0.00806, 0.00809, 0.0139, 
    4.86e-10, 0.15, 0.0542, 0.0542, 0.0537, 0.000376, 0.0778, 
    0.0068, 7.4e-06, 0.0109, 0.264, 1, 1, 1, 1, 0.579, 1, 0.589, 
    0.59, 0.00144, 1, 1, 1, 0.201, 1, 1, 1, 1, 1, 0.195, 1, 1, 
    0.201, 1, 1, 1, 1, 0.201, 1, 1, 1, 0.139, 1, 1, 1, 1, 1, 
    1, 1, 0.0381, 1, 1, 0.201, 1, 1, 1, 1, 1, 1, 0.195, 1, 1, 
    0.201, 1, 1), p.value = c(0, 0.75, 0.5, 0.5, 0.5, 0, 0.75, 
    0.5, 0.5, 0.75, 0, 0.75, 0.75, 0.75, 0.75, 0.5, 0.75, 0.5, 
    0.25, 0.75, 0.75, 1, 1, 1, 1, 1, 1, 1, 1, 0.5, 1, 1, 1, 0.75, 
    1, 1, 1, 1, 1, 0.75, 1, 1, 0.75, 1, 1, 1, 1, 0.75, 1, 1, 
    1, 0.75, 1, 1, 1, 1, 1, 1, 1, 0.75, 1, 1, 0.75, 1, 1, 1, 
    1, 1, 1, 0.75, 1, 1, 0.75, 1, 1), log.p.value. = c(-16.19928292, 
    -0.947, -2.899629455, -2.899629455, -2.675717545, -11.60032628, 
    -0.449, -2.093664958, -2.092051478, -1.8569852, -9.313363731, 
    -0.824, -1.266000713, -1.266000713, -1.270025714, -3.424812155, 
    -1.11, -2.167491087, -5.13076828, -1.962573502, -0.5783960731, 
    0, 0, 0, 0, -0.2373214363, 0, -0.2298847052, -0.2291479884, 
    -2.841637508, 0, 0, 0, -0.6968039426, 0, 0, 0, 0, 0, -0.7099653886, 
    0, 0, -0.6968039426, 0, 0, 0, 0, -0.6968039426, 0, 0, 0, 
    -0.857, 0, 0, 0, 0, 0, 0, 0, -1.419075024, 0, 0, -0.6968039426, 
    0, 0, 0, 0, 0, 0, -0.7099653886, 0, 0, -0.6968039426, 0, 
    0), X = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA), range = structure(c(2L, 6L, 5L, 4L, 3L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "0 < p-value < 1E-9", 
    "1E-2 < p-value < 1", "1E-4 < p-value < 1E-2", "1E-6 < p-value < 1E-4", 
    "1E-9 < p-value < 1E-6"), class = "factor")), class = "data.frame", row.names = c(NA, 
-75L))

在此处输入图片说明

在此处输入图片说明

I took the liberty of reproducing your barplot with ggplot as well, because then we can use the awesome features of the cowplot package, which was made for things like this. Setting axis limits can be done with ylim() or xlim(), but because the legends have different widths, we need the cowplot package to truly align the plots (otherwise the legends would need to go below the plots).

#recreating the barplot
library(dplyr) #needed for data wrangling

# Rebuild the domain bar as a ggplot object so that it can be aligned with the
# bubble plot. Only the non-zero rows of GYPCdomain are kept; the labelling
# below assumes exactly three such rows, one per protein domain.
GYPCbarplot_ggplot <- GYPCdomain %>%
  filter(GYPC > 0) %>%
  # The non-zero rows are taken to run from position 0 upwards as
  # Extracellular, Helical Membrane, Cytoplasmic (the reverse of the legend
  # order), so the labels are reversed before being attached to the rows.
  # The factor levels stay in legend order; stacked bars are built in reverse
  # level order, which places the last level (Extracellular) next to 0.
  mutate(
    domain = factor(
      rev(c("Cytoplasmic", "Helical Membrane", "Extracellular")),
      levels = c("Cytoplasmic", "Helical Membrane", "Extracellular"),
      ordered = TRUE
    )
  ) %>%
  ggplot(aes(x = 1, y = GYPC, fill = domain)) +
  geom_col(position = "stack") +
  scale_fill_manual(
    values = c(
      "Cytoplasmic" = "azure",
      "Helical Membrane" = "plum1",
      "Extracellular" = "skyblue"
    )
  ) +
  # Flip so that the stacked lengths run along the horizontal axis.
  coord_flip() +
  xlab("GYPC") +
  ylab("Length (Protein Domains Shown)") +
  ylim(0, 400) + # creates the limit (same 0-400 range as the bubble plot)
  # The dummy x = 1 axis carries no information, so hide its grid, text, ticks.
  theme(
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# The bubble plot, built from `GYPC`, the polymorphism data frame read in by
# the question's code (the undefined name `GYPC_data` was replaced).
# Position on the gene (x) is plotted against log10 of the frequency (y);
# points are coloured by geographical location and sized by p-value.
GYPC_bubbleplot <- ggplot(GYPC, aes(Position, log10(Frequency))) +
  geom_jitter(aes(col = Geographical.Location, size = p.value)) +
  labs(
    title = "GYPC Gene",
    subtitle = "Frequency of Various Polymorphisms",
    x = "Position of Polymorphism on GYPC Gene",
    color = "Geographical Location",
    size = "p-value"
  ) +
  # Reversed size scale: smaller p-values get larger points.
  scale_size_continuous(range = c(1, 4.5), trans = "reverse") +
  guides(size = guide_legend(reverse = TRUE)) +
  xlim(0, 400) # added this limit (same 0-400 range as the bar plot)

library(cowplot) #used to arrange the two plots

# Stack the domain bar above the bubble plot in a single column.
plot_grid(
  plotlist = list(GYPCbarplot_ggplot, GYPC_bubbleplot),
  rel_heights = c(1, 1.5), # the lower plot gets 1.5 times the height
  align = "v",             # vertical alignment of the two plots
  ncol = 1                 # one column, i.e. one plot below the other
)

et voila: 在此处输入图片说明

If I understand correctly, the OP is asking to synchronise the x-axes in order to show which protein domain a given position on the GYPC gene belongs to.

If my assumption is correct then there is an alternative approach which fills the background of the bubble plot according to the protein domains:

library(dplyr)
# Domain labels, and the fill colours named by domain so that
# scale_fill_manual() can look colours up by label.
domain_name <- c("Cytoplasmic", "Helical Membrane", "Extracellular")
domain_fill <- setNames(c("azure", "plum1", "skyblue"), domain_name)

# One row per domain with its start and end position along the gene.
# Zero-length rows are dropped first. The remaining rows are labelled with the
# domain names in reverse order, and fct_inorder() keeps that order as the
# level order. end_pos is the running total of the lengths, and start_pos is
# the previous row's end_pos (0 for the first row).
# The object name is spelled `GPYCdomain_2` (sic); the plotting code refers to
# it by exactly that name.
GPYCdomain_2 <- GYPCdomain %>%
  filter(GYPC > 0) %>%
  mutate(
    domain_name = forcats::fct_inorder(rev(domain_name)),
    end_pos = cumsum(GYPC),
    start_pos = lag(end_pos, default = 0L)
  )

library(ggplot2)
# Bubble plot with the protein domains drawn as full-height background bands.
ggplot(GYPC, aes(Position, log10(Frequency))) +
  # Background first, so that the points are drawn on top of it. Each band
  # spans start_pos..end_pos horizontally and the whole panel vertically.
  geom_rect(
    data = GPYCdomain_2,
    mapping = aes(
      xmin = start_pos, xmax = end_pos,
      ymin = -Inf, ymax = Inf,
      fill = domain_name
    ),
    inherit.aes = FALSE,
    alpha = 0.6
  ) +
  # Points coloured by geographical location and sized by p-value.
  geom_jitter(aes(color = Geographical.Location, size = p.value)) +
  scale_fill_manual(values = domain_fill) +
  # Reversed size scale: smaller p-values get larger points.
  scale_size_continuous(range = c(1, 4.5), trans = "reverse") +
  guides(size = guide_legend(reverse = TRUE)) +
  labs(
    title = "GYPC Gene",
    subtitle = "Frequency of Various Polymorphisms",
    x = "Position of Polymorphism on GYPC Gene",
    color = "Geographical Location",
    size = "p-value",
    fill = "Protein Domain"
  )

在此处输入图片说明

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM