简体   繁体   中英

ggplot2: change break points of discrete scale to be between two break points

I have the following dataset:

df <- data.frame(dens = rnorm(5000),
             split = as.factor(sample(1:2, 5000, replace = T)),
             method = as.factor(sample(c("A","B"), 5000, replace = T)),
             counts = sample(c(1, 10, 100, 1000, 10000), 5000, replace = T))

I have the following split violin plots for splits 1 and 2 within groups A and B for each count. We have four groups for each setting but there is a nested aspect to it:

library(ggplot2)
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin, 
                           draw_group = function(self, data, ..., draw_quantiles = NULL){
                               ## By @YAK: https://stackoverflow.com/questions/35717353/split-violin-plot-with-ggplot2
                               data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
                               grp <- data[1,'group']
                               newdata <- plyr::arrange(transform(data, x = if(grp%%2==1) xminv else xmaxv), if(grp%%2==1) y else -y)
                               newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
                               newdata[c(1,nrow(newdata)-1,nrow(newdata)), 'x'] <- round(newdata[1, 'x']) 
                               if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
                                   stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <= 1))
                                   quantiles <- create_quantile_segment_frame(data, draw_quantiles, split = TRUE, grp = grp)
                                   aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
                                   aesthetics$alpha <- rep(1, nrow(quantiles))
                                   both <- cbind(quantiles, aesthetics)
                                   quantile_grob <- GeomPath$draw_panel(both, ...)
                                   ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
                               }
                               else {
                                   ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
                               }
                           }
                           )

create_quantile_segment_frame <- function (data, draw_quantiles, split = FALSE, grp = NULL) {
    dens <- cumsum(data$density)/sum(data$density)
    ecdf <- stats::approxfun(dens, data$y)
    ys <- ecdf(draw_quantiles)
    violin.xminvs <- (stats::approxfun(data$y, data$xminv))(ys)
    violin.xmaxvs <- (stats::approxfun(data$y, data$xmaxv))(ys)
    violin.xs <- (stats::approxfun(data$y, data$x))(ys)
    if (grp %% 2 == 0) {
        data.frame(x = ggplot2:::interleave(violin.xs, violin.xmaxvs), 
                   y = rep(ys, each = 2), group = rep(ys, each = 2)) 
    } else {
        data.frame(x = ggplot2:::interleave(violin.xminvs, violin.xs), 
                   y = rep(ys, each = 2), group = rep(ys, each = 2)) 
    }
}

geom_split_violin <- function (mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ..., draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) {
    layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin, position = position, show.legend = show.legend, inherit.aes = inherit.aes, params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}



df$key <- factor(paste(df$split, df$method))

levels(df$split) <- factor(0:2)
library(ggplot2)
ggplot(df, aes(x = interaction(split, counts), y = dens, fill = key)) +geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + theme_light() + theme(legend.position="bottom") + scale_x_discrete(limits=levels(interaction(df$split,df$counts))[-length(levels(interaction(df$split,df$counts)))],drop = FALSE, name = "Counts")

And I get the following:

在此输入图像描述

Which is great, except that I would like to only have labels of counts 1, 10, 100, 1000, 10000 on the x-axis and in between the blue and the green violin plots. So label 1 in between the first blue and the green violin plots, 10 in between the second blue and the green violin plots, 100 in between the second blue and the green violin plots and so on.

Thanks for any suggestions on how to do this.

Instead of changing the break point for a discrete scale, you can try adding a text layer to the plot itself, which is able to accept non-integer values for discrete scale positions:

ggplot(df,
       aes(x = x, y = dens, fill = key)) + 
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +

  # annotate layer with non-integer positions
  annotate(geom = "text", x = c(1.5, 4.5, 7.5, 10.5, 13.5), y = -3.75,
           label = c("1", "10", "100", "1000", "10000")) +
  scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired", n=4)) + 
  scale_x_discrete(name = "Counts", drop = FALSE) +
  theme_minimal() + 

  # hide the actual discrete labels / ticks
  theme(legend.position="bottom",
        axis.ticks.x = element_blank(),
        axis.text.x = element_blank())

情节

I usually solve these issues with facets, then format the strips as though they are axis labels. This also naturally puts the pairs closer together, without any hacks, and you can change the distance by changing theme(panel.spacing = .....) , if needed. Eg:

ggplot(df, aes(x = split, y = dens, fill = key)) +
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
  scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + 
  xlab('count') +
  facet_grid(~counts, scales = 'free_x', switch = 'x') +
  theme_light() + 
  theme(legend.position = "bottom", axis.text.x = element_blank(), axis.ticks.x = element_blank(),
        strip.background = element_blank(), strip.text = element_text(color = 'black'))

在此输入图像描述

Or a different theme with less obvious facets:

ggplot(df, aes(x = split, y = dens, fill = key)) +
  geom_split_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
  scale_fill_manual(values=RColorBrewer::brewer.pal(name="Paired",n=4)) + 
  xlab('count') +
  facet_grid(~counts, scales = 'free_x', switch = 'x') +
  theme_minimal() + 
  theme(legend.position = "bottom", axis.text.x = element_blank(), axis.ticks.x = element_blank())

在此输入图像描述

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM