简体   繁体   中英

density shadow around the data with ggplot2 (R)

I am trying to have 2 "shadows" on the background of the below plot. These shadows should represent the density of the orange and blue points separately. Does it make sense?

Here is the ggplot to improve: 在此输入图像描述

Here is the code and the data (matrix df ) I used to create this plot:

                         PC1           PC2 aa
A_akallopisos    0.043272525  0.0151023307  2
A_akindynos     -0.020707141 -0.0158198405  1
A_allardi       -0.020277664 -0.0221016281  2
A_barberi       -0.023165596  0.0389906701  2
A_bicinctus     -0.025354572 -0.0059122384  2
A_chrysogaster   0.012608835 -0.0339330213  2
A_chrysopterus  -0.022402365 -0.0092476009  1
A_clarkii       -0.014474658 -0.0127024469  1
A_ephippium     -0.016859412  0.0320034231  2
A_frenatus      -0.024190876  0.0238499714  2
A_latezonatus   -0.010718845 -0.0289904165  1
A_latifasciatus -0.005645811 -0.0183202248  2
A_mccullochi    -0.031664307 -0.0096059126  2
A_melanopus     -0.026915545  0.0308399009  2
A_nigripes       0.023420045  0.0293801537  2
A_ocellaris      0.052042539  0.0126144250  2
A_omanensis     -0.020387101  0.0010944998  2
A_pacificus      0.042406273 -0.0260308092  2
A_percula        0.034591721  0.0071153133  2
A_perideraion    0.052830132  0.0064495142  2
A_polymnus       0.030902254 -0.0005091421  2
A_rubrocinctus  -0.033318659  0.0474995722  2
A_sandaracinos   0.055839755  0.0093724082  2
A_sebae          0.021767793 -0.0218640814  2
A_tricinctus    -0.016230301 -0.0018526482  1
P_biaculeatus   -0.014466403  0.0024864574  2



 # Scatter plot of PC1 vs PC2 coloured by mutation type, with a per-group
 # linear fit and each point labelled by its row name.
 ggplot(data = df,
        aes(x = PC1, y = PC2, color = factor(aa), label = rownames(df))) +
   ggtitle(paste0('Site n° ', Sites_names[j])) +
   geom_smooth(se = FALSE, method = 'lm') + # fitted lines only, no confidence band
   geom_point() +
   scale_color_manual(name = 'mutation',
                      values = c("darkorange2", "cornflowerblue"),
                      labels = c("A", "S")) +
   geom_text(hjust = 0.5, vjust = -1, size = 3) + # labels centred just above points
   xlim(-0.05, 0.07)

Here are some possible approaches using stat_density2d() with geom="polygon" and mapping or setting alpha transparency for the density fill regions. If you are willing to experiment with some of the parameters, I think you can get some very useful plots. Specifically, you may want to adjust the following:

  1. n controls the smoothness of the density polygon.
  2. h is the bandwidth of the density estimation.
  3. bins controls the number of density levels.

在此输入图像描述

# Example data: one row per species (species names become the row names
# because the header has one field fewer than the data rows), with the two
# principal-component scores and the mutation group `aa`.
df <- read.table(
  header = TRUE,
  text =
"                         PC1           PC2 aa
A_akallopisos    0.043272525  0.0151023307  2
A_akindynos     -0.020707141 -0.0158198405  1
A_allardi       -0.020277664 -0.0221016281  2
A_barberi       -0.023165596  0.0389906701  2
A_bicinctus     -0.025354572 -0.0059122384  2
A_chrysogaster   0.012608835 -0.0339330213  2
A_chrysopterus  -0.022402365 -0.0092476009  1
A_clarkii       -0.014474658 -0.0127024469  1
A_ephippium     -0.016859412  0.0320034231  2
A_frenatus      -0.024190876  0.0238499714  2
A_latezonatus   -0.010718845 -0.0289904165  1
A_latifasciatus -0.005645811 -0.0183202248  2
A_mccullochi    -0.031664307 -0.0096059126  2
A_melanopus     -0.026915545  0.0308399009  2
A_nigripes       0.023420045  0.0293801537  2
A_ocellaris      0.052042539  0.0126144250  2
A_omanensis     -0.020387101  0.0010944998  2
A_pacificus      0.042406273 -0.0260308092  2
A_percula        0.034591721  0.0071153133  2
A_perideraion    0.052830132  0.0064495142  2
A_polymnus       0.030902254 -0.0005091421  2
A_rubrocinctus  -0.033318659  0.0474995722  2
A_sandaracinos   0.055839755  0.0093724082  2
A_sebae          0.021767793 -0.0218640814  2
A_tricinctus    -0.016230301 -0.0018526482  1
P_biaculeatus   -0.014466403  0.0024864574  2"
)


library(ggplot2)

# Variant 1: per-group 2D density drawn as filled polygons whose transparency
# is mapped to the density level, with the linear fits, points and labels
# layered on top.
p1 <- ggplot(data = df,
             aes(x = PC1, y = PC2, color = factor(aa), label = rownames(df))) +
  ggtitle('Site n° ') +
  # `..level..` is the contour level computed by the stat; color = NA removes
  # the polygon outlines so only the fill is visible.
  # NOTE(review): newer ggplot2 releases spell this after_stat(level) - confirm
  # the installed version before switching.
  stat_density2d(aes(fill = factor(aa), alpha = ..level..),
                 geom = "polygon", color = NA, n = 200, h = 0.03, bins = 4) +
  geom_smooth(se = FALSE, method = 'lm') + # fitted lines only, no confidence band
  geom_point() +
  scale_color_manual(name = 'mutation',
                     values = c("darkorange2", "cornflowerblue"),
                     labels = c("A", "S")) +
  # Same name, values and labels as the colour scale above.
  scale_fill_manual(name = 'mutation',
                    values = c("darkorange2", "cornflowerblue"),
                    labels = c("A", "S")) +
  geom_text(hjust = 0.5, vjust = -1, size = 3, color = "black") +
  scale_x_continuous(expand = c(0.3, 0)) + # Zooms out so that density polygons
  scale_y_continuous(expand = c(0.3, 0)) + # don't reach edges of plot.
  coord_cartesian(xlim = c(-0.05, 0.07),
                  ylim = c(-0.04, 0.05)) # Zooms back in for the final plot.


# Variant 2: per-group 2D density drawn as filled polygons with a fixed
# transparency (alpha is set, not mapped) and only two density levels, giving
# a flat "shadow" behind each group.
p2 <- ggplot(data = df,
             aes(x = PC1, y = PC2, color = factor(aa), label = rownames(df))) +
  ggtitle('Site n° ') +
  # color = NA removes the polygon outlines so only the fill is visible.
  stat_density2d(aes(fill = factor(aa)), alpha = 0.2,
                 geom = "polygon", color = NA, n = 200, h = 0.045, bins = 2) +
  # Fitted lines only, no confidence band.
  # NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
  # lines; `size` is kept so the call still works on older versions.
  geom_smooth(se = FALSE, method = 'lm', size = 1) +
  geom_point(size = 2) +
  scale_color_manual(name = 'mutation',
                     values = c("darkorange2", "cornflowerblue"),
                     labels = c("A", "S")) +
  # Same name, values and labels as the colour scale above.
  scale_fill_manual(name = 'mutation',
                    values = c("darkorange2", "cornflowerblue"),
                    labels = c("A", "S")) +
  geom_text(hjust = 0.5, vjust = -1, size = 3) + # labels inherit the group colour
  scale_x_continuous(expand = c(0.3, 0)) + # Zooms out so that density polygons
  scale_y_continuous(expand = c(0.3, 0)) + # don't reach edges of plot.
  coord_cartesian(xlim = c(-0.05, 0.07),
                  ylim = c(-0.04, 0.05)) # Zooms back in for the final plot.

library(gridExtra)
# Arrange the two plots in a single column and write them to one PNG file.
stacked <- arrangeGrob(p1, p2, ncol = 1)
ggsave("plots.png", plot = stacked, width = 8, height = 11, dpi = 120)

Here's my suggestion. Using shadows or polygons is going to get pretty ugly when you overlay two colors and densities. A contour plot could be nicer to look at and is certainly easier to work with.

I've created some random data as a reproducible example and used a simple density function that uses the average distance to the nearest 5 points.

# Random example data: 20 points with uniform coordinates in [0, 1] and a
# fair coin flip (0 or 1) deciding each point's type.
df <- data.frame(
  PC1 = runif(20),
  PC2 = runif(20),
  aa = rbinom(n = 20, size = 1, prob = 0.5)
)


#' Local point density at a location
#'
#' Estimates the density of same-type points around a location as the
#' reciprocal of the mean Euclidean distance to its `k` nearest points.
#'
#' The Euclidean distance to every candidate point is computed first and the
#' `k` smallest distances are selected afterwards. Sorting the x and y offsets
#' independently (as the previous version did) pairs offsets that belong to
#' different points and does not yield nearest-neighbour distances.
#'
#' @param row Numeric vector (or list) whose first three elements are, in
#'   order, the x coordinate, the y coordinate and the point type.
#' @param data Data frame with columns `PC1`, `PC2` and `aa`. Defaults to the
#'   global `df`, which is what this function always read before the
#'   parameter existed.
#' @param k Number of nearest points to average over. If fewer than `k`
#'   points of the requested type exist, all of them are used.
#' @return A single numeric value: `1 / mean(distance)`. `NA` when `data`
#'   holds no point of the requested type; `Inf` when the mean distance is 0.
point.density <- function(row, data = df, k = 5) {
  points <- data[which(data$aa == row[[3]]), ]
  if (nrow(points) == 0) {
    return(NA_real_)
  }
  dist <- sqrt((points$PC1 - row[[1]])^2 + (points$PC2 - row[[2]])^2)
  # head() caps at the available number of points instead of padding with NA.
  nearest <- head(sort(dist), k)
  1 / mean(nearest)
}

# The density has to be evaluated at every node of a regular grid, not only
# at the observed points.
res <- seq_len(100) / 100 # grid resolution: 100 steps per axis -> 100 x 100 nodes

# Grid for point type 0: x changes slowest, y changes fastest.
plot.data0 <- data.frame(
  x.val = rep(res, each = length(res)),
  y.val = rep(res, times = length(res)),
  type = rep(0, length(res)^2)
)

# Grid for point type 1: same nodes, different type label.
plot.data1 <- plot.data0
plot.data1$type <- rep(1, nrow(plot.data1))

plot.data <- rbind(plot.data0, plot.data1)

# One density value per grid node and point type, i.e. two stacked grids.
# Rows are passed to point.density as (x, y, type).
densities <- apply(plot.data, 1, point.density)
plot.data <- cbind(plot.data, z.val = densities)

library(ggplot2)

# Draw the density contours with stat_contour and overlay the observed points.
# Each layer names its own dataset because the grid and the points live in
# different data frames.
ggplot() +
  stat_contour(
    data = plot.data,
    aes(x = x.val, y = y.val, z = z.val, colour = factor(type)),
    bins = 20,
    alpha = 0.4
  ) +
  geom_point(
    data = df,
    aes(x = PC1, y = PC2, colour = factor(aa))
  )

contour plot http://img34.imageshack.us/img34/6215/1yvb.png

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM