简体   繁体   中英

Shading confidence intervals in R - base R if possible

I am comparing two lines that were regressed using LOESS. I want to display the confidence intervals of the two lines clearly, and I am having some difficulties.

I have tried using a variety of line types and colors, but the result is still to busy and messy in my opinion. I think that shading between the confidence intervals might make things clearer, but I am having some difficulties wrapping my head around the problem considering how my coding is structured so far. I have included the produced plot, the data for the two sets Analysis5k and Analysis5kz, and my code up to this point.

I have seen some examples where two polygons were overlapped to show where the confidence intervals overlap which seems like it might be a good way to present the data. If there was a way to draw the polygon in the area that is shared by the two confidence intervals, that might be another good way to present the data.

I understand the basic concept of how the polygon should be done, but the examples I have found have been applied to much more simplistic lines and data. Part of this is my own fault for some poor organization up to this point, but as this step is basically the finishing touch on my data presentation, I really don't want to have to rework everything from ground up.

Any help or insight is greatly appreciated.

UPDATE

I updated the Title. I received some great examples using ggplot, and while I would like to get around to working with ggplot in the future, I have only dealt with base R up to this point. For this particular project would like to try to keep this in base R if possible. 没有阴影的情节

Analysis5k

Period  15p5    Total_5plus
-4350   0.100529101 12.6
-3900   0.4 20
-3650   0.0625  9.6
-3900   0.126984127 16.8
-3958   0.133333333 5
-4350   0.150943396 10.6
-3400   0.146341463 8.2
-3650   0.255319149 9.4
-3400   0.222222222 9
-3500   0.245014245 39
-3600   0.125   8
-3808   0.1 20
-3900   0.160493827 18
-3958   0.238095238 7
-4058   0.2 5
-3500   0.086956522 28.75
-4117   0.141414141 6.6
-4350   0.171038825 31.76666667
-4350   0.166666667 6
-3650   0.143798024 30.36666667
-2715   0.137931034 7.25
-4350   0.235588972 26.6
-3500   0.228840125 79.75
-4350   0.041666667 8
-3650   0.174757282 20.6
-2715   0.377777778 11.25
-3500   0.2 7.5
-3650   0.078947368 7.6
-3400   0.208333333 24
-4233   0.184027778 19.2
-3650   0.285714286 12.6
-4350   0.166666667 6

Analysis5kz

Period  15p5    Total_5plus
-4350   0.100529101 12.6
-4350   0   5
-3900   0.4 20
-3650   0.0625  9.6
-3400   0   6
-3900   0.126984127 16.8
-3958   0.133333333 5
-4350   0.150943396 10.6
-3400   0.146341463 8.2
-3650   0.255319149 9.4
-3400   0.222222222 9
-3500   0.245014245 39
-3600   0.125   8
-3650   0   28
-3808   0.1 20
-3900   0.160493827 18
-3958   0.238095238 7
-4058   0.2 5
-3500   0   25
-3500   0.086956522 28.75
-4117   0.141414141 6.6
-4350   0.171038825 31.76666667
-4350   0.166666667 6
-3650   0.143798024 30.36666667
-2715   0.137931034 7.25
-4350   0.235588972 26.6
-3500   0.228840125 79.75
-4350   0.041666667 8
-3500   0   5
-3650   0.174757282 20.6
-3800   0   9
-2715   0.377777778 11.25
-3500   0.2 7.5
-3650   0.078947368 7.6
-4117   0   8
-4350   0   8
-3400   0.208333333 24
-4233   0.184027778 19.2
-3025   0   7
-3650   0.285714286 12.6
-4350   0.166666667 6

Code

  ppi <- 300 
  png("5+ KC shaded CI.png", width=6*ppi, height=6*ppi, res=ppi) 
  library(Hmisc) 
  Analysis5k <- read.csv(file.choose(), header = T) 
  Analysis5kz <- read.csv(file.choose(), header = T)
  par(mfrow = c(1,1), pty = "s", oma=c(1,2,1,1), mar=c(4,4,2,2)) 
  plot(X15p5 ~ Period, Analysis5kz, xaxt = "n", yaxt= "n", ylim=c(-0.2,0.7), xlim=c(-5000,-2500), xlab = "Years B.P.", ylab = expression(''[15]*'p'[5]), main = "") 
  vx <- seq(-5000,-2000, by = 500) 
  vy <- seq(-0.2,0.7, by = 0.1) 
  axis(1, at = vx) 
  axis(2, at = vy) 
  a5k <- order(Analysis5k$Period) 
  a5kz <- order(Analysis5kz$Period)
  Analysis5k.lo <- loess(X15p5 ~ Period, Analysis5k, weights = Total_5plus, span = 0.6) 
  Analysis5kz.lo <- loess(X15p5 ~ Period, Analysis5kz, weights = Total_5plus, span = 0.6)      
  pred5k <- predict(Analysis5k.lo, se = TRUE) 
  pred5kz <- predict(Analysis5kz.lo, se = TRUE)      
  lines(Analysis5k$Period[a5k], pred5k$fit[a5k], col="blue", lwd=2) 
  lines(Analysis5kz$Period[a5kz], pred5kz$fit[a5kz], col="skyblue", lwd=2)          
  lines(Analysis5K$Period[a5K], pred5K$fit[a5K] - qt(0.975, pred5K$df)*pred5K$se[a5K],col="blue",lty=2) 
  lines(Analysis5K$Period[a5K], pred5K$fit[a5K] + qt(0.975, pred5K$df)*pred5K$se[a5K],col="blue",lty=2)      
  lines(Analysis5Kz$Period[a5Kz], pred5Kz$fit[a5Kz] - qt(0.975, pred5Kz$df)*pred5Kz$se[a5Kz],col="skyblue",lty=2) 
  lines(Analysis5Kz$Period[a5Kz], pred5Kz$fit[a5Kz] + qt(0.975, pred5Kz$df)*pred5Kz$se[a5Kz],col="skyblue",lty=2)
  abline(h=0.173, lty=3) 
  abline(v=-4700, lty=3)
  abline(v=-4000, lty=3)
  abline(v=-3000, lty=3)
  minor.tick(nx=5, ny=4, tick.ratio=0.5) 
  dev.off()

Here is one way to do it with ggplot:

(1) Apply the loess smoothing to both data.sets

library(dplyr)
df.lo <- lapply(datlist, function(x)loess(X15p5 ~ Period, data=x, weights = Total_5plus, span = 0.6)) 

(2) create a new data.frame that expand the min (-4350) and max Period (-2715) of data.set:

nd1 <- nd2 <- expand.grid(Period=seq(-4350, -2715, length=100))

(3) predict the fit and se for each of the loess smoother and bind into a single data.frame:

nd1[,c("fit", "se")] <- predict(df1.lo[[1]], newdata=nd1, se=T)[1:2]
nd1 <- nd1 %>% mutate(group="5k")
nd2[,c("fit", "se")] <- predict(df2.lo[[2]], newdata=nd1, se=T)[1:2]
nd2 <- nd2 %>% mutate(group="5kz")

ndata <- rbind(nd1, nd2)

(4) With the predicted data, use ggplot2::geom_ribbon to show overlapping se:

library(ggplot2)
p <- ggplot(ndata, aes(Period, fit)) + 
  geom_line(aes(colour=group)) + 
  geom_ribbon(aes(ymin=fit-1.96*se, ymax=fit+1.96*se, fill=group), alpha=.2) 

p

在此处输入图片说明

(5) add data points and abline:

dat <- do.call(rbind, datlist)
p + 
  geom_point(data=dat, aes(y=X15p5, shape=as.factor(group)), alpha=.2) + 
  geom_hline(yintercept=0.173, linetype="dotted") + 
  geom_vline(xintercept=c(-4700, -4000, -3000), linetype="dotted") +
  ylab("X15p5") + 
  theme_bw()

在此处输入图片说明

The source data datlist is the list of the two data.frames "Analysis5k" and "Analysis5kz". The dput as follow:

structure(list(`5k` = structure(list(Period = c(-4350L, -3900L, 
-3650L, -3900L, -3958L, -4350L, -3400L, -3650L, -3400L, -3500L, 
-3600L, -3808L, -3900L, -3958L, -4058L, -3500L, -4117L, -4350L, 
-4350L, -3650L, -2715L, -4350L, -3500L, -4350L, -3650L, -2715L, 
-3500L, -3650L, -3400L, -4233L, -3650L, -4350L), X15p5 = c(0.100529101, 
0.4, 0.0625, 0.126984127, 0.133333333, 0.150943396, 0.146341463, 
0.255319149, 0.222222222, 0.245014245, 0.125, 0.1, 0.160493827, 
0.238095238, 0.2, 0.086956522, 0.141414141, 0.171038825, 0.166666667, 
0.143798024, 0.137931034, 0.235588972, 0.228840125, 0.041666667, 
0.174757282, 0.377777778, 0.2, 0.078947368, 0.208333333, 0.184027778, 
0.285714286, 0.166666667), Total_5plus = c(12.6, 20, 9.6, 16.8, 
5, 10.6, 8.2, 9.4, 9, 39, 8, 20, 18, 7, 5, 28.75, 6.6, 31.76666667, 
6, 30.36666667, 7.25, 26.6, 79.75, 8, 20.6, 11.25, 7.5, 7.6, 
24, 19.2, 12.6, 6), group = c("5k", "5k", "5k", "5k", "5k", "5k", 
"5k", "5k", "5k", "5k", "5k", "5k", "5k", "5k", "5k", "5k", "5k", 
"5k", "5k", "5k", "5k", "5k", "5k", "5k", "5k", "5k", "5k", "5k", 
"5k", "5k", "5k", "5k")), .Names = c("Period", "X15p5", "Total_5plus", 
"group"), row.names = c(NA, 32L), class = "data.frame"), `5kz` = 
structure(list(
    Period = c(-4350L, -4350L, -3900L, -3650L, -3400L, -3900L, 
    -3958L, -4350L, -3400L, -3650L, -3400L, -3500L, -3600L, -3650L, 
    -3808L, -3900L, -3958L, -4058L, -3500L, -3500L, -4117L, -4350L, 
    -4350L, -3650L, -2715L, -4350L, -3500L, -4350L, -3500L, -3650L, 
    -3800L, -2715L, -3500L, -3650L, -4117L, -4350L, -3400L, -4233L, 
    -3025L, -3650L, -4350L), X15p5 = c(0.100529101, 0, 0.4, 0.0625, 
    0, 0.126984127, 0.133333333, 0.150943396, 0.146341463, 0.255319149, 
    0.222222222, 0.245014245, 0.125, 0, 0.1, 0.160493827, 0.238095238, 
    0.2, 0, 0.086956522, 0.141414141, 0.171038825, 0.166666667, 
    0.143798024, 0.137931034, 0.235588972, 0.228840125, 0.041666667, 
    0, 0.174757282, 0, 0.377777778, 0.2, 0.078947368, 0, 0, 0.208333333, 
    0.184027778, 0, 0.285714286, 0.166666667), Total_5plus = c(12.6, 
    5, 20, 9.6, 6, 16.8, 5, 10.6, 8.2, 9.4, 9, 39, 8, 28, 20, 
    18, 7, 5, 25, 28.75, 6.6, 31.76666667, 6, 30.36666667, 7.25, 
    26.6, 79.75, 8, 5, 20.6, 9, 11.25, 7.5, 7.6, 8, 8, 24, 19.2, 
    7, 12.6, 6), group = c("5kz", "5kz", "5kz", "5kz", "5kz", 
    "5kz", "5kz", "5kz", "5kz", "5kz", "5kz", "5kz", "5kz", "5kz", 
    "5kz", "5kz", "5kz", "5kz", "5kz", "5kz", "5kz", "5kz", "5kz", 
    "5kz", "5kz", "5kz", "5kz", "5kz", "5kz", "5kz", "5kz", "5kz", 
    "5kz", "5kz", "5kz", "5kz", "5kz", "5kz", "5kz", "5kz", "5kz"
    )), .Names = c("Period", "X15p5", "Total_5plus", "group"), row.names = 33:73, class = "data.frame")), .Names = c("5k", 
"5kz"))

I'd propose a tidyverse solution. In this approach you first create a function which will compute and extract the needed statistics. Then you create a list-column with nest , map the function on that list, and unnest the result.

You can read more about this approach at http://r4ds.had.co.nz/many-models.html .


library(tidyverse)

# create function to retrieve fit and se
pred_fun <- function(df) {
  model <- loess(`15p5` ~ Period, df, weights = Total_5plus, span = .6)
  preds <- predict(model, se = T)

  data_frame(fit = preds[["fit"]],
             se = preds[["se.fit"]])
}

# nest, map and unnest fits
nested <- bind_rows(df_5k, df_5kz) %>% 
  group_by(origin) %>% 
  nest() %>% 
  mutate(preds = map(data, pred_fun)) %>% 
  unnest(data, preds)


# plot result
ggplot(nested, aes(Period, `15p5`)) +
  geom_ribbon(aes(ymin = fit - 1.96 * se, ymax = fit + 1.96 * se, fill = origin),
              alpha = .2) +
  geom_point() +
  geom_line(aes(y = fit, colour = origin)) +
  scale_y_continuous(expand = c(.3, 0)) +
  scale_x_continuous(expand = c(.3, 0), breaks = scales::pretty_breaks(6)) +
  theme_bw() +
  theme(legend.position = "bottom") +
  labs(x = "Years B.P.", y = expression(''[15]*'p'[5]))

Of course you can edit the colors of the groups, for example like so:

cols <- c(df_5k = "blue", df_5kz = "skyblue")

ggplot...
...
scale_fill_manual(values = cols) +
scale_color_manual(values = cols)

Edit:

Since I don't know, how to do what you want with base graphics, I'd try to make the plot look like base, using ggthemes::theme_base and changing the point type like this:

ggplot(nested, aes(Period, `15p5`)) +
  ggthemes::theme_base() +
  geom_hline(yintercept = 0.173, linetype = "dotted") +
  geom_vline(xintercept = c(-4700, -4000, -3000), linetype = "dotted") +
  geom_ribbon(aes(ymin = fit - 1.96 * se, ymax = fit + 1.96 * se, fill = origin),
              alpha = .2) +
  geom_point(shape = 1) +
  geom_line(aes(y = fit, colour = origin)) +
  scale_y_continuous(expand = c(.3, 0)) +
  scale_x_continuous(expand = c(.3, 0), breaks = scales::pretty_breaks(6)) +
  theme(legend.position = "bottom") +
  labs(x = "Years B.P.", y = expression(''[15]*'p'[5]),
       colour = NULL, fill = NULL)

Data import

df_5k <- "Period  15p5    Total_5plus
-4350   0.100529101 12.6
-3900   0.4 20
-3650   0.0625  9.6
-3900   0.126984127 16.8
-3958   0.133333333 5
-4350   0.150943396 10.6
-3400   0.146341463 8.2
-3650   0.255319149 9.4
-3400   0.222222222 9
-3500   0.245014245 39
-3600   0.125   8
-3808   0.1 20
-3900   0.160493827 18
-3958   0.238095238 7
-4058   0.2 5
-3500   0.086956522 28.75
-4117   0.141414141 6.6
-4350   0.171038825 31.76666667
-4350   0.166666667 6
-3650   0.143798024 30.36666667
-2715   0.137931034 7.25
-4350   0.235588972 26.6
-3500   0.228840125 79.75
-4350   0.041666667 8
-3650   0.174757282 20.6
-2715   0.377777778 11.25
-3500   0.2 7.5
-3650   0.078947368 7.6
-3400   0.208333333 24
-4233   0.184027778 19.2
-3650   0.285714286 12.6
-4350   0.166666667 6"

df_5k <- read_table2(df_5k) %>% 
  mutate(origin = "df_5k")

df_5kz <- "Period  15p5    Total_5plus
-4350   0.100529101 12.6
-4350   0   5
-3900   0.4 20
-3650   0.0625  9.6
-3400   0   6
-3900   0.126984127 16.8
-3958   0.133333333 5
-4350   0.150943396 10.6
-3400   0.146341463 8.2
-3650   0.255319149 9.4
-3400   0.222222222 9
-3500   0.245014245 39
-3600   0.125   8
-3650   0   28
-3808   0.1 20
-3900   0.160493827 18
-3958   0.238095238 7
-4058   0.2 5
-3500   0   25
-3500   0.086956522 28.75
-4117   0.141414141 6.6
-4350   0.171038825 31.76666667
-4350   0.166666667 6
-3650   0.143798024 30.36666667
-2715   0.137931034 7.25
-4350   0.235588972 26.6
-3500   0.228840125 79.75
-4350   0.041666667 8
-3500   0   5
-3650   0.174757282 20.6
-3800   0   9
-2715   0.377777778 11.25
-3500   0.2 7.5
-3650   0.078947368 7.6
-4117   0   8
-4350   0   8
-3400   0.208333333 24
-4233   0.184027778 19.2
-3025   0   7
-3650   0.285714286 12.6
-4350   0.166666667 6"

df_5kz <- read_table2(df_5kz) %>% 
  mutate(origin = "df_5kz")

Here is a solution with base plot based on your code.

The trick with polygon is that you must provide 2 times the x coordinates in one vector, once in normal order and once in reverse order (with function rev ) and you must provide the y coordinates as a vector of the upper bounds followed by the lower bounds in reverse order.

We use the adjustcolor function to make standard colors transparent.

library(Hmisc) 
ppi <- 300 
par(mfrow = c(1,1), pty = "s", oma=c(1,2,1,1), mar=c(4,4,2,2)) 
plot(X15p5 ~ Period, Analysis5kz, xaxt = "n", yaxt= "n", ylim=c(-0.2,0.7), xlim=c(-5000,-2500), xlab = "Years B.P.", ylab = expression(''[15]*'p'[5]), main = "") 
vx <- seq(-5000,-2000, by = 500) 
vy <- seq(-0.2,0.7, by = 0.1) 
axis(1, at = vx) 
axis(2, at = vy) 
a5k <- order(Analysis5k$Period) 
a5kz <- order(Analysis5kz$Period)
Analysis5k.lo <- loess(X15p5 ~ Period, Analysis5k, weights = Total_5plus, span = 0.6) 
Analysis5kz.lo <- loess(X15p5 ~ Period, Analysis5kz, weights = Total_5plus, span = 0.6)      
pred5k <- predict(Analysis5k.lo, se = TRUE) 
pred5kz <- predict(Analysis5kz.lo, se = TRUE)      

polygon(x = c(Analysis5k$Period[a5k], rev(Analysis5k$Period[a5k])),
        y = c(pred5k$fit[a5k] - qt(0.975, pred5k$df)*pred5k$se[a5k], 
              rev(pred5k$fit[a5k] + qt(0.975, pred5k$df)*pred5k$se[a5k])),
        col =  adjustcolor("dodgerblue", alpha.f = 0.10), border = NA)

polygon(x = c(Analysis5kz$Period[a5kz], rev(Analysis5kz$Period[a5kz])),
        y = c(pred5kz$fit[a5kz] - qt(0.975, pred5kz$df)*pred5kz$se[a5kz], 
              rev( pred5kz$fit[a5kz] + qt(0.975, pred5kz$df)*pred5kz$se[a5kz])),
        col =  adjustcolor("orangered", alpha.f = 0.10), border = NA)

lines(Analysis5k$Period[a5k], pred5k$fit[a5k], col="dodgerblue", lwd=2) 
lines(Analysis5kz$Period[a5kz], pred5kz$fit[a5kz], col="orangered", lwd=2)   

abline(h=0.173, lty=3) 
abline(v=-4700, lty=3)
abline(v=-4000, lty=3)
abline(v=-3000, lty=3)
minor.tick(nx=5, ny=4, tick.ratio=0.5) 

在此处输入图片说明

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM