簡體   English   中英

在 R 中使用 ggplot2 繪制您自己生成的置信區間

[英]Plot your own generated confidence interval with ggplot2 in R

在這個問題Bootstrapping CI for a quantile regression in R outside the quantreg framework之后,我想在我的分位數回歸圖中繪制使用提供的解決方案獲得的置信區間。

圖書館:

library(ggplot2)
library(dplyr)
library(tidyverse)

回歸函數:

logcosh <- function(x) log(cosh(x))

minimize.logcosh <- function(par, X, y, tau) {
  diff <- y-(X %*% par)
  check <- (tau-0.5)*diff+(0.5/0.7)*logcosh(0.7*diff)+0.4
  return(sum(check))
}

smrq <- function(X, y, tau){
  p <- ncol(X)
  op.result <- optim(
    rep(0, p),
    fn = minimize.logcosh,
    method = 'BFGS',
    X = X,
    y = y,
    tau = tau
  )
  beta <- op.result$par
  return(beta)
}

run_smrq <- function(data, fml, response, n=99) {
  x <- model.matrix(fml, data) #modify
  y <- data[[response]]
  #X <- cbind(x, rep(1,nrow(x)))
  X <- x
  
  betas <- sapply(1:n, function(i) smrq(X, y, tau=i/(n+1)))
  return(betas) 
}

樣本數據:

> dput(head(df, 20))
structure(list(lat = c("59", "59", "55", "59", "59", "63", "59", 
"59", "59", "59", "63", "59", "59", "59", "57", "56", "56", "59", 
"63", "63"), long = c(18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 
18, 18, 18, 18, 18, 18, 18, 18, 18, 18), date = c("1951-03-22", 
"1951-04-08", "1952-02-03", "1952-03-08", "1953-02-22", "1953-03-12", 
"1954-01-16", "1954-02-06", "1954-03-14", "1954-03-28", "1954-04-02", 
"1955-01-23", "1955-03-06", "1955-03-13", "1955-04-08", "1955-04-11", 
"1955-04-12", "1956-03-25", "1956-04-01", "1956-04-02"), julian_day = c(81, 
98, 34, 68, 53, 71, 16, 37, 73, 87, 92, 23, 65, 72, 98, 101, 
102, 85, 92, 93), year = c(1951L, 1951L, 1952L, 1952L, 1953L, 
1953L, 1954L, 1954L, 1954L, 1954L, 1954L, 1955L, 1955L, 1955L, 
1955L, 1955L, 1955L, 1956L, 1956L, 1956L), decade = c("1950-1959", 
"1950-1959", "1950-1959", "1950-1959", "1950-1959", "1950-1959", 
"1950-1959", "1950-1959", "1950-1959", "1950-1959", "1950-1959", 
"1950-1959", "1950-1959", "1950-1959", "1950-1959", "1950-1959", 
"1950-1959", "1950-1959", "1950-1959", "1950-1959"), time = c(10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L), lat_grouped = c("1", "1", "1", 
"1", "1", "2", "1", "1", "1", "1", "2", "1", "1", "1", "1", "1", 
"1", "1", "2", "2"), year_centered = structure(c(-36, -36, -35, 
-35, -34, -34, -33, -33, -33, -33, -33, -32, -32, -32, -32, -32, 
-32, -31, -31, -31), class = "AsIs")), row.names = 24:43, class = "data.frame")

我如何獲得回歸圖:

#Quantile regression

smrq_models <- df %>%
group_by(lat_grouped) %>%
group_map(~ run_smrq(data=., fml=julian_day~year_centered, response="julian_day"), n=99)

#Gives 3 models; I show for the first one

model1 = as.data.frame(t(smrq_models[[1]]))

names(model1)[1] <- 'intercept'
names(model1)[2] <- 'julian_day'
model1 = rownames_to_column(model1, var = "tau")
model1$tau = seq(0.01, 0.99, by = 0.01)

model1 %>% 
  mutate(Quantile=row_number()) %>% 
  pivot_longer(!Quantile,names_to="beta",values_to = "Coefficient") %>% 
  ggplot(aes(Quantile,Coefficient,color=beta)) + 
  geom_line() +
  facet_wrap(~beta, scales="free_y")

如何獲得置信區間:

boot_fun <- function(data, n) {
  i <- sample(nrow(data), nrow(data), replace = TRUE)
  df <- data[i, ]
  df %>%
    group_by(lat_grouped) %>%
    group_map(~ run_smrq(data=., fml=julian_day~year_centered, response="julian_day", n=n))
}

set.seed(2022)

n <- 99L
R <- 10L
boot_smrq_models <- vector("list", length = R)
for(i in seq.int(R)) {
  boot_smrq_models[[i]] <- boot_fun(df, n)
}

l <- length(boot_smrq_models[[1]])
smrq_models_all <- vector("list", length = l)
smrq_models_int <- vector("list", length = l)
for(i in seq.int(l)) {
  tmp <- array(dim = c(R, dim(boot_smrq_models[[1]][[i]])))
  for(j in seq.int(R)) {
    tmp[j, , ] <- boot_smrq_models[[j]][[i]]
  }
  smrq_models_all[[i]] <- t(apply(tmp, 2:3, mean))
  smrq_models_int[[i]] <- apply(tmp, 2:3, quantile, probs = c(0.025, 0.975))
  rownames(smrq_models_all[[i]]) <- sprintf("tau_%03.02f", (1:99)/(99+1))
}

CI <- smrq_models_int
CI_mod1 = smrq_models_int[[1]]

如果可行,所需的輸出將是,將兩者結合起來,將 CI_mod1 值添加到回歸圖中,得到類似這樣的結果(隨機示例):

在此處輸入圖像描述

非常感謝您的幫助,如果我缺少提供一些信息,請不要猶豫,我會編輯我的帖子。

如評論中所述,您可以使用geom_ribbon()執行此操作,您只需要將 CI 數據與模型系數數據合並。

CIs_to_plot <- map_dfr(1:dim(CI_mod1)[3], ~as_tibble(CI_mod1[,,.x], rownames = "pctle"), 
                       .id = "Quantile") %>% 
  pivot_wider(names_from = "pctle", values_from = c("V1", "V2")) %>% 
  rename("intercept.low" = `V1_2.5%`, "intercept.high" = `V1_97.5%`,
         "julian_day.low" =`V2_2.5%`, "julian_day.high" = `V2_97.5%`) %>% 
  pivot_longer(-Quantile, names_sep = "\\.", names_to = c("beta", ".value")) %>% 
  mutate(Quantile = parse_number(Quantile))
  
model_to_plot <- model1 %>% 
  mutate(Quantile=row_number()) %>% 
  pivot_longer(!Quantile,names_to="beta",values_to = "Coefficient")

model_to_plot %>% 
  left_join(CIs_to_plot, by = c("Quantile", "beta")) %>% 
  ggplot(aes(Quantile,Coefficient)) + 
  geom_ribbon(aes(ymin = low, ymax = high), fill = "grey", alpha = .5) +
  geom_line(aes(color=beta)) +
  facet_wrap(~beta, scales="free_y") +
  theme_minimal()

問題中的圖表,帶有灰色置信帶。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM