简体   繁体   中英

Bayesian Rolling Poisson Regression in Jags (via R2jags)

The Problem

I have a small dataset (N=100). I need to run a Poisson regression, but excluding one observation at a time (hence, a Rolling Poisson Regression).

There are several predictors in the equation, but I care about one (call it bx ). My idea is to see how much bx varies across the 100 models. Then, I'd like to plot these 100 point estimates with the effect sizes on the Y-axis, and the model number in the X-axis.

In summary, I need the following:

  1. Run a Rolling Poisson Regression in JAGS (via R2jags).

  2. After getting the estimates, plot them.

Please note that my Poisson model in JAGS is running fine (below is a sample toy of my model/data). However, I haven't been able to implement the "Rolling" version .

The Self-Contained Example

# clear R
rm(list=ls())
cat("\014")

# load libraries
if (!require("pacman")) install.packages("pacman"); library(pacman) 
p_load(R2jags)


# Toy Data
N <- 100
x <- rnorm(n=N)  # standard Normal predictor
y <- rpois(n=N, lambda = 1)  # Poisson DV


# model
model <- function() {
    ## Likelihood
    for(i in 1:N){
            y[i] ~ dpois(lambda[i])
            log(lambda[i]) <- 
                    mu + # intercept
                    b.x*x[i]
            }
    ## Priors 
    mu ~  dnorm(0, 0.01) ## intercept
    b.x ~ dnorm(0, 0.01)
    }

# list elements
data.list <- list(N = N, y = y, x = x)

# run model
model.fit <- jags(
    data=data.list,
    inits=NULL,
    parameters.to.save = c("b.x"),
    n.chains = 1,
    n.iter = 20,
    n.burnin = 2, 
    model.file=model,
    progress.bar = "none")

Ok. That's the model. In model.fit there is bx , the coefficient I have to get 100 times. With my current code, I am able to obtain it just once, with the full dataset. However, I need to obtain it a second time with the first row of the df excluded, and then a third time but with the second row of the df excluded, and so on and so forth. And then, plot all these bx 's.


Now, just for the sake of the example, I will create a simple table, just to signal that I need the first element (the coefficient of of bx ).

## I sourced this function below from    https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R

# Function to Create Table
mcmctab <- function(sims, ci = .8, digits = 2){

    require(coda) 

    if(class(sims) == "jags" | class(sims) == "rjags"){
            sims <- as.matrix(as.mcmc(sims))
    }
    if(class(sims) == "bugs"){
            sims <- sims$sims.matrix
    }  
    if(class(sims) == "mcmc"){
            sims <- as.matrix(sims)
    }    
    if(class(sims) == "mcmc.list"){
            sims <- as.matrix(sims)
    }      
    if(class(sims) == "stanfit"){
            stan_sims <- rstan::As.mcmc.list(sims)
            sims <- as.matrix(stan_sims)
    }      


    dat <- t(sims)

    mcmctab <- apply(dat, 1,
                     function(x) c(Mean = round(mean(x), digits = digits), # Posterior mean
                                   SD = round(sd(x), digits = 3), # Posterior SD
                                   Lower = as.numeric(
                                           round(quantile(x, probs = c((1 - ci) / 2)), 
                                                 digits = digits)), # Lower CI of posterior
                                   Upper = as.numeric(
                                           round(quantile(x, probs = c((1 + ci) / 2)), 
                                                 digits = digits)), # Upper CI of posterior
                                   Pr. = round(
                                           ifelse(mean(x) > 0, length(x[x > 0]) / length(x),
                                                  length(x[x < 0]) / length(x)), 
                                           digits = digits) # Probability of posterior >/< 0
                     ))
    return(t(mcmctab))
}


# this is the coefficient I need, but with different data frames.
mcmctab(model.fit)[1,1]

Sorry I can't even provide an attempted solution here. Thanks very much in advance.

Use a for-loop or one of the apply family members to exclude one observation at a time:

sims <- lapply(1:100, function(i) {
  data.list <- list(N = N - 1, y = y[-i], x = x[-i])
  # run model
  model.fit <- jags(
      data=data.list,
      inits=NULL,
      parameters.to.save = c("b.x"),
      n.chains = 1,
      n.iter = 20,
      n.burnin = 2, 
      model.file=model,
      progress.bar = "none")
  return(model.fit)
})

You can then extract your quantity of interest by looping through the output:

sapply(sims, function(x) x$BUGSoutput$mean$b.x)
#  [1] -0.018966261 -0.053383364 -0.030193649 -0.097046841 -0.026258934
#  [6] -0.005486296  0.084811315 -0.047736880  0.142379194 -0.026583145
# <snip>
# clear R
rm(list=ls())

# load libraries
library(R2jags)

# Toy Data
set.seed(123) # set RNG seed for reproducibility
N <- 100
x <- rnorm(n=N)  # standard Normal predictor
y <- rpois(n=N, lambda = 1)  # Poisson DV

# model
model <- function() {
  ## Likelihood
  for(i in 1:N){
    y[i] ~ dpois(lambda[i])
    log(lambda[i]) <- 
      mu + # intercept
      b.x*x[i]
  }
  ## Priors 
  mu ~  dnorm(0, 0.01) ## intercept
  b.x ~ dnorm(0, 0.01)
}

# list elements
data.list <- list() # create empty list to fill in next line

# fill list with one data set for each step, with one row excluded per step
for(i in 1:100){
  data.list[[i]] <- list(N = 99, y = y[-i], x = x[-i])
}

# Starting value for reproducibility
model.inits <- function(){
  list("b.x" = 0)
}

# run model
model.fit <- list() # again, create empty list first

for(i in 1:100){  # use loop here to fit one model per data set
model.fit[[i]] <- jags(
  data=data.list[[i]],
  inits=NULL,
  parameters.to.save = c("b.x"),
  n.chains = 1,
  n.iter = 20,
  n.burnin = 2, 
  model.file=model,
  progress.bar = "none")
}


# helper function for output
devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R")

# create empty data frame to be filled with estimation results per data set
tab <- data.frame(index = c(1:100), b = rep(NA, 100), lower = rep(NA, 100), upper = rep(NA, 100))

# fill with estimates, using mcmctab to extract mean & lower & upper CIs
for(i in 1:100){
  tab[i, 2] <- mcmctab(model.fit[[i]])[1, 1]
  tab[i, 3] <- mcmctab(model.fit[[i]])[1, 3]
  tab[i, 4] <- mcmctab(model.fit[[i]])[1, 4]
}

# plot results
library(ggplot2)

p <- ggplot(data = tab, aes(x = b, y = index)) + geom_point() + geom_segment(aes(x = lower, xend = upper, yend = index))
p

产量

I thank Johannes Karreth for kindly providing this great answer.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM