简体   繁体   中英

Estimating logsitic parameters and random effects with nlme

I have managed to fit logistic curves to fit growth models for 129 fish belonging to 3 groups. Unfortunately the parameters I got were not consistent and very often the models I tried have crashed. Therefore I've simulated a data set on which I've tried to fit these parameters and to add a random effect to handle the individual vaiability. I must have missed something with nlme as I was either able to get consistent coefficients or consitent variance estimation but not the two.

set.seed(100)
# coefficients for each group
cf <- structure(c(58.8007098743483, 68.9526514961022, 75.7517805503469, 
68.2111807884739, 79.0803042994813, 75.2743397284317, 29.8661527230426, 
32.7502759832602, 30.7439702116961), .Dim = c(3L, 3L), .Dimnames = list(
c("gr1", "gr2", "gr3"), c("Asym_mean", "xmid_mean", "scal_mean"
)))
# one curve for each individual
nl <- c(68, 38, 23)
Time <- 1:130
tab <- expand.grid(Individual = 1:sum(nl), Time = Time)
tab <- tab[do.call(order, tab),]
tab$Li <- numeric(nrow(tab))
tab$group <- factor(rep(c("gr1", "gr2", "gr3"), nl*130))

for (i in 1:sum(nl)) {
  auxi <- tab$Individu %in% i
  sec <- unique(tab$group[auxi])
  Asym1 <- rnorm(1, cf[sec, "Asym_mean"], 13)
  xmid1 <- rnorm(1, cf[sec, "xmid_mean"], 15)
  scal1 <- rnorm(1, cf[sec, "scal_mean"], 4.6)
  crois <- sort(SSlogis(Time, Asym1, xmid1, scal1) + rnorm(130, 0, 0.3))
  tab$Li[auxi] <- crois
}
tab$Individual <- factor(tab$Individual)

Once I got this data set I tried the following model :

# Initialising coefficients
cfs <- coef(nlsList(Li ~ SSlogis(Time, Asym, xmid, scal)|Individual, data = tab))
cfs <- aggregate(. ~ fac, cbind(cfs, fac = rep(levels(tab$group), nl)), mean)
debt <- lapply(cfs[-1], function(x) c(x[1], x[-1]-x[1]))
debt <- unlist(debt)
# control arguments
lmc <- lmeControl(1e3, 1e3, niterEM=200, msMaxEval = 1e3) 
# logistic model for each group
nlme(Li ~  Asym/(1+exp((xmid-Time)/scal)), data = tab,
         fixed =  Asym + xmid + scal ~ group,
         random = Asym + xmid + scal ~  1|Individual ,
         start = debt,
         control = lmc)

And I got the following message : "Error in nlme.formula(Li ~ Asym/(1 + exp((xmid - Time)/scal)), data = tab, : step halving factor reduced below minimum in PNLS step"

I have tried many different formulas and I was not able to get coefficients and random effects estimations.

Regards, Maxime

Well I didin't find a satisfying answer to this issue. I've tried ADMB, but I've encountered different issues, either I was not able to code this model or I was not able to compile the .ptl file.

I finally used jags to dot it, with the library R2jags.

I hope it could be useful for someone else :

# the code of the bayesian model stored in the file "growth.txt"
model {
for (i in 1:K) {
  for (j in 1:n) {
    Y[j, i] ~ dnorm(eta[j, i], tauC)
    eta[j, i] <- phi1[i] / (1 + exp(-(x[j]-phi2[i])/phi3[i]))
    }
  ## random effect of iˆth tree
  phi1[i] <- mu1 + a2*gr2[i] + a3*gr3[i] + a[i]   
  a[i] ~ dnorm(0, tau1)
  phi2[i] <- mu2 + b2*gr2[i] + b3*gr3[i] + b[i]   
  b[i] ~ dnorm(0, tau2)
  phi3[i] <- mu3 + c2*gr2[i] + c3*gr3[i] + c[i]   
  c[i] ~ dnorm(0, tau3)
  }
## priors
tauC ~ dgamma(1.0E-3, 1.0E-3)
logSigma <- -0.5*log(tauC)
logSigmaA <- -0.5*log(tau1)
logSigmaB <- -0.5*log(tau2)
logSigmaC <- -0.5*log(tau3)
mu1 ~ dnorm(0, 1.0E-4)
mu2 ~ dnorm(0, 1.0E-4)
mu3 ~ dnorm(0, 1.0E-4)
a2 ~ dnorm(0, 1.0E-4)
a3 ~ dnorm(0, 1.0E-4)
c2 ~ dnorm(0, 1.0E-4)
c3 ~ dnorm(0, 1.0E-4)
b2 ~ dnorm(0, 1.0E-4)
b3 ~ dnorm(0, 1.0E-4)
c2 ~ dnorm(0, 1.0E-4)
c3 ~ dnorm(0, 1.0E-4)
tau1 ~ dgamma(1.0E-3, 1.0E-3)
tau2 ~ dgamma(1.0E-3, 1.0E-3)
tau3 ~ dgamma(1.0E-3, 1.0E-3)
}

And the associated R lines :

# 
library(tidyr)
tabw <- spread(tab[-4], Individual, Li,-2, drop = TRUE)

x <- tabw[,1] # Time

# each Individual belong to one of the three groups
grs <- unique(tab[c(1,4)])
grs <- grs$group[match(colnames(tabw)[-1], grs$Individual)]

# dummy variable 
gr2 <- (grs %in% "gr2")*1
gr3 <- (grs %in% "gr3")*1
BUGSData<-list(n = length(x), K = ncol(tabw)-1, x = tabw[,1], Y = tabw[,-1], gr2 = gr2, gr3 = gr3)


cfs <- coef(nlsList(Li ~ SSlogis(Time, Asym, xmid, scal)|Individual, data = tab))
cfs <- cbind(cfs, gr = grs) %>% group_by(gr) %>% summarise_all(funs(mean, sd))

cfs <- cfs %>% mutate(Asym_mean = Asym_mean-Asym_mean[1]*0^((1:n())==1),
                      xmid_mean = xmid_mean-xmid_mean[1]*0^((1:n())==1),
                      scal_mean = scal_mean-scal_mean[1]*0^((1:n())==1))

debt <- c(unlist(cfs[2:4]), cfs %>% select(ends_with("sd")) %>% colMeans())
names(debt) <- c("mu1", "a2", "a3", "mu2", "b2", "b3", "mu3", "c2", "c3", "tau1", "tau2", "tau3")
debt <- as.list(debt)

set.seed(1001) ## set RNG seed for R
inits<-c(debt, tauC = 0.1, 
            .RNG.name="base::Wichmann-Hill", ## set RNG seed/type for JAGS
            .RNG.seed=round(runif(1)*1000000))

tfit_jags <- jags(model="growth.txt",
                  data=BUGSData,
                  parameters.to.save= c(names(debt), 
                                        "logSigma", "logSigmaA", "logSigmaB", "logSigmaC",
                                        "phi1", "phi2", "phi3"),
                  n.chains=1,
                  inits=list(inits),
                  progress.bar="none",
                  n.iter =  2e3, # 1e6
                  n.burnin = 1e3 # 1e5,
                  ) # n.thin = 1e3

Finally, I dit it also with ADMB and the library R2 admb with the following code. This code was freely adapted from the Orange example that can be found here : https://github.com/admb-project/admb-examples/tree/master/growth-models/orange-trees

The code for growth6.tpl file :

DATA_SECTION

  init_int n            // Number of data points
  init_vector y(1,n)        // Response vector
  init_vector t(1,n)        // Primary covariate
  init_int M            // Number of groups     
  init_vector ngroup(1,M)   // Group indicator
  init_int m            // Number of parameters in nonlinear regression model
  init_vector gr2(1,M)  // dummy variable for being in group 2
  init_vector gr3(1,M)  // dummy variable for being in group 3  

PARAMETER_SECTION

  init_bounded_vector beta(1,3,-40,40,1)        // Fixed effects parameters
  init_bounded_number log_sigma(-5,5.0,1)   // log(residual variance)
  init_bounded_number log_sigma_u(-10,5,2)      // 0.5*log(variance component)
  init_bounded_number log_sigma_v(-10,5,3)      // 0.5*log(variance component)
  init_bounded_number log_sigma_w(-10,5,4)      // 0.5*log(variance component)
  init_bounded_vector beta2(1,3,-40,40,1)  // Fixed effects for group 2
  init_bounded_vector beta3(1,3,-40,40,1)  // Fixed effects for group 3

  random_effects_vector u(1,M,2)            // Unscaled random effects
  random_effects_vector v(1,M,3)
  random_effects_vector w(1,M,3)
  objective_function_value g

PRELIMINARY_CALCS_SECTION
  cout << setprecision(4); // 

GLOBALS_SECTION

  #include <df1b2fun.h>
  //#include <fvar.hpp>

PROCEDURE_SECTION

  int i,ii,iii;

  g = 0.0;

  ii = 0;
  iii = 0;
  for(i=1;i<=(int) M;i++) // loop on individuals
  {
    fit_individual_tree(beta(1),beta(2),beta(3),beta2(1),beta2(2),beta2(3),beta3(1),beta3(2),beta3(3),u(i),v(i),w(i),i,ii,iii,log_sigma,log_sigma_u,log_sigma_v,log_sigma_w);
  }

SEPARABLE_FUNCTION void fit_individual_tree(const dvariable& beta1,const dvariable& beta2,const dvariable& beta3,const dvariable& a1,const dvariable& a2,const dvariable& a3,const dvariable& b1,const dvariable& b2,const dvariable& b3,const dvariable& u1,const dvariable& v1,const dvariable& w1,int i,int& ii,int& iii,const dvariable& log_sigma,const dvariable& log_sigma_u,const dvariable& log_sigma_v,const dvariable& log_sigma_w)
    int j;
    int g1;
    int g2;
    int g3;

    iii++;
    dvar_vector a(1,3);             // Basic model function parameters

    g2 = gr2(iii);
    g3 = gr3(iii);
    g1 = 1-g2-g3;

    a(1) = 62.26 + beta1*g1 + a1*g2 + b1*g3 + u1;           
    a(2) = 72.90 + beta2*g1 + a2*g2 + b2*g3 + v1;
    a(3) = 31.35 + beta3*g1 + a3*g2 + b3*g3 + w1;

    dvariable tmp, f;
    dvariable sigma = mfexp(log_sigma);

    // Random effects contribution
    g -= -(log_sigma_u);
    g -= -.5*(square(u1/mfexp(log_sigma_u)));
    g -= -(log_sigma_v);
    g -= -.5*(square(v1/mfexp(log_sigma_v)));
    g -= -(log_sigma_w);
    g -= -.5*(square(w1/mfexp(log_sigma_w)));

    for(j=1;j<=ngroup(i);j++)
    {
      g -= -log_sigma;
      ii++;
      f = a(1)/(1+mfexp(-(t(ii)-a(2))/a(3)));
      tmp = y(ii) - f;
      tmp /= sigma;
      g -= -0.5*tmp*tmp;
    }

REPORT_SECTION
  //report << beta0+beta << endl;
  report << exp(log_sigma) << endl;
  report << exp(log_sigma_u) << endl;

TOP_OF_MAIN_SECTION
  arrmblsize = 40000000L;
  gradient_structure::set_GRADSTACK_BUFFER_SIZE(300000000);
  gradient_structure::set_CMPDIF_BUFFER_SIZE(20000000);
  gradient_structure::set_MAX_NVAR_OFFSET(1000000);

Then the R code to estimate parameters :

library(dplyr)
library(tidyr)
library(nlme)
library(R2admb)

set.seed(100)
# coefficients for each group
# coefficients for each group
cf <- structure(c(58.8007098743483, 68.9526514961022, 75.7517805503469, 
                  68.2111807884739, 79.0803042994813, 75.2743397284317, 29.8661527230426, 
                  32.7502759832602, 30.7439702116961), .Dim = c(3L, 3L), .Dimnames = list(
                    c("gr1", "gr2", "gr3"), c("Asym_mean", "xmid_mean", "scal_mean"
                    )))

nl <- c(68, 38, 23)
Time <- 1:130
tab <- expand.grid(Individual = 1:sum(nl), Time = Time)
tab <- tab[do.call(order, tab),]
tab$Li <- numeric(nrow(tab))
tab$group <- factor(rep(c("gr1", "gr2", "gr3"), nl*130))

for (i in 1:sum(nl)) {
  auxi <- tab$Individu %in% i
  sec <- unique(tab$group[auxi])
  Asym1 <- rnorm(1, cf[sec, "Asym_mean"], 13)
  xmid1 <- rnorm(1, cf[sec, "xmid_mean"], 15)
  scal1 <- rnorm(1, cf[sec, "scal_mean"], 4.6)
  crois <- sort(SSlogis(Time, Asym1, xmid1, scal1) + rnorm(130, 0, 0.3))
  tab$Li[auxi] <- crois
}
tab$Individual <- factor(tab$Individual)

grs <- unique(tab[c("Individual", "group")])
gr2 <- as.integer((grs$group == "gr2")*1)
gr3 <- as.integer((grs$group == "gr3")*1)

do_admb("growth6",
        data = 
          list(n = nrow(tab), y = tab$Li, t = tab$Time, M = 129, ngroup = rep(130, 129), m=3,
               gr2 = gr2, gr3 = gr3),
        params = 
          list(beta = rep(0, 3), 
               log_sigma = 1, log_sigma_u = 1, log_sigma_v = 1, log_sigma_w = 1,
               beta2 = rep(0, 3), beta3 = rep(0, 3),
               u = rep(0, 129), v = rep(0, 129), w = rep(0, 129)),
        run.opts = run.control(clean_files = "none")
)

ted <- read_admb("growth6")
cfe <- matrix(coef(ted)[grep("beta", names(coef(ted)))]+c(62.26, 72.90, 31.35), 3)
rownames(cfe) <- sprintf("phi%d", 1:3)
colnames(cfe) <- sprintf("gr%d", 1:3)

# we can compare with
coef(nlsList(Li ~ SSlogis(Time, phi1, phi2, phi3)|group, tab))

I hope this could help someone-else. Max

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM