[英]Fit a generalized linear model (glm) with a categorical variable of month using the function monthglm() in R
问题
我有一个数据框(见下文),我想使用 monthglm() 函数,以季节和年份作为协变量,拟合一个带有月份分类变量的广义线性模型 (glm)。
该函数出自 Barnett, AG 和 Dobson, AJ (2010) 的《Analyzing Seasonal Health Data》(Springer 出版)一书(函数代码见下文)。运行下面的函数之后,我不断收到下文所示的错误消息。
如果有人可以提供帮助,我将不胜感激。
加载包
library(season)
library(MASS) # for mvrnorm
library(survival) # for coxph
library(ggplot2)
Function:
## monthglm: fit a glm with month added as a categorical (factor) term.
##
## Arguments (as used in the body below):
##   formula     - model formula; the term '+months' is appended to it before fitting
##   data        - data frame handed to glm() and flagleap()
##   family      - passed unchanged to glm()
##   refmonth    - reference month (1-12) used when re-levelling the month factor
##   monthvar    - name (string) of the month variable, looked up in `data`
##   offsetmonth - if TRUE, the offset includes days-in-month / (365.25/12)
##   offsetpop   - optional expression evaluated inside `data`; used as a population offset
## Returns a list of class 'monthglm' with elements call, glm, fitted.values, residuals.
monthglm<-function(formula,data,family=gaussian(),refmonth=1,
monthvar='month',offsetmonth=FALSE,offsetpop=NULL){
## checks
if (refmonth<1|refmonth>12){stop("Reference month must be between 1 and 12")}
## original call with defaults (see amer package)
## (the call as written by the user, extended with the defaults of every argument not supplied)
ans <- as.list(match.call())
frmls <- formals(deparse(ans[[1]]))
add <- which(!(names(frmls) %in% names(ans)))
call<-as.call(c(ans, frmls[add]))
## Look the month variable up by name inside `data`.
## NOTE(review): if `data` has no such column, get() carries on searching the
## enclosing environments and may return an unrelated object of the same name.
monthvar=with(data,get(monthvar))
## NOTE(review): class() can return more than one string (e.g. for an ordered
## factor), which the scalar `if` tests below do not allow for.
cmonthvar=class(monthvar)
## If month is a character, create the numbers
if(cmonthvar%in%c('factor','character')){
if(cmonthvar=='character'){
## three-letter values are matched against abbreviated names, anything else against full names
if(max(nchar(monthvar))==3){mlevels=substr(month.name,1,3)}else{mlevels=month.name}
monthvar=factor(monthvar,levels=mlevels)
}
## NOTE(review): for a factor, as.numeric() returns the integer level codes, so
## this only yields calendar month numbers when the levels are in calendar order.
months=as.numeric(monthvar)
data$month=months # add to data for flagleap
months=as.factor(months)
levels(months)[months]<-month.abb[months]
## NOTE(review): `months.u` is only assigned in the numeric branch further down;
## it does not exist at this point in the factor/character branch.
months<-relevel(months.u,ref=month.abb[refmonth]) # set reference month
}
## Transform month numbers to names
if(cmonthvar%in%c('integer','numeric')){
months.u<-as.factor(monthvar)
## NOTE(review): `nochars` is not defined in this listing; it has to be
## available from somewhere else (presumably a helper inside the season package - confirm).
nums<-as.numeric(nochars(levels(months.u))) # Month numbers
levels(months.u)[nums]<-month.abb[nums]
months<-relevel(months.u,ref=month.abb[refmonth]) # set reference month
}
## prepare data/formula
## rebuild the formula as "<lhs> ~ <rhs> +months"; `months` is the local factor created above
parts<-paste(formula)
f<-as.formula(paste(parts[2],parts[1],parts[3:length(formula)],'+months'))
dep<-parts[2] # dependent variable
## flagleap() is not defined in this listing (presumably from the season package);
## its result is only read below, via days$ndaysmonth, when offsetmonth is TRUE
days<-flagleap(data=data,report=FALSE,matchin=T) # get the number of days in each month
l<-nrow(data)
if(is.null(offsetpop)==FALSE){poff=with(data,eval(offsetpop))} else{poff=rep(1,l)} # population offset, 1 when not supplied
if(offsetmonth==TRUE){moff=days$ndaysmonth/(365.25/12)} else{moff=rep(1,l)} # days per month divided by average month length
### data$off<-log(poff*moff)
off<-log(poff*moff) # combined offset on the log scale (0 when neither offset is requested)
fit<-glm(formula=f,data=data,family=family,offset=off)
## return
toret<-list()
toret$call<-call
toret$glm<-fit
toret$fitted.values<-fitted(fit)
toret$residuals<-residuals(fit)
class(toret)<-'monthglm'
return(toret)
}
#The levels of a factor must match the observed values.
#If you want to change how those values print out, you need to change the labels.
错误信息
model<-monthglm(formula=Frequency_Blue~Year+Monsoon_Season, family=gaussian,
+ offsetmonth=TRUE, refmonth=1, data=Final_New_Blue)
Error in nochars(levels(months.u)) : could not find function "nochars"
Dataframe
## Example data frame (looks like dput() output; the value is not assigned to a name here).
## 36 rows (row.names = c(NA, -36L)) and four columns:
##   Year                             - integer, 2015-2017
##   Month                            - factor with 12 levels
##   Frequency_Blue_Whales_Year_Month - integer counts
##   Season                           - factor
## NOTE(review): the Month levels are stored alphabetically and mix abbreviations
## ("Feb", "Jan", "Mar") with full names, so the integer codes are not calendar month numbers.
## NOTE(review): Season contains both "winter" and "Winter" as separate levels - presumably a typo; confirm.
structure(list(Year = c(2015L, 2016L, 2017L, 2015L, 2016L, 2017L,
2015L, 2016L, 2017L, 2015L, 2016L, 2017L, 2015L, 2016L, 2017L,
2015L, 2016L, 2017L, 2015L, 2016L, 2017L, 2015L, 2016L, 2017L,
2015L, 2016L, 2017L, 2015L, 2016L, 2017L, 2015L, 2016L, 2017L,
2015L, 2016L, 2017L), Month = structure(c(5L, 5L, 5L, 4L, 4L,
4L, 8L, 8L, 8L, 1L, 1L, 1L, 9L, 9L, 9L, 7L, 7L, 7L, 6L, 6L, 6L,
2L, 2L, 2L, 12L, 12L, 12L, 11L, 11L, 11L, 10L, 10L, 10L, 3L,
3L, 3L), .Label = c("April", "August", "December", "Feb", "Jan",
"July", "June", "Mar", "May", "November", "October", "September"
), class = "factor"), Frequency_Blue_Whales_Year_Month = c(76L,
78L, 66L, 28L, 54L, 37L, 39L, 31L, 88L, 46L, 23L, 54L, 5L, 8L,
0L, 0L, 0L, 0L, 0L, 4L, 7L, 22L, 6L, 44L, 10L, 30L, 35L, 88L,
41L, 35L, 4L, 30L, 43L, 65L, 43L, 90L), Season = structure(c(4L,
4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
5L, 5L, 5L), .Label = c("Autumn", "Spring", "Summer", "winter",
"Winter"), class = "factor")), class = "data.frame", row.names = c(NA,
-36L))
回答
对函数代码和您的函数调用做了下面这些简单修改之后,我成功让模型运行起来了。我把修改后的函数命名为 monthglm2,以便与包中的原函数区分开;下文把您的数据称为 df:
library(season)
library(MASS) # for mvrnorm
library(survival) # for coxph
library(ggplot2)
## Convert a month variable to calendar month numbers (1 = January, ..., 12 = December).
##   numeric   - returned unchanged (validated by the caller)
##   character - matched by name against month.name, case-insensitively; any
##               unambiguous prefix of three or more letters is accepted, so
##               "Jan", "January" and mixed sets such as "Feb"/"April" all work
##   factor    - level labels are matched by name as above, independent of the
##               order of the levels; if any label is not a month name the
##               integer level codes are used instead (the previous behaviour)
.month_numbers <- function(x) {
  by_name <- function(labels) {
    key <- tolower(trimws(as.character(labels)))
    usable <- !is.na(key) & nchar(key) >= 3L
    out <- rep(NA_integer_, length(key))
    hits <- charmatch(key[usable], tolower(month.name))
    hits[which(hits == 0L)] <- NA_integer_ # 0 means an ambiguous partial match
    out[usable] <- hits
    out
  }
  if (is.factor(x)) {
    level_nums <- by_name(levels(x))
    if (anyNA(level_nums)) {
      return(as.integer(x))
    }
    return(level_nums[as.integer(x)])
  }
  if (is.character(x)) {
    return(by_name(x))
  }
  if (is.numeric(x)) {
    return(x)
  }
  stop("Month variable must be numeric, character or a factor")
}

## monthglm2: fit a glm with month added as a categorical (factor) term.
##
## Arguments:
##   formula     - two-sided model formula; '+months' is appended before fitting
##   data        - data frame containing the model variables and `monthvar`
##   family      - passed unchanged to glm()
##   refmonth    - reference month (1-12); must occur in the data
##   monthvar    - name (string) of the month column in `data`
##   offsetmonth - if TRUE, the offset includes days-in-month / (365.25/12);
##                 this calls flagleap(), which needs a lower-case `year` column
##   offsetpop   - optional expression evaluated inside `data` (population offset)
## Returns a list of class 'monthglm' with elements call, glm, fitted.values
## and residuals.
monthglm2 <- function(formula, data, family = gaussian(), refmonth = 1,
                      monthvar = "month", offsetmonth = FALSE, offsetpop = NULL) {
  ## checks
  if (length(refmonth) != 1L || is.na(refmonth) || refmonth < 1 || refmonth > 12) {
    stop("Reference month must be between 1 and 12")
  }
  if (!inherits(formula, "formula") || length(formula) != 3L) {
    stop("formula must be a two-sided formula")
  }
  if (!is.character(monthvar) || length(monthvar) != 1L ||
    !(monthvar %in% names(data))) {
    ## a missing column must not silently resolve to an unrelated object
    stop("Month variable not found in data: ", monthvar)
  }

  ## original call with defaults (see amer package); sys.function() is used so
  ## that this also works when the function is not called by its plain name
  ans <- as.list(match.call())
  frmls <- formals(sys.function())
  add <- which(!(names(frmls) %in% names(ans)))
  full_call <- as.call(c(ans, frmls[add]))

  ## month numbers -> factor labelled with month.abb, reference month first
  nums <- .month_numbers(data[[monthvar]])
  if (!all(is.na(nums) | nums %in% 1:12)) {
    stop("Month values must be whole numbers between 1 and 12")
  }
  present <- sort(unique(nums[!is.na(nums)]))
  if (!(refmonth %in% present)) {
    stop("Reference month does not occur in the data")
  }
  ## only months that occur become levels, so labels cannot be misassigned
  months <- factor(month.abb[nums], levels = month.abb[present])
  months <- relevel(months, ref = month.abb[refmonth]) # set reference month

  ## prepare data/formula; as.formula() is called here so that the formula's
  ## environment is this function's frame, where glm() finds `months` and `off`
  parts <- paste(formula)
  f <- as.formula(paste(parts[2], parts[1], parts[3:length(formula)], "+months"))

  n_rows <- nrow(data)
  poff <- if (is.null(offsetpop)) rep(1, n_rows) else with(data, eval(offsetpop))
  if (isTRUE(as.logical(offsetmonth))) {
    ## flagleap() is only needed for the month-length offset; give it a copy
    ## carrying numeric months so the data passed to glm() stays untouched
    leap_data <- data
    leap_data$month <- nums
    if (is.null(leap_data[["year"]])) {
      stop("data must contain a 'year' column when offsetmonth = TRUE")
    }
    days <- flagleap(data = leap_data, report = FALSE, matchin = TRUE)
    moff <- days$ndaysmonth / (365.25 / 12) # days per month / average month length
  } else {
    moff <- rep(1, n_rows)
  }
  off <- log(poff * moff)
  fit <- glm(formula = f, data = data, family = family, offset = off)

  ## return
  structure(
    list(
      call = full_call,
      glm = fit,
      fitted.values = fitted(fit),
      residuals = residuals(fit)
    ),
    class = "monthglm"
  )
}
## The function needs a lower-case `year` column (see the note below the
## code), so copy the existing `Year` column under that name.
df[["year"]] <- df[["Year"]]
## Fit the model; `Month` holds the month variable, January is the reference.
monthglm2(
  formula = Frequency_Blue_Whales_Year_Month ~ Year + Season,
  data = df,
  family = gaussian(),
  refmonth = 1,
  monthvar = "Month",
  offsetmonth = TRUE
)
函数中还存在另一个问题:我必须把年份列重命名为 year。如果您查看这个包的 GitHub 仓库,会发现它只有一位贡献者,而且没有人提交过任何 issue。使用这类包有利有弊:它们可能提供有用的新方法,但其中的错误往往无法被快速发现和修复。如果您打算继续做季节性分析,我建议您试着学习如何直接在 glm 中对季节性进行建模。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.