[英]Tabulate coefficients from lm
我有10個線性模型,僅需要一些信息,即:r平方,p值,斜率和截距系數。 我設法提取了這些值(通過荒謬地重復代碼)。 現在,我需要將這些值制成表格(列中的信息;行列出了線性模型1-10的結果)。 誰能幫幫我嗎? 我還有數百個線性模型要做。 我敢肯定一定有辦法。
碼:
d<-read.csv("example.csv",header=T)
# Subset data
A3G1 <- subset(d, CatChro=="A3G1"); A4G1 <- subset(d, CatChro=="A4G1")
A3G2 <- subset(d, CatChro=="A3G2"); A4G2 <- subset(d, CatChro=="A4G2")
A3G3 <- subset(d, CatChro=="A3G3"); A4G3 <- subset(d, CatChro=="A4G3")
A3G4 <- subset(d, CatChro=="A3G4"); A4G4 <- subset(d, CatChro=="A4G4")
A3G5 <- subset(d, CatChro=="A3G5"); A4G5 <- subset(d, CatChro=="A4G5")
A3D1 <- subset(d, CatChro=="A3D1"); A4D1 <- subset(d, CatChro=="A4D1")
A3D2 <- subset(d, CatChro=="A3D2"); A4D2 <- subset(d, CatChro=="A4D2")
A3D3 <- subset(d, CatChro=="A3D3"); A4D3 <- subset(d, CatChro=="A4D3")
A3D4 <- subset(d, CatChro=="A3D4"); A4D4 <- subset(d, CatChro=="A4D4")
A3D5 <- subset(d, CatChro=="A3D5"); A4D5 <- subset(d, CatChro=="A4D5")
# Fit individual lines
rA3G1 <- lm(Qend~Rainfall, data=A3G1); summary(rA3G1)
rA3D1 <- lm(Qend~Rainfall, data=A3D1); summary(rA3D1)
rA3G2 <- lm(Qend~Rainfall, data=A3G2); summary(rA3G2)
rA3D2 <- lm(Qend~Rainfall, data=A3D2); summary(rA3D2)
rA3G3 <- lm(Qend~Rainfall, data=A3G3); summary(rA3G3)
rA3D3 <- lm(Qend~Rainfall, data=A3D3); summary(rA3D3)
rA3G4 <- lm(Qend~Rainfall, data=A3G4); summary(rA3G4)
rA3D4 <- lm(Qend~Rainfall, data=A3D4); summary(rA3D4)
rA3G5 <- lm(Qend~Rainfall, data=A3G5); summary(rA3G5)
rA3D5 <- lm(Qend~Rainfall, data=A3D5); summary(rA3D5)
rA4G1 <- lm(Qend~Rainfall, data=A4G1); summary(rA4G1)
rA4D1 <- lm(Qend~Rainfall, data=A4D1); summary(rA4D1)
rA4G2 <- lm(Qend~Rainfall, data=A4G2); summary(rA4G2)
rA4D2 <- lm(Qend~Rainfall, data=A4D2); summary(rA4D2)
rA4G3 <- lm(Qend~Rainfall, data=A4G3); summary(rA4G3)
rA4D3 <- lm(Qend~Rainfall, data=A4D3); summary(rA4D3)
rA4G4 <- lm(Qend~Rainfall, data=A4G4); summary(rA4G4)
rA4D4 <- lm(Qend~Rainfall, data=A4D4); summary(rA4D4)
rA4G5 <- lm(Qend~Rainfall, data=A4G5); summary(rA4G5)
rA4D5 <- lm(Qend~Rainfall, data=A4D5); summary(rA4D5)
# Gradient
summary(rA3G1)$coefficients[2,1]
summary(rA3D1)$coefficients[2,1]
summary(rA3G2)$coefficients[2,1]
summary(rA3D2)$coefficients[2,1]
summary(rA3G3)$coefficients[2,1]
summary(rA3D3)$coefficients[2,1]
summary(rA3G4)$coefficients[2,1]
summary(rA3D4)$coefficients[2,1]
summary(rA3G5)$coefficients[2,1]
summary(rA3D5)$coefficients[2,1]
# Intercept
summary(rA3G1)$coefficients[2,2]
summary(rA3D1)$coefficients[2,2]
summary(rA3G2)$coefficients[2,2]
summary(rA3D2)$coefficients[2,2]
summary(rA3G3)$coefficients[2,2]
summary(rA3D3)$coefficients[2,2]
summary(rA3G4)$coefficients[2,2]
summary(rA3D4)$coefficients[2,2]
summary(rA3G5)$coefficients[2,2]
summary(rA3D5)$coefficients[2,2]
# r-sq
summary(rA3G1)$r.squared
summary(rA3D1)$r.squared
summary(rA3G2)$r.squared
summary(rA3D2)$r.squared
summary(rA3G3)$r.squared
summary(rA3D3)$r.squared
summary(rA3G4)$r.squared
summary(rA3D4)$r.squared
summary(rA3G5)$r.squared
summary(rA3D5)$r.squared
# adj r-sq
summary(rA3G1)$adj.r.squared
summary(rA3D1)$adj.r.squared
summary(rA3G2)$adj.r.squared
summary(rA3D2)$adj.r.squared
summary(rA3G3)$adj.r.squared
summary(rA3D3)$adj.r.squared
summary(rA3G4)$adj.r.squared
summary(rA3D4)$adj.r.squared
summary(rA3G5)$adj.r.squared
summary(rA3D5)$adj.r.squared
# p-level
p <- summary(rA3G1)$fstatistic
pf(p[1], p[2], p[3], lower.tail=FALSE)
p2 <- summary(rA3D1)$fstatistic
pf(p2[1], p2[2], p2[3], lower.tail=FALSE)
p3 <- summary(rA3G2)$fstatistic
pf(p3[1], p3[2], p3[3], lower.tail=FALSE)
p4 <- summary(rA3D2)$fstatistic
pf(p4[1], p4[2], p4[3], lower.tail=FALSE)
p5 <- summary(rA3G3)$fstatistic
pf(p5[1], p5[2], p5[3], lower.tail=FALSE)
p6 <- summary(rA3D3)$fstatistic
pf(p6[1], p6[2], p6[3], lower.tail=FALSE)
p7 <- summary(rA3G4)$fstatistic
pf(p7[1], p7[2], p7[3], lower.tail=FALSE)
p8 <- summary(rA3D4)$fstatistic
pf(p8[1], p8[2], p8[3], lower.tail=FALSE)
p9 <- summary(rA3G5)$fstatistic
pf(p9[1], p9[2], p9[3], lower.tail=FALSE)
p10 <- summary(rA3D5)$fstatistic
pf(p10[1], p10[2], p10[3], lower.tail=FALSE)
這是我預期結果的結構:
有什么辦法可以做到這一點?
使用library(data.table)
可以
d <- fread("example.csv")
d[, .(
r2 = (fit <- summary(lm(Qend~Rainfall)))$r.squared,
adj.r2 = fit$adj.r.squared,
intercept = fit$coefficients[1,1],
gradient = fit$coefficients[2,1],
p.value = {p <- fit$fstatistic; pf(p[1], p[2], p[3], lower.tail=FALSE)}),
by = CatChro]
# CatChro r2 adj.r2 intercept gradient p.value
# 1: A3G1 0.03627553 0.011564648 0.024432020 0.0001147645 0.2329519751
# 2: A3D1 0.28069553 0.254054622 0.011876543 0.0004085644 0.0031181110
# 3: A3G2 0.06449971 0.041112205 0.026079409 0.0004583538 0.1045970987
# 4: A3D2 0.03384173 0.005425311 0.023601325 0.0005431693 0.2828170556
# 5: A3G3 0.07587433 0.054383038 0.043537869 0.0006964512 0.0670399684
# 6: A3D3 0.04285322 0.002972105 0.022106960 0.0001451185 0.3102578215
# 7: A3G4 0.17337420 0.155404076 0.023706652 0.0006442175 0.0032431299
# 8: A3D4 0.37219027 0.349768492 0.009301843 0.0006614213 0.0003442445
# 9: A3G5 0.17227383 0.150491566 0.025994831 0.0006658466 0.0077413595
#10: A3D5 0.04411669 -0.008987936 0.014341399 0.0001084626 0.3741011769
考慮建立lm
結果矩陣。 首先創建一個定義的函數,以處理帶有結果提取的通用數據框架模型。 然后,調用by
其可以通過一個因子列子集數據幀,並通過每個子集成定義的方法。 最后,將所有分組的矩陣rbind
在一起以獲得奇異的輸出
lm_results <- function(df) {
model <- lm(Qend ~ Rainfall, data = df)
res <- summary(model)
p <- res$fstatistic
c(gradient = res$coefficients[2,1],
intercept = res$coefficients[2,2],
r_sq = res$r.squared,
adj_r_sq = res$adj.r.squared,
f_stat = p[['value']],
p_value = unname(pf(p[1], p[2], p[3], lower.tail=FALSE))
)
}
matrix_list <- by(d, d$group, lm_results)
final_matrix <- do.call(rbind, matrix_list)
演示隨機的種子數據
set.seed(12262018)
data_tools <- c("sas", "stata", "spss", "python", "r", "julia")
d <- data.frame(
group = sample(data_tools, 500, replace=TRUE),
int = sample(1:15, 500, replace=TRUE),
Qend = rnorm(500) / 100,
Rainfall = rnorm(500) * 10
)
結果
mat_list <- by(d, d$group, lm_results)
final_matrix <- do.call(rbind, mat_list)
final_matrix
# gradient intercept r_sq adj_r_sq f_stat p_value
# julia -1.407313e-04 1.203832e-04 0.017219149 0.004619395 1.3666258 0.24595273
# python -1.438116e-04 1.125170e-04 0.018641512 0.007230367 1.6336233 0.20464162
# r 2.031717e-04 1.168037e-04 0.041432175 0.027738349 3.0256098 0.08635510
# sas -1.549510e-04 9.067337e-05 0.032476668 0.021355710 2.9203121 0.09103619
# spss 9.326656e-05 1.068516e-04 0.008583473 -0.002682623 0.7618853 0.38511469
# stata -7.079514e-05 1.024010e-04 0.006013841 -0.006568262 0.4779679 0.49137093
這是基本的R解決方案:
data <- read.csv("./data/so53933238.csv",header=TRUE)
# split by value of CatChro into a list of datasets
dataList <- split(data,data$CatChro)
# process the list with lm(), extract results to a data frame, write to a list
lmResults <- lapply(dataList,function(x){
y <- summary(lm(Qend ~ Rainfall,data = x))
Intercept <- y$coefficients[1,1]
Slope <- y$coefficients[2,1]
rSquared <- y$r.squared
adjRSquared <- y$adj.r.squared
f <- y$fstatistic[1]
pValue <- pf(y$fstatistic[1],y$fstatistic[2],y$fstatistic[3],lower.tail = FALSE)
data.frame(Slope,Intercept,rSquared,adjRSquared,pValue)
})
lmResultTable <- do.call(rbind,lmResults)
# add CatChro indicators
lmResultTable$catChro <- names(dataList)
lmResultTable
...以及輸出:
> lmResultTable
Slope Intercept rSquared adjRSquared pValue catChro
A3D1 0.0004085644 0.011876543 0.28069553 0.254054622 0.0031181110 A3D1
A3D2 0.0005431693 0.023601325 0.03384173 0.005425311 0.2828170556 A3D2
A3D3 0.0001451185 0.022106960 0.04285322 0.002972105 0.3102578215 A3D3
A3D4 0.0006614213 0.009301843 0.37219027 0.349768492 0.0003442445 A3D4
A3D5 0.0001084626 0.014341399 0.04411669 -0.008987936 0.3741011769 A3D5
A3G1 0.0001147645 0.024432020 0.03627553 0.011564648 0.2329519751 A3G1
A3G2 0.0004583538 0.026079409 0.06449971 0.041112205 0.1045970987 A3G2
A3G3 0.0006964512 0.043537869 0.07587433 0.054383038 0.0670399684 A3G3
A3G4 0.0006442175 0.023706652 0.17337420 0.155404076 0.0032431299 A3G4
A3G5 0.0006658466 0.025994831 0.17227383 0.150491566 0.0077413595 A3G5
>
要以HTML表格格式呈現輸出,可以使用knitr::kable()
。
library(knitr)
kable(lmResultTable[1:5],row.names=TRUE,digits=5)
...在渲染Markdown之后產生以下輸出:
這里僅幾行:
library(tidyverse)
library(broom)
# create grouped dataframe:
df_g <- df %>% group_by(CatChro)
df_g %>% do(tidy(lm(Qend ~ Rainfall, data = .))) %>%
select(CatChro, term, estimate) %>% spread(term, estimate) %>%
left_join(df_g %>% do(glance(lm(Qend ~ Rainfall, data = .))) %>%
select(CatChro, r.squared, adj.r.squared, p.value), by = "CatChro")
結果將是:
# A tibble: 10 x 6
# Groups: CatChro [?]
CatChro `(Intercept)` Rainfall r.squared adj.r.squared p.value
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 A3D1 0.0119 0.000409 0.281 0.254 0.00312
2 A3D2 0.0236 0.000543 0.0338 0.00543 0.283
3 A3D3 0.0221 0.000145 0.0429 0.00297 0.310
4 A3D4 0.00930 0.000661 0.372 0.350 0.000344
5 A3D5 0.0143 0.000108 0.0441 -0.00899 0.374
6 A3G1 0.0244 0.000115 0.0363 0.0116 0.233
7 A3G2 0.0261 0.000458 0.0645 0.0411 0.105
8 A3G3 0.0435 0.000696 0.0759 0.0544 0.0670
9 A3G4 0.0237 0.000644 0.173 0.155 0.00324
10 A3G5 0.0260 0.000666 0.172 0.150 0.00774
下面的代碼創建一個具有所有系數和相應統計信息的數據幀(整潔地將lm的結果轉換為數據幀):
df_g %>%
do(tidy(lm(Qend ~ Rainfall, data = .)))
A tibble: 20 x 6
Groups: CatChro [10]
CatChro term estimate std.error statistic p.value
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 A3D1 (Intercept) 0.0119 0.00358 3.32 0.00258
2 A3D1 Rainfall 0.000409 0.000126 3.25 0.00312
3 A3D2 (Intercept) 0.0236 0.00928 2.54 0.0157
4 A3D2 Rainfall 0.000543 0.000498 1.09 0.283
我知道您希望將截距和降雨的系數作為單獨的列,所以讓我們“分布”它們。 這是通過首先選擇相關列,然后調用tidyr::spread
,如
select(CatChro, term, estimate) %>% spread(term, estimate)
這給您:
df_g %>% do(tidy(lm(Qend ~ Rainfall, data = .))) %>%
select(CatChro, term, estimate) %>% spread(term, estimate)
A tibble: 10 x 3
Groups: CatChro [10]
CatChro `(Intercept)` Rainfall
<chr> <dbl> <dbl>
1 A3D1 0.0119 0.000409
2 A3D2 0.0236 0.000543
3 A3D3 0.0221 0.000145
4 A3D4 0.00930 0.000661
Glance為您提供了每個模型所需的摘要統計信息。 這些模型按組(在此處為CatChro)建立索引,因此很容易將它們合並到先前的數據幀中,這就是其余代碼的含義。
另一個解決方案,使用lme4::lmList
。 lmList
生成的對象的summary()
方法幾乎可以完成您想要的所有操作(盡管它不存儲p值,但是我必須在下面添加一些內容)。
m <- lme4::lmList(Qend~Rainfall|CatChro,data=d)
s <- summary(m)
pvals <- apply(s$fstatistic,1,function(x) pf(x[1],x[2],x[3],lower.tail=FALSE))
data.frame(intercept=coef(s)[,"Estimate","(Intercept)"],
slope=coef(s)[,"Estimate","Rainfall"],
r.squared=s$r.squared,
adj.r.squared=unlist(s$adj.r.squared),
p.value=pvals)
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.