从 Anova（car 包）的输出中提取 P 值列

Question

我正在使用 car 包中的 Anova 函数进行一些统计检验。

它给出了以下 output：

    # Bind the three response columns into one matrix so that lm() fits a
    # multivariate linear model.
    Y <- cbind(curdata$V1, curdata$V2, curdata$V3)
    mymdl <- lm(Y ~ curdata$V4 + curdata$V5)
    # Anova() from the car package applied to the multivariate fit.
    myanova <- Anova(mymdl)
    
Type II MANOVA Tests: Pillai test statistic
           Df test stat approx F num Df den Df  Pr(>F)  
curdata$V4  1   0.27941   2.9728      3     23 0.05280 .
curdata$V5  1   0.33570   3.8743      3     23 0.02228 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

我想提取 'Pr(>F)' 列中的值，以便把这些 p 值放入另一个矩阵，之后再进行多重比较校正。

我尝试过使用 unlist，但它仍然没有提供在列中找到的 p 值。

对此的任何帮助将不胜感激。

Answer 1

如果有多个响应变量，那么这就是 MANOVA。我们可以捕获打印出来的输出，并用正则表达式提取 p 值：

# Parse the p-values back out of the printed MANOVA table.
# The table body starts on the 4th captured line (after a blank line, the
# heading and the column header) and has one row per model term, so the rows
# are selected from the number of terms instead of a hard-coded 4:5.
printed <- capture.output(out)
term_rows <- printed[3L + seq_along(out$terms)]
# NOTE: this reads the *printed* text, so a p-value displayed as "< 2.2e-16"
# comes back as the bound 2.2e-16, not as the exact value.
as.numeric(sub(".*\\s*(\\d+\\.[0-9e-]+)\\s*[*.]*", "\\1", term_rows))
#[1] 8.836e-06 2.200e-16

数据

 # Two sepal measurements as a joint response, modelled on both petal
 # measurements from the built-in iris data.
 mymdl <- lm(
   cbind(Sepal.Length, Sepal.Width) ~ Petal.Width + Petal.Length,
   data = iris
 )

 # MANOVA table for the multivariate fit (Anova() comes from the car package).
 out <- Anova(mymdl)

Answer 2

也许不是最实用的方法，但您可以使用tidyr中的separate()来处理列：

library(car)
library(dplyr)
library(tidyr)
# Code
# Capture the printed table: one character string per printed line.
v1 <- data.frame(capture.output(myanova))
# Keep the column-header line and the two term rows. `drop = FALSE` is
# spelled out because `F` is an ordinary variable that can be reassigned.
v1 <- v1[3:5, , drop = FALSE]
names(v1) <- "v1"
# Split every line on single whitespace characters into 21 pieces (v1..v21).
# 21 matches the width of this particular printout and must be adjusted for
# tables with a different layout.
v2 <- separate(v1, v1, paste0("v", 1:21), sep = "\\s")
# Drop the header line so that only the term rows remain.
v2 <- v2[-1, ]

Output：

as.numeric(v2$v21)
[1] 8.836e-06 2.200e-16

注意：如果捕获到的输出列数不同（例如列更多），则需要相应地调整 1:21。

Answer 3

TLDR：

# define helper:
# Copy car's print method, then cut the last two statements of its body
# (the print() call and invisible(x)) so the computed table is the result.
get_summary_for_print <- car:::print.Anova.mlm
body(get_summary_for_print) <- (function(stmts) {
  stmts[seq_len(length(stmts) - 2L)]
})(body(get_summary_for_print))
#use it:
get_summary_for_print(Anova(mymdl))$`Pr(>F)`

可惜没有专门用来提取这些值的现成方法。不过您可以查看 car:::print.Anova.mlm 的源代码（在 R 控制台中输入该名称即可），了解它是如何计算出您想要的值的：

# Print method for the multivariate Anova object `x`: builds the table of test
# statistics and p-values, prints it, and returns `x` invisibly.
function (x, ...) 
{
    # Abort when the object is flagged as having a singular error SSP matrix.
    if ((!is.null(x$singular)) && x$singular) 
        stop("singular error SSP matrix; multivariate tests unavailable\ntry summary(object, multivariate=FALSE)")
    test <- x$test
    repeated <- x$repeated
    ntests <- length(x$terms)
    # One row per term; the four columns are named "test stat", "approx F",
    # "num Df" and "den Df" further down.
    tests <- matrix(NA, ntests, 4)
    # Without repeated measures a single error SSP matrix is shared by all
    # terms, so it is decomposed once up front.
    if (!repeated) 
        SSPE.qr <- qr(x$SSPE)
    for (term in 1:ntests) {
        # Real parts of the eigenvalues of the qr.coef() solution for this
        # term's SSP matrix against the error SSP matrix.
        eigs <- Re(eigen(qr.coef(if (repeated) qr(x$SSPE[[term]]) else SSPE.qr, 
            x$SSP[[term]]), symmetric = FALSE)$values)
        # Dispatch on the requested test statistic to fill the four columns.
        tests[term, 1:4] <- switch(test, Pillai = Pillai(eigs, 
            x$df[term], x$error.df), Wilks = Wilks(eigs, x$df[term], 
            x$error.df), `Hotelling-Lawley` = HL(eigs, x$df[term], 
            x$error.df), Roy = Roy(eigs, x$df[term], x$error.df))
    }
    # Rows whose F statistic and both degrees of freedom are usable; NA
    # comparisons count as not usable.
    ok <- tests[, 2] >= 0 & tests[, 3] > 0 & tests[, 4] > 0
    ok <- !is.na(ok) & ok
    # Append the term df in front and the upper-tail F probability (the
    # p-value) at the end.
    # NOTE(review): pf() is evaluated on the `ok` rows only, so its result is
    # shorter than the table whenever a row is not ok -- confirm how cbind()
    # behaves in that case.
    tests <- cbind(x$df, tests, pf(tests[ok, 2], tests[ok, 3], 
        tests[ok, 4], lower.tail = FALSE))
    rownames(tests) <- x$terms
    colnames(tests) <- c("Df", "test stat", "approx F", "num Df", 
        "den Df", "Pr(>F)")
    # Convert to an "anova" data frame with a heading so it prints as an
    # anova table.
    tests <- structure(as.data.frame(tests), heading = paste("\nType ", 
        x$type, if (repeated) 
            " Repeated Measures", " MANOVA Tests: ", test, " test statistic", 
        sep = ""), class = c("anova", "data.frame"))
    # The table is only printed; the value returned is the original object.
    print(tests, ...)
    invisible(x)
}
<bytecode: 0x56032ea80990>
<environment: namespace:car>

在这种情况下，计算 p 值需要相当多行代码。不过，我们可以很容易地创建这个 print 函数的修改版本：让它返回表格（tests），而不是只把表格打印出来（print(tests, ...)）再返回原始对象（invisible(x)）：

# Start from a copy of car's print method. The copy keeps the original
# function's environment.
get_summary_for_print <- car:::print.Anova.mlm
# Replace the copy's body with the same statements minus the final two,
# print(tests, ...) and invisible(x), so the `tests` table becomes the
# function's value. The immediately-invoked function keeps the temporary
# out of the workspace.
body(get_summary_for_print) <- (function(stmts) {
  stmts[seq_len(length(stmts) - 2L)]
})(body(get_summary_for_print))

并像这样使用它：

library(car)
#> Loading required package: carData
# Three-response multivariate model on the built-in iris data, passed
# straight to Anova().
res <- Anova(
  lm(
    cbind(Sepal.Width, Sepal.Length, Petal.Width) ~ Species + Petal.Length,
    data = iris
  )
)
# Auto-print the result to show the formatted table.
res
#> 
#> Type II MANOVA Tests: Pillai test statistic
#>              Df test stat approx F num Df den Df    Pr(>F)    
#> Species       2   0.70215   26.149      6    290 < 2.2e-16 ***
#> Petal.Length  1   0.63487   83.461      3    144 < 2.2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
str(get_summary_for_print(res))
#> Classes 'anova' and 'data.frame':    2 obs. of  6 variables:
#>  $ Df       : num  2 1
#>  $ test stat: num  0.702 0.635
#>  $ approx F : num  26.1 83.5
#>  $ num Df   : num  6 3
#>  $ den Df   : num  290 144
#>  $ Pr(>F)   : num  7.96e-25 2.41e-31
#>  - attr(*, "heading")= chr "\nType II MANOVA Tests: Pillai test statistic"

从 Anova（car 包）的输出中提取 P 值列

问题描述

3 个解决方案

解决方案1
2 已采纳 2020-12-05 20:26:09

数据

解决方案2
1 2020-12-05 21:14:43

解决方案3
0 2021-03-09 16:26:17

从 Anova（car 包）的输出中提取 P 值列

问题描述

3 个解决方案

解决方案1 2 已采纳 2020-12-05 20:26:09

数据

解决方案2 1 2020-12-05 21:14:43

解决方案3 0 2021-03-09 16:26:17

解决方案1
2 已采纳 2020-12-05 20:26:09

解决方案2
1 2020-12-05 21:14:43

解决方案3
0 2021-03-09 16:26:17