简体   繁体   English

如何解释 R 中 stats::contrasts 的 output

[英]How to interpret the output of stats::contrasts in R

I have an ordered factor column Severity with four levels: "Low"<"Mild"<"High"<"Extreme".我有一个具有四个级别的有序因子列Severity :“Low”<“Mild”<“High”<“Extreme”。 When I ran the code contrasts(df$Severity) it produced the following output:当我运行代码contrasts(df$Severity)时,它产生了以下 output:

             .L   .Q         .C
[1,] -0.6708204  0.5 -0.2236068
[2,] -0.2236068 -0.5  0.6708204
[3,]  0.2236068 -0.5 -0.6708204
[4,]  0.6708204  0.5  0.2236068

What did that matrix mean, in particular the names of the rows [1,], [2,]... and the columns.L, .Q, .C and the values -0.6708204, 0.5 etc?该矩阵是什么意思,特别是行 [1,]、[2,]... 和列的名称.L、.Q、.C 以及值 -0.6708204、0.5 等?

It is just that the ordered column is called by contr.poly if we check the source code of contrasts只是查看contrasts的源码发现有contr.poly ordered

> contrasts
function (x, contrasts = TRUE, sparse = FALSE) 
{
...
ctr <- attr(x, "contrasts")
    if ((NL <- is.null(ctr)) || is.character(ctr)) {
        if (NL) 
            ctr <- getOption("contrasts")[[if (is.ordered(x)) 
                2L
            else 1L]]
        ctrfn <- get(ctr, mode = "function", envir = parent.frame())
...

ctr <- if (useSparse) 
            ctrfn(levels(x), contrasts = contrasts, sparse = sparse)
        else ctrfn(levels(x), contrasts = contrasts)
  }
ctr
}

For eg using a small reproducible example例如,使用一个可重现的小例子

> data(iris)
> iris$Species <- ordered(iris$Species)
> contrasts(iris$Species)
                .L         .Q
[1,] -7.071068e-01  0.4082483
[2,] -7.850462e-17 -0.8164966
[3,]  7.071068e-01  0.4082483

Now, we go over the contrasts code现在,我们 go contrasts一下代码

> attr(iris$Species, "contrasts")
NULL
# as it is null, get the option of `contrasts`
> ctr <- getOption("contrasts")[[if (is.ordered(iris$Species)) 
                 2L
             else 1L]]
> ctr
[1] "contr.poly"
>  ctrfn <- get(ctr, mode = "function", envir = parent.frame())
> ctrfn
function (n, scores = 1:n, contrasts = TRUE, sparse = FALSE) 
{
    make.poly <- function(n, scores) {
        y <- scores - mean(scores)
        X <- outer(y, seq_len(n) - 1, `^`)
        QR <- qr(X)
        z <- QR$qr
        z <- z * (row(z) == col(z))
        raw <- qr.qy(QR, z)
        Z <- sweep(raw, 2L, apply(raw, 2L, function(x) sqrt(sum(x^2))), 
            `/`, check.margin = FALSE)
        colnames(Z) <- paste0("^", 1L:n - 1L)
        Z
    }
...

Then, if we go over the make.poly , it would return the output - just add some print/message in a modified function and call然后,如果我们在 make.poly 上输入make.poly ,它将返回 output - 只需在修改后的 function 中添加一些print/message并调用

contr_poly <- function (n, scores = 1:n, contrasts = TRUE, sparse = FALSE) 
{
    make.poly <- function(n, scores) {
        message(glue::glue("n: {n}"))
        message(glue::glue("scores: {toString(scores)}"))
        y <- scores - mean(scores)
        message(glue::glue("y: {toString(y)}"))
        X <- outer(y, seq_len(n) - 1, `^`)
        print("X")
        print(X)
        QR <- qr(X)
        print("QR")
        print(QR)
        z <- QR$qr
        z <- z * (row(z) == col(z))
        raw <- qr.qy(QR, z)
        Z <- sweep(raw, 2L, apply(raw, 2L, function(x) sqrt(sum(x^2))), 
            `/`, check.margin = FALSE)
        colnames(Z) <- paste0("^", 1L:n - 1L)
    
        print("Z")
        print(Z)
        Z
    }
    if (is.numeric(n) && length(n) == 1L) 
        levs <- seq_len(n)
    else {
        levs <- n
        n <- length(levs)
    }
    if (n < 2) 
        stop(gettextf("contrasts not defined for %d degrees of freedom", 
            n - 1), domain = NA)
    if (n > 95) 
        stop(gettextf("orthogonal polynomials cannot be represented accurately enough for %d degrees of freedom", 
            n - 1), domain = NA)
    if (length(scores) != n) 
        stop("'scores' argument is of the wrong length")
    if (!is.numeric(scores) || anyDuplicated(scores)) 
        stop("'scores' must all be different numbers")
    contr <- make.poly(n, scores)
    if (sparse) 
        contr <- .asSparse(contr)
    if (contrasts) {
        dn <- colnames(contr)
        dn[2:min(4, n)] <- c(".L", ".Q", ".C")[1:min(3, n - 1)]
        colnames(contr) <- dn
        contr[, -1, drop = FALSE]
    }
    else {
        contr[, 1] <- 1
        contr
    }
}

-call on the levels(iris$Species) -调用levels(iris$Species)

> contr_poly(levels(iris$Species))
n: 3
scores: 1, 2, 3
y: -1, 0, 1
[1] "X"
     [,1] [,2] [,3]
[1,]    1   -1    1
[2,]    1    0    0
[3,]    1    1    1
[1] "QR"
$qr
           [,1]          [,2]          [,3]
[1,] -1.7320508 -1.110223e-16 -1.154701e+00
[2,]  0.5773503 -1.414214e+00  1.110223e-16
[3,]  0.5773503  9.659258e-01  8.164966e-01

$rank
[1] 3

$qraux
[1] 1.5773503 1.2588190 0.8164966

$pivot
[1] 1 2 3

attr(,"class")
[1] "qr"
[1] "Z"
            ^0            ^1         ^2
[1,] 0.5773503 -7.071068e-01  0.4082483
[2,] 0.5773503 -7.850462e-17 -0.8164966
[3,] 0.5773503  7.071068e-01  0.4082483
                .L         .Q
[1,] -7.071068e-01  0.4082483
[2,] -7.850462e-17 -0.8164966
[3,]  7.071068e-01  0.4082483

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM