[英]How to interpret the output of stats::contrasts in R
我有一個具有四個級別的有序因子列Severity
:“Low”<“Mild”<“High”<“Extreme”。 當我運行代碼contrasts(df$Severity)
時,它產生了以下 output:
.L .Q .C
[1,] -0.6708204 0.5 -0.2236068
[2,] -0.2236068 -0.5 0.6708204
[3,] 0.2236068 -0.5 -0.6708204
[4,] 0.6708204 0.5 0.2236068
該矩陣是什么意思,特別是行 [1,]、[2,]... 和列的名稱.L、.Q、.C 以及值 -0.6708204、0.5 等?
只是查看contrasts
的源碼發現有contr.poly
ordered
的
> contrasts
function (x, contrasts = TRUE, sparse = FALSE)
{
...
ctr <- attr(x, "contrasts")
if ((NL <- is.null(ctr)) || is.character(ctr)) {
if (NL)
ctr <- getOption("contrasts")[[if (is.ordered(x))
2L
else 1L]]
ctrfn <- get(ctr, mode = "function", envir = parent.frame())
...
ctr <- if (useSparse)
ctrfn(levels(x), contrasts = contrasts, sparse = sparse)
else ctrfn(levels(x), contrasts = contrasts)
}
ctr
}
例如,使用一個可重現的小例子
> data(iris)
> iris$Species <- ordered(iris$Species)
> contrasts(iris$Species)
.L .Q
[1,] -7.071068e-01 0.4082483
[2,] -7.850462e-17 -0.8164966
[3,] 7.071068e-01 0.4082483
現在,我們 go contrasts
一下代碼
> attr(iris$Species, "contrasts")
NULL
# as it is null, get the option of `contrasts`
> ctr <- getOption("contrasts")[[if (is.ordered(iris$Species))
2L
else 1L]]
> ctr
[1] "contr.poly"
> ctrfn <- get(ctr, mode = "function", envir = parent.frame())
> ctrfn
function (n, scores = 1:n, contrasts = TRUE, sparse = FALSE)
{
make.poly <- function(n, scores) {
y <- scores - mean(scores)
X <- outer(y, seq_len(n) - 1, `^`)
QR <- qr(X)
z <- QR$qr
z <- z * (row(z) == col(z))
raw <- qr.qy(QR, z)
Z <- sweep(raw, 2L, apply(raw, 2L, function(x) sqrt(sum(x^2))),
`/`, check.margin = FALSE)
colnames(Z) <- paste0("^", 1L:n - 1L)
Z
}
...
然后,如果我們在 make.poly 上輸入make.poly
,它將返回 output - 只需在修改后的 function 中添加一些print/message
並調用
contr_poly <- function (n, scores = 1:n, contrasts = TRUE, sparse = FALSE)
{
make.poly <- function(n, scores) {
message(glue::glue("n: {n}"))
message(glue::glue("scores: {toString(scores)}"))
y <- scores - mean(scores)
message(glue::glue("y: {toString(y)}"))
X <- outer(y, seq_len(n) - 1, `^`)
print("X")
print(X)
QR <- qr(X)
print("QR")
print(QR)
z <- QR$qr
z <- z * (row(z) == col(z))
raw <- qr.qy(QR, z)
Z <- sweep(raw, 2L, apply(raw, 2L, function(x) sqrt(sum(x^2))),
`/`, check.margin = FALSE)
colnames(Z) <- paste0("^", 1L:n - 1L)
print("Z")
print(Z)
Z
}
if (is.numeric(n) && length(n) == 1L)
levs <- seq_len(n)
else {
levs <- n
n <- length(levs)
}
if (n < 2)
stop(gettextf("contrasts not defined for %d degrees of freedom",
n - 1), domain = NA)
if (n > 95)
stop(gettextf("orthogonal polynomials cannot be represented accurately enough for %d degrees of freedom",
n - 1), domain = NA)
if (length(scores) != n)
stop("'scores' argument is of the wrong length")
if (!is.numeric(scores) || anyDuplicated(scores))
stop("'scores' must all be different numbers")
contr <- make.poly(n, scores)
if (sparse)
contr <- .asSparse(contr)
if (contrasts) {
dn <- colnames(contr)
dn[2:min(4, n)] <- c(".L", ".Q", ".C")[1:min(3, n - 1)]
colnames(contr) <- dn
contr[, -1, drop = FALSE]
}
else {
contr[, 1] <- 1
contr
}
}
-調用levels(iris$Species)
> contr_poly(levels(iris$Species))
n: 3
scores: 1, 2, 3
y: -1, 0, 1
[1] "X"
[,1] [,2] [,3]
[1,] 1 -1 1
[2,] 1 0 0
[3,] 1 1 1
[1] "QR"
$qr
[,1] [,2] [,3]
[1,] -1.7320508 -1.110223e-16 -1.154701e+00
[2,] 0.5773503 -1.414214e+00 1.110223e-16
[3,] 0.5773503 9.659258e-01 8.164966e-01
$rank
[1] 3
$qraux
[1] 1.5773503 1.2588190 0.8164966
$pivot
[1] 1 2 3
attr(,"class")
[1] "qr"
[1] "Z"
^0 ^1 ^2
[1,] 0.5773503 -7.071068e-01 0.4082483
[2,] 0.5773503 -7.850462e-17 -0.8164966
[3,] 0.5773503 7.071068e-01 0.4082483
.L .Q
[1,] -7.071068e-01 0.4082483
[2,] -7.850462e-17 -0.8164966
[3,] 7.071068e-01 0.4082483
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.