![](/img/trans.png)
[英]stat_compare_means() gives different p.value than compare_means() or t.test()
[英]How is it that a corrected pairwise comparison yields a p.value smaller than that of a single t.test?
嗨,假設我有這些結果
# Reproducible example data: 60 observations with a numeric response `len`
# and two factors, `supp` (levels "OJ", "VC") and `dose` (levels "D0.5",
# "D1", "D2"). The literal below is dput()-style output and is kept verbatim.
df <- structure(list(len = c(4.2, 11.5, 7.3, 5.8, 6.4, 10, 11.2, 11.2,
5.2, 7, 15.2, 21.5, 17.6, 9.7, 14.5, 10, 8.2, 9.4, 16.5, 9.7,
16.5, 16.5, 15.2, 17.3, 22.5, 17.3, 13.6, 14.5, 18.8, 15.5, 19.7,
23.3, 23.6, 26.4, 20, 25.2, 25.8, 21.2, 14.5, 27.3, 23.6, 18.5,
33.9, 25.5, 26.4, 32.5, 26.7, 21.5, 23.3, 29.5, 25.5, 26.4, 22.4,
24.5, 24.8, 30.9, 26.4, 27.3, 29.4, 23), supp = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("OJ",
"VC"), class = "factor"), dose = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("D0.5", "D1", "D2"
), class = "factor")), row.names = c(1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L,
40L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 41L, 42L,
43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L,
59L, 60L), class = "data.frame")
# Combine supp and dose into a single grouping factor (one level per
# supp/dose combination, e.g. "OJ.D1", "VC.D1").
df$int <- interaction(df$supp, df$dose)
# All pairwise t-tests between the combined groups, with Benjamini-Hochberg
# ("BH") adjusted p-values. pool.sd is not passed here, so the function's
# default applies.
e <- pairwise.t.test(df$len, df$int, p.adjust.method="BH")
所以從輸出結果可以看到:
OJ.D0.5 VC.D0.5 OJ.D1 VC.D1 OJ.D2
VC.D0.5 0.00285 - - - -
OJ.D1 0.00000079391014 0.00000000000984 - - -
VC.D1 0.04207 0.00000243821908 **0.00088** - -
OJ.D2 0.00000000042891 0.00000000000001 0.04645 0.00000089414918 -
VC.D2 0.00000000042891 0.00000000000001 0.04474 0.00000085310153 0.96089
VC.D1 與 OJ.D1 比較的(校正後)p 值 = 0.00088
但是單個 t.test
# Stand-alone two-sample t.test on only the VC/D1 and OJ/D1 observations of
# `len`; no other group contributes to this test.
t.test(df[df$supp == "VC" & df$dose == "D1", ]$len,
df[df$supp == "OJ" & df$dose == "D1", ]$len)
產生 p-value = 0.001038
所以我很可能在某個地方搞錯了,因為調整後的 p 值不是應該大於(或等於)單個未校正的 p 值嗎?
解決方案
當您設置p.adjust.method = "none"
和pool.sd = FALSE
時,您將獲得相同的結果:
# No multiplicity adjustment and no pooled SD. Element [3,3] of the p-value
# matrix is row "VC.D1" / column "OJ.D1", i.e. the same pair tested below.
pairwise.t.test(df$len, df$int, p.adjust.method = "none", pool.sd = FALSE)$p.value[3,3]
# 0.001038376
# Direct t.test on the same two groups gives the identical p-value.
t.test(df[df$supp == "VC" & df$dose == "D1", ]$len,
df[df$supp == "OJ" & df$dose == "D1", ]$len)$p.value
# 0.001038376
筆記
解釋
當我們根本不應用多重測試校正時,比較變得容易得多。 在那種情況下,它們應該具有相同的 p 值,對吧? 因此,讓我們使用p.adjust.method = "none"
進行比較。 運行pairwise.t.test
時,我們現在得到0.00059
... 更接近,但仍然不對。
問題源於pool.sd
參數。 這會強制在所有比較中使用共同的標准偏差。 這通常很有用(如果滿足假設),但確實會導致不同的 p 值。
當我們查看底層代碼時,這變得很清楚:
# Pooled-SD branch: a single standard deviation is estimated from ALL groups
# and reused for every pairwise comparison.
if (pool.sd) {
METHOD <- "t tests with pooled SD"
# Per-group mean, standard deviation and count of non-missing values.
xbar <- tapply(x, g, mean, na.rm = TRUE)
s <- tapply(x, g, sd, na.rm = TRUE)
n <- tapply(!is.na(x), g, sum)
# Degrees of freedom per group, and their sum across all groups.
degf <- n - 1
total.degf <- sum(degf)
# Pooled SD: square root of the degf-weighted average of the group variances.
pooled.sd <- sqrt(sum(s^2 * degf)/total.degf)
# p-value for comparing groups i and j, using the pooled SD and total.degf
# (not just the data of the two groups being compared).
compare.levels <- function(i, j) {
dif <- xbar[i] - xbar[j]
se.dif <- pooled.sd * sqrt(1/n[i] + 1/n[j])
t.val <- dif/se.dif
# Two-sided: twice the lower tail at -|t|; otherwise a one-sided tail
# chosen by `alternative`.
if (alternative == "two.sided")
2 * pt(-abs(t.val), total.degf)
else pt(t.val, total.degf, lower.tail = (alternative ==
"less"))
}
}
其中,計算整個測試的總自由度 ( total.degf
),然后用於計算合並標准偏差 ( pooled.sd
)。
當我們設置 pool.sd = FALSE
時,代碼只是直接調用 t.test
函數:
else {
# Non-pooled branch: each pair of groups is handed to t.test() on its own.
METHOD <- if (paired)
"paired t tests"
else "t tests with non-pooled SD"
# p-value for comparing groups i and j, computed only from the observations
# whose group code equals i or j.
compare.levels <- function(i, j) {
xi <- x[as.integer(g) == i]
xj <- x[as.integer(g) == j]
# Delegate to t.test, forwarding paired/alternative and any extra arguments.
t.test(xi, xj, paired = paired, alternative = alternative,
...)$p.value
}
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.