在圖上添加回歸線方程和 R^2

Question

我想知道如何在ggplot上添加回歸線方程和 R^2 。 我的代碼是：

library(ggplot2)

df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
p <- ggplot(data = df, aes(x = x, y = y)) +
            geom_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
            geom_point()
p

任何幫助將不勝感激。

Answer 1

這是一種解決方案

# GET EQUATION AND R-SQUARED AS STRING
# SOURCE: https://groups.google.com/forum/#!topic/ggplot2/1TgH-kG5XMA

lm_eqn <- function(df){
    m <- lm(y ~ x, df);
    eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2, 
         list(a = format(unname(coef(m)[1]), digits = 2),
              b = format(unname(coef(m)[2]), digits = 2),
             r2 = format(summary(m)$r.squared, digits = 3)))
    as.character(as.expression(eq));
}

p1 <- p + geom_text(x = 25, y = 300, label = lm_eqn(df), parse = TRUE)

編輯。 我找出了我選擇此代碼的來源。 這是 ggplot2 谷歌組中原始帖子的鏈接

Answer 2

我的包ggpmisc中的統計stat_poly_eq()可以根據線性模型擬合添加文本標簽。

此答案已於 2022 年 6 月 2 日更新為 'ggpmisc' (>= 0.4.0) 和 'ggplot2' (>= 3.3.0)。 在示例中，我使用stat_poly_line()而不是stat_smooth()因為它具有與method和formula的stat_poly_eq()相同的默認值。 我在所有代碼示例中都省略了stat_poly_line()的附加參數，因為它們與添加標簽的問題無關。

library(ggplot2)
library(ggpmisc)
#> Loading required package: ggpp
#> 
#> Attaching package: 'ggpp'
#> The following object is masked from 'package:ggplot2':
#> 
#>     annotate
# artificial data
df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
df$yy <- 2 + 3 * df$x + 0.1 * df$x^2 + rnorm(100, sd = 40)

# using default formula, label and methods
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq() +
  geom_point()

# assembling a single label with equation and R2
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(aes(label = paste(after_stat(eq.label),
                                 after_stat(rr.label), sep = "*\", \"*"))) +
  geom_point()

# adding separate labels with equation and R2
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(aes(label = after_stat(eq.label))) +
  stat_poly_eq(label.y = 0.9) +
  geom_point()

# regression through the origin
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line(formula = y ~ x + 0) +
  stat_poly_eq(formula = y ~ x + 0, aes(label = after_stat(eq.label))) +
  geom_point()

# fitting a polynomial
ggplot(data = df, aes(x = x, y = yy)) +
  stat_poly_line(formula = y ~ poly(x, 2, raw = TRUE)) +
  stat_poly_eq(formula = y ~ poly(x, 2, raw = TRUE),
               aes(label = after_stat(eq.label))) +
  geom_point()

# adding a hat as asked by @MYaseen208 and @elarry
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(eq.with.lhs = "italic(hat(y))~`=`~",
               aes(label = paste(after_stat(eq.label),
                                 after_stat(rr.label), sep = "*\", \"*"))) +
  geom_point()

# variable substitution as asked by @shabbychef
# same labels in equation and axes
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(eq.with.lhs = "italic(h)~`=`~",
               eq.x.rhs = "~italic(z)",
               aes(label = after_stat(eq.label))) +
  labs(x = expression(italic(z)), y = expression(italic(h))) +
  geom_point()

# grouping as asked by @helen.h
dfg <- data.frame(x = c(1:100))
dfg$y <- 20 * c(0, 1) + 3 * df$x + rnorm(100, sd = 40)
dfg$group <- factor(rep(c("A", "B"), 50))

ggplot(data = dfg, aes(x = x, y = y, colour = group)) +
  stat_poly_line() +
  stat_poly_eq(aes(label = paste(after_stat(eq.label),
                                 after_stat(rr.label), sep = "*\", \"*"))) +
  geom_point()

ggplot(data = dfg, aes(x = x, y = y, linetype = group, grp.label = group)) +
  stat_poly_line() +
  stat_poly_eq(aes(label = paste(after_stat(grp.label), "*\": \"*",
                                 after_stat(eq.label), "*\", \"*",
                                 after_stat(rr.label), sep = ""))) +
  geom_point()

# a single fit with grouped data as asked by @Herman
ggplot(data = dfg, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(aes(label = paste(after_stat(eq.label),
                                 after_stat(rr.label), sep = "*\", \"*"))) +
  geom_point(aes(colour = group))

# facets
ggplot(data = dfg, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(aes(label = paste(after_stat(eq.label),
                                 after_stat(rr.label), sep = "*\", \"*"))) +
  geom_point() +
  facet_wrap(~group)

^{由reprex 包創建於 2022-06-02 (v2.0.1)}

Answer 3

我更改了stat_smooth和相關函數的源代碼的幾行，以創建一個添加擬合方程和 R 平方值的新函數。 這也適用於構面圖！

library(devtools)
source_gist("524eade46135f6348140")
df = data.frame(x = c(1:100))
df$y = 2 + 5 * df$x + rnorm(100, sd = 40)
df$class = rep(1:2,50)
ggplot(data = df, aes(x = x, y = y, label=y)) +
  stat_smooth_func(geom="text",method="lm",hjust=0,parse=TRUE) +
  geom_smooth(method="lm",se=FALSE) +
  geom_point() + facet_wrap(~class)

在此處輸入圖像描述

我使用@Ramnath 的答案中的代碼來格式化等式。 stat_smooth_func函數不是很健壯，但使用它應該不難。

https://gist.github.com/kdauria/524eade46135f6348140 。 如果出現錯誤，請嘗試更新ggplot2 。

Answer 4

我已將 Ramnath 的帖子修改為 a) 使其更通用，因此它接受線性模型作為參數而不是數據框，並且 b) 更恰當地顯示底片。

lm_eqn = function(m) {

  l <- list(a = format(coef(m)[1], digits = 2),
      b = format(abs(coef(m)[2]), digits = 2),
      r2 = format(summary(m)$r.squared, digits = 3));

  if (coef(m)[2] >= 0)  {
    eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,l)
  } else {
    eq <- substitute(italic(y) == a - b %.% italic(x)*","~~italic(r)^2~"="~r2,l)    
  }

  as.character(as.expression(eq));                 
}

用法將更改為：

p1 = p + geom_text(aes(x = 25, y = 300, label = lm_eqn(lm(y ~ x, df))), parse = TRUE)

Answer 5

這里給大家最簡單的代碼

注意：顯示 Pearson 的 Rho 而不是R^2。

library(ggplot2)
library(ggpubr)

df <- data.frame(x = c(1:100)
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
p <- ggplot(data = df, aes(x = x, y = y)) +
        geom_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
        geom_point()+
        stat_cor(label.y = 35)+ #this means at 35th unit in the y axis, the r squared and p value will be shown
        stat_regline_equation(label.y = 30) #this means at 30th unit regresion line equation will be shown

p

Answer 6

使用ggpubr ：

library(ggpubr)

# reproducible data
set.seed(1)
df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)

# By default showing Pearson R
ggscatter(df, x = "x", y = "y", add = "reg.line") +
  stat_cor(label.y = 300) +
  stat_regline_equation(label.y = 280)

# Use R2 instead of R
ggscatter(df, x = "x", y = "y", add = "reg.line") +
  stat_cor(label.y = 300, 
           aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~"))) +
  stat_regline_equation(label.y = 280)

## compare R2 with accepted answer
# m <- lm(y ~ x, df)
# round(summary(m)$r.squared, 2)
# [1] 0.85

Answer 7

真的很喜歡@Ramnath 解決方案。 為了允許使用自定義回歸公式（而不是固定為 y 和 x 作為文字變量名稱），並將 p 值添加到打印輸出中（正如@Jerry T 評論的那樣），這里是 mod：

lm_eqn <- function(df, y, x){
    formula = as.formula(sprintf('%s ~ %s', y, x))
    m <- lm(formula, data=df);
    # formating the values into a summary string to print out
    # ~ give some space, but equal size and comma need to be quoted
    eq <- substitute(italic(target) == a + b %.% italic(input)*","~~italic(r)^2~"="~r2*","~~p~"="~italic(pvalue), 
         list(target = y,
              input = x,
              a = format(as.vector(coef(m)[1]), digits = 2), 
              b = format(as.vector(coef(m)[2]), digits = 2), 
             r2 = format(summary(m)$r.squared, digits = 3),
             # getting the pvalue is painful
             pvalue = format(summary(m)$coefficients[2,'Pr(>|t|)'], digits=1)
            )
          )
    as.character(as.expression(eq));                 
}

geom_point() +
  ggrepel::geom_text_repel(label=rownames(mtcars)) +
  geom_text(x=3,y=300,label=lm_eqn(mtcars, 'hp','wt'),color='red',parse=T) +
  geom_smooth(method='lm')

不幸的是，這不適用於 facet_wrap 或 facet_grid。

Answer 8

另一種選擇是創建一個自定義函數，使用dplyr和broom庫生成方程：

get_formula <- function(model) {
  
  broom::tidy(model)[, 1:2] %>%
    mutate(sign = ifelse(sign(estimate) == 1, ' + ', ' - ')) %>% #coeff signs
    mutate_if(is.numeric, ~ abs(round(., 2))) %>% #for improving formatting
    mutate(a = ifelse(term == '(Intercept)', paste0('y ~ ', estimate), paste0(sign, estimate, ' * ', term))) %>%
    summarise(formula = paste(a, collapse = '')) %>%
    as.character
  
}

lm(y ~ x, data = df) -> model
get_formula(model)
#"y ~ 6.22 + 3.16 * x"

scales::percent(summary(model)$r.squared, accuracy = 0.01) -> r_squared

現在我們需要將文本添加到繪圖中：

p + 
  geom_text(x = 20, y = 300,
            label = get_formula(model),
            color = 'red') +
  geom_text(x = 20, y = 285,
            label = r_squared,
            color = 'blue')

Answer 9

受此答案中提供的方程式風格的啟發，一種更通用的方法（多個預測變量 + 乳膠輸出作為選項）可以是：

print_equation= function(model, latex= FALSE, ...){
    dots <- list(...)
    cc= model$coefficients
    var_sign= as.character(sign(cc[-1]))%>%gsub("1","",.)%>%gsub("-"," - ",.)
    var_sign[var_sign==""]= ' + '

    f_args_abs= f_args= dots
    f_args$x= cc
    f_args_abs$x= abs(cc)
    cc_= do.call(format, args= f_args)
    cc_abs= do.call(format, args= f_args_abs)
    pred_vars=
        cc_abs%>%
        paste(., x_vars, sep= star)%>%
        paste(var_sign,.)%>%paste(., collapse= "")

    if(latex){
        star= " \\cdot "
        y_var= strsplit(as.character(model$call$formula), "~")[[2]]%>%
            paste0("\\hat{",.,"_{i}}")
        x_vars= names(cc_)[-1]%>%paste0(.,"_{i}")
    }else{
        star= " * "
        y_var= strsplit(as.character(model$call$formula), "~")[[2]]        
        x_vars= names(cc_)[-1]
    }

    equ= paste(y_var,"=",cc_[1],pred_vars)
    if(latex){
        equ= paste0(equ," + \\hat{\\varepsilon_{i}} \\quad where \\quad \\varepsilon \\sim \\mathcal{N}(0,",
                    summary(MetamodelKdifEryth)$sigma,")")%>%paste0("$",.,"$")
    }
    cat(equ)
}

model參數需要一個lm對象， latex參數是一個布爾值，用於請求一個簡單的字符或一個乳膠格式的方程，而...參數將其值傳遞給format函數。

我還添加了一個將其輸出為乳膠的選項，因此您可以在 rmarkdown 中使用此函數，如下所示：


```{r echo=FALSE, results='asis'}
print_equation(model = lm_mod, latex = TRUE)
```

現在使用它：

df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
df$z <- 8 + 3 * df$x + rnorm(100, sd = 40)
lm_mod= lm(y~x+z, data = df)

print_equation(model = lm_mod, latex = FALSE)

此代碼產生： y = 11.3382963933174 + 2.5893419 * x + 0.1002227 * z

如果我們要求一個乳膠方程，將參數四舍五入為 3 位數：

print_equation(model = lm_mod, latex = TRUE, digits= 3)

這產生：

Answer 10

類似於 @zx8754 和 @kdauria 的答案，除了使用ggplot2和ggpubr 。 我更喜歡使用ggpubr ，因為它不需要自定義函數，例如這個問題的最佳答案。

library(ggplot2)
library(ggpubr)

df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)

ggplot(data = df, aes(x = x, y = y)) +
  stat_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
  geom_point() +
  stat_cor(aes(label = paste(..rr.label..)), # adds R^2 value
           r.accuracy = 0.01,
           label.x = 0, label.y = 375, size = 4) +
  stat_regline_equation(aes(label = ..eq.label..), # adds equation to linear regression
                        label.x = 0, label.y = 400, size = 4)

也可以在上圖中添加 p 值

ggplot(data = df, aes(x = x, y = y)) +
  stat_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
  geom_point() +
  stat_cor(aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~")), # adds R^2 and p-value
           r.accuracy = 0.01,
           p.accuracy = 0.001,
           label.x = 0, label.y = 375, size = 4) +
  stat_regline_equation(aes(label = ..eq.label..), # adds equation to linear regression
                        label.x = 0, label.y = 400, size = 4)

當您有多個組時，也適用於facet_wrap()

df$group <- rep(1:2,50)

ggplot(data = df, aes(x = x, y = y)) +
  stat_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
  geom_point() +
  stat_cor(aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~")),
           r.accuracy = 0.01,
           p.accuracy = 0.001,
           label.x = 0, label.y = 375, size = 4) +
  stat_regline_equation(aes(label = ..eq.label..),
                        label.x = 0, label.y = 400, size = 4) +
  theme_bw() +
  facet_wrap(~group)

在圖上添加回歸線方程和 R^2

問題描述

10 個解決方案

解決方案1
291 已采納 2011-09-26 01:20:09

解決方案2
202 2016-02-01 20:50:38

解決方案3
109 2015-01-15 08:34:57

解決方案4
79 2012-11-19 10:09:19

解決方案5
24 2020-02-29 18:46:42

解決方案6
19 2019-10-11 06:53:41

解決方案7
17 2018-08-22 20:38:56

解決方案8
5 2020-10-15 16:55:33

解決方案9
4 2019-04-05 13:52:14

解決方案10
1 2022-07-20 18:06:22

在圖上添加回歸線方程和 R^2

問題描述

10 個解決方案

解決方案1 291 已采納 2011-09-26 01:20:09

解決方案2 202 2016-02-01 20:50:38

解決方案3 109 2015-01-15 08:34:57

解決方案4 79 2012-11-19 10:09:19

解決方案5 24 2020-02-29 18:46:42

解決方案6 19 2019-10-11 06:53:41

解決方案7 17 2018-08-22 20:38:56

解決方案8 5 2020-10-15 16:55:33

解決方案9 4 2019-04-05 13:52:14

解決方案10 1 2022-07-20 18:06:22

解決方案1
291 已采納 2011-09-26 01:20:09

解決方案2
202 2016-02-01 20:50:38

解決方案3
109 2015-01-15 08:34:57

解決方案4
79 2012-11-19 10:09:19

解決方案5
24 2020-02-29 18:46:42

解決方案6
19 2019-10-11 06:53:41

解決方案7
17 2018-08-22 20:38:56

解決方案8
5 2020-10-15 16:55:33

解決方案9
4 2019-04-05 13:52:14

解決方案10
1 2022-07-20 18:06:22