如何避免在 function 中重復代碼以使用 ggplot2 繪制密度直方圖和簡單直方圖？

Question

我正在嘗試創建一個 function，它允許您根據您的選擇繪制直方圖或密度直方圖。

這是我擁有的 function：

library(ggplot2)
library(dplyr)

#' Draw a histogram of `x`, either as a density histogram with a density
#' curve or as a plain histogram.
#'
#' @param DF1 Data piped into `ggplot()`.
#' @param x Values mapped to the x aesthetic; also passed to `pretty()` to
#'   compute the x-axis breaks. NOTE(review): the default `Variable1` is not
#'   defined in the visible code, so callers presumably always supply `x`.
#' @param fill_col Values mapped to the histogram fill aesthetic.
#'   NOTE(review): the default `Variable2` is not defined in the visible code.
#' @param x_axis,y_axis Axis labels.
#' @param title_plot,var_name Pasted together (in this order) to form the
#'   plot title. Neither has a default.
#' @param bins,col_border,alpha Passed to `geom_histogram()` as `bins`, `col`
#'   and `alpha`.
#' @param Density_Curve Compared against the string "TRUE"; when equal, the
#'   density histogram plus density curve is drawn, otherwise a plain
#'   histogram.
#' @param colour_curve,lwd Colour and line width of the density curve (only
#'   used in the density branch).
#' @param legend_title Title of the fill legend.
#' @return The ggplot object.
hist_function <- function(DF1, x = Variable1, fill_col = Variable2, x_axis = "x_axis", 
                          y_axis = "ylab", title_plot, var_name, bins=5, col_border="black", alpha=0.2, 
                          Density_Curve="TRUE", colour_curve="red", legend_title="title", lwd=1.2){
  
  # Density branch: histogram with y mapped to the density statistic, plus a
  # density curve drawn on top (kept out of the legend).
  if(Density_Curve == "TRUE"){
    p <- DF1 %>% 
      ggplot(aes(x)) +
      geom_histogram(aes(y=..density.., fill = fill_col), bins=bins, col=col_border, alpha=alpha) +
      geom_density(lwd = lwd, colour=colour_curve, show.legend = FALSE) +
      scale_x_continuous(breaks=pretty(x, n=10)) +
      xlab(x_axis) + 
      ylab(y_axis) +
      ggtitle(paste0(title_plot, var_name)) +
      guides(fill=guide_legend(title=legend_title)) +
      theme(strip.text.y = element_blank())
    
  # Plain branch: same scale, labels, legend and theme as above; only the
  # geom layer differs (no y mapping, no density curve).
   }else{
    p <- DF1 %>%
      ggplot(aes(x)) +
      geom_histogram(aes(fill=fill_col), bins=bins, col=col_border, alpha=alpha) +
      scale_x_continuous(breaks=pretty(x, n=10)) +
      xlab(x_axis) +
      ylab(y_axis) +
      ggtitle(paste0(title_plot, var_name)) +
      guides(fill=guide_legend(title=legend_title)) +
      theme(strip.text.y = element_blank())
  }
  return(p)
}

# Example data for the calls below: a copy of the `iris` data set.
mydf <- iris

使用 function：

# Plain histogram of Sepal.Length, filled by Species (no density curve).

hist_function(
  DF1 = mydf,
  x = mydf$Sepal.Length,
  fill_col = mydf$Species,
  var_name = "Sepal.Length",
  Density_Curve = "FALSE",
  x_axis = "Length",
  legend_title = "Species",
  y_axis = "Frequency",
  title_plot = "Histogram of "
)

# Density histogram of Sepal.Length with the density curve on top.

hist_function(
  DF1 = mydf,
  x = mydf$Sepal.Length,
  fill_col = mydf$Species,
  var_name = "Sepal.Length",
  Density_Curve = "TRUE",
  x_axis = "Length",
  legend_title = "Species",
  y_axis = "Density",
  title_plot = "Distribution of "
)

這個 function 運作得很好，不過我認為其中重復了很多代碼，因為兩種圖的大部分繪圖代碼是相同的。唯一的區別在這里：

#density histogram
geom_histogram(aes(y=..density.., fill = fill_col), bins=bins, col=col_border, alpha=alpha) +
  geom_density(lwd = lwd, colour=colour_curve, show.legend = FALSE)

VS

#simple histogram
geom_histogram(aes(fill=fill_col), bins=bins, col=col_border, alpha=alpha)

有人知道如何減少代碼嗎？

提前致謝

Answer 1

解決方案如下。感謝 @Limey 的想法！

library(ggplot2)
library(dplyr)

#' Build the shared base of a histogram plot, without any geom layer.
#'
#' The caller adds the geom layer(s) afterwards, e.g. `geom_histogram()`
#' with or without `geom_density()`.
#'
#' @param DF1 Data passed to `ggplot()`.
#' @param x Values mapped to the x aesthetic; also used to compute the
#'   x-axis breaks with `pretty()`.
#' @param x_axis,y_axis Axis labels.
#' @param title_plot,var_name Pasted together (in this order) to form the
#'   plot title.
#' @param Density_Curve Not referenced in the body; kept in the signature.
#' @param legend_title Title of the fill legend.
#' @return A ggplot object with scale, labels, legend guide and theme set.
hist_function <- function(DF1, x = Variable1, x_axis = "x_axis",
                          y_axis = "ylab", title_plot, var_name,
                          Density_Curve = "TRUE", legend_title = "title") {
  base_plot <- ggplot(DF1, aes(x))

  # Components are added to the plot in list order.
  shared_parts <- list(
    scale_x_continuous(breaks = pretty(x, n = 10)),
    xlab(x_axis),
    ylab(y_axis),
    ggtitle(paste0(title_plot, var_name)),
    guides(fill = guide_legend(title = legend_title)),
    theme(strip.text.y = element_blank())
  )

  base_plot + shared_parts
}

# Example data for the calls below: a copy of the `iris` data set.
mydf <- iris

密度直方圖（帶密度曲線）

# Density histogram: start from the shared base plot, then add a histogram
# whose y axis is the computed density, and a density curve on top.
# `after_stat(density)` replaces the deprecated `..density..` notation.
p <- hist_function(
  DF1 = mydf, x = mydf$Sepal.Length, var_name = "Sepal.Length",
  x_axis = "Length", legend_title = "Species", y_axis = "Density",
  title_plot = "Distribution of "
) +
  geom_histogram(
    aes(y = after_stat(density), fill = Species),
    bins = 5, col = "black", alpha = 0.2
  ) +
  geom_density(lwd = 1.2, colour = "red", show.legend = FALSE)
p

簡單直方圖（頻數）

# Plain (count) histogram: shared base plot plus a single histogram layer
# filled by Species.
p2 <- hist_function(
  DF1 = mydf, x = mydf$Sepal.Length, var_name = "Sepal.Length",
  x_axis = "Length", legend_title = "Species", y_axis = "Frequency",
  title_plot = "Histogram of "
) +
  geom_histogram(aes(fill = Species), bins = 5, col = "black", alpha = 0.2)
p2

Answer 2

您可以定義一個條件幾何圖層，這樣您就不必重復剩余的 plot 代碼。 請注意，當 Density_Curve 為 TRUE 時需要添加多個圖層，因此必須把這些圖層放進一個 list 中。

library(ggplot2)
library(dplyr)
    
    #' Draw a histogram of `x`, optionally as a density histogram with a
    #' density curve, without repeating the shared plot code.
    #'
    #' @param DF1 Data piped into `ggplot()`.
    #' @param x Values mapped to the x aesthetic; also used to compute the
    #'   x-axis breaks with `pretty()`.
    #' @param fill_col Values mapped to the histogram fill aesthetic.
    #' @param x_axis,y_axis Axis labels.
    #' @param title_plot,var_name Pasted together (in this order) to form the
    #'   plot title.
    #' @param bins,col_border,alpha Passed to `geom_histogram()` as `bins`,
    #'   `col` and `alpha`.
    #' @param Density_Curve Single logical, or a string such as "TRUE" /
    #'   "FALSE" that `as.logical()` can interpret. When TRUE, a density
    #'   histogram plus density curve is drawn; otherwise a plain histogram.
    #' @param colour_curve,lwd Colour and line width of the density curve.
    #' @param legend_title Title of the fill legend.
    #' @return A ggplot object.
    hist_function <- function(DF1, x = Variable1, fill_col = Variable2, x_axis = "x_axis", 
                              y_axis = "ylab", title_plot, var_name, bins=5, col_border="black", alpha=0.2, 
                              Density_Curve="TRUE", colour_curve="red", legend_title="title", lwd=1.2){
        
        # Validate the flag up front so that a bad value fails with a clear
        # message instead of an opaque error from `if ()`.
        show_density <- as.logical(Density_Curve)
        if (length(show_density) != 1L || is.na(show_density)) {
            stop("`Density_Curve` must be a single TRUE/FALSE value (or \"TRUE\"/\"FALSE\").",
                 call. = FALSE)
        }
        
        if (show_density) {
            # Several layers must be wrapped in a list to be added as one unit.
            # `after_stat(density)` replaces the deprecated `..density..`.
            geom_layer <- list(
                geom_histogram(aes(y = after_stat(density), fill = fill_col),
                               bins = bins, col = col_border, alpha = alpha),
                geom_density(lwd = lwd, colour = colour_curve, show.legend = FALSE)
            )
        } else {
            geom_layer <- geom_histogram(aes(fill = fill_col), bins = bins,
                                         col = col_border, alpha = alpha)
        }
        
        DF1 %>% 
            ggplot(aes(x)) +
            geom_layer + 
            scale_x_continuous(breaks = pretty(x, n = 10)) +
            xlab(x_axis) + 
            ylab(y_axis) +
            ggtitle(paste0(title_plot, var_name)) +
            guides(fill = guide_legend(title = legend_title)) 
        
    }
    
    # Example data for the calls below: a copy of the `iris` data set.
    mydf <- iris
    
# Plain histogram (Density_Curve = "FALSE").
hist_function(
  DF1 = mydf,
  x = mydf$Sepal.Length,
  fill_col = mydf$Species,
  var_name = "Sepal.Length",
  Density_Curve = "FALSE",
  x_axis = "Length",
  legend_title = "Species",
  y_axis = "Frequency",
  title_plot = "Histogram of "
)
# Density histogram with density curve (Density_Curve = "TRUE").
hist_function(
  DF1 = mydf,
  x = mydf$Sepal.Length,
  fill_col = mydf$Species,
  var_name = "Sepal.Length",
  Density_Curve = "TRUE",
  x_axis = "Length",
  legend_title = "Species",
  y_axis = "Density",
  title_plot = "Distribution of "
)

由 reprex package (v2.0.1) 於 2022 年 8 月 12 日創建

如何避免在 function 中重復代碼以使用 ggplot2 繪制密度直方圖和簡單直方圖？

問題描述

2 個解決方案

解決方案1
2 2022-08-12 10:37:00

解決方案2
1 2022-08-12 11:14:06

如何避免在 function 中重復代碼以使用 ggplot2 繪制密度直方圖和簡單直方圖？

問題描述

2 個解決方案

解決方案1 2 2022-08-12 10:37:00

解決方案2 1 2022-08-12 11:14:06

解決方案1
2 2022-08-12 10:37:00

解決方案2
1 2022-08-12 11:14:06