簡體   English   中英

如何避免在 function 中重復代碼以使用 ggplot2 繪制密度直方圖和簡單直方圖?

[英]How to avoid repeating code in a function to draw a density histogram and a simple histogram with ggplot2?

我正在嘗試創建一個 function,它允許您根據您的選擇繪制直方圖或密度直方圖。

這是我擁有的 function:

library(ggplot2)
library(dplyr)

#' Draw a histogram of `x` with ggplot2, optionally on the density scale with
#' an overlaid density curve.
#'
#' @param DF1 Data frame piped into `ggplot()`.
#' @param x Vector mapped to the x aesthetic; it is also passed to `pretty()`
#'   to compute the x-axis breaks.
#' @param fill_col Vector mapped to the histogram's fill aesthetic.
#' @param x_axis,y_axis Axis labels.
#' @param title_plot,var_name Pasted together (no separator) to form the title.
#' @param bins,col_border,alpha Number of bins, border colour and transparency
#'   passed to `geom_histogram()`.
#' @param Density_Curve Compared against the string "TRUE"; when equal, the
#'   density branch is used, otherwise the plain histogram branch.
#' @param colour_curve,lwd Colour and line width passed to `geom_density()`.
#' @param legend_title Title of the fill legend.
#' @return The ggplot object built by the selected branch.
#'
# NOTE(review): the defaults `Variable1` / `Variable2` are not defined in this
# snippet, so callers presumably always supply `x` and `fill_col` -- confirm.
hist_function <- function(DF1, x = Variable1, fill_col = Variable2, x_axis = "x_axis", 
                          y_axis = "ylab", title_plot, var_name, bins=5, col_border="black", alpha=0.2, 
                          Density_Curve="TRUE", colour_curve="red", legend_title="title", lwd=1.2){
  
  if(Density_Curve == "TRUE"){
    # Density branch: the histogram's y is mapped to the computed density so
    # that the bars and the geom_density() curve share the same scale.
    p <- DF1 %>% 
      ggplot(aes(x)) +
      geom_histogram(aes(y=..density.., fill = fill_col), bins=bins, col=col_border, alpha=alpha) +
      geom_density(lwd = lwd, colour=colour_curve, show.legend = FALSE) +
      scale_x_continuous(breaks=pretty(x, n=10)) +
      xlab(x_axis) + 
      ylab(y_axis) +
      ggtitle(paste0(title_plot, var_name)) +
      guides(fill=guide_legend(title=legend_title)) +
      theme(strip.text.y = element_blank())
    
   }else{
    # Plain branch: identical to the branch above except that the histogram
    # has no y mapping and no density curve is added.
    p <- DF1 %>%
      ggplot(aes(x)) +
      geom_histogram(aes(fill=fill_col), bins=bins, col=col_border, alpha=alpha) +
      scale_x_continuous(breaks=pretty(x, n=10)) +
      xlab(x_axis) +
      ylab(y_axis) +
      ggtitle(paste0(title_plot, var_name)) +
      guides(fill=guide_legend(title=legend_title)) +
      theme(strip.text.y = element_blank())
  }
  # NOTE(review): no facets are added in this function, so the strip.text.y
  # theme setting above looks like it has no visible effect -- confirm.
  return(p)
}

mydf <- iris

使用 function:

# Draw a simple histogram

hist_function(
  DF1 = mydf,
  x = mydf$Sepal.Length,
  fill_col = mydf$Species,
  var_name = "Sepal.Length",
  Density_Curve = "FALSE",
  x_axis = "Length",
  legend_title = "Species",
  y_axis = "Frequency",
  title_plot = "Histogram of "
)

圖 1

# Draw a density histogram

hist_function(
  DF1 = mydf,
  x = mydf$Sepal.Length,
  fill_col = mydf$Species,
  var_name = "Sepal.Length",
  Density_Curve = "TRUE",
  x_axis = "Length",
  legend_title = "Species",
  y_axis = "Density",
  title_plot = "Distribution of "
)

圖 2

function 工作得非常好,盡管我認為我重復了很多代碼,因為繪制 plot 的大部分代碼都是為兩者共享的。 唯一的區別在這里:

#density histogram
geom_histogram(aes(y=..density.., fill = fill_col), bins=bins, col=col_border, alpha=alpha) 
  geom_density(lwd = lwd, colour=colour_curve, show.legend = FALSE) 

VS

#simple histogram
geom_histogram(aes(fill=fill_col), bins=bins, col=col_border, alpha=alpha) 

有人知道如何減少代碼嗎?

提前致謝

這里的解決方案。 感謝@Limey 的想法!

library(ggplot2)
library(dplyr)

#' Build the shared base plot (axes, labels, title, legend title) without any
#' geom layer, so the caller can add the histogram layers it wants.
#'
#' @param DF1 Data frame used as the plot data.
#' @param x Vector mapped to the x aesthetic; also used to compute the x-axis
#'   breaks via `pretty()`.
#' @param x_axis,y_axis Axis labels.
#' @param title_plot,var_name Pasted together (no separator) to form the title.
#' @param Density_Curve Not used by this version of the function.
#' @param legend_title Title of the fill legend.
#' @return A ggplot object with no layers.
hist_function <- function(DF1, x = Variable1, x_axis = "x_axis",
                          y_axis = "ylab", title_plot, var_name,
                          Density_Curve = "TRUE", legend_title = "title") {
  base_plot <- ggplot(DF1, aes(x))
  axis_breaks <- pretty(x, n = 10)
  plot_title <- paste0(title_plot, var_name)
  fill_legend <- guide_legend(title = legend_title)

  base_plot +
    scale_x_continuous(breaks = axis_breaks) +
    xlab(x_axis) +
    ylab(y_axis) +
    ggtitle(plot_title) +
    guides(fill = fill_legend) +
    theme(strip.text.y = element_blank())
}

mydf <- iris

密度直方圖

# Density histogram: start from the layer-free base plot, add a histogram on
# the density scale, then overlay the density curve.
p <- hist_function(
  DF1 = mydf, x = mydf$Sepal.Length, var_name = "Sepal.Length",
  x_axis = "Length", legend_title = "Species", y_axis = "Density",
  title_plot = "Distribution of "
) +
  # after_stat(density) replaces the `..density..` notation, which ggplot2
  # deprecated in version 3.4.0.
  geom_histogram(
    aes(y = after_stat(density), fill = Species),
    bins = 5, col = "black", alpha = 0.2
  ) +
  geom_density(lwd = 1.2, colour = "red", show.legend = FALSE)
p

簡單直方圖

# Count histogram: the layer-free base plot plus a single histogram layer
# filled by Species.
p2 <- hist_function(
  DF1 = mydf, x = mydf$Sepal.Length, var_name = "Sepal.Length",
  x_axis = "Length", legend_title = "Species", y_axis = "Frequency",
  title_plot = "Histogram of "
) +
  geom_histogram(aes(fill = Species), bins = 5, col = "black", alpha = 0.2)
p2

您可以定義一個條件幾何圖層,這樣您就不必重復剩余的 plot 代碼。 請注意,當 Density_Curve 為 TRUE 時需要添加多個圖層(直方圖和密度曲線),因此必須把它們放進一個列表(list)中

library(ggplot2)
library(dplyr)
    
    #' Draw a histogram of `x`, optionally on the density scale with an
    #' overlaid density curve, building the shared plot code only once.
    #'
    #' @param DF1 Data frame piped into `ggplot()`.
    #' @param x Vector mapped to the x aesthetic; also used to compute the
    #'   x-axis breaks via `pretty()`.
    #' @param fill_col Vector mapped to the histogram's fill aesthetic.
    #' @param x_axis,y_axis Axis labels.
    #' @param title_plot,var_name Pasted together (no separator) to form the
    #'   title.
    #' @param bins,col_border,alpha Number of bins, border colour and
    #'   transparency passed to `geom_histogram()`.
    #' @param Density_Curve `TRUE`/`FALSE`, or a string that `as.logical()`
    #'   can interpret (e.g. "TRUE", "FALSE"). When true, the histogram is
    #'   drawn on the density scale and a density curve is added.
    #' @param colour_curve,lwd Colour and line width passed to
    #'   `geom_density()`.
    #' @param legend_title Title of the fill legend.
    #' @return A ggplot object.
    hist_function <- function(DF1, x = Variable1, fill_col = Variable2, x_axis = "x_axis",
                              y_axis = "ylab", title_plot, var_name, bins = 5, col_border = "black", alpha = 0.2,
                              Density_Curve = "TRUE", colour_curve = "red", legend_title = "title", lwd = 1.2) {
      # Normalise the flag explicitly instead of relying on `if` to coerce a
      # string, and fail with a clear message when it is not a single
      # TRUE/FALSE value.
      show_density <- as.logical(Density_Curve)
      if (length(show_density) != 1L || is.na(show_density)) {
        stop(
          "`Density_Curve` must be a single TRUE/FALSE value (logical or string).",
          call. = FALSE
        )
      }

      if (show_density) {
        # Several layers can be added to a plot in one step when they are
        # wrapped in a list.
        geom_layer <- list(
          # after_stat(density) replaces the `..density..` notation, which
          # ggplot2 deprecated in version 3.4.0.
          geom_histogram(
            aes(y = after_stat(density), fill = fill_col),
            bins = bins, col = col_border, alpha = alpha
          ),
          geom_density(lwd = lwd, colour = colour_curve, show.legend = FALSE)
        )
      } else {
        geom_layer <- geom_histogram(
          aes(fill = fill_col),
          bins = bins, col = col_border, alpha = alpha
        )
      }

      DF1 %>%
        ggplot(aes(x)) +
        geom_layer +
        scale_x_continuous(breaks = pretty(x, n = 10)) +
        xlab(x_axis) +
        ylab(y_axis) +
        ggtitle(paste0(title_plot, var_name)) +
        guides(fill = guide_legend(title = legend_title))
    }
    
    mydf <- iris
    
# Count histogram (no density curve).
hist_function(
  DF1 = mydf,
  x = mydf$Sepal.Length,
  fill_col = mydf$Species,
  var_name = "Sepal.Length",
  Density_Curve = "FALSE",
  x_axis = "Length",
  legend_title = "Species",
  y_axis = "Frequency",
  title_plot = "Histogram of "
)
# Density histogram with the density curve overlaid.
hist_function(
  DF1 = mydf,
  x = mydf$Sepal.Length,
  fill_col = mydf$Species,
  var_name = "Sepal.Length",
  Density_Curve = "TRUE",
  x_axis = "Length",
  legend_title = "Species",
  y_axis = "Density",
  title_plot = "Distribution of "
)

由 reprex package (v2.0.1) 於 2022 年 8 月 12 日創建

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM