[英]How to avoid repeating code in a function to draw a density histogram and a simple histogram with ggplot2?
我正在嘗試創建一個 function,它允許您根據您的選擇繪制直方圖或密度直方圖。
這是我目前寫好的 function:
library(ggplot2)
library(dplyr)
#' Histogram of a numeric vector, optionally on the density scale
#'
#' Builds a ggplot2 histogram of `x` whose bars are filled by `fill_col`.
#' When `Density_Curve` equals the string "TRUE", the bars are drawn with
#' `y = ..density..` and a density curve is overlaid; otherwise a plain
#' histogram is drawn. The two variants are deliberately written out as two
#' separate, complete plot definitions.
#'
#' @param DF1 Data passed to `ggplot()`.
#' @param x Values to bin; also used to compute the x-axis breaks.
#' @param fill_col Values mapped to the fill of the bars.
#' @param x_axis,y_axis Axis labels.
#' @param title_plot,var_name Pasted together (no separator) to form the title.
#' @param bins Number of histogram bins.
#' @param col_border Outline colour of the bars.
#' @param alpha Transparency of the bars.
#' @param Density_Curve Compared against the string "TRUE" to pick the variant.
#' @param colour_curve,lwd Colour and line width of the density curve.
#' @param legend_title Title of the fill legend.
#' @return The ggplot object.
hist_function <- function(DF1, x = Variable1, fill_col = Variable2,
                          x_axis = "x_axis", y_axis = "ylab",
                          title_plot, var_name,
                          bins = 5, col_border = "black", alpha = 0.2,
                          Density_Curve = "TRUE", colour_curve = "red",
                          legend_title = "title", lwd = 1.2) {
  if (Density_Curve != "TRUE") {
    # Plain histogram: no density scaling, no density curve.
    return(
      ggplot(DF1, aes(x)) +
        geom_histogram(
          aes(fill = fill_col),
          bins = bins, colour = col_border, alpha = alpha
        ) +
        scale_x_continuous(breaks = pretty(x, n = 10)) +
        labs(x = x_axis, y = y_axis, title = paste0(title_plot, var_name)) +
        guides(fill = guide_legend(title = legend_title)) +
        theme(strip.text.y = element_blank())
    )
  }

  # Density variant: bars on the density scale plus an overlaid density curve.
  ggplot(DF1, aes(x)) +
    geom_histogram(
      aes(y = ..density.., fill = fill_col),
      bins = bins, colour = col_border, alpha = alpha
    ) +
    geom_density(lwd = lwd, colour = colour_curve, show.legend = FALSE) +
    scale_x_continuous(breaks = pretty(x, n = 10)) +
    labs(x = x_axis, y = y_axis, title = paste0(title_plot, var_name)) +
    guides(fill = guide_legend(title = legend_title)) +
    theme(strip.text.y = element_blank())
}
# Example data: the iris data set from the datasets package.
mydf <- datasets::iris
使用 function:
# Plain histogram: the density variant is switched off.
hist_function(
  DF1 = mydf,
  x = mydf$Sepal.Length,
  fill_col = mydf$Species,
  x_axis = "Length",
  y_axis = "Frequency",
  title_plot = "Histogram of ",
  var_name = "Sepal.Length",
  Density_Curve = "FALSE",
  legend_title = "Species"
)

# Density histogram: bars on the density scale with the density curve on top.
hist_function(
  DF1 = mydf,
  x = mydf$Sepal.Length,
  fill_col = mydf$Species,
  x_axis = "Length",
  y_axis = "Density",
  title_plot = "Distribution of ",
  var_name = "Sepal.Length",
  Density_Curve = "TRUE",
  legend_title = "Species"
)
這個 function 運作正常,不過我認為其中重復了很多代碼,因為兩種圖的繪圖代碼大部分是相同的。 唯一的區別在這里:
# Density histogram: bars mapped to y = ..density.. plus an overlaid density curve
geom_histogram(aes(y=..density.., fill = fill_col), bins=bins, col=col_border, alpha=alpha)
geom_density(lwd = lwd, colour=colour_curve, show.legend = FALSE)
VS
# Simple histogram: no y mapping and no density curve
geom_histogram(aes(fill=fill_col), bins=bins, col=col_border, alpha=alpha)
有人知道如何減少代碼嗎?
提前致謝
解決方案如下。 感謝 @Limey 的想法!
library(ggplot2)
library(dplyr)
#' Base plot shared by both histogram variants
#'
#' Sets up the ggplot object with the x aesthetic, x-axis breaks, axis labels,
#' title, fill-legend title and theme, but adds no geom layer. The caller
#' adds the geoms it wants with `+`.
#'
#' @param DF1 Data passed to `ggplot()`.
#' @param x Values mapped to the x aesthetic; also used to compute the breaks.
#' @param x_axis,y_axis Axis labels.
#' @param title_plot,var_name Pasted together (no separator) to form the title.
#' @param Density_Curve Not referenced in the body; kept in the signature.
#' @param legend_title Title of the fill legend.
#' @return The ggplot object without any geom layer.
hist_function <- function(DF1, x = Variable1,
                          x_axis = "x_axis", y_axis = "ylab",
                          title_plot, var_name,
                          Density_Curve = "TRUE", legend_title = "title") {
  ggplot(DF1, aes(x)) +
    scale_x_continuous(breaks = pretty(x, n = 10)) +
    labs(x = x_axis, y = y_axis, title = paste0(title_plot, var_name)) +
    guides(fill = guide_legend(title = legend_title)) +
    theme(strip.text.y = element_blank())
}
# Example data: the iris data set from the datasets package.
mydf <- datasets::iris
密度直方圖(含密度曲線)
# Density histogram: start from the shared base plot, then add the bars on the
# density scale and the density curve on top.
p <- hist_function(
  DF1 = mydf, x = mydf$Sepal.Length, var_name = "Sepal.Length",
  x_axis = "Length", legend_title = "Species", y_axis = "Density",
  title_plot = "Distribution of "
)
# after_stat(density) replaces the deprecated ..density.. notation.
p <- p +
  geom_histogram(
    aes(y = after_stat(density), fill = Species),
    bins = 5, col = "black", alpha = 0.2
  ) +
  geom_density(lwd = 1.2, colour = "red", show.legend = FALSE)
p
簡單直方圖(不含密度曲線)
# Simple histogram: shared base plot plus a single histogram layer.
p2 <- hist_function(
  DF1 = mydf,
  x = mydf$Sepal.Length,
  var_name = "Sepal.Length",
  x_axis = "Length",
  legend_title = "Species",
  y_axis = "Frequency",
  title_plot = "Histogram of "
)
p2 <- p2 +
  geom_histogram(aes(fill = Species), bins = 5, colour = "black", alpha = 0.2)
p2
您可以定義一個條件幾何圖層,這樣您就不必重復剩余的 plot 代碼。 請注意,當 Density_Curve 為 TRUE 時會添加多個圖層,因此需要把這些圖層放進一個列表(list)中。
library(ggplot2)
library(dplyr)
#' Histogram with an optional density curve, sharing one plot definition
#'
#' Chooses the geom layer(s) first and then adds them to a single plot
#' definition, so the axis, label, title and legend code is written once.
#'
#' @param DF1 Data passed to `ggplot()`.
#' @param x Values to bin; also used to compute the x-axis breaks.
#' @param fill_col Values mapped to the fill of the bars.
#' @param x_axis,y_axis Axis labels.
#' @param title_plot,var_name Pasted together (no separator) to form the title.
#' @param bins Number of histogram bins.
#' @param col_border Outline colour of the bars.
#' @param alpha Transparency of the bars.
#' @param Density_Curve A single TRUE/FALSE value, given as a logical or as a
#'   string such as "TRUE"/"FALSE". When true, the bars use the density scale
#'   and a density curve is overlaid.
#' @param colour_curve,lwd Colour and line width of the density curve.
#' @param legend_title Title of the fill legend.
#' @return The ggplot object.
hist_function <- function(DF1, x = Variable1, fill_col = Variable2,
                          x_axis = "x_axis", y_axis = "ylab",
                          title_plot, var_name,
                          bins = 5, col_border = "black", alpha = 0.2,
                          Density_Curve = "TRUE", colour_curve = "red",
                          legend_title = "title", lwd = 1.2) {
  # Convert explicitly instead of relying on `if ()` coercing a string, and
  # fail with a readable message when the value is not a single TRUE/FALSE.
  show_density <- as.logical(Density_Curve)
  if (length(show_density) != 1L || is.na(show_density)) {
    stop(
      "`Density_Curve` must be a single TRUE/FALSE value (logical or string).",
      call. = FALSE
    )
  }

  geom_layer <- if (show_density) {
    # Several layers can only be added in one `+` step when wrapped in a list.
    list(
      # after_stat(density) replaces the deprecated ..density.. notation.
      geom_histogram(
        aes(y = after_stat(density), fill = fill_col),
        bins = bins, col = col_border, alpha = alpha
      ),
      geom_density(lwd = lwd, colour = colour_curve, show.legend = FALSE)
    )
  } else {
    geom_histogram(
      aes(fill = fill_col),
      bins = bins, col = col_border, alpha = alpha
    )
  }

  DF1 %>%
    ggplot(aes(x)) +
    geom_layer +
    scale_x_continuous(breaks = pretty(x, n = 10)) +
    xlab(x_axis) +
    ylab(y_axis) +
    ggtitle(paste0(title_plot, var_name)) +
    guides(fill = guide_legend(title = legend_title))
}
# Example data for the two calls below.
mydf <- iris

# Plain histogram: the density variant is switched off.
hist_function(
  DF1 = mydf,
  x = mydf$Sepal.Length,
  fill_col = mydf$Species,
  x_axis = "Length",
  y_axis = "Frequency",
  title_plot = "Histogram of ",
  var_name = "Sepal.Length",
  Density_Curve = "FALSE",
  legend_title = "Species"
)

# Density histogram with the density curve on top.
hist_function(
  DF1 = mydf,
  x = mydf$Sepal.Length,
  fill_col = mydf$Species,
  x_axis = "Length",
  y_axis = "Density",
  title_plot = "Distribution of ",
  var_name = "Sepal.Length",
  Density_Curve = "TRUE",
  legend_title = "Species"
)
由 reprex package (v2.0.1) 於 2022 年 8 月 12 日創建
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.