簡體   English   中英

顏色密度通過簇組繪制在ggplot2中

[英]Colour Density plots in ggplot2 by cluster groups

我一直在聚集我的一些價值觀,然后將它們分組。 然后,我使用ggplot2繪制一些密度圖並覆蓋簇。 示例圖片如下: 在此輸入圖像描述

對於群集中的每個組,我繪制密度圖並覆蓋它們。 密度圖中的着色對應於聚類中的分組。

我的問題是,我已根據分組手動拆分數據並將它們放在各自的文本表中(參見下面的代碼)。 這是非常低效的,並且對於大型數據集來說可能非常繁瑣。 如何在不將聚類分離到各自的文本表中的情況下動態繪制ggplot2的密度圖?

原始輸入表在拆分之前看起來像這樣:

scores <- read.table(textConnection("
file        max        min        avg               lowest
132         5112.0     6520.0     5728.0            5699.0
133         4720.0     6064.0     5299.0            5277.0
5           4617.0     5936.0     5185.0            5165.0
1           4384.0     5613.0     4917.0            4895.0
1010        5008.0     6291.0     5591.0            5545.0
104         4329.0     5554.0     4858.0            4838.0
105         4636.0     5905.0     5193.0            5165.0
35          4304.0     5578.0     4842.0            4831.0
36          4360.0     5580.0     4891.0            4867.0
37          4444.0     5663.0     4979.0            4952.0
31          4328.0     5559.0     4858.0            4839.0
39          4486.0     5736.0     5031.0            5006.0
32          4334.0     5558.0     4864.0            4843.0
"), header=TRUE)

我用來生成繪圖的代碼:請注意將基本圖形與網格結合仍然無法正常工作

library(ggplot2)
library(grid)

layout(matrix(c(1,2,3,1,4,5), 2, 3, byrow = TRUE))

# define function to create multi-plot setup (nrow, ncol)
vp.setup <- function(x,y){
grid.newpage()
pushViewport(viewport(layout = grid.layout(x,y)))
}

# define function to easily access layout (row, col)
vp.layout <- function(x,y){
viewport(layout.pos.row=x, layout.pos.col=y)
}

vp.setup(2,3)

file_vals <- read.table(textConnection("
file        avg_vals
133         1.5923
132         1.6351
1010        1.6532
104         1.6824
105         1.6087
39          1.8694
32          1.9934
31          1.9919
37          1.8638
36          1.9691
35          1.9802
1           1.7283
5           1.7637
"), header=TRUE)

red <- read.table(textConnection("
file        max        min        avg               lowest
31          4328.0     5559.0     4858.0            4839.0
32          4334.0     5558.0     4864.0            4843.0
36          4360.0     5580.0     4891.0            4867.0
35          4304.0     5578.0     4842.0            4831.0
"), header=TRUE)

blue <- read.table(textConnection("
file        max        min        avg               lowest
133         4720.0     6064.0     5299.0            5277.0
105         4636.0     5905.0     5193.0            5165.0
104         4329.0     5554.0     4858.0            4838.0
132         5112.0     6520.0     5728.0            5699.0
1010        5008.0     6291.0     5591.0            5545.0
"), header=TRUE)

green <- read.table(textConnection("
file        max        min        avg               lowest
39          4486.0     5736.0     5031.0            5006.0
37          4444.0     5663.0     4979.0            4952.0
5           4617.0     5936.0     5185.0            5165.0
1           4384.0     5613.0     4917.0            4895.0
"), header=TRUE)


# Perform Cluster
d <- dist(file_vals$avg_vals, method = "euclidean")
fit <- hclust(d, method="ward")
plot(fit, labels=file_vals$file)
groups <- cutree(fit, k=3)

cols = c('red', 'blue', 'green', 'purple', 'orange', 'magenta', 'brown', 'chartreuse4','darkgray','cyan1')
rect.hclust(fit, k=3, border=cols)


# Desnity plots
dat = rbind(data.frame(Cluster='Red', max_vals = red$max), data.frame(Cluster='Blue', max_vals = blue$max), data.frame(Cluster='Green', max_vals = green$max))
max = (ggplot(dat,aes(x=max_vals)))
max = max + geom_density(aes(fill=factor(Cluster)), alpha=.3) + xlim(c(3500, 5500)) + scale_fill_manual(values=c("red",'blue',"green"))
max = max + labs(fill = 'Clusters')
print(max, vp=vp.layout(1,2))

dat = rbind(data.frame(Cluster='Red', min_vals = red$min), data.frame(Cluster='Blue', min_vals = blue$min), data.frame(Cluster='Green', min_vals = green$min))
min = (ggplot(dat,aes(x=min_vals)))
min = min + geom_density(aes(fill=factor(Cluster)), alpha=.3) + xlim(c(5000, 7000)) + scale_fill_manual(values=c("red",'blue',"green"))
min = min + labs(fill = 'Clusters')
print(min, vp=vp.layout(1,3))

dat = rbind(data.frame(Cluster='Red', avg_vals = red$avg), data.frame(Cluster='Blue', avg_vals = blue$avg), data.frame(Cluster='Green', avg_vals = green$avg))
avg = (ggplot(dat,aes(x=avg_vals)))
avg = avg + geom_density(aes(fill=factor(Cluster)), alpha=.3) + xlim(c(4000, 6000)) + scale_fill_manual(values=c("red",'blue',"green"))
avg = avg + labs(fill = 'Clusters')
print(avg, vp=vp.layout(2,2))

dat = rbind(data.frame(Cluster='Red', lowest_vals = red$lowest), data.frame(Cluster='Blue', lowest_vals = blue$lowest), data.frame(Cluster='Green', lowest_vals = green$lowest))
lowest = (ggplot(dat,aes(x=lowest_vals)))
lowest = lowest + geom_density(aes(fill=factor(Cluster)), alpha=.3) + xlim(c(4000, 6000)) + scale_fill_manual(values=c("red",'blue',"green"))
lowest = lowest + labs(fill = 'Clusters')
print(lowest, vp=vp.layout(2,3))

通過這種方式,您可以使用4個面板自動創建所需的繪圖。

一,數據:

scores <- read.table(textConnection("
file        max        min        avg               lowest
132         5112.0     6520.0     5728.0            5699.0
133         4720.0     6064.0     5299.0            5277.0
5           4617.0     5936.0     5185.0            5165.0
1           4384.0     5613.0     4917.0            4895.0
1010        5008.0     6291.0     5591.0            5545.0
104         4329.0     5554.0     4858.0            4838.0
105         4636.0     5905.0     5193.0            5165.0
35          4304.0     5578.0     4842.0            4831.0
36          4360.0     5580.0     4891.0            4867.0
37          4444.0     5663.0     4979.0            4952.0
31          4328.0     5559.0     4858.0            4839.0
39          4486.0     5736.0     5031.0            5006.0
32          4334.0     5558.0     4864.0            4843.0
"), header=TRUE)

file_vals <- read.table(textConnection("
file        avg_vals
                                   133         1.5923
                                   132         1.6351
                                   1010        1.6532
                                   104         1.6824
                                   105         1.6087
                                   39          1.8694
                                   32          1.9934
                                   31          1.9919
                                   37          1.8638
                                   36          1.9691
                                   35          1.9802
                                   1           1.7283
                                   5           1.7637
                                   "), header=TRUE)

兩個數據框都可以合並為一個:

dat <- merge(scores, file_vals, by = "file")

適合:

d <- dist(dat$avg_vals, method = "euclidean")
fit <- hclust(d, method="ward")
groups <- cutree(fit, k=3)
cols <- c('red', 'blue', 'green', 'purple', 'orange', 'magenta', 'brown', 'chartreuse4','darkgray','cyan1')

添加帶有顏色名稱的列(基於擬合):

dat$group <- cols[groups]

重塑從寬到長格式的數據:

dat_re <- reshape(dat, varying = c("max", "min", "avg", "lowest"), direction = "long", drop = c("file", "avg_vals"), v.names = "value", idvar = "group", times = c("max", "min", "avg", "lowest"), new.row.names = seq(nrow(scores) * 4))

情節:

p <- (ggplot(dat_re ,aes(x = value))) +
geom_density(aes(fill = group), alpha=.3) +
scale_fill_manual(values=cols) +
labs(fill = 'Clusters') +
facet_wrap( ~ time)

print(p)

在此輸入圖像描述

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM