來自 R 中兩個數據框的條形圖

Question

我的目標是製作一個基本的條形圖，其中的數據來自兩個具有相同變量的數據框。圖形應該如下圖所示，但要去掉其中的兩個缺點：圖例應該能區分兩個數據框，而且每條誤差線應該位於對應條形的中間。你知道怎麼做嗎？下面是產生該圖的代碼；我認為 bind_rows 不是這裡的最佳解決方案。

代碼↓

# Reproduces the problem described in the question: stack B and C
# (`.id = "id"` adds an `id` column recording which input each row came
# from), keep Q1 and Q2, and draw flipped bars with error bars.
bind_rows(B, C, .id = "id") %>%
  filter(question %in% c("Q1", "Q2")) %>%
  ggplot(aes(x = question, y = mean)) +
  # The bars are dodged side by side, one per `id`.
  geom_bar(aes(fill = id), stat = "identity", position = "dodge2", width = 0.5) +
  # No `position` is given for this layer, so the error bars are not dodged
  # along with the bars above. `colour = id` is mapped here while the bars
  # map `fill = id`; `width` is placed inside aes() rather than set as a
  # constant.
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, width = 0.1, colour = id)) +
  coord_flip()

數據框↓

# dput() output of the first data frame (presumably `B` in the code above;
# the post does not name it -- confirm). 12 questions with n = 204 each;
# Q10 has NA for both mean and sd.
structure(list(question = c("Q1", "Q10", "Q11", "Q12", "Q2", 
"Q3", "Q4", "Q5", "Q6", "Q7", "Q8", "Q9"), n = c(204L, 204L, 
204L, 204L, 204L, 204L, 204L, 204L, 204L, 204L, 204L, 204L), 
    mean = c(5.22549019607843, NA, 4.95098039215686, 4.39705882352941, 
    5.47058823529412, 5.51470588235294, 4.50490196078431, 4.92647058823529, 
    4.40686274509804, 5.56862745098039, 5.56372549019608, 5.23529411764706
    ), sd = c(1.1524816893289, NA, 1.31214449357814, 1.5422430010719, 
    1.12039650223724, 1.15104553532809, 1.37714471881058, 1.34621721218454, 
    1.30030385262334, 0.871099231072865, 0.830963499839951, 1.36945187401243
    )), row.names = c(NA, 12L), class = c("tbl_df", "tbl", "data.frame"
))

# dput() output of the second data frame (presumably `C` in the code above;
# the post does not name it -- confirm). Same 12 questions with n = 13 each
# and no missing values.
structure(list(question = c("Q1", "Q10", "Q11", "Q12", "Q2", 
"Q3", "Q4", "Q5", "Q6", "Q7", "Q8", "Q9"), n = c(13L, 13L, 13L, 
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L), mean = c(5.38461538461539, 
4.38461538461539, 4.69230769230769, 4.30769230769231, 5.15384615384615, 
5.38461538461539, 4.76923076923077, 5.30769230769231, 4.53846153846154, 
5.61538461538461, 5.69230769230769, 4.92307692307692), sd = c(1.26085034391223, 
1.44559454541846, 1.03155347127648, 1.60128153805087, 0.898717034272917, 
1.12089707663561, 1.01273936708367, 0.85485041426511, 0.967417922046845, 
1.26085034391223, 0.85485041426511, 1.84668795692624)), row.names = c(NA, 
12L), class = c("tbl_df", "tbl", "data.frame"))

Answer 1

我認為 dplyr::bind_rows() 在這裡完全適用。要讓分組條形和誤差線對齊，請對兩個圖層都使用 position_dodge2()。

# Stack both data frames (`.id = "id"` records which input each row came
# from), keep the two questions of interest, and draw horizontal grouped bars
# with error bars. The mapping is shared by every layer.
bind_rows(B, C, .id = "id") %>%
  filter(question %in% c("Q1", "Q2")) %>%
  ggplot(
    aes(
      x = mean, y = question, fill = id,
      xmin = mean - sd, xmax = mean + sd
    )
  ) +
  # Both layers use the dodge2 position so each error bar sits on its own bar.
  geom_col(position = "dodge2", width = 0.5) +
  geom_errorbar(position = position_dodge2(padding = 0.5), width = 0.5) +
  theme_minimal()

如果要再添加其他圖層，這些圖層同樣需要使用 position_dodge2()。

# Take the most recently drawn plot and add a point layer, dodged with
# position_dodge2() like the bar and error-bar layers.
last_plot() + geom_point(position = position_dodge2(width = 0.5))

Answer 2

這個問題帶有 ggplot2 和 tidyverse 標籤。但是，如果您只想使用 base 包，這裡有一種方法（有點冗長和繁瑣，但效果很好）：

# First data frame from the question, rebuilt from its dput() output:
# 12 questions with n = 204 each. Q10 has no mean/sd (NA).
df1 <- structure(
  list(
    question = c(
      "Q1", "Q10", "Q11", "Q12", "Q2", "Q3",
      "Q4", "Q5", "Q6", "Q7", "Q8", "Q9"
    ),
    n = rep(204L, 12L),
    mean = c(
      5.22549019607843, NA, 4.95098039215686, 4.39705882352941,
      5.47058823529412, 5.51470588235294, 4.50490196078431, 4.92647058823529,
      4.40686274509804, 5.56862745098039, 5.56372549019608, 5.23529411764706
    ),
    sd = c(
      1.1524816893289, NA, 1.31214449357814, 1.5422430010719,
      1.12039650223724, 1.15104553532809, 1.37714471881058, 1.34621721218454,
      1.30030385262334, 0.871099231072865, 0.830963499839951, 1.36945187401243
    )
  ),
  row.names = c(NA, 12L),
  class = c("tbl_df", "tbl", "data.frame")
)
# Second data frame from the question, rebuilt from its dput() output:
# the same 12 questions with n = 13 each and no missing values.
df2 <- structure(
  list(
    question = c(
      "Q1", "Q10", "Q11", "Q12", "Q2", "Q3",
      "Q4", "Q5", "Q6", "Q7", "Q8", "Q9"
    ),
    n = rep(13L, 12L),
    mean = c(
      5.38461538461539, 4.38461538461539, 4.69230769230769, 4.30769230769231,
      5.15384615384615, 5.38461538461539, 4.76923076923077, 5.30769230769231,
      4.53846153846154, 5.61538461538461, 5.69230769230769, 4.92307692307692
    ),
    sd = c(
      1.26085034391223, 1.44559454541846, 1.03155347127648, 1.60128153805087,
      0.898717034272917, 1.12089707663561, 1.01273936708367, 0.85485041426511,
      0.967417922046845, 1.26085034391223, 0.85485041426511, 1.84668795692624
    )
  ),
  row.names = c(NA, 12L),
  class = c("tbl_df", "tbl", "data.frame")
)

首先，我們合併兩個數據框：

# Join the two tables on `question`; the remaining columns that share a name
# get the suffixes .x (from df1) and .y (from df2), as in the printout below.
df3 <- merge(x = df1, y = df2, by = "question")

#df3
#question n.x   mean.x      sd.x n.y   mean.y      sd.y
#1        Q1 204 5.225490 1.1524817  13 5.384615 1.2608503
#2       Q10 204       NA        NA  13 4.384615 1.4455945
#3       Q11 204 4.950980 1.3121445  13 4.692308 1.0315535
#4       Q12 204 4.397059 1.5422430  13 4.307692 1.6012815
#5        Q2 204 5.470588 1.1203965  13 5.153846 0.8987170
#6        Q3 204 5.514706 1.1510455  13 5.384615 1.1208971
#7        Q4 204 4.504902 1.3771447  13 4.769231 1.0127394
#8        Q5 204 4.926471 1.3462172  13 5.307692 0.8548504
#9        Q6 204 4.406863 1.3003039  13 4.538462 0.9674179
#10       Q7 204 5.568627 0.8710992  13 5.615385 1.2608503
#11       Q8 204 5.563725 0.8309635  13 5.692308 0.8548504
#12       Q9 204 5.235294 1.3694519  13 4.923077 1.8466880

然后，我們以矩陣的形式存儲我們的數據：

# Two-column numeric matrix of the means: column 1 from df1 (mean.x),
# column 2 from df2 (mean.y), one row per question in df3's row order.
# cbind() on two vectors already yields a matrix.
mat <- cbind(df3$mean.x, df3$mean.y)

#mat
#         [,1]     [,2]
# [1,] 5.225490 5.384615
# [2,]       NA 4.384615
# [3,] 4.950980 4.692308
# [4,] 4.397059 4.307692
# [5,] 5.470588 5.153846
# [6,] 5.514706 5.384615
# [7,] 4.504902 4.769231
# [8,] 4.926471 5.307692
# [9,] 4.406863 4.538462
#[10,] 5.568627 5.615385
#[11,] 5.563725 5.692308
#[12,] 5.235294 4.923077

最后，我們繪制條形圖：

# par(mar = c(5, 4, 4, 5) + 0.1)  # NOTE(review): presumably widens the right
#                                 # margin for the legend drawn later -- confirm
# Draw horizontal grouped bars: t(mat) has one row per data frame and one
# column per question, so beside = TRUE places the two bars of each question
# next to each other. barplot() returns the bar midpoints, which are kept in
# `mid` (row 1 = first data frame, row 2 = second) for placing the error bars.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
mid <- barplot(
  t(mat),
  beside = TRUE,
  names.arg = df3$question,
  cex.names = 0.8,
  horiz = TRUE,
  xlim = c(0, 7),
  col = c("#44c1f2", "#f28744"),
  xlab = "mean",
  ylab = "question"
)

現在，為了在base包的 barplot 中繪制誤差線，我們可以執行以下操作（從@Laryx Decidua 的這篇文章中獲得靈感）：

# Error bars (mean +/- sd) drawn as double-headed arrows with flat 90-degree
# heads. The values are taken from df3 rather than df1/df2 because the bar
# midpoints in `mid` follow the row order of df3 (the matrix passed to
# barplot() was built from it); using df3 keeps every error bar on its own
# bar even if merge() reorders the questions.
# Row 1 of `mid` holds the bars of the first data frame (.x columns).
arrows(
  x0 = df3$mean.x - df3$sd.x, y0 = mid[1, ],
  x1 = df3$mean.x + df3$sd.x, y1 = mid[1, ],
  code = 3, angle = 90, length = 0.04, col = "red"
)
# Row 2 of `mid` holds the bars of the second data frame (.y columns).
arrows(
  x0 = df3$mean.y - df3$sd.y, y0 = mid[2, ],
  x1 = df3$mean.y + df3$sd.y, y1 = mid[2, ],
  code = 3, angle = 90, length = 0.04, col = "blue"
)

現在添加圖例：

# Legend for the two bar colours, anchored at user coordinates (7, 21).
# x = 7 is the upper end of the xlim used for the bar plot; xpd = TRUE lets
# the legend be drawn outside the plot region instead of being clipped.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
legend(
  7, 21,
  legend = c("1", "2"),
  fill = c("#44c1f2", "#f28744"),
  xpd = TRUE,
  title = "ID"
)

這樣我們就得到了如下的圖形：

來自 R 中兩個數據框的條形圖

問題描述

2 個解決方案

解決方案1
1 2021-07-26 11:47:53

解決方案2
1 2021-07-27 14:37:12

來自 R 中兩個數據框的條形圖

問題描述

2 個解決方案

解決方案1 1 2021-07-26 11:47:53

解決方案2 1 2021-07-27 14:37:12

解決方案1
1 2021-07-26 11:47:53

解決方案2
1 2021-07-27 14:37:12