簡體   English   中英

R:在x軸上具有多個類別變量的Barplot(大於2)

[英]R: Barplot with multiple categorical variables on the x axis (more than 2)

我正在努力創建一個包含多個類別變量的條形圖,因此我當前的數據看起來像這樣

# Example data: four IDs, each scored under two categorical groupings
# (Type1 -> Score1, Type2 -> Score2).
df <- data.frame(
  ID     = as.numeric(1:4),
  Type1  = rep(c("A", "B"), times = 2),
  Score1 = seq(10, 40, by = 10),
  Type2  = rep(c("C", "D"), each = 2),
  Score2 = seq(20, 80, by = 20)
)
ID  Type1    Score1   Type2    Score2
1       A        10       C        20
2       B        20       C        40
3       A        30       D        60
4       B        40       D        80

我正在尋找的應該是這樣的

在此處輸入圖片說明

接下來,我想通過在圖表上添加更多類別來進一步說明這一點,因此現在數據框為

# Extended example data: same layout as before plus a Colour category.
# Score2 is stored as text here, exactly as in the original data.
df2 <- data.frame(
  ID     = as.numeric(1:4),
  Type1  = rep(c("A", "B"), times = 2),
  Score1 = seq(10, 40, by = 10),
  Type2  = rep(c("C", "D"), each = 2),
  Score2 = as.character(seq(20, 80, by = 20)),
  Colour = rep(c("Black", "White"), times = 2)
)
ID  Type1    Score1   Type2    Score2    Colour
1       A        10       C        20     Black
2       B        20       C        40     White
3       A        30       D        60     Black
4       B        40       D        80     White

對於這個,它應該看起來像這樣

在此處輸入圖片說明

當只有一種類型時,我通常會先用reshape重塑數據,再用ggplot繪圖(我在網上找到的大多數答案也幾乎都是這種做法)。但是,現在類型不止一種,我不太確定該如何處理。有什麼建議嗎?

關於第一個請求,請嘗試:

# Example data: one score per grouping scheme on every row
# (Type1 -> Score1, Type2 -> Score2).
df <- data.frame(
  ID = c(1, 2, 3, 4),
  Type1 = c("A", "B", "A", "B"),
  Score1 = c(10, 20, 30, 40),
  Type2 = c("C", "C", "D", "D"),
  Score2 = c(20, 40, 60, 80)
)

# Mean score per category. tapply() returns the groups in sorted level order
# (A, B and C, D), which is also the left-to-right order of the bars.
ab_means <- tapply(df$Score1, df$Type1, mean)
cd_means <- tapply(df$Score2, df$Type2, mean)

# Number of observations behind each bar. Computed with the same grouping as
# the means so the counts can never get out of step with the bars.
ab_ns <- tapply(df$Score1, df$Type1, length)
cd_ns <- tapply(df$Score2, df$Type2, length)

# xpd = TRUE lets the group captions be drawn below the axis, outside the
# plot region. The previous setting is saved and restored at the end.
old_par <- par(xpd = TRUE)

# barplot() returns the x midpoints of the bars; they anchor all the labels.
bp <- barplot(
  c(ab_means, cd_means),
  main = "Average", ylab = "Score", font.lab = 2
)

# Group captions centred under each pair of bars.
text(x = mean(bp[1:2]), y = -10, labels = "Score 1", font = 2)
text(x = mean(bp[3:4]), y = -10, labels = "Score 2", font = 2)

# Sample size just above each bar; paste() is already vectorised.
text(
  x = bp, y = c(ab_means, cd_means) + 1.5,
  labels = paste("n =", c(ab_ns, cd_ns))
)

par(old_par)

在此處輸入圖片說明

關於更復雜的請求,請嘗試:

# Make the dataframe (Score2 is stored as text, as in the question's data)
df2 <- data.frame(
  ID = c(1, 2, 3, 4),
  Type1 = c("A", "B", "A", "B"),
  Score1 = c(10, 20, 30, 40),
  Type2 = c("C", "C", "D", "D"),
  Score2 = c("20", "40", "60", "80"),
  Colour = c("Black", "White", "Black", "White")
)

# Scores as numbers and types as factors, all taken from df2 itself.
# Factors keep every level even when a colour subset contains no rows for it,
# so tapply() yields NA for that group instead of dropping it. Each panel
# therefore always has four bar slots (A, B, C, D), whatever the R version's
# stringsAsFactors default.
score1 <- as.numeric(as.character(df2$Score1))
score2 <- as.numeric(as.character(df2$Score2))
type1 <- factor(df2$Type1)
type2 <- factor(df2$Type2)
is_black <- df2$Colour == "Black"
is_white <- df2$Colour == "White"

# Define plot region (previous settings are saved and restored at the end)
old_par <- par(
  mfcol = c(1, 2),     # two columns, one row
  xpd = TRUE,          # let things be plotted outside of the plotting region
  mar = c(5, 4, 6, 2)  # margins of the plotting region
)

# Compute the means per type within each colour (NA where a group is empty)
ab_black_means <- tapply(score1[is_black], type1[is_black], mean)
cd_black_means <- tapply(score2[is_black], type2[is_black], mean)

ab_white_means <- tapply(score1[is_white], type1[is_white], mean)
cd_white_means <- tapply(score2[is_white], type2[is_white], mean)

black_means <- c(ab_black_means, cd_black_means)
white_means <- c(ab_white_means, cd_white_means)

# Compute the Ns to plot above each bar; table() reports 0 for empty groups
# and uses the same level order as the means.
black_ns <- c(table(type1[is_black]), table(type2[is_black]))
white_ns <- c(table(type1[is_white]), table(type2[is_white]))

# Compute the maximum score value to set a common y scale for both panels
max_all <- ceiling(max(score1, score2, na.rm = TRUE))

# First barplot (Black). A bar whose mean is NA is simply not drawn.
bp1 <- barplot(
  black_means,
  ylab = "Score", font.lab = 2, ylim = c(0, max_all)
)
text(x = mean(bp1[1:2]), y = par("usr")[3] - 11, labels = "Score 1", font = 2)
text(x = mean(bp1[3:4]), y = par("usr")[3] - 11, labels = "Score 2", font = 2)

# n labels: positions with an NA mean have an NA y coordinate and are skipped
text(x = bp1, y = black_means + 2, labels = paste("n =", black_ns))
axis(side = 1, at = bp1, labels = rep("", length(bp1)))

text(
  x = mean(bp1), y = par("usr")[3] - 16,
  labels = "Black", font = 2, cex = 1.3
)

# Second barplot (White); the y axis is suppressed because it shares the
# scale of the first panel.
bp2 <- barplot(white_means, yaxt = "n", ylim = c(0, max_all))
text(x = mean(bp2[1:2]), y = par("usr")[3] - 11, labels = "Score 1", font = 2)
text(x = mean(bp2[3:4]), y = par("usr")[3] - 11, labels = "Score 2", font = 2)
axis(side = 1, at = bp2, labels = rep("", length(bp2)))

# n labels are anchored on this panel's own bar midpoints (bp2)
text(x = bp2, y = white_means + 2, labels = paste("n =", white_ns))

text(
  x = mean(bp2), y = par("usr")[3] - 16,
  labels = "White", font = 2, cex = 1.3
)

# Final title
title(main = "Average Score", outer = TRUE, line = -3)

par(old_par)

在此處輸入圖片說明

在最後一幅圖中,請注意有些條形不會出現。這是因為某些均值無法計算(例如,數據中沒有類型B且顏色為'Black'的觀測值,因此無法得到對應的mean)。

如果您確實希望x軸連續、沒有中斷,則可以使用curve()在每個面板上補畫一條基線:

# Make the dataframe (Score2 is stored as text, as in the question's data)
df2 <- data.frame(
  ID = c(1, 2, 3, 4),
  Type1 = c("A", "B", "A", "B"),
  Score1 = c(10, 20, 30, 40),
  Type2 = c("C", "C", "D", "D"),
  Score2 = c("20", "40", "60", "80"),
  Colour = c("Black", "White", "Black", "White")
)

# Scores as numbers and types as factors, all taken from df2 itself.
# Factors keep every level even when a colour subset contains no rows for it,
# so tapply() yields NA for that group instead of dropping it. Each panel
# therefore always has four bar slots (A, B, C, D), whatever the R version's
# stringsAsFactors default.
score1 <- as.numeric(as.character(df2$Score1))
score2 <- as.numeric(as.character(df2$Score2))
type1 <- factor(df2$Type1)
type2 <- factor(df2$Type2)
is_black <- df2$Colour == "Black"
is_white <- df2$Colour == "White"

# Define plot region (previous settings are saved and restored at the end)
old_par <- par(
  mfcol = c(1, 2),     # two columns, one row
  xpd = TRUE,          # let things be plotted outside of the plotting region
  mar = c(5, 4, 6, 2)  # margins of the plotting region
)

# Compute the means per type within each colour (NA where a group is empty)
ab_black_means <- tapply(score1[is_black], type1[is_black], mean)
cd_black_means <- tapply(score2[is_black], type2[is_black], mean)

ab_white_means <- tapply(score1[is_white], type1[is_white], mean)
cd_white_means <- tapply(score2[is_white], type2[is_white], mean)

black_means <- c(ab_black_means, cd_black_means)
white_means <- c(ab_white_means, cd_white_means)

# Compute the Ns to plot above each bar; table() reports 0 for empty groups
# and uses the same level order as the means.
black_ns <- c(table(type1[is_black]), table(type2[is_black]))
white_ns <- c(table(type1[is_white]), table(type2[is_white]))

# Compute the maximum score value to set a common y scale for both panels
max_all <- ceiling(max(score1, score2, na.rm = TRUE))

# First barplot (Black). A bar whose mean is NA is simply not drawn.
bp1 <- barplot(
  black_means,
  ylab = "Score", font.lab = 2, ylim = c(0, max_all)
)
text(x = mean(bp1[1:2]), y = par("usr")[3] - 11, labels = "Score 1", font = 2)
text(x = mean(bp1[3:4]), y = par("usr")[3] - 11, labels = "Score 2", font = 2)

# n labels: positions with an NA mean have an NA y coordinate and are skipped
text(x = bp1, y = black_means + 2, labels = paste("n =", black_ns))
axis(side = 1, at = bp1, labels = rep("", length(bp1)))

text(
  x = mean(bp1), y = par("usr")[3] - 16,
  labels = "Black", font = 2, cex = 1.3
)

# First curve added: a horizontal baseline at y = 0 so the x axis looks
# continuous even where a bar is missing.
curve(expr = 0 * x + 0, from = 0, to = 6, add = TRUE, col = "black")

# Second barplot (White); the y axis is suppressed because it shares the
# scale of the first panel.
bp2 <- barplot(white_means, yaxt = "n", ylim = c(0, max_all))
text(x = mean(bp2[1:2]), y = par("usr")[3] - 11, labels = "Score 1", font = 2)
text(x = mean(bp2[3:4]), y = par("usr")[3] - 11, labels = "Score 2", font = 2)
axis(side = 1, at = bp2, labels = rep("", length(bp2)))

# n labels are anchored on this panel's own bar midpoints (bp2)
text(x = bp2, y = white_means + 2, labels = paste("n =", white_ns))

text(
  x = mean(bp2), y = par("usr")[3] - 16,
  labels = "White", font = 2, cex = 1.3
)

# Second curve added: starts left of the panel (allowed by xpd = TRUE) so the
# baseline extends past the panel's left edge.
curve(expr = 0 * x + 0, from = -10, to = 5, add = TRUE, col = "black")

# Final title
title(main = "Average Score", outer = TRUE, line = -3)

par(old_par)

在此處輸入圖片說明

請讓我知道它是否適合您,以及我是否可以進一步幫助您。 希望能幫助到你!

我試圖在這里創建一個tidyverse解決方案:

對於您的第一個數據框df

library(tidyverse)

# Tidy the data frame: stack the (Type1, Score1) and (Type2, Score2) column
# pairs into one long table with a single Type and a single Score column.
# Type_number / Score_number record which pair each row came from.
df_1 <- df %>%
  select(ID, Type = Type1, Score = Score1) %>%
  mutate(Type_number = as.factor(1), Score_number = as.factor(1))
df_2 <- df %>%
  select(ID, Type = Type2, Score = Score2) %>%
  mutate(Type_number = as.factor(2), Score_number = as.factor(2))

# across() replaces the deprecated mutate_each(funs(...)) idiom.
df_tidy <- bind_rows(df_1, df_2) %>%
  mutate(across(c(ID, Type, Type_number, Score_number), as.factor))

# Summarise the data frame: mean score per Type (within each score set).
# .groups = "drop" returns an ungrouped result.
df_sum <- df_tidy %>%
  group_by(Type, Score_number) %>%
  summarise(Mean_score = mean(Score), .groups = "drop")

# Create plot: one bar per Type, coloured by the score set it belongs to.
# geom_col() is the direct equivalent of geom_bar(stat = "identity").
ggplot(df_sum, aes(x = Type, y = Mean_score, fill = Score_number)) +
  geom_col()

在此處輸入圖片說明

對於第二個數據幀df2

# Tidy the data frame: stack the (Type1, Score1) and (Type2, Score2) column
# pairs into one long table, carrying Colour along for faceting.
df_3 <- df2 %>%
  select(ID, Type = Type1, Score = Score1, Colour) %>%
  mutate(Type_number = as.factor(1), Score_number = as.factor(1))
# Score2 is stored as text in df2, so convert it to numeric before stacking;
# otherwise the two Score columns would have incompatible types.
df_4 <- df2 %>%
  select(ID, Type = Type2, Score = Score2, Colour) %>%
  mutate(
    Type_number = as.factor(2),
    Score_number = as.factor(2),
    Score = as.numeric(as.character(Score))
  )

# across() replaces the deprecated mutate_each(funs(...)) idiom.
df_tidy_2 <- bind_rows(df_3, df_4) %>%
  mutate(across(c(ID, Type, Type_number, Score_number, Colour), as.factor))

# Summarise the data frame: mean score per Type within each Colour.
# .groups = "drop" returns an ungrouped result.
df_sum_2 <- df_tidy_2 %>%
  group_by(Type, Score_number, Colour) %>%
  summarise(Mean_score = mean(Score), .groups = "drop")

# Create plot: one panel per Colour, side by side.
# geom_col() is the direct equivalent of geom_bar(stat = "identity").
ggplot(df_sum_2, aes(x = Type, y = Mean_score, fill = Score_number)) +
  geom_col() +
  facet_wrap(~Colour, nrow = 1)

在此處輸入圖片說明

我的感覺是,這不是您真正想要的解決方案嗎? 我不清楚分析的目的是什么-您要回答的問題是什么。 無論如何,希望這會有所幫助。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM