ggplot - 在 R 中的 x 轴上绘制类别的平均值

Question

晚上好，

这是我的第一个问题，所以请善待。

我想使用 RStudio 分析一个包含 150 多列、300 行的数据集，但我是新手。我的问题是：我想用 ggplot 绘制折线图或条形图，却无法做到以类别为 x 轴、绘制每个类别按性别区分的平均值（无论使用 plot 还是 ggplot 都不行）。另一个问题是如何替换图表标题（即列名）中的“.”。

附上此问题的主要代码以及使用 Excel 的图表图片（作为示例）。 在最好的情况下，我的代码将为每个标题类别（列名的前两个数字）创建一个带有子类别（后两个数字）的图表。 但一开始我试图用一个类别绘制一个图表，但没有奏效。

我会对反馈或提示感到高兴，因为它不可能那么难，但我还没有找到任何东西。

提前谢谢了。

PS：桑迪对这个问题的评论对我不起作用。

# Small example data set: age, gender ("Geschlecht", coded "m"/"w") and three
# item columns whose names end in a "<two digits>_<two digits>" suffix.
Roh_daten <- data.frame(
  Age = c(25, 22, 23, 21, 21, 18),
  Geschlecht = c("m", "w", "m", "m", "m", "m"),
  Test.Kette_01_01 = c(6, 5, 5, 4, 5, 5),
  Test.String_01_02 = c(2, 5, 5, 3, 3, 4),
  Testchar_02_01 = c(0, 5, 5, 4, 6, 6)
)

# Running counter used by the column loop further down in the script.
Laufzahl_i <- 1

# Plot colours for the two genders (chosen arbitrarily).
Farbe_m <- "blue"
Farbe_w <- "red"

library(ggplot2)
library(stringr)

# Return the leftmost `num_char` characters of `text`.
#
# text:     character vector to cut.
# num_char: number of characters to keep, counted from the start.
Links <- function(text, num_char) {
  substr(text, start = 1, stop = num_char)
}
# Return the rightmost `num_char` characters of `text`.
#
# text:     character vector to cut.
# num_char: number of characters to keep, counted from the end.
Rechts <- function(text, num_char) {
  text_len <- nchar(text)
  first_pos <- text_len - (num_char - 1)
  substr(text, first_pos, text_len)
}

# Walk over the columns of Roh_daten. Column 1 is skipped because it is only
# the ID, and the last column is never processed (see the break below).
#
# For every whole-number column the mean per gender is stored in a global
# variable named "<column>_test_men", "<column>_test_woman" and
# "<column>_test_m_w". The "category" of a column is the first two characters
# of its last five characters (the "NN" in a trailing "NN_NN"). When a column
# has the same category as the previous column but not as the next one, its
# means are drawn with base graphics and with ggplot2.
for (i in seq_len(ncol(Roh_daten))[-1]) {
  # Stop before the last column: the look-ahead to column i + 1 below would
  # otherwise index past the end of the data frame.
  if (i == ncol(Roh_daten)) break

  col_name <- colnames(Roh_daten)[i]
  col_values <- Roh_daten[[i]]

  # all.equal() returns a character vector (possibly of length > 1) when the
  # values differ, so it must be wrapped in isTRUE() before being used as an
  # `if` condition. The is.numeric() guard avoids coercion warnings from
  # as.integer() on character columns.
  is_whole_number <- is.numeric(col_values) &&
    isTRUE(all.equal(col_values, as.integer(col_values)))
  if (!is_whole_number) {
    print(paste(col_name, " hat einen Fehler (String)"))
    next
  }

  # Create one variable per mean; its name is built from the column name.
  is_m <- Roh_daten$Geschlecht == "m"
  is_w <- Roh_daten$Geschlecht == "w"
  assign(paste(col_name, "test_men", sep = "_"),
         mean(col_values[is_m], na.rm = TRUE))
  assign(paste(col_name, "test_woman", sep = "_"),
         mean(col_values[is_w], na.rm = TRUE))
  assign(paste(col_name, "test_m_w", sep = "_"),
         mean(col_values[is_m | is_w], na.rm = TRUE))

  category <- Links(Rechts(col_name, 5), 2)
  prev_category <- Links(Rechts(colnames(Roh_daten)[i - 1], 5), 2)
  next_category <- Links(Rechts(colnames(Roh_daten)[i + 1], 5), 2)

  # Only columns that continue the category of the previous column count.
  if (category == prev_category) {
    Laufzahl_i <- Laufzahl_i + 1

    # Last column of the current category: reset the counter and plot.
    if (category != next_category) {
      Laufzahl_i <- 1

      Var_name_m <- paste(col_name, "test_men", sep = "_")
      Var_name_w <- paste(col_name, "test_woman", sep = "_")

      # Title: everything in the variable name before its first underscore.
      plot_title <- Links(Var_name_m, str_locate(Var_name_m, "_")[, "start"] - 1)

      # Base-graphics version.
      plot(get(Var_name_m), type = "b", col = Farbe_m, ylim = c(0, 6),
           yaxt = "n", main = plot_title, ylab = "Wichtigkeit")
      text(x = get(Var_name_m),
           labels = as.character(round(get(Var_name_m), digits = 2)),
           pos = 2, col = Farbe_m)
      text(x = get(Var_name_w),
           labels = as.character(round(get(Var_name_w), digits = 2)),
           pos = 4, col = Farbe_w)
      axis(2, at = seq(0, 6, by = 0.5), las = 2)
      legend(x = "topleft", legend = c("m", "w"),
             col = c(Farbe_m, Farbe_w), bty = "o")
      points(get(Var_name_w), type = "b", col = Farbe_w)

      # ggplot2 version.
      # NOTE(review): newer ggplot2 releases prefer `linewidth` over `size`
      # in element_rect(); `size` is kept for older versions.
      p <- ggplot(data = Roh_daten[i],
                  aes(x = get(Var_name_m), y = get(Var_name_m))) +
        geom_point(size = 1, col = Farbe_m) +
        geom_point(size = 1, col = Farbe_w, aes(y = get(Var_name_w))) +
        theme(panel.border = element_rect(colour = "black", fill = NA, size = 0.5))

      # Inside a loop a ggplot object is only drawn when printed explicitly.
      print(p)
    }
  }
}

Excel 图表 - 示例

Answer 1

问题 1：绘制每个类别的每个性别的平均值

我不确定这是否正是您所要求的，但根据我的理解，您希望得到与 Excel 中相同的图。简而言之，就是把每个类别中每个性别的平均值绘制成折线图或条形图，并在图上标出平均值。

根据您提供的示例，您可以使用 dplyr 和 tidyr 库按性别计算每一列的平均值，并将结果重塑为适合 ggplot 绘图的格式。您可以按以下步骤进行操作：

首先，根据性别获取每列的平均值：

library(dplyr)
# Mean of every column, computed separately for each gender.
summarise_all(group_by(Roh_daten, Geschlecht), .funs = mean)

# A tibble: 2 x 5
  Geschlecht   Age Test.Kette_01_01 Test.String_01_02 Testchar_02_01
  <fct>      <dbl>            <dbl>             <dbl>          <dbl>
1 m           21.6                5               3.4            4.2
2 w           22                  5               5              5

接下来，我们要重塑这些数据，使其符合 ggplot2 所需的格式（简而言之：一列存放 x 值，一列存放 y 值，另有一列表示分组类别），为此可以使用 tidyr 包中的 pivot_longer 函数：

library(dplyr)
library(tidyr)
# Per-gender column means, reshaped to long format: the item columns are
# stacked into "Variable"/"Value", while Geschlecht and Age stay as they are.
Roh_daten %>%
  group_by(Geschlecht) %>%
  summarise_all(mean) %>%
  pivot_longer(
    cols = -c(Geschlecht, Age),
    names_to = "Variable",
    values_to = "Value"
  )

# A tibble: 6 x 4
  Geschlecht   Age Variable          Value
  <fct>      <dbl> <chr>             <dbl>
1 m           21.6 Test.Kette_01_01    5  
2 m           21.6 Test.String_01_02   3.4
3 m           21.6 Testchar_02_01      4.2
4 w           22   Test.Kette_01_01    5  
5 w           22   Test.String_01_02   5  
6 w           22   Testchar_02_01      5

最后，我们可以使用ggplot2得到这样的条形图：

library(dplyr)
library(tidyr)
library(ggplot2)
# Dodged bar chart of the per-gender means, one group of bars per variable,
# with each mean printed as a label above its bar.
Roh_daten %>%
  group_by(Geschlecht) %>%
  summarise_all(mean) %>%
  pivot_longer(
    cols = -c(Geschlecht, Age),
    names_to = "Variable",
    values_to = "Value"
  ) %>%
  ggplot(aes(x = Variable, y = Value, group = Geschlecht)) +
  # geom_col() draws bar heights straight from the data (stat = "identity").
  geom_col(aes(fill = Geschlecht), position = position_dodge()) +
  geom_label(
    aes(label = Value),
    position = position_dodge(0.9),
    vjust = -0.5
  ) +
  ylim(0, 5.5) +
  theme(legend.position = "top")

或者像这样绘制折线和点（ggrepel 库有助于让标签互不重叠地显示）：

library(dplyr)
library(tidyr)
library(ggplot2)
library(ggrepel)
# Line-and-point chart of the per-gender means, one line per gender, with
# the mean values shown as labels placed by ggrepel.
Roh_daten %>%
  group_by(Geschlecht) %>%
  summarise_all(mean) %>%
  pivot_longer(
    cols = -c(Geschlecht, Age),
    names_to = "Variable",
    values_to = "Value"
  ) %>%
  ggplot(aes(x = Variable, y = Value, color = Geschlecht, group = Geschlecht)) +
  geom_point() +
  geom_line() +
  geom_label_repel(aes(label = Value), vjust = -0.5) +
  theme(legend.position = "top")

这是您想要的那种图吗？如果不是，能否请您进一步说明您的问题，因为我没有完全看懂您的代码。

问题 2：替换列名中的点

关于您的第二个问题，即替换数据集列名中的“.”，您可以使用 rebus 库：

library(rebus)
# Replace the dots in the column names with "-". DOT comes from rebus
# (presumably a pattern matching a literal "." - confirm in the rebus docs).
# The result is only returned/printed; Roh_daten itself is not modified.
gsub(pattern = DOT, replacement = "-", x = colnames(Roh_daten))

[1] "Age"               "Geschlecht"        "Test-Kette_01_01"  "Test-String_01_02" "Testchar_02_01"

我希望它能回答你的问题。

ggplot - 在 R 中的 x 轴上绘制类别的平均值

问题描述

1 个解决方案

解决方案1
0 已采纳 2019-12-29 06:52:20

ggplot - 在 R 中的 x 轴上绘制类别的平均值

问题描述

1 个解决方案

解决方案1 0 已采纳 2019-12-29 06:52:20

解决方案1
0 已采纳 2019-12-29 06:52:20