簡體   English   中英

R:繪制線性回歸缺少連接線

[英]R: plotting linear regression misses connecting line

我想繪制收入與是否進行戰略性投票之間的線性回歸圖,但我的代碼沒有按預期工作。當我繪制年齡與戰略投票之間的線性回歸時,它工作得很好,你可以在這里看到年齡的圖。

但是對我的收入變量使用相同的代碼卻不起作用——箱線圖可以畫出來,但沒有連接線(見收入的圖)。

我希望能深入了解為什么它不適用於我的收入變量。我覺得這可能與收入變量中引入的 NA 有關,但我無法修復它。提前非常感謝;)

用於年齡和戰略投票的代碼:

# Mean age (Q3) within each StrategischeWahl2021 group; the resulting two-row
# table supplies the red mean markers and connecting line in the age plot.
Alter_Strat2021 <- Deskriptive_Statistik %>%
  select(Q3, StrategischeWahl2021) %>%
  ungroup() %>%
  group_by(StrategischeWahl2021) %>%
  summarise(Q3 = mean(as.numeric(Q3)))

# Mean of the numeric age vector Desk_NumericQ3.
MeanAlter2021 <- mean(Desk_NumericQ3)

# Boxplot of age (Q3) per StrategischeWahl2021 group with jittered raw points.
# The per-group means from Alter_Strat2021 are overlaid as red triangles and
# joined by a red line.
Deskriptive_Statistik %>%
  ungroup() %>%
  mutate(Q3 = as.numeric(Q3)) %>%
  ggplot(aes(x = StrategischeWahl2021, y = Q3)) +
  geom_boxplot(width = 0.1) +
  geom_jitter(width = 0.1, alpha = 0.1) +
  geom_point(data = Alter_Strat2021, color = "red", size = 5, shape = 17) +
  # group = 1 places both mean points in a single group so that one line can
  # be drawn between them across the discrete x axis.
  geom_line(data = Alter_Strat2021, group = 1, color = "red") +
  scale_y_continuous(breaks = seq(0, 80, 5)) +
  scale_x_discrete(labels = c("Nein", "Ja")) +
  labs(
    x = "Strategische Wahl 2021",
    y = "Alter",
    title = "Lineare Regression Zusammenhang zwischen Alter und Strategische Wahl 2021"
  ) +
  theme_minimal()

用於收入和戰略投票的代碼:

# Mean income (Q5) within each StrategischeWahl2021 group.
# NOTE: Q5 is a character column that also holds non-numeric answers (e.g. "-"
# or free text). as.numeric() turns those into NA, and mean() without
# na.rm = TRUE then returns NA for the whole group, so both group means are NA.
Einkommen_Strat2021<- Deskriptive_Statistik %>% 
  select(Q5, StrategischeWahl2021) %>% 
  ungroup %>%
  group_by(StrategischeWahl2021) %>%
  summarise(Q5 = mean(as.numeric(Q5)))

# Mean of the numeric income vector Desk_NumericQ5.
MeanEinkommen <- mean(Desk_NumericQ5)

# Boxplot of income (Q5) per StrategischeWahl2021 group with jittered raw
# points, plus an overlay of the per-group means from Einkommen_Strat2021
# (red triangles joined by a red line).
# NOTE: the overlay layers take their y values from Einkommen_Strat2021$Q5.
# When those means are NA there is nothing to draw, so the boxplots appear
# without the red markers and connecting line.
Deskriptive_Statistik %>% 
  ungroup %>% mutate(Q5 = as.numeric(Q5)) %>%
  ggplot()+
  aes(x=StrategischeWahl2021, y=Q5)+
  geom_boxplot(width = .1)+
  geom_jitter(width = .1, alpha = .1)+
  scale_y_continuous(breaks=seq(0,20000,5000))+
  # Overlay of the group means; group=1 puts both means in one group so a
  # single line can connect them across the discrete x axis.
  geom_point(data= Einkommen_Strat2021,
             color="red",
             size=5,
             shape=17) + geom_line(data=Einkommen_Strat2021,
                                   group=1,
                                   color="red")+
  xlab("Strategische Wahl 2021")+
  ylab ("Einkommen")+
  ggtitle ("Lineare Regression Zusammenhang zwischen Einkommen und Strategische Wahl 2021")

年齡和戰略投票數據:

dput(Alter_Strat2021)
structure(list(StrategischeWahl2021 = c("0", "1"), Q3 = c(26.8603351955307, 
27.6375)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-2L))
> dput(Desk_NumericQ3)
c(24, 20, 20, 19, 21, 33, 27, 20, 53, 31, 21, 22, 21, 20, 25, 
21, 24, 29, 53, 20, 21, 22, 48, 28, 20, 23, 29, 29, 23, 41, 29, 
21, 29, 47, 23, 53, 34, 19, 23, 24, 29, 29, 20, 22, 29, 25, 21, 
22, 29, 20, 30, 21, 23, 19, 23, 18, 25, 22, 28, 25, 22, 21, 24, 
24, 29, 55, 20, 20, 21, 20, 28, 22, 21, 22, 20, 31, 22, 20, 31, 
22, 22, 30, 20, 22, 18, 23, 55, 22, 25, 25, 21, 39, 22, 20, 49, 
58, 20, 19, 21, 22, 29, 23, 32, 35, 20, 20, 21, 28, 24, 28, 60, 
70, 43, 21, 25, 60, 34, 54, 24, 25, 23, 21, 48, 20, 25, 24, 21, 
25, 22, 24, 21, 22, 21, 18, 22, 21, 22, 18, 19, 71, 23, 26, 18, 
24, 21, 51, 37, 41, 23, 25, 22, 35, 21, 18, 22, 29, 26, 21, 22, 
23, 43, 22, 23, 22, 21, 69, 20, 25, 54, 20, 26, 28, 23, 28, 38, 
21, 22, 78, 23, 25, 25, 63, 32, 33, 20, 21, 20, 23, 21, 24, 19, 
24, 37, 21, 26, 24, 21, 23, 21, 19, 22, 22, 25, 20, 22, 22, 19, 
30, 19, 22, 19, 26, 23, 25, 21, 36, 25, 22, 23, 22, 23, 22, 20, 
21, 29, 22, 19, 22, 22, 60, 29, 21, 20, 21, 23, 21, 23, 19, 60, 
59, 20, 23, 60, 23, 24, 22, 22, 27, 23, 19, 22, 18, 21, 22, 19, 
68, 26, 21, 20)

用於收入和戰略投票的數據:

dput(Einkommen_Strat2021)
structure(list(StrategischeWahl2021 = c("0", "1"), Q5 = c(NA_real_, 
NA_real_)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-2L))
> dput(Desk_NumericQ5)
structure(c(900, 400, 6000, 4600, 3700, 800, 10000, 1300, 2300, 
0, 670, 2500, 0, 8500, 2700, 2000, 1000, 1500, 180, 0, 1300, 
450, 4000, 1100, 8000, 3000, 861, 5000, 1250, 2600, 6000, 450, 
1400, 450, 4800, 4900, 0, 500, 2500, 1, 2400, 2500, 1700, 0, 
750, 450, 3400, 1300, 13000, 1400, 1400, 2700, 150, 2100, 8000, 
0, 12000, 600, 450, 4000, 1000, 0, 2000, 600, 0, 2531, 800, 1200, 
500, 1100, 0, 2950, 4000, 1500, 1800, 450, 8600, 7000, 750, 0, 
5000, 900, 4000, 2000, 6000, 800, 3500, 4000, 3000, 4500, 400, 
450, 4000, 1600, 3300, 2500, 1500, 815, 2800, 3500, 100, 2500, 
300, 500, 1749, 700, 1250, 450, 1200, 700, 426, 900, 0, 0, 1500, 
0, 1250, 1700, 700, 200, 4000, 5500, 3200, 0, 600, 1389, 5000, 
900, 600, 3100, 2000, 850, 1535, 1400, 2500, 850, 0, 2700, 777, 
700, 5500, 2350, 6000, 219, 3000, 3000, 0, 1500, 1800, 0, 1900, 
1600, 2600, 1200, 1000, 2700, 5600, 650, 1200, 450, 15000, 800, 
2600, 200, 2300, 2400, 600, 0, 0, 1300, 450, 800, 800, 2000, 
0, 2500, 200, 3500, 500, 1600, 20000, 0, 2800, 10000, 700, 4500, 
1100, 1200, 8000, 3500, 860, 1000, 1800, 5000, 1000, 600, 950, 
0, 3000, 2400, 1600, 1500, 900, 2500, 2300, 1700, 4500, 250, 
0, 450, 0, 600, 2800, 1200, 1600), na.action = structure(c(2L, 
3L, 10L, 17L, 24L, 32L, 43L, 47L, 53L, 61L, 62L, 73L, 75L, 76L, 
79L, 80L, 85L, 91L, 94L, 98L, 104L, 105L, 107L, 143L, 144L, 153L, 
160L, 165L, 170L, 179L, 184L, 185L, 197L, 200L, 204L, 211L, 219L, 
220L, 221L, 228L, 240L, 241L, 246L, 251L), class = "omit"))

編輯:Deskriptive_Statistik 的數據

Deskriptive_Statistik <- Deskriptive_Statistik %>% 
+   select(Q3, Q5, StrategischeWahl2021)
> dput(Deskriptive_Statistik)
structure(list(Q3 = c("24", "20", "20", "19", "21", "33", "27", 
"20", "53", "31", "21", "22", "21", "20", "25", "21", "24", "29", 
"53 ", "20", "21", "22", "48", "28", "20", "23", "29", "29", 
"23", "41", "29", "21", "29", "47", "23", "53", "34", "19", "23", 
"24", "29", "29", "20", "22", "29", "25", "21", "22", "29", "20", 
"30", "21", "23", "19", "23", "18", "25", "22", "28", "25", "22", 
"21", "24", "24", "29", "55", "20", "20", "21", "20", "28", "22", 
"21", "22", "20", "31", "22", "20", "31", "22", "22", "30", "20", 
"22", "18", "23", "55", "22", "25", "25", "21", "39", "22", "20", 
"49", "58", "20", "19", "21", "22", "29", "23", "32", "35", "20", 
"20", "21", "28", "24", "28", "60", "70", "43", "21", "25", "60", 
"34", "54", "24", "25", "23", "21", "48", "20", "25", "24", "21", 
"25", "22", "24", "21", "22", "21", "18", "22", "21", "22", "18", 
"19", "71", "23", "26", "18", "24", "21", "51", "37", "41", "23", 
"25", "22", "35", "21", "18", "22", "29", "26", "21", "22", "23", 
"43", "22", "23", "22", "21", "69", "20", "25", "54", "20", "26", 
"28", "23", "28", "38", "21", "22", "78", "23", "25", "25", "63", 
"32", "33", "20", "21", "20", "23", "21", "24", "19", "24", "37", 
"21", "26", "24", "21", "23", "21", "19", "22", "22", "25", "20", 
"22", "22", "19", "30", "19", "22", "19", "26", "23", "25", "21", 
"36", "25", "22", "23", "22", "23", "22", "20", "21", "29", "22", 
"19", "22", "22", "60", "29", "21", "20", "21", "23", "21", "23", 
"19", "60", "59", "20", "23", "60", "23", "24", "22", "22", "27", 
"23", "19", "22", "18", "21", "22", "19", "68", "26", "21", "20"
), Q5 = c("900", "800", "Verstehe die Frage nicht ", "400", "6000", 
"4600", "3700", "800", "10000", "-", "1300", "2300", "0", "670", 
"2500", "0", "-", "8500", "2700 ", "2000", "1000", "1500", "180", 
"4300", "0.00", "1300", "450", "4000", "1100", "8000", "3000", 
"2000", "861", "5000", "1250 ", "2600", "6000", "450", "1400", 
"450", "4800", "4900", "-", "0", "500", "2500", "1000", "1", 
"2400", "2500", "1700", "0", "1700", "750", "450", "3400", "1300", 
"13000", "1400", "1400", "-", "800", "2700", "150", "2100", "8000", 
"0", "12000", "600", "450", "4000", "1000", "1500", "0", "-", 
"-", "2000", "600", "-", "0", "0", "2531", "800", "1200", "-", 
"500", "1100", "0", "2950", "4000", "4500", "1500", "1800", "350", 
"450", "8600", "7000", "-", "750", "0", "5000", "900", "4000", 
"-", "420", "2000", "-", "6000", "800", "3500", "4000", "3000", 
"4500", "400", "450", "4000", "1600", "3300", "2500", "1500", 
"815", "2800", "3500", "100", "2500", "300", "500", "1749", "700", 
"1250", "450", "1200", "700", "426", "900", "0", "0", "1500", 
"0", "1250", "1700", "700", "-", "-", "200", "4000", "5500", 
"3200", "0", "600", "1389", "5000", "-", "900", "600", "3100", 
"2000", "850", "1535", "450", "1400", "2500", "850", "0", "250", 
"2700", "777", "700", "5500", "-", "2350", "6000", "219", "3000", 
"3000", "0 ", "1500", "1800", "-", "0", "1900", "1600", "2600", 
"3600", "900", "1200", "1000", "2700", "5600", "650", "1200", 
"450", "15000", "800", "2600", "200", "2400", "2300", "2400", 
"-", "600", "0", "0", "1900", "1300", "450", "800", "800", "2000", 
"0", "Keine Ahnung ", "2500", "200", "3500", "500", "1600", "20000", 
"0", "-", "3750", "2400", "2800", "10000", "700", "4500", "1100", 
"1200", "860", "8000", "3500", "860", "1000", "1800", "5000", 
"1000", "600", "950", "0", "3000", "4000", "0", "2400", "1600", 
"1500", "900", "300", "2500", "2300", "1700", "4500", "40", "250", 
"0", "450", "0", "600", "2800", "1200", "1600"), StrategischeWahl2021 = c("0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", 
"0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "1", "0", 
"0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "1", "0", "0", 
"0", "0", "0", "0", "1", "0", "1", "1", "0", "1", "1", "0", "1", 
"0", "1", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", "1", 
"1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", 
"1", "0", "0", "1", "0", "1", "0", "0", "1", "0", "1", "1", "0", 
"0", "0", "1", "1", "0", "1", "0", "1", "1", "0", "0", "0", "0", 
"0", "1", "0", "0", "0", "1", "0", "1", "1", "1", "1", "0", "1", 
"1", "0", "0", "0", "0", "1", "0", "1", "0", "0", "0", "1", "0", 
"0", "0", "1", "1", "0", "0", "1", "0", "1", "0", "0", "0", "0", 
"0", "1", "0", "0", "0", "1", "1", "0", "0", "0", "0", "1", "0", 
"1", "0", "0", "0", "1", "0", "1", "0", "0", "1", "0", "0", "1", 
"1", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "1", "0", 
"1", "0", "0", "0", "0", "0", "1", "0", "1", "0", "1", "0", "0", 
"1", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", 
"1", "0", "0", "1", "0", "1", "0", "0", "0", "0", "0", "1", "0", 
"1", "1", "0", "1", "0", "0", "1", "0", "0", "0", "0", "1", "0", 
"1", "0", "1", "1", "1", "0", "0", "0", "0", "1", "0", "0", "0", 
"1", "1", "0", "0", "0", "1", "1", "0", "0", "0", "0")), row.names = 3:261, class = "data.frame")

並非 Q5 列中的所有數據都是數字,因此當您執行as.numeric(Q5)時,它會在列中生成一些 NA。
NA 的平均值是 NA。

要修復它,請使用: summarize(Q5 = mean(as.numeric(Q5), na.rm=TRUE))

# Mean income (Q5) within each StrategischeWahl2021 group. na.rm = TRUE drops
# the NA values that as.numeric() produces for non-numeric answers, so each
# group mean is computed from the remaining numeric values instead of being NA.
Einkommen_Strat2021 <- Deskriptive_Statistik %>%
  select(Q5, StrategischeWahl2021) %>%
  ungroup() %>%
  group_by(StrategischeWahl2021) %>%
  summarise(Q5 = mean(as.numeric(Q5), na.rm = TRUE))

Einkommen_Strat2021

# A tibble: 2 × 2
  StrategischeWahl2021    Q5
  <chr>                <dbl>
1 0                    2229.
2 1                    1917.

現在 ggplot 函數調用可以按預期工作了。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM