簡體   English   中英

如何為 ggplot2 的箱線圖添加權重?

[英]How do you add weights to a boxplot with ggplot2?

我正在做一個項目,內容是分析屬於特定領土區域(自治區)的活躍人口的收入。 我需要使用提供給我的樣本權重,用 ggplot2 創建一個直方圖。但是,當我嘗試在美學映射 aes() 中加入參數“weight”時,它不起作用:無論是否包含該參數,繪制出的圖形都相同。 除此之外,我也不知道如何添加加權平均值,因為我的圖表甚至沒有考慮樣本的權重。

這是從領土區域生成所有數據的代碼:

# Reproducible data preparation built on the `incomedata` set from package sae.
# Statement order matters: rnorm() and jitter() both consume the RNG stream
# seeded below, so they must run in exactly this sequence.

# Start from an empty workspace (hidden objects included).
rm(list = ls(all.names = TRUE))

# Install sae on the fly when it cannot be loaded, then attach it.
if (!require(sae)) {
  install.packages("sae")
}
library(sae)
data(incomedata)
help("incomedata")

# Draw one of the 17 region codes at random and echo it.
set.seed(100452840)
cual <- sample(seq_len(17), 1)
(cual)

# Keep only the records with labor > 0 and shift the age code down by one.
datosECV <- incomedata
datosECVmas16 <- subset(datosECV, datosECV$labor > 0)
datosECVmas16$age <- datosECVmas16$age - 1

# Simulated hours: normal draws (mean 34, sd 3) rounded to one decimal,
# then forced to 0 for labor codes 2 and 3.
nrows <- nrow(datosECVmas16)
datosECVmas16$horas <- round(rnorm(nrows, mean = 34, sd = 3), digits = 1)
datosECVmas16$horas[datosECVmas16$labor %in% c(2, 3)] <- 0

# Income: jittered and rounded to one decimal, scaled by 0.7 for labor code 2
# and set to 0 for labor code 3.
datosECVmas16$income <- round(jitter(datosECVmas16$income), digits = 1)
datosECVmas16$income[datosECVmas16$labor == 2] <-
  datosECVmas16$income[datosECVmas16$labor == 2] * 0.7
datosECVmas16$income[datosECVmas16$labor == 3] <- 0

# Final table with renamed columns; `factorel` is the weight rounded to one
# decimal.
datosFinal <- with(
  datosECVmas16,
  data.frame(
    ca = ac,
    prov = prov,
    provlab = provlab,
    gen = gen,
    edad = age,
    nac = nat,
    neduc = educ,
    sitemp = labor,
    ingnorm = income,
    horas = horas,
    factorel = round(weight, digits = 1)
  )
)

# Rows whose first column (`ca`) equals 10.
datos_ComValenciana <- datosFinal[datosFinal$ca == 10, ]
# Write the rows of the selected region (`cual`) to "datos_<Region>.txt" in
# the working directory.
#
# The previous 17-branch if/else chain referenced one pre-built data frame per
# region (datos_Andalucia, datos_Aragon, ...), but only datos_ComValenciana is
# ever created in this script, so every other value of `cual` stopped with
# "object not found". The subset is now derived from `cual` directly; for
# cual == 10 the file name and contents are the same as before.
#
# NOTE(review): assumes column 1 of datosFinal (`ca`) uses the same 1-17 coding
# as the position of each name below, as the `== 10` subset used for
# datos_ComValenciana suggests -- confirm against help("incomedata").
nombres_ca <- c(
  "Andalucia", "Aragon", "Asturias", "Baleares", "Canarias", "Cantabria",
  "CastillaLeon", "CastillaLaMancha", "Catalunya", "ComValenciana",
  "Extremadura", "Galicia", "ComMadrid", "RegMurcia", "ComForalNavarra",
  "PaisVasco", "Rioja"
)

# Fail early with a clear message instead of writing "datos_NA.txt".
stopifnot(length(cual) == 1, cual %in% seq_along(nombres_ca))

datos_region <- datosFinal[datosFinal[, 1] == cual, ]
write.table(
  datos_region,
  paste0("datos_", nombres_ca[cual], ".txt"),
  row.names = FALSE
)

# Drop row 12086 from the full table, then refresh the region-10 subset.
datosFinal <- datosFinal[-12086, ]
datos_ComValenciana <- datosFinal[datosFinal$ca == 10, ]

# Keep only rows with sitemp below 3 and refresh the region-10 subset again.
datosFinal <- datosFinal[datosFinal$sitemp < 3, ]
datos_ComValenciana <- datosFinal[datosFinal$ca == 10, ]

# Sum of the `factorel` weights over the region-10 rows.
N <- sum(datos_ComValenciana$factorel)

重要說明:權重是數據集第 11 列給出的權重(本例中的數據集稱為 datos_ComValenciana)。 該列名為“factorel”。 “ingnorm”列是不同人的收入。

在此處輸入圖像描述

下面這段代碼本應完成這項工作,但沒有奏效:

# Density histogram of `ingnorm` for datos_ComValenciana with the `factorel`
# column mapped to the `weight` aesthetic (18 bins).
#
# Changes from the original snippet:
#   * removed a stray trailing backtick after ggtitle(...), which made the
#     whole expression a syntax error;
#   * `..density..` replaced by after_stat(density), the current spelling.
#
# NOTE(review): the title reads "without the weights" although `weight` is
# mapped here -- confirm which variant this plot is meant to be.
ggplot(
  data = datos_ComValenciana,
  aes(x = ingnorm, y = after_stat(density), weight = factorel)
) +
  geom_histogram(fill = "#5DC863FF", alpha = 0.6, col = "black", bins = 18) +
  xlab("Ingresos normalizados") +
  ylab("Cuenta") +
  scale_fill_viridis(alpha = 1, discrete = TRUE, option = "D") +
  ggtitle("Income without the weights")

您可以直接在 aes() 函數中應用任何權重,就像這樣(我用 mtcars 做了一個玩具示例):

library(ggplot2)
library(viridis)

data("mtcars")

# Weight of each car relative to the mean weight.
mtcars$wt_norm <- with(mtcars, wt / mean(wt))

# Density histogram (18 bins) of mpg multiplied by the relative weight.
# NOTE(review): multiplying `mpg` by `wt_norm` changes the plotted variable; it
# does not weight the bin counts. ggplot2 documents a `weight` aesthetic for
# histograms (e.g. aes(x = mpg, weight = wt_norm)) -- confirm which is intended.
ggplot(
  data = mtcars,
  mapping = aes(x = mpg * wt_norm, y = after_stat(density))
) +
  geom_histogram(
    bins = 18,
    fill = "#5DC863FF",
    col = "black",
    alpha = 0.6
  ) +
  scale_fill_viridis(alpha = 1, discrete = TRUE, option = "D") +
  labs(
    x = "mpg normalizado",
    y = "Cuenta",
    title = "Consumo normalizado por peso"
  )

在此處輸入圖像描述

這會產生與以下不同的結果:

# Density histogram (18 bins) of the untransformed mpg values, for comparison.
ggplot(
  data = mtcars,
  mapping = aes(x = mpg, y = after_stat(density))
) +
  geom_histogram(
    bins = 18,
    fill = "#5DC863FF",
    col = "black",
    alpha = 0.6
  ) +
  scale_fill_viridis(alpha = 1, discrete = TRUE, option = "D") +
  labs(
    x = "mpg",
    y = "Cuenta",
    title = "Consumo"
  )

在此處輸入圖像描述

我絕不是 ggplot 方面的專家,但參數 weights 即使是有效的,似乎也不起作用。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM