简体   繁体   中英

How to correct plot width + correct labels position in R (ggplot2)

I need your help, I'm making my project in R markdown and I'm having some problems with it.

This plot looks very strange: because of some long names (like the second one), the plot area is very slim and the title is cut off. I tried using out.with = "100%" for the plot, but it still looks strange.

The other problem is that here the first bar is very long, so I can't put the labels to the right of the bars, but the bottom bars are very short, so I can't put the labels inside the bars either. How can I solve this?

在此处输入图像描述

Here is the dataset: https://www.kaggle.com/datasets/thedevastator/uncovering-insights-to-college-majors-and-their?select=all-ages.csv

Here is my whole code (the code for the plots shown in the images is marked with a big arrow):

# Package setup and data import.
# Install only the packages that are missing, so re-running the script does
# not re-install everything each time.
required_packages <- c("tidyverse", "scales")
missing_packages <- required_packages[
  !vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# library() stops with an error when a package is unavailable, whereas
# require() only returns FALSE and lets the script fail later.
library(tidyverse)
library(scales)

# Both CSV files are expected in the current working directory.
collegeData <- read_csv("all-ages.csv")
collegeWomenData <- read_csv("women-stem.csv")

# Quick look at the first rows and the column types.
head(collegeData)
str(collegeData)


                #Number students


## Number of students per major category
# Author's note (translated): improve labels  #V
# Sums `Total` within each `Major_category` and draws one horizontal bar per
# category, ordered by that sum, with the value printed in white inside the
# end of the bar.
collegeData %>%
  group_by(Major_category) %>%
  summarize(Total = sum(Total)) %>%
  ggplot(aes(x = Total, y = reorder(Major_category, Total))) +
  geom_col(fill = "darkblue") +
  # comma() gives the bar labels the same thousands separators as the axis.
  geom_text(
    aes(label = comma(Total)),
    color = "white", fontface = "bold", hjust = 1.1
  ) +
  scale_x_continuous(labels = comma) +
  labs(
    title = "Number of students per Major Category",
    x = "Total of students",
    y = "Major category"
  ) +
  theme(
    axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)),
    axis.title.y = element_text(margin = margin(t = 0, r = 25, b = 0, l = 0)),
    # Centre the title, as the other plots in this script do.
    plot.title = element_text(hjust = 0.5)
  )

  
  

# Top 15 majors with the most students
# Sorts the majors by `Total` in descending order, keeps the first 15 rows and
# draws one horizontal bar per major with the value printed inside the bar.
collegeData %>%
  arrange(desc(Total)) %>%
  slice(1:15) %>%
  ggplot(aes(x = Total, y = reorder(Major, Total))) +
  geom_col(fill = "darkblue") +
  # comma() gives the bar labels the same thousands separators as the axis.
  geom_text(
    aes(label = comma(Total)),
    color = "white", fontface = "bold", hjust = 1.1
  ) +
  scale_x_continuous(labels = comma) +
  # Title and y label describe individual majors; the previous text had been
  # copied from the per-category plot.
  labs(
    title = "Top 15 majors with the most students",
    x = "Total of students",
    y = "Major"
  ) +
  theme(
    axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)),
    axis.title.y = element_text(margin = margin(t = 0, r = 25, b = 0, l = 0)),
    plot.title = element_text(hjust = 0.5)
  )

# Top 15 majors with the fewest students
# Second IMAGE <--------------------------------------------------
# Sorts the majors by `Total` in ascending order, keeps the first 15 rows and
# draws one horizontal bar per major.
collegeData %>%
  arrange(Total) %>%
  slice(1:15) %>%
  ggplot(aes(x = Total, y = reorder(Major, Total))) +
  geom_col(fill = "darkblue") +
  # Labels sit just to the right of each bar (negative hjust) in the default
  # dark colour: white text placed inside the bar does not fit the short bars
  # and becomes unreadable once it spills outside them.
  geom_text(
    aes(label = comma(Total)),
    fontface = "bold", hjust = -0.1
  ) +
  # Extra room on the right so the label of the longest bar is not clipped;
  # the left side keeps the default 5% padding.
  scale_x_continuous(
    labels = comma,
    expand = expansion(mult = c(0.05, 0.15))
  ) +
  # Title and y label describe individual majors; the previous text had been
  # copied from the per-category plot.
  labs(
    title = "Top 15 majors with the fewest students",
    x = "Total of students",
    y = "Major"
  ) +
  theme(
    axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)),
    axis.title.y = element_text(margin = margin(t = 0, r = 25, b = 0, l = 0)),
    plot.title = element_text(hjust = 0.5)
  )


                                #Women


# Share of women by STEM major category
# Averages `ShareWomen` within each `Major_category`, then draws horizontal
# bars ordered by that average with a white percentage label inside each bar.
collegeWomenData %>%
  group_by(Major_category) %>%
  summarise(ShareWomen = mean(ShareWomen)) %>%
  ggplot(aes(x = ShareWomen, y = reorder(Major_category, ShareWomen))) +
  geom_col(fill = "darkblue") +
  geom_text(
    aes(label = paste0(format(ShareWomen * 100, digits = 3), "%")),
    color = "white",
    fontface = "bold",
    hjust = 1.1  # slightly more than right-aligned: text ends inside the bar
  ) +
  scale_x_continuous(labels = scales::percent_format()) +
  labs(
    title = "Percentage of females by STEM category",
    x = "Percentage of females",
    y = "Category"
  ) +
  theme(
    # Extra space between each axis title and its axis.
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 25)),
    plot.title = element_text(hjust = 0.5)
  )


# Top 15 STEM majors with the lowest share of women
# Sorts by `ShareWomen` ascending, keeps the first 15 rows and draws one
# horizontal bar per major with a white percentage label inside the bar.
collegeWomenData %>%
  arrange(ShareWomen) %>%
  slice_head(n = 15) %>%
  ggplot(aes(x = ShareWomen, y = reorder(Major, ShareWomen))) +
  geom_col(fill = "darkblue") +
  geom_text(
    aes(label = paste0(format(ShareWomen * 100, digits = 3), "%")),
    color = "white",
    fontface = "bold",
    hjust = 1.1  # slightly more than right-aligned: text ends inside the bar
  ) +
  scale_x_continuous(labels = scales::percent_format()) +
  labs(
    title = "Top 15 STEM majors with the lowest female percentage",
    x = "Percentage of females",
    y = "Major"
  ) +
  theme(
    # Extra space between each axis title and its axis.
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 25)),
    plot.title = element_text(hjust = 0.5)
  )

# Top 15 STEM majors with the highest share of women
# Sorts by `ShareWomen` descending, keeps the first 15 rows and draws one
# horizontal bar per major with a white percentage label inside the bar.
collegeWomenData %>%
  arrange(desc(ShareWomen)) %>%
  slice_head(n = 15) %>%
  ggplot(aes(x = ShareWomen, y = reorder(Major, ShareWomen))) +
  geom_col(fill = "darkblue") +
  geom_text(
    aes(label = paste0(format(ShareWomen * 100, digits = 3), "%")),
    color = "white",
    fontface = "bold",
    hjust = 1.1  # slightly more than right-aligned: text ends inside the bar
  ) +
  scale_x_continuous(labels = scales::percent_format()) +
  labs(
    title = "Top 15 STEM majors with the highest female percentage",
    x = "Percentage of females",
    y = "Major"
  ) +
  theme(
    # Extra space between each axis title and its axis.
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 25)),
    plot.title = element_text(hjust = 0.5)
  )

  
                        #Unemployment rate

# Unemployment rate per major category
# Averages `Unemployment_rate` within each `Major_category` and draws one
# horizontal bar per category, ordered by that average, with a white
# percentage label inside each bar.
collegeData %>%
  group_by(Major_category) %>%
  summarize(average_rate = mean(Unemployment_rate)) %>%
  ggplot(aes(x = average_rate, y = reorder(Major_category, average_rate))) +
  geom_col(fill = "darkblue") +
  geom_text(
    aes(label = paste0(format(average_rate * 100, digits = 3), "%")),
    color = "white",
    fontface = "bold",
    hjust = 1.1  # slightly more than right-aligned: text ends inside the bar
  ) +
  scale_x_continuous(labels = scales::percent_format()) +
  labs(
    title = "Unemployment rate per major category",
    x = "Unemployment rate",
    y = "Major category"
  ) +
  theme(
    # Extra space between each axis title and its axis.
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 25)),
    plot.title = element_text(hjust = 0.5)
  )
  

# Top 15 majors with the highest unemployment rate
# Averages `Unemployment_rate` per `Major`, sorts the averages in descending
# order, keeps the first 15 rows and draws one horizontal bar per major.
collegeData %>%
  group_by(Major) %>%
  summarize(average_rate = mean(Unemployment_rate)) %>%
  arrange(desc(average_rate)) %>%
  slice_head(n = 15) %>%
  ggplot(aes(x = average_rate, y = reorder(Major, average_rate))) +
  geom_col(fill = "darkblue") +
  geom_text(
    aes(label = paste0(format(average_rate * 100, digits = 3), "%")),
    color = "white",
    fontface = "bold",
    hjust = 1.1  # slightly more than right-aligned: text ends inside the bar
  ) +
  scale_x_continuous(labels = scales::percent_format()) +
  labs(
    title = "Top 15 majors with the biggest unemployment rate",
    x = "Unemployment rate",
    y = "Major"
  ) +
  theme(
    # Extra space between each axis title and its axis.
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 25)),
    plot.title = element_text(hjust = 0.5)
  )

# Top 15 majors with the lowest unemployment rate
# Averages `Unemployment_rate` per `Major`, sorts the averages in ascending
# order, keeps the first 15 rows and draws one horizontal bar per major.
collegeData %>%
  group_by(Major) %>%
  summarize(average_rate = mean(Unemployment_rate)) %>%
  arrange(average_rate) %>%
  slice_head(n = 15) %>%
  ggplot(aes(x = average_rate, y = reorder(Major, average_rate))) +
  geom_col(fill = "darkblue") +
  geom_text(
    aes(label = paste0(format(average_rate * 100, digits = 3), "%")),
    color = "white",
    fontface = "bold",
    hjust = 1.1  # slightly more than right-aligned: text ends inside the bar
  ) +
  scale_x_continuous(labels = scales::percent_format()) +
  labs(
    title = "Top 15 majors with the smallest unemployment rate",
    x = "Unemployment rate",
    y = "Major"
  ) +
  theme(
    # Extra space between each axis title and its axis.
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 25)),
    plot.title = element_text(hjust = 0.5)
  )


                #Salary


# Salary per major category
# Takes the mean of the `Median` column within each `Major_category` and
# draws one horizontal bar per category, ordered by that mean, with a white
# dollar label inside each bar.
collegeData %>%
  group_by(Major_category) %>%
  summarize(average_salary = mean(Median)) %>%
  ggplot(
    aes(x = average_salary, y = reorder(Major_category, average_salary))
  ) +
  geom_col(fill = "darkblue") +
  geom_text(
    aes(label = paste0("$", format(average_salary, digits = 5))),
    color = "white",
    fontface = "bold",
    hjust = 1.1  # slightly more than right-aligned: text ends inside the bar
  ) +
  scale_x_continuous(labels = scales::dollar_format()) +
  labs(
    title = "Average salary per major category",
    x = "Salary",
    y = "Major category"
  ) +
  theme(
    # Extra space between each axis title and its axis.
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 25)),
    plot.title = element_text(hjust = 0.5)
  )

# Top 15 worst paying majors
# Sorts the majors by the `Median` column in ascending order, keeps the first
# 15 rows and draws one horizontal bar per major with the value printed in
# white inside the bar.
collegeData %>%
  arrange(Median) %>%
  slice(1:15) %>%
  ggplot(aes(x = Median, y = reorder(Major, Median))) +
  geom_col(fill = "darkblue") +
  # dollar() formats the bar labels the same way as the axis labels
  # (thousands separators included).
  geom_text(
    aes(label = dollar(Median)),
    color = "white", fontface = "bold", hjust = 1.1
  ) +
  scale_x_continuous(labels = scales::dollar_format()) +
  # The plotted column is `Median`, so the title says "median", not "average".
  labs(
    title = "Top 15 majors with the lowest median salaries",
    x = "Salary",
    y = "Major"
  ) +
  theme(
    axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)),
    axis.title.y = element_text(margin = margin(t = 0, r = 25, b = 0, l = 0)),
    plot.title = element_text(hjust = 0.5)
  )


# Top 15 best paying majors
# FIRST IMAGE <----------------------------------------------------
# Sorts the majors by the `Median` column in descending order, keeps the
# first 15 rows and draws one horizontal bar per major with the value printed
# in white inside the bar.
collegeData %>%
  arrange(desc(Median)) %>%
  slice(1:15) %>%
  # Long major names squeeze the panel: convert them to title case and wrap
  # them onto several lines so the axis labels take less horizontal space.
  mutate(Major = str_wrap(str_to_title(Major), width = 40)) %>%
  ggplot(aes(x = Median, y = reorder(Major, Median))) +
  geom_col(fill = "darkblue") +
  # dollar() formats the bar labels the same way as the axis labels
  # (thousands separators included).
  geom_text(
    aes(label = dollar(Median)),
    color = "white", fontface = "bold", hjust = 1.1
  ) +
  scale_x_continuous(labels = scales::dollar_format()) +
  # The plotted column is `Median`, so the title says "median", not "average".
  labs(
    title = "Top 15 majors with the highest median salaries",
    x = "Salary",
    y = "Major"
  ) +
  theme(
    axis.title.x = element_text(margin = margin(t = 20, r = 0, b = 0, l = 0)),
    axis.title.y = element_text(margin = margin(t = 0, r = 25, b = 0, l = 0)),
    plot.title = element_text(hjust = 0.5),
    # Centre the title over the whole plot rather than the narrow panel, so a
    # long title is less likely to be cut off.
    plot.title.position = "plot"
  )

For the width problem I tried out.with = "100%" and other values. For the labels problem I tried putting the values in different places (left, middle, right), but none of them looks good.

You could do many things, but I don't know what works best for you.

First, you can use expand in scale_*_continuous (whether for x or y) to expand the plot limits.

For a long title, you could add a line break (AKA new line, next line, pressing Enter). Another option when using plots like this (horizontal bars) is to center titles over the entire space. Check out the additional call in theme.

For long names, you could modify them. In this case, for some plots, the text is all uppercase. In others, you've got mixed cases. It fits better if you take the plot that's all uppercase and reformat the text. (Note the addition of mutate in the data and the call for the library stringr .)

# Top 15 STEM majors with the lowest share of women, with the percentage
# labels placed just past the end of each bar instead of inside it.
collegeWomenData %>%
  arrange(ShareWomen) %>%
  mutate(Major = str_to_title(Major)) %>%  # all-caps names -> title case
  slice_head(n = 15) %>%
  ggplot(aes(x = reorder(Major, ShareWomen), y = ShareWomen)) +
  geom_col(fill = "darkblue") +
  geom_text(
    aes(label = label_percent()(ShareWomen)),
    hjust = 0,      # left-align the text at its anchor point
    nudge_y = .01   # shift the anchor slightly past the end of the bar
  ) +
  scale_y_continuous(
    labels = percent_format(),
    # No padding below zero, 30% extra above the maximum to fit the labels.
    expand = expansion(mult = c(0, .3))
  ) +
  coord_flip() +  # majors are mapped to x, so flip to get horizontal bars
  labs(
    title = "Top 15 STEM majors with the lowest female percentage",
    x = "Major",
    y = "Percentage of females"
  ) +
  theme(
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 25)),
    plot.title = element_text(hjust = 0.5),
    # Centre the title over the whole plot, not just the panel.
    plot.title.position = "plot"
  )

在此处输入图像描述

You can apply these same principles for the other plot you used in your question.

# Number of students per major category, with the totals printed just past
# the end of each bar instead of inside it.
collegeData %>%
  group_by(Major_category) %>%
  summarise(Total = sum(Total)) %>%
  ggplot(aes(x = reorder(Major_category, Total), y = Total)) +
  geom_col(fill = "darkblue") +
  geom_text(
    aes(label = Total),
    hjust = 0,          # left-align the text at its anchor point
    nudge_y = 100000    # shift the anchor slightly past the end of the bar
  ) +
  scale_y_continuous(
    labels = comma,
    # No padding below zero, 25% extra above the maximum to fit the labels.
    expand = expansion(mult = c(0, .25))
  ) +
  coord_flip() +  # categories are mapped to x, so flip to get horizontal bars
  labs(
    title = "Number of students per Major Category",
    x = "Major category",
    y = "Total of students"
  ) +
  theme(
    axis.title.x = element_text(margin = margin(t = 20)),
    axis.title.y = element_text(margin = margin(r = 25)),
    plot.title = element_text(hjust = 0.5),
    # Centre the title over the whole plot, not just the panel.
    plot.title.position = "plot"
  )

在此处输入图像描述

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM