I am working with the R programming language. I downloaded the following data on world populations and tried to make a histogram:
#data source: https://population.un.org/wpp/Download/Files/1_Indicators%20(Standard)/CSV_FILES/WPP2019_TotalPopulationBySex.csv
# Read the downloaded UN WPP 2019 CSV from the local Downloads folder.
`WPP2019_TotalPopulationBySex.(1)` <- read.csv("C:/Users/ME/Downloads/WPP2019_TotalPopulationBySex (1).csv")
# The name above is non-syntactic (it contains parentheses), so it has to be
# backtick-quoted whenever it is referenced; left unquoted, R parses it as a
# call to a function named `WPP2019_TotalPopulationBySex.` with argument 1.
world_data <- `WPP2019_TotalPopulationBySex.(1)`
> head(world_data)
LocID Location VarID Variant Time MidPeriod PopMale PopFemale PopTotal PopDensity
1 4 Afghanistan 2 Medium 1950 1950.5 4099.243 3652.874 7752.117 11.874
2 4 Afghanistan 2 Medium 1951 1951.5 4134.756 3705.395 7840.151 12.009
3 4 Afghanistan 2 Medium 1952 1952.5 4174.450 3761.546 7935.996 12.156
4 4 Afghanistan 2 Medium 1953 1953.5 4218.336 3821.348 8039.684 12.315
5 4 Afghanistan 2 Medium 1954 1954.5 4266.484 3884.832 8151.316 12.486
6 4 Afghanistan 2 Medium 1955 1955.5 4318.945 3952.047 8270.992 12.669
# Replace each population column by the first character of its printed value,
# converted back to a number.
pop_columns <- c("PopMale", "PopFemale", "PopTotal", "PopDensity")
world_data[pop_columns] <- lapply(
  world_data[pop_columns],
  function(column) as.numeric(substr(column, 1, 1))
)
I tried to make the histogram:
library(ggplot2)
library(scales)
# cowplot (not "cowplot2") supplies plot_grid(), ggdraw() and draw_label().
library(cowplot)

# One histogram per first-digit column. The stray "+ +" on the first plot was
# removed: a unary plus applied to ggtitle() is not a valid ggplot layer.
g1 <- ggplot(world_data, aes(x = PopMale)) + geom_histogram() + ggtitle("Male Population")
g2 <- ggplot(world_data, aes(x = PopFemale)) + geom_histogram() + ggtitle("Female Population")
g3 <- ggplot(world_data, aes(x = PopTotal)) + geom_histogram() + ggtitle("Total Population")
g4 <- ggplot(world_data, aes(x = PopDensity)) + geom_histogram() + ggtitle("Population Density")

# Arrange the four panels in a grid.
plot_row <- plot_grid(g1, g2, g3, g4)

# now add the title
title <- ggdraw() +
  draw_label(
    "World Population: Does Benford's Law Exist?",
    fontface = 'bold',
    x = 0,
    hjust = 0
  ) +
  theme(
    # add margin on the left of the drawing canvas,
    # so title is aligned with left edge of first plot
    plot.margin = margin(0, 0, 0, 7)
  )

# Stack the title above the panel grid.
plot_grid(
  title, plot_row,
  ncol = 1,
  # rel_heights values control vertical title margins
  rel_heights = c(0.1, 1)
)
Question: I am trying to format the x-axis of each histogram so that it displays only integer tick labels (e.g. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9). I tried the following code to do this:
#' Build a break function that yields whole-number axis breaks.
#'
#' @param n Desired number of breaks, forwarded to `pretty()`.
#' @param ... Further arguments forwarded to `pretty()`.
#' @return A function taking a numeric vector `x` and returning the floored
#'   `pretty()` breaks for `x`, with repeated values removed.
integer_breaks <- function(n = 5, ...) {
  fxn <- function(x) {
    # floor() can collapse several fractional candidates onto the same integer
    # (e.g. 0, 0.2, ..., 1 becomes 0, 0, ..., 1), so keep each break only once.
    unique(floor(pretty(x, n, ...)))
  }
  fxn
}
library(ggplot2)

# Same four histograms, now with integer_breaks() supplied as the break function.
# The closing parenthesis of scale_y_continuous() was missing on g2-g4.
# NOTE(review): scale_y_continuous() only sets breaks on the y (count) axis;
# the x axis is left at its defaults by this code.
g1 <- ggplot(world_data, aes(x = PopMale)) + geom_histogram() +
  scale_y_continuous(breaks = integer_breaks()) + ggtitle("Male Population")
g2 <- ggplot(world_data, aes(x = PopFemale)) + geom_histogram() +
  scale_y_continuous(breaks = integer_breaks()) + ggtitle("Female Population")
g3 <- ggplot(world_data, aes(x = PopTotal)) + geom_histogram() +
  scale_y_continuous(breaks = integer_breaks()) + ggtitle("Total Population")
g4 <- ggplot(world_data, aes(x = PopDensity)) + geom_histogram() +
  scale_y_continuous(breaks = integer_breaks()) + ggtitle("Population Density")

# Arrange the four panels in a grid.
plot_row <- plot_grid(g1, g2, g3, g4)

# now add the title
title <- ggdraw() +
  draw_label(
    "World Population: Does Benford's Law Exist?",
    fontface = 'bold',
    x = 0,
    hjust = 0
  ) +
  theme(
    # add margin on the left of the drawing canvas,
    # so title is aligned with left edge of first plot
    plot.margin = margin(0, 0, 0, 7)
  )

# Stack the title above the panel grid.
plot_grid(
  title, plot_row,
  ncol = 1,
  # rel_heights values control vertical title margins
  rel_heights = c(0.1, 1)
)
Problem: However, the x-axis is still displayed exactly as before.
Can someone please show me how to fix this problem?
Thanks!
References:
For a situation like this, where the values are discrete first digits rather than a continuous variable, you should make bar charts rather than histograms:
library(tidyverse)

# Load the UN WPP 2019 table directly from its published URL.
df <- read_csv("https://population.un.org/wpp/Download/Files/1_Indicators%20(Standard)/CSV_FILES/WPP2019_TotalPopulationBySex.csv")
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> LocID = col_double(),
#> Location = col_character(),
#> VarID = col_double(),
#> Variant = col_character(),
#> Time = col_double(),
#> MidPeriod = col_double(),
#> PopMale = col_double(),
#> PopFemale = col_double(),
#> PopTotal = col_double(),
#> PopDensity = col_double()
#> )

# Keep only the "Pop*" columns and stack them into long form:
# one row per (variable, value) pair.
pop_long <- select(df, starts_with("Pop"))
pop_long <- pivot_longer(
  pop_long,
  cols = everything(),
  names_to = "variable",
  values_to = "value"
)

# The first character of each value's text form is the digit that gets counted.
dfplot <- mutate(pop_long, firstdigit = substr(as.character(value), 1, 1))

# One bar chart of first-digit counts per variable.
ggplot(dfplot, aes(firstdigit)) +
  geom_bar() +
  facet_wrap(~variable)
Created on 2021-10-28 by the reprex package (v2.0.1)
I was able to find the answer:
library(ggplot2)

# Returns a fresh x scale with a tick at every integer from 0 to 10.
digit_axis <- function() {
  scale_x_continuous(breaks = seq(0, 10, 1))
}

# Four first-digit histograms, each using the integer x scale.
g1 <- ggplot(world_data, aes(x = PopMale)) +
  geom_histogram() +
  digit_axis() +
  ggtitle("Male Population")
g2 <- ggplot(world_data, aes(x = PopFemale)) +
  geom_histogram() +
  digit_axis() +
  ggtitle("Female Population")
g3 <- ggplot(world_data, aes(x = PopTotal)) +
  geom_histogram() +
  digit_axis() +
  ggtitle("Total Population")
g4 <- ggplot(world_data, aes(x = PopDensity)) +
  geom_histogram() +
  digit_axis() +
  ggtitle("Population Density")

# Panel grid holding the four histograms.
plot_row <- plot_grid(g1, g2, g3, g4)

# Left-aligned bold heading; the 7-unit left margin lines it up with the
# left edge of the first panel.
title <- ggdraw() +
  draw_label("World Population: Does Benford's Law Exist?", fontface = "bold", x = 0, hjust = 0) +
  theme(plot.margin = margin(0, 0, 0, 7))

# Heading on top, panels below; rel_heights sets their relative heights.
plot_grid(title, plot_row, ncol = 1, rel_heights = c(0.1, 1))
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.