[英]ggplot histogram split on a boolean
使用公共數據集(注意下載需要幾分鐘):
# Build `movielens`: the MovieLens 10M ratings joined with each movie's
# title and genres.
library(data.table)  # fread()
library(stringr)     # str_split_fixed()
library(dplyr)       # %>%, mutate(), left_join()
library(ggplot2)     # needed by the plotting snippet that follows

# Download the archive to a temporary file (this can take a few minutes).
dl <- tempfile()
download.file("http://files.grouplens.org/datasets/movielens/ml-10m.zip", dl)

# ratings.dat uses "::" as its field separator; swap it for tabs so that
# fread() can parse the lines.
ratings <- fread(
  text = gsub("::", "\t", readLines(unzip(dl, "ml-10M100K/ratings.dat"))),
  col.names = c("userId", "movieId", "rating", "timestamp")
)

# movies.dat is split into exactly three "::"-separated fields per line.
movies <- str_split_fixed(
  readLines(unzip(dl, "ml-10M100K/movies.dat")), "\\::", 3
)
colnames(movies) <- c("movieId", "title", "genres")

# stringsAsFactors = FALSE keeps all three columns as character on both
# R 3.6 and R 4.0+, so no version check or factor-level lookup is needed;
# only movieId has to be converted to numeric.
movies <- as.data.frame(movies, stringsAsFactors = FALSE) %>%
  mutate(movieId = as.numeric(movieId))

movielens <- left_join(ratings, movies, by = "movieId")
如:
> head(movielens)
userId movieId rating timestamp title genres
1: 1 122 5 838985046 Boomerang (1992) Comedy|Romance
2: 1 185 5 838983525 Net, The (1995) Action|Crime|Thriller
3: 1 231 5 838983392 Dumb & Dumber (1994) Comedy
4: 1 292 5 838983421 Outbreak (1995) Action|Drama|Sci-Fi|Thriller
5: 1 316 5 838983392 Stargate (1994) Action|Adventure|Sci-Fi
6: 1 329 5 838983392 Star Trek: Generations (1994) Action|Adventure|Drama|Sci-Fi
>
我正在嘗試使用 fill 拆分 ggplot 直方圖,以顯示整數評分和半星評分之間的差異:
# Flag each rating as a whole number (TRUE) or not (FALSE), then draw a
# histogram of the ratings.
movielens %>%
mutate(whole = rating == round(rating)) %>%
# NOTE: `fill=whole` is passed to ggplot() itself, outside aes(), so it is
# not part of the aesthetic mapping.
ggplot(mapping=aes(x=rating), fill=whole) +
geom_histogram()
因為半星評分比較少見,但不知為何 fill 沒有起作用......
您需要把 fill 放在美學映射(即 aes() 的 mapping)裡面,而不是直接放在 ggplot() 調用中:
library(data.table)
library(stringr)
library(dplyr)
library(ggplot2)
# Build `movielens`: the MovieLens 10M ratings joined with each movie's
# title and genres.

# Download the archive to a temporary file (this can take a few minutes).
dl <- tempfile()
download.file("http://files.grouplens.org/datasets/movielens/ml-10m.zip", dl)

# ratings.dat uses "::" as its field separator; swap it for tabs so that
# fread() can parse the lines.
ratings <- fread(
  text = gsub("::", "\t", readLines(unzip(dl, "ml-10M100K/ratings.dat"))),
  col.names = c("userId", "movieId", "rating", "timestamp")
)

# movies.dat is split into exactly three "::"-separated fields per line.
movies <- str_split_fixed(
  readLines(unzip(dl, "ml-10M100K/movies.dat")), "\\::", 3
)
colnames(movies) <- c("movieId", "title", "genres")

# stringsAsFactors = FALSE keeps all three columns as character on both
# R 3.6 and R 4.0+, so no version check or factor-level lookup is needed;
# only movieId has to be converted to numeric.
movies <- as.data.frame(movies, stringsAsFactors = FALSE) %>%
  mutate(movieId = as.numeric(movieId))

movielens <- left_join(ratings, movies, by = "movieId")
# Histogram of ratings with bars coloured by whether the rating is a whole
# number. `fill` sits inside aes() so ggplot2 treats it as a data mapping.
ggplot(
  data = mutate(movielens, whole = round(rating) == rating),
  mapping = aes(x = rating, fill = whole)
) +
  geom_histogram()
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.