ggplot 直方圖在 boolean 上拆分

Question

使用公共數據集（注意：下載需要幾分鐘）：

# Packages used below: fread() comes from data.table, str_split_fixed() from
# stringr, %>% / mutate() / left_join() from dplyr; ggplot2 is needed for the
# histogram drawn from `movielens` afterwards.
library(data.table)
library(stringr)
library(dplyr)
library(ggplot2)

# Download the MovieLens 10M archive to a temporary file (takes a few minutes).
dl <- tempfile()
download.file("http://files.grouplens.org/datasets/movielens/ml-10m.zip", dl)

# ratings.dat separates fields with "::"; turn those into tabs so that fread()
# can parse the text directly.
ratings <- fread(text = gsub("::", "\t", readLines(unzip(dl, "ml-10M100K/ratings.dat"))),
                 col.names = c("userId", "movieId", "rating", "timestamp"))

# movies.dat uses the same "::" separator; split each line into exactly three
# fields, giving a character matrix.
movies <- str_split_fixed(readLines(unzip(dl, "ml-10M100K/movies.dat")), "\\::", 3)
colnames(movies) <- c("movieId", "title", "genres")

# The condition is a single TRUE/FALSE, so use the scalar, short-circuiting
# operators || and && rather than the elementwise | and &.
if (as.numeric(version$year) < 2020 ||
    (version$year == "2020" && as.numeric(version$month) < 3)) {
  # if using R 3.6 or earlier: columns arrive as factors, so movieId must be
  # recovered through its levels rather than its integer codes
  movies <- as.data.frame(movies) %>% mutate(movieId = as.numeric(levels(movieId))[movieId],
                                             title = as.character(title),
                                             genres = as.character(genres))
} else {
  # if using R 4.0 or later: columns are already character
  movies <- as.data.frame(movies) %>% mutate(movieId = as.numeric(movieId),
                                             title = as.character(title),
                                             genres = as.character(genres))
}

# Attach title and genres to every rating row.
movielens <- left_join(ratings, movies, by = "movieId")

如：

  > head(movielens)
   userId movieId rating timestamp                         title                        genres
1:      1     122      5 838985046              Boomerang (1992)                Comedy|Romance
2:      1     185      5 838983525               Net, The (1995)         Action|Crime|Thriller
3:      1     231      5 838983392          Dumb & Dumber (1994)                        Comedy
4:      1     292      5 838983421               Outbreak (1995)  Action|Drama|Sci-Fi|Thriller
5:      1     316      5 838983392               Stargate (1994)       Action|Adventure|Sci-Fi
6:      1     329      5 838983392 Star Trek: Generations (1994) Action|Adventure|Drama|Sci-Fi
>

我正在嘗試使用 fill 拆分 ggplot 直方圖，以顯示整數評分與半星評分之間的差異：

# Flag each rating as whole (TRUE) or half-step (FALSE), then draw a histogram
# of the ratings.
# NOTE(review): `fill = whole` is given to ggplot() itself, outside aes(), so
# it is not part of the aesthetic mapping.
movielens %>% 
  mutate(whole = rating == round(rating)) %>% 
  ggplot(mapping=aes(x=rating), fill=whole) +
  geom_histogram()

這是因為半星評分較不常見；但不知為何，fill 沒有起作用……

Answer 1

您需要把 fill 放在美學映射 aes()（即 mapping 參數）之中，而不是直接當作 ggplot() 呼叫的參數：

library(data.table)
library(stringr)
library(dplyr)
library(ggplot2)

# Download the MovieLens 10M archive to a temporary file (takes a few minutes).
dl <- tempfile()
download.file("http://files.grouplens.org/datasets/movielens/ml-10m.zip", dl)

# ratings.dat separates fields with "::"; turn those into tabs so that fread()
# can parse the text directly.
ratings <- fread(text = gsub("::", "\t", readLines(unzip(dl, "ml-10M100K/ratings.dat"))),
                 col.names = c("userId", "movieId", "rating", "timestamp"))

# movies.dat uses the same "::" separator; split each line into exactly three
# fields, giving a character matrix.
movies <- str_split_fixed(readLines(unzip(dl, "ml-10M100K/movies.dat")), "\\::", 3)
colnames(movies) <- c("movieId", "title", "genres")

# The condition is a single TRUE/FALSE, so use the scalar, short-circuiting
# operators || and && rather than the elementwise | and &.
if (as.numeric(version$year) < 2020 ||
    (version$year == "2020" && as.numeric(version$month) < 3)) {
  # if using R 3.6 or earlier: columns arrive as factors, so movieId must be
  # recovered through its levels rather than its integer codes
  movies <- as.data.frame(movies) %>% mutate(movieId = as.numeric(levels(movieId))[movieId],
                                             title = as.character(title),
                                             genres = as.character(genres))
} else {
  # if using R 4.0 or later: columns are already character
  movies <- as.data.frame(movies) %>% mutate(movieId = as.numeric(movieId),
                                             title = as.character(title),
                                             genres = as.character(genres))
}

# Attach title and genres to every rating row.
movielens <- left_join(ratings, movies, by = "movieId")

# `fill` has to be inside aes() so that ggplot2 maps the `whole` column to the
# bar fill colour; passed directly to ggplot() it is not treated as a mapping.
movielens %>%
  mutate(whole = rating == round(rating)) %>%
  ggplot(mapping = aes(x = rating, fill = whole)) +
  geom_histogram()

ggplot 直方圖在 boolean 上拆分

問題描述

1 個解決方案

解決方案1
1 已采納 2020-08-17 16:08:18

ggplot 直方圖在 boolean 上拆分

問題描述

1 個解決方案

解決方案1 1 已采納 2020-08-17 16:08:18

解決方案1
1 已采納 2020-08-17 16:08:18