I hope someone can help me with this, as I have no idea how to solve it. I am a student and was given a tidytext assignment. When I run my code, I get the following error in the "Page 91" section shown below: "Error in is_corpus_df(corpus): all(names(corpus)[1L:2L] == c("doc_id", "text")) is not TRUE".
I would be grateful if someone could show me how to fix this. Please find my code below.
# Page 83: the sentiments dataset
library(tidytext)
# Evaluate the bundled `sentiments` object at top level to display it.
sentiments
# Page 84: the AFINN, Bing, and NRC sentiment lexicons
# (tidytext is already attached above; the repeated library() call is kept
# so this section can be run on its own.)
library(tidytext)
# Fetch each of the three lexicons by name.
get_sentiments("afinn")
get_sentiments("bing")
get_sentiments("nrc")
#Page 85 Sentiment analysis with inner join
library(tidytext)
library(janeaustenr)
library(dplyr)
library(stringr)
# Build a one-word-per-row table from the texts returned by austen_books(),
# recording for every word the line and chapter it came from.
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    # Line position within each book.
    linenumber = row_number(),
    # Running chapter counter: increments on every line that starts with
    # "chapter" followed by a space and a digit or roman-numeral letter.
    # The pattern must be written on a single line -- a line break inside
    # the string literal becomes part of the regex, which then never matches
    # and leaves `chapter` at 0 for every row.
    chapter = cumsum(str_detect(
      text,
      regex("^chapter [\\divxlc]", ignore_case = TRUE)
    ))
  ) %>%
  ungroup() %>%
  # Split each line of text into individual words, one row per word.
  unnest_tokens(word, text)
# Page 86: joy words in "Emma", using the NRC lexicon.
# Keep only the NRC entries tagged with the "joy" sentiment.
nrc_joy <- filter(get_sentiments("nrc"), sentiment == "joy")

# Restrict the tidy text to "Emma", keep the words that also appear in
# nrc_joy (inner join), and tally them from most to least frequent.
count(
  tidy_books %>%
    filter(book == "Emma") %>%
    inner_join(nrc_joy),
  word,
  sort = TRUE
)
# Page 87: net sentiment per 80-line section of each novel.
# Words are matched against the Bing lexicon, counted per book / section /
# sentiment, widened so that negative and positive counts sit in separate
# columns, and finally reduced to a net score (positive - negative).
library(tidyr)
jane_austen_sentiment <- tidy_books %>%
  # Name the join key explicitly instead of relying on the "Joining by"
  # guess that dplyr reports when `by` is omitted.
  inner_join(get_sentiments("bing"), by = "word") %>%
  # Integer division groups lines into consecutive 80-line sections.
  count(book, index = linenumber %/% 80, sentiment) %>%
  # pivot_wider() replaces the superseded spread(); sections that lack one
  # of the two sentiments get a count of 0 rather than NA.
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = list(n = 0)
  ) %>%
  mutate(sentiment = positive - negative)

# Plot the sentiment trajectory of each novel, one facet per book.
library(ggplot2)
ggplot(jane_austen_sentiment, aes(index, sentiment, fill = book)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~book, ncol = 2, scales = "free_x")
# Page 87: comparing the three sentiment dictionaries.
# Work on a single novel: keep only the rows of "Pride & Prejudice".
pride_prejudice <- filter(tidy_books, book == "Pride & Prejudice")
pride_prejudice
# Page 88: net sentiment per 80-line section of "Pride & Prejudice" under
# each of the three lexicons. Integer division (%/%) assigns every line to a
# section.

# AFINN: sum the lexicon's `value` column within each section.
afinn <- pride_prejudice %>%
  inner_join(get_sentiments("afinn"), by = "word") %>%
  group_by(index = linenumber %/% 80) %>%
  summarise(sentiment = sum(value)) %>%
  mutate(method = "AFINN")

# Bing and NRC: both label words as positive/negative, so they share the
# count -> widen -> (positive - negative) pattern. NRC is first reduced to
# its "positive" and "negative" entries.
bing_and_nrc <- bind_rows(
  pride_prejudice %>%
    inner_join(get_sentiments("bing"), by = "word") %>%
    mutate(method = "Bing et al."),
  pride_prejudice %>%
    inner_join(
      get_sentiments("nrc") %>%
        filter(sentiment %in% c("positive", "negative")),
      by = "word"
    ) %>%
    mutate(method = "NRC")
) %>%
  count(method, index = linenumber %/% 80, sentiment) %>%
  # pivot_wider() replaces the superseded spread(); a section missing one
  # sentiment gets 0 for it instead of NA.
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = list(n = 0)
  ) %>%
  mutate(sentiment = positive - negative)
# Page 88, part 2: stack the three per-section results and draw one bar
# chart per lexicon, each facet with its own y scale.
ggplot(
  bind_rows(afinn, bing_and_nrc),
  aes(index, sentiment, fill = method)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~method, ncol = 1, scales = "free_y")
# Page 89: how many positive and negative words does each lexicon contain?
# NRC carries other sentiment labels as well, so keep only the two
# polarities before counting.
count(
  filter(get_sentiments("nrc"), sentiment %in% c("positive", "negative")),
  sentiment
)
count(get_sentiments("bing"), sentiment)
# Page 89, part 2: the most common positive and negative words.
# Match every word against the Bing lexicon and tally each
# (word, sentiment) pair, most frequent first.
bing_word_counts <- ungroup(
  count(
    inner_join(tidy_books, get_sentiments("bing")),
    word, sentiment,
    sort = TRUE
  )
)
bing_word_counts
# Page 90: show the ten biggest contributors to each sentiment, piping
# straight into ggplot2.
bing_word_counts %>%
  group_by(sentiment) %>%
  # slice_max() replaces the superseded top_n(); ordering by `n` is stated
  # explicitly instead of being guessed from the last column.
  slice_max(n, n = 10) %>%
  ungroup() %>%
  # Order the words by count so the bars are sorted within each facet.
  mutate(word = reorder(word, n)) %>%
  # Counts go on x and words on y, which gives horizontal bars directly;
  # coord_flip() was removed because it squeezed the chart.
  ggplot(aes(n, word, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(x = "Contribution to sentiment", y = NULL)
# Page 90, part 2: add "miss" to a custom stop-word list by stacking a
# one-row tibble on top of the standard stop_words table.
custom_stop_words <- bind_rows(
  tibble(word = "miss", lexicon = "custom"),
  stop_words
)
custom_stop_words
# Page 91: word cloud drawn with the wordcloud package (base R graphics).
library(wordcloud)
# Drop stop words, count the remaining words, and plot up to 100 of them.
with(
  count(anti_join(tidy_books, stop_words), word),
  wordcloud(word, n, max.words = 100)
)
# Page 91, part 2: comparison cloud of negative vs. positive words.
library(reshape2)
# acast() reshapes the (word, sentiment, n) counts into a matrix with one
# row per word and one column per sentiment, filling absent pairs with 0;
# that matrix is what comparison.cloud() receives.
comparison.cloud(
  tidy_books %>%
    inner_join(get_sentiments("bing")) %>%
    count(word, sentiment, sort = TRUE) %>%
    acast(word ~ sentiment, value.var = "n", fill = 0),
  colors = c("gray20", "gray80"),
  max.words = 100
)
# Page 91: looking at units beyond words -- tokenize into sentences.
# `prideprejudice` is qualified with its package so that an object of the
# same name in the workspace cannot shadow the packaged text.
# NOTE(review): the reported is_corpus_df(corpus) error is what the sentence
# tokenizer raises when it is handed a data frame instead of character text;
# presumably a data frame named `prideprejudice` had been created earlier in
# the session -- confirm with class(prideprejudice).
PandP_sentences <- tibble(text = janeaustenr::prideprejudice) %>%
  unnest_tokens(sentence, text, token = "sentences")
Thank you very much for your help.
# Rebuild the book table WITHOUT tokenizing into words, so the `text`
# column still holds whole lines that can be split into sentences.
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    line_number = row_number(),
    # The space between "chapter" and the numeral is required: headings are
    # written as "Chapter 1" / "CHAPTER I", so a pattern without the space
    # never matches and `chapter` would stay 0 on every row.
    chapter = cumsum(str_detect(
      text,
      regex("^chapter [\\divxlc]", ignore_case = TRUE)
    ))
  ) %>%
  ungroup()

# Keep only the lines of "Pride & Prejudice".
pridepredjudice <- tidy_books %>%
  filter(book == "Pride & Prejudice") %>%
  tibble()

# Split the remaining text into one sentence per row.
PandP_sentences <- pridepredjudice %>%
  unnest_tokens(sentence, text, token = "sentences")
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.