简体   繁体   中英

Error in is_corpus_df(corpus) : all(names(corpus)[1L:2L] == c("doc_id", "text")) is not TRUE

I hope someone can help me with this, as I have no idea how to solve it. I am a student and was given a tidytext assignment. When I ran my code, I got the error "Error in is_corpus_df(corpus): all(names(corpus)[1L:2L] == c("doc_id", "text")) is not TRUE" in the Page 91 section shown below.

I hope someone is kind enough to show me how to fix this. Please find my code below.

# Page 83: the `sentiments` dataset
library(tidytext)
sentiments


# Page 84: the AFINN, Bing and NRC sentiment lexicons
library(tidytext)
get_sentiments("afinn")
get_sentiments("bing")
get_sentiments("nrc")

#Page 85 Sentiment analysis with inner join
library(tidytext)
library(janeaustenr)
library(dplyr)
library(stringr)


# Build a one-word-per-row table of the Austen novels.
# `linenumber` is the position of each line within its book and `chapter` is
# a running count of the chapter headings seen so far in that book.
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    # The pattern must be written on a single line: a line break inside the
    # string literal becomes part of the regex (newline plus indentation), so
    # no heading would ever match and `chapter` would stay at 0.
    chapter = cumsum(str_detect(
      text,
      regex("^chapter [\\divxlc]", ignore_case = TRUE)
    ))
  ) %>%
  ungroup() %>%
  unnest_tokens(word, text) # break each line into one word per row



# Page 86: NRC lexicon entries whose sentiment is "joy"
nrc_joy <- filter(get_sentiments("nrc"), sentiment == "joy")



# Keep only the words of "Emma", match them against the joy lexicon, and
# count how often each joy word occurs (most frequent first).
tidy_books %>%
  filter(book == "Emma") %>%
  # Name the join key explicitly so the join does not depend on whichever
  # column names the two tables happen to share.
  inner_join(nrc_joy, by = "word") %>%
  count(word, sort = TRUE)


# Page 87: count positive and negative Bing words in 80-line sections of each
# book, put the two counts in separate columns, and compute the net
# sentiment (positive - negative).
library(tidyr)
jane_austen_sentiment <- tidy_books %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(book, index = linenumber %/% 80, sentiment) %>%
  # pivot_wider() replaces the superseded spread(); sections that lack one
  # of the two sentiments get a count of 0 instead of NA.
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>%
  mutate(sentiment = positive - negative)

# Plot the net sentiment along the trajectory of each novel, one panel per book
library(ggplot2)
jane_austen_sentiment %>%
  ggplot(aes(x = index, y = sentiment, fill = book)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~book, scales = "free_x", ncol = 2)


# Page 87: compare the three sentiment dictionaries on a single novel

pride_prejudice <- filter(tidy_books, book == "Pride & Prejudice")
pride_prejudice

# Page 88: split the novel into 80-line sections with integer division
# (%/%) and compute a net sentiment for each section under every lexicon.

# AFINN: the section score is the sum of the `value` column of matched words.
afinn <- pride_prejudice %>%
  inner_join(get_sentiments("afinn"), by = "word") %>%
  group_by(index = linenumber %/% 80) %>%
  summarise(sentiment = sum(value)) %>%
  mutate(method = "AFINN")

# Bing and NRC: count positive and negative matches per section and take the
# difference. NRC is restricted to its positive/negative entries first.
bing_and_nrc <- bind_rows(
  pride_prejudice %>%
    inner_join(get_sentiments("bing"), by = "word") %>%
    mutate(method = "Bing et al."),
  pride_prejudice %>%
    inner_join(
      get_sentiments("nrc") %>%
        filter(sentiment %in% c("positive", "negative")),
      by = "word"
    ) %>%
    mutate(method = "NRC")
) %>%
  count(method, index = linenumber %/% 80, sentiment) %>%
  # pivot_wider() replaces the superseded spread(); missing counts become 0.
  pivot_wider(names_from = sentiment, values_from = n, values_fill = 0) %>%
  mutate(sentiment = positive - negative)

# Page 88, part 2: stack both results and draw one panel per method
ggplot(
  bind_rows(afinn, bing_and_nrc),
  aes(x = index, y = sentiment, fill = method)
) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~method, scales = "free_y", ncol = 1)


# Page 89: number of positive and negative words in the NRC and Bing lexicons
count(
  filter(get_sentiments("nrc"), sentiment %in% c("positive", "negative")),
  sentiment
)

count(get_sentiments("bing"), sentiment)


# Page 89, part 2: the most common positive and negative words.
# Each (word, sentiment) pair is counted across all books, most frequent first.
bing_word_counts <- tidy_books %>%
  # Explicit key: the join should match on `word` only.
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  ungroup()
bing_word_counts


# Page 90: plot the ten most frequent words of each sentiment by piping
# straight into ggplot2.
bing_word_counts %>%
  group_by(sentiment) %>%
  # slice_max() replaces the superseded top_n(); ordering by `n` is stated
  # explicitly instead of relying on top_n()'s "last column" default.
  slice_max(n, n = 10) %>%
  ungroup() %>%
  # Order the words by count so the bars are sorted within each panel.
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(n, word, fill = sentiment)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~sentiment, scales = "free_y") +
  labs(x = "Contribution to sentiment",
       y = NULL)
# coord_flip() was removed because inverting the coordinates squeezed the chart

# Page 90, part 2: put "miss" in front of the stop-word list as a custom entry
custom_stop_words <- tibble(word = "miss", lexicon = "custom") %>%
  bind_rows(stop_words)
custom_stop_words


# Page 91: word cloud of the 100 most frequent non-stop-words, drawn with the
# wordcloud package (base R graphics).
library(wordcloud)
tidy_books %>%
  # Explicit key: drop every row whose `word` appears in the stop-word list.
  anti_join(stop_words, by = "word") %>%
  count(word) %>%
  with(wordcloud(word, n, max.words = 100))

# Page 91, part 2: comparison cloud of negative vs. positive words.
# acast() from reshape2 turns the counts into a word-by-sentiment matrix,
# which is the input comparison.cloud() is given here.
library(reshape2)
tidy_books %>%
  # Explicit key: match on `word` only.
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(colors = c("gray20", "gray80"),
                   max.words = 100)


# Page 91: tokenise by sentence instead of by word
PandP_sentences <- unnest_tokens(
  tibble(text = prideprejudice),
  sentence,
  text,
  token = "sentences"
)

Thank you very much for your help.

# Rebuild tidy_books at line level (no word tokenisation), adding the line
# number within each book and a running chapter count.
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    line_number = row_number(),
    # A heading is "chapter", a space, then an arabic or roman numeral.
    # Without the space, headings such as "Chapter 1" would not match and
    # `chapter` would stay at 0.
    chapter = cumsum(str_detect(
      text,
      regex("^chapter [\\divxlc]", ignore_case = TRUE)
    ))
  ) %>%
  ungroup()


# Lines of "Pride & Prejudice" only, wrapped in a tibble
pridepredjudice <- tibble(filter(tidy_books, book == "Pride & Prejudice"))

# Split the text column of the novel into one sentence per row
PandP_sentences <- unnest_tokens(
  pridepredjudice,
  sentence,
  text,
  token = "sentences"
)

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM