# Tidy Text Mining
# Chapter 2: sentiment analysis with tidy data

library(tidyverse)
library(tidytext)
library(janeaustenr)
library(stringr)
library(tidyr)

# Sentiment lexicons ----

# Print the lexicon data shipped with tidytext, then fetch each named lexicon.
sentiments
get_sentiments("afinn")
get_sentiments("bing")
get_sentiments("nrc")

# Sentiment analysis for Jane Austen books ----

# One row per word, keeping track of the book, the line the word came from,
# and the chapter it belongs to.
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    # A chapter heading is a line starting with "chapter " followed by a digit
    # or a roman-numeral letter (case-insensitive); the running count of
    # headings seen so far within a book is the chapter number.
    chapter = cumsum(
      str_detect(text, regex("^chapter [\\divxlc]", ignore_case = TRUE))
    )
  ) %>%
  ungroup() %>%
  unnest_tokens(word, text)

# NRC lexicon entries tagged with the "joy" sentiment.
nrc_joy <- get_sentiments("nrc") %>%
  filter(sentiment == "joy")

# Most frequent joy words in "Emma".
# `by = "word"` names the join key explicitly instead of relying on the
# automatic common-column guess (and its message).
tidy_books %>%
  filter(book == "Emma") %>%
  inner_join(nrc_joy, by = "word") %>%
  count(word, sort = TRUE)

# Net sentiment (positive minus negative word count) per 80-line section of
# each book, using the Bing lexicon.
jane_austen_sentiment <- tidy_books %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  # Integer division groups consecutive lines into sections of 80 lines.
  count(book, index = linenumber %/% 80, sentiment) %>%
  # pivot_wider() replaces the superseded spread(); sections that lack one of
  # the sentiment classes get a count of 0 instead of NA.
  pivot_wider(
    names_from = sentiment,
    values_from = n,
    values_fill = 0
  ) %>%
  mutate(sentiment = positive - negative)

# Plot the sentiment trajectory of each book ----

# One panel per book; the x axis is free because the books differ in length.
ggplot(jane_austen_sentiment, aes(index, sentiment, fill = book)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~book, ncol = 2, scales = "free_x")