# Tidy Text Mining
# Chapter 2
#
# Exploratory script: sentence-level sentiment scoring with sentimentr,
# followed by word clouds built from the tokenized Jane Austen novels.

library(tidyverse)
library(tidytext)
library(janeaustenr)
library(stringr)
library(reshape2)
library(sentimentr)
library(wordcloud)

# Sentiment on a toy example ----

mytext <- c(
  "do you like it? But I hate really bad dogs",
  "I am the best friend.",
  "Do you really like it? I'm not a fan"
)

# by sentence: split each element into sentences, then score each sentence
mytext <- get_sentences(mytext)
sentiment(mytext)

# by group: with no grouping argument supplied, sentimentr's default
# grouping is used
sentiment_by(mytext)

# 2012 debate ----

# NOTE(review): presidential_debates_2012 is not defined in this script;
# presumably it is a dataset provided by sentimentr -- confirm.
# Sentiment of the dialogue is aggregated per (person, time) combination.
out <- with(
  presidential_debates_2012,
  sentiment_by(
    get_sentences(dialogue),
    list(person, time)
  )
)
plot(out)

# tidy_books ----

# One row per word. Before tokenizing, each line is tagged (within its
# book) with its line number and a running chapter counter that increments
# on every line matching the chapter-heading pattern.
tidy_books <- austen_books() %>%
  group_by(book) %>%
  mutate(
    linenumber = row_number(),
    chapter = cumsum(
      str_detect(text, regex("^chapter [\\divxlc]", ignore_case = TRUE))
    )
  ) %>%
  ungroup() %>%
  unnest_tokens(word, text)

# Word cloud of the most frequent words (at most 100) after dropping
# stop words. The join key is spelled out; "word" is the only column the
# two tables share, so this matches the implicit natural join.
tidy_books %>%
  anti_join(stop_words, by = "word") %>%
  count(word) %>%
  with(wordcloud(word, n, max.words = 100))

# Comparison cloud by sentiment: count each (word, sentiment) pair, cast to
# a word x sentiment matrix (missing combinations filled with 0), and plot
# the sentiment columns in contrasting grays.
tidy_books %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(word, sentiment, sort = TRUE) %>%
  acast(word ~ sentiment, value.var = "n", fill = 0) %>%
  comparison.cloud(colors = c("gray20", "gray80"), max.words = 100)