library(wordcloud)
library(tm)

# First Example ----
# From help(wordcloud).
# To make a word cloud in R you need a vector of words and a vector of the
# corresponding frequencies.
wordcloud(c(letters, LETTERS, 0:9), seq(1, 1000, length.out = 62))

# Second Example ----
# From help(wordcloud).
# Only the text is passed here (no frequencies); per help(wordcloud) the
# frequencies are then computed from the text itself.
wordcloud(
  "Many years ago the great British explorer George Mallory, who was to die on Mount Everest, was asked why did he want to climb it. He said, \"Because it is there.\" Well, space is there, and we're going to climb it, and the moon and the planets are there, and new hopes for knowledge and peace are there. And, therefore, as we set sail we ask God's blessing on the most hazardous and dangerous and greatest adventure on which man has ever embarked.",
  random.order = FALSE
)

# Third Example ----
# Download Moby Dick from https://www.gutenberg.org/ and save it to a
# directory; adjust `moby_dir` to wherever the text was saved.
moby_dir <- "E:\\tm\\"
moby <- Corpus(DirSource(moby_dir, encoding = "UTF-8"))
inspect(moby)

# Lower-case before removing stop words: removeWords() matches
# case-sensitively, so "The" or "And" would otherwise survive and reappear
# as "the"/"and" once TermDocumentMatrix() lower-cases the tokens.
# content_transformer() keeps the documents as text documents, so no
# PlainTextDocument conversion is needed afterwards.
moby <- tm_map(moby, content_transformer(tolower))
moby <- tm_map(moby, removeNumbers)
moby <- tm_map(moby, removePunctuation)
inspect(moby)

moby <- tm_map(moby, removeWords, stopwords("english"))
# Extra words to drop in addition to the standard English stop-word list.
moby <- tm_map(
  moby, removeWords,
  c(
    "and", "the", "our", "that",
    "for", "are", "also", "more",
    "has", "must", "have", "should",
    "this", "with"
  )
)
# Collapse whitespace last: the removals above leave runs of blanks behind.
moby <- tm_map(moby, stripWhitespace)
inspect(moby)

# Term frequencies over the corpus, most frequent first.
tdm <- TermDocumentMatrix(moby)
m <- as.matrix(tdm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)
wordcloud(d$word, d$freq, min.freq = 2, max.words = 100)

# Fourth Example ----
# Uses the `crude` corpus loaded by data(crude); it does not depend on the
# Moby Dick corpus from the third example.
data(crude)
inspect(crude)
crude <- tm_map(crude, removePunctuation)
inspect(crude)
# Same reasoning as above: lower-case so the stop-word match is complete.
crude <- tm_map(crude, content_transformer(tolower))
crude <- tm_map(crude, removeWords, stopwords("english"))
inspect(crude)

tdm <- TermDocumentMatrix(crude)
m <- as.matrix(tdm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)
wordcloud(d$word, d$freq)

# Fifth Example ----
# http://www.r-bloggers.com/word-cloud-in-r/
library(XML)
library(tm)
library(wordcloud)
library(RColorBrewer)

# Build a word cloud from the third column of the first HTML table on the
# CRAN "available packages by date" page.
u <- "http://cran.r-project.org/web/packages/available_packages_by_date.html"

# Download with base R and parse the local copy: the HTML parser behind
# readHTMLTable() cannot fetch https pages, and this URL may redirect to
# https. readHTMLTable() is namespace-qualified so the call works whether or
# not XML has been attached.
page_file <- tempfile(fileext = ".html")
download.file(u, page_file, quiet = TRUE)
# NOTE: `t` shadows the name of base::t(); calls to t() still resolve to the
# function. The name is kept in case later code refers to it.
t <- XML::readHTMLTable(page_file)[[1]]
unlink(page_file)

# Column 3 is presumably the package title -- confirm against the page
# layout. VectorSource() makes one document per element; DataframeSource()
# in tm >= 0.7 requires `doc_id` and `text` columns and rejects a bare
# one-column data frame.
ap.corpus <- Corpus(VectorSource(as.character(t[, 3])))
inspect(ap.corpus)

ap.corpus <- tm_map(ap.corpus, removePunctuation)
# content_transformer() keeps the documents as text documents, so no
# PlainTextDocument conversion is needed after lower-casing.
ap.corpus <- tm_map(ap.corpus, content_transformer(tolower))
ap.corpus <- tm_map(ap.corpus, removeWords, stopwords("english"))

# Term frequencies over the corpus, most frequent first.
ap.tdm <- TermDocumentMatrix(ap.corpus)
ap.m <- as.matrix(ap.tdm)
ap.v <- sort(rowSums(ap.m), decreasing = TRUE)
ap.d <- data.frame(word = names(ap.v), freq = ap.v)
# Show how many terms occur once, twice, ... to help choose min.freq.
table(ap.d$freq)

pal2 <- brewer.pal(8, "Dark2")
wordcloud(
  ap.d$word, ap.d$freq,
  scale = c(8, .2),
  min.freq = 3,
  max.words = Inf,
  random.order = FALSE,
  rot.per = .15,
  colors = pal2
)