This is from Section 15.3 of the Modern Data Science with R book.

Using rvest

Take a look at the Wikipedia List of songs recorded by the Beatles.

In the book, the second list (Other songs) is used. Here I have used the Main songs list instead.

A great reference for regex (used by functions like gsub) is the r4ds book; see Chapter 14 on strings.

library(rvest) 
library(tidyr) 
library(methods) 
library(mdsr)
library(tm)
# Address of the Wikipedia page that lists the songs recorded by the Beatles.
url <- "http://en.wikipedia.org/wiki/List_of_songs_recorded_by_the_Beatles"
# Download and parse the page, then keep every <table> element found in it.
tables <- html_nodes(read_html(url), css = "table")
# Print the node set to see which tables are available.
tables
{xml_nodeset (7)}
[1] <table class="nowraplinks navbox-inner" style="border-spacing:0;background:transparent;color: ...
[2] <table id="toc" class="toc" summary="Class" align="center" style="text-align:center;"><tbody> ...
[3] <table class="wikitable" style="font-size:90%;">\n<caption>Key\n</caption>\n<tbody>\n<tr>\n<t ...
[4] <table class="wikitable sortable plainrowheaders" style="text-align:center">\n<caption>Name o ...
[5] <table class="wikitable sortable plainrowheaders" style="text-align:center">\n<caption>Name o ...
[6] <table class="wikitable sortable plainrowheaders" style="text-align:center">\n<caption>Name o ...
[7] <table class="nowraplinks vcard hlist collapsible autocollapse navbox-inner" style="border-sp ...
# The fourth <table> on the page is the main song list; turn it into a
# data frame.
songs <- tables[[4]] %>% html_table()
# Compact overview: one line per column with its type and first values.
glimpse(songs)
Observations: 213
Variables: 6
$ Song                     <chr> "\"Across the Universe\"[b]", "\"Act Naturally\"", "\"All I've...
$ `Core catalogue release` <chr> "Let It Be", "Help!", "With the Beatles", "With the Beatles", ...
$ `Songwriter(s)`          <chr> "LennonMcCartney", "Johnny RussellVoni Morrison", "LennonMcCar...
$ `Lead vocal(s)[a]`       <chr> "Lennon", "Starr", "Lennon", "McCartney", "McCartney(with Lenn...
$ Year                     <int> 1969, 1965, 1963, 1963, 1969, 1967, 1964, 1966, 1963, 1965, 19...
$ `Ref(s)`                 <chr> "[6]", "[7]", "[8]", "[8]", "[9]", "[10][11]", "[12]", "[13]",...
# Display the main song list.
songs
# The fifth <table> on the page is converted to a data frame the same way.
other <- tables[[5]] %>% html_table()
# Compact overview of its columns.
glimpse(other)
Observations: 43
Variables: 8
$ Song            <chr> "\"12-Bar Original\"", "\"Ain't She Sweet\"", "\"Ain't She Sweet\"", "\...
$ `Release(s)`    <chr> "Anthology 2", "Anthology 1", "Anthology 3", "Anthology 3", "The Beatle...
$ `Songwriter(s)` <chr> "LennonMcCartneyHarrisonStarkey", "Jack YellenMilton Ager", "Jack Yelle...
$ `Lead vocal(s)` <chr> "Instrumental", "Lennon", "McCartney", "Harrison", "Lennon", "McCartney...
$ Yearrecorded    <int> 1965, 1961, 1969, 1969, 1963, 1962, 1968, 1968, 1960, 1968, 1967, 1968,...
$ Yearreleased    <int> 1996, 1995, 1996, 1996, 2013, 1995, 2018, 2018, 1995, 2018, 1995, 2018,...
$ Notes           <chr> "A twelve-bar blues instrumental, recorded on 4 November 1965.[45] An e...
$ Ref.            <chr> "[47]", "[38]", "[48]", "[48]", "[28]", "[38]", "[51]", "[51]", "[38]",...
# Display the second song table as scraped.
other
# Tidy the main list: give the songwriter column a syntactic name, strip the
# literal double quotes from the titles, and store the year as a double.
songs <- songs %>%
  rename(songwriters = `Songwriter(s)`) %>%
  mutate(
    Song = gsub(pattern = '\\"', replacement = "", x = Song),
    Year = as.numeric(Year)
  )
songs
# Apply the same tidying to the second table (its year column is
# `Yearrecorded`).
other <- other %>%
  rename(songwriters = `Songwriter(s)`) %>%
  mutate(
    Song = gsub(pattern = '\\"', replacement = "", x = Song),
    Yearrecorded = as.numeric(Yearrecorded)
  )
other
# Count songs per songwriter credit, order the counts from largest to
# smallest, and show the first six.
head(
  sort(tally(~songwriters, data = songs), decreasing = TRUE),
  n = 6
)
songwriters
               LennonMcCartney                       Harrison                 Larry Williams 
                           159                             22                              3 
                  Carl Perkins                    Chuck Berry LennonMcCartneyHarrisonStarkey 
                             2                              2                              2 
# Number of songwriter credits that mention McCartney.
sum(grepl("McCartney", songs$songwriters))
[1] 165
# Number of songwriter credits that mention Lennon.
sum(grepl("Lennon", songs$songwriters))
[1] 165
# Number of songwriter credits that mention McCartney or Lennon (or both).
sum(grepl("(McCartney|Lennon)", songs$songwriters))
[1] 165
# Number of songwriter credits in which the names McCartney/Lennon occur
# at least twice (the pattern accepts either order).
sum(grepl("(McCartney|Lennon).*(McCartney|Lennon)", songs$songwriters))
[1] 165
# Titles of the first six songs whose credit contains two occurrences of
# McCartney/Lennon.
head(
  select(
    filter(songs, grepl("(McCartney|Lennon).*(McCartney|Lennon)", songwriters)),
    Song
  )
)
# Build a corpus with one document per song title, drop English stop words,
# and compute a tf-idf weighted document-term matrix.
# NOTE(review): stop words are removed before the titles are lower-cased, so
# capitalised stop words (e.g. "You") presumably survive -- the printed
# result contains "you". Confirm whether that is intended.
song_titles <- DocumentTermMatrix(
  tm_map(VCorpus(VectorSource(songs$Song)), removeWords, stopwords("english")),
  control = list(weighting = weightTfIdf)
)
# Terms reaching the lower bound of 14.
# NOTE(review): with tf-idf weighting this bound presumably applies to summed
# weights rather than raw counts -- confirm against the tm documentation.
findFreqTerms(song_titles, lowfreq = 14)
[1] "love"       "revolution" "you"       

Using httr

The following code is from Exercise 15.10. The site stackexchange.com displays questions and answers on technical topics.

library(httr) 

Attaching package: ‘httr’

The following object is masked from ‘package:NLP’:

    content
# Ask the Stack Exchange API for Stack Overflow questions tagged "dplyr".
getresult <- GET(
  url = "http://api.stackexchange.com",
  path = "questions",
  query = list(site = "stackoverflow.com", tagged = "dplyr")
)
# Raise an R error if the HTTP response reports a failure status.
stop_for_status(getresult)
# Extract the response body; the result is used below as a nested list.
questions <- content(getresult)
# Field names available on the first returned question.
names(questions[["items"]][[1]])
 [1] "tags"               "owner"              "is_answered"        "view_count"        
 [5] "answer_count"       "score"              "last_activity_date" "creation_date"     
 [9] "question_id"        "link"               "title"             
# Number of questions returned by the request.
# The element is named `items`; spelling it in full avoids depending on the
# partial matching that `$` performs on lists (`questions$item` only worked
# because `item` is a unique prefix of `items`).
length(questions$items)
[1] 30
# First 68 characters of the title of returned question 1.
substr(questions[["items"]][[1]][["title"]], start = 1, stop = 68)
[1] "R Dplyr solution for summarize_at correlation"
# First 68 characters of the title of returned question 2.
substr(questions[["items"]][[2]][["title"]], start = 1, stop = 68)
[1] "Trouble using dplyr::order to rank values from smallest to largest i"
# First 68 characters of the title of returned question 3.
substr(questions[["items"]][[3]][["title"]], start = 1, stop = 68)
[1] "Issue with summarizing a field"

The question asked in this Exercise: How many questions were returned? Without using jargon, describe in words what is being displayed and how it might be used.

The next exercise, 15.11, asks for the same thing with dplyr. Try something else, like ggplot2.

library(httr) 
# Ask the Stack Exchange API for Stack Overflow questions tagged "ggplot2".
getresult <- GET(
  url = "http://api.stackexchange.com",
  path = "questions",
  query = list(site = "stackoverflow.com", tagged = "ggplot2")
)
# Raise an R error if the HTTP response reports a failure status.
stop_for_status(getresult)
# Extract the response body; the result is used below as a nested list.
questions <- content(getresult)
# Field names available on the first returned question.
names(questions[["items"]][[1]])
 [1] "tags"               "migrated_from"      "owner"              "is_answered"       
 [5] "view_count"         "accepted_answer_id" "answer_count"       "score"             
 [9] "last_activity_date" "creation_date"      "question_id"        "link"              
[13] "title"             
# First 68 characters of the title of returned question 1.
substr(questions[["items"]][[1]][["title"]], start = 1, stop = 68)
[1] "ggsave gives blank jpeg image"
# First 68 characters of the title of returned question 2.
substr(questions[["items"]][[2]][["title"]], start = 1, stop = 68)
[1] "ggplot facet_wrap reading all input data as NaN (w/in looped filter)"
# First 68 characters of the title of returned question 3.
substr(questions[["items"]][[3]][["title"]], start = 1, stop = 68)
[1] "How to add a vertical line to ggplot when the x-axis goes from 1999 "
# First 68 characters of the title of returned question 4.
substr(questions[["items"]][[4]][["title"]], start = 1, stop = 68)
[1] "ggplot geom_boxplot rendering extremely slowly"
# First 68 characters of the title of returned question 5.
substr(questions[["items"]][[5]][["title"]], start = 1, stop = 68)
[1] "In R, how do I create multiple time series graphs for multiple sites"
LS0tCnRpdGxlOiAiSW5nZXN0aW5nIHRleHQiCm91dHB1dDoKICBwZGZfZG9jdW1lbnQ6IGRlZmF1bHQKICBodG1sX25vdGVib29rOiBkZWZhdWx0Ci0tLQoKVGhpcyBpcyBmcm9tIFNlY3Rpb24gMTUuMyBvZiB0aGUgTW9kZXJuIERhdGEgU2NpZW5jZSB3aXRoIFIgYm9vay4KCiMgVXNpbmcgKnJ2ZXN0KgoKVGFrZSBhIGxvb2sgYXQgdGhlIFdpa2lwZWRpYSBbTGlzdCBvZiBzb25ncyByZWNvcmRlZCBieSB0aGUgQmVhdGxlc10oaHR0cDovL2VuLndpa2lwZWRpYS5vcmcvd2lraS9MaXN0X29mX3NvbmdzX3JlY29yZGVkX2J5X3RoZV9CZWF0bGVzKS4KCkluIHRoZSBib29rIHRoZSBzZWNvbmQgbGlzdCBvZiBPdGhlciBzb25ncyBpcyB1c2VkLiAgSSBoYXZlIHVzZWQgdGhlIE1haW4gU29uZ3MgbGlzdC4KCkEgZ3JlYXQgcmVmZXJlbmNlIGZvciByZWdleCAoY29tbWFuZHMgbGlrZSBnc3ViKSBpcyB0aGUgW3I0ZHNdKGh0dHBzOi8vcjRkcy5oYWQuY28ubnopIGJvb2ssIHNlZSBDaGFwdGVyIDE0IGFib3V0IHN0cmluZ3MKCmBgYHtyfQpsaWJyYXJ5KHJ2ZXN0KSAKbGlicmFyeSh0aWR5cikgCmxpYnJhcnkobWV0aG9kcykgCmxpYnJhcnkobWRzcikKbGlicmFyeSh0bSkKCnVybCA8LSAiaHR0cDovL2VuLndpa2lwZWRpYS5vcmcvd2lraS9MaXN0X29mX3NvbmdzX3JlY29yZGVkX2J5X3RoZV9CZWF0bGVzIiAKdGFibGVzIDwtIHVybCAlPiUKICByZWFkX2h0bWwoKSAlPiUKICBodG1sX25vZGVzKGNzcyA9ICJ0YWJsZSIpIAp0YWJsZXMKc29uZ3MgPC0gaHRtbF90YWJsZSh0YWJsZXNbWzRdXSkKZ2xpbXBzZShzb25ncykKc29uZ3MKCm90aGVyIDwtIGh0bWxfdGFibGUodGFibGVzW1s1XV0pCmdsaW1wc2Uob3RoZXIpCm90aGVyCmBgYAoKYGBge3J9CnNvbmdzIDwtIHNvbmdzICU+JSBtdXRhdGUoU29uZyA9IGdzdWIoJ1xcIicsICIiLCBTb25nKSwgWWVhciA9IGFzLm51bWVyaWMoWWVhcikpICU+JSAKICByZW5hbWUoc29uZ3dyaXRlcnMgPSBgU29uZ3dyaXRlcihzKWApCnNvbmdzCgpvdGhlciA8LSBvdGhlciAlPiUgbXV0YXRlKFNvbmcgPSBnc3ViKCdcXCInLCAiIiwgU29uZyksIFllYXJyZWNvcmRlZCA9IGFzLm51bWVyaWMoWWVhcnJlY29yZGVkKSkgJT4lIAogIHJlbmFtZShzb25nd3JpdGVycyA9IGBTb25nd3JpdGVyKHMpYCkKb3RoZXIKYGBgCgoKCgoKCmBgYHtyfQp0YWxseSh+c29uZ3dyaXRlcnMsIGRhdGEgPSBzb25ncykgJT4lIAogIHNvcnQoZGVjcmVhc2luZyA9IFRSVUUpICU+JSAKICBoZWFkKCkKCmBgYAoKYGBge3J9Cmxlbmd0aChncmVwKCJNY0NhcnRuZXkiLCBzb25ncyRzb25nd3JpdGVycykpCmxlbmd0aChncmVwKCJMZW5ub24iLCBzb25ncyRzb25nd3JpdGVycykpCmxlbmd0aChncmVwKCIoTWNDYXJ0bmV5fExlbm5vbikiLCBzb25ncyRzb25nd3JpdGVycykpCmxlbmd0aChncmVwKCIoTWNDYXJ0bmV5fExlbm5vbikuKihNY0NhcnRuZXl8TGVubm9uKSIsIHNvbmdzJHNvbmd3cml0ZXJzKSkKYGBgCgpgYGB7cn0Kc29uZ3Mg
JT4lIGZpbHRlcihncmVwbCgiKE1jQ2FydG5leXxMZW5ub24pLiooTWNDYXJ0bmV5fExlbm5vbikiLCBzb25nd3JpdGVycykpICU+JSAKICBzZWxlY3QoU29uZykgJT4lIAogIGhlYWQoKQpgYGAKCmBgYHtyfQpzb25nX3RpdGxlcyA8LSBWQ29ycHVzKFZlY3RvclNvdXJjZShzb25ncyRTb25nKSkgJT4lIAogIHRtX21hcChyZW1vdmVXb3Jkcywgc3RvcHdvcmRzKCJlbmdsaXNoIikpICU+JSAKICBEb2N1bWVudFRlcm1NYXRyaXgoY29udHJvbCA9IGxpc3Qod2VpZ2h0aW5nID0gd2VpZ2h0VGZJZGYpKQpmaW5kRnJlcVRlcm1zKHNvbmdfdGl0bGVzLCAxNCkKYGBgCgojIFVzaW5nICpodHRyKgoKVGhlIGZvbGxvd2luZyBjb2RlIGlzIGZyb20gRXhlcmNpc2UgMTUuMTAuICBUaGUgc2l0ZSBbc3RhY2tleGNoYW5nZS5jb21dKHN0YWNrZXhjaGFuZ2UuY29tKSBkaXNwbGF5cyBxdWVzdGlvbnMgYW5kIGFuc3dlcnMgb24gdGVjaG5pY2FsIHRvcGljcy4KCmBgYHtyfQpsaWJyYXJ5KGh0dHIpIAojIEZpbmQgdGhlIG1vc3QgcmVjZW50IFIgcXVlc3Rpb25zIG9uIHN0YWNrb3ZlcmZsb3cgCmdldHJlc3VsdCA8LSBHRVQoImh0dHA6Ly9hcGkuc3RhY2tleGNoYW5nZS5jb20iLAogICAgICAgICAgICAgICAgIHBhdGggPSAicXVlc3Rpb25zIiwKICAgICAgICAgICAgICAgICBxdWVyeSA9IGxpc3Qoc2l0ZSA9ICJzdGFja292ZXJmbG93LmNvbSIsIHRhZ2dlZCA9ICJkcGx5ciIpKSAKc3RvcF9mb3Jfc3RhdHVzKGdldHJlc3VsdCkgIyBFbnN1cmUgcmV0dXJuZWQgd2l0aG91dCBlcnJvciAKcXVlc3Rpb25zIDwtIGNvbnRlbnQoZ2V0cmVzdWx0KSAjIEdyYWIgY29udGVudCAKbmFtZXMocXVlc3Rpb25zJGl0ZW1zW1sxXV0pCSMgV2hhdCBkb2VzIHRoZSByZXR1cm5lZCBkYXRhIGxvb2sgbGlrZT8KYGBgCgpgYGB7cn0KbGVuZ3RoKHF1ZXN0aW9ucyRpdGVtKQpgYGAKCmBgYHtyfQpzdWJzdHIocXVlc3Rpb25zJGl0ZW1zW1sxXV0kdGl0bGUsIDEsIDY4KQpgYGAKCgpgYGB7cn0Kc3Vic3RyKHF1ZXN0aW9ucyRpdGVtc1tbMl1dJHRpdGxlLCAxLCA2OCkKYGBgCgpgYGB7cn0Kc3Vic3RyKHF1ZXN0aW9ucyRpdGVtc1tbM11dJHRpdGxlLCAxLCA2OCkKYGBgCgoKVGhlIHF1ZXN0aW9uIGFza2VkIGluIHRoaXMgRXhlcmNpc2U6ICBIb3cgbWFueSBxdWVzdGlvbnMgd2VyZSByZXR1cm5lZD8gV2l0aG91dCB1c2luZyBqYXJnb24sIGRlc2NyaWJlIGluIHdvcmRzIHdoYXQgaXMgYmVpbmcgZGlzcGxheWVkIGFuZCBob3cgaXQgbWlnaHQgYmUgdXNlZC4KClRoZSBuZXh0IEV4ZXJjaXNlIDE1LjExIGFzayBmb3IgdGhlIHNhbWUsIGRwbHlyLiAgVHJ5IHNvbWV0aGluZyBlbHNlIGxpa2UgZ2dwbG90Mi4KCmBgYHtyfQpsaWJyYXJ5KGh0dHIpIAojIEZpbmQgdGhlIG1vc3QgcmVjZW50IFIgcXVlc3Rpb25zIG9uIHN0YWNrb3ZlcmZsb3cgCmdldHJlc3VsdCA8LSBHRVQoImh0dHA6Ly9hcGkuc3RhY2tleGNoYW5nZS5jb20iLAogICAgICAgICAgICAgICAgIHBhdGggPSAicXVl
c3Rpb25zIiwKICAgICAgICAgICAgICAgICBxdWVyeSA9IGxpc3Qoc2l0ZSA9ICJzdGFja292ZXJmbG93LmNvbSIsIHRhZ2dlZCA9ICJnZ3Bsb3QyIikpIApzdG9wX2Zvcl9zdGF0dXMoZ2V0cmVzdWx0KSAjIEVuc3VyZSByZXR1cm5lZCB3aXRob3V0IGVycm9yIApxdWVzdGlvbnMgPC0gY29udGVudChnZXRyZXN1bHQpICMgR3JhYiBjb250ZW50IApuYW1lcyhxdWVzdGlvbnMkaXRlbXNbWzFdXSkJIyBXaGF0IGRvZXMgdGhlIHJldHVybmVkIGRhdGEgbG9vayBsaWtlPwpgYGAKCmBgYHtyfQpzdWJzdHIocXVlc3Rpb25zJGl0ZW1zW1sxXV0kdGl0bGUsIDEsIDY4KQpzdWJzdHIocXVlc3Rpb25zJGl0ZW1zW1syXV0kdGl0bGUsIDEsIDY4KQpzdWJzdHIocXVlc3Rpb25zJGl0ZW1zW1szXV0kdGl0bGUsIDEsIDY4KQpzdWJzdHIocXVlc3Rpb25zJGl0ZW1zW1s0XV0kdGl0bGUsIDEsIDY4KQpzdWJzdHIocXVlc3Rpb25zJGl0ZW1zW1s1XV0kdGl0bGUsIDEsIDY4KQpgYGAKCg==