Here are some examples from Chapter 15. The examples are related to the General Social Survey from NORC at the University of Chicago.
library(tidyverse)
library(forcats)
# Print the General Social Survey sample data, then tally the rows per race.
gss_cat

count(gss_cat, race)
Factor variables are used to make bar charts. geom_bar() counts the observations in each level of the factor.
# Bar chart of race: one bar per level, bar height = number of rows.
gss_cat %>%
  ggplot(aes(x = race)) +
  geom_bar()
Forcing levels that have no observations to be displayed.
# Same bar chart, but drop = FALSE keeps every factor level on the x axis,
# including levels that have no rows in the data.
gss_cat %>%
  ggplot(aes(x = race)) +
  geom_bar() +
  scale_x_discrete(drop = FALSE)
Modifying the order of a factor.
Examine tv watch time by religion.
# One row per religion: mean age, mean TV hours (both ignoring NAs), and the
# number of respondents in the group.
relig_summary <- summarise(
  group_by(gss_cat, relig),
  age = mean(age, na.rm = TRUE),
  tvhours = mean(tvhours, na.rm = TRUE),
  n = n()
)

# Mean TV hours per religion, with religions in their stored level order.
ggplot(relig_summary, aes(x = tvhours, y = relig)) +
  geom_point()
# Same scatter plot, with the religion levels reordered by mean TV hours
# directly inside the aesthetic mapping.
ggplot(
  relig_summary,
  aes(x = tvhours, y = fct_reorder(relig, tvhours))
) +
  geom_point()
The fct_reorder() function should be used in a mutate statement.
Same as the last code.
# Reorder the religion levels by mean TV hours in a mutate() step first, so
# the aesthetic mapping can refer to the plain column name.
ggplot(
  mutate(relig_summary, relig = fct_reorder(relig, tvhours)),
  aes(x = tvhours, y = relig)
) +
  geom_point()
Now average age by reported income.
# One row per reported-income level: mean age, mean TV hours (both ignoring
# NAs), and the number of respondents in the group.
rincome_summary <- summarise(
  group_by(gss_cat, rincome),
  age = mean(age, na.rm = TRUE),
  tvhours = mean(tvhours, na.rm = TRUE),
  n = n()
)

# Mean age per income level, with the income levels reordered by mean age.
ggplot(rincome_summary, aes(x = age, y = fct_reorder(rincome, age))) +
  geom_point()
Does this make sense? What is wrong with this plot?
# Keep the income levels in their stored order, except that fct_relevel()
# moves "Not applicable" to the front of the level list.
ggplot(
  rincome_summary,
  aes(x = age, y = fct_relevel(rincome, "Not applicable"))
) +
  geom_point()
Using mutate()
# Baseline: marital status with its levels in their stored order.
ggplot(gss_cat, aes(x = marital)) +
  geom_bar()

# Identity mutate(): the column is assigned to itself, so the chart is the
# same as the baseline.
ggplot(mutate(gss_cat, marital = marital), aes(x = marital)) +
  geom_bar()

# fct_infreq() orders the levels by how often they occur.
ggplot(mutate(gss_cat, marital = fct_infreq(marital)), aes(x = marital)) +
  geom_bar()

# fct_rev() then reverses that frequency ordering.
ggplot(
  mutate(gss_cat, marital = fct_rev(fct_infreq(marital))),
  aes(x = marital)
) +
  geom_bar()
Modifying factor levels.
# Tally the respondents in each party-identification level.
count(gss_cat, partyid)
Re-coding
# Rename six party-identification levels (new name = old name), then tally
# the recoded factor.
gss_cat %>%
  mutate(
    partyid = partyid %>%
      fct_recode(
        "Republican, strong" = "Strong republican",
        "Republican, weak" = "Not str republican",
        "Independent, near rep" = "Ind,near rep",
        "Independent, near dem" = "Ind,near dem",
        "Democrat, weak" = "Not str democrat",
        "Democrat, strong" = "Strong democrat"
      )
  ) %>%
  count(partyid)
Other category
# Same renaming as before, plus three old levels that are all assigned the
# new name "Other", which merges them into a single level before tallying.
gss_cat %>%
  mutate(
    partyid = partyid %>%
      fct_recode(
        "Republican, strong" = "Strong republican",
        "Republican, weak" = "Not str republican",
        "Independent, near rep" = "Ind,near rep",
        "Independent, near dem" = "Ind,near dem",
        "Democrat, weak" = "Not str democrat",
        "Democrat, strong" = "Strong democrat",
        "Other" = "No answer",
        "Other" = "Don't know",
        "Other" = "Other party"
      )
  ) %>%
  count(partyid)
Collapse a factor
# Collapse the party-identification levels into four groups; each new level
# name is given the vector of old levels it absorbs. Then tally the result.
gss_cat %>%
  mutate(
    partyid = partyid %>%
      fct_collapse(
        other = c("No answer", "Don't know", "Other party"),
        rep = c("Strong republican", "Not str republican"),
        ind = c("Ind,near rep", "Independent", "Ind,near dem"),
        dem = c("Not str democrat", "Strong democrat")
      )
  ) %>%
  count(partyid)
LS0tCnRpdGxlOiAiRmFjdG9ycyIKb3V0cHV0OgogIHdvcmRfZG9jdW1lbnQ6IGRlZmF1bHQKICBodG1sX25vdGVib29rOiBkZWZhdWx0CiAgcGRmX2RvY3VtZW50OiBkZWZhdWx0Ci0tLQoKSGVyZSBhcmUgc29tZSBleGFtcGxlcyBmcm9tIENoYXB0ZXIgMTUuICBUaGUgZXhhbXBsZXMgYXJlIHJlbGF0ZWQgdG8gdGhlIFtHZW5lcmFsIFNvY2lhbCBTdXJ2ZXldKGh0dHA6Ly9nc3Mubm9yYy5vcmcvKSBmcm9tIE5PUkMgYXQgdGhlIFVudmVyc2l0eSBvZiBDaGljYWdvLgoKYGBge3IgbWVzc2FnZSA9IEZBTFNFfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShmb3JjYXRzKQoKZ3NzX2NhdAoKZ3NzX2NhdCAlPiUKICBjb3VudChyYWNlKQpgYGAKCkZhY3RvciB2YXJpYWJsZXMgYXJlIHVzZWQgdG8gbWFrZSBiYXIgY2hhcnRzLiAgVGhlICpnZW9tX2JhcigpKiBjb3VudHMgdGhlIG9ic2VydmF0aW9ucyBpbiBlYWNoIGxldmVsIG9mIHRoZSBmYWN0b3IuCgpgYGB7cn0KZ2dwbG90KGdzc19jYXQsIGFlcyhyYWNlKSkgKwogIGdlb21fYmFyKCkKCmBgYAoKRm9yY2luZyBOQXMuCgpgYGB7cn0KZ2dwbG90KGdzc19jYXQsIGFlcyhyYWNlKSkgKwogIGdlb21fYmFyKCkgKwogIHNjYWxlX3hfZGlzY3JldGUoZHJvcCA9IEZBTFNFKQpgYGAKCk1vZGlmeWluZyB0aGUgb3JkZXIgb2YgYSBmYWN0b3IuCgpFeGFtaW5lIHR2IHdhdGNoIHRpbWUgYnkgcmVsaWdpb24uCgpgYGB7cn0KcmVsaWdfc3VtbWFyeSA8LSBnc3NfY2F0ICU+JQogIGdyb3VwX2J5KHJlbGlnKSAlPiUKICBzdW1tYXJpc2UoCiAgICBhZ2UgPSBtZWFuKGFnZSwgbmEucm0gPSBUUlVFKSwKICAgIHR2aG91cnMgPSBtZWFuKHR2aG91cnMsIG5hLnJtID0gVFJVRSksCiAgICBuID0gbigpCiAgKQoKcmVsaWdfc3VtbWFyeSAlPiUgZ2dwbG90KGFlcyh0dmhvdXJzLCByZWxpZykpICsgZ2VvbV9wb2ludCgpCmBgYAoKCmBgYHtyfQpyZWxpZ19zdW1tYXJ5ICU+JSBnZ3Bsb3QoYWVzKHR2aG91cnMsIGZjdF9yZW9yZGVyKHJlbGlnLCB0dmhvdXJzKSkpICsKICBnZW9tX3BvaW50KCkKYGBgCgpUaGUgKmZjdF9yZW9yZGVyKCkqIGZ1bmN0b24gc2hvdWxkIGJlIHVzZWQgaW4gYSBtdXRhdGUgc3RhdGVtZW50LiAgCgpTYW1lIGFzIHRoZSBsYXN0IGNvZGUuCgoKYGBge3J9CnJlbGlnX3N1bW1hcnkgJT4lCiAgbXV0YXRlKHJlbGlnID0gZmN0X3Jlb3JkZXIocmVsaWcsIHR2aG91cnMpKSAlPiUKICBnZ3Bsb3QoYWVzKHR2aG91cnMsIHJlbGlnKSkgKwogICAgZ2VvbV9wb2ludCgpCmBgYAoKTm93IHR2IHdhdGNoIHRpbWUgYnkgYXZlcmFnZSBhZ2UuCgpgYGB7cn0KcmluY29tZV9zdW1tYXJ5IDwtIGdzc19jYXQgJT4lCiAgZ3JvdXBfYnkocmluY29tZSkgJT4lCiAgc3VtbWFyaXNlKAogICAgYWdlID0gbWVhbihhZ2UsIG5hLnJtID0gVFJVRSksCiAgICB0dmhvdXJzID0gbWVhbih0dmhvdXJzLCBuYS5ybSA9IFRSVUUpLAogICAgbiA9IG4oKQogICkKCnJpbmNvbWVfc3VtbWFyeSAlPiUgZ2dwbG90KGFl
cyhhZ2UsIGZjdF9yZW9yZGVyKHJpbmNvbWUsIGFnZSkpKSArIAogIGdlb21fcG9pbnQoKQpgYGAKCkRvZXMgdGhpcyBtYWtlIHNlbnNlPyAgV2hhdCBpcyB3cm9uZyB3aXRoIHRoaXMgcGxvdD8KCmBgYHtyfQpyaW5jb21lX3N1bW1hcnkgJT4lZ2dwbG90KGFlcyhhZ2UsIGZjdF9yZWxldmVsKHJpbmNvbWUsICJOb3QgYXBwbGljYWJsZSIpKSkgKyAKICBnZW9tX3BvaW50KCkKYGBgCgpVc2luZyAqbXV0YXRlKCkqCgpgYGB7cn0KZ3NzX2NhdCAlPiUgZ2dwbG90KGFlcyhtYXJpdGFsKSkgKwogICAgZ2VvbV9iYXIoKQoKZ3NzX2NhdCAlPiUgbXV0YXRlKG1hcml0YWwgPSBtYXJpdGFsKSAlPiUgCiAgZ2dwbG90KGFlcyhtYXJpdGFsKSkgKwogICAgZ2VvbV9iYXIoKQoKZ3NzX2NhdCAlPiUgbXV0YXRlKG1hcml0YWwgPSBtYXJpdGFsICU+JSBmY3RfaW5mcmVxKCkpICU+JSAKICBnZ3Bsb3QoYWVzKG1hcml0YWwpKSArCiAgICBnZW9tX2JhcigpCgpnc3NfY2F0ICU+JSBtdXRhdGUobWFyaXRhbCA9IG1hcml0YWwgJT4lIGZjdF9pbmZyZXEoKSAlPiUgZmN0X3JldigpKSAlPiUKICBnZ3Bsb3QoYWVzKG1hcml0YWwpKSArCiAgICBnZW9tX2JhcigpCmBgYAoKTW9kaWZ5aW5nIGZhY3RvciBsZXZlbHMuCgpgYGB7cn0KZ3NzX2NhdCAlPiUgY291bnQocGFydHlpZCkKYGBgCgpSZS1jb2RpbmcKCmBgYHtyfQpnc3NfY2F0ICU+JQogIG11dGF0ZShwYXJ0eWlkID0gZmN0X3JlY29kZShwYXJ0eWlkLAogICAgIlJlcHVibGljYW4sIHN0cm9uZyIgICAgPSAiU3Ryb25nIHJlcHVibGljYW4iLAogICAgIlJlcHVibGljYW4sIHdlYWsiICAgICAgPSAiTm90IHN0ciByZXB1YmxpY2FuIiwKICAgICJJbmRlcGVuZGVudCwgbmVhciByZXAiID0gIkluZCxuZWFyIHJlcCIsCiAgICAiSW5kZXBlbmRlbnQsIG5lYXIgZGVtIiA9ICJJbmQsbmVhciBkZW0iLAogICAgIkRlbW9jcmF0LCB3ZWFrIiAgICAgICAgPSAiTm90IHN0ciBkZW1vY3JhdCIsCiAgICAiRGVtb2NyYXQsIHN0cm9uZyIgICAgICA9ICJTdHJvbmcgZGVtb2NyYXQiCiAgKSkgJT4lCiAgY291bnQocGFydHlpZCkKYGBgCgpPdGhlciBjYXRlZ29yeQoKYGBge3J9Cmdzc19jYXQgJT4lCiAgbXV0YXRlKHBhcnR5aWQgPSBmY3RfcmVjb2RlKHBhcnR5aWQsCiAgICAiUmVwdWJsaWNhbiwgc3Ryb25nIiAgICA9ICJTdHJvbmcgcmVwdWJsaWNhbiIsCiAgICAiUmVwdWJsaWNhbiwgd2VhayIgICAgICA9ICJOb3Qgc3RyIHJlcHVibGljYW4iLAogICAgIkluZGVwZW5kZW50LCBuZWFyIHJlcCIgPSAiSW5kLG5lYXIgcmVwIiwKICAgICJJbmRlcGVuZGVudCwgbmVhciBkZW0iID0gIkluZCxuZWFyIGRlbSIsCiAgICAiRGVtb2NyYXQsIHdlYWsiICAgICAgICA9ICJOb3Qgc3RyIGRlbW9jcmF0IiwKICAgICJEZW1vY3JhdCwgc3Ryb25nIiAgICAgID0gIlN0cm9uZyBkZW1vY3JhdCIsCiAgICAiT3RoZXIiICAgICAgICAgICAgICAgICA9ICJObyBhbnN3ZXIiLAogICAgIk90aGVyIiAgICAgICAgICAgICAgICAgPSAiRG9u
J3Qga25vdyIsCiAgICAiT3RoZXIiICAgICAgICAgICAgICAgICA9ICJPdGhlciBwYXJ0eSIKICApKSAlPiUKICBjb3VudChwYXJ0eWlkKQpgYGAKCkNvbGxhcHNlIGEgZmFjdG9yCgpgYGB7cn0KZ3NzX2NhdCAlPiUKICBtdXRhdGUocGFydHlpZCA9IGZjdF9jb2xsYXBzZShwYXJ0eWlkLAogICAgb3RoZXIgPSBjKCJObyBhbnN3ZXIiLCAiRG9uJ3Qga25vdyIsICJPdGhlciBwYXJ0eSIpLAogICAgcmVwID0gYygiU3Ryb25nIHJlcHVibGljYW4iLCAiTm90IHN0ciByZXB1YmxpY2FuIiksCiAgICBpbmQgPSBjKCJJbmQsbmVhciByZXAiLCAiSW5kZXBlbmRlbnQiLCAiSW5kLG5lYXIgZGVtIiksCiAgICBkZW0gPSBjKCJOb3Qgc3RyIGRlbW9jcmF0IiwgIlN0cm9uZyBkZW1vY3JhdCIpCiAgKSkgJT4lCiAgY291bnQocGFydHlpZCkKYGBgCgo=