---
title: "Stat. 651 ggplot2"
author: "Prof. Eric A. Suess"
output:
  word_document: default
  html_notebook: default
  pdf_document: default
---

ggplot2 examples

```{r message=FALSE}
library(tidyverse)
library(mdsr)
```

# CIACountries

Make the base plot **g** and then add different layers on to it.

```{r}
head(CIACountries)

# base plot g
g <- CIACountries %>%
  ggplot(aes(y = gdp, x = educ))

g + geom_point()

g + geom_point(size = 3)

g + geom_point(aes(color = net_users), size = 3)

# no geom_point used for the next picture
g + geom_text(aes(label = country, color = net_users), size = 3)

g + geom_point(aes(color = net_users, size = roadways))
```

Change the scales

```{r}
# Option 1: transform the coordinate system.
g +
  geom_point(aes(color = net_users, size = roadways)) +
  coord_trans(y = "log10")

# Option 2: transform the y scale itself (and rename the axis).
g +
  geom_point(aes(color = net_users, size = roadways)) +
  scale_y_continuous(name = "Gross Domestic Product", trans = "log10")
```

# Faceting

```{r}
g +
  geom_point(alpha = 0.9, aes(size = roadways)) +
  coord_trans(y = "log10") +
  facet_wrap(~ net_users, nrow = 1) +
  theme(legend.position = "top")

# Use only ONE log transformation here. Combining coord_trans(y = "log10")
# with scale_y_continuous(trans = "log10") would apply log10 twice.
g +
  geom_point(alpha = 0.9, aes(size = roadways)) +
  scale_y_continuous(name = "Gross Domestic Product", trans = "log10") +
  facet_wrap(~ net_users, nrow = 1) +
  theme(legend.position = "top")
```

# Export the data and try in Tableau

```{r}
# The CSV file is written relative to the working directory printed here.
getwd()

write_csv(CIACountries, "CIACountries.csv")
```

# MedicareCharges

Check out the [MEPS](https://meps.ahrq.gov/mepsweb/) website for more real data.

```{r}
# head(MedicareCharges)
# This now causes an error, remove the grouping.
?MedicareCharges

MedicareCharges <- ungroup(MedicareCharges)

head(MedicareCharges)

NJCharges <- MedicareCharges %>%
  filter(stateProvider == "NJ")

NJCharges
```

```{r}
# Bars are ordered by mean charge so the procedures read from low to high.
p <- NJCharges %>%
  ggplot(aes(y = mean_charge, x = reorder(drg, mean_charge))) +
  geom_bar(fill = "grey", stat = "identity")
p

p <- p +
  ylab("Statewide Average Charges ($)") +
  xlab("Medical Procedure (DRG)")
p

# Rotate the x-axis labels so they do not overlap.
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1))
p
```

Now add the overall data to the plot to compare with NJ.

```{r}
p <- p + geom_point(data = MedicareCharges, size = 1, alpha = 0.3)
p
```

# SAT

Here is the link to the [College Board SAT](https://research.collegeboard.org/programs/sat/data) website.

```{r}
g <- SAT_2010 %>%
  ggplot(aes(x = math))

g + geom_histogram()

g + geom_histogram(binwidth = 10)

g + geom_density()
```

```{r}
ggplot(
  data = head(SAT_2010, 10),
  aes(y = math, x = reorder(state, math))
) +
  geom_bar(stat = "identity")
```

Scatterplot with trend lines

```{r}
g <- SAT_2010 %>%
  ggplot(aes(x = expenditure, y = math)) +
  geom_point()
g

# se = FALSE draws the fitted line without the confidence band.
g <- g +
  geom_smooth(method = "lm", se = FALSE) +
  xlab("Average expenditure per student ($100)") +
  ylab("Average score on math SAT")
g
```

Add the trend line within groups representing rate of taking the test.

```{r}
SAT_2010 <- SAT_2010 %>%
  mutate(
    SAT_rate = cut(
      sat_pct,
      breaks = c(0, 30, 60, 100),
      labels = c("low", "medium", "high")
    )
  )

# %+% swaps the updated data frame into the existing plot object.
g <- g %+% SAT_2010
g

g + aes(color = SAT_rate)

g + facet_wrap(~ SAT_rate)
```

# HELPrct

Here is the link to the [NSDUH](https://www.samhsa.gov/data/data-we-collect/nsduh-national-survey-drug-use-and-health) website.

```{r}
HELPrct %>%
  ggplot(aes(x = homeless)) +
  geom_bar(aes(fill = substance), position = "fill") +
  coord_flip()
```

# NHANES

Here is the link to the [NHANES](https://www.cdc.gov/nchs/nhanes/index.htm) website.

```{r}
library(NHANES)

head(NHANES)
```

Take a sample first and then make the plot.
```{r}
# Plot a random sample of 1000 rows rather than the full data set.
sample_n(NHANES, size = 1000) %>%
  ggplot(aes(x = Age, y = Height, color = Gender)) +
  geom_point() +
  geom_smooth() +
  xlab("Age (years)") +
  ylab("Height (cm)")
```

Here is an alternative plot using all the data. This is a hexbin plot.

```{r}
NHANES %>%
  ggplot(aes(x = Age, y = Height, color = Gender)) +
  geom_hex() +
  geom_smooth() +
  xlab("Age (years)") +
  ylab("Height (cm)")
```

```{r}
library(mosaic)

head(NHANES)

NHANES2 <- NHANES %>%
  select(AgeDecade, BMI_WHO)

head(NHANES2)

# Cross-tabulate age decade by BMI category, then draw the mosaic plot.
NHANES2_table <- table(NHANES2)

NHANES2_table

mosaicplot(NHANES2_table, color = TRUE)
```

# Weather

```{r}
library(macleish)

head(whately_2015)

# NOTE(review): the macleish documentation lists `temperature` in degrees
# Celsius; confirm against ?whately_2015 before publishing.
whately_2015 %>%
  ggplot(aes(x = when, y = temperature)) +
  geom_line(color = "darkgrey") +
  geom_smooth() +
  xlab(NULL) +
  ylab("Temperature (degrees Celsius)")
```

Here is the link to the [choroplethr](https://arilamstein.com/open-source/) website.

```{r}
library(choroplethr)
library(choroplethrMaps)
library(rUnemploymentData)

animated_state_unemployment_choropleth()

# animated_county_unemployment_choropleth()
```

# Networks

Check out [ggnet2](https://briatte.github.io/ggnet/) or the newer [ggnetwork](https://briatte.github.io/ggnetwork/)

Example 4.
```{r}
library(GGally)
library(network)
library(sna)
library(ggplot2)

# root URL
r <- "https://raw.githubusercontent.com/briatte/ggnet/master/"

# read nodes
# stringsAsFactors = TRUE keeps Groupe a factor, so levels(x) below returns
# the party names. Since R 4.0.0 read.csv() returns character columns by
# default, and levels() of a character vector is NULL, which would leave the
# palette unnamed.
v <- read.csv(
  paste0(r, "inst/extdata/nodes.tsv"),
  sep = "\t",
  stringsAsFactors = TRUE
)
names(v)

# read edges
e <- read.csv(paste0(r, "inst/extdata/network.tsv"), sep = "\t")
names(e)

# network object
net <- network(e, directed = TRUE)

# party affiliation
# sort = FALSE stops merge() from sorting the result on the key, so the
# vertex order of `net` is kept.
x <- data.frame(Twitter = network.vertex.names(net))
x <- merge(x, v, by = "Twitter", sort = FALSE)$Groupe
net %v% "party" <- as.character(x)

# color palette: one color per party, named by party
y <- RColorBrewer::brewer.pal(9, "Set1")[c(3, 1, 9, 6, 8, 5, 2)]
names(y) <- levels(x)

# network plot
ggnet2(
  net,
  color = "party",
  palette = y,
  alpha = 0.75,
  size = 4,
  edge.alpha = 0.5
)
```

Review Table 3.3 on page 47 for the different kinds of plots that can be made for different kinds of x, y variables.

Continue with the Extended example: Historical baby names on page 48.