---
title: "Stat. 651: ggplot2"
author: "Prof. Eric A. Suess"
format:
  html:
    self-contained: true
---

# ggplot2 examples

```{r message=FALSE}
library(tidyverse)
library(mdsr)
library(mosaicData)
```

# CIACountries

Make the base plot **g** and then add different layers on to it.

```{r}
head(CIACountries)

# base plot g: only the data and the x/y aesthetics, no geom yet
g <- CIACountries |>
  ggplot(aes(y = gdp, x = educ))

g + geom_point()

g + geom_point(size = 3)

# color is mapped to a variable (inside aes), size is a fixed setting
g + geom_point(aes(color = net_users), size = 3)

# no geom_point used for the next picture
g + geom_text(
  aes(label = country, color = net_users),
  size = 3
)

g + geom_point(
  aes(color = net_users, size = roadways)
)
```

Change the scales

```{r}
# log10 applied through the coordinate system
g +
  geom_point(aes(color = net_users, size = roadways)) +
  coord_trans(y = "log10")

# log10 applied through the y scale, with a readable axis name and labels
g +
  geom_point(aes(color = net_users, size = roadways)) +
  scale_y_continuous(
    name = "Gross Domestic Product",
    trans = "log10",
    labels = scales::comma
  )
```

# Faceting

```{r}
# one panel per level of net_users, log10 via the coordinate system
g +
  geom_point(alpha = 0.9, aes(size = roadways)) +
  coord_trans(y = "log10") +
  facet_wrap(~ net_users, nrow = 1) +
  theme(legend.position = "top")

# Same faceted plot with log10 applied through the y scale instead.
# Only one of coord_trans()/scale_y_continuous(trans =) is used here:
# combining both would log-transform the y values twice.
g +
  geom_point(alpha = 0.9, aes(size = roadways)) +
  scale_y_continuous(name = "Gross Domestic Product", trans = "log10") +
  facet_wrap(~ net_users, nrow = 1) +
  theme(legend.position = "top")
```

# Export the data and try in Tableau

```{r}
# the CSV is written relative to the working directory printed here
getwd()
write_csv(CIACountries, "CIACountries.csv")
```

# MedicareCharges

Check out the [MEPS](https://meps.ahrq.gov/mepsweb/) website for more real data.

```{r}
# head(MedicareCharges)  # This now causes an error, remove the grouping.
?MedicareCharges

# keep only the rows for New Jersey providers
ChargesNJ <- MedicareCharges |>
  filter(stateProvider == "NJ")
```

```{r}
# bar chart of NJ mean charges, with procedures ordered by mean charge
p <- ggplot(
  data = ChargesNJ,
  aes(x = reorder(drg, mean_charge), y = mean_charge)
) +
  geom_col(fill = "gray") +
  ylab("Statewide Average Charges ($)") +
  xlab("Medical Procedure (DRG)") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, size = rel(0.5))
  )

p
```

Now add the overall data to the plot to compare with NJ.

```{r}
# overlay the charges from the full data set as small translucent points
p <- p + geom_point(data = MedicareCharges, size = 1, alpha = 0.3)
p
```

# SAT

Here is the link to the [College Board SAT](https://research.collegeboard.org/programs/sat/data) website.

```{r}
# base plot for the one-variable displays of the math score
g <- SAT_2010 |>
  ggplot(aes(x = math))

g + geom_histogram()

g +
  geom_histogram(binwidth = 10) +
  labs(x = "Average math SAT score")

g + geom_density(adjust = 0.3)
```

```{r}
# bar chart for the first 10 rows only, states ordered by math score
ggplot(
  data = head(SAT_2010, 10),
  aes(x = reorder(state, math), y = math)
) +
  geom_col() +
  labs(x = "State", y = "Average math SAT score")
```

# HELPrct

Here is the link to the [NSDUH](https://www.samhsa.gov/data/data-we-collect/nsduh-national-survey-drug-use-and-health) website.

```{r}
# position = "fill" stacks the bars to the same height (proportions)
ggplot(data = mosaicData::HELPrct, aes(x = homeless)) +
  geom_bar(aes(fill = substance), position = "fill") +
  scale_fill_brewer(palette = "Spectral") +
  coord_flip()
```

Scatterplot with trend lines

```{r}
g <- ggplot(
  data = SAT_2010,
  aes(x = expenditure, y = math)
) +
  geom_point()
g

# add a least-squares line without the confidence band
# NOTE(review): expenditure is taken to be in thousands of dollars per
# student -- confirm against ?SAT_2010.
g <- g +
  geom_smooth(method = "lm", se = FALSE) +
  xlab("Average expenditure per student ($1000)") +
  ylab("Average score on math SAT")
g
```

Add the trend line within groups representing rate of taking the test.

```{r}
# bin the percentage of students taking the SAT into three labelled groups
SAT_2010 <- SAT_2010 |>
  mutate(
    SAT_rate = cut(
      sat_pct,
      breaks = c(0, 30, 60, 100),
      labels = c("low", "medium", "high")
    )
  )

# swap the updated data frame (now containing SAT_rate) into the plot
g <- g %+% SAT_2010
g

g + aes(color = SAT_rate)

g + facet_wrap(~ SAT_rate)
```

# NHANES

Here is the link to the [NHANES](https://www.cdc.gov/nchs/nhanes/index.htm) website.

```{r}
library(NHANES)
head(NHANES)
```

Take a sample first and then make the plot.

```{r}
library(NHANES)

# plot a random sample of 1000 rows; "male" is moved to the first level
ggplot(
  data = slice_sample(NHANES, n = 1000),
  aes(x = Age, y = Height, color = fct_relevel(Gender, "male"))
) +
  geom_point() +
  geom_smooth() +
  xlab("Age (years)") +
  ylab("Height (cm)") +
  labs(color = "Gender")
```

Here is an alternative plot using all the data. This is a hexbin plot.

```{r}
# hexagonal bins for all rows, with a smoother per Gender
NHANES |>
  ggplot(aes(x = Age, y = Height, color = Gender)) +
  geom_hex() +
  geom_smooth() +
  xlab("Age (years)") +
  ylab("Height (cm)")
```

```{r}
library(mosaic)
head(NHANES)

# keep the two categorical variables and cross-tabulate them
NHANES2 <- NHANES |>
  select(AgeDecade, BMI_WHO)
head(NHANES2)

NHANES2_table <- table(NHANES2)
NHANES2_table

mosaicplot(NHANES2_table, color = TRUE)
```

# Weather

```{r}
library(macleish)
head(whately_2015)

# time series of temperature with a smoothed trend on top
# NOTE(review): temperature is taken to be recorded in degrees Celsius --
# confirm against ?whately_2015.
whately_2015 |>
  ggplot(aes(x = when, y = temperature)) +
  geom_line(color = "darkgrey") +
  geom_smooth() +
  xlab(NULL) +
  ylab("Temperature (degrees Celsius)")
```

Here is the link to the [choroplethr](https://arilamstein.com/open-source/) website.

```{r}
library(choroplethr)
library(choroplethrMaps)

data(df_pop_state)
state_choropleth(
  df_pop_state,
  title = "US 2012 State Population Estimates",
  legend = "Population"
)

data(df_pop_county)
county_choropleth(
  df_pop_county,
  title = "US 2012 County Population Estimates",
  legend = "Population"
)

data(df_pop_country)
country_choropleth(
  df_pop_country,
  title = "2012 World Bank Population Estimates"
)
```

Review Table 3.3 on page 47 for the different kinds of plots that can be made for different kinds of x, y variables.

Continue with the Extended example: Historical baby names on page 48.