ggplot2 examples
library(tidyverse)
library(mdsr)
Make the base plot g and then add different layers onto it.
# Preview the first rows of the CIACountries data.
head(CIACountries)

# Base plot `g`: `educ` on the x axis, `gdp` on the y axis. No geom is attached
# here; each statement below adds one and prints the resulting figure.
g <- ggplot(data = CIACountries, mapping = aes(x = educ, y = gdp))

# Default points, then larger points.
g + geom_point()
g + geom_point(size = 3)

# Larger points coloured by net_users.
g + geom_point(mapping = aes(color = net_users), size = 3)

# Country labels in place of points (no geom_point in this figure).
g + geom_text(mapping = aes(label = country, color = net_users), size = 3)

# Points coloured by net_users and sized by roadways.
g + geom_point(mapping = aes(color = net_users, size = roadways))
Change the scales
# Put GDP on a log10 axis by transforming the coordinate system.
g +
  geom_point(aes(color = net_users, size = roadways)) +
  coord_trans(y = "log10")

# Put GDP on a log10 axis by transforming the scale instead; the scale also
# carries the axis title.
g +
  geom_point(aes(color = net_users, size = roadways)) +
  scale_y_continuous(name = "Gross Domestic Product", trans = "log10")

# One panel per net_users level, log10 axis via the coordinate system.
g +
  geom_point(alpha = 0.9, aes(size = roadways)) +
  coord_trans(y = "log10") +
  facet_wrap(~net_users, nrow = 1) +
  theme(legend.position = "top")

# Same faceted figure with the log10 transform and axis title on the scale.
# Only one log10 transform is applied: combining coord_trans(y = "log10") with
# a log10 scale would take the logarithm of GDP twice.
g +
  geom_point(alpha = 0.9, aes(size = roadways)) +
  scale_y_continuous(name = "Gross Domestic Product", trans = "log10") +
  facet_wrap(~net_users, nrow = 1) +
  theme(legend.position = "top")
# Print the current working directory; the CSV written below uses a relative
# path, so this is where it will land.
getwd()
[1] "/home/esuess/classes/2020-2021/01 - Fall 2020/Stat651/Presentations/02_ggplot2"
# Export the CIACountries data to CIACountries.csv (relative path, so it is
# written into the current working directory).
write_csv(CIACountries, "CIACountries.csv")
Check out the MEPS website for more real data.
# Calling head(MedicareCharges) on the data as shipped now raises an error,
# so the grouping is removed before the data are used.
?MedicareCharges
MedicareCharges <- MedicareCharges %>% ungroup()
head(MedicareCharges)

# Keep only the rows whose stateProvider is "NJ".
NJCharges <- filter(MedicareCharges, stateProvider == "NJ")
NJCharges

# Bar chart of mean_charge per drg, with the bars ordered by mean_charge.
p <- ggplot(
  data = NJCharges,
  mapping = aes(x = reorder(drg, mean_charge), y = mean_charge)
) +
  geom_bar(stat = "identity", fill = "grey")
p

# Axis titles.
p <- p + labs(
  x = "Medical Procedure (DRG)",
  y = "Statewide Average Charges ($)"
)
p

# Rotate the x tick labels to vertical so they do not overlap.
p <- p + theme(axis.text.x = element_text(hjust = 1, angle = 90))
p
Now add the overall data to the plot to compare with NJ.
# Overlay the full MedicareCharges data as small, translucent points on top of
# the existing bars.
p <- p +
  geom_point(data = MedicareCharges, alpha = 0.3, size = 1)
p
Here is the link to the College Board SAT website.
# Base plot of the `math` column of SAT_2010.
g <- ggplot(data = SAT_2010, mapping = aes(x = math))

# Histogram with the default binning, then with bins 10 units wide.
g + geom_histogram()
g + geom_histogram(binwidth = 10)

# Density estimate of the same variable.
g + geom_density()

# Bar chart of `math` for the first ten rows, bars ordered by `math`.
SAT_2010 %>%
  head(10) %>%
  ggplot(aes(x = reorder(state, math), y = math)) +
  geom_bar(stat = "identity")
Scatterplot with trend lines
# Scatterplot of `math` against `expenditure`.
g <- SAT_2010 %>%
  ggplot(aes(x = expenditure, y = math)) +
  geom_point()
g

# Add a linear trend line without a confidence band (se is a logical flag) and
# label the axes.
# NOTE(review): expenditure looks to be recorded in thousands of dollars, so
# the axis reads ($1000) rather than ($100) -- confirm against ?SAT_2010.
g <- g +
  geom_smooth(method = "lm", se = FALSE) +
  xlab("Average expenditure per student ($1000)") +
  ylab("Average score on math SAT")
g
Add the trend line within groups representing rate of taking the test.
# Bin sat_pct into three labelled groups using the breaks 0, 30, 60 and 100.
SAT_2010 <- mutate(
  SAT_2010,
  SAT_rate = cut(
    sat_pct,
    breaks = c(0, 30, 60, 100),
    labels = c("low", "medium", "high")
  )
)

# Swap the updated data frame into the existing plot so SAT_rate is available.
g <- g %+% SAT_2010
g

# Colour by SAT_rate, then draw one panel per SAT_rate group.
g + aes(color = SAT_rate)
g + facet_wrap(~SAT_rate)
Here is the link to the NSDUH website.
# Horizontal bars, one per `homeless` value, filled by `substance`;
# position = "fill" stretches every bar to the same total length.
ggplot(data = HELPrct, mapping = aes(x = homeless)) +
  geom_bar(mapping = aes(fill = substance), position = "fill") +
  coord_flip()
Here is the link to the NHANES website.
# Load the NHANES package and preview the first rows of its NHANES data.
library(NHANES)
head(NHANES)
Take a sample first and then make the plot.
# Scatterplot of Height against Age, coloured by Gender, drawn from a random
# sample of 1000 rows, with a smoother on top. No seed is set, so the sample
# (and the figure) differs from run to run.
NHANES %>%
  sample_n(size = 1000) %>%
  ggplot(aes(x = Age, y = Height, color = Gender)) +
  geom_point() +
  geom_smooth() +
  labs(x = "Age (years)", y = "Height (cm)")
Here is an alternative plot using all the data. This is a hexbin plot.
# Hexagonal binning of Height against Age on the full NHANES data, with a
# smoother on top; Gender is mapped to colour.
ggplot(data = NHANES, mapping = aes(x = Age, y = Height, color = Gender)) +
  geom_hex() +
  geom_smooth() +
  labs(x = "Age (years)", y = "Height (cm)")
library(mosaic)
head(NHANES)

# Keep only the two categorical columns to be cross-tabulated.
NHANES2 <- select(NHANES, AgeDecade, BMI_WHO)
head(NHANES2)

# Two-way table of counts: AgeDecade by BMI_WHO.
NHANES2_table <- table(NHANES2)
NHANES2_table
BMI_WHO
AgeDecade 12.0_18.5 18.5_to_24.9 25.0_to_29.9 30.0_plus
0-9 873 193 28 7
10-19 280 664 244 172
20-29 49 526 349 418
30-39 10 394 433 495
40-49 26 371 475 506
50-59 15 314 487 477
60-69 8 199 321 373
70+ 6 142 207 218
# Mosaic plot of the AgeDecade-by-BMI_WHO table of counts built above.
mosaicplot(NHANES2_table, color = TRUE)
library(macleish)
Loading required package: etl
head(whately_2015)

# Temperature over time as a grey line with a smoother on top. The x axis
# title is dropped (xlab(NULL)).
# NOTE(review): the macleish weather data record temperature in Celsius, so
# the axis is labelled accordingly -- confirm against ?whately_2015.
whately_2015 %>%
  ggplot(aes(x = when, y = temperature)) +
  geom_line(color = "darkgrey") +
  geom_smooth() +
  xlab(NULL) +
  ylab("Temperature (degrees Celsius)")
Here is the link to the choroplethr website.
library(choroplethr)
Loading required package: acs
Loading required package: XML
Attaching package: ‘acs’
The following object is masked from ‘package:dplyr’:
combine
The following object is masked from ‘package:base’:
apply
View the Choroplethr documentation at www.Choroplethr.com
library(choroplethrMaps)
library(rUnemploymentData)
# Side effect: writes choropleth_1.png, choropleth_2.png, ... and
# animated_choropleth.html into the current working directory (see the
# messages it prints); open the HTML file in a browser to view the animation.
animated_state_unemployment_choropleth()
[1] "All files will be written to the current working directory: /home/esuess/classes/2020-2021/01 - Fall 2020/Stat651/Presentations/02_ggplot2 . To change this use setwd()"
[1] "Now writing individual choropleth files there as 'choropleth_1.png', 'choropleth_2.png', etc."
Saving 7 x 7 in image
[1] "Now writing code to animate all images in 'animated_choropleth.html'. Please open that file with a browser."
# animated_county_unemployment_choropleth()
Check out ggnet2 or the newer ggnetwork
Example 4.
library(GGally)
library(network)
network: Classes for Relational Data
Version 1.16.0 created on 2019-11-30.
copyright (c) 2005, Carter T. Butts, University of California-Irvine
Mark S. Handcock, University of California -- Los Angeles
David R. Hunter, Penn State University
Martina Morris, University of Washington
Skye Bender-deMoll, University of Washington
For citation information, type citation("network").
Type help("network-package") to get started.
library(sna)
Loading required package: statnet.common
Attaching package: ‘statnet.common’
The following object is masked from ‘package:base’:
order
sna: Tools for Social Network Analysis
Version 2.5 created on 2019-12-09.
copyright (c) 2005, Carter T. Butts, University of California-Irvine
For citation information, type citation("sna").
Type help(package="sna") to get started.
library(ggplot2)
# Base URL of the raw files in the ggnet repository.
r <- "https://raw.githubusercontent.com/briatte/ggnet/master/"

# Node table: a tab-separated file with a header row.
v <- read.delim(paste0(r, "inst/extdata/nodes.tsv"))
names(v)
[1] "Sexe" "Prénom" "Nom" "Groupe"
[5] "Département.d.élection" "Num.circonscription" "Commission.permanente" "Twitter"
# Edge table: a tab-separated file with a header row, read from the base URL
# stored in `r`.
e <- read.delim(paste0(r, "inst/extdata/network.tsv"))
names(e)
[1] "Source" "Target"
# Directed network built from the edge table.
net <- network(e, directed = TRUE)

# Party affiliation: look up each vertex name in the node table by its Twitter
# handle and store the matching Groupe as the "party" vertex attribute.
x <- data.frame(Twitter = network.vertex.names(net))
x <- merge(x, v, by = "Twitter", sort = FALSE)$Groupe
net %v% "party" <- as.character(x)

# Colour palette, one Set1 colour per party, named by party.
# The names come from an explicit factor() call: since R 4.0 read.csv() returns
# character columns, so levels(x) would be NULL and the palette would silently
# stay unnamed.
y <- RColorBrewer::brewer.pal(9, "Set1")[c(3, 1, 9, 6, 8, 5, 2)]
names(y) <- levels(factor(v$Groupe))

# Network plot, nodes coloured by party.
ggnet2(
  net,
  color = "party",
  palette = y,
  alpha = 0.75,
  size = 4,
  edge.alpha = 0.5
)
Review Table 3.3 on page 47 for the different kinds of plots that can be made for different kinds of x, y variables.
Continue with the Extended example: Historical baby names on page 48.