--- title: '%+%' output: word_document: default html_notebook: default pdf_document: default --- **Historical baby names** ```{r message = FALSE} library(pacman) p_load(tidyverse, babynames, mdsr, Hmisc) ``` This dataset was created in 2014. ```{r} BabynamesDist <- make_babynames_dist() head(BabynamesDist) ``` ```{r} BabynamesDist %>% filter(name == "Benjamin") ``` ```{r} joseph <- BabynamesDist %>% filter(name == "Joseph" & sex == "M") name_plot <- joseph %>% ggplot(aes(x = year)) name_plot ``` ```{r} name_plot <- name_plot + geom_bar(stat = "identity", aes(y = count_thousands*alive_prob), fill = "#b2d7e9", color = "white") name_plot ``` ```{r} name_plot <- name_plot + geom_line(aes(y = count_thousands), size=2) name_plot ``` ```{r} name_plot <- name_plot + ylab("Number of People (thousands)") + xlab(NULL) name_plot ``` ```{r} summary(name_plot) ``` ```{r} ?wtd.quantile median_yob <- with(joseph, wtd.quantile(year, weights=est_alive_today, prob = 0.5)) # The pipe %>% does not work. median_yob ``` ```{r} name_plot <- name_plot + geom_bar(stat = "identity", color = "white", fill = "#008fd5", aes(y = ifelse(year == median_yob, est_alive_today / 1000, 0))) name_plot ``` ```{r} name_plot + ggtitle("Age Distribution of American Boys Named Joseph") + geom_text(x = 1935, y = 40, label = "Number of Josephs\nborn each year") + geom_text(x = 1915, y = 13, label = "Number of Josephs\nborn each year\nestimated to be alive\non 1/1/214", color = "#b2d7e9") + geom_text(x = 2003, y = 40, label = "The median\nliving Joseph\nis 37 years old", color = "darkgrey") + geom_curve(x = 1995, xend = 1974, y = 40, yend = 24, arrow = arrow(length = unit(0.3, "cm")), curvature = 0.5) + ylim (0, 42) ``` Now replace the data in the ggplot and then make the new plot. ```{r} name_plot %+% filter(BabynamesDist, name == "Josephine" & sex == "F") ``` Faceting Try the name Jessie ```{r} name_plot <- name_plot + facet_wrap(~sex) name_plot %+% filter(BabynamesDist, name == "Jessie") ``` Try it with your own name. ```{r} name_plot <- name_plot + facet_wrap(~sex) name_plot %+% filter(BabynamesDist, name == "Eric") ``` ```{r} many_names_plot <- name_plot + facet_grid(name ~sex) mnp <- many_names_plot %+% filter(BabynamesDist, name %in% c("Jessie", "Marion", "Jackie")) mnp ``` ```{r} mnp + facet_grid(sex ~ name) ``` **Most common women's names** Thank you Hannah Kim! ```{r} library(babynames) com_fem <- BabynamesDist %>% filter(sex == "F") %>% group_by(name) %>% summarise( N = n(), est_num_alive = sum(est_alive_today) ) %>% arrange(desc(est_num_alive)) %>% head(25) %>% select(name) %>% left_join(., BabynamesDist, by = "name") %>% group_by(name) %>% summarise( N = n(), est_num_alive = sum(est_alive_today), q1_age = wtd.quantile(age_today, weight = est_alive_today, probs = 0.25), median_age = wtd.quantile(age_today, weight = est_alive_today, probs = 0.5), q3_age = wtd.quantile(age_today, weight = est_alive_today, probs = 0.75) ) w_plot <- ggplot(data = com_fem, aes(x = reorder(name, -median_age), y = median_age)) + xlab(NULL) + ylab("Age (in years)") + ggtitle("Median ages for females with the 25 most common names") w_plot <- w_plot + geom_linerange(aes(ymin = q1_age, ymax = q3_age), color = "#f3d478", size = 10, alpha = 0.8) w_plot <- w_plot + geom_point(fill = "#ed3324", colour = "white", size = 4, shape = 21) w_plot + geom_point(aes(y = 55, x = 24), fill = "#ed3324", colour = "white", size = 4, shape = 21) + geom_text(aes(y = 58, x = 24, label = " median")) + geom_text(aes(y = 26, x = 16, label = " 25th")) + geom_text(aes(y = 51, x = 16, label = "75th percentile ")) + geom_point(aes(y = 24, x = 16), shape = 17) + geom_point(aes(y = 56, x = 16), shape = 17) + coord_flip() ```