Historical baby names

library(pacman)
p_load(tidyverse, babynames, mdsr, Hmisc)

This dataset was created in 2014.

BabynamesDist <- make_babynames_dist()
head(BabynamesDist)
BabynamesDist %>% filter(name == "Benjamin")
joseph <- BabynamesDist %>%
  filter(name == "Joseph" & sex == "M")
name_plot <- joseph %>% ggplot(aes(x = year))
name_plot

name_plot <- name_plot +
  geom_bar(stat = "identity", aes(y = count_thousands*alive_prob),
           fill = "#b2d7e9", color = "white")
name_plot

name_plot <- name_plot + geom_line(aes(y = count_thousands), size=2)
name_plot

name_plot <- name_plot + 
  ylab("Number of People (thousands)") +
  xlab(NULL)
name_plot

summary(name_plot)
data: year, sex, name, n, prop, alive_prob, count_thousands, age_today, est_alive_today
  [111x9]
mapping:  x = ~year
faceting: <ggproto object: Class FacetNull, Facet, gg>
    compute_layout: function
    draw_back: function
    draw_front: function
    draw_labels: function
    draw_panels: function
    finish_data: function
    init_scales: function
    map_data: function
    params: list
    setup_data: function
    setup_params: function
    shrink: TRUE
    train_scales: function
    vars: function
    super:  <ggproto object: Class FacetNull, Facet, gg>
-----------------------------------
mapping: y = ~count_thousands * alive_prob 
geom_bar: width = NULL, na.rm = FALSE
stat_identity: na.rm = FALSE
position_stack 

mapping: y = ~count_thousands 
geom_line: na.rm = FALSE
stat_identity: na.rm = FALSE
position_identity 
?wtd.quantile
median_yob <- with(joseph, wtd.quantile(year, weights=est_alive_today, prob = 0.5))  # The pipe %>% does not work.
median_yob    
 50% 
1975 
name_plot <- name_plot + geom_bar(stat = "identity", color = "white", fill = "#008fd5",
                                   aes(y = ifelse(year == median_yob, est_alive_today / 1000, 0)))
name_plot

name_plot + ggtitle("Age Distribution of American Boys Named Joseph") +
  geom_text(x = 1935, y = 40, label = "Number of Josephs\nborn each year") +
  geom_text(x = 1915, y = 13, label = "Number of Josephs\nborn each year\nestimated to be alive\non 1/1/214",
          color = "#b2d7e9") +
  geom_text(x = 2003, y = 40, label = "The median\nliving Joseph\nis 37 years old", 
          color = "darkgrey") +
  geom_curve(x = 1995, xend = 1974, y = 40, yend = 24,
             arrow = arrow(length = unit(0.3, "cm")), curvature = 0.5) +
  ylim (0, 42)

Now replace the data in the ggplot and then make the new plot.

name_plot %+% filter(BabynamesDist, name == "Josephine" & sex == "F")