Historical baby names

library(pacman)
p_load(tidyverse, babynames, mdsr, Hmisc)

This dataset was created in 2014.

BabynamesDist <- make_babynames_dist()
head(BabynamesDist)
BabynamesDist %>% filter(name == "Benjamin")
joseph <- BabynamesDist %>%
  filter(name == "Joseph" & sex == "M")
name_plot <- joseph %>% ggplot(aes(x = year))
name_plot

name_plot <- name_plot +
  geom_bar(stat = "identity", aes(y = count_thousands*alive_prob),
           fill = "#b2d7e9", color = "white")
name_plot

name_plot <- name_plot + geom_line(aes(y = count_thousands), size=2)
name_plot

name_plot <- name_plot + 
  ylab("Number of People (thousands)") +
  xlab(NULL)
name_plot

summary(name_plot)
data: year, sex, name, n, prop, alive_prob, count_thousands, age_today, est_alive_today
  [111x9]
mapping:  x = ~year
faceting: <ggproto object: Class FacetNull, Facet, gg>
    compute_layout: function
    draw_back: function
    draw_front: function
    draw_labels: function
    draw_panels: function
    finish_data: function
    init_scales: function
    map_data: function
    params: list
    setup_data: function
    setup_params: function
    shrink: TRUE
    train_scales: function
    vars: function
    super:  <ggproto object: Class FacetNull, Facet, gg>
-----------------------------------
mapping: y = ~count_thousands * alive_prob 
geom_bar: width = NULL, na.rm = FALSE
stat_identity: na.rm = FALSE
position_stack 

mapping: y = ~count_thousands 
geom_line: na.rm = FALSE
stat_identity: na.rm = FALSE
position_identity 
?wtd.quantile
median_yob <- with(joseph, wtd.quantile(year, weights=est_alive_today, prob = 0.5))  # The pipe %>% does not work.
median_yob    
 50% 
1975 
name_plot <- name_plot + geom_bar(stat = "identity", color = "white", fill = "#008fd5",
                                   aes(y = ifelse(year == median_yob, est_alive_today / 1000, 0)))
name_plot

name_plot + ggtitle("Age Distribution of American Boys Named Joseph") +
  geom_text(x = 1935, y = 40, label = "Number of Josephs\nborn each year") +
  geom_text(x = 1915, y = 13, label = "Number of Josephs\nborn each year\nestimated to be alive\non 1/1/214",
          color = "#b2d7e9") +
  geom_text(x = 2003, y = 40, label = "The median\nliving Joseph\nis 37 years old", 
          color = "darkgrey") +
  geom_curve(x = 1995, xend = 1974, y = 40, yend = 24,
             arrow = arrow(length = unit(0.3, "cm")), curvature = 0.5) +
  ylim (0, 42)

Now replace the data in the ggplot and then make the new plot.

name_plot %+% filter(BabynamesDist, name == "Josephine" & sex == "F")

Faceting

Try the name Jessie

name_plot <- name_plot + facet_wrap(~sex)
name_plot %+% filter(BabynamesDist, name == "Jessie")

Try it with your own name.

name_plot <- name_plot + facet_wrap(~sex)
name_plot %+% filter(BabynamesDist, name == "Eric")

many_names_plot <- name_plot + facet_grid(name ~sex)
mnp <- many_names_plot %+% filter(BabynamesDist, name %in% c("Jessie", "Marion", "Jackie"))
mnp

mnp + facet_grid(sex ~ name)

Most common women’s names

Thank you Hannah Kim!

library(babynames)
com_fem <- BabynamesDist %>%
  filter(sex == "F") %>% 
  group_by(name) %>%
  summarise(
    N = n(),
    est_num_alive = sum(est_alive_today)
  ) %>% 
  arrange(desc(est_num_alive)) %>% 
  head(25) %>% 
  select(name) %>% 
  left_join(., BabynamesDist, by = "name") %>% 
  group_by(name) %>% 
  summarise(
    N = n(),
    est_num_alive = sum(est_alive_today),
    q1_age = wtd.quantile(age_today, weight = est_alive_today, probs = 0.25),
    median_age = wtd.quantile(age_today, weight = est_alive_today, probs = 0.5),
    q3_age = wtd.quantile(age_today, weight = est_alive_today, probs = 0.75) 
  )
`summarise()` ungrouping output (override with `.groups` argument)
`summarise()` ungrouping output (override with `.groups` argument)
w_plot <- ggplot(data = com_fem, aes(x = reorder(name, -median_age),
  y = median_age)) + xlab(NULL) + ylab("Age (in years)") +
  ggtitle("Median ages for females with the 25 most common names")
w_plot <- w_plot + geom_linerange(aes(ymin = q1_age, ymax = q3_age),
  color = "#f3d478", size = 10, alpha = 0.8)
w_plot <- w_plot +
  geom_point(fill = "#ed3324", colour = "white", size = 4, shape = 21)
w_plot +
  geom_point(aes(y = 55, x = 24), fill = "#ed3324", colour = "white",
             size = 4, shape = 21) +
  geom_text(aes(y = 58, x = 24, label = "   median")) +
  geom_text(aes(y = 26, x = 16, label = "   25th")) +
  geom_text(aes(y = 51, x = 16, label = "75th percentile      ")) +
  geom_point(aes(y = 24, x = 16), shape = 17) +
  geom_point(aes(y = 56, x = 16), shape = 17) +
  coord_flip()

LS0tCnRpdGxlOiAnJSslJwpvdXRwdXQ6CiAgd29yZF9kb2N1bWVudDogZGVmYXVsdAogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQKICBwZGZfZG9jdW1lbnQ6IGRlZmF1bHQKLS0tCgoqKkhpc3RvcmljYWwgYmFieSBuYW1lcyoqCgpgYGB7ciBtZXNzYWdlID0gRkFMU0V9CmxpYnJhcnkocGFjbWFuKQoKcF9sb2FkKHRpZHl2ZXJzZSwgYmFieW5hbWVzLCBtZHNyLCBIbWlzYykKYGBgCgpUaGlzIGRhdGFzZXQgd2FzIGNyZWF0ZWQgaW4gMjAxNC4KCmBgYHtyfQpCYWJ5bmFtZXNEaXN0IDwtIG1ha2VfYmFieW5hbWVzX2Rpc3QoKQpoZWFkKEJhYnluYW1lc0Rpc3QpCmBgYAoKYGBge3J9CkJhYnluYW1lc0Rpc3QgJT4lIGZpbHRlcihuYW1lID09ICJCZW5qYW1pbiIpCmBgYAoKYGBge3J9Cmpvc2VwaCA8LSBCYWJ5bmFtZXNEaXN0ICU+JQogIGZpbHRlcihuYW1lID09ICJKb3NlcGgiICYgc2V4ID09ICJNIikKCm5hbWVfcGxvdCA8LSBqb3NlcGggJT4lIGdncGxvdChhZXMoeCA9IHllYXIpKQoKbmFtZV9wbG90CmBgYAoKYGBge3J9Cm5hbWVfcGxvdCA8LSBuYW1lX3Bsb3QgKwogIGdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiLCBhZXMoeSA9IGNvdW50X3Rob3VzYW5kcyphbGl2ZV9wcm9iKSwKICAgICAgICAgICBmaWxsID0gIiNiMmQ3ZTkiLCBjb2xvciA9ICJ3aGl0ZSIpCgpuYW1lX3Bsb3QKYGBgCgpgYGB7cn0KbmFtZV9wbG90IDwtIG5hbWVfcGxvdCArIGdlb21fbGluZShhZXMoeSA9IGNvdW50X3Rob3VzYW5kcyksIHNpemU9MikKbmFtZV9wbG90CmBgYAoKYGBge3J9Cm5hbWVfcGxvdCA8LSBuYW1lX3Bsb3QgKyAKICB5bGFiKCJOdW1iZXIgb2YgUGVvcGxlICh0aG91c2FuZHMpIikgKwogIHhsYWIoTlVMTCkKCm5hbWVfcGxvdApgYGAKCmBgYHtyfQpzdW1tYXJ5KG5hbWVfcGxvdCkKYGBgCgpgYGB7cn0KP3d0ZC5xdWFudGlsZQoKbWVkaWFuX3lvYiA8LSB3aXRoKGpvc2VwaCwgd3RkLnF1YW50aWxlKHllYXIsIHdlaWdodHM9ZXN0X2FsaXZlX3RvZGF5LCBwcm9iID0gMC41KSkgICMgVGhlIHBpcGUgJT4lIGRvZXMgbm90IHdvcmsuCgptZWRpYW5feW9iICAgIApgYGAKCmBgYHtyfQpuYW1lX3Bsb3QgPC0gbmFtZV9wbG90ICsgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIsIGNvbG9yID0gIndoaXRlIiwgZmlsbCA9ICIjMDA4ZmQ1IiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBhZXMoeSA9IGlmZWxzZSh5ZWFyID09IG1lZGlhbl95b2IsIGVzdF9hbGl2ZV90b2RheSAvIDEwMDAsIDApKSkKCm5hbWVfcGxvdApgYGAKCmBgYHtyfQpuYW1lX3Bsb3QgKyBnZ3RpdGxlKCJBZ2UgRGlzdHJpYnV0aW9uIG9mIEFtZXJpY2FuIEJveXMgTmFtZWQgSm9zZXBoIikgKwogIGdlb21fdGV4dCh4ID0gMTkzNSwgeSA9IDQwLCBsYWJlbCA9ICJOdW1iZXIgb2YgSm9zZXBoc1xuYm9ybiBlYWNoIHllYXIiKSArCiAgZ2VvbV90ZXh0KHggPSAxOTE1LCB5ID0gMTMsIGxhYmVsID0gIk51bWJlciBvZiBKb3NlcGhzXG5ib3JuIGVhY2ggeWVhclxuZXN0aW1hdGVkIHRvIGJlIGFsaXZlXG5vbiAxLzEvMjE0IiwKICAgICAgICAgIGNvbG9yID0gIiNiMmQ3ZTkiKSArCiAgZ2VvbV90ZXh0KHggPSAyMDAzLCB5ID0gNDAsIGxhYmVsID0gIlRoZSBtZWRpYW5cbmxpdmluZyBKb3NlcGhcbmlzIDM3IHllYXJzIG9sZCIsIAogICAgICAgICAgY29sb3IgPSAiZGFya2dyZXkiKSArCiAgZ2VvbV9jdXJ2ZSh4ID0gMTk5NSwgeGVuZCA9IDE5NzQsIHkgPSA0MCwgeWVuZCA9IDI0LAogICAgICAgICAgICAgYXJyb3cgPSBhcnJvdyhsZW5ndGggPSB1bml0KDAuMywgImNtIikpLCBjdXJ2YXR1cmUgPSAwLjUpICsKICB5bGltICgwLCA0MikKCmBgYAoKTm93IHJlcGxhY2UgdGhlIGRhdGEgaW4gdGhlIGdncGxvdCBhbmQgdGhlbiBtYWtlIHRoZSBuZXcgcGxvdC4KCmBgYHtyfQpuYW1lX3Bsb3QgJSslIGZpbHRlcihCYWJ5bmFtZXNEaXN0LCBuYW1lID09ICJKb3NlcGhpbmUiICYgc2V4ID09ICJGIikKYGBgCgpGYWNldGluZwoKVHJ5IHRoZSBuYW1lIEplc3NpZQoKYGBge3J9Cm5hbWVfcGxvdCA8LSBuYW1lX3Bsb3QgKyBmYWNldF93cmFwKH5zZXgpCm5hbWVfcGxvdCAlKyUgZmlsdGVyKEJhYnluYW1lc0Rpc3QsIG5hbWUgPT0gIkplc3NpZSIpCmBgYAoKVHJ5IGl0IHdpdGggeW91ciBvd24gbmFtZS4KCmBgYHtyfQpuYW1lX3Bsb3QgPC0gbmFtZV9wbG90ICsgZmFjZXRfd3JhcCh+c2V4KQpuYW1lX3Bsb3QgJSslIGZpbHRlcihCYWJ5bmFtZXNEaXN0LCBuYW1lID09ICJFcmljIikKYGBgCgpgYGB7cn0KbWFueV9uYW1lc19wbG90IDwtIG5hbWVfcGxvdCArIGZhY2V0X2dyaWQobmFtZSB+c2V4KQptbnAgPC0gbWFueV9uYW1lc19wbG90ICUrJSBmaWx0ZXIoQmFieW5hbWVzRGlzdCwgbmFtZSAlaW4lIGMoIkplc3NpZSIsICJNYXJpb24iLCAiSmFja2llIikpCm1ucApgYGAKCmBgYHtyfQptbnAgKyBmYWNldF9ncmlkKHNleCB+IG5hbWUpCmBgYAoKKipNb3N0IGNvbW1vbiB3b21lbidzIG5hbWVzKioKClRoYW5rIHlvdSBIYW5uYWggS2ltIQoKCmBgYHtyfQpsaWJyYXJ5KGJhYnluYW1lcykKCmNvbV9mZW0gPC0gQmFieW5hbWVzRGlzdCAlPiUKICBmaWx0ZXIoc2V4ID09ICJGIikgJT4lIAogIGdyb3VwX2J5KG5hbWUpICU+JQogIHN1bW1hcmlzZSgKICAgIE4gPSBuKCksCiAgICBlc3RfbnVtX2FsaXZlID0gc3VtKGVzdF9hbGl2ZV90b2RheSkKICApICU+JSAKICBhcnJhbmdlKGRlc2MoZXN0X251bV9hbGl2ZSkpICU+JSAKICBoZWFkKDI1KSAlPiUgCiAgc2VsZWN0KG5hbWUpICU+JSAKICBsZWZ0X2pvaW4oLiwgQmFieW5hbWVzRGlzdCwgYnkgPSAibmFtZSIpICU+JSAKICBncm91cF9ieShuYW1lKSAlPiUgCiAgc3VtbWFyaXNlKAogICAgTiA9IG4oKSwKICAgIGVzdF9udW1fYWxpdmUgPSBzdW0oZXN0X2FsaXZlX3RvZGF5KSwKICAgIHExX2FnZSA9IHd0ZC5xdWFudGlsZShhZ2VfdG9kYXksIHdlaWdodCA9IGVzdF9hbGl2ZV90b2RheSwgcHJvYnMgPSAwLjI1KSwKICAgIG1lZGlhbl9hZ2UgPSB3dGQucXVhbnRpbGUoYWdlX3RvZGF5LCB3ZWlnaHQgPSBlc3RfYWxpdmVfdG9kYXksIHByb2JzID0gMC41KSwKICAgIHEzX2FnZSA9IHd0ZC5xdWFudGlsZShhZ2VfdG9kYXksIHdlaWdodCA9IGVzdF9hbGl2ZV90b2RheSwgcHJvYnMgPSAwLjc1KSAKICApCgp3X3Bsb3QgPC0gZ2dwbG90KGRhdGEgPSBjb21fZmVtLCBhZXMoeCA9IHJlb3JkZXIobmFtZSwgLW1lZGlhbl9hZ2UpLAogIHkgPSBtZWRpYW5fYWdlKSkgKyB4bGFiKE5VTEwpICsgeWxhYigiQWdlIChpbiB5ZWFycykiKSArCiAgZ2d0aXRsZSgiTWVkaWFuIGFnZXMgZm9yIGZlbWFsZXMgd2l0aCB0aGUgMjUgbW9zdCBjb21tb24gbmFtZXMiKQoKd19wbG90IDwtIHdfcGxvdCArIGdlb21fbGluZXJhbmdlKGFlcyh5bWluID0gcTFfYWdlLCB5bWF4ID0gcTNfYWdlKSwKICBjb2xvciA9ICIjZjNkNDc4Iiwgc2l6ZSA9IDEwLCBhbHBoYSA9IDAuOCkKCndfcGxvdCA8LSB3X3Bsb3QgKwogIGdlb21fcG9pbnQoZmlsbCA9ICIjZWQzMzI0IiwgY29sb3VyID0gIndoaXRlIiwgc2l6ZSA9IDQsIHNoYXBlID0gMjEpCgp3X3Bsb3QgKwogIGdlb21fcG9pbnQoYWVzKHkgPSA1NSwgeCA9IDI0KSwgZmlsbCA9ICIjZWQzMzI0IiwgY29sb3VyID0gIndoaXRlIiwKICAgICAgICAgICAgIHNpemUgPSA0LCBzaGFwZSA9IDIxKSArCiAgZ2VvbV90ZXh0KGFlcyh5ID0gNTgsIHggPSAyNCwgbGFiZWwgPSAiICAgbWVkaWFuIikpICsKICBnZW9tX3RleHQoYWVzKHkgPSAyNiwgeCA9IDE2LCBsYWJlbCA9ICIgICAyNXRoIikpICsKICBnZW9tX3RleHQoYWVzKHkgPSA1MSwgeCA9IDE2LCBsYWJlbCA9ICI3NXRoIHBlcmNlbnRpbGUgICAgICAiKSkgKwogIGdlb21fcG9pbnQoYWVzKHkgPSAyNCwgeCA9IDE2KSwgc2hhcGUgPSAxNykgKwogIGdlb21fcG9pbnQoYWVzKHkgPSA1NiwgeCA9IDE2KSwgc2hhcGUgPSAxNykgKwogIGNvb3JkX2ZsaXAoKQpgYGAKCgoKCg==