%+%

Author

Prof. Eric A. Suess

Historical baby names

See Section 3.3

library(pacman)

p_load(tidyverse, babynames, mdsr, Hmisc)

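This dataset was created in 2014, so `age_today` is measured relative to 2014 (for example, someone born in 1900 has `age_today` = 114).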

# Build the baby-names distribution table and preview its first rows.
BabynamesDist <- make_babynames_dist()
BabynamesDist |>
  head()
# A tibble: 6 × 9
   year sex   name          n   prop alive_prob count_thousands age_today
  <dbl> <chr> <chr>     <int>  <dbl>      <dbl>           <dbl>     <dbl>
1  1900 F     Mary      16706 0.0526          0           16.7        114
2  1900 F     Helen      6343 0.0200          0            6.34       114
3  1900 F     Anna       6114 0.0192          0            6.11       114
4  1900 F     Margaret   5304 0.0167          0            5.30       114
5  1900 F     Ruth       4765 0.0150          0            4.76       114
6  1900 F     Elizabeth  4096 0.0129          0            4.10       114
# ℹ 1 more variable: est_alive_today <dbl>
# Show every row recorded for the name "Benjamin" (all sexes, all years).
filter(BabynamesDist, name == "Benjamin")
# A tibble: 205 × 9
    year sex   name         n    prop alive_prob count_thousands age_today
   <dbl> <chr> <chr>    <int>   <dbl>      <dbl>           <dbl>     <dbl>
 1  1900 M     Benjamin   450 0.00278   0                  0.45        114
 2  1901 M     Benjamin   343 0.00297   0.000025           0.343       113
 3  1902 M     Benjamin   374 0.00282   0.00005            0.374       112
 4  1903 M     Benjamin   324 0.00251   0.000075           0.324       111
 5  1904 M     Benjamin   358 0.00258   0.0001             0.358       110
 6  1905 M     Benjamin   379 0.00265   0.000125           0.379       109
 7  1906 M     Benjamin   352 0.00244   0.00015            0.352       108
 8  1907 M     Benjamin   460 0.00290   0.000175           0.46        107
 9  1908 M     Benjamin   480 0.00289   0.0002             0.48        106
10  1909 M     Benjamin   489 0.00276   0.000225           0.489       105
# ℹ 195 more rows
# ℹ 1 more variable: est_alive_today <dbl>
# Keep only the rows for boys named Joseph.
joseph <- filter(BabynamesDist, name == "Joseph", sex == "M")

# Start an empty plot with birth year on the x-axis; layers are added below.
name_plot <- ggplot(joseph, aes(x = year))

name_plot

In the second edition of the book, the authors switched to using geom_col().

# Add bars whose height is count_thousands weighted by alive_prob for each
# birth year.
name_plot <- name_plot +
  geom_col(
    aes(y = count_thousands * alive_prob),
    fill = "#b2d7e9",
    color = "white",
    # `linewidth` replaces the deprecated `size` argument for the bar outline
    # width (ggplot2 >= 3.4.0), so no deprecation warning is emitted.
    linewidth = 0.1
  )
name_plot

# Overlay a line for count_thousands per birth year on top of the bars.
# `linewidth` (not the deprecated `size`) sets line thickness in
# ggplot2 >= 3.4.0.
name_plot <- name_plot +
  geom_line(aes(y = count_thousands), linewidth = 2)
name_plot