The code demonstrated here are from Charlotte Wickham’s purrr-tutorial. She gave a two day tutorial at the 2017 UseR, YouTube Solving iteration problems with purrr day1 and YouTube Solving iteration problems with purrr II day2.
Here is the link to the http://bit.ly/purrr-slides.
I also find that Sharon Machlis purrr videos are quite useful. map_df
library(pacman)
p_load(tidyverse, repurrrsive, nycflights13, fs, glue, babynames, Lahman, tidymodels)
data(sw_films)
View(sw_films)
data(sw_people)
View(sw_people)
data(sw_planets)
View(sw_planets)
data(sw_species)
View(sw_species)
data(sw_starships)
View(sw_starships)
data(sw_vehicles)
View(sw_vehicles)
sw_people %>% map_chr(~ length(.x[["starships"]]))
[1] "2" "0" "0" "1" "0" "0" "0" "0" "1" "5" "3" "0" "2" "2" "0" "0" "1" "1"
[19] "0" "0" "1" "0" "0" "1" "0" "0" "0" "1" "0" "1" "0" "0" "0" "0" "0" "0"
[37] "1" "0" "0" "0" "0" "1" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
[55] "1" "0" "1" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
[73] "0" "0" "0" "0" "1" "0" "0" "0" "0" "0" "0" "1" "0" "0" "3"
sw_people %>% map_chr(~ .x[["hair_color"]])
[1] "blond" "n/a" "n/a" "none"
[5] "brown" "brown, grey" "brown" "n/a"
[9] "black" "auburn, white" "blond" "auburn, grey"
[13] "brown" "brown" "n/a" "n/a"
[17] "brown" "brown" "white" "grey"
[21] "black" "none" "none" "black"
[25] "none" "none" "auburn" "brown"
[29] "brown" "none" "brown" "none"
[33] "blond" "none" "none" "none"
[37] "brown" "black" "none" "black"
[41] "black" "none" "none" "none"
[45] "none" "none" "none" "none"
[49] "white" "none" "black" "none"
[53] "none" "none" "none" "none"
[57] "black" "brown" "brown" "none"
[61] "black" "black" "brown" "white"
[65] "black" "black" "blonde" "none"
[69] "none" "none" "white" "none"
[73] "none" "none" "none" "none"
[77] "none" "brown" "brown" "none"
[81] "none" "black" "brown" "brown"
[85] "none" "unknown" "brown"
sw_people %>% map_lgl(~ .x[["gender"]] == "male")
[1] TRUE FALSE FALSE TRUE FALSE TRUE FALSE FALSE TRUE TRUE TRUE TRUE
[13] TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE
[25] TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[37] TRUE TRUE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE TRUE TRUE
[49] TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE
[61] FALSE FALSE FALSE TRUE TRUE TRUE FALSE TRUE TRUE FALSE FALSE TRUE
[73] FALSE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE FALSE TRUE
[85] FALSE FALSE FALSE
sw_people %>% map_dbl(~ as.numeric(.x[["mass"]]))
[1] 77.0 75.0 32.0 136.0 49.0 120.0 75.0 32.0 84.0 77.0 84.0 NA
[13] 112.0 80.0 74.0 NA 77.0 110.0 17.0 75.0 78.2 140.0 113.0 79.0
[25] 79.0 83.0 NA NA 20.0 68.0 89.0 90.0 NA 66.0 82.0 NA
[37] NA NA 40.0 NA NA 80.0 NA 55.0 45.0 NA 65.0 84.0
[49] 82.0 87.0 NA 50.0 NA NA 80.0 NA 85.0 NA NA 80.0
[61] 56.2 50.0 NA 80.0 NA 79.0 55.0 102.0 88.0 NA NA 15.0
[73] NA 48.0 NA 57.0 159.0 136.0 79.0 48.0 80.0 NA NA NA
[85] NA NA 45.0
sw_people %>% map_dbl(~ parse_number(.x[["mass"]]))
[1] 77.0 75.0 32.0 136.0 49.0 120.0 75.0 32.0 84.0 77.0
[11] 84.0 NA 112.0 80.0 74.0 1358.0 77.0 110.0 17.0 75.0
[21] 78.2 140.0 113.0 79.0 79.0 83.0 NA NA 20.0 68.0
[31] 89.0 90.0 NA 66.0 82.0 NA NA NA 40.0 NA
[41] NA 80.0 NA 55.0 45.0 NA 65.0 84.0 82.0 87.0
[51] NA 50.0 NA NA 80.0 NA 85.0 NA NA 80.0
[61] 56.2 50.0 NA 80.0 NA 79.0 55.0 102.0 88.0 NA
[71] NA 15.0 NA 48.0 NA 57.0 159.0 136.0 79.0 48.0
[81] 80.0 NA NA NA NA NA 45.0
sw_people %>% map("starships") %>%
map_int(length)
[1] 2 0 0 1 0 0 0 0 1 5 3 0 2 2 0 0 1 1 0 0 1 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 1
[38] 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[75] 0 0 1 0 0 0 0 0 0 1 0 0 3
planet_lookup <- sw_planets %>% map_chr("name") %>%
set_names(map_chr(sw_planets, "url"))
planet_lookup
http://swapi.co/api/planets/2/ http://swapi.co/api/planets/3/
"Alderaan" "Yavin IV"
http://swapi.co/api/planets/4/ http://swapi.co/api/planets/5/
"Hoth" "Dagobah"
http://swapi.co/api/planets/6/ http://swapi.co/api/planets/7/
"Bespin" "Endor"
http://swapi.co/api/planets/8/ http://swapi.co/api/planets/9/
"Naboo" "Coruscant"
http://swapi.co/api/planets/10/ http://swapi.co/api/planets/11/
"Kamino" "Geonosis"
http://swapi.co/api/planets/12/ http://swapi.co/api/planets/13/
"Utapau" "Mustafar"
http://swapi.co/api/planets/14/ http://swapi.co/api/planets/15/
"Kashyyyk" "Polis Massa"
http://swapi.co/api/planets/16/ http://swapi.co/api/planets/17/
"Mygeeto" "Felucia"
http://swapi.co/api/planets/18/ http://swapi.co/api/planets/19/
"Cato Neimoidia" "Saleucami"
http://swapi.co/api/planets/20/ http://swapi.co/api/planets/21/
"Stewjon" "Eriadu"
http://swapi.co/api/planets/22/ http://swapi.co/api/planets/23/
"Corellia" "Rodia"
http://swapi.co/api/planets/24/ http://swapi.co/api/planets/25/
"Nal Hutta" "Dantooine"
http://swapi.co/api/planets/26/ http://swapi.co/api/planets/27/
"Bestine IV" "Ord Mantell"
http://swapi.co/api/planets/28/ http://swapi.co/api/planets/29/
"unknown" "Trandosha"
http://swapi.co/api/planets/30/ http://swapi.co/api/planets/31/
"Socorro" "Mon Cala"
http://swapi.co/api/planets/32/ http://swapi.co/api/planets/33/
"Chandrila" "Sullust"
http://swapi.co/api/planets/34/ http://swapi.co/api/planets/35/
"Toydaria" "Malastare"
http://swapi.co/api/planets/36/ http://swapi.co/api/planets/37/
"Dathomir" "Ryloth"
http://swapi.co/api/planets/38/ http://swapi.co/api/planets/39/
"Aleen Minor" "Vulpter"
http://swapi.co/api/planets/40/ http://swapi.co/api/planets/41/
"Troiken" "Tund"
http://swapi.co/api/planets/42/ http://swapi.co/api/planets/43/
"Haruun Kal" "Cerea"
http://swapi.co/api/planets/44/ http://swapi.co/api/planets/45/
"Glee Anselm" "Iridonia"
http://swapi.co/api/planets/46/ http://swapi.co/api/planets/47/
"Tholoth" "Iktotch"
http://swapi.co/api/planets/48/ http://swapi.co/api/planets/49/
"Quermia" "Dorin"
http://swapi.co/api/planets/50/ http://swapi.co/api/planets/51/
"Champala" "Mirial"
http://swapi.co/api/planets/52/ http://swapi.co/api/planets/53/
"Serenno" "Concord Dawn"
http://swapi.co/api/planets/54/ http://swapi.co/api/planets/55/
"Zolan" "Ojom"
http://swapi.co/api/planets/56/ http://swapi.co/api/planets/57/
"Skako" "Muunilinst"
http://swapi.co/api/planets/58/ http://swapi.co/api/planets/59/
"Shili" "Kalee"
http://swapi.co/api/planets/60/ http://swapi.co/api/planets/1/
"Umbara" "Tatooine"
http://swapi.co/api/planets/61/
"Jakku"
Answers:
Answers:
Answers:
Answers: Attack of the Clones
# do one.
length(sw_films[[1]]$characters)
[1] 18
sw_films %>% map("characters") %>%
map_int(~ length(.x)) %>%
set_names(map_chr(sw_films, "title"))
A New Hope Attack of the Clones The Phantom Menace
18 40 34
Revenge of the Sith Return of the Jedi The Empire Strikes Back
34 20 16
The Force Awakens
11
sw_films %>% map("characters") %>%
map_int(length) %>%
set_names(map_chr(sw_films, "title")) %>%
sort(decreasing = TRUE)
Attack of the Clones The Phantom Menace Revenge of the Sith
40 34 34
Return of the Jedi A New Hope The Empire Strikes Back
20 18 16
The Force Awakens
11
Answers: Mirialan, Togruta, Wookiee, Human
# do one
length(unlist(str_split(sw_species[[1]]$"eye_colors", ", ")))
[1] 2
sw_species %>% map_chr("eye_colors") %>%
stringr::str_split(", ") %>%
map_int(length) %>%
set_names(map_chr(sw_species, "name")) %>%
sort(decreasing = TRUE)
Mirialan Togruta Wookiee Human Twi'lek
6 6 6 6 4
Yoda's species Hutt Trandoshan Ewok Neimodian
3 2 2 2 2
Dug Zabrak Tholothian Kel Dor Geonosian
2 2 2 2 2
Mon Calamari Sullustan Gungan Toydarian Aleena
1 1 1 1 1
Vulptereen Xexto Toong Cerean Nautolan
1 1 1 1 1
Iktotchi Quermian Chagrian Clawdite Besalisk
1 1 1 1 1
Kaminoan Skakoan Muun Kaleesh Pau'an
1 1 1 1 1
Droid Rodian
1 1
Answers: Aleen Minor, Tholoth, Quermia, Zolan, Umbara
sw_planets %>% map_int( ~ map_lgl(.x, ~ "unknown" %in% .x) %>%
sum() ) %>%
set_names(map_chr(sw_planets, "name")) %>%
sort(decreasing = TRUE)
Aleen Minor Tholoth Quermia Zolan Umbara
8 8 8 8 8
Troiken Mirial Serenno Concord Dawn Jakku
7 7 7 7 7
unknown Iridonia Shili Stewjon Ojom
6 5 5 4 4
Tund Iktotch Dorin Saleucami Champala
3 3 3 2 2
Skako Hoth Dagobah Coruscant Mygeeto
2 1 1 1 1
Felucia Cato Neimoidia Eriadu Nal Hutta Dantooine
1 1 1 1 1
Bestine IV Trandosha Socorro Toydaria Malastare
1 1 1 1 1
Dathomir Vulpter Haruun Kal Cerea Kalee
1 1 1 1 1
Alderaan Yavin IV Bespin Endor Naboo
0 0 0 0 0
Kamino Geonosis Utapau Mustafar Kashyyyk
0 0 0 0 0
Polis Massa Corellia Rodia Ord Mantell Mon Cala
0 0 0 0 0
Chandrila Sullust Ryloth Glee Anselm Muunilinst
0 0 0 0 0
Tatooine
0
Read .csv files using map.
First, write come .csv files.
flights %>% filter(origin == "LGA") %>%
write_csv("flights_LGA.csv")
flights %>% filter(origin == "EWR") %>%
write_csv("flights_EWR.csv")
flights %>% filter(origin == "JFK") %>%
write_csv("flights_JFK.csv")
myfiles <- list.files(pattern = ".csv")
myfiles
[1] "flights_EWR.csv" "flights_JFK.csv" "flights_LGA.csv"
mydata <- myfiles %>% map_df(read_csv)
Parsed with column specification:
cols(
year = col_double(),
month = col_double(),
day = col_double(),
dep_time = col_double(),
sched_dep_time = col_double(),
dep_delay = col_double(),
arr_time = col_double(),
sched_arr_time = col_double(),
arr_delay = col_double(),
carrier = col_character(),
flight = col_double(),
tailnum = col_character(),
origin = col_character(),
dest = col_character(),
air_time = col_double(),
distance = col_double(),
hour = col_double(),
minute = col_double(),
time_hour = col_datetime(format = "")
)
Parsed with column specification:
cols(
year = col_double(),
month = col_double(),
day = col_double(),
dep_time = col_double(),
sched_dep_time = col_double(),
dep_delay = col_double(),
arr_time = col_double(),
sched_arr_time = col_double(),
arr_delay = col_double(),
carrier = col_character(),
flight = col_double(),
tailnum = col_character(),
origin = col_character(),
dest = col_character(),
air_time = col_double(),
distance = col_double(),
hour = col_double(),
minute = col_double(),
time_hour = col_datetime(format = "")
)
Parsed with column specification:
cols(
year = col_double(),
month = col_double(),
day = col_double(),
dep_time = col_double(),
sched_dep_time = col_double(),
dep_delay = col_double(),
arr_time = col_double(),
sched_arr_time = col_double(),
arr_delay = col_double(),
carrier = col_character(),
flight = col_double(),
tailnum = col_character(),
origin = col_character(),
dest = col_character(),
air_time = col_double(),
distance = col_double(),
hour = col_double(),
minute = col_double(),
time_hour = col_datetime(format = "")
)
dim(flights)
[1] 336776 19
dim(mydata)
[1] 336776 19
file_delete(glue("{myfiles}"))
Try rerun().
x <- 3 %>% rerun(rnorm(100)) %>%
map(mean)
x
[[1]]
[1] -0.01373121
[[2]]
[1] -0.1657158
[[3]]
[1] 0.1884074
x <- 1000 %>% rerun( tibble( x = rnorm(100),
y = rexp(100) )
)
x_cor <- x %>% map_df(~ tibble(x_cor = as.vector(cor(.x))[2]) )
x_cor %>% ggplot(aes(x = x_cor)) +
geom_histogram()
x_cor <- x_cor %>% mutate( index = 1:length(x_cor))
x_cor %>% ggplot(aes(x = index, y = x_cor)) +
geom_line()
Back to the tutorial.
Try to use the gap_split dataset to make plots for each country.
Make a list of ggplots.
Try walk().
data(gap_split)
View(gap_split)
countries <- names(gap_split)
countries
[1] "Afghanistan" "Albania"
[3] "Algeria" "Angola"
[5] "Argentina" "Australia"
[7] "Austria" "Bahrain"
[9] "Bangladesh" "Belgium"
[11] "Benin" "Bolivia"
[13] "Bosnia and Herzegovina" "Botswana"
[15] "Brazil" "Bulgaria"
[17] "Burkina Faso" "Burundi"
[19] "Cambodia" "Cameroon"
[21] "Canada" "Central African Republic"
[23] "Chad" "Chile"
[25] "China" "Colombia"
[27] "Comoros" "Congo, Dem. Rep."
[29] "Congo, Rep." "Costa Rica"
[31] "Cote d'Ivoire" "Croatia"
[33] "Cuba" "Czech Republic"
[35] "Denmark" "Djibouti"
[37] "Dominican Republic" "Ecuador"
[39] "Egypt" "El Salvador"
[41] "Equatorial Guinea" "Eritrea"
[43] "Ethiopia" "Finland"
[45] "France" "Gabon"
[47] "Gambia" "Germany"
[49] "Ghana" "Greece"
[51] "Guatemala" "Guinea"
[53] "Guinea-Bissau" "Haiti"
[55] "Honduras" "Hong Kong, China"
[57] "Hungary" "Iceland"
[59] "India" "Indonesia"
[61] "Iran" "Iraq"
[63] "Ireland" "Israel"
[65] "Italy" "Jamaica"
[67] "Japan" "Jordan"
[69] "Kenya" "Korea, Dem. Rep."
[71] "Korea, Rep." "Kuwait"
[73] "Lebanon" "Lesotho"
[75] "Liberia" "Libya"
[77] "Madagascar" "Malawi"
[79] "Malaysia" "Mali"
[81] "Mauritania" "Mauritius"
[83] "Mexico" "Mongolia"
[85] "Montenegro" "Morocco"
[87] "Mozambique" "Myanmar"
[89] "Namibia" "Nepal"
[91] "Netherlands" "New Zealand"
[93] "Nicaragua" "Niger"
[95] "Nigeria" "Norway"
[97] "Oman" "Pakistan"
[99] "Panama" "Paraguay"
[101] "Peru" "Philippines"
[103] "Poland" "Portugal"
[105] "Puerto Rico" "Reunion"
[107] "Romania" "Rwanda"
[109] "Sao Tome and Principe" "Saudi Arabia"
[111] "Senegal" "Serbia"
[113] "Sierra Leone" "Singapore"
[115] "Slovak Republic" "Slovenia"
[117] "Somalia" "South Africa"
[119] "Spain" "Sri Lanka"
[121] "Sudan" "Swaziland"
[123] "Sweden" "Switzerland"
[125] "Syria" "Taiwan"
[127] "Tanzania" "Thailand"
[129] "Togo" "Trinidad and Tobago"
[131] "Tunisia" "Turkey"
[133] "Uganda" "United Kingdom"
[135] "United States" "Uruguay"
[137] "Venezuela" "Vietnam"
[139] "West Bank and Gaza" "Yemen, Rep."
[141] "Zambia" "Zimbabwe"
plots <- map2( gap_split, countries,
~ ggplot(.x, aes(x = year, y = lifeExp))
+ geom_line()
+ labs(title = .y))
plots
$Afghanistan
$Albania
$Algeria
$Angola
$Argentina
$Australia
$Austria
$Bahrain
$Bangladesh
$Belgium
$Benin
$Bolivia
$`Bosnia and Herzegovina`
$Botswana
$Brazil
$Bulgaria
$`Burkina Faso`
$Burundi
$Cambodia
$Cameroon
$Canada
$`Central African Republic`
$Chad
$Chile
$China
$Colombia
$Comoros
$`Congo, Dem. Rep.`
$`Congo, Rep.`
$`Costa Rica`
$`Cote d'Ivoire`
$Croatia
$Cuba
$`Czech Republic`
$Denmark
$Djibouti
$`Dominican Republic`
$Ecuador
$Egypt
$`El Salvador`
$`Equatorial Guinea`
$Eritrea
$Ethiopia
$Finland
$France
$Gabon
$Gambia
$Germany
$Ghana
$Greece
$Guatemala
$Guinea
$`Guinea-Bissau`
$Haiti
$Honduras
$`Hong Kong, China`
$Hungary
$Iceland
$India
$Indonesia
$Iran
$Iraq
$Ireland
$Israel
$Italy
$Jamaica
$Japan
$Jordan
$Kenya
$`Korea, Dem. Rep.`
$`Korea, Rep.`
$Kuwait
$Lebanon
$Lesotho
$Liberia
$Libya
$Madagascar
$Malawi
$Malaysia
$Mali
$Mauritania
$Mauritius
$Mexico
$Mongolia
$Montenegro
$Morocco
$Mozambique
$Myanmar
$Namibia
$Nepal
$Netherlands
$`New Zealand`
$Nicaragua
$Niger
$Nigeria
$Norway
$Oman
$Pakistan
$Panama
$Paraguay
$Peru
$Philippines
$Poland
$Portugal
$`Puerto Rico`
$Reunion
$Romania
$Rwanda
$`Sao Tome and Principe`
$`Saudi Arabia`
$Senegal
$Serbia
$`Sierra Leone`
$Singapore
$`Slovak Republic`
$Slovenia
$Somalia
$`South Africa`
$Spain
$`Sri Lanka`
$Sudan
$Swaziland
$Sweden
$Switzerland
$Syria
$Taiwan
$Tanzania
$Thailand
$Togo
$`Trinidad and Tobago`
$Tunisia
$Turkey
$Uganda
$`United Kingdom`
$`United States`
$Uruguay
$Venezuela
$Vietnam
$`West Bank and Gaza`
$`Yemen, Rep.`
$Zambia
$Zimbabwe
dir_create("plots")
walk2(.x = plots, .y = countries,
~ ggsave(filename = glue("plots/{.y}.pdf"), plot = .x))
Saving 7.29 x 4.5 in image
file_delete(glue("plots/{countries}.pdf"))
walk2(.x = plots, .y = countries,
~ ggsave(filename = paste0("plots/",.y,".pdf"), plot = .x))
Saving 7.29 x 4.5 in image
file.remove(paste0("plots/",countries, ".pdf"))
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[15] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[29] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[43] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[57] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[71] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[85] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[99] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[113] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[127] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[141] TRUE TRUE
The babynames data has been updated.
babynames %>% group_by(sex, name) %>%
summarise( total_births = sum(n)) %>%
arrange(desc(total_births))
`summarise()` regrouping output by 'sex' (override with `.groups` argument)
babynames %>% filter(name == "Sue") %>%
group_by(sex, name) %>%
summarise( total = sum(n))
`summarise()` regrouping output by 'sex' (override with `.groups` argument)
babynames %>% filter(name == "Robin") %>%
group_by(sex, name) %>%
summarise( total = sum(n))
`summarise()` regrouping output by 'sex' (override with `.groups` argument)
babynames %>% filter(name %in% c("Sue", "Robin", "Leslie")) %>%
group_by(sex, name) %>%
summarise( total = sum(n)) %>%
pivot_wider(names_from = "sex", values_from = "total")
`summarise()` regrouping output by 'sex' (override with `.groups` argument)
BabyWide <- babynames %>%
group_by(sex, name) %>%
summarise( total = sum(n)) %>%
pivot_wider(names_from = sex, values_from = total)
`summarise()` regrouping output by 'sex' (override with `.groups` argument)
head(BabyWide, 10)
BabyWide %>% filter( M > 50000, F > 50000) %>%
mutate( ratio = pmin(M/F, F/M) ) %>%
arrange(desc(ratio))
The Lahman data has been updated.
names(Teams)
[1] "yearID" "lgID" "teamID" "franchID"
[5] "divID" "Rank" "G" "Ghome"
[9] "W" "L" "DivWin" "WCWin"
[13] "LgWin" "WSWin" "R" "AB"
[17] "H" "X2B" "X3B" "HR"
[21] "BB" "SO" "SB" "CS"
[25] "HBP" "SF" "RA" "ER"
[29] "ERA" "CG" "SHO" "SV"
[33] "IPouts" "HA" "HRA" "BBA"
[37] "SOA" "E" "DP" "FP"
[41] "name" "park" "attendance" "BPF"
[45] "PPF" "teamIDBR" "teamIDlahman45" "teamIDretro"
Teams %>% select(15:40) %>%
map_df(mean, na.rm = TRUE)
angles <- Teams %>% filter(franchID == "ANA") %>%
group_by(teamID, name) %>%
summarize( began = first(yearID), end = last(yearID) ) %>%
arrange(began)
`summarise()` regrouping output by 'teamID' (override with `.groups` argument)
angles
angles_names <- angles$name
angles_names
[1] "Los Angeles Angels" "California Angels"
[3] "Anaheim Angels" "Los Angeles Angels of Anaheim"
nchar(angles_names[1])
[1] 18
nchar(angles_names[2])
[1] 17
nchar(angles_names[3])
[1] 14
nchar(angles_names[4])
[1] 29
angles_names %>% map(nchar)
[[1]]
[1] 18
[[2]]
[1] 17
[[3]]
[1] 14
[[4]]
[1] 29
top5 <- function(x, teamname){
x %>%
filter(name == teamname) %>%
select(teamID, yearID, W, L, name) %>%
arrange(desc(W)) %>%
head(5)
}
top5(Teams, angles_names[1])
top5(Teams, angles_names[2])
top5(Teams, angles_names[3])
top5(Teams, angles_names[4])
Compare lapply and map
angles_list <- lapply(angles_names, FUN = top5, x = Teams)
angles_list
[[1]]
[[2]]
[[3]]
[[4]]
angles_list2 <- angles_names %>% map(~ top5(Teams, .x))
angles_list2
[[1]]
[[2]]
[[3]]
[[4]]
NA
angles_df2 <- angles_names %>% map_df(~ top5(Teams, .x))
angles_df2
angles_df2 %>% group_by(teamID, name) %>%
summarise(N = n(), mean_wins = mean(W)) %>%
arrange(desc(mean_wins))
`summarise()` regrouping output by 'teamID' (override with `.groups` argument)
exp_wpct <- function(x){
return( 1 / (1 + (1/x)^2))
}
TeamRuns <- Teams %>%
filter(yearID >= 1954) %>%
rename(RS = R) %>%
mutate(WPct = W / (W+L), run_ratio = RS/RA) %>%
select(yearID, teamID, lgID, WPct, run_ratio)
TeamRuns %>% ggplot(aes(x = run_ratio, y = WPct)) +
geom_vline(xintercept = 1, color ="darkgrey", linetype = 2) +
geom_hline(yintercept = 0.5, color = "darkgrey", linetype = 2 ) +
geom_point(alpha = 0.3) +
stat_function(fun = exp_wpct, size = 2, color = "blue") +
xlab("Ratio of Runs Scored to Runs Allowed") +
ylab("Winning Percentage")
library(mosaic)
exWpct <- fitModel( WPct ~ 1/(1 + (1/run_ratio)^k), data = TeamRuns)
coef(exWpct)
k
1.836508
exWpct <- nls( WPct ~ 1/(1 + (1/run_ratio)^k), data = TeamRuns)
No starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart model
coef(exWpct)
k
1.836508
fit_k <- function(x){
mod <- nls( formula = WPct ~ 1/(1 + (1/run_ratio)^k), data = x )
return(data.frame(k = coef(mod)))
}
fit_k(TeamRuns)
No starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart model
TeamRuns2 <- TeamRuns %>%
mutate(decade = yearID %/% 10 * 10)
TeamRuns2
n_TeamRuns2 <- TeamRuns2 %>% group_by(decade) %>%
nest()
n_TeamRuns2
m_TeamRuns2 <- n_TeamRuns2 %>%
mutate(decade, model = map(data, fit_k))
No starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart model
m_TeamRuns2
m_TeamRuns2 %>% mutate( model = unlist(model) ) %>%
select(-data)
hr_leader <- function(x){
x %>%
select(teamID, HR) %>%
arrange(desc(HR)) %>%
head(n=1)
}
Teams %>%
filter(yearID == 1961 & lgID == "AL") %>%
arrange(desc(HR)) %>%
head(1)
n_TeamRuns2 <- Teams %>% group_by(yearID, lgID) %>%
nest()
n_TeamRuns2
hr_leaders <- n_TeamRuns2 %>%
mutate(yearID, lgID, hr = map(data, hr_leader) ) %>%
arrange(yearID) %>%
select(-data) %>%
unnest(hr)
head(hr_leaders, 10)
NA
hr_leaders %>% group_by(lgID) %>%
summarize(homerun_mean = mean(HR))
`summarise()` ungrouping output (override with `.groups` argument)
mean(HR ~ lgID, data = hr_leaders)
AA AL FL NA NL PL UA
40.5000 157.1092 51.0000 13.8000 129.2917 66.0000 32.0000
hr_leaders %>% filter(yearID >= 1916) %>%
group_by(lgID) %>%
summarize(homerun_mean = mean(HR))
`summarise()` ungrouping output (override with `.groups` argument)
mean(HR ~ lgID, data = hr_leaders)
AA AL FL NA NL PL UA
40.5000 157.1092 51.0000 13.8000 129.2917 66.0000 32.0000
hr_leaders %>%
filter(yearID >= 1916) %>%
ggplot(aes(x = yearID, y = HR, color = lgID)) +
geom_line() +
geom_point() +
geom_smooth(se=0) +
geom_vline(xintercept = 1973) +
annotate("text", x=1974, y=25, label = "AL adopts DH", hjust="left")
n_TeamRuns3 <- TeamRuns %>%
group_by(yearID) %>%
nest()
n_TeamRuns3
k_actual <- n_TeamRuns3 %>% # m_TeamRuns3
mutate(model = map(data, fit_k))
No starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart modelNo starting values specified for some parameters.
Initializing ‘k’ to '1.'.
Consider specifying 'start' or using a selfStart model
k_actual <- k_actual %>% select(-data) %>%
unnest(model)
k_actual %>% ungroup() %>%
summarize(
min = min(k),
median = median(k),
max = max(k),
mean = mean(k),
sd = sd(k),
n = n()
)
NA
NA
NA
NA
k_actual %>% ggplot(aes(x = k)) +
geom_density() +
xlab("Best fit exponent for a single season.")
Use moderndive
library(moderndive)
virtual_resamples <- k_actual %>%
rep_sample_n(size = 66, replace = TRUE, reps = 1000)
virtual_resampled_means <- virtual_resamples %>%
group_by(replicate) %>%
summarize(mean_k = mean(k))
`summarise()` ungrouping output (override with `.groups` argument)
virtual_resampled_means %>% ggplot( aes(x = mean_k)) +
geom_histogram(color = "white") +
labs(x = "Resample mean k")
virtual_resampled_means %>% ggplot( aes(x = mean_k)) +
geom_density() +
labs(x = "Resample mean k")
library(infer)
bootstrap_distribution <- k_actual %>%
specify(response = k) %>%
generate(reps = 1000, type = "bootstrap") %>%
calculate(stat = "mean")
bootstrap_distribution %>%
visualise()
percentile_ci <- bootstrap_distribution %>%
get_confidence_interval(level = 0.95, type = "percentile")
percentile_ci
NA
To do: Add further Boostrapping code.
Use tidymodels to implement this.
From the purrr cheatsheet.
n_iris <- iris %>% group_by(Species) %>%
nest()
n_iris
mod_fun <- function(df){
lm(Sepal.Length ~ ., data = df)
}
m_iris <- n_iris %>%
mutate(model = map(data, mod_fun))
m_iris
b_fun <- function(mod){
coefficients(mod)[[1]]
}
m_iris %>% transmute( Species, beta = map_dbl(model, b_fun) )