# Start of script ----
# Draw 5 cards from a deck of 52 cards and estimate the probability that
# the hand holds exactly two 5's and exactly two 8's.
# Using code from the Tidyverse in R. ----
library(tidyverse)
library(pbapply)      # progress bars for replicate()
library(future.apply) # parallel version of replicate()
library(tictoc)       # timing of code execution

# Build the deck ----
## Each card code has 3 characters: colour (R/B), suit (H/D/S/C), kind.
## 10 is written as T, so every code has length 3.
suits <- c("RH", "RD", "BS", "BC")
kinds <- c(2:9, "T", "J", "Q", "K", "A")
cards <- tibble(card = paste0(rep(suits, each = length(kinds)), kinds))

# Helpers ----

# Count how many cards of each kind a hand contains.
#   hand: a tibble with a `card` column of 3-character card codes.
# Returns a tibble with one row per kind present (`kind`) and its count (`n`).
count_kinds <- function(hand) {
  hand |>
    mutate(kind = str_sub(card, 3, 3)) |> # the kind is the 3rd character
    count(kind)
}

# Test a hand for the event "exactly two 5's and exactly two 8's".
#   hand: a tibble with a `card` column of 3-character card codes.
# Returns a single TRUE/FALSE.
has_two_5s_and_two_8s <- function(hand) {
  pairs <- count_kinds(hand) |>
    filter(kind %in% c("5", "8"), n == 2)
  # One row survives per kind (5 or 8) that occurs exactly twice, so the
  # event happened only when both rows are present.
  nrow(pairs) == 2
}

# Draw one random hand without replacement and test it for the event.
#   deck:      a tibble with a `card` column.
#   hand_size: number of cards to draw.
# Returns a single TRUE/FALSE.
simulate_hand <- function(deck, hand_size = 5) {
  deck |>
    slice_sample(n = hand_size) |>
    has_two_5s_and_two_8s()
}

# Draw 5 cards from a deck of 52 cards ----
cards_draw <- cards |>
  slice_sample(n = 5)
cards_draw

# Walk through the check on a single hand ----
## To see two 5's and two 8's, 4 of the 5 cards must be 5's or 8's.
## The card code is split by position with stringr::str_sub().
cards_draw |>
  mutate(
    color = str_sub(card, 1, 1),
    suit = str_sub(card, 2, 2),
    kind = str_sub(card, 3, 3)
  )

count_kinds(cards_draw)

has_two_5s_and_two_8s(cards_draw)

# Estimate the probability by simulation ----
n_sims <- 10000

# Exact answer: choose two of the four 5's, two of the four 8's and one of
# the 44 remaining cards, out of all 5-card hands.
exact_prob <- choose(4, 2) * choose(4, 2) * choose(44, 1) / choose(52, 5)

event <- replicate(n_sims, simulate_hand(cards))
format(mean(event), scientific = FALSE)

# Answer ----
exact_prob

# Using a progress bar ----
event <- pbreplicate(n_sims, simulate_hand(cards))
format(mean(event), scientific = FALSE)

# Answer ----
exact_prob

# Use parallel processing in R. ----

## Single core, for comparison with the code above.
## The plan is set before the timer starts so that only the simulation
## itself is timed.
plan(sequential)
tic()
event <- future_replicate(n_sims, simulate_hand(cards))
toc()
format(mean(event), scientific = FALSE)

# Answer ----
exact_prob

## Multiple cores.
## This run uses 5 times as many replicates as the sequential run, so
## compare the time per replicate rather than the raw elapsed times.
n_sims_parallel <- 50000
plan(multisession)
tic()
event <- future_replicate(n_sims_parallel, simulate_hand(cards))
toc()
format(mean(event), scientific = FALSE)

# Answer ----
exact_prob

# Return to sequential processing so the background workers are shut down.
plan(sequential)

# End of script ----