# Start of script ----

# Draw 5 cards from a deck of 52 cards and estimate the probability that
# the hand contains exactly two 5's and exactly two 8's.
# Using code from the Tidyverse in R. ----

library(tidyverse)

# Draw 5 cards from a deck of 52 cards ----

## Replace 10 with T, so the string is length 3.

# Build the 52-card deck as 3-character codes: colour (R/B), suit (H/D/S/C)
# and kind (2-9, T, J, Q, K, A). Generating the codes instead of typing all
# 52 literals rules out typos while keeping the same order
# (RH2..RHA, RD2..RDA, BS2..BSA, BC2..BCA).
cards <- tibble(
  card = paste0(
    rep(c("RH", "RD", "BS", "BC"), each = 13),
    c(2:9, "T", "J", "Q", "K", "A")
  )
)

# Draw one hand of 5 cards (without replacement) and print it.
cards_draw <- cards |> slice_sample(n = 5)
cards_draw

# Estimate the probability that you observe 2 5's and 2 8's in a hand of 5 cards -----

## To see 2 5's and 2 8's there will be 4 cards with 5's and 8's in the hand.

# Using separate() from the Tidyverse R package tidyr to split each card code

# Step 1: split each 3-character card code by position into its colour,
# suit and kind (split after character 1 and after character 2), keeping the
# original code alongside.
cards_split <- cards_draw |>
  separate(
    card,
    sep = c(1, 2),
    into = c("color", "suit", "kind"),
    remove = FALSE
  )
cards_split

# Step 2: count how many cards of each kind are in the hand.
kind_counts <- cards_split |> count(kind)
kind_counts

# Step 3: keep only the 5's and 8's that occur exactly twice. The hand shows
# the event only when both kinds survive the filter, i.e. two rows remain.
kind_counts |>
  filter(kind %in% c("5", "8"), n == 2) |>
  nrow() == 2


# Monte Carlo estimate: draw 10000 hands and record, for each hand, whether it
# holds exactly two 5's and exactly two 8's (TRUE/FALSE).
event <- replicate(10000, {
  cards |>
    slice_sample(n = 5) |>
    # Split the 3-character code by position: colour | suit | kind.
    separate(card, sep = c(1, 2), into = c("color", "suit", "kind")) |>
    count(kind) |>
    # Keep the 5's and 8's that occur exactly twice; the event happens only
    # when both kinds remain.
    filter(kind %in% c("5", "8"), n == 2) |>
    nrow() == 2
})

# Proportion of simulated hands showing the event, printed without
# scientific notation.
format(mean(event), scientific = FALSE)

# Answer ----

# Exact probability: choose 2 of the four 5's, 2 of the four 8's and 1 of the
# 44 remaining cards, out of all choose(52, 5) possible hands.
choose(4, 2) * choose(4, 2) * choose(44, 1) / choose(52, 5)

# Using a progress bar

library(pbapply)  # R package for progress bars on replicate

# Same simulation as with replicate(), but pbreplicate() is used so a
# progress bar is shown while the 10000 hands are drawn.
event <- pbreplicate(10000, {
  cards |>
    slice_sample(n = 5) |>
    # Split the 3-character code by position: colour | suit | kind.
    separate(card, sep = c(1, 2), into = c("color", "suit", "kind")) |>
    count(kind) |>
    # Keep the 5's and 8's that occur exactly twice; the event happens only
    # when both kinds remain.
    filter(kind %in% c("5", "8"), n == 2) |>
    nrow() == 2
})

# Proportion of simulated hands showing the event, printed without
# scientific notation.
format(mean(event), scientific = FALSE)

# Answer ----

# Exact probability: choose 2 of the four 5's, 2 of the four 8's and 1 of the
# 44 remaining cards, out of all choose(52, 5) possible hands.
choose(4, 2) * choose(4, 2) * choose(44, 1) / choose(52, 5)

# Use parallel processing in R. ----

library(future.apply)  # R package with a parallel replicate function
library(tictoc)        # R package for timing code execution of code

# Compare single core speed with the above code.

# Time the simulation of 10000 hands with the sequential plan; this is the
# baseline for the multi-core run.
tic()
plan(sequential)
event <- future_replicate(10000, {
  cards |>
    slice_sample(n = 5) |>
    # Split the 3-character code by position: colour | suit | kind.
    separate(card, sep = c(1, 2), into = c("color", "suit", "kind")) |>
    count(kind) |>
    # Keep the 5's and 8's that occur exactly twice; the event happens only
    # when both kinds remain.
    filter(kind %in% c("5", "8"), n == 2) |>
    nrow() == 2
})
toc()

# Proportion of simulated hands showing the event, printed without
# scientific notation.
format(mean(event), scientific = FALSE)

# Answer ----

# Exact probability: choose 2 of the four 5's, 2 of the four 8's and 1 of the
# 44 remaining cards, out of all choose(52, 5) possible hands.
choose(4, 2) * choose(4, 2) * choose(44, 1) / choose(52, 5)

# Compare multi-core speed with the above code.

# Time the simulation with the multisession plan.
# NOTE: this run draws 50000 hands while the sequential run draws 10000, so
# the two timings are not a like-for-like comparison.
tic()
plan(multisession)
event <- future_replicate(50000, {
  cards |>
    slice_sample(n = 5) |>
    # Split the 3-character code by position: colour | suit | kind.
    separate(card, sep = c(1, 2), into = c("color", "suit", "kind")) |>
    count(kind) |>
    # Keep the 5's and 8's that occur exactly twice; the event happens only
    # when both kinds remain.
    filter(kind %in% c("5", "8"), n == 2) |>
    nrow() == 2
})
toc()

# Switch back to the sequential plan so the background R sessions started for
# multisession are shut down instead of lingering after the script.
plan(sequential)

# Proportion of simulated hands showing the event, printed without
# scientific notation.
format(mean(event), scientific = FALSE)

# Answer ----

# Exact probability: choose 2 of the four 5's, 2 of the four 8's and 1 of the
# 44 remaining cards, out of all choose(52, 5) possible hands.
choose(4, 2) * choose(4, 2) * choose(44, 1) / choose(52, 5)

# End of script ----