Stat. 654 Quiz brulee using torch

Author

Your Name Here

Feed-Forward Neural Network for the Google Tensorflow Playground XOR Data

Clone the TFPlayground Github repository into your R Project folder.

To clone the repository you can use RStudio

File > New Project > Version Control > Git

and paste the URL of the repository into the Git Repository URL box. Then select a folder to clone the repository into.

Click the green Code button and copy the URL: https://github.com/hyounesy/TFPlaygroundPSA.git

Then paste the URL into the Git Repository URL box. Select a folder to clone the repository into. Click the Create Project button.

Use the data in data/tiny/xor_25/input.txt (relative to the project root of the cloned repository) to create a feed-forward neural network to classify the data. Use the brulee package (a torch backend for parsnip/tidymodels) to create the model.

Load the required libraries

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidymodels)
── Attaching packages ────────────────────────────────────── tidymodels 1.3.0 ──
✔ broom        1.0.7          ✔ rsample      1.2.1.9000
✔ dials        1.4.0.9000     ✔ tune         1.3.0.9000
✔ infer        1.0.7          ✔ workflows    1.2.0.9000
✔ modeldata    1.4.0          ✔ workflowsets 1.1.0     
✔ parsnip      1.3.0.9000     ✔ yardstick    1.3.2     
✔ recipes      1.1.1.9000     
── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
✖ scales::discard() masks purrr::discard()
✖ dplyr::filter()   masks stats::filter()
✖ recipes::fixed()  masks stringr::fixed()
✖ dplyr::lag()      masks stats::lag()
✖ yardstick::spec() masks readr::spec()
✖ recipes::step()   masks stats::step()
library(readr)
library(janitor)

Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test
library(brulee)
library(keras)

Attaching package: 'keras'

The following object is masked from 'package:yardstick':

    get_weights

Load the data

# Read the tab-delimited XOR data set from the cloned TFPlaygroundPSA repo.
# Path is relative to the project root (the captured output below confirms
# 200 rows x 9 double columns).
input <- read_delim("data/tiny/xor_25/input.txt", 
     delim = "\t", escape_double = FALSE, 
     trim_ws = TRUE)
Rows: 200 Columns: 9
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
dbl (9): pid, X1, X2, X1Squared, X2Squared, X1X2, sinX1, sinX2, label

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Tidy the raw input: snake_case column names, drop the point id column,
# and encode the class label (-1 / 1) as a factor for classification.
input <- input |> 
  clean_names() |> 
  select(-pid) |> 
  mutate(label = as.factor(label)) |>
  tibble()  # read_delim already returns a tibble; kept as a harmless no-op

head(input)
# A tibble: 6 × 8
     x1     x2 x1squared x2squared  x1x2 sin_x1 sin_x2 label
  <dbl>  <dbl>     <dbl>     <dbl> <dbl>  <dbl>  <dbl> <fct>
1 -2.82  2.52       7.96     6.33  -7.10 -0.314  0.586 -1   
2  3.49  1.19      12.2      1.43   4.17 -0.342  0.930 1    
3 -3.78 -2.68      14.3      7.20  10.1   0.592 -0.442 1    
4  2.22  4.14       4.92    17.1    9.18  0.798 -0.841 1    
5  3.64  0.970     13.2      0.941  3.53 -0.474  0.825 1    
6  2.22  0.788      4.95     0.621  1.75  0.794  0.709 1    

Split the data into training and testing sets

# Hold out 20% of the rows for testing (80/20 split via rsample).
# NOTE(review): no set.seed() is called before initial_split(), so the
# partition — and every downstream accuracy number — is not reproducible.
# Confirm whether a seed should be set for the quiz.
n <- nrow(input)

input_parts <- input |>
  initial_split(prop = 0.8)

train <- input_parts |>
  training()

test <- input_parts |>
  testing()

# Sanity check on the split sizes: expect 160 training and 40 testing rows.
list(train, test) |>
  map_int(nrow)
[1] 160  40

Visualize the data

# Scatter the training points in (x1, x2) coloured by class.
# label was already converted to a factor above, so factor(label) here is
# a no-op kept from the original code.
train |> 
  ggplot(aes(x = x1, y = x2, color = factor(label))) +
  geom_point()

# Same visualization for the held-out test points.
test |> 
  ggplot(aes(x = x1, y = x2, color = factor(label))) +
  geom_point()

brulee uses torch as its computational backend

# Single-hidden-layer MLP (8 units) fit with the torch-backed "brulee"
# engine, using only the raw x1 and x2 coordinates as predictors.
# Printing the parsnip object shows the fitted hyperparameters below.
mod_nn <- mlp(mode = "classification", hidden_units = 8) |>
  set_engine("brulee") |>
  fit(label ~ x1 + x2, data = train)
mod_nn
parsnip model object

Multilayer perceptron

relu activation,
8 hidden units,
42 model parameters
160 samples, 2 features, 2 classes 
class weights -1=1, 1=1 
weight decay: 0.001 
dropout proportion: 0 
batch size: 144 
learn rate: 0.01 
validation loss after 5 epochs: 0.484 
#  train
# Attach hard-class predictions to the training data, rename the prediction
# column, and compute accuracy (truth = label, estimate = mod_nn).
pred <- train |> bind_cols(
    predict(mod_nn, new_data = train, type = "class")
  ) |>
  rename(mod_nn = .pred_class)

accuracy(pred, label, mod_nn)
# A tibble: 1 × 3
  .metric  .estimator .estimate
  <chr>    <chr>          <dbl>
1 accuracy binary         0.738
# test
# Same evaluation on the held-out test set for the one-layer model.
pred <- test |> bind_cols(
    predict(mod_nn, new_data = test, type = "class")
  ) |>
  rename(mod_nn = .pred_class)

accuracy(pred, label, mod_nn)
# A tibble: 1 × 3
  .metric  .estimator .estimate
  <chr>    <chr>          <dbl>
1 accuracy binary         0.775

brulee two layer

# Two-hidden-layer MLP via the "brulee_two_layer" engine. hidden_units = 8
# sets the first layer; the second layer falls back to the engine default
# (the printed model below shows c(8,3) units — presumably hidden_units_2
# defaults to 3; confirm against the brulee docs).
# Note: this reuses (overwrites) the mod_nn name from the one-layer fit.
mod_nn <- mlp(mode = "classification", hidden_units = 8) |>
  set_engine("brulee_two_layer") |>
  fit(label ~ x1 + x2, data = train)
mod_nn
parsnip model object

Multilayer perceptron

c(relu,relu) activation,
c(8,3) hidden units,
59 model parameters
160 samples, 2 features, 2 classes 
class weights -1=1, 1=1 
weight decay: 0.001 
dropout proportion: 0 
batch size: 144 
learn rate: 0.01 
validation loss after 32 epochs: 0.0774 
#  train
# Training-set accuracy for the two-layer model (mod_nn now holds the
# brulee_two_layer fit).
pred <- train |> bind_cols(
    predict(mod_nn, new_data = train, type = "class")
  ) |>
  rename(mod_nn = .pred_class)

accuracy(pred, label, mod_nn)
# A tibble: 1 × 3
  .metric  .estimator .estimate
  <chr>    <chr>          <dbl>
1 accuracy binary         0.975
# test
# Test-set accuracy for the two-layer model.
pred <- test |> bind_cols(
    predict(mod_nn, new_data = test, type = "class")
  ) |>
  rename(mod_nn = .pred_class)

accuracy(pred, label, mod_nn)
# A tibble: 1 × 3
  .metric  .estimator .estimate
  <chr>    <chr>          <dbl>
1 accuracy binary             1