##### Chapter 7: Neural Networks and Support Vector Machines -------------------

##### Part 1: Neural Networks -------------------

## Example: Modeling the Strength of Concrete ----

## Step 2: Exploring and preparing the data ----

# read in data and examine structure
concrete <- read.csv("concrete.csv")
str(concrete)

# custom normalization function: rescale a numeric vector to [0, 1]
normalize <- function(x) {
  (x - min(x)) / (max(x) - min(x))
}

# apply normalization to every column of the data frame
# NOTE: each column is rescaled using its FULL-data min/max
concrete_norm <- as.data.frame(lapply(concrete, normalize))

# confirm that the range is now between zero and one
summary(concrete_norm$strength)

# compared to the original minimum and maximum
summary(concrete$strength)

# create training and test data
concrete_train <- concrete_norm[1:773, ]
concrete_test  <- concrete_norm[774:1030, ]

## Step 3: Training a model on the data ----

# train the neuralnet model
library(neuralnet)

# simple ANN with only a single hidden neuron
set.seed(12345)  # to guarantee repeatable results
concrete_model <- neuralnet(
  formula = strength ~ cement + slag + ash + water +
    superplastic + coarseagg + fineagg + age,
  data = concrete_train
)

# visualize the network topology
plot(concrete_model)

# Reference: http://www.r-bloggers.com/neuralnettools-1-0-0-now-on-cran/
# alternative plot
library(NeuralNetTools)

# plotnet
par(mar = numeric(4), family = 'serif')
plotnet(concrete_model, alpha = 0.6)

## Step 4: Evaluating model performance ----

# obtain model results on the eight predictor columns
model_results <- compute(concrete_model, concrete_test[1:8])

# obtain predicted strength values (still on the normalized scale)
predicted_strength <- model_results$net.result

# examine the correlation between predicted and actual values
cor(predicted_strength, concrete_test$strength)
# higher than stated in book 0.7170368646

# produce actual predictions by
head(predicted_strength)
concrete_train_original_strength <- concrete[1:773, "strength"]

# FIX: normalize() rescaled strength using the min/max of the FULL
# data set, so the reverse transformation must use the same full-data
# range. The previous code took min/max from only the 773 training
# rows, which mis-scales predictions whenever the training subset does
# not happen to contain the global minimum and maximum.
strength_min <- min(concrete$strength)
strength_max <- max(concrete$strength)
head(concrete_train_original_strength)

# reverse the [0, 1] rescaling: map a normalized value back onto the
# original [min, max] scale
unnormalize <- function(x, min, max) {
  x * (max - min) + min
}

strength_pred <- unnormalize(predicted_strength, strength_min, strength_max)
strength_pred

## Step 5: Improving model performance ----

# a more complex neural network topology with 5 hidden neurons
set.seed(12345)  # to guarantee repeatable results
concrete_model2 <- neuralnet(
  strength ~ cement + slag + ash + water + superplastic +
    coarseagg + fineagg + age,
  data = concrete_train,
  hidden = 5,
  act.fct = "logistic"
)

# plot the network
plot(concrete_model2)

# plotnet
par(mar = numeric(4), family = 'serif')
plotnet(concrete_model2, alpha = 0.6)

# evaluate the results as we did before
model_results2 <- compute(concrete_model2, concrete_test[1:8])
predicted_strength2 <- model_results2$net.result
cor(predicted_strength2, concrete_test$strength)
# higher than stated in book 0.801444583

# try a different activation function:
# same 5-hidden-neuron topology, tanh instead of logistic
set.seed(12345)  # to guarantee repeatable results
concrete_model2 <- neuralnet(
  strength ~ cement + slag + ash + water + superplastic +
    coarseagg + fineagg + age,
  data = concrete_train,
  hidden = 5,
  act.fct = "tanh"
)

# evaluate the results as we did before
model_results2 <- compute(concrete_model2, concrete_test[1:8])
predicted_strength2 <- model_results2$net.result
cor(predicted_strength2, concrete_test$strength)

##### Part 2: Support Vector Machines -------------------

## Example: Optical Character Recognition ----

## Step 2: Exploring and preparing the data ----

# read in data and examine structure
letters <- read.csv("letterdata.csv")
str(letters)

# divide into training and test data
letters_train <- letters[1:16000, ]
letters_test  <- letters[16001:20000, ]

## Step 3: Training a model on the data ----

# begin by training a simple linear SVM
library(kernlab)
letter_classifier <- ksvm(letter ~ ., data = letters_train,
                          kernel = "vanilladot")

# look at basic information about the model
letter_classifier

## Step 4: Evaluating model performance ----

# predictions on testing dataset
letter_predictions <- predict(letter_classifier, letters_test)
head(letter_predictions)
table(letters_test$letter, letter_predictions)

# look only at agreement vs. non-agreement
# construct a vector of TRUE/FALSE indicating correct/incorrect predictions
agreement <- letter_predictions == letters_test$letter
table(agreement)
prop.table(table(agreement))

## Step 5: Improving model performance ----

# swap the linear kernel for an RBF kernel and re-evaluate
set.seed(12345)
letter_classifier_rbf <- ksvm(letter ~ ., data = letters_train,
                              kernel = "rbfdot")
letter_predictions_rbf <- predict(letter_classifier_rbf, letters_test)
table(letters_test$letter, letter_predictions_rbf)

agreement_rbf <- letter_predictions_rbf == letters_test$letter
table(agreement_rbf)
prop.table(table(agreement_rbf))