---
title: "reuters_classification"
author: "Prof. Eric A. Suess"
format:
  html:
    embed-resources: true
---

## Classifying newswires: A multiclass classification example

Load the Reuters newswire data, keeping the 10,000 most frequent words.

```{r}
library(keras)

reuters <- dataset_reuters(num_words = 10000)
c(c(train_data, train_labels), c(test_data, test_labels)) %<-% reuters
```

```{r}
str(train_data)
```

Decode the first newswire back into words. Word indices are offset by 3
because indices 0, 1, and 2 are reserved for "padding", "start of
sequence", and "unknown".

```{r}
word_index <- dataset_reuters_word_index()
reverse_word_index <- names(word_index)
names(reverse_word_index) <- as.character(word_index)

decoded_words <- train_data[[1]] %>%
  sapply(function(i) {
    if (i > 3) reverse_word_index[[as.character(i - 3)]] else "?"
  })
decoded_review <- paste0(decoded_words, collapse = " ")
decoded_review
```

Encoding the input data: multi-hot encode each sequence of word indices
as a binary vector of length 10,000.

```{r}
vectorize_sequences <- function(sequences, dimension = 10000) {
  # One row per sequence; a 1 in column j means word index j occurs.
  results <- matrix(0, nrow = length(sequences), ncol = dimension)
  for (i in seq_along(sequences)) {
    results[i, sequences[[i]]] <- 1
  }
  results
}
```

```{r}
x_train <- vectorize_sequences(train_data)
x_test <- vectorize_sequences(test_data)
```

One-hot encode the 46 topic labels by hand.

```{r}
to_one_hot <- function(labels, dimension = 46) {
  results <- matrix(0, nrow = length(labels), ncol = dimension)
  labels <- labels + 1  # labels are 0-based; R matrices are 1-based
  for (i in seq_along(labels)) {
    j <- labels[[i]]
    results[i, j] <- 1
  }
  results
}

y_train <- to_one_hot(train_labels)
y_test <- to_one_hot(test_labels)
```

The built-in `to_categorical()` does the same thing.

```{r}
y_train <- to_categorical(train_labels)
y_test <- to_categorical(test_labels)
```

Build the network. The final layer has 46 units with a softmax
activation, so the output is a probability distribution over the
46 topics.

```{r}
model <- keras_model_sequential() %>%
  layer_dense(64, activation = "relu") %>%
  layer_dense(64, activation = "relu") %>%
  layer_dense(46, activation = "softmax")
```

Compile the model.
```{r}
model %>% compile(optimizer = "rmsprop",
                  loss = "categorical_crossentropy",
                  metrics = "accuracy")
```

Set aside the first 1,000 training samples as a validation set.

```{r}
val_indices <- 1:1000

x_val <- x_train[val_indices, ]
partial_x_train <- x_train[-val_indices, ]

y_val <- y_train[val_indices, ]
partial_y_train <- y_train[-val_indices, ]
```

```{r}
history <- model %>% fit(partial_x_train,
                         partial_y_train,
                         epochs = 20,
                         batch_size = 512,
                         validation_data = list(x_val, y_val))
```

```{r}
plot(history)
```

# Retrain the model from scratch.

Train for only 9 epochs (before overfitting sets in) on the full
training set, then evaluate on the test set.

```{r}
model <- keras_model_sequential() %>%
  layer_dense(64, activation = "relu") %>%
  layer_dense(64, activation = "relu") %>%
  layer_dense(46, activation = "softmax")

model %>% compile(optimizer = "rmsprop",
                  loss = "categorical_crossentropy",
                  metrics = "accuracy")

model %>% fit(x_train, y_train, epochs = 9, batch_size = 512)

results <- model %>% evaluate(x_test, y_test)
results
```

Compare with a random-guess baseline: the accuracy of shuffled labels.

```{r}
mean(test_labels == sample(test_labels))
```

Predictions. Inferences.

```{r}
predictions <- model %>% predict(x_test)
predictions
```

```{r}
str(predictions)
sum(predictions[1, ])        # each row is a probability distribution, sums to 1
which.max(predictions[1, ])  # predicted topic for the first test newswire
```

A different way: use the integer labels directly with the sparse loss,
which avoids the one-hot encoding step.

```{r}
y_train <- train_labels
y_test <- test_labels
```

```{r}
model %>% compile(
  optimizer = "rmsprop",
  loss = "sparse_categorical_crossentropy",
  metrics = "accuracy")
```

The information-bottleneck experiment: squeeze the representation
through a 4-unit intermediate layer and watch validation accuracy drop.
Note that `partial_y_train` and `y_val` were created from the one-hot
`y_train` above (the reassignment to integer labels did not change
them), so `categorical_crossentropy` is the matching loss here.

```{r}
model <- keras_model_sequential() %>%
  layer_dense(64, activation = "relu") %>%
  layer_dense(4, activation = "relu") %>%
  layer_dense(46, activation = "softmax")

model %>% compile(optimizer = "rmsprop",
                  loss = "categorical_crossentropy",
                  metrics = "accuracy")

model %>% fit(partial_x_train,
              partial_y_train,
              epochs = 20,
              batch_size = 128,
              validation_data = list(x_val, y_val))
```