# The original data is from: https://www.kaggle.com/c/dogs-vs-cats/data
# Download the data file from kaggle.
# You need to log into kaggle to be able to download the file.
# If you do not have a kaggle account, I have uploaded it to the class
# Blackboard site under Course Materials.
# The name of the data file is: dogs-vs-cats.zip
# Unzip the file dogs-vs-cats.zip.
# Inside the directory dogs-vs-cats, unzip train.zip.
# There should be pictures of cats and dogs in the train subdirectory.
# Be sure to check that the pictures are in the subdirectories before
# proceeding.

# Source data ----

# Set the path to where the original data from kaggle was downloaded and
# unzipped. Update this line to where you have downloaded your data.
# In Windows, for example: E:/kaggle_original_data/train
# Note: We will only use pictures from the train directory.
original_dataset_dir <- "~/Downloads/kaggle_original_data/train"

# Fail early with a clear message instead of silently producing empty
# train/validation/test folders when the source path is wrong.
if (!dir.exists(original_dataset_dir)) {
  stop(
    "original_dataset_dir does not exist: ", original_dataset_dir,
    "\nUpdate it to the unzipped kaggle 'train' directory.",
    call. = FALSE
  )
}

# Destination layout ----

# Set the paths to where the train, validation, and test data will be copied
# to. The pictures will be from the original data directory train.
base_dir <- "~/Downloads/cats_and_dogs_small_TEST" # new root directory

train_dir <- file.path(base_dir, "train") # training data
validation_dir <- file.path(base_dir, "validation") # validation data
test_dir <- file.path(base_dir, "test") # test data

train_cats_dir <- file.path(train_dir, "cats")
train_dogs_dir <- file.path(train_dir, "dogs")

validation_cats_dir <- file.path(validation_dir, "cats")
validation_dogs_dir <- file.path(validation_dir, "dogs")

test_cats_dir <- file.path(test_dir, "cats")
test_dogs_dir <- file.path(test_dir, "dogs")

# Create all of the new directories.
# showWarnings = FALSE keeps re-runs quiet when a directory already exists;
# recursive = TRUE also creates any missing parent directory.
all_dirs <- c(
  base_dir,
  train_dir, validation_dir, test_dir,
  train_cats_dir, train_dogs_dir,
  validation_cats_dir, validation_dogs_dir,
  test_cats_dir, test_dogs_dir
)
for (dir_path in all_dirs) {
  dir.create(dir_path, recursive = TRUE, showWarnings = FALSE)
}

# Warnings were suppressed above, so confirm the directories really exist.
missing_dirs <- all_dirs[!dir.exists(all_dirs)]
if (length(missing_dirs) > 0) {
  stop(
    "Could not create: ", paste(missing_dirs, collapse = ", "),
    call. = FALSE
  )
}

# Copy pictures ----

# Copy the pictures named "<animal>.<id>.jpg" for every id in `ids` from
# `src_dir` into the existing directory `dest_dir`.
#
# overwrite = TRUE so the copy can be repeated over and over if you want.
# Emits a warning when some files could not be copied (for example, when
# they are missing from `src_dir`).
# Returns (invisibly) the logical vector produced by file.copy().
copy_images <- function(src_dir, animal, ids, dest_dir) {
  image_names <- paste0(animal, ".", ids, ".jpg")
  copied <- file.copy(
    file.path(src_dir, image_names),
    dest_dir,
    overwrite = TRUE
  )
  if (!all(copied)) {
    warning(
      sum(!copied), " of ", length(copied), " '", animal,
      "' images could not be copied to ", dest_dir,
      call. = FALSE
    )
  }
  invisible(copied)
}

# Images 1-1000 are used for training, 1001-1500 for validation, and
# 1501-2000 for testing, for each of the two classes.
copy_images(original_dataset_dir, "cat", 1:1000, train_cats_dir)
copy_images(original_dataset_dir, "cat", 1001:1500, validation_cats_dir)
copy_images(original_dataset_dir, "cat", 1501:2000, test_cats_dir)

copy_images(original_dataset_dir, "dog", 1:1000, train_dogs_dir)
copy_images(original_dataset_dir, "dog", 1001:1500, validation_dogs_dir)
copy_images(original_dataset_dir, "dog", 1501:2000, test_dogs_dir)

# Check ----

# Print the number of files found in each destination directory.
count_report <- c(
  "total training cat images:" = train_cats_dir,
  "total training dog images:" = train_dogs_dir,
  "total validation cat images:" = validation_cats_dir,
  "total validation dog images:" = validation_dogs_dir,
  "total test cat images:" = test_cats_dir,
  "total test dog images:" = test_dogs_dir
)
for (label in names(count_report)) {
  cat(label, length(list.files(count_report[[label]])), "\n")
}