add data augmentation script

This commit is contained in:
andy 2021-04-16 23:59:20 +01:00
parent 97d4a7f044
commit 5bab446799
5 changed files with 221 additions and 26 deletions

14
cars/README.md Normal file
View File

@ -0,0 +1,14 @@
# Stanford Cars
The majority of the work was completed on the Stanford cars dataset. This folder contains the different investigations made using the data.
* architecture-investigations
- How does altering the structure of AlexNet affect performance?
* lr-investigations
- How do the learning rate's value and its schedule affect performance?
* split-investigations
- How do the proportions of training/validation/test data affect performance?
[Homepage](https://ai.stanford.edu/~jkrause/cars/car_dataset.html)
*The Cars dataset contains 16,185 images of 196 classes of cars. The data is split into 8,144 training images and 8,041 testing images, where each class has been split roughly in a 50-50 split. Classes are typically at the level of Make, Model, Year, e.g. 2012 Tesla Model S or 2012 BMW M3 coupe.*

File diff suppressed because one or more lines are too long

9
cifar100/README.md Normal file
View File

@ -0,0 +1,9 @@
# CIFAR-100
At the beginning of the investigations, the CIFAR-100 dataset was considered for use in the coursework. This was deemed non-viable due to its size compared to the Stanford Cars dataset: models took far too long to train, so the work could not be iterative and agile enough.
[Homepage](https://www.cs.toronto.edu/~kriz/cifar.html)
*This dataset is just like the CIFAR-10, except it has 100 classes containing 600 images each. There are 500 training images and 100 testing images per class.*

138
data_aug.py Normal file
View File

@ -0,0 +1,138 @@
#!/usr/bin/env python3
"""Generate extra training data using rotations and flips
Read a DIGITS train.txt and use OpenCV to generate extra data for training.
FLIP an image and/or apply various rotations from ROTATE_DEGREES
Optionally rotate both clockwise and counter-clockwise by given degrees and
apply rotations to flipped images
"""
import os
import pathlib
import cv2 as cv
import numpy as np
# ---- configuration -------------------------------------------------------
TRAIN_FILE = input('enter train file path: ')  # path to DIGITS train.txt
OUTPUT_PATH = input('enter o/p path: ')  # output folder for altered images
# TRAIN_FILE = 'cars/default-split/train.txt' # path to train.txt
# OUTPUT_PATH = '/scratch/Teaching/ap00824/cars/train' # output folder for altered images
DRY_RUN = False  # don't output files, just a new train.txt
FLIP = True  # just flip image left to right
ROTATE = False  # enable rotating image by below options
ROTATE_BOTH = False  # do clockwise and counter-clockwise
ROTATE_DEGREES = [15]  # different rotations to apply
FLIP_ROTATED = False  # also apply the rotations to flipped images
INCLUDE_ORIG = True  # include original train.txt entry in output
# if true the output extra_training.txt can be used as a whole train.txt
# otherwise must be merged with original
###################
# EXP FACTOR
###################
# Number of NEW images generated per input image: each configured rotation,
# doubled for both directions and/or for the flipped copy when enabled,
# plus the single left-right flip.
generated_per_image = int(ROTATE) * len(ROTATE_DEGREES)
generated_per_image *= int(ROTATE_BOTH) + 1  # either 1 or 2 scale factor
generated_per_image *= int(FLIP_ROTATED) + 1  # either 1 or 2 scale factor
generated_per_image += int(FLIP)  # flip is one extra image
exp_factor = generated_per_image + 1  # + 1 for the original file
print("Expansion Factor of {}".format(exp_factor))
train_file = pathlib.Path(TRAIN_FILE)
output_path = pathlib.Path(OUTPUT_PATH).resolve()
# read input train.txt
with open(TRAIN_FILE, 'r') as tf:
    train_txt_lines = tf.readlines()
# parse each "<image path> <class label>" line into a dict
train_split = list()
for line in train_txt_lines:
    space_split = line.split(' ')
    train_split.append({
        "image": pathlib.Path(space_split[0]),
        "class": space_split[1].replace('\n', '')
    })
# the original entry only appears in the output file when INCLUDE_ORIG is
# set, so only count it then (previously it was always counted, overstating
# the new-set size when INCLUDE_ORIG was False)
new_set_size = len(train_split) * (generated_per_image + int(INCLUDE_ORIG))
print('New Training Set: {} images'.format(new_set_size))
print('Generating {} images...'.format(len(train_split) * generated_per_image))
##################
# PROCESS
##################
# rotate_bound from imutils
# https://www.pyimagesearch.com/2017/01/02/rotate-images-correctly-with-opencv-and-python/
def rotate_bound(image, angle):
    """Rotate *image* clockwise by *angle* degrees, expanding the canvas so
    no part of the rotated image is clipped.

    Adapted from imutils (see link above). Returns the rotated image with a
    new (larger or equal) width and height.
    """
    height, width = image.shape[:2]
    centre = (width // 2, height // 2)
    # OpenCV rotates counter-clockwise for positive angles, so negate to
    # rotate clockwise; scale factor stays 1.0
    matrix = cv.getRotationMatrix2D(centre, -angle, 1.0)
    abs_cos = abs(matrix[0, 0])
    abs_sin = abs(matrix[0, 1])
    # axis-aligned bounding box of the rotated image
    new_w = int(height * abs_sin + width * abs_cos)
    new_h = int(height * abs_cos + width * abs_sin)
    # translate so the rotation centre lands in the middle of the new canvas
    matrix[0, 2] += (new_w / 2) - centre[0]
    matrix[1, 2] += (new_h / 2) - centre[1]
    return cv.warpAffine(image, matrix, (new_w, new_h))
# get a modified image name
def insert_path_part(obj, part):
    """Return the entry's image filename with *part* spliced in before the
    file extension, e.g. ``img.jpg`` + ``flip`` -> ``img-flip.jpg``."""
    image = obj["image"]
    return "{}-{}{}".format(image.stem, part, image.suffix)
def get_train_entry(obj, path):
    """Format one train.txt line: the image path, a space, the class label,
    and a trailing newline."""
    return str(path) + " " + obj['class'] + "\n"
# Generate the augmented images (unless DRY_RUN) and collect the matching
# train.txt lines for every emitted file.
new_lines = list()
for train in train_split:
    if not DRY_RUN:
        # NOTE(review): no existence check — cv.imread returns None for a
        # missing file and the flip/rotate calls below would then fail
        img = cv.imread(str(train["image"]))
    if INCLUDE_ORIG:
        # keep the untouched source image in the output listing
        new_lines.append(get_train_entry(train, train["image"]))
    if FLIP:
        flip_path = output_path / insert_path_part(train, 'flip')
        if not DRY_RUN:
            cv.imwrite(str(flip_path), cv.flip(img, 1))  # 1 = horizontal flip
        new_lines.append(get_train_entry(train, flip_path))
    if ROTATE:
        for deg in ROTATE_DEGREES:
            # clockwise rotation
            rot_path = output_path / insert_path_part(train, 'rot-{}'.format(deg))
            if not DRY_RUN:
                cv.imwrite(str(rot_path), rotate_bound(img, deg))
            new_lines.append(get_train_entry(train, rot_path))
            if FLIP_ROTATED:
                # flipped copy of the clockwise rotation
                fr_path = output_path / insert_path_part(train, 'flip-rot-{}'.format(deg))
                if not DRY_RUN:
                    cv.imwrite(str(fr_path), cv.flip(rotate_bound(img, deg), 1))
                new_lines.append(get_train_entry(train, fr_path))
            if ROTATE_BOTH:
                # counter-clockwise rotation
                ccw_path = output_path / insert_path_part(train, 'rot-min-{}'.format(deg))
                if not DRY_RUN:
                    cv.imwrite(str(ccw_path), rotate_bound(img, -deg))
                new_lines.append(get_train_entry(train, ccw_path))
                if FLIP_ROTATED:
                    # flipped copy of the counter-clockwise rotation
                    fccw_path = output_path / insert_path_part(train, 'flip-rot-min-{}'.format(deg))
                    if not DRY_RUN:
                        cv.imwrite(str(fccw_path), cv.flip(rotate_bound(img, -deg), 1))
                    new_lines.append(get_train_entry(train, fccw_path))
# write the listing for the newly generated set
with open('extra_training.txt', 'w') as op_file:
    op_file.writelines(new_lines)

View File

@ -11,6 +11,7 @@ matplotlib = "^3.4.1"
jupyterlab = "^3.0.12" jupyterlab = "^3.0.12"
pandas = "^1.2.3" pandas = "^1.2.3"
seaborn = "^0.11.1" seaborn = "^0.11.1"
opencv-python = "^4.5.1"
[tool.poetry.dev-dependencies] [tool.poetry.dev-dependencies]