add data augmentation script

This commit is contained in:
andy 2021-04-16 23:59:20 +01:00
parent 97d4a7f044
commit 5bab446799
5 changed files with 221 additions and 26 deletions

14
cars/README.md Normal file
View File

@ -0,0 +1,14 @@
# Stanford Cars
The majority of the work was completed on the Stanford cars dataset. This folder contains the different investigations made using the data.
* architecture-investigations
- How does altering the structure of AlexNet affect performance?
* lr-investigations
- How does altering the learning rate, both the value itself and the schedule, affect performance?
* split-investigations
- How do the proportions of training/validation/test data affect performance?
[Homepage](https://ai.stanford.edu/~jkrause/cars/car_dataset.html)
*The Cars dataset contains 16,185 images of 196 classes of cars. The data is split into 8,144 training images and 8,041 testing images, where each class has been split roughly in a 50-50 split. Classes are typically at the level of Make, Model, Year, e.g. 2012 Tesla Model S or 2012 BMW M3 coupe.*

File diff suppressed because one or more lines are too long

9
cifar100/README.md Normal file
View File

@ -0,0 +1,9 @@
# CIFAR-100
At the beginning of the investigations, the CIFAR-100 dataset was considered for use in the coursework. This was deemed non-viable due to its size compared to the Stanford Cars dataset: models took far too long to train, so the work could not be iterative and agile enough.
[Homepage](https://www.cs.toronto.edu/~kriz/cifar.html)
*The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images.*
*The dataset is divided into five training batches and one test batch, each with 10000 images. The test batch contains exactly 1000 randomly-selected images from each class. The training batches contain the remaining images in random order, but some training batches may contain more images from one class than another. Between them, the training batches contain exactly 5000 images from each class.*

138
data_aug.py Normal file
View File

@ -0,0 +1,138 @@
#!/usr/bin/env python3
"""Generate extra training data using rotations and flips
Read a DIGITS train.txt and use OpenCV to generate extra data for training.
FLIP an image and/or apply various rotations from ROTATE_DEGREES
Optionally rotate both clockwise and counter-clockwise by given degrees and
apply rotations to flipped images
"""
import os
import pathlib
import cv2 as cv
import numpy as np
# --- Configuration --------------------------------------------------------
# Both paths are prompted for interactively when the script starts.
TRAIN_FILE = input('enter train file path: ') # path to the input train.txt
OUTPUT_PATH = input('enter o/p path: ') # folder the generated images are written to
# Example hard-coded values that can replace the prompts above:
# TRAIN_FILE = 'cars/default-split/train.txt' # path to train.txt
# OUTPUT_PATH = '/scratch/Teaching/ap00824/cars/train' # output folder for altered images
DRY_RUN = False # don't read or write any images, just produce the new train list
FLIP = True # add a left-to-right flipped copy of every image
ROTATE = False # enable the rotation options below
ROTATE_BOTH = False # rotate counter-clockwise as well as clockwise
ROTATE_DEGREES = [15] # rotation angles (in degrees) to apply
FLIP_ROTATED = False # also add a flipped copy of every rotated image
INCLUDE_ORIG = True # include the original train.txt entries in the output
# If true, the output extra_training.txt can be used as a complete train.txt;
# otherwise it must be merged with the original train.txt.
###################
# EXP FACTOR
###################
# Every configured angle yields one rotated image; that count doubles when the
# counter-clockwise rotation is generated too, and doubles again when each
# rotated image also gets a flipped copy.
images_per_angle = (int(ROTATE_BOTH) + 1) * (int(FLIP_ROTATED) + 1)
rotated_images = int(ROTATE) * len(ROTATE_DEGREES) * images_per_angle
# The original image always counts once; the plain flip adds one more.
exp_factor = rotated_images + int(FLIP) + 1
print("Expansion Factor of {}".format(exp_factor))
train_file = pathlib.Path(TRAIN_FILE)
output_path = pathlib.Path(OUTPUT_PATH).resolve()

# read input train.txt
with train_file.open('r') as tf:
    train_txt_lines = tf.readlines()

# Parse every "<image path> <class>" line into a dict with the keys
# "image" (pathlib.Path) and "class" (str).
train_split = []
for line_no, line in enumerate(train_txt_lines, start=1):
    # strip() removes the trailing newline and also a '\r' left behind by
    # CRLF line endings, which would otherwise end up inside the class label
    entry = line.strip()
    if not entry:
        # tolerate blank lines, e.g. an empty line at the end of the file
        continue
    # Split on the LAST run of whitespace only, so that image paths which
    # themselves contain spaces are kept intact.
    fields = entry.rsplit(None, 1)
    if len(fields) != 2:
        raise ValueError(
            "{}:{}: expected '<image path> <class>', got {!r}".format(
                TRAIN_FILE, line_no, entry))
    image_path, label = fields
    train_split.append({
        "image": pathlib.Path(image_path),
        "class": label,
    })

print('New Training Set: {} images'.format(len(train_split) * exp_factor))
print('Generating {} images...'.format(len(train_split) * (exp_factor - 1)))
##################
# PROCESS
##################
# rotate_bound from imutils
# https://www.pyimagesearch.com/2017/01/02/rotate-images-correctly-with-opencv-and-python/
def rotate_bound(image, angle):
    """Rotate ``image`` by ``angle`` degrees, enlarging the canvas to fit.

    The angle is negated before it is handed to OpenCV so that a positive
    ``angle`` turns the image clockwise. The output size is recomputed from
    the rotation matrix so the rotated content is not cropped.

    Args:
        image: array whose first two ``shape`` entries are (height, width).
        angle: rotation in degrees.

    Returns:
        The result of ``cv.warpAffine`` on the enlarged canvas.
    """
    height, width = image.shape[:2]
    center_x = width // 2
    center_y = height // 2

    # 2x3 affine matrix for a rotation about the image centre, scale 1.0
    matrix = cv.getRotationMatrix2D((center_x, center_y), -angle, 1.0)
    abs_cos = np.abs(matrix[0, 0])
    abs_sin = np.abs(matrix[0, 1])

    # bounding box of the rotated image
    new_width = int((height * abs_sin) + (width * abs_cos))
    new_height = int((height * abs_cos) + (width * abs_sin))

    # shift the translation part so the old centre lands on the new centre
    matrix[0, 2] += (new_width / 2) - center_x
    matrix[1, 2] += (new_height / 2) - center_y

    return cv.warpAffine(image, matrix, (new_width, new_height))
# get a modified image name
def insert_path_part(obj, part):
    """Return the entry's file name with ``-<part>`` inserted before the suffix.

    Args:
        obj: train entry dict; ``obj["image"]`` must be a ``pathlib`` path.
        part: tag describing the applied transformation, e.g. ``'flip'``.

    Returns:
        The new file name (no directory component) as a string.
    """
    image_path = obj["image"]
    return "{}-{}{}".format(image_path.stem, part, image_path.suffix)
def get_train_entry(obj, path):
    """Format one line of the train list: ``<path> <class>`` plus a newline.

    Args:
        obj: train entry dict; only ``obj['class']`` is read.
        path: image path to write (converted with ``str``).

    Returns:
        The formatted line, terminated by ``\\n``.
    """
    label = obj['class']
    image_path = str(path)
    return "{} {}\n".format(image_path, label)
def _build_variants():
    """Return ``(tag, transform)`` pairs for every enabled augmentation.

    ``tag`` is the text inserted into the output file name and ``transform``
    maps a loaded image to the augmented image. The order is: the plain flip,
    then for each angle the clockwise rotation, its flip, the
    counter-clockwise rotation and its flip (each only when enabled).
    """
    variants = []
    if FLIP:
        variants.append(('flip', lambda image: cv.flip(image, 1)))
    if ROTATE:
        for deg in ROTATE_DEGREES:
            # ``d=deg`` binds the current angle; a plain closure would see
            # only the last value of the loop variable.
            variants.append((
                'rot-{}'.format(deg),
                lambda image, d=deg: rotate_bound(image, d)))
            if FLIP_ROTATED:
                variants.append((
                    'flip-rot-{}'.format(deg),
                    lambda image, d=deg: cv.flip(rotate_bound(image, d), 1)))
            if ROTATE_BOTH:
                variants.append((
                    'rot-min-{}'.format(deg),
                    lambda image, d=deg: rotate_bound(image, -d)))
                if FLIP_ROTATED:
                    variants.append((
                        'flip-rot-min-{}'.format(deg),
                        lambda image, d=deg: cv.flip(rotate_bound(image, -d), 1)))
    return variants


variants = _build_variants()

if not DRY_RUN:
    # cv.imwrite does not create missing directories; make sure the target
    # folder exists before any image is written.
    output_path.mkdir(parents=True, exist_ok=True)

new_lines = []
for train in train_split:
    img = None
    if not DRY_RUN:
        img = cv.imread(str(train["image"]))
        if img is None:
            # cv.imread signals an unreadable/missing file by returning None
            raise OSError("could not read image: {}".format(train["image"]))

    if INCLUDE_ORIG:
        new_lines.append(get_train_entry(train, train["image"]))

    for tag, transform in variants:
        op_path = output_path / insert_path_part(train, tag)
        if not DRY_RUN:
            # cv.imwrite reports failure through its return value; fail
            # loudly so the list never references a file that was not saved
            if not cv.imwrite(str(op_path), transform(img)):
                raise OSError("could not write image: {}".format(op_path))
        new_lines.append(get_train_entry(train, op_path))

# The list file is written to the current working directory.
with open('extra_training.txt', 'w') as op_file:
    op_file.writelines(new_lines)

View File

@ -11,6 +11,7 @@ matplotlib = "^3.4.1"
jupyterlab = "^3.0.12"
pandas = "^1.2.3"
seaborn = "^0.11.1"
opencv-python = "^4.5.1"
[tool.poetry.dev-dependencies]