From 5bab446799e08f0e1499c62d03a12221a89be924 Mon Sep 17 00:00:00 2001 From: andy Date: Fri, 16 Apr 2021 23:59:20 +0100 Subject: [PATCH] add data augmentation script --- cars/README.md | 14 +++++ cars/confusions.ipynb | 85 ++++++++++++++++++-------- cifar100/README.md | 9 +++ data_aug.py | 138 ++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 1 + 5 files changed, 221 insertions(+), 26 deletions(-) create mode 100644 cars/README.md create mode 100644 cifar100/README.md create mode 100644 data_aug.py diff --git a/cars/README.md b/cars/README.md new file mode 100644 index 0000000..2974564 --- /dev/null +++ b/cars/README.md @@ -0,0 +1,14 @@ +# Stanford Cars + +The majority of the work was completed on the Stanford Cars dataset. This folder contains the different investigations made using the data. + +* architecture-investigations + - How does altering the structure of AlexNet affect performance? +* lr-investigations + - How does changing the learning rate, both the value itself and the schedule, affect performance? +* split-investigations + - How do the proportions of training/validation/test data affect performance? + +[Homepage](https://ai.stanford.edu/~jkrause/cars/car_dataset.html) + +*The Cars dataset contains 16,185 images of 196 classes of cars. The data is split into 8,144 training images and 8,041 testing images, where each class has been split roughly in a 50-50 split. Classes are typically at the level of Make, Model, Year, e.g. 
2012 Tesla Model S or 2012 BMW M3 coupe.* \ No newline at end of file diff --git a/cars/confusions.ipynb b/cars/confusions.ipynb index 55b5bcf..1448b23 100644 --- a/cars/confusions.ipynb +++ b/cars/confusions.ipynb @@ -3,7 +3,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "f027fe48", + "id": "34b0345b", "metadata": {}, "outputs": [], "source": [ @@ -16,7 +16,7 @@ }, { "cell_type": "markdown", - "id": "d3f5ae86", + "id": "75b9f701", "metadata": {}, "source": [ "# Render Confusion Matrices\n", @@ -27,7 +27,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "753e7bc3", + "id": "10521209", "metadata": {}, "outputs": [], "source": [ @@ -37,8 +37,49 @@ }, { "cell_type": "code", - "execution_count": 3, - "id": "6250e9b7", + "execution_count": 19, + "id": "02c2844f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "50.0% 0.5\n", + "25.0% 0.25\n", + "30.0% 0.3\n", + "100.0% 1.0\n", + "71.43% 0.7142857142857143\n", + "44.44% 0.4444444444444444\n", + "62.5% 0.625\n", + "0.0% 0.0\n", + "50.0% 0.5\n", + "20.0% 0.2\n", + "\n", + "MAP = 49.3%\n" + ] + } + ], + "source": [ + "normalised_array = np.ndarray((196, 196))\n", + "for idx, row in enumerate(frame.to_numpy()):\n", + " normalised_array[idx, :] = row / np.sum(row)\n", + "\n", + "# check correct accuracy\n", + "for acc, (idx, row) in zip(accuracy_col, enumerate(normalised_array[:10])):\n", + " print(acc, row[idx])\n", + "\n", + "class_acc_counter = 0\n", + "for idx, row in enumerate(normalised_array):\n", + " class_acc_counter += row[idx]\n", + " \n", + "print(f\"\\nMAP = {class_acc_counter*100/196:.4}%\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "91132579", "metadata": {}, "outputs": [ { @@ -580,7 +621,7 @@ "[196 rows x 196 columns]" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -591,24 +632,15 @@ }, { "cell_type": "code", - "execution_count": 4, - "id": "af1f4956", + 
"execution_count": 14, + "id": "72eacc1e", "metadata": {}, "outputs": [ { "data": { + "image/png": "\n", "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" + "
" ] }, "metadata": { @@ -618,25 +650,26 @@ } ], "source": [ - "plt.figure(figsize=(12, 10)\n", + "fig, ax = plt.subplots(figsize=(12, 10)\n", "# , dpi=400\n", " )\n", "\n", - "plt.matshow(frame)\n", + "ax.matshow(normalised_array)\n", "\n", - "plt.title('Confusion matrix')\n", + "ax.set_title('Confusion matrix')\n", + "# plt.colorbar(ax=ax)\n", "plt.show()" ] }, { "cell_type": "code", - "execution_count": 5, - "id": "cde63a06", + "execution_count": 26, + "id": "831a9fab", "metadata": {}, "outputs": [ { "data": { - "image/png": "\n", + "image/png": "\n", "text/plain": [ "
" ] @@ -651,7 +684,7 @@ "plt.figure(figsize=(12, 10)\n", "# , dpi=400\n", " )\n", - "sns.heatmap(frame, xticklabels=False, yticklabels=False)\n", + "sns.heatmap(normalised_array, xticklabels=False, yticklabels=False, cmap='inferno')\n", "\n", "plt.title('Confusion matrix')\n", "plt.show()" diff --git a/cifar100/README.md b/cifar100/README.md new file mode 100644 index 0000000..de780fd --- /dev/null +++ b/cifar100/README.md @@ -0,0 +1,9 @@ +# CIFAR-100 + +At the beginning of the investigations, the CIFAR-100 dataset was considered for use in the coursework. This was deemed non-viable due to its size compared to the Stanford Cars dataset. Models took far too long to train and the work was not iterative and agile enough. + +[Homepage](https://www.cs.toronto.edu/~kriz/cifar.html) + +Note: the description quoted below is the homepage's text for CIFAR-10; CIFAR-100 uses the same 32x32 colour image format and total size but has 100 classes of 600 images each (500 training and 100 test images per class). *The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images.* + +*The dataset is divided into five training batches and one test batch, each with 10000 images. The test batch contains exactly 1000 randomly-selected images from each class. The training batches contain the remaining images in random order, but some training batches may contain more images from one class than another. Between them, the training batches contain exactly 5000 images from each class.* \ No newline at end of file diff --git a/data_aug.py b/data_aug.py new file mode 100644 index 0000000..fb0f3f7 --- /dev/null +++ b/data_aug.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 +"""Generate extra training data using rotations and flips + +Read a DIGITS train.txt and use OpenCV to generate extra data for training. 
+FLIP an image and/or apply various rotations from ROTATE_DEGREES + +Optionally rotate both clockwise and counter-clockwise by given degrees and +apply rotations to flipped images +""" + +import os +import pathlib +import cv2 as cv +import numpy as np + +TRAIN_FILE = input('enter train file path: ') # path to train.txt +OUTPUT_PATH = input('enter o/p path: ') # output folder for altered images + +# TRAIN_FILE = 'cars/default-split/train.txt' # path to train.txt +# OUTPUT_PATH = '/scratch/Teaching/ap00824/cars/train' # output folder for altered images + +DRY_RUN = False # dont output files, just a new train.txt + +FLIP = True # just flip image left to right +ROTATE = False # enable rotating image by below options +ROTATE_BOTH = False # do clockwise and counter-clockwise +ROTATE_DEGREES = [15] # different rotations to apply +FLIP_ROTATED = False # do rotations on both flipped images + +INCLUDE_ORIG = True # include original train.txt entry in ouput +# if true the output extra_training.txt can be used as a whole train.txt +# otherwise must be merged with original + +################### +# EXP FACTOR +################### + +exp_factor = int(ROTATE) * len(ROTATE_DEGREES) +exp_factor *= int(ROTATE_BOTH) + 1 # either 1 or 2 scale factor +exp_factor *= int(FLIP_ROTATED) + 1 # either 1 or 2 scale factor +exp_factor += int(FLIP) + 1 # flip is one extra image, + 1 for original file + +print("Expansion Factor of {}".format(exp_factor)) + +train_file = pathlib.Path(TRAIN_FILE) +output_path = pathlib.Path(OUTPUT_PATH).resolve() + +# read input train.txt +with open(TRAIN_FILE, 'r') as tf: + train_txt_lines = tf.readlines() + +# parse to dict objects +train_split = list() +for line in train_txt_lines: + space_split = line.split(' ') + train_split.append({ + # "raw_path": space_split[0], + "image": pathlib.Path(space_split[0]), + "class": space_split[1].replace('\n', '') + }) + +print('New Training Set: {} images'.format(len(train_split) * exp_factor)) +print('Generating {} 
images...'.format(len(train_split) * (exp_factor - 1))) + +################## +# PROCESS +################## + +# rotate_bound from imutils +# https://www.pyimagesearch.com/2017/01/02/rotate-images-correctly-with-opencv-and-python/ +def rotate_bound(image, angle): + # grab the dimensions of the image and then determine the + # center + (h, w) = image.shape[:2] + (cX, cY) = (w // 2, h // 2) + # grab the rotation matrix (applying the negative of the + # angle to rotate clockwise), then grab the sine and cosine + # (i.e., the rotation components of the matrix) + M = cv.getRotationMatrix2D((cX, cY), -angle, 1.0) + cos = np.abs(M[0, 0]) + sin = np.abs(M[0, 1]) + # compute the new bounding dimensions of the image + nW = int((h * sin) + (w * cos)) + nH = int((h * cos) + (w * sin)) + # adjust the rotation matrix to take into account translation + M[0, 2] += (nW / 2) - cX + M[1, 2] += (nH / 2) - cY + # perform the actual rotation and return the image + return cv.warpAffine(image, M, (nW, nH)) + +# get a modified image name +def insert_path_part(obj, part): + return obj["image"].stem + '-' + part + obj["image"].suffix + +def get_train_entry(obj, path): + return "{} {}\n".format(str(path), obj['class']) + +new_lines = list() +for train in train_split: + if not DRY_RUN: + img = cv.imread(str(train["image"])) + + if INCLUDE_ORIG: + new_lines.append(get_train_entry(train, train["image"])) + + if FLIP: + op_path = output_path / insert_path_part(train, 'flip') + if not DRY_RUN: + cv.imwrite(str(op_path), cv.flip(img, 1)) + new_lines.append(get_train_entry(train, op_path)) + + if ROTATE: + for deg in ROTATE_DEGREES: + op_path = output_path / insert_path_part(train, 'rot-{}'.format(deg)) + if not DRY_RUN: + cv.imwrite(str(op_path), rotate_bound(img, deg)) + new_lines.append(get_train_entry(train, op_path)) + + if FLIP_ROTATED: + op_path = output_path / insert_path_part(train, 'flip-rot-{}'.format(deg)) + if not DRY_RUN: + cv.imwrite(str(op_path), cv.flip(rotate_bound(img, deg), 1)) 
+ new_lines.append(get_train_entry(train, op_path)) + + if ROTATE_BOTH: + op_path = output_path / insert_path_part(train, 'rot-min-{}'.format(deg)) + if not DRY_RUN: + cv.imwrite(str(op_path), rotate_bound(img, -deg)) + new_lines.append(get_train_entry(train, op_path)) + + if FLIP_ROTATED: + op_path = output_path / insert_path_part(train, 'flip-rot-min-{}'.format(deg)) + if not DRY_RUN: + cv.imwrite(str(op_path), cv.flip(rotate_bound(img, -deg), 1)) + new_lines.append(get_train_entry(train, op_path)) + +with open('extra_training.txt', 'w') as op_file: + op_file.writelines(new_lines) diff --git a/pyproject.toml b/pyproject.toml index 0b23182..327f0c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ matplotlib = "^3.4.1" jupyterlab = "^3.0.12" pandas = "^1.2.3" seaborn = "^0.11.1" +opencv-python = "^4.5.1" [tool.poetry.dev-dependencies]