DIGITS-CNN/report/report.lyx
2021-04-30 19:47:47 +01:00

2547 lines
44 KiB
Plaintext

#LyX 2.3 created this file. For more info see http://www.lyx.org/
\lyxformat 544
\begin_document
\begin_header
\save_transient_properties true
\origin unavailable
\textclass article
\begin_preamble
\def\changemargin#1#2{\list{}{\rightmargin#2\leftmargin#1}\item[]}
\let\endchangemargin=\endlist
\pagenumbering{roman}
\usepackage{color}
\usepackage{pxfonts}
\definecolor{commentgreen}{RGB}{0,94,11}
\definecolor{darkblue}{rgb}{0,0,0.75}
\definecolor{darkred}{rgb}{0.6,0,0}
\end_preamble
\use_default_options true
\begin_modules
customHeadersFooters
minimalistic
todonotes
\end_modules
\maintain_unincluded_children false
\language british
\language_package default
\inputencoding auto
\fontencoding global
\font_roman "default" "default"
\font_sans "default" "default"
\font_typewriter "default" "default"
\font_math "auto" "auto"
\font_default_family default
\use_non_tex_fonts false
\font_sc false
\font_osf false
\font_sf_scale 100 100
\font_tt_scale 100 100
\use_microtype true
\use_dash_ligatures true
\graphics default
\default_output_format default
\output_sync 0
\bibtex_command biber
\index_command default
\paperfontsize 11
\spacing single
\use_hyperref true
\pdf_title "Convolutional Neural Networks with DIGITS"
\pdf_author "Andy Pack"
\pdf_subject "EEEM063 Image Processing & Deep Learning"
\pdf_keywords "EEEM063"
\pdf_bookmarks true
\pdf_bookmarksnumbered false
\pdf_bookmarksopen false
\pdf_bookmarksopenlevel 1
\pdf_breaklinks false
\pdf_pdfborder true
\pdf_colorlinks false
\pdf_backref false
\pdf_pdfusetitle true
\papersize default
\use_geometry true
\use_package amsmath 1
\use_package amssymb 1
\use_package cancel 1
\use_package esint 1
\use_package mathdots 1
\use_package mathtools 1
\use_package mhchem 1
\use_package stackrel 1
\use_package stmaryrd 1
\use_package undertilde 1
\cite_engine biblatex
\cite_engine_type authoryear
\biblio_style plain
\biblio_options urldate=long
\biblatex_bibstyle ieee
\biblatex_citestyle ieee
\use_bibtopic false
\use_indices false
\paperorientation portrait
\suppress_date true
\justification true
\use_refstyle 1
\use_minted 0
\index Index
\shortcut idx
\color #008000
\end_index
\leftmargin 2cm
\topmargin 2cm
\rightmargin 2cm
\bottommargin 2cm
\secnumdepth 3
\tocdepth 3
\paragraph_separation skip
\defskip smallskip
\is_math_indent 0
\math_numbering_side default
\quotes_style british
\dynamic_quotes 0
\papercolumns 1
\papersides 1
\paperpagestyle fancy
\listings_params "language=Python,breaklines=true,frame=tb,otherkeywords={self},emph={State},emphstyle={\ttb\color{darkred}},basicstyle={\ttfamily},commentstyle={\bfseries\color{commentgreen}\itshape},keywordstyle={\color{darkblue}},emphstyle={\color{red}},stringstyle={\color{red}}"
\bullet 1 0 9 -1
\bullet 2 0 24 -1
\tracking_changes false
\output_changes false
\html_math_output 0
\html_css_as_file 0
\html_be_strict false
\end_header
\begin_body
\begin_layout Title
\size giant
Convolutional Neural Networks with DIGITS
\end_layout
\begin_layout Author
Andy Pack
\end_layout
\begin_layout Standard
\begin_inset VSpace 15pheight%
\end_inset
\end_layout
\begin_layout Standard
\align center
\begin_inset Graphics
filename surrey.png
lyxscale 15
width 40col%
\end_inset
\end_layout
\begin_layout Standard
\begin_inset VSpace vfill
\end_inset
\end_layout
\begin_layout Standard
\noindent
\align center
EEEM063
\begin_inset Newline newline
\end_inset
May 2021
\size large
\begin_inset Newline newline
\end_inset
Department of Electrical and Electronic Engineering
\begin_inset Newline newline
\end_inset
Faculty of Engineering and Physical Sciences
\begin_inset Newline newline
\end_inset
University of Surrey
\end_layout
\begin_layout Standard
\begin_inset Newpage newpage
\end_inset
\end_layout
\begin_layout Abstract
abstract
\end_layout
\begin_layout Standard
\begin_inset CommandInset toc
LatexCommand tableofcontents
\end_inset
\end_layout
\begin_layout List of TODOs
\end_layout
\begin_layout Standard
\begin_inset Newpage newpage
\end_inset
\end_layout
\begin_layout Standard
\begin_inset FloatList figure
\end_inset
\end_layout
\begin_layout Standard
\begin_inset FloatList table
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset toc
LatexCommand lstlistoflistings
\end_inset
\end_layout
\begin_layout Standard
\begin_inset Newpage newpage
\end_inset
\end_layout
\begin_layout Right Footer
Andy Pack / 6420013
\end_layout
\begin_layout Left Footer
May 2021
\end_layout
\begin_layout Left Header
EEEM063 Coursework
\end_layout
\begin_layout Standard
\begin_inset ERT
status open
\begin_layout Plain Layout
\backslash
pagenumbering{arabic}
\end_layout
\begin_layout Plain Layout
\backslash
setcounter{page}{1}
\end_layout
\end_inset
\end_layout
\begin_layout Section
Introduction
\end_layout
\begin_layout Standard
Although much of the theory for convolutional neural networks (CNNs) was
developed throughout the 20th century, their importance to the field of
computer vision was not widely appreciated until the early 2010s.
\begin_inset Flex TODO Note (inline)
status open
\begin_layout Plain Layout
More context
\end_layout
\end_inset
\end_layout
\begin_layout Standard
Although CNNs can appear opaque when attempting to understand how decisions
are made, they are not black boxes and there are many ways to affect a
model's performance.
This work presents investigations into how a CNN's performance is affected
by the subject dataset, the architecture of the network and the parameters
used when training.
Section
\begin_inset CommandInset ref
LatexCommand ref
reference "sec:Investigations-Scope"
plural "false"
caps "false"
noprefix "false"
\end_inset
outlines the scope of the investigations made herein, describing the motivation
for the variations and expectations as to how this would affect performance.
The results for these investigations are presented in section
\begin_inset CommandInset ref
LatexCommand ref
reference "sec:Results"
plural "false"
caps "false"
noprefix "false"
\end_inset
with interpretations made in the following section.
Section
\begin_inset CommandInset ref
LatexCommand ref
reference "sec:Conclusions"
plural "false"
caps "false"
noprefix "false"
\end_inset
summarises and concludes the work.
\end_layout
\begin_layout Section
Investigations Scope
\begin_inset CommandInset label
LatexCommand label
name "sec:Investigations-Scope"
\end_inset
\end_layout
\begin_layout Standard
The investigations presented in this work use the Stanford Cars dataset
\begin_inset CommandInset citation
LatexCommand cite
key "cars"
literal "false"
\end_inset
, a selection of 16,185 images of 196 different classes of car.
In terms of network architecture, the seminal AlexNet
\begin_inset CommandInset citation
LatexCommand cite
key "alexnet"
literal "false"
\end_inset
was used as the template for the investigations presented.
\end_layout
\begin_layout Subsection
Dataset Processing
\end_layout
\begin_layout Standard
Prior to more in-depth investigations, how the dataset is divided into training,
validation and test data was investigated in order to identify a suitable
proportion for later work.
As the dataset is of a fixed size, a balance must be struck between how much is reserved
for training the network and how much should be used to evaluate the network.
Throughout this paper, the term
\emph on
split
\emph default
will be used to denote a single division of the dataset into the three
required subsets.
\end_layout
\begin_layout Standard
Although the dataset is of a fixed size, there are methods to artificially
expand the set of training data by performing image manipulations such
as rotations and zooms.
This aims to teach the network invariance to such transforms during classificat
ion.
A Python script was written to take a training dataset and perform a range
of manipulations in order to create a synthetically larger training set.
The expansion factor,
\begin_inset Formula $E$
\end_inset
, is used to describe the scale factor for the new dataset's size.
The ideal rotation angle was investigated by rotating all images by a given
value.
\end_layout
\begin_layout Subsection
Meta-Parameters
\end_layout
\begin_layout Standard
The number of epochs that a network is trained for is important for balancing
the fit to the training set.
Too few and the CNN will be underfit whereas too many and the network will
be too specific to the training set.
\end_layout
\begin_layout Standard
The learning rate of a CNN is critical for attaining high-performance results.
The value and how it changes over the range of training epochs or the
\emph on
learning schedule
\emph default
are investigated.
A fixed learning rate will first be evaluated before varying the parameter
as a function of epochs in the form of a sigmoid function, exponential
decay and a step-down schedule with a step,
\begin_inset Formula $S=33\%$
\end_inset
for two steps at 1/3 and 2/3 of the total epochs.
These learning schedules were evaluated over 50 and 100 epochs to investigate
how this affects test accuracy.
\end_layout
\begin_layout Subsection
Network Architectures
\end_layout
\begin_layout Subsubsection
Convolutional Layers
\end_layout
\begin_layout Standard
The convolutional layers of AlexNet are responsible for applying subsequent
image manipulations by convolving the sample with a kernel of learned parameter
s.
The kernel size of each layer was varied in order to visualise performance.
\end_layout
\begin_layout Subsubsection
Fully-Connected Layers
\end_layout
\begin_layout Standard
Following the convolutional stages there are three dense or fully-connected
layers which provide two key features in image classification.
The first is flattening the 2D cross-section of the preceding convolutional
layers into a 1D representation for propagation to a final one-hot vector
output.
The second is as a traditional multi-layer perceptron classifier, taking
the high-level visual insights of the later convolutional layers and reasoning
these into a final classification.
\end_layout
\begin_layout Standard
When treated as an MLP, these can instead be considered as 2 hidden layers
and a single output layer.
As the last layer is of a fixed number of nodes equal to the number of
classes and is required to form the one-hot vector output, it is treated
separately to the others.
Within this paper, when reporting the number of fully-connected layers
it is the number of hidden layers without the output layer.
\end_layout
\begin_layout Subsection
New Layers
\end_layout
\begin_layout Standard
It has been shown that the early layers (~1-3) of AlexNet are responsible
for identifying low-level features such as edges while the latter layers
(~3-5) perform higher level reasoning including texture.
The addition of a new layer in both of these regions of the network was
investigated.
Reasonable values for kernel sizes and number of layers were selected consideri
ng the values from the neighbouring layers.
\end_layout
\begin_layout Section
Results
\begin_inset CommandInset label
LatexCommand label
name "sec:Results"
\end_inset
\end_layout
\begin_layout Subsection
Dataset
\end_layout
\begin_layout Subsubsection
Train/Validation/Test Proportions
\end_layout
\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/split-investigations/split-barh.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Top-1 and Top-5 test accuracy for different train/validation/test proportions
\begin_inset CommandInset label
LatexCommand label
name "fig:split-barh"
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Plain Layout
\end_layout
\end_inset
\end_layout
\begin_layout Standard
Different splits of the cars dataset were made; the test accuracies can
be seen in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:split-barh"
plural "false"
caps "false"
noprefix "false"
\end_inset
and the number of images in each subset can be seen in appendix
\begin_inset CommandInset ref
LatexCommand ref
reference "sec:Dataset-Image-Counts"
plural "false"
caps "false"
noprefix "false"
\end_inset
.
A fixed learning rate of 0.001 was used over 100 epochs.
Increasing the proportion of data reserved for training the model can be
seen to increase the classification accuracy while varying the proportion
between the test and validation split had little effect.
The 80/10/10 split was deemed an appropriate balance of proportions and,
unless otherwise stated, the 80/10/10 split is used for later experiments.
\end_layout
\begin_layout Subsubsection
Data Augmentation
\end_layout
\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/data-aug-investigations/rot-accuracy.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Batch size = 128, AlexNet's default
\begin_inset CommandInset label
LatexCommand label
name "fig:rot-128b"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/data-aug-investigations/rot-accuracy-256batch.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Batch size = 256
\begin_inset CommandInset label
LatexCommand label
name "fig:rot-256b"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Test accuracies for different rotation degrees when augmenting the training
set with fixed or dynamic batch size
\begin_inset CommandInset label
LatexCommand label
name "fig:rot-accuracy"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Standard
Test results for varied degrees of rotation when augmenting the training
data can be seen in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:rot-accuracy"
plural "false"
caps "false"
noprefix "false"
\end_inset
.
In figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:rot-128b"
plural "false"
caps "false"
noprefix "false"
\end_inset
, the batch size was set to 128, the default value and the one used for the
unaugmented control experiment for comparison later.
Figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:rot-256b"
plural "false"
caps "false"
noprefix "false"
\end_inset
doubles the batch size to match the doubled training dataset.
\end_layout
\begin_layout Standard
Figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:data-aug-best-barh"
plural "false"
caps "false"
noprefix "false"
\end_inset
presents the test accuracies for different training pre-processing methods
including flipping the image horizontally and the best reported results
from the rotation experiment. Figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:data-aug-best-barh-128E"
plural "false"
caps "false"
noprefix "false"
\end_inset
reports results with the previously described scaling of batch size with
training set size to maintain a constant number of network changes throughout
\begin_inset Foot
status open
\begin_layout Plain Layout
A batch size of
\begin_inset Formula $6\times128=768$
\end_inset
for
\emph on
full
\emph default
processing could not fit into the memory of the test equipment and as such
a batch accumulation of 6 was used to simulate the larger batch size.
\end_layout
\end_inset
.
\emph on
Full
\emph default
pre-processing significantly expanded the training dataset by rotating
the image both clockwise and counter-clockwise by the given degrees and
then flipping both the rotated and original images for an expansion factor
of 6.
With the fixed batch size (
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:data-aug-best-barh-fixed"
plural "false"
caps "false"
noprefix "false"
\end_inset
), augmenting the dataset more than doubled the accuracy.
Rotation performed better than flipping the images while the described
combination performed best.
\begin_inset Flex TODO Note (inline)
status open
\begin_layout Plain Layout
Scaled batch size
\end_layout
\end_inset
\end_layout
\begin_layout Standard
With a constant number of network updates (figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:data-aug-best-barh-128E"
plural "false"
caps "false"
noprefix "false"
\end_inset
), data augmentation still performed better than the unaugmented dataset;
however, the performance was not as high as with a constant batch size.
Full processing performed worse than just flipping or rotating in this
case.
\end_layout
\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/data-aug-investigations/best-barh.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Batch size = 128
\begin_inset CommandInset label
LatexCommand label
name "fig:data-aug-best-barh-fixed"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/data-aug-investigations/best-barh-256batch.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Batch size =
\begin_inset Formula $128E$
\end_inset
\begin_inset CommandInset label
LatexCommand label
name "fig:data-aug-best-barh-128E"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Best top-1 and top-5 test accuracies for different data augmentation methods
with fixed or dynamic batch size
\begin_inset CommandInset label
LatexCommand label
name "fig:data-aug-best-barh"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Subsection
Meta-Parameters
\end_layout
\begin_layout Subsubsection
Epochs
\end_layout
\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/epochs-accuracy.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Test accuracies
\begin_inset CommandInset label
LatexCommand label
name "fig:epochs-accuracy"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/epochs-loss.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Final validation loss
\begin_inset CommandInset label
LatexCommand label
name "fig:epochs-loss"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Test results for varied training epochs with a sigmoid learning rate decay
of
\begin_inset Formula $\gamma=0.1$
\end_inset
\begin_inset CommandInset label
LatexCommand label
name "fig:epochs-results"
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Plain Layout
\end_layout
\end_inset
\end_layout
\begin_layout Standard
The effect of varying the number of training epochs can be seen
in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:epochs-results"
plural "false"
caps "false"
noprefix "false"
\end_inset
.
More epochs can be seen to increase performance until ~70 epochs, after
this the accuracy gradually declines.
The opposite trend can be seen in the loss of figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:epochs-loss"
plural "false"
caps "false"
noprefix "false"
\end_inset
.
\end_layout
\begin_layout Subsubsection
Fixed Learning Rate
\end_layout
\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/lr-investigations/fixed-accuracy-50e.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
50 Epochs
\begin_inset CommandInset label
LatexCommand label
name "fig:fixed-accuracy-50e"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/lr-investigations/fixed-accuracy.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
100 Epochs
\begin_inset CommandInset label
LatexCommand label
name "fig:fixed-accuracy-100e"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Test accuracies for fixed learning schedules
\begin_inset CommandInset label
LatexCommand label
name "fig:fixed-results"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Standard
In order to evaluate different learning rates and learning schedules, initial
investigations were made across six decades of a fixed learning rate over
50 and 100 epochs.
The accuracies and final validation loss can be seen in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:fixed-results"
plural "false"
caps "false"
noprefix "false"
\end_inset
.
For a fixed learning rate, values between 0.01 and 0.001 gave the best accuracy
with both larger and smaller values giving a top-1 accuracy of less than 10%.
The highest accuracies over 50 and 100 epochs were similar.
\begin_inset Note Comment
status open
\begin_layout Plain Layout
The final validation loss can be seen to generally follow the inverse trend,
the lowest values lie between 0.01 and 0.001.
Interestingly, a learning rate of
\begin_inset Formula $1\times10^{-4}$
\end_inset
had a validation loss that somewhat interpolated the surrounding values
for
\begin_inset Formula $1\times10^{-5}$
\end_inset
and
\begin_inset Formula $1\times10^{-3}$
\end_inset
however the accuracy does not reflect this, instead being much lower at
2%.
\begin_inset Flex TODO Note (Margin)
status open
\begin_layout Plain Layout
reword?
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Subsubsection
Step-Down
\end_layout
\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/lr-investigations/step-down-accuracy-50e.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
50 Epochs
\end_layout
\end_inset
\end_layout
\end_inset
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/lr-investigations/step-down-accuracy.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
100 Epochs
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Test accuracies for different step-down learning schedules.
Step of 1/3 for 2 learning rate drops
\begin_inset CommandInset label
LatexCommand label
name "fig:step-down-accuracy"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Standard
Test accuracies for different step-down learning schedules can be seen in
figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:step-down-accuracy"
plural "false"
caps "false"
noprefix "false"
\end_inset
.
Over both 50 and 100 epochs, the step-down scale factor can be seen to
have little effect on test accuracy.
\begin_inset Flex TODO Note (inline)
status open
\begin_layout Plain Layout
Finish
\end_layout
\end_inset
\end_layout
\begin_layout Subsubsection
Exponential
\end_layout
\begin_layout Standard
Different exponential learning decay rates were investigated; the results
can be seen in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:exp-results"
plural "false"
caps "false"
noprefix "false"
\end_inset
.
From these results, a slow decay rate can be seen to give the best results;
values between 0.95 and 0.99 gave the highest accuracies over both 50 and
100 epochs.
\end_layout
\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/lr-investigations/exp-accuracy-50e.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
50 Epochs
\begin_inset CommandInset label
LatexCommand label
name "fig:exp-50e"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/lr-investigations/exp-accuracy.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
100 Epochs
\begin_inset CommandInset label
LatexCommand label
name "fig:exp-100e"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Test accuracies for different exponentially decaying learning schedules
\begin_inset CommandInset label
LatexCommand label
name "fig:exp-results"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Subsubsection
Sigmoid
\end_layout
\begin_layout Standard
Reasonable values of gamma for the sigmoid function were selected
\family roman
\series medium
\shape up
\size normal
\emph off
\bar no
\strikeout off
\xout off
\uuline off
\uwave off
\noun off
\color none
between 0.05 and 0.2.
Accuracies over 50 and 100 epochs were evaluated and can be seen in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:sig-accuracy"
plural "false"
caps "false"
noprefix "false"
\end_inset
.
\begin_inset Formula $\gamma$
\end_inset
had little effect on performance for 50 epochs with an average top-1 accuracy
of 40%.
Over 100 epochs, the performance increased from this value to 47% when
\begin_inset Formula $\gamma=0.05$
\end_inset
.
\end_layout
\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/lr-investigations/sig-accuracy-50e.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
50 Epochs
\end_layout
\end_inset
\end_layout
\end_inset
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/lr-investigations/sig-accuracy.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
100 Epochs
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Test accuracies for different sigmoid learning schedules
\begin_inset CommandInset label
LatexCommand label
name "fig:sig-accuracy"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Subsubsection
Summary
\end_layout
\begin_layout Standard
A comparison of the best reported accuracies for the investigated learning
rate schedules can be seen in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:lr-best-barh"
plural "false"
caps "false"
noprefix "false"
\end_inset
.
Over both 50 and 100 epochs, a fixed learning rate performed the worst
while using a learning schedule achieved a ~10% accuracy gain.
Training for 100 epochs increased the accuracy by between 3% and 7% over
training for 50 epochs.
\end_layout
\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/lr-investigations/best-barh-50e.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
50 Epochs
\end_layout
\end_inset
\end_layout
\end_inset
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/lr-investigations/best-barh.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
100 Epochs
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Best top-1 and top-5 test accuracies for investigated learning schedules
\begin_inset CommandInset label
LatexCommand label
name "fig:lr-best-barh"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Subsection
Network Architectures
\end_layout
\begin_layout Subsubsection
Convolutional Kernel Size
\end_layout
\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/architecture-investigations/kernel-accuracy.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Top-1 test accuracy for different convolutional kernel sizes over 100 epochs
\begin_inset CommandInset label
LatexCommand label
name "fig:kernel-accuracy"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Standard
Test accuracies for varied convolutional kernel sizes can be seen in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:kernel-accuracy"
plural "false"
caps "false"
noprefix "false"
\end_inset
with the standard kernel sizes for AlexNet also marked.
In general, varying the kernel size of the earlier layers (1 and 2) had
little effect on the accuracy with little gain made over the default.
Higher gains were made in the later layers, where a size of 5 or 7 tended
to perform better than the standard 3.
\end_layout
\begin_layout Subsubsection
Fully-Connected Layers
\end_layout
\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/architecture-investigations/fc-accuracy.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Top-1 test accuracy for different fully-connected layer shapes over 100
epochs
\begin_inset CommandInset label
LatexCommand label
name "fig:fc-accuracy"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Standard
The shape and number of fully-connected layers were varied; test results
can be seen in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:fc-accuracy"
plural "false"
caps "false"
noprefix "false"
\end_inset
. AlexNet's standard 2 hidden layers, each of 4,096 nodes, can be seen marked
for reference.
Each number of layers shows a peak with a steep ascent and a more gradual
descent; as the number of layers increases, the number of nodes associated with the
peak also increases.
\end_layout
\begin_layout Subsubsection
Additional Layers
\end_layout
\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/architecture-investigations/new-layer-kernel-accuracy.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Top-1 test accuracy for varied kernel size of additional convolutional layers
\begin_inset CommandInset label
LatexCommand label
name "fig:new-layer"
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Plain Layout
\end_layout
\end_inset
\end_layout
\begin_layout Standard
\begin_inset Flex TODO Note (inline)
status open
\begin_layout Plain Layout
TODO
\end_layout
\end_inset
\end_layout
\begin_layout Subsubsection
Summary
\end_layout
\begin_layout Standard
A comparison of the best reported accuracies for the investigated architecture
changes can be seen in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:architecture-best-barh"
plural "false"
caps "false"
noprefix "false"
\end_inset
.
Each of the investigated architecture changes was able to outperform AlexNet.
The largest increase was achieved by reducing the number of nodes in the
2 hidden dense layers from 4,096 to 512 for a ~10% increase to 57%.
\end_layout
\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename ../cars/architecture-investigations/best-barh.png
lyxscale 30
width 50col%
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Best top-1 and top-5 test accuracies for investigated architectures over
100 epochs with exponentially decaying learning rate,
\begin_inset Formula $\lambda=0.98$
\end_inset
\begin_inset CommandInset label
LatexCommand label
name "fig:architecture-best-barh"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Section
Discussion
\begin_inset CommandInset label
LatexCommand label
name "sec:Discussion"
\end_inset
\end_layout
\begin_layout Subsection
Dataset
\end_layout
\begin_layout Standard
The batch size scaling in line with the training set growth was conducted
in an effort to control for the amount of extra training being conducted.
When comparing data augmentation methods, difficulty comes in comparing
processing methods which expand the training set by different amounts.
Synthetically larger datasets not only present the network with new perspective
s of the image but also train the network for longer.
A method to better control for this in the future could be to define a
constant expansion factor across processing methods and then compose this
extra training data of different proportions of augmentations (rotations
of varying angles and flips).
\end_layout
\begin_layout Subsection
Meta-Parameters
\end_layout
\begin_layout Standard
As presented, it can be seen that training a network beyond a threshold
number of epochs leads to diminishing performance as the network overfits
to the training set.
\end_layout
\begin_layout Subsection
Network Architectures
\end_layout
\begin_layout Standard
From the reported results each investigation outperformed the standard AlexNet
architecture.
It is worth specifying the significance of this, however.
It would be inaccurate from these results to suggest that these derivative
architectures are better than AlexNet as the performance is a function
of the dataset, the specific dataset split used, the learning rate schedule
and number of epochs trained for.
Instead, what is being stated is that, for the selected, specific values
of those factors, a better-performing architecture than the standard AlexNet was found.
\end_layout
\begin_layout Section
Conclusions
\begin_inset CommandInset label
LatexCommand label
name "sec:Conclusions"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset Newpage newpage
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset label
LatexCommand label
name "sec:bibliography"
\end_inset
\begin_inset CommandInset bibtex
LatexCommand bibtex
btprint "btPrintCited"
bibfiles "references"
options "bibtotoc"
\end_inset
\end_layout
\begin_layout Section
\start_of_appendix
Dataset Image Counts
\begin_inset CommandInset label
LatexCommand label
name "sec:Dataset-Image-Counts"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset Float table
placement H
wide false
sideways false
status open
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Tabular
<lyxtabular version="3" rows="7" columns="4">
<features tabularvalignment="middle">
<column alignment="center" valignment="top">
<column alignment="center" valignment="top">
<column alignment="center" valignment="top">
<column alignment="center" valignment="top">
<row>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
\end_layout
\end_inset
</cell>
<cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
Number of Images
\end_layout
\end_inset
</cell>
<cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
\end_layout
\end_inset
</cell>
<cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
\end_layout
\end_inset
</cell>
</row>
<row>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
Split
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
Training
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
Validation
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
Test
\end_layout
\end_inset
</cell>
</row>
<row>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
90/5/5
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
14,566
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
809
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
810
\end_layout
\end_inset
</cell>
</row>
<row>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
80/10/10
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
12,948
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
1,618
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
1,619
\end_layout
\end_inset
</cell>
</row>
<row>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
70/15/15
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
11,329
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
2,427
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
2,429
\end_layout
\end_inset
</cell>
</row>
<row>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
50/25/25
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
8,092
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
4,046
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
4,047
\end_layout
\end_inset
</cell>
</row>
<row>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
50/5/45
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
8,092
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
809
\end_layout
\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text
\begin_layout Plain Layout
7,284
\end_layout
\end_inset
</cell>
</row>
</lyxtabular>
\end_inset
\end_layout
\begin_layout Plain Layout
\begin_inset Caption Standard
\begin_layout Plain Layout
Number of images in each subset for each evaluated split
\begin_inset CommandInset label
LatexCommand label
name "tab:split-image-counts"
\end_inset
\end_layout
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Section
Training Set Expander Script
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../data_aug.py"
lstparams "caption={Python script for synthetically expanding a DIGITS training set},label={script:data_aug}"
\end_inset
\end_layout
\end_body
\end_document