2547 lines
44 KiB
Plaintext
2547 lines
44 KiB
Plaintext
#LyX 2.3 created this file. For more info see http://www.lyx.org/
|
|
\lyxformat 544
|
|
\begin_document
|
|
\begin_header
|
|
\save_transient_properties true
|
|
\origin unavailable
|
|
\textclass article
|
|
\begin_preamble
|
|
\def\changemargin#1#2{\list{}{\rightmargin#2\leftmargin#1}\item[]}
|
|
\let\endchangemargin=\endlist
|
|
\pagenumbering{roman}
|
|
|
|
\usepackage{color}
|
|
\usepackage{pxfonts}
|
|
|
|
\definecolor{commentgreen}{RGB}{0,94,11}
|
|
\definecolor{darkblue}{rgb}{0,0,0.75}
|
|
\definecolor{darkred}{rgb}{0.6,0,0}
|
|
\end_preamble
|
|
\use_default_options true
|
|
\begin_modules
|
|
customHeadersFooters
|
|
minimalistic
|
|
todonotes
|
|
\end_modules
|
|
\maintain_unincluded_children false
|
|
\language british
|
|
\language_package default
|
|
\inputencoding auto
|
|
\fontencoding global
|
|
\font_roman "default" "default"
|
|
\font_sans "default" "default"
|
|
\font_typewriter "default" "default"
|
|
\font_math "auto" "auto"
|
|
\font_default_family default
|
|
\use_non_tex_fonts false
|
|
\font_sc false
|
|
\font_osf false
|
|
\font_sf_scale 100 100
|
|
\font_tt_scale 100 100
|
|
\use_microtype true
|
|
\use_dash_ligatures true
|
|
\graphics default
|
|
\default_output_format default
|
|
\output_sync 0
|
|
\bibtex_command biber
|
|
\index_command default
|
|
\paperfontsize 11
|
|
\spacing single
|
|
\use_hyperref true
|
|
\pdf_title "Convolutional Neural Networks with DIGITS"
|
|
\pdf_author "Andy Pack"
|
|
\pdf_subject "EEEM063 Image Processing & Deep Learning"
|
|
\pdf_keywords "EEEM063"
|
|
\pdf_bookmarks true
|
|
\pdf_bookmarksnumbered false
|
|
\pdf_bookmarksopen false
|
|
\pdf_bookmarksopenlevel 1
|
|
\pdf_breaklinks false
|
|
\pdf_pdfborder true
|
|
\pdf_colorlinks false
|
|
\pdf_backref false
|
|
\pdf_pdfusetitle true
|
|
\papersize default
|
|
\use_geometry true
|
|
\use_package amsmath 1
|
|
\use_package amssymb 1
|
|
\use_package cancel 1
|
|
\use_package esint 1
|
|
\use_package mathdots 1
|
|
\use_package mathtools 1
|
|
\use_package mhchem 1
|
|
\use_package stackrel 1
|
|
\use_package stmaryrd 1
|
|
\use_package undertilde 1
|
|
\cite_engine biblatex
|
|
\cite_engine_type authoryear
|
|
\biblio_style plain
|
|
\biblio_options urldate=long
|
|
\biblatex_bibstyle ieee
|
|
\biblatex_citestyle ieee
|
|
\use_bibtopic false
|
|
\use_indices false
|
|
\paperorientation portrait
|
|
\suppress_date true
|
|
\justification true
|
|
\use_refstyle 1
|
|
\use_minted 0
|
|
\index Index
|
|
\shortcut idx
|
|
\color #008000
|
|
\end_index
|
|
\leftmargin 2cm
|
|
\topmargin 2cm
|
|
\rightmargin 2cm
|
|
\bottommargin 2cm
|
|
\secnumdepth 3
|
|
\tocdepth 3
|
|
\paragraph_separation skip
|
|
\defskip smallskip
|
|
\is_math_indent 0
|
|
\math_numbering_side default
|
|
\quotes_style british
|
|
\dynamic_quotes 0
|
|
\papercolumns 1
|
|
\papersides 1
|
|
\paperpagestyle fancy
|
|
\listings_params "language=Python,breaklines=true,frame=tb,otherkeywords={self},emph={State},emphstyle={\ttb\color{darkred}},basicstyle={\ttfamily},commentstyle={\bfseries\color{commentgreen}\itshape},keywordstyle={\color{darkblue}},emphstyle={\color{red}},stringstyle={\color{red}}"
|
|
\bullet 1 0 9 -1
|
|
\bullet 2 0 24 -1
|
|
\tracking_changes false
|
|
\output_changes false
|
|
\html_math_output 0
|
|
\html_css_as_file 0
|
|
\html_be_strict false
|
|
\end_header
|
|
|
|
\begin_body
|
|
|
|
\begin_layout Title
|
|
|
|
\size giant
|
|
Convolutional Neural Networks with DIGITS
|
|
\end_layout
|
|
|
|
\begin_layout Author
|
|
Andy Pack
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset VSpace 15pheight%
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename surrey.png
|
|
lyxscale 15
|
|
width 40col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset VSpace vfill
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\noindent
|
|
\align center
|
|
EEEM063
|
|
\begin_inset Newline newline
|
|
\end_inset
|
|
|
|
May 2021
|
|
\size large
|
|
|
|
\begin_inset Newline newline
|
|
\end_inset
|
|
|
|
Department of Electrical and Electronic Engineering
|
|
\begin_inset Newline newline
|
|
\end_inset
|
|
|
|
Faculty of Engineering and Physical Sciences
|
|
\begin_inset Newline newline
|
|
\end_inset
|
|
|
|
University of Surrey
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Newpage newpage
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Abstract
|
|
abstract
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset CommandInset toc
|
|
LatexCommand tableofcontents
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout List of TODOs
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Newpage newpage
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset FloatList figure
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset FloatList table
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset CommandInset toc
|
|
LatexCommand lstlistoflistings
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Newpage newpage
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Right Footer
|
|
Andy Pack / 6420013
|
|
\end_layout
|
|
|
|
\begin_layout Left Footer
|
|
May 2021
|
|
\end_layout
|
|
|
|
\begin_layout Left Header
|
|
EEEM063 Coursework
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset ERT
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
|
|
|
|
\backslash
|
|
pagenumbering{arabic}
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
|
|
|
|
\backslash
|
|
setcounter{page}{1}
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Section
|
|
Introduction
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Although much of the theory for convolutional neural networks (CNNs) was
|
|
developed throughout the 20th century, their importance to the field of
|
|
computer vision was not widely appreciated until the early 2010s.
|
|
|
|
\begin_inset Flex TODO Note (inline)
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
More context
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Although CNNs can appear opaque when attempting to understand how decisions
|
|
are made, they are not black boxes and there are many ways to affect a
|
|
model's performance.
|
|
This work presents investigations into how a CNN's performance is affected
|
|
by the subject dataset, the architecture of the network and the parameters
|
|
used when training.
|
|
Section
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "sec:Investigations-Scope"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
outlines the scope of the investigations made herein, describing the motivation
|
|
for the variations and expectations as to how this would affect performance.
|
|
The results for these investigations are presented in section
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "sec:Results"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
with interpretations made in the following section.
|
|
Section
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "sec:Conclusions"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
summarises and concludes the work.
|
|
\end_layout
|
|
|
|
\begin_layout Section
|
|
Investigations Scope
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "sec:Investigations-Scope"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
The investigations presented in this work use the Stanford Cars dataset
|
|
|
|
\begin_inset CommandInset citation
|
|
LatexCommand cite
|
|
key "cars"
|
|
literal "false"
|
|
|
|
\end_inset
|
|
|
|
, a selection of 16,185 images of 196 different classes of car.
|
|
In terms of network architecture, the seminal AlexNet
|
|
\begin_inset CommandInset citation
|
|
LatexCommand cite
|
|
key "alexnet"
|
|
literal "false"
|
|
|
|
\end_inset
|
|
|
|
was used as the template for the investigations presented.
|
|
\end_layout
|
|
|
|
\begin_layout Subsection
|
|
Dataset Processing
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Prior to more in-depth investigations, how the dataset is divided into training,
|
|
validation and test data was investigated in order to identify a suitable
|
|
proportion for later work.
|
|
As a fixed size dataset, a balance must be struck between how much is reserved
|
|
for training the network and how much should be used to evaluate the network.
|
|
Throughout this paper, the term
|
|
\emph on
|
|
split
|
|
\emph default
|
|
will be used to denote a single division of the dataset into the three
|
|
required subsets.
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Although the dataset is of a fixed size, there are methods to artificially
|
|
expand the set of training data by performing image manipulations such
|
|
as rotations and zooms.
|
|
This aims to teach the network invariance to such transforms during classificat
|
|
ion.
|
|
A Python script was written to take a training dataset and perform a range
|
|
of manipulations in order to create a synthetically larger training set.
|
|
The expansion factor,
|
|
\begin_inset Formula $E$
|
|
\end_inset
|
|
|
|
, is used to describe the scale factor for the new dataset's size.
|
|
The ideal rotation angle was investigated by rotating all images by a given
|
|
value.
|
|
\end_layout
|
|
|
|
\begin_layout Subsection
|
|
Meta-Parameters
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
The number of epochs that a network is trained for is important for balancing
|
|
the fit to the training set.
|
|
Too few and the CNN will be underfit whereas too many and the network will
|
|
be too specific to the training set.
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
The learning rate of a CNN is critical for attaining high-performance results.
|
|
The value and how it changes over the range of training epochs or the
|
|
\emph on
|
|
learning schedule
|
|
\emph default
|
|
are investigated.
|
|
A fixed learning rate will first be evaluated before varying the parameter
|
|
as a function of epochs in the form of a sigmoid function, exponential
|
|
decay and a step-down schedule with a step,
|
|
\begin_inset Formula $S=33\%$
|
|
\end_inset
|
|
|
|
for two steps at 1/3 and 2/3 of the total epochs.
|
|
These learning schedules were evaluated over 50 and 100 epochs to investigate
|
|
how this affects test accuracy.
|
|
\end_layout
|
|
|
|
\begin_layout Subsection
|
|
Network Architectures
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Convolutional Layers
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
The convolutional layers of AlexNet are responsible for applying subsequent
|
|
image manipulations by convolving the sample with a kernel of learned parameter
|
|
s.
|
|
The kernel size of each layer was varied in order to visualise performance.
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Fully-Connected Layers
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Following the convolutional stages there are three dense or fully-connected
|
|
layers which provide two key features in image classification.
|
|
The first is flattening the 2D cross-section of the preceding convolutional
|
|
layers into a 1D representation for propagation to a final one-hot vector
|
|
output.
|
|
The second is as a traditional multi-layer perceptron classifier, taking
|
|
the high-level visual insights of the later convolutional layers and reasoning
|
|
these into a final classification.
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
When treated as an MLP, these can instead be considered as 2 hidden layers
|
|
and a single output layer.
|
|
As the last layer is of a fixed number of nodes equal to the number of
|
|
classes and is required to form the one-hot vector output, it is treated
|
|
separately to the others.
|
|
Within this paper, when reporting the number of fully-connected layers
|
|
it is the number of hidden layers without the output layer.
|
|
\end_layout
|
|
|
|
\begin_layout Subsection
|
|
New Layers
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
It has been shown that the early layers (~1-3) of AlexNet are responsible
|
|
for identifying low-level features such as edges while the latter layers
|
|
(~3-5) perform higher level reasoning including texture.
|
|
The addition of a new layer in both of these regions of the network was
|
|
investigated.
|
|
Reasonable values for kernel sizes and number of layers were selected consideri
|
|
ng the values from the neighbouring layers.
|
|
\end_layout
|
|
|
|
\begin_layout Section
|
|
Results
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "sec:Results"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Subsection
|
|
Dataset
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Train/Validation/Test Proportions
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/split-investigations/split-barh.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Top-1 and Top-5 test accuracy for different train/validation/test proportions
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:split-barh"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Different splits of the cars dataset were made; the test accuracies can
|
|
be seen in figure
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:split-barh"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
and the number of images in each subset can be seen in appendix
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "sec:Dataset-Image-Counts"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
.
|
|
A fixed learning rate of 0.001 was used over 100 epochs.
|
|
Increasing the proportion of data reserved for training the model can be
|
|
seen to increase the classification accuracy while varying the proportion
|
|
between the test and validation split had little effect.
|
|
The 80/10/10 split was deemed an appropriate balance of proportions and,
|
|
unless otherwise stated, the 80/10/10 split is used for later experiments.
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Data Augmentation
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/data-aug-investigations/rot-accuracy.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Batch size = 128, AlexNet's default
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:rot-128b"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/data-aug-investigations/rot-accuracy-256batch.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Batch size = 256
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:rot-256b"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Test accuracies for different rotation degrees when augmenting the training
|
|
set with fixed or dynamic batch size
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:rot-accuracy"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Test results for varied degrees of rotation when augmenting the training
|
|
data can be seen in figure
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:rot-accuracy"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
.
|
|
In figure
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:rot-128b"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
, the batch size was set to 128, the default value and the one used for the
|
|
unaugmented control experiment for comparison later.
|
|
Figure
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:rot-256b"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
doubles the batch size to match the doubled training dataset.
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Figure
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:data-aug-best-barh"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
presents the test accuracies for different training pre-processing methods
|
|
including flipping the image horizontally and the best reported results
|
|
from the rotation experiment. Figure
|
|
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:data-aug-best-barh-128E"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
reports results with the previously described scaling of batch size with
|
|
training set size to maintain a constant number of network changes throughout
|
|
\begin_inset Foot
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
A batch size of
|
|
\begin_inset Formula $6\times128=768$
|
|
\end_inset
|
|
|
|
for
|
|
\emph on
|
|
full
|
|
\emph default
|
|
processing could not fit into the memory of the test equipment and as such
|
|
a batch accumulation of 6 was used to simulate the larger batch size.
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
.
|
|
|
|
\emph on
|
|
Full
|
|
\emph default
|
|
pre-processing significantly expanded the training dataset by rotating
|
|
the image both clockwise and counter-clockwise by the given degrees and
|
|
then flipping both the rotated and original images for an expansion factor
|
|
of 6.
|
|
With the fixed batch size (figure
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:data-aug-best-barh-fixed"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
), augmenting the dataset more than doubled the accuracy.
|
|
Rotation performed better than flipping the images while the described
|
|
combination performed best.
|
|
\begin_inset Flex TODO Note (inline)
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
Scaled batch size
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
With a constant number of network updates (figure
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:data-aug-best-barh-128E"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
), data augmentation still performed better than the unaugmented dataset;
|
|
however the performance was not as high as with a constant batch size.
|
|
Full processing performed worse than just flipping or rotating in this
|
|
case.
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/data-aug-investigations/best-barh.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Batch size = 128
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:data-aug-best-barh-fixed"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/data-aug-investigations/best-barh-256batch.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Batch size =
|
|
\begin_inset Formula $128E$
|
|
\end_inset
|
|
|
|
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:data-aug-best-barh-128E"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Best top-1 and top-5 test accuracies for different data augmentation methods
|
|
with fixed or dynamic batch size
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:data-aug-best-barh"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Subsection
|
|
Meta-Parameters
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Epochs
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/epochs-accuracy.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Test accuracies
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:epochs-accuracy"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/epochs-loss.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Final validation loss
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:epochs-loss"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Test results for varied training epochs with a sigmoid learning rate decay
|
|
of
|
|
\begin_inset Formula $\gamma=0.1$
|
|
\end_inset
|
|
|
|
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:epochs-results"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
The effect of varying the number of training epochs can be seen
|
|
in figure
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:epochs-results"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
.
|
|
More epochs can be seen to increase performance until ~70 epochs; after
|
|
this the accuracy gradually declines.
|
|
The opposite trend can be seen in the loss of figure
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:epochs-loss"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
.
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Fixed Learning Rate
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/lr-investigations/fixed-accuracy-50e.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
50 Epochs
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:fixed-accuracy-50e"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/lr-investigations/fixed-accuracy.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
100 Epochs
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:fixed-accuracy-100e"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Test accuracies for fixed learning rates
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:fixed-results"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
In order to evaluate different learning rates and learning schedules, initial
|
|
investigations were made across six decades of a fixed learning rate over
|
|
50 and 100 epochs.
|
|
The accuracies and final validation loss can be seen in figure
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:fixed-results"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
.
|
|
For a fixed learning rate, values between 0.01 and 0.001 gave the best accuracy
|
|
with values both larger or smaller giving a top-1 accuracy less than 10%.
|
|
The highest values for 50 and 100 epochs were similar.
|
|
\begin_inset Note Comment
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
The final validation loss can be seen to generally follow the inverse trend,
|
|
the lowest values lie between 0.01 and 0.001.
|
|
Interestingly, a learning rate of
|
|
\begin_inset Formula $1\times10^{-4}$
|
|
\end_inset
|
|
|
|
had a validation loss that somewhat interpolated the surrounding values
|
|
for
|
|
\begin_inset Formula $1\times10^{-5}$
|
|
\end_inset
|
|
|
|
and
|
|
\begin_inset Formula $1\times10^{-3}$
|
|
\end_inset
|
|
|
|
however the accuracy does not reflect this, instead being much lower at
|
|
2%.
|
|
\begin_inset Flex TODO Note (Margin)
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
reword?
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Step-Down
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/lr-investigations/step-down-accuracy-50e.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
50 Epochs
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/lr-investigations/step-down-accuracy.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
100 Epochs
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Test accuracies for different step-down learning schedules.
|
|
Step of 1/3 for 2 learning rate drops
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:step-down-accuracy"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Test accuracies for different step-down learning schedules can be seen in
|
|
figure
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:step-down-accuracy"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
.
|
|
Over both 50 and 100 epochs, the step-down scale factor can be seen to
|
|
have little effect on test accuracy.
|
|
|
|
\begin_inset Flex TODO Note (inline)
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
Finish
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Exponential
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Different exponential learning decay rates were investigated; the results
|
|
can be seen in figure
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:exp-results"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
.
|
|
From these results, a slow decay rate can be seen to give the best results;
|
|
values between 0.95 and 0.99 gave the highest accuracies, over both 50 and
|
|
100 epochs.
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/lr-investigations/exp-accuracy-50e.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
50 Epochs
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:exp-50e"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/lr-investigations/exp-accuracy.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
100 Epochs
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:exp-100e"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Test accuracies for different exponentially decaying learning schedules
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:exp-results"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Sigmoid
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Reasonable values of gamma for the sigmoid function were selected
|
|
\family roman
|
|
\series medium
|
|
\shape up
|
|
\size normal
|
|
\emph off
|
|
\bar no
|
|
\strikeout off
|
|
\xout off
|
|
\uuline off
|
|
\uwave off
|
|
\noun off
|
|
\color none
|
|
between 0.05 and 0.2.
|
|
Accuracies over 50 and 100 epochs were evaluated and can be seen in figure
|
|
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:sig-accuracy"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
.
|
|
|
|
\begin_inset Formula $\gamma$
|
|
\end_inset
|
|
|
|
had little effect on performance for 50 epochs with an average top-1 accuracy
|
|
of 40%.
|
|
Over 100 epochs, the performance increased from this value to 47% when
|
|
|
|
\begin_inset Formula $\gamma=0.05$
|
|
\end_inset
|
|
|
|
.
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/lr-investigations/sig-accuracy-50e.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
50 Epochs
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/lr-investigations/sig-accuracy.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
100 Epochs
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Test accuracies for different sigmoid learning schedules
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:sig-accuracy"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Summary
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
A comparison of the best reported accuracies for the investigated learning
|
|
rate schedules can be seen in figure
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:lr-best-barh"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
.
|
|
Over both 50 and 100 epochs, a fixed learning rate performed the worst
|
|
while using a learning schedule achieved a ~10% accuracy gain.
|
|
Training for 100 epochs increased the accuracy by between 3% and 7% over
|
|
training for 50 epochs.
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/lr-investigations/best-barh-50e.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
50 Epochs
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/lr-investigations/best-barh.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
100 Epochs
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Best top-1 and top-5 test accuracies for investigated learning schedules
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:lr-best-barh"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Subsection
|
|
Network Architectures
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Convolutional Kernel Size
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/architecture-investigations/kernel-accuracy.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Top-1 test accuracy for different convolutional kernel sizes over 100 epochs
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:kernel-accuracy"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Test accuracies for varied convolutional kernel sizes can be seen in figure
|
|
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:kernel-accuracy"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
with the standard kernel sizes for AlexNet also marked.
|
|
In general, varying the kernel size of the earlier layers (1 and 2) had
|
|
little effect on the accuracy with little gain made over the default.
|
|
Higher gains were made in the later layers, where a size of 5 or 7 tended
|
|
to perform better than the standard 3.
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Fully-Connected Layers
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/architecture-investigations/fc-accuracy.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Top-1 test accuracy for different fully-connected layer shapes over 100
|
|
epochs
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:fc-accuracy"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
The shape and number of fully-connected layers were varied; test results
|
|
can be seen in figure
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:fc-accuracy"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
; AlexNet's standard 2 hidden layers each of 4,096 nodes can be seen marked
|
|
for reference.
|
|
Each number of layers shows a peak with a steep ascent and a more gradual
|
|
descent; as the number of layers increases, the number of nodes associated with the
|
|
peak also increases.
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Additional Layers
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/architecture-investigations/new-layer-kernel-accuracy.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Top-1 test accuracy for varied kernel size of additional convolutional layers
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:new-layer"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Flex TODO Note (inline)
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
TODO
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Summary
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
A comparison of the best reported accuracies for the investigated architecture
|
|
changes can be seen in figure
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:architecture-best-barh"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
.
|
|
Each of the investigated architecture changes was able to outperform AlexNet.
|
|
The largest increase was achieved by reducing the number of nodes in the
|
|
2 hidden dense layers from 4,096 to 512 for a ~10% increase to 57%.
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/architecture-investigations/best-barh.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Best top-1 and top-5 test accuracies for investigated architectures over
|
|
100 epochs with exponentially decaying learning rate,
|
|
\begin_inset Formula $\lambda=0.98$
|
|
\end_inset
|
|
|
|
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:architecture-best-barh"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Section
|
|
Discussion
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "sec:Discussion"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Subsection
|
|
Dataset
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
The batch size scaling in line with the training set growth was conducted
|
|
in an effort to control for the amount of extra training being conducted.
|
|
When comparing data augmentation methods, difficulty comes in comparing
|
|
processing methods which expand the training set by different amounts.
|
|
Synthetically larger datasets not only present the network with new perspective
|
|
s of the image but also train the network for longer.
|
|
A method to better control for this in the future could be to define a
|
|
constant expansion factor across processing methods and then compose this
|
|
extra training data of different proportions of augmentations (rotations
|
|
of varying angles and flips).
|
|
\end_layout
|
|
|
|
\begin_layout Subsection
|
|
Meta-Parameters
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
As presented, it can be seen that training a network beyond a threshold
|
|
number of epochs leads to diminishing performance as the network overfits
|
|
to the training set.
|
|
\end_layout
|
|
|
|
\begin_layout Subsection
|
|
Network Architectures
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
From the reported results, each investigation outperformed the standard AlexNet
|
|
architecture.
|
|
It is worth specifying the significance of this, however.
|
|
It would be inaccurate from these results to suggest that these derivative
|
|
architectures are better than AlexNet as the performance is a function
|
|
of the dataset, the specific dataset split used, the learning rate schedule
|
|
and number of epochs trained for.
|
|
Instead, what is being stated is that, for the selected, specific values
|
|
of those, a more optimal architecture than the standard AlexNet was found.
|
|
\end_layout
|
|
|
|
\begin_layout Section
|
|
Conclusions
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "sec:Conclusions"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Newpage newpage
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "sec:bibliography"
|
|
|
|
\end_inset
|
|
|
|
|
|
\begin_inset CommandInset bibtex
|
|
LatexCommand bibtex
|
|
btprint "btPrintCited"
|
|
bibfiles "references"
|
|
options "bibtotoc"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Section
|
|
\start_of_appendix
|
|
Dataset Image Counts
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "sec:Dataset-Image-Counts"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float table
|
|
placement H
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Tabular
|
|
<lyxtabular version="3" rows="7" columns="4">
|
|
<features tabularvalignment="middle">
|
|
<column alignment="center" valignment="top">
|
|
<column alignment="center" valignment="top">
|
|
<column alignment="center" valignment="top">
|
|
<column alignment="center" valignment="top">
|
|
<row>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
Number of Images
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
</row>
|
|
<row>
|
|
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
Split
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
Training
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
Validation
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
Test
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
</row>
|
|
<row>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
90/5/5
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
14,566
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
809
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
810
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
</row>
|
|
<row>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
80/10/10
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
12,948
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
1,618
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
1,619
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
</row>
|
|
<row>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
70/15/15
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
11,329
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
2,427
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
2,429
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
</row>
|
|
<row>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
50/25/25
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
8,092
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
4,046
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
4,047
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
</row>
|
|
<row>
|
|
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
50/5/45
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
8,092
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
809
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
7,284
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
</row>
|
|
</lyxtabular>
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Number of images in each subset for each evaluated split
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "tab:split-image-counts"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Section
|
|
Training Set Expander Script
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset CommandInset include
|
|
LatexCommand lstinputlisting
|
|
filename "../data_aug.py"
|
|
lstparams "caption={Python script for synthetically expanding a DIGITS training set},label={script:data_aug}"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_body
|
|
\end_document
|