1726 lines
28 KiB
Plaintext
1726 lines
28 KiB
Plaintext
#LyX 2.3 created this file. For more info see http://www.lyx.org/
|
|
\lyxformat 544
|
|
\begin_document
|
|
\begin_header
|
|
\save_transient_properties true
|
|
\origin unavailable
|
|
\textclass article
|
|
\begin_preamble
|
|
\def\changemargin#1#2{\list{}{\rightmargin#2\leftmargin#1}\item[]}
|
|
\let\endchangemargin=\endlist
|
|
\pagenumbering{roman}
|
|
|
|
\usepackage{color}
|
|
|
|
\definecolor{commentgreen}{RGB}{0,94,11}
|
|
\end_preamble
|
|
\use_default_options true
|
|
\begin_modules
|
|
customHeadersFooters
|
|
minimalistic
|
|
todonotes
|
|
\end_modules
|
|
\maintain_unincluded_children false
|
|
\language british
|
|
\language_package default
|
|
\inputencoding auto
|
|
\fontencoding global
|
|
\font_roman "default" "default"
|
|
\font_sans "default" "default"
|
|
\font_typewriter "default" "default"
|
|
\font_math "auto" "auto"
|
|
\font_default_family default
|
|
\use_non_tex_fonts false
|
|
\font_sc false
|
|
\font_osf false
|
|
\font_sf_scale 100 100
|
|
\font_tt_scale 100 100
|
|
\use_microtype true
|
|
\use_dash_ligatures true
|
|
\graphics default
|
|
\default_output_format default
|
|
\output_sync 0
|
|
\bibtex_command biber
|
|
\index_command default
|
|
\paperfontsize 11
|
|
\spacing onehalf
|
|
\use_hyperref true
|
|
\pdf_title "Convolutional Neural Networks with DIGITS"
|
|
\pdf_author "Andy Pack"
|
|
\pdf_subject "EEEM063 Image Processing & Deep Learning"
|
|
\pdf_keywords "EEEM063"
|
|
\pdf_bookmarks true
|
|
\pdf_bookmarksnumbered false
|
|
\pdf_bookmarksopen false
|
|
\pdf_bookmarksopenlevel 1
|
|
\pdf_breaklinks false
|
|
\pdf_pdfborder true
|
|
\pdf_colorlinks false
|
|
\pdf_backref false
|
|
\pdf_pdfusetitle true
|
|
\papersize default
|
|
\use_geometry true
|
|
\use_package amsmath 1
|
|
\use_package amssymb 1
|
|
\use_package cancel 1
|
|
\use_package esint 1
|
|
\use_package mathdots 1
|
|
\use_package mathtools 1
|
|
\use_package mhchem 1
|
|
\use_package stackrel 1
|
|
\use_package stmaryrd 1
|
|
\use_package undertilde 1
|
|
\cite_engine biblatex
|
|
\cite_engine_type authoryear
|
|
\biblio_style plain
|
|
\biblio_options urldate=long
|
|
\biblatex_bibstyle ieee
|
|
\biblatex_citestyle ieee
|
|
\use_bibtopic false
|
|
\use_indices false
|
|
\paperorientation portrait
|
|
\suppress_date true
|
|
\justification true
|
|
\use_refstyle 1
|
|
\use_minted 0
|
|
\index Index
|
|
\shortcut idx
|
|
\color #008000
|
|
\end_index
|
|
\leftmargin 2cm
|
|
\topmargin 2cm
|
|
\rightmargin 2cm
|
|
\bottommargin 2cm
|
|
\secnumdepth 3
|
|
\tocdepth 3
|
|
\paragraph_separation skip
|
|
\defskip medskip
|
|
\is_math_indent 0
|
|
\math_numbering_side default
|
|
\quotes_style british
|
|
\dynamic_quotes 0
|
|
\papercolumns 1
|
|
\papersides 1
|
|
\paperpagestyle fancy
|
|
\bullet 1 0 9 -1
|
|
\bullet 2 0 24 -1
|
|
\tracking_changes false
|
|
\output_changes false
|
|
\html_math_output 0
|
|
\html_css_as_file 0
|
|
\html_be_strict false
|
|
\end_header
|
|
|
|
\begin_body
|
|
|
|
\begin_layout Title
|
|
|
|
\size giant
|
|
Convolutional Neural Networks with DIGITS
|
|
\end_layout
|
|
|
|
\begin_layout Author
|
|
Andy Pack
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset VSpace 15pheight%
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename surrey.png
|
|
lyxscale 15
|
|
width 40col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset VSpace vfill
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\noindent
|
|
\align center
|
|
EEEM063
|
|
\begin_inset Newline newline
|
|
\end_inset
|
|
|
|
May 2021
|
|
\size large
|
|
|
|
\begin_inset Newline newline
|
|
\end_inset
|
|
|
|
Department of Electrical and Electronic Engineering
|
|
\begin_inset Newline newline
|
|
\end_inset
|
|
|
|
Faculty of Engineering and Physical Sciences
|
|
\begin_inset Newline newline
|
|
\end_inset
|
|
|
|
University of Surrey
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Newpage newpage
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Abstract
|
|
abstract
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset CommandInset toc
|
|
LatexCommand tableofcontents
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout List of TODOs
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Newpage newpage
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset FloatList figure
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset FloatList table
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset CommandInset toc
|
|
LatexCommand lstlistoflistings
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Newpage newpage
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Right Footer
|
|
Andy Pack / 6420013
|
|
\end_layout
|
|
|
|
\begin_layout Left Footer
|
|
May 2021
|
|
\end_layout
|
|
|
|
\begin_layout Left Header
|
|
EEEM063 Coursework
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset ERT
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
|
|
|
|
\backslash
|
|
pagenumbering{arabic}
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
|
|
|
|
\backslash
|
|
setcounter{page}{1}
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Section
|
|
Introduction
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Although much of the theory for convolutional neural networks (CNNs) was
|
|
developed throughout the 20th century, their importance to the field of
|
|
computer vision was not widely appreciated until the early 2010s.
|
|
|
|
\begin_inset Flex TODO Note (inline)
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
More context
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Although CNNs can appear opaque when attempting to understand how decisions
|
|
are made, they are not black boxes and there are many ways to affect a
|
|
model's performance.
|
|
This work presents investigations into how a CNN's performance is affected
|
|
by the subject dataset, the architecture of the network and the parameters
|
|
used when training.
|
|
Section
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "sec:Investigations-Scope"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
outlines the scope of the investigations made herein, describing the motivation
|
|
for the variations and expectations as to how this would affect performance.
|
|
The results for these investigations are presented in section
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "sec:Results"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
with interpretations made in the following section.
|
|
Section
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "sec:Conclusions"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
summarises and concludes the work.
|
|
\end_layout
|
|
|
|
\begin_layout Section
|
|
Investigations Scope
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "sec:Investigations-Scope"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
The investigations presented in this work use the Stanford Cars dataset
|
|
|
|
\begin_inset CommandInset citation
|
|
LatexCommand cite
|
|
key "cars"
|
|
literal "false"
|
|
|
|
\end_inset
|
|
|
|
, a selection of 16,185 images of 196 different classes of car.
|
|
In terms of network architecture, the seminal AlexNet
|
|
\begin_inset CommandInset citation
|
|
LatexCommand cite
|
|
key "alexnet"
|
|
literal "false"
|
|
|
|
\end_inset
|
|
|
|
was used as the template for the investigations presented.
|
|
\end_layout
|
|
|
|
\begin_layout Subsection
|
|
Dataset Processing
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Prior to more in-depth investigations, how the dataset is divided into training,
|
|
validation and test data was investigated in order to identify a suitable
|
|
proportion for later work.
|
|
Because the dataset has a fixed size, a balance must be struck between how much is reserved
|
|
for training the network and how much should be used to evaluate the network.
|
|
Throughout this paper, the term
|
|
\emph on
|
|
split
|
|
\emph default
|
|
will be used to denote a single division of the dataset into the three
|
|
required subsets.
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Although the dataset is of a fixed size, there are methods to artificially
|
|
grow the training images by performing image manipulations such as rotations
|
|
and zooms.
|
|
This attempts to teach the network to learn invariance to such transforms
|
|
during classification.
|
|
\end_layout
|
|
|
|
\begin_layout Subsection
|
|
Meta-Parameters
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Flex TODO Note (inline)
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
Epochs/learning rate/momentum?
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Subsection
|
|
Network Architectures
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Convolutional Layers
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Fully-Connected Layers
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Following the convolutional stages there are three dense or fully-connected
|
|
layers which provide two key features in image classification.
|
|
The first is flattening the 2D cross-section of the preceding convolutional
|
|
layers into a 1D representation for propagation to a final one-hot vector
|
|
output.
|
|
The second is as a traditional multi-layer perceptron classifier, taking
|
|
the high-level visual insights of the later convolutional layers and reasoning
|
|
these into a final classification.
|
|
When treated as an MLP, these can instead be considered as 2 hidden layers
|
|
and a single output layer.
|
|
The reason for designating the last layer separately is the level to which
|
|
it is fixed when varying the classifier as a whole.
|
|
The number of neurons in this layer remains equal to the number of classes
|
|
in the dataset in order to form a one-hot vector output when the network
|
|
makes a classification.
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Non-Linearity
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
The inclusion of non-linear layers throughout AlexNet is critical to its
|
|
ability to learn complex insights into a dataset.
|
|
Convolution as a mathematical operation can be proven to be associative
|
|
\begin_inset Flex TODO Note (Margin)
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
Fubini's theorem
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
in a similar fashion to multiplication.
|
|
This means that consecutive convolutions can be collapsed into a single
|
|
operation, for example multiple filters can be merged into a single compound
|
|
operation for less expensive application to an image.
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Section
|
|
Results
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "sec:Results"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Subsection
|
|
Dataset
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Train/Validation/Test Proportions
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/split-investigations/split-barh.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Top-1 and Top-5 test accuracy for different train/validation/test proportions
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:split-barh"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Different splits of the cars dataset were made; the test accuracies can
|
|
be seen in figure
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:split-barh"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
and the number of images in each subset can be seen in appendix
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "sec:Dataset-Image-Counts"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
.
|
|
A fixed learning rate of 0.001 was used over 100 epochs.
|
|
Increasing the proportion of data reserved for training the model can be
|
|
seen to increase the classification accuracy while varying the proportion
|
|
between the test and validation split has little effect.
|
|
The 80/10/10 split was deemed an appropriate balance in proportion and,
|
|
unless otherwise stated, is used for later experiments.
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Data Augmentation
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/data-aug-investigations/rot-accuracy.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Batch size = 128, AlexNet's default
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:rot-128b"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/data-aug-investigations/rot-accuracy-256batch.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Batch size = 256
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:rot-256b"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Test accuracies for different rotation degrees when augmenting the training
|
|
set
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:rot-accuracy"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/data-aug-investigations/best-barh.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Batch size = 128
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/data-aug-investigations/best-barh-256batch.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Batch size =
|
|
\begin_inset Formula $256$
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Best top-1 and top-5 test accuracies for different data augmentation methods
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:data-aug-best-barh"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Subsection
|
|
Meta-Parameters
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Fixed Learning Rate
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/lr-investigations/fixed-accuracy.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Test accuracies
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:fixed-accuracy"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/lr-investigations/fixed-loss.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Final validation loss
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:fixed-loss"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Fixed learning rate results over 100 epochs
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:fixed-results"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
In order to evaluate different learning rates and learning schedules, initial
|
|
investigations were made across six decades of a fixed learning rate over
|
|
100 epochs.
|
|
The accuracies and final validation loss can be seen in figure
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:fixed-results"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
.
|
|
For a fixed learning rate, values between 0.01 and 0.001 gave the best accuracy,
|
|
with both larger and smaller values giving a top-1 accuracy of less than 10%.
|
|
The final validation loss can be seen to generally follow the inverse trend,
|
|
with the lowest values lying between 0.01 and 0.001.
|
|
Interestingly, a learning rate of
|
|
\begin_inset Formula $1\times10^{-4}$
|
|
\end_inset
|
|
|
|
had a validation loss that somewhat interpolated the surrounding values
|
|
for
|
|
\begin_inset Formula $1\times10^{-5}$
|
|
\end_inset
|
|
|
|
and
|
|
\begin_inset Formula $1\times10^{-3}$
|
|
\end_inset
|
|
|
|
however the accuracy does not reflect this, instead being much lower at
|
|
2%.
|
|
\begin_inset Flex TODO Note (Margin)
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
reword?
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Step-Down
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/lr-investigations/step-down-accuracy.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Test accuracies for different step-down learning schedules over 100 epochs.
|
|
Step of 1/3 for 2 learning rate drops
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:step-down-accuracy"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Exponential
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Different exponential learning decay rates were investigated; the results
|
|
can be seen in figure
|
|
\begin_inset CommandInset ref
|
|
LatexCommand ref
|
|
reference "fig:exp-results"
|
|
plural "false"
|
|
caps "false"
|
|
noprefix "false"
|
|
|
|
\end_inset
|
|
|
|
.
|
|
From these, a slow decay rate can be seen to give the best results,
|
|
with values between 0.95 and 0.99 giving the highest accuracies.
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/lr-investigations/exp-accuracy.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Test accuracies
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:exp-accuracy"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/lr-investigations/exp-loss.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Final validation loss
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:exp-loss"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Results for different exponentially decaying learning schedules
|
|
over 100 epochs
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:exp-results"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Sigmoid
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/lr-investigations/sig-accuracy.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Test accuracies for different sigmoid learning schedules over 100 epochs
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:sig-accuracy"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/lr-investigations/best-barh.png
|
|
lyxscale 30
|
|
width 60col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Best top-1 and top-5 test accuracies for different learning schedules over
|
|
100 epochs
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:lr-best-barh"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Subsection
|
|
Network Architectures
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Convolutional Kernel Size
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/architecture-investigations/kernel-accuracy.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Top-1 test accuracy for different convolutional kernel sizes over 100 epochs
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:kernel-accuracy"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Subsubsection
|
|
Fully-Connected Layers
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float figure
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Graphics
|
|
filename ../cars/architecture-investigations/fc-accuracy.png
|
|
lyxscale 30
|
|
width 50col%
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Top-1 test accuracy for different fully-connected layer shapes over 100
|
|
epochs
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "fig:fc-accuracy"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Section
|
|
Discussion
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "sec:Discussion"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Subsection
|
|
Dataset
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
Although the
|
|
\end_layout
|
|
|
|
\begin_layout Subsection
|
|
Meta-Parameters
|
|
\end_layout
|
|
|
|
\begin_layout Subsection
|
|
Network Architectures
|
|
\end_layout
|
|
|
|
\begin_layout Section
|
|
Conclusions
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "sec:Conclusions"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Newpage newpage
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "sec:bibliography"
|
|
|
|
\end_inset
|
|
|
|
|
|
\begin_inset CommandInset bibtex
|
|
LatexCommand bibtex
|
|
btprint "btPrintCited"
|
|
bibfiles "references"
|
|
options "bibtotoc"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Section
|
|
\start_of_appendix
|
|
Dataset Image Counts
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "sec:Dataset-Image-Counts"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Standard
|
|
\begin_inset Float table
|
|
placement H
|
|
wide false
|
|
sideways false
|
|
status open
|
|
|
|
\begin_layout Plain Layout
|
|
\noindent
|
|
\align center
|
|
\begin_inset Tabular
|
|
<lyxtabular version="3" rows="7" columns="4">
|
|
<features tabularvalignment="middle">
|
|
<column alignment="center" valignment="top">
|
|
<column alignment="center" valignment="top">
|
|
<column alignment="center" valignment="top">
|
|
<column alignment="center" valignment="top">
|
|
<row>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell multicolumn="1" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
Number of Images
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell multicolumn="2" alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
</row>
|
|
<row>
|
|
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
Split
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
Training
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
Validation
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
Test
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
</row>
|
|
<row>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
90/5/5
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
14,566
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
809
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
810
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
</row>
|
|
<row>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
80/10/10
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
12,948
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
1,618
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
1,619
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
</row>
|
|
<row>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
70/15/15
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
11,329
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
2,427
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
2,429
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
</row>
|
|
<row>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
50/25/25
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
8,092
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
4,046
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
4,047
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
</row>
|
|
<row>
|
|
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
50/5/45
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
8,092
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
809
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
|
|
\begin_inset Text
|
|
|
|
\begin_layout Plain Layout
|
|
7,284
|
|
\end_layout
|
|
|
|
\end_inset
|
|
</cell>
|
|
</row>
|
|
</lyxtabular>
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\begin_layout Plain Layout
|
|
\begin_inset Caption Standard
|
|
|
|
\begin_layout Plain Layout
|
|
Number of images in each subset for each evaluated split
|
|
\begin_inset CommandInset label
|
|
LatexCommand label
|
|
name "tab:split-image-counts"
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_inset
|
|
|
|
|
|
\end_layout
|
|
|
|
\end_body
|
|
\end_document
|