#LyX 2.3 created this file. For more info see http://www.lyx.org/ \lyxformat 544 \begin_document \begin_header \save_transient_properties true \origin unavailable \textclass article \begin_preamble \def\changemargin#1#2{\list{}{\rightmargin#2\leftmargin#1}\item[]} \let\endchangemargin=\endlist \pagenumbering{roman} \usepackage{color} \definecolor{commentgreen}{RGB}{0,94,11} \end_preamble \use_default_options true \begin_modules customHeadersFooters minimalistic todonotes \end_modules \maintain_unincluded_children false \language british \language_package default \inputencoding auto \fontencoding global \font_roman "default" "default" \font_sans "default" "default" \font_typewriter "default" "default" \font_math "auto" "auto" \font_default_family default \use_non_tex_fonts false \font_sc false \font_osf false \font_sf_scale 100 100 \font_tt_scale 100 100 \use_microtype true \use_dash_ligatures true \graphics default \default_output_format default \output_sync 0 \bibtex_command biber \index_command default \paperfontsize 11 \spacing onehalf \use_hyperref true \pdf_title "Convolutional Neural Networks with DIGITS" \pdf_author "Andy Pack" \pdf_subject "EEEM063 Image Processing & Deep Learning" \pdf_keywords "EEEM063" \pdf_bookmarks true \pdf_bookmarksnumbered false \pdf_bookmarksopen false \pdf_bookmarksopenlevel 1 \pdf_breaklinks false \pdf_pdfborder true \pdf_colorlinks false \pdf_backref false \pdf_pdfusetitle true \papersize default \use_geometry true \use_package amsmath 1 \use_package amssymb 1 \use_package cancel 1 \use_package esint 1 \use_package mathdots 1 \use_package mathtools 1 \use_package mhchem 1 \use_package stackrel 1 \use_package stmaryrd 1 \use_package undertilde 1 \cite_engine biblatex \cite_engine_type authoryear \biblio_style plain \biblio_options urldate=long \biblatex_bibstyle ieee \biblatex_citestyle ieee \use_bibtopic false \use_indices false \paperorientation portrait \suppress_date true \justification true \use_refstyle 1 \use_minted 0 
\index Index \shortcut idx \color #008000 \end_index \leftmargin 2cm \topmargin 2cm \rightmargin 2cm \bottommargin 2cm \secnumdepth 3 \tocdepth 3 \paragraph_separation skip \defskip medskip \is_math_indent 0 \math_numbering_side default \quotes_style british \dynamic_quotes 0 \papercolumns 1 \papersides 1 \paperpagestyle fancy \bullet 1 0 9 -1 \bullet 2 0 24 -1 \tracking_changes false \output_changes false \html_math_output 0 \html_css_as_file 0 \html_be_strict false \end_header \begin_body \begin_layout Title \size giant Convolutional Neural Networks with DIGITS \end_layout \begin_layout Author Andy Pack \end_layout \begin_layout Standard \begin_inset VSpace 15pheight% \end_inset \end_layout \begin_layout Standard \align center \begin_inset Graphics filename surrey.png lyxscale 15 width 40col% \end_inset \end_layout \begin_layout Standard \begin_inset VSpace vfill \end_inset \end_layout \begin_layout Standard \noindent \align center EEEM063 \begin_inset Newline newline \end_inset May 2021 \size large \begin_inset Newline newline \end_inset Department of Electrical and Electronic Engineering \begin_inset Newline newline \end_inset Faculty of Engineering and Physical Sciences \begin_inset Newline newline \end_inset University of Surrey \end_layout \begin_layout Standard \begin_inset Newpage newpage \end_inset \end_layout \begin_layout Abstract abstract \end_layout \begin_layout Standard \begin_inset CommandInset toc LatexCommand tableofcontents \end_inset \end_layout \begin_layout List of TODOs \end_layout \begin_layout Standard \begin_inset Newpage newpage \end_inset \end_layout \begin_layout Standard \begin_inset FloatList figure \end_inset \end_layout \begin_layout Standard \begin_inset FloatList table \end_inset \end_layout \begin_layout Standard \begin_inset CommandInset toc LatexCommand lstlistoflistings \end_inset \end_layout \begin_layout Standard \begin_inset Newpage newpage \end_inset \end_layout \begin_layout Right Footer Andy Pack / 6420013 \end_layout 
\begin_layout Left Footer May 2021 \end_layout \begin_layout Left Header EEEM063 Coursework \end_layout \begin_layout Standard \begin_inset ERT status open \begin_layout Plain Layout \backslash pagenumbering{arabic} \end_layout \begin_layout Plain Layout \backslash setcounter{page}{1} \end_layout \end_inset \end_layout \begin_layout Section Introduction \end_layout \begin_layout Standard Although much of the theory for convolutional neural networks (CNNs) was developed throughout the 20th century, their importance to the field of computer vision was not widely appreciated until the early 2010s. \begin_inset Flex TODO Note (inline) status open \begin_layout Plain Layout More context \end_layout \end_inset \end_layout \begin_layout Standard Although CNNs can appear opaque when attempting to understand how decisions are made, they are not black boxes and there are many ways to affect a model's performance. This work presents investigations into how a CNN's performance is affected by the subject dataset, the architecture of the network and the parameters used when training. Section \begin_inset CommandInset ref LatexCommand ref reference "sec:Investigations-Scope" plural "false" caps "false" noprefix "false" \end_inset outlines the scope of the investigations made herein, describing the motivation for the variations and expectations as to how this would affect performance. The results for these investigations are presented in section \begin_inset CommandInset ref LatexCommand ref reference "sec:Results" plural "false" caps "false" noprefix "false" \end_inset with interpretations made in the following section. Section \begin_inset CommandInset ref LatexCommand ref reference "sec:Conclusions" plural "false" caps "false" noprefix "false" \end_inset summarises and concludes the work. 
\end_layout \begin_layout Section Investigations Scope \begin_inset CommandInset label LatexCommand label name "sec:Investigations-Scope" \end_inset \end_layout \begin_layout Standard The investigations presented in this work use the Stanford Cars dataset \begin_inset CommandInset citation LatexCommand cite key "cars" literal "false" \end_inset , a selection of 16,185 images of 196 different classes of car. In terms of network architecture, the seminal AlexNet \begin_inset CommandInset citation LatexCommand cite key "alexnet" literal "false" \end_inset was used as the template for the investigations presented. \end_layout \begin_layout Subsection Dataset Processing \end_layout \begin_layout Standard Prior to more in-depth investigations, how the dataset is divided into training, validation and test data was investigated in order to identify a suitable proportion for later work. Because the dataset is of a fixed size, a balance must be struck between how much is reserved for training the network and how much should be used to evaluate the network. Throughout this paper, the term \emph on split \emph default will be used to denote a single division of the dataset into the three required subsets. \end_layout \begin_layout Standard Although the dataset is of a fixed size, there are methods to artificially grow the set of training images by performing image manipulations such as rotations and zooms. This is intended to teach the network invariance to such transforms during classification. \end_layout \begin_layout Subsection Meta-Parameters \end_layout \begin_layout Standard \begin_inset Flex TODO Note (inline) status open \begin_layout Plain Layout Epochs/learning rate/momentum? 
\end_layout \end_inset \end_layout \begin_layout Subsection Network Architectures \end_layout \begin_layout Subsubsection Convolutional Layers \end_layout \begin_layout Subsubsection Fully-Connected Layers \end_layout \begin_layout Standard Following the convolutional stages, there are three dense or fully-connected layers which provide two key features in image classification. The first is flattening the 2D cross-section of the preceding convolutional layers into a 1D representation for propagation to a final one-hot vector output. The second is as a traditional multi-layer perceptron (MLP) classifier, taking the high-level visual insights of the later convolutional layers and reasoning these into a final classification. When treated as an MLP, these can instead be considered as 2 hidden layers and a single output layer. The reason for designating the last layer separately is the level to which it is fixed when varying the classifier as a whole. The number of neurons in this layer remains equal to the number of classes in the dataset in order to form a one-hot vector output when the network makes a classification. \end_layout \begin_layout Subsubsection Non-Linearity \end_layout \begin_layout Standard The inclusion of non-linear layers throughout AlexNet is critical to its ability to learn complex insights into a dataset. Convolution as a mathematical operation can be proven to be associative \begin_inset Flex TODO Note (Margin) status open \begin_layout Plain Layout Fubini's theorem \end_layout \end_inset in a similar fashion to multiplication. This means that consecutive convolutions can be collapsed into a single operation; for example, multiple filters can be merged into a single compound operation for less expensive application to an image. 
\end_layout \begin_layout Section Results \begin_inset CommandInset label LatexCommand label name "sec:Results" \end_inset \end_layout \begin_layout Subsection Dataset \end_layout \begin_layout Subsubsection Train/Validation/Test Proportions \end_layout \begin_layout Standard \begin_inset Float figure wide false sideways false status open \begin_layout Plain Layout \noindent \align center \begin_inset Graphics filename ../cars/split-investigations/split-barh.png lyxscale 30 width 50col% \end_inset \end_layout \begin_layout Plain Layout \begin_inset Caption Standard \begin_layout Plain Layout Top-1 and Top-5 test accuracy for different train/validation/test proportions \begin_inset CommandInset label LatexCommand label name "fig:split-barh" \end_inset \end_layout \end_inset \end_layout \begin_layout Plain Layout \end_layout \end_inset \end_layout \begin_layout Standard Different splits of the cars dataset were made; the test accuracies can be seen in figure \begin_inset CommandInset ref LatexCommand ref reference "fig:split-barh" plural "false" caps "false" noprefix "false" \end_inset and the number of images in each subset can be seen in appendix \begin_inset CommandInset ref LatexCommand ref reference "sec:Dataset-Image-Counts" plural "false" caps "false" noprefix "false" \end_inset . A fixed learning rate of 0.001 was used over 100 epochs. Increasing the proportion of data reserved for training the model can be seen to increase the classification accuracy, while varying the proportion between the test and validation subsets has little effect. The 80/10/10 split was deemed an appropriate balance in proportion and, unless otherwise stated, is used for later experiments. 
\end_layout \begin_layout Subsubsection Data Augmentation \end_layout \begin_layout Standard \begin_inset Float figure wide false sideways false status open \begin_layout Plain Layout \noindent \align center \begin_inset Float figure wide false sideways false status open \begin_layout Plain Layout \noindent \align center \begin_inset Graphics filename ../cars/data-aug-investigations/rot-accuracy.png lyxscale 30 width 50col% \end_inset \end_layout \begin_layout Plain Layout \begin_inset Caption Standard \begin_layout Plain Layout Batch size = 128, AlexNet's default \begin_inset CommandInset label LatexCommand label name "fig:rot-128b" \end_inset \end_layout \end_inset \end_layout \end_inset \begin_inset Float figure wide false sideways false status open \begin_layout Plain Layout \noindent \align center \begin_inset Graphics filename ../cars/data-aug-investigations/rot-accuracy-256batch.png lyxscale 30 width 50col% \end_inset \end_layout \begin_layout Plain Layout \begin_inset Caption Standard \begin_layout Plain Layout Batch size = 256 \begin_inset CommandInset label LatexCommand label name "fig:rot-256b" \end_inset \end_layout \end_inset \end_layout \end_inset \end_layout \begin_layout Plain Layout \begin_inset Caption Standard \begin_layout Plain Layout Test accuracies for different rotation degrees when augmenting the training set \begin_inset CommandInset label LatexCommand label name "fig:rot-accuracy" \end_inset \end_layout \end_inset \end_layout \end_inset \end_layout \begin_layout Standard \begin_inset Float figure wide false sideways false status open \begin_layout Plain Layout \noindent \align center \begin_inset Float figure wide false sideways false status open \begin_layout Plain Layout \noindent \align center \begin_inset Graphics filename ../cars/data-aug-investigations/best-barh.png lyxscale 30 width 50col% \end_inset \end_layout \begin_layout Plain Layout \begin_inset Caption Standard \begin_layout Plain Layout Batch size = 128 \end_layout 
\end_inset \end_layout \end_inset \begin_inset Float figure wide false sideways false status open \begin_layout Plain Layout \noindent \align center \begin_inset Graphics filename ../cars/data-aug-investigations/best-barh-256batch.png lyxscale 30 width 50col% \end_inset \end_layout \begin_layout Plain Layout \begin_inset Caption Standard \begin_layout Plain Layout Batch size = \begin_inset Formula $256$ \end_inset \end_layout \end_inset \end_layout \end_inset \end_layout \begin_layout Plain Layout \begin_inset Caption Standard \begin_layout Plain Layout Best Top-1 and Top-5 test accuracies for different data augmentation methods \begin_inset CommandInset label LatexCommand label name "fig:data-aug-best-barh" \end_inset \end_layout \end_inset \end_layout \end_inset \end_layout \begin_layout Subsection Meta-Parameters \end_layout \begin_layout Subsubsection Fixed Learning Rate \end_layout \begin_layout Standard \begin_inset Float figure wide false sideways false status open \begin_layout Plain Layout \noindent \align center \begin_inset Float figure wide false sideways false status open \begin_layout Plain Layout \noindent \align center \begin_inset Graphics filename ../cars/lr-investigations/fixed-accuracy.png lyxscale 30 width 50col% \end_inset \end_layout \begin_layout Plain Layout \begin_inset Caption Standard \begin_layout Plain Layout Test accuracies \begin_inset CommandInset label LatexCommand label name "fig:fixed-accuracy" \end_inset \end_layout \end_inset \end_layout \end_inset \begin_inset Float figure wide false sideways false status open \begin_layout Plain Layout \noindent \align center \begin_inset Graphics filename ../cars/lr-investigations/fixed-loss.png lyxscale 30 width 50col% \end_inset \end_layout \begin_layout Plain Layout \begin_inset Caption Standard \begin_layout Plain Layout Final validation loss \begin_inset CommandInset label LatexCommand label name "fig:fixed-loss" \end_inset \end_layout \end_inset \end_layout \end_inset \end_layout 
\begin_layout Plain Layout \begin_inset Caption Standard \begin_layout Plain Layout Fixed learning schedule results over 100 epochs \begin_inset CommandInset label LatexCommand label name "fig:fixed-results" \end_inset \end_layout \end_inset \end_layout \end_inset \end_layout \begin_layout Standard In order to evaluate different learning rates and learning schedules, initial investigations were made across six decades of a fixed learning rate over 100 epochs. The accuracies and final validation loss can be seen in figure \begin_inset CommandInset ref LatexCommand ref reference "fig:fixed-results" plural "false" caps "false" noprefix "false" \end_inset . For a fixed learning rate, values between 0.01 and 0.001 gave the best accuracy, with both larger and smaller values giving a top-1 accuracy of less than 10%. The final validation loss can be seen to generally follow the inverse trend, with the lowest values lying between 0.01 and 0.001. Interestingly, a learning rate of \begin_inset Formula $1\times10^{-4}$ \end_inset had a validation loss that somewhat interpolated the surrounding values for \begin_inset Formula $1\times10^{-5}$ \end_inset and \begin_inset Formula $1\times10^{-3}$ \end_inset . However, the accuracy did not reflect this, instead being much lower at 2%. \begin_inset Flex TODO Note (Margin) status open \begin_layout Plain Layout reword? \end_layout \end_inset \end_layout \begin_layout Subsubsection Step-Down \end_layout \begin_layout Standard \begin_inset Float figure wide false sideways false status open \begin_layout Plain Layout \noindent \align center \begin_inset Graphics filename ../cars/lr-investigations/step-down-accuracy.png lyxscale 30 width 50col% \end_inset \end_layout \begin_layout Plain Layout \begin_inset Caption Standard \begin_layout Plain Layout Test accuracies for different step-down learning schedules over 100 epochs. 
Step of 1/3 for 2 learning rate drops \begin_inset CommandInset label LatexCommand label name "fig:step-down-accuracy" \end_inset \end_layout \end_inset \end_layout \end_inset \end_layout \begin_layout Subsubsection Exponential \end_layout \begin_layout Standard Different exponential learning decay rates were investigated; the results can be seen in figure \begin_inset CommandInset ref LatexCommand ref reference "fig:exp-results" plural "false" caps "false" noprefix "false" \end_inset . From these results, a slow decay rate can be seen to give the best results, with values between 0.95 and 0.99 giving the highest accuracies. \end_layout \begin_layout Standard \begin_inset Float figure wide false sideways false status open \begin_layout Plain Layout \noindent \align center \begin_inset Float figure wide false sideways false status open \begin_layout Plain Layout \noindent \align center \begin_inset Graphics filename ../cars/lr-investigations/exp-accuracy.png lyxscale 30 width 50col% \end_inset \end_layout \begin_layout Plain Layout \begin_inset Caption Standard \begin_layout Plain Layout Test accuracies \begin_inset CommandInset label LatexCommand label name "fig:exp-accuracy" \end_inset \end_layout \end_inset \end_layout \end_inset \begin_inset Float figure wide false sideways false status open \begin_layout Plain Layout \noindent \align center \begin_inset Graphics filename ../cars/lr-investigations/exp-loss.png lyxscale 30 width 50col% \end_inset \end_layout \begin_layout Plain Layout \begin_inset Caption Standard \begin_layout Plain Layout Final validation loss \begin_inset CommandInset label LatexCommand label name "fig:exp-loss" \end_inset \end_layout \end_inset \end_layout \end_inset \end_layout \begin_layout Plain Layout \begin_inset Caption Standard \begin_layout Plain Layout Results for different exponentially decaying learning schedules over 100 epochs \begin_inset CommandInset label LatexCommand label name "fig:exp-results" \end_inset \end_layout 
\end_inset \end_layout \end_inset \end_layout \begin_layout Subsubsection Sigmoid \end_layout \begin_layout Standard \begin_inset Float figure wide false sideways false status open \begin_layout Plain Layout \noindent \align center \begin_inset Graphics filename ../cars/lr-investigations/sig-accuracy.png lyxscale 30 width 50col% \end_inset \end_layout \begin_layout Plain Layout \begin_inset Caption Standard \begin_layout Plain Layout Test accuracies for different sigmoid learning schedules over 100 epochs \begin_inset CommandInset label LatexCommand label name "fig:sig-accuracy" \end_inset \end_layout \end_inset \end_layout \end_inset \end_layout \begin_layout Standard \begin_inset Float figure wide false sideways false status open \begin_layout Plain Layout \noindent \align center \begin_inset Graphics filename ../cars/lr-investigations/best-barh.png lyxscale 30 width 60col% \end_inset \end_layout \begin_layout Plain Layout \begin_inset Caption Standard \begin_layout Plain Layout Best Top-1 and Top-5 test accuracies for different learning schedules over 100 epochs \begin_inset CommandInset label LatexCommand label name "fig:lr-best-barh" \end_inset \end_layout \end_inset \end_layout \end_inset \end_layout \begin_layout Subsection Network Architectures \end_layout \begin_layout Subsubsection Convolutional Kernel Size \end_layout \begin_layout Standard \begin_inset Float figure wide false sideways false status open \begin_layout Plain Layout \noindent \align center \begin_inset Graphics filename ../cars/architecture-investigations/kernel-accuracy.png lyxscale 30 width 50col% \end_inset \end_layout \begin_layout Plain Layout \begin_inset Caption Standard \begin_layout Plain Layout Top-1 test accuracy for different convolutional kernel sizes over 100 epochs \begin_inset CommandInset label LatexCommand label name "fig:kernel-accuracy" \end_inset \end_layout \end_inset \end_layout \end_inset \end_layout \begin_layout Subsubsection Fully-Connected Layers \end_layout 
\begin_layout Standard \begin_inset Float figure wide false sideways false status open \begin_layout Plain Layout \noindent \align center \begin_inset Graphics filename ../cars/architecture-investigations/fc-accuracy.png lyxscale 30 width 50col% \end_inset \end_layout \begin_layout Plain Layout \begin_inset Caption Standard \begin_layout Plain Layout Top-1 test accuracy for different fully-connected layer shapes over 100 epochs \begin_inset CommandInset label LatexCommand label name "fig:fc-accuracy" \end_inset \end_layout \end_inset \end_layout \end_inset \end_layout \begin_layout Section Discussion \begin_inset CommandInset label LatexCommand label name "sec:Discussion" \end_inset \end_layout \begin_layout Subsection Dataset \end_layout \begin_layout Standard Although the \end_layout \begin_layout Subsection Meta-Parameters \end_layout \begin_layout Subsection Network Architectures \end_layout \begin_layout Section Conclusions \begin_inset CommandInset label LatexCommand label name "sec:Conclusions" \end_inset \end_layout \begin_layout Standard \begin_inset Newpage newpage \end_inset \end_layout \begin_layout Standard \begin_inset CommandInset label LatexCommand label name "sec:bibliography" \end_inset \begin_inset CommandInset bibtex LatexCommand bibtex btprint "btPrintCited" bibfiles "references" options "bibtotoc" \end_inset \end_layout \begin_layout Section \start_of_appendix Dataset Image Counts \begin_inset CommandInset label LatexCommand label name "sec:Dataset-Image-Counts" \end_inset \end_layout \begin_layout Standard \begin_inset Float table placement H wide false sideways false status open \begin_layout Plain Layout \noindent \align center \begin_inset Tabular \begin_inset Text \begin_layout Plain Layout \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout Number of Images \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout \end_layout \end_inset 
\begin_inset Text \begin_layout Plain Layout Split \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout Training \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout Validation \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout Test \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 90/5/5 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 14,566 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 809 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 810 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 80/10/10 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 12,948 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 1,618 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 1,619 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 70/15/15 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 11,329 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 2,427 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 2,429 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 50/25/25 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 8,092 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 4,046 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 4,047 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 50/5/45 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 8,092 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 809 \end_layout \end_inset \begin_inset Text \begin_layout Plain Layout 7,284 \end_layout \end_inset \end_inset \end_layout \begin_layout Plain Layout \begin_inset Caption Standard \begin_layout Plain Layout Number of images in each subset for each evaluated split \begin_inset CommandInset label 
LatexCommand label name "tab:split-image-counts" \end_inset \end_layout \end_inset \end_layout \end_inset \end_layout \end_body \end_document