#LyX 2.3 created this file. For more info see http://www.lyx.org/
\lyxformat 544
\begin_document
\begin_header
\save_transient_properties true
\origin unavailable
\textclass article
\begin_preamble
\def\changemargin#1#2{\list{}{\rightmargin#2\leftmargin#1}\item[]}
\let\endchangemargin=\endlist 
\pagenumbering{gobble}

\usepackage{pxfonts}
\usepackage{color}

\definecolor{commentgreen}{RGB}{0,94,11}
\definecolor{darkblue}{rgb}{0,0,0.75}
\definecolor{darkred}{rgb}{0.6,0,0}
\end_preamble
\use_default_options true
\begin_modules
customHeadersFooters
minimalistic
todonotes
\end_modules
\maintain_unincluded_children false
\language british
\language_package default
\inputencoding utf8
\fontencoding global
\font_roman "default" "default"
\font_sans "default" "default"
\font_typewriter "default" "default"
\font_math "auto" "auto"
\font_default_family default
\use_non_tex_fonts false
\font_sc false
\font_osf false
\font_sf_scale 100 100
\font_tt_scale 100 100
\use_microtype true
\use_dash_ligatures true
\graphics default
\default_output_format default
\output_sync 0
\bibtex_command biber
\index_command default
\paperfontsize default
\spacing onehalf
\use_hyperref true
\pdf_title "Training Neural Networks With Backpropagation"
\pdf_author "Andy Pack"
\pdf_subject "EEEM005"
\pdf_keywords "EEEM005"
\pdf_bookmarks true
\pdf_bookmarksnumbered false
\pdf_bookmarksopen false
\pdf_bookmarksopenlevel 1
\pdf_breaklinks false
\pdf_pdfborder true
\pdf_colorlinks false
\pdf_backref false
\pdf_pdfusetitle true
\papersize default
\use_geometry true
\use_package amsmath 1
\use_package amssymb 1
\use_package cancel 1
\use_package esint 1
\use_package mathdots 1
\use_package mathtools 1
\use_package mhchem 1
\use_package stackrel 1
\use_package stmaryrd 1
\use_package undertilde 1
\cite_engine biblatex
\cite_engine_type authoryear
\biblio_style plain
\biblio_options urldate=long
\biblatex_bibstyle ieee
\biblatex_citestyle ieee
\use_bibtopic false
\use_indices false
\paperorientation portrait
\suppress_date true
\justification true
\use_refstyle 1
\use_minted 0
\index Index
\shortcut idx
\color #008000
\end_index
\leftmargin 1.8cm
\topmargin 2cm
\rightmargin 1.8cm
\bottommargin 2cm
\secnumdepth 3
\tocdepth 3
\paragraph_separation skip
\defskip medskip
\is_math_indent 0
\math_numbering_side default
\quotes_style british
\dynamic_quotes 0
\papercolumns 1
\papersides 1
\paperpagestyle fancy
\listings_params "language=Python,breaklines=true,frame=tb,otherkeywords={self},emph={State},emphstyle={\ttb\color{darkred}},basicstyle={\ttfamily},commentstyle={\bfseries\color{commentgreen}\itshape},keywordstyle={\color{darkblue}},emphstyle={\color{red}},stringstyle={\color{red}}"
\bullet 1 0 9 -1
\bullet 2 0 24 -1
\tracking_changes false
\output_changes false
\html_math_output 0
\html_css_as_file 0
\html_be_strict false
\end_header

\begin_body

\begin_layout Title

\size giant
Training Neural Networks with Backpropagation
\end_layout

\begin_layout Author
Andy Pack
\end_layout

\begin_layout Standard
\begin_inset VSpace 15pheight%
\end_inset


\end_layout

\begin_layout Standard
\align center
\begin_inset Graphics
	filename surrey.png
	lyxscale 15
	width 40col%

\end_inset


\end_layout

\begin_layout Standard
\begin_inset VSpace vfill
\end_inset


\end_layout

\begin_layout Standard
\noindent
\align center
EEEM005
\begin_inset Newline newline
\end_inset

May 2021
\size large

\begin_inset Newline newline
\end_inset

Department of Electrical and Electronic Engineering
\begin_inset Newline newline
\end_inset

Faculty of Engineering and Physical Sciences
\begin_inset Newline newline
\end_inset

University of Surrey
\end_layout

\begin_layout Standard
\begin_inset Newpage newpage
\end_inset


\end_layout

\begin_layout Section*
Executive Summary
\end_layout

\begin_layout Standard
Summary here
\end_layout

\begin_layout Standard
\begin_inset Newpage newpage
\end_inset


\end_layout

\begin_layout Standard
\begin_inset ERT
status open

\begin_layout Plain Layout


\backslash
pagenumbering{roman}
\end_layout

\end_inset


\end_layout

\begin_layout Abstract
abstract
\end_layout

\begin_layout Standard
\begin_inset CommandInset toc
LatexCommand tableofcontents

\end_inset


\end_layout

\begin_layout Standard
\begin_inset Newpage pagebreak
\end_inset


\end_layout

\begin_layout Standard
\begin_inset FloatList figure

\end_inset


\end_layout

\begin_layout Standard
\begin_inset FloatList table

\end_inset


\end_layout

\begin_layout Standard
\begin_inset Newpage pagebreak
\end_inset


\end_layout

\begin_layout Right Footer
Andy Pack / 6420013
\end_layout

\begin_layout Left Footer
May 2021
\end_layout

\begin_layout Standard
\begin_inset ERT
status open

\begin_layout Plain Layout


\backslash
pagenumbering{arabic} 
\end_layout

\begin_layout Plain Layout


\backslash
setcounter{page}{1}
\end_layout

\end_inset


\end_layout

\begin_layout Section
Introduction
\end_layout

\begin_layout Standard
Artificial neural networks have been the object of research and investigation
 since the 1940s with 
\noun on
McCulloch
\noun default
 and 
\noun on
Pitts
\noun default
' model of the artificial neuron
\begin_inset CommandInset citation
LatexCommand cite
key "McCulloch1943"
literal "false"

\end_inset

 or 
\emph on
Threshold Logic Unit
\emph default
.
 Throughout the century, the development of the single and multi-layer perceptro
ns (SLP/MLP) alongside the backpropagation algorithm
\begin_inset CommandInset citation
LatexCommand cite
key "Rumelhart1986"
literal "false"

\end_inset

 advanced the study of artificial intelligence.
 Throughout the 2010s, convolutional neural networks have proved critical
 in the field of computer vision and image recognition
\begin_inset CommandInset citation
LatexCommand cite
key "alexnet"
literal "false"

\end_inset

.
\end_layout

\begin_layout Standard
This work investigates the ability of a shallow multi-layer perceptron to
 classify breast tumours as either benign or malignant.
 The architecture and parameters were varied in order to evaluate how this
 affects performance.
 
\end_layout

\begin_layout Standard
Investigations were carried out in 
\noun on
Python
\noun default
 using the 
\noun on
TensorFlow
\noun default
 package to construct, train and evaluate neural networks.
 The networks were trained using a supervised learning curriculum of labelled
 data taken from a standard 
\noun on
MatLab
\noun default
 dataset
\begin_inset CommandInset citation
LatexCommand cite
key "matlab-dataset"
literal "false"

\end_inset

 from the 
\noun on
Deep Learning Toolbox
\noun default
.
\end_layout

\begin_layout Standard
Section 
\begin_inset CommandInset ref
LatexCommand ref
reference "sec:exp1"
plural "false"
caps "false"
noprefix "false"

\end_inset

 investigates the effect of varying the number of hidden nodes on test accuracy
 along with the number of epochs that the MLPs are trained for.
 Section 
\begin_inset CommandInset ref
LatexCommand ref
reference "sec:exp2"
plural "false"
caps "false"
noprefix "false"

\end_inset

 builds on the previous experiment by using reasonable parameter values
 to investigate performance when using an ensemble of models to classify
 in conjunction.
 The effect of varying the number of nodes and epochs throughout the ensemble
 was considered in order to determine whether combining multiple models
 could produce a better accuracy than any individual model.
 Section 
\begin_inset CommandInset ref
LatexCommand ref
reference "sec:exp3"
plural "false"
caps "false"
noprefix "false"

\end_inset

 investigates the effect of altering how the networks learn by changing
 the optimisation algorithm.
 Two algorithms in addition to the one previously used are considered and compared
 using the same test apparatus of section 
\begin_inset CommandInset ref
LatexCommand ref
reference "sec:exp2"
plural "false"
caps "false"
noprefix "false"

\end_inset

.
\end_layout

\begin_layout Section
Hidden Nodes & Epochs
\begin_inset CommandInset label
LatexCommand label
name "sec:exp1"

\end_inset


\end_layout

\begin_layout Standard
This section investigates the effect of varying the number of nodes in the
 single hidden layer of a shallow multi-layer perceptron.
 This is compared to the effect of training the model with different numbers
 of epochs.
 Throughout the experiment, stochastic gradient descent with momentum is
 used as the optimiser; variations in both momentum and learning rate are
 presented.
 
\end_layout

\begin_layout Subsection
Results
\end_layout

\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open

\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
	filename ../graphs/exp1-test1-error-rate-curves.png
	lyxscale 50
	width 50col%

\end_inset


\end_layout

\begin_layout Plain Layout
\begin_inset Caption Standard

\begin_layout Plain Layout
Varied hidden node performance results over varied training lengths for
 
\begin_inset Formula $\eta=0.01$
\end_inset

, 
\begin_inset Formula $p=0$
\end_inset


\begin_inset CommandInset label
LatexCommand label
name "fig:exp1-test1"

\end_inset


\end_layout

\end_inset


\end_layout

\begin_layout Plain Layout

\end_layout

\end_inset


\end_layout

\begin_layout Standard
Figure 
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:exp1-test1"
plural "false"
caps "false"
noprefix "false"

\end_inset

 visualises the performance of hidden nodes up to 256 over training periods
 up to 200 epochs in length.
 In general, the error rate can be seen to decrease when the models are
 trained for longer.
 Increasing the number of nodes decreases the error rate and increases the
 gradient with which it falls up to a limit.
 64, 128 and 256 hidden nodes lie close together as the increases in performance
 slow.
 Between 0 and 25 epochs, the error rate for any number of nodes descends
 little below 0.35.
 The number of epochs to overcome this plateau is different for each number
 of nodes.
\end_layout

\begin_layout Standard
The standard deviations for the above discussed results of figure 
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:exp1-test1"
plural "false"
caps "false"
noprefix "false"

\end_inset

 can be seen in figure 
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:exp1-test1-std"
plural "false"
caps "false"
noprefix "false"

\end_inset

.
 As the network starts training, the standard deviation decreases to a minimum
 between 
\begin_inset Formula $10\text{--}20$
\end_inset

 epochs before increasing to a peak at 64 epochs.
 As the number of hidden nodes increases, the standard deviation decreases.
 The initial drop is sharper and the 64-epoch peak rises higher.
 
\end_layout

\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open

\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
	filename ../graphs/exp1-test1-test-train-error-rate-std.png
	lyxscale 50
	width 60col%

\end_inset


\end_layout

\begin_layout Plain Layout
\begin_inset Caption Standard

\begin_layout Plain Layout
Standard deviation of results from figure 
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:exp1-test1"
plural "false"
caps "false"
noprefix "false"

\end_inset

 with 
\begin_inset Formula $\eta=0.01$
\end_inset

, 
\begin_inset Formula $p=0$
\end_inset


\begin_inset CommandInset label
LatexCommand label
name "fig:exp1-test1-std"

\end_inset


\end_layout

\end_inset


\end_layout

\begin_layout Plain Layout

\end_layout

\end_inset


\end_layout

\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open

\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
	filename ../graphs/exp1-test2-2-error-rate-curves.png
	lyxscale 50
	width 50col%

\end_inset


\end_layout

\begin_layout Plain Layout
\begin_inset Caption Standard

\begin_layout Plain Layout
Varied hidden node performance results over varied training lengths for
 
\begin_inset Formula $\eta=0.1$
\end_inset

, 
\begin_inset Formula $p=0$
\end_inset


\begin_inset CommandInset label
LatexCommand label
name "fig:exp1-test2-2"

\end_inset


\end_layout

\end_inset


\end_layout

\end_inset


\end_layout

\begin_layout Subsection
Discussion
\end_layout

\begin_layout Section
Ensemble Classification
\begin_inset CommandInset label
LatexCommand label
name "sec:exp2"

\end_inset


\end_layout

\begin_layout Standard
A horizontal ensemble of 
\begin_inset Formula $m$
\end_inset

 models was constructed with majority vote in order to investigate whether
 this could improve performance over that of any single model.
 In order to introduce variation between models of the ensemble, a range
 for hidden nodes and epochs could be defined.
 When selecting parameters throughout the ensemble, the models are equally
 distributed throughout the ranges
\begin_inset Foot
status open

\begin_layout Plain Layout
For 
\begin_inset Formula $m=1$
\end_inset

, the average of the range is taken
\end_layout

\end_inset

.
 
\end_layout

\begin_layout Standard
The statistic 
\emph on
agreement
\emph default
, 
\begin_inset Formula $a$
\end_inset

, is defined as the proportion of models under the meta-classifier that
 correctly predict a sample's class when the ensemble correctly classifies.
 It could also be considered the confidence of the meta-classifier; for
 one horizontal model 
\begin_inset Formula $a_{m=1}=1$
\end_inset

.
 As error rates are presented, this is inverted by 
\begin_inset Formula $1-a$
\end_inset

 to 
\emph on
disagreement
\emph default
, 
\begin_inset Formula $d$
\end_inset

, the proportion of incorrect models when correctly group classifying.
\end_layout

\begin_layout Subsection
Results
\end_layout

\begin_layout Standard
For comparison, the average individual accuracies for both test and training
 data are presented.
\end_layout

\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open

\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
	filename ../graphs/exp2-test8-error-rate-curves.png
	lyxscale 50
	width 50col%

\end_inset


\end_layout

\begin_layout Plain Layout
\begin_inset Caption Standard

\begin_layout Plain Layout
Ensemble classifier performance results for 
\begin_inset Formula $\eta=0.03$
\end_inset

, 
\begin_inset Formula $p=0.01$
\end_inset

, nodes = 1–400, epochs = 5–100
\begin_inset CommandInset label
LatexCommand label
name "fig:exp2-test8"

\end_inset


\end_layout

\end_inset


\end_layout

\end_inset


\end_layout

\begin_layout Standard
An experiment with a fixed epoch value throughout the ensemble is presented
 in figure 
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:exp2-test10"
plural "false"
caps "false"
noprefix "false"

\end_inset

.
 Nodes between 1 and 400 were selected for the classifiers with a learning
 rate, 
\begin_inset Formula $\eta=0.15$
\end_inset

 and momentum, 
\begin_inset Formula $p=0.01$
\end_inset

.
 The ensemble accuracy can be seen to be fairly constant throughout the
 number of horizontal models with 3 models being the least accurate with
 a higher standard deviation.
 3 horizontal models also shows a significant spike in disagreement and
 individual error rates which gradually decreases as the number of models
 increases.
\end_layout

\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open

\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
	filename ../graphs/exp2-test10-error-rate-curves.png
	lyxscale 50
	width 50col%

\end_inset


\end_layout

\begin_layout Plain Layout
\begin_inset Caption Standard

\begin_layout Plain Layout
Ensemble classifier performance results for 
\begin_inset Formula $\eta=0.15$
\end_inset

, 
\begin_inset Formula $p=0.01$
\end_inset

, nodes = 
\begin_inset Formula $1\text{--}400$
\end_inset

, epochs = 20
\begin_inset CommandInset label
LatexCommand label
name "fig:exp2-test10"

\end_inset


\end_layout

\end_inset


\end_layout

\begin_layout Plain Layout

\end_layout

\end_inset


\end_layout

\begin_layout Subsection
Discussion
\end_layout

\begin_layout Standard
From the data of figure 
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:exp2-test10"
plural "false"
caps "false"
noprefix "false"

\end_inset

, 3 horizontal models was shown to be the worst performing configuration
 with lower ensemble accuracy and higher disagreement.
 This is likely due to the larger proportion of the ensemble that a single model constitutes.
 
\end_layout

\begin_layout Section
Optimiser Comparisons
\begin_inset CommandInset label
LatexCommand label
name "sec:exp3"

\end_inset


\end_layout

\begin_layout Standard
Throughout the previous experiments the stochastic gradient descent optimiser
 was used to change the networks' weights but there are many different optimisati
on algorithms.
 This section will present investigations into two other optimisation algorithms
 and discuss the differences between them using the horizontal ensemble
 classification of the previous section.
\end_layout

\begin_layout Standard
Prior to these investigations, however, stochastic gradient descent and
 the two other subject algorithms will be described.
\end_layout

\begin_layout Subsection
Optimisers
\end_layout

\begin_layout Subsubsection
Stochastic Gradient Descent
\end_layout

\begin_layout Subsubsection
RMSprop
\end_layout

\begin_layout Subsubsection
Adam
\end_layout

\begin_layout Subsection
Results
\end_layout

\begin_layout Subsection
Discussion
\end_layout

\begin_layout Section
Conclusions
\end_layout

\begin_layout Standard
\begin_inset Newpage newpage
\end_inset


\end_layout

\begin_layout Standard
\begin_inset CommandInset label
LatexCommand label
name "sec:bibliography"

\end_inset


\begin_inset CommandInset bibtex
LatexCommand bibtex
btprint "btPrintCited"
bibfiles "references"
options "bibtotoc"

\end_inset


\end_layout

\begin_layout Section
\start_of_appendix
Source Code
\end_layout

\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../nncw.py"
lstparams "caption={Formatted Jupyter notebook containing experiment code},label={notebook-code}"

\end_inset


\end_layout

\end_body
\end_document