dissertation/dissertation/dissertation.lyx

#LyX 2.3 created this file. For more info see http://www.lyx.org/
\lyxformat 544
\begin_document
\begin_header
\save_transient_properties true
\origin unavailable
\textclass article
\begin_preamble
\def\changemargin#1#2{\list{}{\rightmargin#2\leftmargin#1}\item[]}
\let\endchangemargin=\endlist
\end_preamble
\use_default_options true
\begin_modules
customHeadersFooters
minimalistic
todonotes
\end_modules
\maintain_unincluded_children false
\language english
\language_package default
\inputencoding auto
\fontencoding global
\font_roman "default" "default"
\font_sans "default" "default"
\font_typewriter "default" "default"
\font_math "auto" "auto"
\font_default_family default
\use_non_tex_fonts false
\font_sc false
\font_osf false
\font_sf_scale 100 100
\font_tt_scale 100 100
\use_microtype true
\use_dash_ligatures true
\graphics default
\default_output_format default
\output_sync 0
\bibtex_command biber
\index_command default
\paperfontsize 11
\spacing onehalf
\use_hyperref false
\pdf_title "Holoportation"
\pdf_author "Andy Pack"
\pdf_subject "The use of Kinect cameras to stream 3D video from client to server"
\pdf_bookmarks true
\pdf_bookmarksnumbered false
\pdf_bookmarksopen false
\pdf_bookmarksopenlevel 1
\pdf_breaklinks false
\pdf_pdfborder false
\pdf_colorlinks false
\pdf_backref false
\pdf_pdfusetitle true
\papersize default
\use_geometry true
\use_package amsmath 1
\use_package amssymb 1
\use_package cancel 1
\use_package esint 1
\use_package mathdots 1
\use_package mathtools 1
\use_package mhchem 1
\use_package stackrel 1
\use_package stmaryrd 1
\use_package undertilde 1
\cite_engine biblatex
\cite_engine_type authoryear
\biblio_style plain
\biblio_options urldate=long
\biblatex_bibstyle ieee
\biblatex_citestyle ieee
\use_bibtopic false
\use_indices false
\paperorientation portrait
\suppress_date true
\justification true
\use_refstyle 1
\use_minted 0
\index Index
\shortcut idx
\color #008000
\end_index
\leftmargin 2cm
\topmargin 2.2cm
\rightmargin 2cm
\bottommargin 2.2cm
\secnumdepth 4
\tocdepth 4
\paragraph_separation skip
\defskip medskip
\is_math_indent 0
\math_numbering_side default
\quotes_style english
\dynamic_quotes 0
\papercolumns 1
\papersides 1
\paperpagestyle fancy
\bullet 1 0 9 -1
\bullet 2 0 24 -1
\tracking_changes false
\output_changes false
\html_math_output 0
\html_css_as_file 0
\html_be_strict false
\end_header

\begin_body

\begin_layout Title

\size giant
Multi-Source Holoportation
\end_layout

\begin_layout Author
Andy Pack
\end_layout

\begin_layout Standard
\begin_inset VSpace bigskip
\end_inset


\end_layout

\begin_layout Standard
\align center
\begin_inset Graphics
	filename ../surreylogo.png
	lyxscale 30
	width 60col%

\end_inset


\end_layout

\begin_layout Standard
\begin_inset VSpace 4pheight%
\end_inset


\end_layout

\begin_layout Standard
\begin_inset ERT
status open

\begin_layout Plain Layout


\backslash
begin{changemargin}{3cm}{3cm}
\end_layout

\end_inset


\end_layout

\begin_layout Standard
\noindent
\align center

\size large
A dissertation submitted to the Department of Electronic Engineering in
 partial fulfilment of the Degree of Bachelor of Engineering in Electronic
 Engineering
\end_layout

\begin_layout Standard
\begin_inset ERT
status open

\begin_layout Plain Layout


\backslash
end{changemargin}
\end_layout

\end_inset


\end_layout

\begin_layout Standard
\begin_inset VSpace vfill
\end_inset


\end_layout

\begin_layout Standard
\noindent
\align center
May 2020
\size large

\begin_inset Newline newline
\end_inset

Department of Electrical and Electronic Engineering
\begin_inset Newline newline
\end_inset

Faculty of Engineering and Physical Sciences
\begin_inset Newline newline
\end_inset

University of Surrey
\end_layout

\begin_layout Standard
\begin_inset Newpage newpage
\end_inset


\end_layout

\begin_layout Abstract
abstract
\end_layout

\begin_layout Standard
\begin_inset CommandInset toc
LatexCommand tableofcontents

\end_inset


\end_layout

\begin_layout Standard
\begin_inset VSpace medskip
\end_inset


\end_layout

\begin_layout Standard
\begin_inset FloatList figure

\end_inset


\begin_inset CommandInset toc
LatexCommand lstlistoflistings

\end_inset


\end_layout

\begin_layout Standard
\begin_inset VSpace vfill
\end_inset


\end_layout

\begin_layout Section*
Acknowledgements
\end_layout

\begin_layout Standard
\noindent
\align center
acknowledgements
\end_layout

\begin_layout Standard
\begin_inset Newpage newpage
\end_inset


\end_layout

\begin_layout Right Footer
Andy Pack / 6420013
\end_layout

\begin_layout Left Footer
May 2020
\end_layout

\begin_layout Section
Introduction
\end_layout

\begin_layout Standard
\begin_inset Flex TODO Note (inline)
status open

\begin_layout Plain Layout
add proper intro about XR and new media
\end_layout

\end_inset


\end_layout

\begin_layout Standard
The aim of this project is to develop a piece of software capable of supporting
 multi-source holoportation (hologram teleportation) using the
\emph on
\noun on
LiveScan3D
\emph default
\noun default

\begin_inset CommandInset citation
LatexCommand cite
key "livescan3d"
literal "false"

\end_inset

 suite of software as a base.
\end_layout

\begin_layout Standard
As the spaces of augmented and virtual reality become more commonplace and
 mature, the ability to capture and stream 3D renders of objects and people
 over the internet using consumer-grade hardware has many possible applications.
\end_layout

\begin_layout Standard
This represents one of the most direct evolutions of traditional video streaming
 when applied to this new technological space.
\end_layout

\begin_layout Standard
A view of what multi-source achieves can be seen in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:premise"
plural "false"
caps "false"
noprefix "false"

\end_inset

.
 Both single and multi-view configurations of cameras are shown, the latter
 allowing more complete renders of the subject to be acquired.
 Both shapes are presented through the
\emph on
user experience
\emph default
, control schemes and visual language can vary between implementations across
 AR/VR and traditional 2D displays.
\end_layout

\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open

\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
	filename ../media/premise.png
	lyxscale 30
	width 70col%

\end_inset


\end_layout

\begin_layout Plain Layout
\begin_inset Caption Standard

\begin_layout Plain Layout
Demonstration of a multi-source holoportation system including single and
 multiple view camera configurations
\begin_inset CommandInset label
LatexCommand label
name "fig:premise"

\end_inset


\end_layout

\end_inset


\end_layout

\begin_layout Plain Layout

\end_layout

\end_inset


\end_layout

\begin_layout Standard

\noun on
LiveScan3D
\noun default
 is a suite of 3D video software capable of recording and transmitting video
 from client to server for rendering.
 The suite is fast and uses consumer grade hardware for capture in the form
 of
\noun on
Xbox Kinect
\noun default
 cameras, it is used in various projects at the
\noun on
University of Surrey
\noun default
 and has multiple setups in dedicated lab space.
\end_layout

\begin_layout Standard

\noun on
LiveScan3D's
\noun default
 use
\noun on

\noun default
of
\noun on
Xbox Kinect
\noun default
 cameras allows the capture and stream of 3D renders in single or multi-view
 configurations using calibrated cameras however the server is only able
 to process and reconstruct one environment at a time.
\end_layout

\begin_layout Standard
The capability to concurrently receive and reconstruct streams of different
 objects further broadens the landscape of possible applications, analogous
 to the movement from traditional phone calls to conference calling.
\end_layout

\begin_layout Subsection
COVID-19
\end_layout

\begin_layout Section
Literature Review
\end_layout

\begin_layout Standard

\noun on
LiveScan3D
\noun default
 utilises the
\noun on
Microsoft Kinect
\noun default
 sensor in order to capture RGB video with depth information.
 While Kinect sensors have proved extremely popular in the computer vision
 sector, it does not represent the only method for such 3D reconstruction,
 traditional visual hull reconstruction is investigated before identifying
 the
\noun on
Kinect
\noun default
's role in this space.
\end_layout

\begin_layout Standard
The significance of 3D video like that captured and relayed using the
\noun on
LiveScan
\noun default
 suite is related to the development of new technologies able to immersively
 display such video content.

\end_layout

\begin_layout Standard
While this has been exemplified mostly through AR with
\begin_inset CommandInset citation
LatexCommand citeauthor
key "livescan3d-hololens"
literal "false"

\end_inset

's
\noun on
LiveScan
\noun default
 client for
\noun on
Microsoft Hololens
\begin_inset CommandInset citation
LatexCommand cite
key "livescan3d-hololens"
literal "false"

\end_inset


\noun default
 and
\begin_inset CommandInset citation
LatexCommand citeauthor
key "livescan3d-android"
literal "false"

\end_inset

's extension of this for
\noun on
Android
\noun default
 phones
\begin_inset CommandInset citation
LatexCommand cite
key "livescan3d-android"
literal "false"

\end_inset

, the collection and transmission of 3D holograms have applicability to
 all forms of XR and as such the state of this space as a whole is investigated.
\end_layout

\begin_layout Standard
As the foundation of this project, the
\noun on
LiveScan3D
\noun default
suite itself is presented in more depth following this review in order to
 contextualise it both within these investigations and the extension work
 presented herein.
\end_layout

\begin_layout Subsection
3D Capture & Reconstruction
\end_layout

\begin_layout Subsubsection
Visual Hull Reconstruction
\end_layout

\begin_layout Standard
\begin_inset Flex TODO Note (inline)
status open

\begin_layout Plain Layout
CVSSP case study from CV?
\end_layout

\end_inset


\end_layout

\begin_layout Subsubsection
RGB-D Cameras
\end_layout

\begin_layout Standard
\begin_inset Flex TODO Note (inline)
status open

\begin_layout Plain Layout
Structure Sensor
\end_layout

\end_inset


\end_layout

\begin_layout Standard
Initially designed as a motion control accessory for the
\noun on
Xbox
\noun default
, the
\noun on
Kinect
\noun default
 is a series of depth aware cameras produced by
\noun on
Microsoft
\noun default
.
 The device uses additional infrared lights and sensors alongside an RGB
 camera in a configuration referred to as a Time-of-Flight camera to generate
 3D renders of a surroundings.
 The device also includes motion tracking and skeleton isolation for figures
 in view.
\end_layout

\begin_layout Standard
Following the release of an SDK for Windows in 2012,
\begin_inset CommandInset citation
LatexCommand citeauthor
key "original-kinect-microsoft"
literal "false"

\end_inset

 at
\noun on
Microsoft Research
\noun default
 reflects on the original camera's capabilities and the applications to
 computer vision research in
\begin_inset CommandInset citation
LatexCommand cite
key "original-kinect-microsoft"
literal "false"

\end_inset

.
\end_layout

\begin_layout Standard
Here 3D conference calling of the type described in the introduction without
 AR or VR applications is presented, instead users watch a composite conference
 space on a screen with all participants rendered within.
 Work was undertaken to achieve mutual gaze between participants, a marked
 advantage over traditional conference calls where the lack of such aspects
 of group interaction make the experience more impersonal.
 Methods of achieving more natural virtual interactions or
\emph on
telepresence
\emph default
 are covered in section
\begin_inset CommandInset ref
LatexCommand ref
reference "subsec:Holoportation-and-Telepresence"
plural "false"
caps "false"
noprefix "false"

\end_inset

.
\end_layout

\begin_layout Standard
A second version of the camera, v2, was released alongside the
\noun on
Xbox One
\noun default
 in 2013 and presented many improvements over the original.
 A higher quality RGB camera captures 1080p video at up to 30 frames per
 second with a wider field of view than the original
\begin_inset CommandInset citation
LatexCommand cite
key "kinect-specs"
literal "false"

\end_inset

.
 The physical capabilities of the camera are discussed by
\begin_inset CommandInset citation
LatexCommand citeauthor
key "new-kinect"
literal "false"

\end_inset


\begin_inset CommandInset citation
LatexCommand cite
key "new-kinect"
literal "false"

\end_inset

.
 The second version of the camera was found to gather more accurate depth
 data than the original and was less sensitive to daylight.

\begin_inset CommandInset citation
LatexCommand citeauthor
key "kinectv1/v2-accuracy-precision"
literal "false"

\end_inset


\begin_inset CommandInset citation
LatexCommand cite
key "kinectv1/v2-accuracy-precision"
literal "false"

\end_inset

 found similar results with the v2 achieving higher accuracy results over
 the original.
 The second version did, however, achieve lower precision results than the
 v1 with recommendations made to include pre-processing on acquired depth
 images to control for random noise,
\emph on
flying pixels
\emph default
 and
\emph on
multipath interference
\emph default
.
\end_layout

\begin_layout Standard
The
\noun on
Kinect
\noun default
 is used successfully by
\begin_inset CommandInset citation
LatexCommand citeauthor
key "greenhouse-kinect"
literal "false"

\end_inset


\begin_inset CommandInset citation
LatexCommand cite
key "greenhouse-kinect"
literal "false"

\end_inset

 for object detection in the context of an autonomous vehicle navigating
 a greenhouse.
 The depth information was used in conjunction with the RGB information
 to identify obstacles, while the paper lays out some limitations of the
 camera it was found to be effective in it's aim and was capable of running
 on a reasonable computer.
\end_layout

\begin_layout Standard
This second iteration on the
\noun on
Kinect
\noun default
 is frequently used in computer vision experiments with many of the works
 cited here using it for acquisition.
\end_layout

\begin_layout Subsection
Extended Reality (XR)
\end_layout

\begin_layout Standard
Immersive media experiences enhanced through the use of technology are typically
 defined by the level to which they affect the perception of the user.
 This distinction typically organises technologies into one of three established
 terms,
\emph on
Virtual Reality
\emph default
,
\emph on
Augmented Reality
\emph default
 and
\emph on
Mixed Reality
\emph default
.
\end_layout

\begin_layout Description
Virtual The replacement of a user's experience of unmediated reality, rendering
 a new computer-generated space that the user appears to immersively inhabit.
 Typically achieved through face mounted headsets (
\emph on
Facebook Oculus, HTC Vive, Playstation VR, Valve Index
\emph default
).
\end_layout

\begin_layout Description
Augmented The enhancement of a user's reality through the overlay of digital
 graphics.
 Typically facilitated with translucent/transparent headsets
\emph on
(Microsoft Hololens, Google Glass)
\emph default
 or increasingly with
\begin_inset Quotes eld
\end_inset

Window on the World
\begin_inset Quotes erd
\end_inset


\begin_inset CommandInset citation
LatexCommand cite
key "reality-virtuality-continuum"
literal "false"

\end_inset

 mobile technologies
\emph on
(Android ARCore
\emph default

\begin_inset CommandInset citation
LatexCommand cite
key "ARCore"
literal "false"

\end_inset

,
\emph on
Apple ARKit
\emph default

\begin_inset CommandInset citation
LatexCommand cite
key "arkit"
literal "false"

\end_inset


\emph on
)
\emph default
 such as
\emph on
Pokemon GO
\emph default

\begin_inset CommandInset citation
LatexCommand cite
key "pokemonGO"
literal "false"

\end_inset

.
\end_layout

\begin_layout Description
Mixed A combination of virtual elements with the real world in order to
 facilitate interaction with an augmented reality.
 A somewhat broad term owing to it's description of a point between augmented
 and virtual reality.
 An emphasis is typically placed on virtual elements existing coherently
 within the real world and interacting in real-time.
\end_layout

\begin_layout Standard
The term
\emph on
Extended Reality
\emph default
or XR functions as an umbrella term for all such experiences and is used
 throughout this paper, note that the terms
\emph on
mediated reality
\emph default
 and *R
\begin_inset CommandInset citation
LatexCommand cite
key "all-reality"
literal "false"

\end_inset

 are also sometimes used where the asterisk refers to
\begin_inset Quotes eld
\end_inset

all
\begin_inset Quotes erd
\end_inset

 realities.

\begin_inset Flex TODO Note (inline)
status open

\begin_layout Plain Layout
Cross reality? Reference?
\end_layout

\end_inset


\end_layout

\begin_layout Standard
While individual classes of XR provide ostensibly different experiences,
 it can be seen that there is overlap between them, notably that at a high
 level all aim to extend a user's experience of reality.
\begin_inset Flex TODO Note (inline)
status open

\begin_layout Plain Layout
Reword
\end_layout

\end_inset

 All can be seen to employ
\emph on
Spatial Computing
\emph default
 as defined by
\begin_inset CommandInset citation
LatexCommand citeauthor
key "spatial-computing"
literal "false"

\end_inset


\begin_inset CommandInset citation
LatexCommand cite
key "spatial-computing"
literal "false"

\end_inset

 to refer to
\end_layout

\begin_layout Quote

\emph on
Human interaction with a machine in which the machine retains and manipulates
 referents to real objects and spaces.
\end_layout

\begin_layout Standard
Identifying the common dimensions across XR has led to the proposal of various
 taxonomies providing insights in to how each implementation relate to others
\begin_inset CommandInset citation
LatexCommand cite
key "reality-virtuality-continuum,mr-taxonomy,all-reality"
literal "false"

\end_inset

.
\end_layout

\begin_layout Subsubsection
The Reality Virtuality Continuum
\end_layout

\begin_layout Subsubsection
XR Implementations
\end_layout

\begin_layout Standard
\begin_inset Flex TODO Note (inline)
status open

\begin_layout Plain Layout
Mobile AR examples
\end_layout

\end_inset


\end_layout

\begin_layout Standard
Although VR and AR headsets have accelerated the development of XR technology,
 they are not the only way to construct XR experiences.

\begin_inset CommandInset citation
LatexCommand citeauthor
key "roomalive"
literal "false"

\end_inset


\begin_inset CommandInset citation
LatexCommand cite
key "roomalive"
literal "false"

\end_inset

 demonstrate
\emph on
RoomAlive
\emph default
, an AR experience using depth cameras and projectors (referred to as
\emph on
procams
\emph default
) to construct experiences in any room.
 This is presented through games and visual alterations to the users surrounding
s.
 A strength of the system is it's self contained nature, able to automatically
 calibrate the camera arrangements using correspondences found between each
 view.
 Experience level heuristics are also discussed regarding capturing and
 maintaining user attention in an environment where the experience can be
 occurring anywhere, including behind the user.

\begin_inset Flex TODO Note (Margin)
status open

\begin_layout Plain Layout
Link with work
\end_layout

\end_inset


\end_layout

\begin_layout Standard
A point is also made about how the nature of this room based experience
 breaks much of the typical game-user interaction established by virtual
 reality and video games.
 In contrast to traditional and virtual reality game experiences where the
 game is ultimately in control of the user or user avatar, AR experiences
 of this type have no physical control over the user and extra considerations
 must be made when designing such systems.
\end_layout

\begin_layout Standard
Traditional media consumption is not the only area of interest for developing
 interactive experiences, an investigation into the value of AR and VR for
 improving construction safety is presented by
\begin_inset CommandInset citation
LatexCommand citeauthor
key "ar/vr-construction"
literal "false"

\end_inset


\begin_inset CommandInset citation
LatexCommand cite
key "ar/vr-construction"
literal "false"

\end_inset

.
 A broad look at the applicability is taken with assessments including VR
 experiences for developing worker balance to aid in working at elevation
 and AR experiences incorporated into the workplace for aiding in task sequencin
g to reduce the effect of memory on safety.
\begin_inset Flex TODO Note (Margin)
status open

\begin_layout Plain Layout
Link with work
\end_layout

\end_inset


\end_layout

\begin_layout Standard
\begin_inset CommandInset citation
LatexCommand citeauthor
key "remixed-reality"
literal "false"

\end_inset


\begin_inset CommandInset citation
LatexCommand cite
key "remixed-reality"
literal "false"

\end_inset

 demonstrate an example of mixed reality through the use of
\noun on
Kinect
\noun default
 cameras and a virtual reality headset.
 Users are placed in a virtual space constructed from 3D renders of the
 physical environment around the user.
 Virtual manipulation of the space can then be achieved with visual, spatial
 and temporal changes supported.
 Objects can be scaled and sculpted in realtime while the environment can
 be paused and rewinded.
 The strength of mixed reality comes with the immersion of being virtually
 placed in a version of the physical surroundings, tactile feedback from
 the environment compounds this.
\begin_inset Flex TODO Note (Margin)
status open

\begin_layout Plain Layout
Link with work
\end_layout

\end_inset


\end_layout

\begin_layout Subsubsection
Augmented Reality
\end_layout

\begin_layout Standard
The advancement of mobile AR experiences spurred
\begin_inset Flex TODO Note (Margin)
status open

\begin_layout Plain Layout
?
\end_layout

\end_inset

 by the introduction of OS level SDK's in Google's ARCore
\begin_inset CommandInset citation
LatexCommand cite
key "ARCore"
literal "false"

\end_inset

 and Apple's ARKit
\begin_inset CommandInset citation
LatexCommand cite
key "arkit"
literal "false"

\end_inset

 has led this to be
\end_layout

\begin_layout Standard
\begin_inset Flex TODO Note (inline)
status open

\begin_layout Plain Layout
Handheld and Hololens
\end_layout

\end_inset


\end_layout

\begin_layout Standard
\begin_inset Flex TODO Note (inline)
status open

\begin_layout Plain Layout
Mobile AR SDKs and Unity ARFoundation
\end_layout

\end_inset


\end_layout

\begin_layout Subsection
Holoportation and Telepresence
\begin_inset CommandInset label
LatexCommand label
name "subsec:Holoportation-and-Telepresence"

\end_inset


\end_layout

\begin_layout Standard
The term Holoportation is defined and exemplified in a
\noun on
Microsoft Research
\noun default
 paper by
\begin_inset CommandInset citation
LatexCommand citeauthor
key "holoportation"
literal "false"

\end_inset


\begin_inset CommandInset citation
LatexCommand cite
key "holoportation"
literal "false"

\end_inset

 where an end-to-end pipeline is laid out for the acquisition, transmission
 and display of 3D video facilitating real-time AR and VR experiences.
 The
\noun on
Microsoft Research
\noun default
 paper builds on works including by
\begin_inset CommandInset citation
LatexCommand citeauthor
key "Immersive-telepresence"
literal "false"

\end_inset


\begin_inset CommandInset citation
LatexCommand cite
key "Immersive-telepresence"
literal "false"

\end_inset

 2 years earlier which describes attempts at achieving
\begin_inset Quotes eld
\end_inset


\emph on
telepresence
\emph default

\begin_inset Quotes erd
\end_inset

, a term coined by Marvin Minksy to describe the transparent and intuitive
 remote control of robot arms as if they were the controllers own
\begin_inset CommandInset citation
LatexCommand cite
key "marvin-minksy"
literal "false"

\end_inset

.
 The term was broadened by Bill Buxton
\begin_inset CommandInset citation
LatexCommand cite
key "buxton-telepresence"
literal "false"

\end_inset

 to include the space of telecommunications to describe technology being
 used to make someone feel present in a different environment.

\begin_inset Flex TODO Note (inline)
status open

\begin_layout Plain Layout
Is telepresence relevant here? reverse telepresence for something else being
 telepresent in your space?
\end_layout

\end_inset

In the context of holoportation this is through the use of 3D video reconstructi
on.
 The aforementioned work by
\begin_inset CommandInset citation
LatexCommand citeauthor
key "Immersive-telepresence"
literal "false"

\end_inset


\begin_inset CommandInset citation
LatexCommand cite
key "Immersive-telepresence"
literal "false"

\end_inset

 used 10
\noun on
Kinect
\noun default
 cameras to capture a room before virtually reconstructing the models.

\end_layout

\begin_layout Standard
In service of demonstrating it's applicability to achieving
\emph on
telepresence
\emph default
, a figure was isolated from the surroundings and stereoscopically rear-projecte
d onto a screen for a single participant, a result of this can be seen in
 figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:stereoscopic"
plural "false"
caps "false"
noprefix "false"

\end_inset

.
\end_layout

\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open

\begin_layout Plain Layout
\align center
\begin_inset Graphics
	filename ../media/telepresence-stereoscopic.png
	lyxscale 30
	width 40col%

\end_inset


\end_layout

\begin_layout Plain Layout
\begin_inset Caption Standard

\begin_layout Plain Layout
An example of stereoscopic projection of depth aware footage captured by

\begin_inset CommandInset citation
LatexCommand citeauthor
key "Immersive-telepresence"
literal "false"

\end_inset


\begin_inset CommandInset citation
LatexCommand cite
key "Immersive-telepresence"
literal "false"

\end_inset


\begin_inset CommandInset label
LatexCommand label
name "fig:stereoscopic"

\end_inset


\end_layout

\end_inset


\end_layout

\begin_layout Plain Layout

\end_layout

\end_inset


\end_layout

\begin_layout Standard
The
\noun on
Microsoft Research
\noun default
 paper demonstrates a system using 8 cameras surrounding a space.
 Each camera captured both Near Infra-Red and colour images to construct
 a colour-depth video stream, a more complex camera configuration than in
 the others cited.
\end_layout

\begin_layout Standard
\begin_inset CommandInset citation
LatexCommand citeauthor
key "velt"
literal "false"

\end_inset


\begin_inset CommandInset citation
LatexCommand cite
key "velt"
literal "false"

\end_inset

 demonstrates a similar holoportation experience to
\noun on
LiveScan3D
\noun default
 capable of supporting multi-view configurations, it also supports both
 point clouds and meshes.
 Calibrating multiple view points is completed using the extrinsics and
 intrinsics of the camera.
 The extrinsics are the relative positions of each
\noun on
Kinect
\noun default
 camera while the intrinsics describe the internal properties of each camera,
 the focal length and optical centre.

\end_layout

\begin_layout Standard
The intrinsics of the
\noun on
Kinect
\noun default
 camera can be retrieved from the
\noun on
Kinect
\noun default
 SDK while the extrinsics are obtained in one of two ways.
 Extrinsics can be imported and parsed from XML for manual selection or
 estimated using
\noun on
OpenCV
\noun default
 and a checkerboard pattern.
 When considering holoportation systems of this kind, comparatively few
 implement multiple views as a result the increased complexity involved
 in calibration.
\end_layout

\begin_layout Standard
\begin_inset Flex TODO Note (inline)
status open

\begin_layout Plain Layout
Link to livescan?
\end_layout

\end_inset


\end_layout

\begin_layout Subsection
Multi-Source Holoportation
\end_layout

\begin_layout Standard
\begin_inset Flex TODO Note (inline)
status open

\begin_layout Plain Layout
More?
\end_layout

\end_inset


\end_layout

\begin_layout Standard
The space of multi-source holoportation has been explored by
\begin_inset CommandInset citation
LatexCommand citeauthor
key "group-to-group-telepresence"
literal "false"

\end_inset


\begin_inset CommandInset citation
LatexCommand cite
key "group-to-group-telepresence"
literal "false"

\end_inset

 in the context of shared architectural design spaces in virtual reality
 similar to a conference call.
 Two groups of people were captured in 3D using clusters of
\noun on
Kinect
\noun default
 cameras before having these renders transmitted to the other group.
 Each group reconstructs the other's render for display in virtual reality
 in conjunction with their own.
 In doing so a shared virtual space for the two groups has been created
 and it can be seen to implement the process of holoportation.
 The strength of the system as a shared architectural design experience
 is emergent of the semantics of the virtual space where a World in Miniature
 (WIM) metaphor is used.
\end_layout

\begin_layout Standard
The Worlds in Miniature is described by
\begin_inset CommandInset citation
LatexCommand citeauthor
key "wim"
literal "false"

\end_inset


\begin_inset CommandInset citation
LatexCommand cite
key "wim"
literal "false"

\end_inset

 as a set of interfaces between the user and the virtual space they experience
 using tactile and visual tools.
 The interface involves providing the user with a miniature render of the
 world they are inhabiting that can interacted with in order to affect the
 full scale environment around them.
\end_layout

\begin_layout Standard
This navigation tool maps well to
\begin_inset CommandInset citation
LatexCommand citeauthor
key "group-to-group-telepresence"
literal "false"

\end_inset

's
\begin_inset CommandInset citation
LatexCommand cite
key "group-to-group-telepresence"
literal "false"

\end_inset

 architecture groupware design, an image captured during the work can be
 seen in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:World-in-Miniature-group-by-group"
plural "false"
caps "false"
noprefix "false"

\end_inset

.
\end_layout

\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open

\begin_layout Plain Layout
\align center
\begin_inset Graphics
	filename ../media/group-by-group.png
	lyxscale 30
	width 50col%

\end_inset


\end_layout

\begin_layout Plain Layout
\begin_inset Caption Standard

\begin_layout Plain Layout
World in Miniature render demonstrated in a multi-source holoportation context
 by
\begin_inset CommandInset citation
LatexCommand citeauthor
key "group-to-group-telepresence"
literal "false"

\end_inset


\begin_inset CommandInset citation
LatexCommand cite
key "group-to-group-telepresence"
literal "false"

\end_inset


\begin_inset CommandInset label
LatexCommand label
name "fig:World-in-Miniature-group-by-group"

\end_inset


\end_layout

\end_inset


\end_layout

\end_inset


\end_layout

\begin_layout Subsection
High Bandwidth Media Streaming
\end_layout

\begin_layout Standard
\begin_inset Flex TODO Note (inline)
status open

\begin_layout Plain Layout
RTP
\end_layout

\end_inset


\end_layout

\begin_layout Standard
\begin_inset Flex TODO Note (inline)
status open

\begin_layout Plain Layout
UDP
\end_layout

\end_inset


\end_layout

\begin_layout Standard
\begin_inset Flex TODO Note (inline)
status open

\begin_layout Plain Layout
4K media streaming?
\end_layout

\end_inset


\end_layout

\begin_layout Standard
\begin_inset Flex TODO Note (inline)
status open

\begin_layout Plain Layout
Compression? ZSTD
\end_layout

\end_inset


\end_layout

\begin_layout Subsection
Summary
\end_layout

\begin_layout Section
LiveScan3D
\begin_inset CommandInset label
LatexCommand label
name "sec:LiveScan3D"

\end_inset


\end_layout

\begin_layout Standard

\noun on
LiveScan3D
\noun default
 is a suite of software developed by Marek Kowalski, Jacek Naruniec and
 Michal Daniluk of the Warsaw University of Technology in 2015
\begin_inset CommandInset citation
LatexCommand cite
key "livescan3d"
literal "false"

\end_inset

.
 The suite utilises the
\noun on
Xbox Kinect
\noun default
 v2 camera to record and transmit 3D renders over an IP network.
 A server can manage multiple clients simultaneously in order to facilitate
 multi-view configurations, it is then responsible for displaying the renderings
 in real-time and/or transmitting composite renders to a user experience
 or UE.
 This architecture can be seen in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:LiveScanArchitecture"
plural "false"
caps "false"
noprefix "false"

\end_inset

.
\end_layout

\begin_layout Standard
These renderings take the form of a point cloud, a collection of 3D co-ordinates
 each with an associated RGB colour value.
 There are many methods by which point clouds can be used to construct surfaces
 suited for traditional computer graphics applications
\begin_inset CommandInset citation
LatexCommand cite
key "point-cloud-surface"
literal "false"

\end_inset

 however for the purposes of an interactive or real-time application the
 plotting of each point of the cloud in a 3D space using a suitable point
 size can create a coloured mesh visually representing the captured object
 while keeping the processing pipeline streamlined.
 This is the approach taken in
\noun on
LiveScan
\noun default
.
\end_layout

\begin_layout Standard
As a result of it's analogous nature to a traditional frame of 2D video,
 the terms
\begin_inset Quotes eld
\end_inset

render
\begin_inset Quotes erd
\end_inset

,
\begin_inset Quotes eld
\end_inset

point cloud
\begin_inset Quotes erd
\end_inset

 and
\begin_inset Quotes eld
\end_inset

frame
\begin_inset Quotes erd
\end_inset

 are used interchangeably from here.
\end_layout

\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open

\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
	filename ../media/LiveScanArchitecture.png
	lyxscale 50
	width 70col%

\end_inset


\end_layout

\begin_layout Plain Layout
\begin_inset Caption Standard

\begin_layout Plain Layout
The architecture of the
\noun on
LiveScan3D
\noun default
 suite
\begin_inset CommandInset label
LatexCommand label
name "fig:LiveScanArchitecture"

\end_inset


\end_layout

\end_inset


\end_layout

\end_inset


\end_layout

\begin_layout Subsection

\noun on
LiveScan
\noun default
 Client
\end_layout

\begin_layout Standard
The
\noun on
LiveScan
\noun default
 Client is responsible for interfacing with the
\noun on
Kinect
\noun default
 sensor via the
\noun on
Kinect
\noun default
 v2 SDK and transmitting frames to the
\noun on
LiveScan
\noun default
 Server.
 Body detection takes place client side, as does calibration when using
 multiple sensors.
\end_layout

\begin_layout Standard
Only one
\noun on
Kinect
\noun default
 sensor can be connected to each computer as a result of the SDK's restrictions.
 A system used by multiple clients in this way lends itself well to multi-source
 configurations over the internet.
\end_layout

\begin_layout Standard
\begin_inset Flex TODO Note (inline)
status open

\begin_layout Plain Layout
Extend
\end_layout

\end_inset


\end_layout

\begin_layout Subsection

\noun on
LiveScan
\noun default
 Server
\end_layout

\begin_layout Standard
The server component of the
\noun on
LiveScan
\noun default
 suite is responsible for managing and receiving 3D renders from connected
 clients.
 These holograms are reconstructed in an interactive
\noun on
OpenGL
\noun default
window functioning in a similar fashion to that of a traditional camera
 allowing.
 Holograms can then be transmitted to the user experience or UE, constituting
 an XR client such as the
\noun on
Hololens
\noun default
 or
\noun on
Android
\noun default
 app.
 When considering the code architecture of this application there are three
 main components.

\end_layout

\begin_layout Standard
\begin_inset Flex TODO Note (inline)
status open

\begin_layout Plain Layout
Less depth? Move below to appendix?
\end_layout

\end_inset


\end_layout

\begin_layout Description
OpenGLWindow Presentation layer of the application.
 Separate window spawned by the
\noun on
LiveScanServer
\noun default
 responsible for drawing point clouds and responding to user control.

\end_layout

\begin_layout Description
KinectServer Network layer of the application.
 The main window make requests of this component to receive transmitted
 point clouds.
\end_layout

\begin_layout Description
KinectSocket
\noun on

\noun default
Child objects contained within the
\noun on
KinectServer
\noun default
.
 A traditional network socket object representing a single TCP connection
 between the server and a client.
\end_layout

\begin_layout Description
TransferServer
\end_layout

\begin_layout Description
TransferSocket
\end_layout

\begin_layout Standard
\begin_inset Flex TODO Note (inline)
status open

\begin_layout Plain Layout
Populate
\end_layout

\end_inset


\end_layout

\begin_layout Standard
This structure can be seen in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:server-structure"
plural "false"
caps "false"
noprefix "false"

\end_inset

.
\end_layout

\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open

\begin_layout Plain Layout
\align center
\begin_inset Graphics
	filename ../media/initial-state.png
	lyxscale 30
	width 50col%

\end_inset


\end_layout

\begin_layout Plain Layout
\begin_inset Caption Standard

\begin_layout Plain Layout
Initial architecture of the
\noun on
LiveScan3D
\noun default
 server
\begin_inset CommandInset label
LatexCommand label
name "fig:server-structure"

\end_inset


\end_layout

\end_inset


\end_layout

\end_inset


\end_layout

\begin_layout Standard
Received frames in the form of lists of vertices, RGB values, camera poses
 and bodies overwrite shared variables between the main window and the
\noun on
OpenGL
\noun default
 window.
\end_layout

\begin_layout Subsection
Calibration & Multi-View Configurations
\end_layout

\begin_layout Standard
When using a single client setup, frames are transmitted in their own coordinate
 space with the origin defined as the
\noun on
Kinect
\noun default
 camera and the captured scene rendered in front of it.
\end_layout

\begin_layout Standard
When using multiple sensors, the server would be unable to combine these
 unique Euclidean spaces without knowledge of the sensors positions relative
 to each other, the extrinsics of the system.
\end_layout

\begin_layout Standard
In order to make a composite frame a calibration process is completed client
 side following instruction by the server.
\end_layout

\begin_layout Standard
Calibration is completed in two steps, an initial estimation followed by
 a refinement process.
 The initial estimation is completed by informing the server of which calibratio
n marker layouts are being used within the space.
 Client's identify possible visible markers like that seen in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:calibration-marker"
plural "false"
caps "false"
noprefix "false"

\end_inset

 using thresholding.
 Following this identification, the location of the marker can be found
 within the sensors coordinate system.

\end_layout

\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open

\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
	filename /home/andy/uni/dissertation/media/calibration.png
	lyxscale 30
	width 20col%

\end_inset


\end_layout

\begin_layout Plain Layout
\begin_inset Caption Standard

\begin_layout Plain Layout
Example marker used within the LiveScan3D calibration process
\begin_inset CommandInset label
LatexCommand label
name "fig:calibration-marker"

\end_inset


\end_layout

\end_inset


\end_layout

\end_inset


\end_layout

\begin_layout Standard
This information can be used to transform points from the cameras coordinate
 system to the markers frame of reference.
 As the relative locations of different markers are defined at the server,
 a world coordinate system can be defined as the centre of these markers.
 Typically 4 different markers are placed on the faces around the vertical
 axis of a cuboid allowing views in 360°.
\end_layout

\begin_layout Standard
This world coordinate space has shifted the origin from being the position
 of the single
\noun on
Kinect
\noun default
 sensor to being a point in the centre of the calibration markers that each
 camera now orbits.
 As part of this calibration process the server distributes transformations
 to each client defining where they sit within this world coordinate space.
 Client's can now transform acquired renders from their own frame of reference
 to the world coordinate system at the point of capture and each point cloud
 can be merged coherently.
\end_layout

\begin_layout Standard
The refinement process is completed server side by requesting a single frame
 from each connected client and using Iterative Closest Points
\begin_inset CommandInset citation
LatexCommand cite
key "ICP"
literal "false"

\end_inset

 (ICP) to improve the inter-camera relationships.
\end_layout

\begin_layout Standard
The
\noun on
OpenGL
\noun default
 display space has it's origin within the centre of the visible box, this
 means that for single sensor setups this is also the location of the camera.
\end_layout

\begin_layout Subsection
Buffers and a non-blocking Network
\end_layout

\begin_layout Subsection

\noun on
LiveScan
\noun default
 Hololens
\end_layout

\begin_layout Subsection

\noun on
LiveScan
\noun default
 Android
\end_layout

\begin_layout Subsection
Design Considerations
\end_layout

\begin_layout Standard
When assessing
\noun on
LiveScan
\noun default
's suitability for extension to a multi-source context, the original network
 design should be investigated.
\end_layout

\begin_layout Standard
The original applications were best suited to a local environment as a result
 of many of the network functions being blocking.
 Should any delays or interruptions have occurred during a network operation,
 then the application would need to stop and wait for remediation before
 continuing.
 Interruptions of this type are more common when moving from a LAN environment
 to communicating over the open internet.
\end_layout

\begin_layout Standard
From a network perspective the need to make these actions non-blocking would
 present benefits for both multi-source and multi-view configurations.
\end_layout

\begin_layout Standard
Additionally, the network polling rates are higher than the frame rate of
 the produced video, when the server requests a frame before a new one has
 been captured by the client, the same previous frame is resent.
 This presents unnecessary bandwidth usage.
\end_layout

\begin_layout Standard
Moving to a multi-source context implies transmitting over the internet
 as opposed to local operation, this will make blocking actions and bloated
 bandwidth more dangerous to user experience.
\end_layout

\begin_layout Standard
Work has been undertaken that allows multiple concurrent TCP connections
 to be used by each client to increase bandwidth.
 Further work is being undertaken to un-block network actions.
\end_layout

\begin_layout Section
Developments
\end_layout

\begin_layout Subsection
Server
\end_layout

\begin_layout Standard
The required development to take the existing
\noun on
LiveScan
\noun default
 codebase to the desired multi-source result can be split into two areas
 of concern.
\end_layout

\begin_layout Labeling
\labelwidthstring 00.00.0000

\series bold
\emph on
Network
\series default
\emph default
 The network layer of the
\noun on
LiveScan
\noun default
 server must be updated in order to accommodate multiple clients logically
 grouped into
\begin_inset Quotes eld
\end_inset

sources
\begin_inset Quotes erd
\end_inset

 for which separate frames are collected for display.
\end_layout

\begin_layout Labeling
\labelwidthstring 00.00.0000

\series bold
\emph on
Display
\series default
\emph default
 Finally the display element of the server should be extended to allow the
 simultaneous presentation of multiple point clouds.
 These objects should be individually arrangeable in the display space allowing
 both movement and rotation.
\end_layout

\begin_layout Standard
As of January 2020 the native method for displaying renderings, the server's

\noun on
OpenGL
\noun default
 window, has been modified such that it can construct and render point clouds
 from multiple sources.
 To do so a dynamic sub-system of geometric transformations has been written
 in order to coherently arrange sources within the space when reconstructed.
 The default arrangements can be overridden with keyboard controls facilitating
 arbitrary placement and rotation of separate sources within the window's
 co-ordinate space.
\end_layout

\begin_layout Subsubsection
Geometric Transformations
\end_layout

\begin_layout Standard
Within the
\noun on
LiveScan3D
\noun default
 server source code are utility structures and classes which were extended
 in order to develop a wider geometric manipulation system.
 Structures defining Cartesian coordinates in both 2D and 3D spaces called

\noun on
Point2f
\noun default
 and
\noun on
Point3f
\noun default
 respectively are used in drawing skeletons as captured by the
\noun on
Kinect
\noun default
 camera.
 There is also a class defining an affine transformation, the definitions
 for all three can be seen in appendix
\begin_inset CommandInset ref
LatexCommand ref
reference "sec:Existing-Data-Structures"
plural "false"
caps "false"
noprefix "false"

\end_inset

.
\end_layout

\begin_layout Standard
Affine transformations are a family of geometric transformations that preserve
 parallel lines within geometric spaces.
 Some examples of affine transformations include scaling, reflection, rotation,
 translation and shearing.
\end_layout

\begin_layout Standard
The class definition is made up of a three-by-three transformation matrix
 and single 3D vector for translation, within the native codebase it is
 used for both camera poses and world transformations.

\end_layout

\begin_layout Standard
A camera pose is the affine transformation defining the position and orientation
 of the
\noun on
Kinect
\noun default
 camera when drawn in the
\noun on
OpenGL
\noun default
 space as a green cross.
 The world transformations are used as part of the calibration process when
 using multi-view configurations.

\end_layout

\begin_layout Standard
When considering how each source's render would be arranged in the space,
 the use of this class definition was extended.
 As the use of affine transformations is mostly limited to use as a data
 structure within the base source code, some utility classes and functions
 were required in order to fully maximise their effectiveness.
\end_layout

\begin_layout Paragraph
Transformer
\end_layout

\begin_layout Standard
The motivation in writing the
\noun on
Transformer
\noun default
 was to create a generic framework of geometric transformations that could
 be utilised by the
\noun on
OpenGL
\noun default
 display to arrange separate point clouds.
 At a high level this is done by implementing matrix arithmetic functions
 in the context of their use for applying linear transformations to Cartesian
 coordinates.

\end_layout

\begin_layout Standard
The
\noun on
Transformer
\noun default
 class has static methods to apply
\noun on
AffineTransform
\noun default
s to both
\noun on
Point3f
\noun default
 structures and lists of raw vertices as received from
\noun on
LiveScan
\noun default
 clients.
\end_layout

\begin_layout Standard
Additionally there are utility functions to bidirectionally cast between

\noun on
Point3f
\noun default
 data structures and the lists of received vertices.
\end_layout

\begin_layout Standard
Finally static methods generate common rotation transformations about each
 axis given an arbitrary angle.
 This provided a foundation on which to define how the
\noun on
OpenGL
\noun default
 space would arrange separate sources within it's combined co-ordinate space.
\end_layout

\begin_layout Standard
Currently missing is the ability to combine transformations into compound
 matrices.
 Applying multiple transformations to large numbers of coordinates would
 be more computationally expensive than applying one compound matrix and
 when running in realtime this should be considered.
 This is not yet included due to the current lack of need to apply multiple
 successive transformations.
 If the need were to arise following further refinements, it would be implemente
d as described here.
\end_layout

\begin_layout Subsubsection
Separation of Network and Presentation Layer
\end_layout

\begin_layout Standard
During initial testing frames received from a live sensor were intercepted
 and serialized to XML in local storage.
 These frames were loaded into memory as the server was started and merged
 with those received live before display.
\end_layout

\begin_layout Standard
The composite frame can be seen in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:Initial-composite-frame"
plural "false"
caps "false"
noprefix "false"

\end_inset

.
\end_layout

\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open

\begin_layout Plain Layout
\align center
\begin_inset Graphics
	filename ../media/pretransform.jpg
	lyxscale 10
	width 50col%

\end_inset


\end_layout

\begin_layout Plain Layout
\begin_inset Caption Standard

\begin_layout Plain Layout
Initial multi-source composite testing frame
\begin_inset CommandInset label
LatexCommand label
name "fig:Initial-composite-frame"

\end_inset


\end_layout

\end_inset


\end_layout

\end_inset


\end_layout

\begin_layout Standard
The objects can be seen to be occupying the same space due to their similar
 positions in the frame during capture.
 This is not a sufficient solution for displaying separate sources and so
 geometric transformations like those described above were employed to separate
 the two.
 The change in software structure at this stage can be seen in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:Initial-testing-layout"
plural "false"
caps "false"
noprefix "false"

\end_inset

.
 A rotation of 180° in the vertical (
\begin_inset Formula $y$
\end_inset

) axis pivoted the frames such that they faced those being received live,
 the results can be seen in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:180-degree-rotation"
plural "false"
caps "false"
noprefix "false"

\end_inset

.
\end_layout

\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open

\begin_layout Plain Layout
\align center
\begin_inset Graphics
	filename ../media/local-testing.png
	lyxscale 30
	width 70col%

\end_inset


\end_layout

\begin_layout Plain Layout
\begin_inset Caption Standard

\begin_layout Plain Layout
Initial testing process transforming frames loaded from local storage
\begin_inset CommandInset label
LatexCommand label
name "fig:Initial-testing-layout"

\end_inset


\end_layout

\end_inset


\end_layout

\begin_layout Plain Layout

\end_layout

\end_inset


\end_layout

\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open

\begin_layout Plain Layout
\align center
\begin_inset Graphics
	filename ../media/180flip.jpg
	lyxscale 10
	width 50col%

\end_inset


\end_layout

\begin_layout Plain Layout
\begin_inset Caption Standard

\begin_layout Plain Layout
Composite testing frame following 180° rotation of recorded source in
\begin_inset Formula $y$
\end_inset

 axis
\begin_inset CommandInset label
LatexCommand label
name "fig:180-degree-rotation"

\end_inset


\end_layout

\end_inset


\end_layout

\begin_layout Plain Layout

\end_layout

\end_inset


\end_layout

\begin_layout Standard
At this point it was noted that transforming and arranging figures within
 the main window before passing the
\noun on
OpenGL
\noun default
 window a complete point cloud spreads responsibility for the display logic
 to the main window.
\end_layout

\begin_layout Standard

\noun on
LiveScan3D
\noun default
 is capable of supporting more display methods than just the native
\noun on
OpenGL
\noun default
 implementation with versions available for both
\noun on
Microsoft Hololens
\noun default
 and Mobile AR applications.
 Therefore when designing the multi-source capabilities, the separation
 of logic between the network and presentation layer is important.

\end_layout

\begin_layout Standard
The way in which the
\noun on
OpenGL
\noun default
 window arranges the figures in it's display space should be defined by
 the
\noun on
OpenGL
\noun default
 window itself.
 The network layer should be display agnostic and not make assumptions about
 how the display will process figures.
\end_layout

\begin_layout Standard
In order to follow this design the transformations were moved to instead
 occur within the
\noun on
OpenGL
\noun default
 window class.
 To allow this the shared variables between the
\noun on
MainWindow
\noun default
 and
\noun on
OpenGL
\noun default
 were changed.
 A Frame structure was defined to wrap an individual point cloud with a
 client ID to allow differentiation, the definition can be seen in appendix

\begin_inset CommandInset ref
LatexCommand ref
reference "subsec:Frame"
plural "false"
caps "false"
noprefix "false"

\end_inset

.
 The structure holds fields for each of the lists previously shared between
 the two objects including a list of vertices (co-ordinates) and the RGB
 values for each as well as the camera poses and bodies.
\end_layout

\begin_layout Standard
The original
\noun on
LiveScan3D
\noun default
 cleared each of these variables before retrieving a new frame, when moving
 to a multi-source architecture the ability to individually update source
 point clouds was prioritised.
 This would avoid blocking the entire display when unable to receive frames
 from a specific client, other clients would still be able to have frames
 updated promptly.
\end_layout

\begin_layout Standard
To accomplish this a dictionary was used as the shared variable with each
 client's frame referenced by it's client ID.
 In doing so only one frame per client is kept and each new frame overrides
 the last.
 During rendering the dictionary is iterated through and each point cloud
 combined.
 During combination a client specific transformation is retrieved from an
 instance of the
\noun on
DisplayFrameTransformer
\noun default
 class.
 This object is a member of the
\noun on
OpenGL
\noun default
 window and is responsible for defining the orientation and position of
 each point cloud.
\end_layout

\begin_layout Subsubsection
DisplayFrameTransformer
\end_layout

\begin_layout Standard
The
\noun on
DisplayFrameTransformer
\noun default
 is responsible for generating transformations for the sources displayed
 within the
\noun on
OpenGL
\noun default
 window, a UML diagram for the class can be seen in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:UML-displayframetransformer"
plural "false"
caps "false"
noprefix "false"

\end_inset

.
\end_layout

\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open

\begin_layout Plain Layout
\align center
\begin_inset Graphics
	filename ../media/DisplayFrameTransformer.png
	lyxscale 50
	width 60col%

\end_inset


\end_layout

\begin_layout Plain Layout
\begin_inset Caption Standard

\begin_layout Plain Layout
UML diagram for
\noun on
DisplayFrameTransformer
\noun default

\begin_inset CommandInset label
LatexCommand label
name "fig:UML-displayframetransformer"

\end_inset


\end_layout

\end_inset


\end_layout

\begin_layout Plain Layout

\end_layout

\end_inset


\end_layout

\begin_layout Standard
Each client is assigned a default transformation which can be overridden
 using keyboard controls.
\end_layout

\begin_layout Standard
Clients are initially arranged in a circle around the origin in the center
 of the space.
 This is done by retrieving a transformation from the
\noun on
Transformer
\noun default
 for a rotation in the
\begin_inset Formula $y$
\end_inset

 axis for each client,
\begin_inset Formula $n$
\end_inset

.
 Each angle of rotation,
\begin_inset Formula $\alpha$
\end_inset

, is calculated using the below,
\end_layout

\begin_layout Standard
\begin_inset Formula
\[
\alpha\left(n\right)=\frac{n}{\sum clients}\cdotp360\textdegree
\]

\end_inset


\end_layout

\begin_layout Standard
Similar to the shared variables between the
\noun on
MainWindow
\noun default
 and
\noun on
OpenGL
\noun default
 window, client transformations are stored within a dictionary indexed by
 client ID.
\end_layout

\begin_layout Standard
The
\noun on
DisplayFrameTransformer
\noun default
 also has methods to override these initial transforms with the RotateClient()
 and TranslateClient() methods.
 When these methods are called for the first time on a point cloud, an object
 defining the position and rotation is populated using the default rotation.
 From here the presence of a client override results in applied transforms
 being defined by these values as opposed to the default orientation.
\end_layout

\begin_layout Standard
This leaves the current architecture of the server application as described
 in figure
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:current-state-diagram"
plural "false"
caps "false"
noprefix "false"

\end_inset

.
\end_layout

\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open

\begin_layout Plain Layout
\align center
\begin_inset Graphics
	filename ../media/december-state.png
	lyxscale 30
	width 60col%

\end_inset


\end_layout

\begin_layout Plain Layout
\begin_inset Caption Standard

\begin_layout Plain Layout
Current state of
\noun on
LiveScan
\noun default
 server structure with
\noun on
OpenGL
\noun default
 window-based transformer
\begin_inset CommandInset label
LatexCommand label
name "fig:current-state-diagram"

\end_inset


\end_layout

\end_inset


\end_layout

\begin_layout Plain Layout

\end_layout

\end_inset


\end_layout

\begin_layout Subsubsection
Control Scheme
\end_layout

\begin_layout Standard
The movement of objects within the
\noun on
OpenGL
\noun default
 space is implemented through keyboard controls.
 While using the mouse would allow fine-grained and intuitive control, the
 number of axes for motion and rotation available to objects makes defining
 specific keys for each more flexible.
 This additionally removes the need to redefine or overload the camera controls.
\end_layout

\begin_layout Standard
The
\begin_inset Quotes eld
\end_inset

I
\begin_inset Quotes erd
\end_inset

 key is used to cycle through displayed sources, the currently selected
 source is the subject of each of the movement actions.
 Sources are moved across the horizontal plane (
\family roman
\series medium
\shape up
\size normal
\emph off
\bar no
\strikeout off
\xout off
\uuline off
\uwave off
\noun off
\color none

\begin_inset Formula $x$
\end_inset


\family default
\series default
\shape default
\size default
\emph default
\bar default
\strikeout default
\xout default
\uuline default
\uwave default
\noun default
\color inherit
,
\family roman
\series medium
\shape up
\size normal
\emph off
\bar no
\strikeout off
\xout off
\uuline off
\uwave off
\noun off
\color none

\begin_inset Formula $z$
\end_inset


\family default
\series default
\shape default
\size default
\emph default
\bar default
\strikeout default
\xout default
\uuline default
\uwave default
\noun default
\color inherit
) of the display space using a WASD-esque layout of the UHJK keys.
 Objects can be rotated about the vertical (
\begin_inset Formula $y$
\end_inset

) axis using the B and N keys.
 Finally the placement of an object can be reset to default using the R
 key, the addition of the shift modifier resets all clients.
\end_layout

\begin_layout Standard
Worth noting is that this represents arbitrary placement of sources in only
 two axes of position and one of rotation.
 This was a conscious choice as these are the most common and intuitive
 axes with which sources will need to be manipulated.
 The ability to allow movement in all axes would require only binding these
 actions to keys.
\end_layout

\begin_layout Standard
There is room to improve these controls as the directions of movement are
 in relation to the fixed axes of the display space as opposed to the view
 of the viewpoint camera.
 In practice this means that when moving objects in the display space the
 orientation of the space must be considered in order to identify which
 direction the object should be moved.

\end_layout

\begin_layout Standard
This is less intuitive than could be expected in other areas where such
 a control scheme is used such as video games or modelling software.
 In such implementations when moving objects the directions are typically
 taken from the camera's frame of reference.
 The feasibility of employing a similar control philosophy should be considered.
\end_layout

\begin_layout Subsubsection
Challenges
\end_layout

\begin_layout Standard
The main challenge encountered throughout the project so far was initially
 intercepting the live frames and serializing these as XML files in local
 storage.
 With no previous experience developing in C#, the opening steps of the
 project involved both getting to grips with the language based on previous
 work in C-like languages (Java, C) and understanding the layout of the
 codebase.
\end_layout

\begin_layout Standard
Initial attempts to serialize the frame structures resulted in no output
 to the file system and the multi-threaded nature of the graphical application
 led to no feedback for debugging.
 This was fixed by removing the affine transformations representing camera
 poses from the frame structure for the testing process.

\end_layout

\begin_layout Standard
This would imply that something about the nature of the
\noun on
AffineTransform
\noun default
 class type is causing errors when serializing.
 Java requires classes implement a
\emph on
serializable
\emph default
 interface in order to successfully save to file and further work will be
 required in order to determine whether the same concept is to blame in
 this situation.
 However for now the camera poses of local frames are not displayed in the

\noun on
OpenGL
\noun default
 window.
\end_layout

\begin_layout Subsubsection
Future Work
\end_layout

\begin_layout Standard
Following the extension of the
\noun on
OpenGL
\noun default
 window, the network layer of the
\noun on
KinectServer
\noun default
 can now be developed with the advantage of a fully functional display method
 for debugging.
\end_layout

\begin_layout Standard
The aim of this aspect of the project will be to alter the the
\noun on
KinectServer
\noun default
 in order to allow the logical grouping of connected clients into sources
 for separate display.
\end_layout

\begin_layout Standard
When integrated together the server as a whole will then be able to collect
 discrete point clouds from different sources and coherently display them
 separately in the space, achieving the objectives for this project.
\end_layout

\begin_layout Subsubsection
Network Layer Design Considerations
\end_layout

\begin_layout Standard
Some thought as to the design for the network layer has been undertaken.
 Although this has not yielded a final design for implementation, it has
 made apparent some of the conditions and constraints which must be considered.
\end_layout

\begin_layout Standard
When considering the initial steps for the project, it was thought that
 the network layer should be developed first.
 The design would involve separating much of the logic contained within
 the
\noun on
KinectServer
\noun default
 object into a new
\noun on
KinectSource
\noun default
 object which would represent a group of clients acting as a single source.
 It would function as a group of
\noun on
KinectSocket
\noun default
s that could be individually polled for new frames using the same interface
 currently being used by the
\noun on
KinectServer
\noun default
.
 The
\noun on
KinectServer
\noun default
 object itself would be reduced to simply managing these
\noun on
KinectSource
\noun default
s.
\end_layout

\begin_layout Standard
An advantage of this approach would be that it provide a suitable location
 to store additional information which should exist per source such as the
 calibration data and settings.
\end_layout

\begin_layout Standard
However it would also have represented a significant architecture change
 in the entire server application and without a functioning display method
 it would have been challenging to debug.
 This was the motivation for initially working on the display method.
\end_layout

\begin_layout Standard
Coming back to the network design following this work, a different design
 has been considered.
 A separate body of work currently being undertaken is investigating the
 network behaviour of the suite with a focus on unblocking the network sockets
 to aid in parallel operation.
\end_layout

\begin_layout Standard
In order to ease integration with developments in this work a less disruptive
 design was proposed.
\end_layout

\begin_layout Paragraph
Socket Handshake
\end_layout

\begin_layout Standard
The aim is to implement a method by which clients are grouped into sources
 that also allows them to identify themselves consistently when communicating
 over multiple sockets.
 Multiple sockets can be used by clients in order to make simultaneous connectio
ns to the server and increase bandwidth.
 However when doing so it is important to be able to identify which sockets
 represent which client when some may be at the IP address.
\end_layout

\begin_layout Standard
A method for doing so would involve a handshake process when new clients
 connect to the
\noun on
KinectServer
\noun default
.
 The proposed handshake would be initiated by the client when connecting
 to the server, at this point they include which source they should be grouped
 with using an integer ID.
 The server groups the socket as such and, if one has not been received,
 responds with a random identifier string that should be used across all
 sockets to identify the client.
 Should the newly connected socket be for a client that is already connected
 then the client will respond with it's existing identifier to inform the
 server that this ID has been ignored.
 In doing so the client now has a method of identifying itself agnostic
 of socket, and the server has a way of identifying the source which each
 socket is representing.
\end_layout

\begin_layout Subsection
Network
\end_layout

\begin_layout Subsection
Mobile AR
\end_layout

\begin_layout Section
Testing Methodology
\end_layout

\begin_layout Section
Results
\end_layout

\begin_layout Section
Summary
\end_layout

\begin_layout Standard
Within this piece the process of extending the
\noun on
LiveScan3D
\noun default
 software to include multi-source holoportation has been introduced.
 The use of such a system has many applications from those inherited from
 traditional 2D video such as conference calls to new utilisations that
 are wholly unique to the environment.
\end_layout

\begin_layout Standard
The literature review contextualises the
\noun on
LiveScan
\noun default
 suite and the wider spaces of AR, VR, 3D video and multi-source holoportation
 itself.
 Previous examples of holoportation are described and their aims of achieving
 telepresence are discussed.
\end_layout

\begin_layout Standard
The current state of the project is laid out showing good progress through
 the required areas of development.
 Of these areas of concern, the display element has been extended in order
 to allow the rendering of multiple environments simultaneously with a dynamic
 sub-system of geometric transformations.
 The transformations are responsive to user input allowing arbitrary placement
 and orientation of individual sources within the display space.
 While this control interface allows free movement in the most naturally
 traversed axes it could use some additional tuning to make it feel more
 intuitive.
\end_layout

\begin_layout Standard
The next steps for the project leading up to its completion are presented,
 the initial and current plans for the remaining work is described and additiona
l stretch goals are defined for any additional time.
 How the work will be presented in a final report is also described.
\end_layout

\begin_layout Section
Conclusions
\end_layout

\begin_layout Standard
Holoportation and multi-source configurations thereof are important technologies
 for cross reality experiences with broad appeal to many applications.
 The use of consumer hardware, specifically the
\noun on
Kinect
\noun default
, has accelerated the space.
\end_layout

\begin_layout Standard
At roughly halfway through the time allowed for this project the native
 display has successfully been extended to meet the deliverable specification.
 This has resulted in the
\noun on
OpenGL
\noun default
 window being capable of simultaneously rendering multiple sources with
 arbitrary placement and orientation within the display space.
\end_layout

\begin_layout Standard
From this point the network layer of the suite will be developed to also
 match the specification, allowing connected clients to be grouped into
 sources for polling and processing.
\end_layout

\begin_layout Standard
Following the development of the two, testing methodologies will be defined
 and carried out to gather quantitative results for the final product.
 A final report on the results will be available in May 2020.
\end_layout

\begin_layout Standard
\begin_inset Newpage newpage
\end_inset


\end_layout

\begin_layout Standard
\begin_inset CommandInset bibtex
LatexCommand bibtex
btprint "btPrintCited"
bibfiles "/home/andy/uni/dissertation/references"
options "bibtotoc"

\end_inset


\end_layout

\begin_layout Standard
\start_of_appendix
\begin_inset Flex TODO Note (inline)
status open

\begin_layout Plain Layout
I reckon this is all unnecessary, if any code goes in, it's not struct definitio
ns
\end_layout

\end_inset


\end_layout

\begin_layout Section
Existing Data Structures
\begin_inset CommandInset label
LatexCommand label
name "sec:Existing-Data-Structures"

\end_inset


\end_layout

\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../snippets/point2f.cs"
lstparams "language={[Sharp]C},caption={Cartesian coordinate in 2 dimensions}"

\end_inset


\end_layout

\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../snippets/point3f.cs"
lstparams "language={[Sharp]C},caption={Cartesian coordinate in 3 dimensions}"

\end_inset


\end_layout

\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../snippets/affinetransform.cs"
lstparams "language={[Sharp]C},caption={Affine transformation matrix with translation}"

\end_inset


\end_layout

\begin_layout Section
New Data Structures
\end_layout

\begin_layout Subsection
Frame
\begin_inset CommandInset label
LatexCommand label
name "subsec:Frame"

\end_inset


\end_layout

\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../snippets/frame.cs"
lstparams "language={[Sharp]C},caption={Point cloud with Client ID}"

\end_inset


\end_layout

\end_body
\end_document