From e82e9457dad1c4b4ff22d33cd61829009b4ba07e Mon Sep 17 00:00:00 2001 From: aj Date: Sun, 19 Jan 2020 09:56:04 +0000 Subject: [PATCH] ready for starting final --- dissertation/dissertation.lyx | 2852 +++++++++++++++++ midyear report/midyear.lyx | 2 +- .../references.bib => references.bib | 0 3 files changed, 2853 insertions(+), 1 deletion(-) create mode 100644 dissertation/dissertation.lyx rename midyear report/references.bib => references.bib (100%) diff --git a/dissertation/dissertation.lyx b/dissertation/dissertation.lyx new file mode 100644 index 0000000..eda2069 --- /dev/null +++ b/dissertation/dissertation.lyx @@ -0,0 +1,2852 @@ +#LyX 2.3 created this file. For more info see http://www.lyx.org/ +\lyxformat 544 +\begin_document +\begin_header +\save_transient_properties true +\origin unavailable +\textclass article +\use_default_options true +\begin_modules +customHeadersFooters +minimalistic +todonotes +\end_modules +\maintain_unincluded_children false +\language english +\language_package default +\inputencoding auto +\fontencoding global +\font_roman "utopia" "default" +\font_sans "default" "default" +\font_typewriter "default" "default" +\font_math "auto" "auto" +\font_default_family default +\use_non_tex_fonts false +\font_sc false +\font_osf false +\font_sf_scale 100 100 +\font_tt_scale 100 100 +\use_microtype false +\use_dash_ligatures true +\graphics default +\default_output_format default +\output_sync 0 +\bibtex_command biber +\index_command default +\paperfontsize default +\spacing single +\use_hyperref false +\pdf_title "Holoportation" +\pdf_author "Andy Pack" +\pdf_subject "The use of Kinect cameras to stream 3D video from client to server" +\pdf_bookmarks true +\pdf_bookmarksnumbered false +\pdf_bookmarksopen false +\pdf_bookmarksopenlevel 1 +\pdf_breaklinks false +\pdf_pdfborder false +\pdf_colorlinks false +\pdf_backref false +\pdf_pdfusetitle true +\papersize default +\use_geometry true +\use_package amsmath 1 +\use_package amssymb 1 +\use_package cancel 1 +\use_package esint 1 +\use_package mathdots 1 +\use_package mathtools 1 +\use_package mhchem 1 +\use_package stackrel 1 +\use_package stmaryrd 1 +\use_package undertilde 1 +\cite_engine biblatex +\cite_engine_type authoryear +\biblio_style plain +\biblatex_bibstyle ieee +\biblatex_citestyle ieee +\use_bibtopic false +\use_indices false +\paperorientation portrait +\suppress_date true +\justification true +\use_refstyle 1 +\use_minted 0 +\index Index +\shortcut idx +\color #008000 +\end_index +\leftmargin 2cm +\topmargin 2cm +\rightmargin 2cm +\bottommargin 2cm +\secnumdepth 3 +\tocdepth 3 +\paragraph_separation skip +\defskip medskip +\is_math_indent 0 +\math_numbering_side default +\quotes_style english +\dynamic_quotes 0 +\papercolumns 1 +\papersides 2 +\paperpagestyle fancy +\bullet 1 0 9 -1 +\tracking_changes false +\output_changes false +\html_math_output 0 +\html_css_as_file 0 +\html_be_strict false +\end_header + +\begin_body + +\begin_layout Title + +\size giant +Multi-Source Holoportation +\end_layout + +\begin_layout Author +Andy Pack +\end_layout + +\begin_layout Standard +\begin_inset VSpace bigskip +\end_inset + + +\end_layout + +\begin_layout Standard +\align center +\begin_inset Graphics + filename ../surreylogo.png + lyxscale 30 + width 60col% + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset VSpace 10pheight% +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +\align center + +\size large +A dissertation submitted to the Department of Electronic Engineering in + 
partial fulfilment of the Degree of Bachelor of Engineering in Electronic + Engineering. +\end_layout + +\begin_layout Standard +\begin_inset VSpace vfill +\end_inset + + +\end_layout + +\begin_layout Standard +\noindent +\align center +May 2020 +\size large + +\begin_inset Newline newline +\end_inset + +Department of Electrical and Electronic Engineering +\begin_inset Newline newline +\end_inset + +Faculty of Engineering and Physical Sciences +\begin_inset Newline newline +\end_inset + +University of Surrey +\end_layout + +\begin_layout Standard +\begin_inset Newpage newpage +\end_inset + + +\end_layout + +\begin_layout Abstract +abstract +\end_layout + +\begin_layout Standard +\begin_inset CommandInset toc +LatexCommand tableofcontents + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset VSpace medskip +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset FloatList figure + +\end_inset + + +\begin_inset CommandInset toc +LatexCommand lstlistoflistings + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset VSpace vfill +\end_inset + + +\end_layout + +\begin_layout Section* +Acknowledgements +\end_layout + +\begin_layout Standard +\noindent +\align center +acknowledgements +\end_layout + +\begin_layout Standard +\begin_inset Newpage newpage +\end_inset + + +\end_layout + +\begin_layout Right Footer +Andy Pack / 6420013 +\end_layout + +\begin_layout Left Footer +January 2020 +\end_layout + +\begin_layout Section +Introduction +\end_layout + +\begin_layout Standard +The aim of this project is to develop a piece of software capable of supporting + multi-source holoportation (hologram teleportation) using the +\emph on +\noun on +LiveScan3D +\emph default +\noun default + +\begin_inset CommandInset citation +LatexCommand cite +key "livescan3d" +literal "false" + +\end_inset + + suite of software as a base. +\end_layout + +\begin_layout Standard +As the spaces of augmented and virtual reality become more commonplace and + mature, the ability to capture and stream 3D renders of objects and people + over the internet using consumer-grade hardware has many possible applications. +\end_layout + +\begin_layout Standard +This represents one of the most direct evolutions of traditional video streaming + when applied to this new technological space. +\end_layout + +\begin_layout Standard +A view of what multi-source achieves can be seen in figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:premise" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. + Both single and multi-view configurations of cameras are shown, the latter + allowing more complete renders of the subject to be acquired. + Both shapes are presented through the +\emph on +user experience +\emph default +, control schemes and visual language can vary between implementations across + AR/VR and traditional 2D displays. 
+\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset Graphics + filename ../media/premise.png + lyxscale 30 + width 70col% + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption Standard + +\begin_layout Plain Layout +Demonstration of a multi-source holoportation system including single and + multiple view camera configurations +\begin_inset CommandInset label +LatexCommand label +name "fig:premise" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard + +\noun on +LiveScan3D +\noun default + is a suite of 3D video software capable of recording and transmitting video + from client to server for rendering. + The suite is fast and uses consumer grade hardware for capture in the form + of +\noun on +Xbox Kinect +\noun default + cameras, it is used in various projects at the +\noun on +University of Surrey +\noun default + and has multiple setups in dedicated lab space. +\end_layout + +\begin_layout Standard + +\noun on +LiveScan3D's +\noun default + use +\noun on + +\noun default +of +\noun on +Xbox Kinect +\noun default + cameras allows the capture and stream of 3D renders in single or multi-view + configurations using calibrated cameras however the server is only able + to process and reconstruct one environment at a time. +\end_layout + +\begin_layout Standard +The capability to concurrently receive and reconstruct streams of different + objects further broadens the landscape of possible applications, analogous + to the movement from traditional phone calls to conference calling. +\end_layout + +\begin_layout Section +Literature Review +\end_layout + +\begin_layout Standard +The significance of 3D video like that captured and relayed using the +\noun on +LiveScan +\noun default + suite is related to the development of new technologies able to immersively + display such video content. + Therefore before discussing the specific extension that this project will + make to the +\noun on +LiveScan +\noun default + software it is important to contextualise it within the space of 3D video + capture while also considering it's implications for AR and VR applications. +\end_layout + +\begin_layout Subsection +Cross Reality (XR) +\end_layout + +\begin_layout Standard +Cross reality is a broad term describing the combination of technology with + a user's experience of their surroundings in order to alter the experience + of reality. + It is used as an umbrella term for virtual, mixed and augmented reality + experiences and technology. + Before continuing, the differences between these technologies is considered. +\end_layout + +\begin_layout Description +Virtual The replacement of a user's experience of their surroundings, rendering + a new space that the user appears to inhabit. + Typically achieved through face mounted headsets ( +\emph on +Facebook Oculus, HTC Vive, Playstation VR, Valve Index +\emph default +). +\end_layout + +\begin_layout Description +Augmented The augmentation of a users surroundings by overlaying the environment + with digital alterations. 
+ Can be achieved with translucent/transparent headsets +\emph on +(Microsoft Hololens, Google Glass) +\emph default + or through mobile experiences +\emph on +(Android ARCore, Apple ARKit) +\emph default + both when head mounted +\emph on +(Google Cardboard, Google Daydream, Samsung Gear VR) +\emph default + and handheld +\emph on +(Pokemon GO) +\emph default +. +\end_layout + +\begin_layout Description +Mixed A combination of virtual and augmented elements in order to allow + interaction with an augmented reality. + Can be achieved in different ways, typically starting with either an AR + or VR experience and including aspects of the other. + At a higher level, mixed reality can be described as a continuous scale + between the entirely real and entirely virtual with augmented reality occurring + in between. +\end_layout + +\begin_layout Standard +The burgeoning of these three forms of XR via consumer hardware such as + the +\noun on +Microsoft Hololens +\noun default + and +\noun on +Oculus Rift +\noun default + represents a new space for the consumption of interactive media experiences. +\end_layout + +\begin_layout Standard +Although VR and AR headsets have accelerated the development of XR technology, + they are not the only way to construct XR experiences. + +\begin_inset CommandInset citation +LatexCommand citeauthor +key "roomalive" +literal "false" + +\end_inset + + +\begin_inset CommandInset citation +LatexCommand cite +key "roomalive" +literal "false" + +\end_inset + + demonstrate +\emph on +RoomAlive +\emph default +, an AR experience using depth cameras and projectors (referred to as +\emph on +procams +\emph default +) to construct experiences in any room. + This is presented through games and visual alterations to the user's surroundings. + A strength of the system is its self-contained nature, able to automatically + calibrate the camera arrangements using correspondences found between each + view. + Experience-level heuristics are also discussed regarding capturing and + maintaining user attention in an environment where the experience can occur + anywhere, including behind the user. + +\end_layout + +\begin_layout Standard +A point is also made about how the nature of this room-based experience + breaks much of the typical game-user interaction established by virtual + reality and video games. + In contrast to traditional and virtual reality game experiences where the + game is ultimately in control of the user or user avatar, AR experiences + of this type have no physical control over the user and extra considerations + must be made when designing such systems. +\end_layout + +\begin_layout Standard +Traditional media consumption is not the only area of interest for developing + interactive experiences; an investigation into the value of AR and VR for + improving construction safety is presented by +\begin_inset CommandInset citation +LatexCommand citeauthor +key "ar/vr-construction" +literal "false" + +\end_inset + + +\begin_inset CommandInset citation +LatexCommand cite +key "ar/vr-construction" +literal "false" + +\end_inset + +. + A broad look at the applicability is taken with assessments including VR + experiences for developing worker balance to aid in working at elevation + and AR experiences incorporated into the workplace for aiding in task sequencing + to reduce the effect of memory on safety. 
+\end_layout + +\begin_layout Standard +\begin_inset CommandInset citation +LatexCommand citeauthor +key "remixed-reality" +literal "false" + +\end_inset + + +\begin_inset CommandInset citation +LatexCommand cite +key "remixed-reality" +literal "false" + +\end_inset + + demonstrate an example of mixed reality through the use of +\noun on +Kinect +\noun default + cameras and a virtual reality headset. + Users are placed in a virtual space constructed from 3D renders of the + physical environment around the user. + Virtual manipulation of the space can then be achieved with visual, spatial + and temporal changes supported. + Objects can be scaled and sculpted in realtime while the environment can + be paused and rewound. + The strength of mixed reality comes with the immersion of being virtually + placed in a version of the physical surroundings; tactile feedback from + the environment compounds this. +\end_layout + +\begin_layout Subsection +Kinect and RGB-D Cameras +\end_layout + +\begin_layout Standard +Initially designed as a motion control accessory for the +\noun on +Xbox +\noun default +, the +\noun on +Kinect +\noun default + is a series of depth-aware cameras produced by +\noun on +Microsoft +\noun default +. + The device uses additional infrared lights and sensors alongside an RGB + camera in a configuration referred to as a time-of-flight camera to generate + 3D renders of its surroundings. + The device also includes motion tracking and skeleton isolation for figures + in view. +\end_layout + +\begin_layout Standard +Following the release of an SDK for Windows in 2012, +\begin_inset CommandInset citation +LatexCommand citeauthor +key "original-kinect-microsoft" +literal "false" + +\end_inset + + at +\noun on +Microsoft Research +\noun default + reflects on the original camera's capabilities and the applications to + computer vision research in +\begin_inset CommandInset citation +LatexCommand cite +key "original-kinect-microsoft" +literal "false" + +\end_inset + +. +\end_layout + +\begin_layout Standard +Here 3D conference calling of the type described in the introduction without + AR or VR applications is presented; instead users watch a composite conference + space on a screen with all participants rendered within. + Work was undertaken to achieve mutual gaze between participants, a marked + advantage over traditional conference calls where the lack of such aspects + of group interaction makes the experience more impersonal. + Methods of achieving more natural virtual interactions or +\emph on +telepresence +\emph default + are covered in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Holoportation-and-Telepresence" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. +\end_layout + +\begin_layout Standard +A second version of the camera, v2, was released alongside the +\noun on +Xbox One +\noun default + in 2013 and presented many improvements over the original. + A higher-quality RGB camera captures 1080p video at up to 30 frames per + second with a wider field of view than the original +\begin_inset CommandInset citation +LatexCommand cite +key "kinect-specs" +literal "false" + +\end_inset + +. + The physical capabilities of the camera are discussed by +\begin_inset CommandInset citation +LatexCommand citeauthor +key "new-kinect" +literal "false" + +\end_inset + + +\begin_inset CommandInset citation +LatexCommand cite +key "new-kinect" +literal "false" + +\end_inset + +. 
+ The second version of the camera was found to gather more accurate depth + data than the original and was less sensitive to daylight. + +\begin_inset CommandInset citation +LatexCommand citeauthor +key "kinectv1/v2-accuracy-precision" +literal "false" + +\end_inset + + +\begin_inset CommandInset citation +LatexCommand cite +key "kinectv1/v2-accuracy-precision" +literal "false" + +\end_inset + + found similar results with the v2 achieving higher accuracy results than + the original. + The second version did, however, achieve lower precision results than the + v1, with recommendations made to include pre-processing on acquired depth + images to control for random noise, +\emph on +flying pixels +\emph default + and +\emph on +multipath interference +\emph default +. +\end_layout + +\begin_layout Standard +The +\noun on +Kinect +\noun default + is used successfully by +\begin_inset CommandInset citation +LatexCommand citeauthor +key "greenhouse-kinect" +literal "false" + +\end_inset + + +\begin_inset CommandInset citation +LatexCommand cite +key "greenhouse-kinect" +literal "false" + +\end_inset + + for object detection in the context of an autonomous vehicle navigating + a greenhouse. + The depth information was used in conjunction with the RGB information + to identify obstacles; while the paper lays out some limitations of the + camera, it was found to be effective in its aim and was capable of running + on a reasonable computer. +\end_layout + +\begin_layout Standard +This second iteration of the +\noun on +Kinect +\noun default + is frequently used in computer vision experiments with many of the works + cited here using it for acquisition. +\end_layout + +\begin_layout Subsection +Holoportation and Telepresence +\begin_inset CommandInset label +LatexCommand label +name "subsec:Holoportation-and-Telepresence" + +\end_inset + + +\end_layout + +\begin_layout Standard +The term Holoportation is defined and exemplified in a +\noun on +Microsoft Research +\noun default + paper by +\begin_inset CommandInset citation +LatexCommand citeauthor +key "holoportation" +literal "false" + +\end_inset + + +\begin_inset CommandInset citation +LatexCommand cite +key "holoportation" +literal "false" + +\end_inset + + where an end-to-end pipeline is laid out for the acquisition, transmission + and display of 3D video facilitating real-time AR and VR experiences. + The +\noun on +Microsoft Research +\noun default + paper builds on works including that by +\begin_inset CommandInset citation +LatexCommand citeauthor +key "Immersive-telepresence" +literal "false" + +\end_inset + + +\begin_inset CommandInset citation +LatexCommand cite +key "Immersive-telepresence" +literal "false" + +\end_inset + + two years earlier, which describes attempts at achieving +\begin_inset Quotes eld +\end_inset + + +\emph on +telepresence +\emph default + +\begin_inset Quotes erd +\end_inset + +, a term coined by Marvin Minsky to describe the transparent and intuitive + remote control of robot arms as if they were the controller's own +\begin_inset CommandInset citation +LatexCommand cite +key "marvin-minksy" +literal "false" + +\end_inset + +. + The term was broadened by Bill Buxton +\begin_inset CommandInset citation +LatexCommand cite +key "buxton-telepresence" +literal "false" + +\end_inset + + to include the space of telecommunications, describing technology being + used to make someone feel present in a different environment. + In the context of holoportation this is achieved through the use of 3D video + reconstruction. 
+ The aforementioned work by +\begin_inset CommandInset citation +LatexCommand citeauthor +key "Immersive-telepresence" +literal "false" + +\end_inset + + +\begin_inset CommandInset citation +LatexCommand cite +key "Immersive-telepresence" +literal "false" + +\end_inset + + used 10 +\noun on +Kinect +\noun default + cameras to capture a room before virtually reconstructing the models. + +\end_layout + +\begin_layout Standard +In service of demonstrating its applicability to achieving +\emph on +telepresence +\emph default +, a figure was isolated from the surroundings and stereoscopically rear-projected + onto a screen for a single participant; a result of this can be seen in + figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:stereoscopic" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. +\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\align center +\begin_inset Graphics + filename ../media/telepresence-stereoscopic.png + lyxscale 30 + width 40col% + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption Standard + +\begin_layout Plain Layout +An example of stereoscopic projection of depth-aware footage captured by + +\begin_inset CommandInset citation +LatexCommand citeauthor +key "Immersive-telepresence" +literal "false" + +\end_inset + + +\begin_inset CommandInset citation +LatexCommand cite +key "Immersive-telepresence" +literal "false" + +\end_inset + + +\begin_inset CommandInset label +LatexCommand label +name "fig:stereoscopic" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +The +\noun on +Microsoft Research +\noun default + paper demonstrates a system using 8 cameras surrounding a space. + Each camera captured both Near Infra-Red and colour images to construct + a colour-depth video stream, a more complex camera configuration than in + the others cited. +\end_layout + +\begin_layout Standard +\begin_inset CommandInset citation +LatexCommand citeauthor +key "velt" +literal "false" + +\end_inset + + +\begin_inset CommandInset citation +LatexCommand cite +key "velt" +literal "false" + +\end_inset + + demonstrates a similar holoportation experience to +\noun on +LiveScan3D +\noun default + capable of supporting multi-view configurations; it also supports both + point clouds and meshes. + Calibrating multiple viewpoints is completed using the extrinsics and + intrinsics of the camera. + The extrinsics are the relative positions of each +\noun on +Kinect +\noun default + camera while the intrinsics describe the internal properties of each camera, + namely the focal length and optical centre. + +\end_layout + +\begin_layout Standard +The intrinsics of the +\noun on +Kinect +\noun default + camera can be retrieved from the +\noun on +Kinect +\noun default + SDK while the extrinsics are obtained in one of two ways. + Extrinsics can be imported and parsed from XML for manual selection or + estimated using +\noun on +OpenCV +\noun default + and a checkerboard pattern. + When considering holoportation systems of this kind, comparatively few + implement multiple views as a result of the increased complexity involved + in calibration. 
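+\end_layout
+
+\begin_layout Standard
+Concretely, under the standard pinhole camera model the two sets of parameters
+ play distinct roles: the extrinsics (a rotation and a translation) move a
+ world-space point into a given camera's frame, after which the intrinsics
+ (focal lengths and optical centre) project it onto the image plane,
+\end_layout
+
+\begin_layout Standard
+\begin_inset Formula 
+\[
+p_{cam}=Rp_{world}+t,\qquad u=f_{x}\frac{x_{cam}}{z_{cam}}+c_{x},\qquad v=f_{y}\frac{y_{cam}}{z_{cam}}+c_{y}
+\]
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+The checkerboard estimation mentioned above essentially recovers the extrinsics
+ of each sensor given known intrinsics, which is where the additional calibration
+ effort of multi-view systems lies.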
+\end_layout + +\begin_layout Subsection +Multi-Source Holoportation +\end_layout + +\begin_layout Standard +The space of multi-source holoportation has been explored by +\begin_inset CommandInset citation +LatexCommand citeauthor +key "group-to-group-telepresence" +literal "false" + +\end_inset + + +\begin_inset CommandInset citation +LatexCommand cite +key "group-to-group-telepresence" +literal "false" + +\end_inset + + in the context of shared architectural design spaces in virtual reality + similar to a conference call. + Two groups of people were captured in 3D using clusters of +\noun on +Kinect +\noun default + cameras before having these renders transmitted to the other group. + Each group reconstructs the other's render for display in virtual reality + in conjunction with their own. + In doing so a shared virtual space for the two groups has been created + and it can be seen to implement the process of holoportation. + The strength of the system as a shared architectural design experience + is emergent of the semantics of the virtual space where a World in Miniature + (WIM) metaphor is used. +\end_layout + +\begin_layout Standard +The Worlds in Miniature is described by +\begin_inset CommandInset citation +LatexCommand citeauthor +key "wim" +literal "false" + +\end_inset + + +\begin_inset CommandInset citation +LatexCommand cite +key "wim" +literal "false" + +\end_inset + + as a set of interfaces between the user and the virtual space they experience + using tactile and visual tools. + The interface involves providing the user with a miniature render of the + world they are inhabiting that can interacted with in order to affect the + full scale environment around them. +\end_layout + +\begin_layout Standard +This navigation tool maps well to +\begin_inset CommandInset citation +LatexCommand citeauthor +key "group-to-group-telepresence" +literal "false" + +\end_inset + +'s +\begin_inset CommandInset citation +LatexCommand cite +key "group-to-group-telepresence" +literal "false" + +\end_inset + + architecture groupware design, an image captured during the work can be + seen in figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:World-in-Miniature-group-by-group" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. +\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\align center +\begin_inset Graphics + filename ../media/group-by-group.png + lyxscale 30 + width 50col% + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption Standard + +\begin_layout Plain Layout +World in Miniature render demonstrated in a multi-source holoportation context + by +\begin_inset CommandInset citation +LatexCommand citeauthor +key "group-to-group-telepresence" +literal "false" + +\end_inset + + +\begin_inset CommandInset citation +LatexCommand cite +key "group-to-group-telepresence" +literal "false" + +\end_inset + + +\begin_inset CommandInset label +LatexCommand label +name "fig:World-in-Miniature-group-by-group" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Section +LiveScan3D +\end_layout + +\begin_layout Standard + +\noun on +LiveScan3D +\noun default + is a suite of software developed by Marek Kowalski, Jacek Naruniec and + Michal Daniluk of the Warsaw University of Technology in 2015 +\begin_inset CommandInset citation +LatexCommand cite +key "livescan3d" +literal "false" + +\end_inset + +. 
+ The suite utilises the +\noun on +Xbox Kinect +\noun default + v2 camera to record and transmit 3D renders over an IP network. + A server can manage multiple clients simultaneously and is responsible + for processing, reconstructing and displaying the renderings in real-time. +\end_layout + +\begin_layout Standard +These renderings take the form of a point cloud, a collection of 3D co-ordinates + indicating the position of each voxel (3D pixel) and it's associated RGB + colour value. + As a result of it's analogous nature to a traditional frame of 2D video, + the terms +\begin_inset Quotes eld +\end_inset + +render +\begin_inset Quotes erd +\end_inset + +, +\begin_inset Quotes eld +\end_inset + +point cloud +\begin_inset Quotes erd +\end_inset + + and +\begin_inset Quotes eld +\end_inset + +frame +\begin_inset Quotes erd +\end_inset + + are used interchangeably from here. +\end_layout + +\begin_layout Standard +The majority of the development being conducted in this project is regarding + the server component of the software and as such this is covered in more + detail. +\end_layout + +\begin_layout Subsection + +\noun on +LiveScan +\noun default + Client +\end_layout + +\begin_layout Standard +The +\noun on +LiveScan +\noun default + Client is responsible for interfacing with the +\noun on +Kinect +\noun default + sensor via the +\noun on +Kinect +\noun default + v2 SDK and transmitting frames to the +\noun on +LiveScan +\noun default + Server. + Body detection takes place client side, as does calibration when using + multiple sensors. +\end_layout + +\begin_layout Standard +Only one +\noun on +Kinect +\noun default + sensor can be connected to each computer as a result of the SDK's restrictions. + A system used by multiple clients in this way lends itself well to multi-source + configurations over the internet. +\end_layout + +\begin_layout Subsection + +\noun on +LiveScan +\noun default + Server +\end_layout + +\begin_layout Standard +The server component of the +\noun on +LiveScan +\noun default + suite is responsible for managing and receiving 3D renders from connected + clients. + These renders are reconstructed in an interactive +\noun on +OpenGL +\noun default +window. + When considering the code architecture of this application there are three + main components. + +\end_layout + +\begin_layout Description +OpenGLWindow Presentation layer of the application. + Separate window spawned by the +\noun on +LiveScanServer +\noun default + responsible for drawing point clouds and responding to user control. + +\end_layout + +\begin_layout Description +KinectServer Network layer of the application. + The main window make requests of this component to receive transmitted + point clouds. +\end_layout + +\begin_layout Description +KinectSocket +\noun on + +\noun default +Child objects contained within the +\noun on +KinectServer +\noun default +. + A traditional network socket object representing a single TCP connection + between the server and a client. +\end_layout + +\begin_layout Standard +This structure can be seen in figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:server-structure" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. 
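+\end_layout
+
+\begin_layout Standard
+As a rough sketch of how these components relate in code (the member names
+ here are illustrative rather than the exact 
+\noun on
+LiveScan3D
+\noun default
+ source, and the 
+\noun on
+KinectSocket
+\noun default
+ type is assumed as described above), the server can be thought of as owning
+ one socket per connected client and polling each in turn on behalf of the
+ main window:
+\end_layout
+
+\begin_layout LyX-Code
+// Illustrative sketch only; the real classes carry considerably more state.
+\end_layout
+
+\begin_layout LyX-Code
+using System.Collections.Generic;
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+public class KinectServer
+\end_layout
+
+\begin_layout LyX-Code
+{
+\end_layout
+
+\begin_layout LyX-Code
+    // One KinectSocket per connected client, i.e. one TCP connection each.
+\end_layout
+
+\begin_layout LyX-Code
+    private readonly List<KinectSocket> clientSockets = new List<KinectSocket>();
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+    // Called by the main window to ask every client for its latest frame.
+\end_layout
+
+\begin_layout LyX-Code
+    public void PollClients()
+\end_layout
+
+\begin_layout LyX-Code
+    {
+\end_layout
+
+\begin_layout LyX-Code
+        foreach (var socket in clientSockets)
+\end_layout
+
+\begin_layout LyX-Code
+            socket.RequestFrame();
+\end_layout
+
+\begin_layout LyX-Code
+    }
+\end_layout
+
+\begin_layout LyX-Code
+}
+\end_layout
+
+\begin_layout Standard
+The 
+\noun on
+OpenGL
+\noun default
+ window sits on top of this, drawing whatever point cloud data the main
+ window most recently pulled from the 
+\noun on
+KinectServer
+\noun default
+.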
+\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\align center +\begin_inset Graphics + filename ../media/initial-state.png + lyxscale 30 + width 50col% + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption Standard + +\begin_layout Plain Layout +Initial architecture of the +\noun on +LiveScan3D +\noun default + server +\begin_inset CommandInset label +LatexCommand label +name "fig:server-structure" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +Received frames in the form of lists of vertices, RGB values, camera poses + and bodies overwrite shared variables between the main window and the +\noun on +OpenGL +\noun default + window. +\end_layout + +\begin_layout Subsection +Calibration & Multi-View Configurations +\end_layout + +\begin_layout Standard +When using a single client setup, frames are transmitted in their own coordinate + space with the origin defined as the +\noun on +Kinect +\noun default + camera and the captured scene rendered in front of it. +\end_layout + +\begin_layout Standard +When using multiple sensors, the server would be unable to combine these + unique Euclidean spaces without knowledge of the sensors' positions relative + to each other, the extrinsics of the system. +\end_layout + +\begin_layout Standard +In order to make a composite frame, a calibration process is completed client + side following instruction from the server. +\end_layout + +\begin_layout Standard +Calibration is completed in two steps: an initial estimation followed by + a refinement process. + The initial estimation is completed by informing the server of which calibration + marker layouts are being used within the space. + Clients identify possible visible markers like that seen in figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:calibration-marker" +plural "false" +caps "false" +noprefix "false" + +\end_inset + + using thresholding. + Following this identification, the location of the marker can be found + within the sensor's coordinate system. + +\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset Graphics + filename /home/andy/uni/dissertation/media/calibration.png + lyxscale 30 + width 20col% + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption Standard + +\begin_layout Plain Layout +Example marker used within the LiveScan3D calibration process +\begin_inset CommandInset label +LatexCommand label +name "fig:calibration-marker" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +This information can be used to transform points from the camera's coordinate + system to the marker's frame of reference. + As the relative locations of different markers are defined at the server, + a world coordinate system can be defined as the centre of these markers. + Typically four different markers are placed on the faces around the vertical + axis of a cuboid, allowing views in 360°. +\end_layout + +\begin_layout Standard +This world coordinate space has shifted the origin from being the position + of the single +\noun on +Kinect +\noun default + sensor to being a point in the centre of the calibration markers that each + camera now orbits. 
+ As part of this calibration process the server distributes transformations + to each client defining where they sit within this world coordinate space. + Client's can now transform acquired renders from their own frame of reference + to the world coordinate system at the point of capture and each point cloud + can be merged coherently. +\end_layout + +\begin_layout Standard +The refinement process is completed server side by requesting a single frame + from each connected client and using Iterative Closest Points +\begin_inset CommandInset citation +LatexCommand cite +key "ICP" +literal "false" + +\end_inset + + (ICP) to improve the inter-camera relationships. +\end_layout + +\begin_layout Standard +The +\noun on +OpenGL +\noun default + display space has it's origin within the centre of the visible box, this + means that for single sensor setups this is also the location of the camera. +\end_layout + +\begin_layout Subsection +Design Considerations +\end_layout + +\begin_layout Standard +When assessing +\noun on +LiveScan +\noun default +'s suitability for extension to a multi-source context, the original network + design should be investigated. +\end_layout + +\begin_layout Standard +The original applications were best suited to a local environment as a result + of many of the network functions being blocking. + Should any delays or interruptions have occurred during a network operation, + then the application would need to stop and wait for remediation before + continuing. + Interruptions of this type are more common when moving from a LAN environment + to communicating over the open internet. +\end_layout + +\begin_layout Standard +From a network perspective the need to make these actions non-blocking would + present benefits for both multi-source and multi-view configurations. +\end_layout + +\begin_layout Standard +Additionally, the network polling rates are higher than the frame rate of + the produced video, when the server requests a frame before a new one has + been captured by the client, the same previous frame is resent. + This presents unnecessary bandwidth usage. +\end_layout + +\begin_layout Standard +Moving to a multi-source context implies transmitting over the internet + as opposed to local operation, this will make blocking actions and bloated + bandwidth more dangerous to user experience. +\end_layout + +\begin_layout Standard +Work has been undertaken that allows multiple concurrent TCP connections + to be used by each client to increase bandwidth. + Further work is being undertaken to un-block network actions. +\end_layout + +\begin_layout Section +Methodology and Developments +\end_layout + +\begin_layout Standard +The required development to take the existing +\noun on +LiveScan +\noun default + codebase to the desired multi-source result can be split into two areas + of concern. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\series bold +\emph on +Network +\series default +\emph default + The network layer of the +\noun on +LiveScan +\noun default + server must be updated in order to accommodate multiple clients logically + grouped into +\begin_inset Quotes eld +\end_inset + +sources +\begin_inset Quotes erd +\end_inset + + for which separate frames are collected for display. +\end_layout + +\begin_layout Labeling +\labelwidthstring 00.00.0000 + +\series bold +\emph on +Display +\series default +\emph default + Finally the display element of the server should be extended to allow the + simultaneous presentation of multiple point clouds. 
+ These objects should be individually arrangeable in the display space allowing + both movement and rotation. +\end_layout + +\begin_layout Standard +As of January 2020 the native method for displaying renderings, the server's + +\noun on +OpenGL +\noun default + window, has been modified such that it can construct and render point clouds + from multiple sources. + To do so a dynamic sub-system of geometric transformations has been written + in order to coherently arrange sources within the space when reconstructed. + The default arrangements can be overridden with keyboard controls facilitating + arbitrary placement and rotation of separate sources within the window's + co-ordinate space. +\end_layout + +\begin_layout Subsection +Geometric Transformations +\end_layout + +\begin_layout Standard +Within the +\noun on +LiveScan3D +\noun default + server source code are utility structures and classes which were extended + in order to develop a wider geometric manipulation system. + Structures defining Cartesian coordinates in both 2D and 3D spaces called + +\noun on +Point2f +\noun default + and +\noun on +Point3f +\noun default + respectively are used in drawing skeletons as captured by the +\noun on +Kinect +\noun default + camera. + There is also a class defining an affine transformation, the definitions + for all three can be seen in appendix +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Existing-Data-Structures" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. +\end_layout + +\begin_layout Standard +Affine transformations are a family of geometric transformations that preserve + parallel lines within geometric spaces. + Some examples of affine transformations include scaling, reflection, rotation, + translation and shearing. +\end_layout + +\begin_layout Standard +The class definition is made up of a three-by-three transformation matrix + and single 3D vector for translation, within the native codebase it is + used for both camera poses and world transformations. + +\end_layout + +\begin_layout Standard +A camera pose is the affine transformation defining the position and orientation + of the +\noun on +Kinect +\noun default + camera when drawn in the +\noun on +OpenGL +\noun default + space as a green cross. + The world transformations are used as part of the calibration process when + using multi-view configurations. + +\end_layout + +\begin_layout Standard +When considering how each source's render would be arranged in the space, + the use of this class definition was extended. + As the use of affine transformations is mostly limited to use as a data + structure within the base source code, some utility classes and functions + were required in order to fully maximise their effectiveness. +\end_layout + +\begin_layout Subsubsection +Transformer +\end_layout + +\begin_layout Standard +The motivation in writing the +\noun on +Transformer +\noun default + was to create a generic framework of geometric transformations that could + be utilised by the +\noun on +OpenGL +\noun default + display to arrange separate point clouds. + At a high level this is done by implementing matrix arithmetic functions + in the context of their use for applying linear transformations to Cartesian + coordinates. 
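+\end_layout
+
+\begin_layout Standard
+As a minimal illustration (a sketch rather than the verbatim implementation,
+ assuming the 
+\noun on
+AffineTransform
+\noun default
+ class holds a three-by-three matrix R and a translation vector t, and that
+ 
+\noun on
+Point3f
+\noun default
+ exposes public X, Y and Z fields), applying such a transform to a single
+ point is a matrix multiplication followed by a translation:
+\end_layout
+
+\begin_layout LyX-Code
+// Sketch only: field names assumed from the structures referenced in the appendix.
+\end_layout
+
+\begin_layout LyX-Code
+public static Point3f Apply(AffineTransform a, Point3f p)
+\end_layout
+
+\begin_layout LyX-Code
+{
+\end_layout
+
+\begin_layout LyX-Code
+    return new Point3f
+\end_layout
+
+\begin_layout LyX-Code
+    {
+\end_layout
+
+\begin_layout LyX-Code
+        X = a.R[0, 0] * p.X + a.R[0, 1] * p.Y + a.R[0, 2] * p.Z + a.t[0],
+\end_layout
+
+\begin_layout LyX-Code
+        Y = a.R[1, 0] * p.X + a.R[1, 1] * p.Y + a.R[1, 2] * p.Z + a.t[1],
+\end_layout
+
+\begin_layout LyX-Code
+        Z = a.R[2, 0] * p.X + a.R[2, 1] * p.Y + a.R[2, 2] * p.Z + a.t[2]
+\end_layout
+
+\begin_layout LyX-Code
+    };
+\end_layout
+
+\begin_layout LyX-Code
+}
+\end_layout
+
+\begin_layout Standard
+Applying the same operation across a whole list of received vertices is
+ then a simple loop, which is what the methods described below provide.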
+ +\end_layout + +\begin_layout Standard +The +\noun on +Transformer +\noun default + class has static methods to apply +\noun on +AffineTransform +\noun default +s to both +\noun on +Point3f +\noun default + structures and lists of raw vertices as received from +\noun on +LiveScan +\noun default + clients. +\end_layout + +\begin_layout Standard +Additionally there are utility functions to bidirectionally cast between + +\noun on +Point3f +\noun default + data structures and the lists of received vertices. +\end_layout + +\begin_layout Standard +Finally static methods generate common rotation transformations about each + axis given an arbitrary angle. + This provided a foundation on which to define how the +\noun on +OpenGL +\noun default + space would arrange separate sources within it's combined co-ordinate space. +\end_layout + +\begin_layout Standard +Currently missing is the ability to combine transformations into compound + matrices. + Applying multiple transformations to large numbers of coordinates would + be more computationally expensive than applying one compound matrix and + when running in realtime this should be considered. + This is not yet included due to the current lack of need to apply multiple + successive transformations. + If the need were to arise following further refinements, it would be implemente +d as described here. +\end_layout + +\begin_layout Subsection +Separation of Network and Presentation Layer +\end_layout + +\begin_layout Standard +During initial testing frames received from a live sensor were intercepted + and serialized to XML in local storage. + These frames were loaded into memory as the server was started and merged + with those received live before display. +\end_layout + +\begin_layout Standard +The composite frame can be seen in figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:Initial-composite-frame" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. +\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\align center +\begin_inset Graphics + filename ../media/pretransform.jpg + lyxscale 10 + width 50col% + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption Standard + +\begin_layout Plain Layout +Initial multi-source composite testing frame +\begin_inset CommandInset label +LatexCommand label +name "fig:Initial-composite-frame" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +The objects can be seen to be occupying the same space due to their similar + positions in the frame during capture. + This is not a sufficient solution for displaying separate sources and so + geometric transformations like those described above were employed to separate + the two. + The change in software structure at this stage can be seen in figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:Initial-testing-layout" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. + A rotation of 180° in the vertical ( +\begin_inset Formula $y$ +\end_inset + +) axis pivoted the frames such that they faced those being received live, + the results can be seen in figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:180-degree-rotation" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. 
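+\end_layout
+
+\begin_layout Standard
+The transform used for this flip is one of the generated rotations described
+ above; a minimal sketch of building such a rotation about the 
+\begin_inset Formula $y$
+\end_inset
+
+ axis (the method name is illustrative) is:
+\end_layout
+
+\begin_layout LyX-Code
+// Sketch: a pure rotation of 'degrees' about the y axis as an AffineTransform.
+\end_layout
+
+\begin_layout LyX-Code
+public static AffineTransform YRotation(float degrees)
+\end_layout
+
+\begin_layout LyX-Code
+{
+\end_layout
+
+\begin_layout LyX-Code
+    double rad = Math.PI * degrees / 180.0;
+\end_layout
+
+\begin_layout LyX-Code
+    float c = (float)Math.Cos(rad);
+\end_layout
+
+\begin_layout LyX-Code
+    float s = (float)Math.Sin(rad);
+\end_layout
+
+\begin_layout LyX-Code
+    var a = new AffineTransform();
+\end_layout
+
+\begin_layout LyX-Code
+    a.R = new float[,] { { c, 0, s }, { 0, 1, 0 }, { -s, 0, c } };
+\end_layout
+
+\begin_layout LyX-Code
+    a.t = new float[] { 0, 0, 0 };
+\end_layout
+
+\begin_layout LyX-Code
+    return a;
+\end_layout
+
+\begin_layout LyX-Code
+}
+\end_layout
+
+\begin_layout Standard
+At 180° this reduces to mapping 
+\begin_inset Formula $(x,y,z)$
+\end_inset
+
+ to 
+\begin_inset Formula $(-x,y,-z)$
+\end_inset
+
+, which is the pivot applied to the recorded source here.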
+\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\align center +\begin_inset Graphics + filename ../media/local-testing.png + lyxscale 30 + width 70col% + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption Standard + +\begin_layout Plain Layout +Initial testing process transforming frames loaded from local storage +\begin_inset CommandInset label +LatexCommand label +name "fig:Initial-testing-layout" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\align center +\begin_inset Graphics + filename ../media/180flip.jpg + lyxscale 10 + width 50col% + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption Standard + +\begin_layout Plain Layout +Composite testing frame following 180° rotation of recorded source in +\begin_inset Formula $y$ +\end_inset + + axis +\begin_inset CommandInset label +LatexCommand label +name "fig:180-degree-rotation" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +At this point it was noted that transforming and arranging figures within + the main window before passing the +\noun on +OpenGL +\noun default + window a complete point cloud spreads responsibility for the display logic + to the main window. +\end_layout + +\begin_layout Standard + +\noun on +LiveScan3D +\noun default + is capable of supporting more display methods than just the native +\noun on +OpenGL +\noun default + implementation with versions available for both +\noun on +Microsoft Hololens +\noun default + and Mobile AR applications. + Therefore when designing the multi-source capabilities, the separation + of logic between the network and presentation layer is important. + +\end_layout + +\begin_layout Standard +The way in which the +\noun on +OpenGL +\noun default + window arranges the figures in it's display space should be defined by + the +\noun on +OpenGL +\noun default + window itself. + The network layer should be display agnostic and not make assumptions about + how the display will process figures. +\end_layout + +\begin_layout Standard +In order to follow this design the transformations were moved to instead + occur within the +\noun on +OpenGL +\noun default + window class. + To allow this the shared variables between the +\noun on +MainWindow +\noun default + and +\noun on +OpenGL +\noun default + were changed. + A Frame structure was defined to wrap an individual point cloud with a + client ID to allow differentiation, the definition can be seen in appendix + +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Frame" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. + The structure holds fields for each of the lists previously shared between + the two objects including a list of vertices (co-ordinates) and the RGB + values for each as well as the camera poses and bodies. +\end_layout + +\begin_layout Standard +The original +\noun on +LiveScan3D +\noun default + cleared each of these variables before retrieving a new frame, when moving + to a multi-source architecture the ability to individually update source + point clouds was prioritised. 
+ This would avoid blocking the entire display when unable to receive frames + from a specific client, other clients would still be able to have frames + updated promptly. +\end_layout + +\begin_layout Standard +To accomplish this a dictionary was used as the shared variable with each + client's frame referenced by it's client ID. + In doing so only one frame per client is kept and each new frame overrides + the last. + During rendering the dictionary is iterated through and each point cloud + combined. + During combination a client specific transformation is retrieved from an + instance of the +\noun on +DisplayFrameTransformer +\noun default + class. + This object is a member of the +\noun on +OpenGL +\noun default + window and is responsible for defining the orientation and position of + each point cloud. +\end_layout + +\begin_layout Subsection +DisplayFrameTransformer +\end_layout + +\begin_layout Standard +The +\noun on +DisplayFrameTransformer +\noun default + is responsible for generating transformations for the sources displayed + within the +\noun on +OpenGL +\noun default + window, a UML diagram for the class can be seen in figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:UML-displayframetransformer" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. +\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\align center +\begin_inset Graphics + filename ../media/DisplayFrameTransformer.png + lyxscale 50 + width 60col% + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption Standard + +\begin_layout Plain Layout +UML diagram for +\noun on +DisplayFrameTransformer +\noun default + +\begin_inset CommandInset label +LatexCommand label +name "fig:UML-displayframetransformer" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +Each client is assigned a default transformation which can be overridden + using keyboard controls. +\end_layout + +\begin_layout Standard +Clients are initially arranged in a circle around the origin in the center + of the space. + This is done by retrieving a transformation from the +\noun on +Transformer +\noun default + for a rotation in the +\begin_inset Formula $y$ +\end_inset + + axis for each client, +\begin_inset Formula $n$ +\end_inset + +. + Each angle of rotation, +\begin_inset Formula $\alpha$ +\end_inset + +, is calculated using the below, +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +\alpha\left(n\right)=\frac{n}{\sum clients}\cdotp360\textdegree +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +Similar to the shared variables between the +\noun on +MainWindow +\noun default + and +\noun on +OpenGL +\noun default + window, client transformations are stored within a dictionary indexed by + client ID. +\end_layout + +\begin_layout Standard +The +\noun on +DisplayFrameTransformer +\noun default + also has methods to override these initial transforms with the RotateClient() + and TranslateClient() methods. + When these methods are called for the first time on a point cloud, an object + defining the position and rotation is populated using the default rotation. + From here the presence of a client override results in applied transforms + being defined by these values as opposed to the default orientation. 
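+\end_layout
+
+\begin_layout Standard
+Putting these pieces together, the default arrangement logic reduces to
+ a small amount of code; the sketch below is illustrative (member names
+ are simplified relative to the real class) and assumes the 
+\begin_inset Formula $y$
+\end_inset
+
+ axis rotation helper described earlier:
+\end_layout
+
+\begin_layout LyX-Code
+// Sketch: default placement of client n of N around the origin,
+\end_layout
+
+\begin_layout LyX-Code
+// unless the user has overridden that client's placement.
+\end_layout
+
+\begin_layout LyX-Code
+private readonly Dictionary<int, AffineTransform> overrides =
+\end_layout
+
+\begin_layout LyX-Code
+    new Dictionary<int, AffineTransform>();
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+public AffineTransform GetClientTransform(int clientId, int clientCount)
+\end_layout
+
+\begin_layout LyX-Code
+{
+\end_layout
+
+\begin_layout LyX-Code
+    AffineTransform manual;
+\end_layout
+
+\begin_layout LyX-Code
+    if (overrides.TryGetValue(clientId, out manual))
+\end_layout
+
+\begin_layout LyX-Code
+        return manual;
+\end_layout
+
+\begin_layout LyX-Code
+
+\end_layout
+
+\begin_layout LyX-Code
+    // alpha(n) = n / (number of clients) * 360 degrees, as defined above
+\end_layout
+
+\begin_layout LyX-Code
+    float alpha = 360f * clientId / clientCount;
+\end_layout
+
+\begin_layout LyX-Code
+    return YRotation(alpha);
+\end_layout
+
+\begin_layout LyX-Code
+}
+\end_layout
+
+\begin_layout Standard
+The RotateClient() and TranslateClient() methods then only need to write
+ an updated transform into this override dictionary for the selected client.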
+\end_layout

\begin_layout Standard
This leaves the current architecture of the server application as described
 in figure 
\begin_inset CommandInset ref
LatexCommand ref
reference "fig:current-state-diagram"
plural "false"
caps "false"
noprefix "false"

\end_inset

.
\end_layout

\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open

\begin_layout Plain Layout
\align center
\begin_inset Graphics
 filename ../media/december-state.png
 lyxscale 30
 width 60col%

\end_inset


\end_layout

\begin_layout Plain Layout
\begin_inset Caption Standard

\begin_layout Plain Layout
Current state of
\noun on
LiveScan
\noun default
 server structure with
\noun on
OpenGL
\noun default
 window-based transformer
\begin_inset CommandInset label
LatexCommand label
name "fig:current-state-diagram"

\end_inset


\end_layout

\end_inset


\end_layout

\begin_layout Plain Layout

\end_layout

\end_inset


\end_layout

\begin_layout Subsection
Control Scheme
\end_layout

\begin_layout Standard
The movement of objects within the
\noun on
OpenGL
\noun default
 space is implemented through keyboard controls.
 While using the mouse would allow fine-grained and intuitive control, the
 number of axes of motion and rotation available to objects makes defining
 specific keys for each more flexible.
 This additionally removes the need to redefine or overload the camera controls.
\end_layout

\begin_layout Standard
The
\begin_inset Quotes eld
\end_inset

I
\begin_inset Quotes erd
\end_inset

 key is used to cycle through the displayed sources; the currently selected
 source is the subject of each of the movement actions.
 Sources are moved across the horizontal plane (
\begin_inset Formula $x$
\end_inset

, 
\begin_inset Formula $z$
\end_inset

) of the display space using a WASD-esque layout of the UHJK keys.
 Objects can be rotated about the vertical (
\begin_inset Formula $y$
\end_inset

) axis using the B and N keys.
 Finally, the placement of an object can be reset to default using the R
 key; adding the shift modifier resets all clients.
\end_layout

\begin_layout Standard
It is worth noting that this provides arbitrary placement of sources in
 only two axes of position and one axis of rotation.
 This was a conscious choice, as these are the most common and intuitive
 axes in which sources will need to be manipulated.
 Allowing movement in the remaining axes would only require binding those
 actions to additional keys.
\end_layout

\begin_layout Standard
There is room to improve these controls, as the directions of movement are
 defined relative to the fixed axes of the display space rather than to
 the view of the viewpoint camera.
 In practice this means that, when moving objects in the display space,
 the orientation of the space must be considered in order to identify the
 direction in which an object should be moved.
\end_layout

\begin_layout Standard
This is less intuitive than the equivalent controls in other areas where
 such a control scheme is used, such as video games or modelling software.
 In those implementations the directions of movement are typically taken
 from the camera's frame of reference.
 The feasibility of employing a similar control philosophy here should be
 considered.
\end_layout

\begin_layout Subsection
Challenges
\end_layout

\begin_layout Standard
The main challenge encountered in the project so far was intercepting the
 live frames and serializing them as XML files in local storage.
 With no previous experience developing in C#, the opening steps of the
 project involved both getting to grips with the language, based on previous
 work in C-like languages (Java, C), and understanding the layout of the
 codebase.
\end_layout

\begin_layout Standard
Initial attempts to serialize the frame structures resulted in no output
 to the file system, and the multi-threaded nature of the graphical application
 meant there was no feedback to debug with.
 This was fixed by removing the affine transformations representing camera
 poses from the frame structure for the testing process.
\end_layout

\begin_layout Standard
This implies that something about the nature of the
\noun on
AffineTransform
\noun default
 class is causing errors during serialization.
 Java requires that classes implement a
\emph on
serializable
\emph default
 interface in order to be saved to file successfully, and further work will
 be required to determine whether an equivalent requirement is to blame
 here.
 However, for now, the camera poses of local frames are not displayed in
 the
\noun on
OpenGL
\noun default
 window.
\end_layout

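\begin_layout Standard
As an illustration of the workaround, the sketch below shows how a camera
 pose could be excluded from XML serialization in C#.
 The types are simplified stand-ins rather than the structures used by
\noun on
LiveScan
\noun default
, it assumes the standard
\noun on
XmlSerializer
\noun default
 is the mechanism in use, and the suggestion that its limitation on
 multi-dimensional arrays is the underlying cause is an assumption that
 is yet to be confirmed.
\end_layout

\begin_layout Standard
\begin_inset listings
lstparams "language={[Sharp]C},caption={Illustrative sketch of excluding the camera pose from XML serialization}"
inline false
status open

\begin_layout Plain Layout

// Sketch only: simplified stand-ins, not the actual LiveScan3D structures.
\end_layout

\begin_layout Plain Layout

using System.Collections.Generic;
\end_layout

\begin_layout Plain Layout

using System.IO;
\end_layout

\begin_layout Plain Layout

using System.Xml.Serialization;
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

public class AffineTransform
\end_layout

\begin_layout Plain Layout

{
\end_layout

\begin_layout Plain Layout

    // XmlSerializer cannot handle multi-dimensional arrays (float[,]),
\end_layout

\begin_layout Plain Layout

    // so a rotation matrix stored that way would fail to serialize.
\end_layout

\begin_layout Plain Layout

    public float[][] R;
\end_layout

\begin_layout Plain Layout

    public float[] t;
\end_layout

\begin_layout Plain Layout

}
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

public class Frame
\end_layout

\begin_layout Plain Layout

{
\end_layout

\begin_layout Plain Layout

    public int ClientId;
\end_layout

\begin_layout Plain Layout

    public List<float> Vertices;
\end_layout

\begin_layout Plain Layout

    public List<byte> Colours;
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

    // Excluded from the XML output, mirroring the workaround above.
\end_layout

\begin_layout Plain Layout

    [XmlIgnore]
\end_layout

\begin_layout Plain Layout

    public AffineTransform CameraPose;
\end_layout

\begin_layout Plain Layout

}
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

public static class FrameDump
\end_layout

\begin_layout Plain Layout

{
\end_layout

\begin_layout Plain Layout

    public static void Save(Frame frame, string path)
\end_layout

\begin_layout Plain Layout

    {
\end_layout

\begin_layout Plain Layout

        using (FileStream stream = File.Create(path))
\end_layout

\begin_layout Plain Layout

            new XmlSerializer(typeof(Frame)).Serialize(stream, frame);
\end_layout

\begin_layout Plain Layout

    }
\end_layout

\begin_layout Plain Layout

}
\end_layout

\end_inset


\end_layout
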
\begin_layout Subsection
Future Work
\end_layout

\begin_layout Standard
Following the extension of the
\noun on
OpenGL
\noun default
 window, the network layer of the
\noun on
KinectServer
\noun default
 can now be developed with the advantage of a fully functional display method
 for debugging.
\end_layout

\begin_layout Standard
The aim of this aspect of the project will be to alter the
\noun on
KinectServer
\noun default
 in order to allow the logical grouping of connected clients into sources
 for separate display.
\end_layout

\begin_layout Standard
When integrated, the server as a whole will be able to collect discrete
 point clouds from different sources and coherently display them as separate
 objects in the space, achieving the objectives of this project.
\end_layout

\begin_layout Subsection
Network Layer Design Considerations
\end_layout

\begin_layout Standard
Some thought has been given to the design of the network layer.
 Although this has not yet yielded a final design for implementation, it
 has made apparent some of the conditions and constraints which must be
 considered.
\end_layout

\begin_layout Standard
When considering the initial steps for the project, it was thought that
 the network layer should be developed first.
 The design would have involved separating much of the logic contained within
 the
\noun on
KinectServer
\noun default
 object into a new
\noun on
KinectSource
\noun default
 object representing a group of clients acting as a single source.
 It would function as a group of
\noun on
KinectSocket
\noun default
s that could be individually polled for new frames using the same interface
 currently used by the
\noun on
KinectServer
\noun default
.
 The
\noun on
KinectServer
\noun default
 object itself would be reduced to simply managing these
\noun on
KinectSource
\noun default
s.
\end_layout

\begin_layout Standard
An advantage of this approach would be that it would provide a suitable
 location to store additional per-source information, such as the calibration
 data and settings.
\end_layout

\begin_layout Standard
However, it would also have represented a significant architectural change
 to the entire server application, and without a functioning display method
 it would have been challenging to debug.
 This was the motivation for working on the display method first.
\end_layout

\begin_layout Standard
Returning to the network design following this work, a different approach
 has been considered.
 A separate body of work currently being undertaken is investigating the
 network behaviour of the suite, with a focus on unblocking the network sockets
 to aid parallel operation.
\end_layout

\begin_layout Standard
In order to ease integration with developments from that work, a less
 disruptive design has been proposed.
\end_layout

\begin_layout Subsubsection
Socket Handshake
\end_layout

\begin_layout Standard
The aim is to implement a method by which clients are grouped into sources
 while also allowing them to identify themselves consistently when communicating
 over multiple sockets.
 Clients can use multiple sockets in order to make simultaneous connections
 to the server and increase bandwidth.
 However, when doing so it is important to be able to identify which sockets
 represent which client, as several may share the same IP address.
\end_layout

\begin_layout Standard
A method for doing so would involve a handshake process when new clients
 connect to the
\noun on
KinectServer
\noun default
.
 The proposed handshake would be initiated by the client when connecting
 to the server; at this point it states which source it should be grouped
 with using an integer ID.
 The server groups the socket accordingly and, if no client identifier has
 been received, responds with a random identifier string that should be
 used across all sockets to identify the client.
 Should the newly connected socket belong to a client that is already connected,
 the client responds with its existing identifier to inform the server that
 the newly issued ID has been ignored.
 In doing so the client gains a method of identifying itself agnostic of
 socket, and the server gains a way of identifying which source each socket
 represents.
\end_layout

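\begin_layout Standard
To make the proposed exchange concrete, the sketch below outlines how the
 server side of such a handshake might look.
 The method names, the use of GUID strings as identifiers and the dictionary
 used to store the grouping are illustrative assumptions rather than a final
 protocol design.
\end_layout

\begin_layout Standard
\begin_inset listings
lstparams "language={[Sharp]C},caption={Illustrative sketch of the proposed socket handshake (server side)}"
inline false
status open

\begin_layout Plain Layout

// Sketch of the proposed handshake from the server's perspective; names,
\end_layout

\begin_layout Plain Layout

// GUID identifiers and the dictionary store are assumptions, not a final design.
\end_layout

\begin_layout Plain Layout

using System;
\end_layout

\begin_layout Plain Layout

using System.Collections.Generic;
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

public class SourceRegistry
\end_layout

\begin_layout Plain Layout

{
\end_layout

\begin_layout Plain Layout

    // client identifier -> source ID for every client seen so far
\end_layout

\begin_layout Plain Layout

    private readonly Dictionary<string, int> clients = new Dictionary<string, int>();
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

    // Opening message of a socket: the requested source ID plus, optionally,
\end_layout

\begin_layout Plain Layout

    // an identifier the client already holds. Returns the identifier to use.
\end_layout

\begin_layout Plain Layout

    public string OnConnect(int sourceId, string existingClientId)
\end_layout

\begin_layout Plain Layout

    {
\end_layout

\begin_layout Plain Layout

        string id = string.IsNullOrEmpty(existingClientId)
\end_layout

\begin_layout Plain Layout

            ? Guid.NewGuid().ToString()   // none received: issue a random one
\end_layout

\begin_layout Plain Layout

            : existingClientId;
\end_layout

\begin_layout Plain Layout

        clients[id] = sourceId;           // group the client under this source
\end_layout

\begin_layout Plain Layout

        return id;
\end_layout

\begin_layout Plain Layout

    }
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

    // Reply from a client that already held an identifier, informing the
\end_layout

\begin_layout Plain Layout

    // server that the ID issued in OnConnect has been ignored.
\end_layout

\begin_layout Plain Layout

    public void OnIdentifierOverride(string issuedId, string existingId, int sourceId)
\end_layout

\begin_layout Plain Layout

    {
\end_layout

\begin_layout Plain Layout

        clients.Remove(issuedId);
\end_layout

\begin_layout Plain Layout

        clients[existingId] = sourceId;
\end_layout

\begin_layout Plain Layout

    }
\end_layout

\begin_layout Plain Layout

}
\end_layout

\end_inset


\end_layout

\begin_layout Standard
Keying the grouping by client identifier rather than by socket is what would
 allow several sockets, potentially sharing an IP address, to be recognised
 as belonging to the same client and therefore to the same source.
\end_layout
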
\begin_layout Subsection
Deliverables and Additional Goals
\end_layout

\begin_layout Standard
At this point in the project it is worth considering the viability of the
 final deliverables in relation to the time remaining.
 Based on the work completed so far, the original objectives of multi-source
 holoportation remain viable, including a complete round of testing.
\end_layout

\begin_layout Standard
This testing suite is yet to be defined but will comprise performance
 evaluations of both the network and display aspects of the software.
\end_layout

\begin_layout Standard
Should the original specification be delivered and evaluated with time
 remaining, additional goals and investigations will be examined.
 Initially, aspects already completed should be revisited for further
 refinement, namely the control scheme mentioned above.
\end_layout

\begin_layout Standard
When considering the design principle of network and presentation separation
 in combination with the relevance of the technology to the spaces of AR
 and VR, an interesting analysis could be made into the applicability of
 multi-source network developments to additional display methods.
 Mobile AR and
\noun on
Hololens
\noun default
 displays for
\noun on
LiveScan
\noun default
 have both been demonstrated, and either could prove interesting when considered
 in a multi-source context.
\end_layout

\begin_layout Section
Results
\end_layout

\begin_layout Section
Summary
\end_layout

\begin_layout Standard
Within this piece the process of extending the
\noun on
LiveScan3D
\noun default
 software to include multi-source holoportation has been introduced.
 The use of such a system has many applications, from those inherited from
 traditional 2D video, such as conference calls, to new uses that are wholly
 unique to the medium.
\end_layout

\begin_layout Standard
The literature review contextualises the
\noun on
LiveScan
\noun default
 suite within the wider spaces of AR, VR and 3D video, as well as multi-source
 holoportation itself.
 Previous examples of holoportation are described and their aims of achieving
 telepresence are discussed.
\end_layout

\begin_layout Standard
The current state of the project is laid out, showing good progress through
 the required areas of development.
 Of these areas, the display element has been extended to allow the rendering
 of multiple environments simultaneously with a dynamic sub-system of geometric
 transformations.
 The transformations are responsive to user input, allowing arbitrary placement
 and orientation of individual sources within the display space.
 While this control interface allows free movement in the most naturally
 traversed axes, it could use some additional tuning to make it feel more
 intuitive.
\end_layout

\begin_layout Standard
The next steps for the project leading up to its completion are presented;
 the initial and current plans for the remaining work are described, and
 stretch goals are defined for any additional time.
 How the work will be presented in the final report is also described.
\end_layout

\begin_layout Section
Conclusions
\end_layout

\begin_layout Standard
Holoportation and multi-source configurations thereof are important technologies
 for cross-reality experiences with broad appeal across many applications.
 The use of consumer hardware, specifically the
\noun on
Kinect
\noun default
, has accelerated the space.
\end_layout

\begin_layout Standard
At roughly the halfway point of the time allowed for this project, the native
 display has been successfully extended to meet the deliverable specification.
 This has resulted in the
\noun on
OpenGL
\noun default
 window being capable of simultaneously rendering multiple sources with
 arbitrary placement and orientation within the display space.
\end_layout

\begin_layout Standard
From this point, the network layer of the suite will be developed to also
 match the specification, allowing connected clients to be grouped into
 sources for polling and processing.
\end_layout

\begin_layout Standard
Following the development of both components, testing methodologies will
 be defined and carried out to gather quantitative results for the final
 product.
 A final report on the results will be available in May 2020.
\end_layout

\begin_layout Standard
\begin_inset Newpage newpage
\end_inset


\end_layout

\begin_layout Standard
\begin_inset CommandInset bibtex
LatexCommand bibtex
btprint "btPrintCited"
bibfiles "/home/andy/uni/dissertation/references"
options "bibtotoc"

\end_inset


\end_layout

\begin_layout Section
\start_of_appendix
Existing Data Structures
\begin_inset CommandInset label
LatexCommand label
name "sec:Existing-Data-Structures"

\end_inset


\end_layout

\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../snippets/point2f.cs"
lstparams "language={[Sharp]C},caption={Cartesian coordinate in 2 dimensions}"

\end_inset


\end_layout

\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../snippets/point3f.cs"
lstparams "language={[Sharp]C},caption={Cartesian coordinate in 3 dimensions}"

\end_inset


\end_layout

\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../snippets/affinetransform.cs"
lstparams "language={[Sharp]C},caption={Affine transformation matrix with translation}"

\end_inset


\end_layout

\begin_layout Section
New Data Structures
\end_layout

\begin_layout Subsection
Frame
\begin_inset CommandInset label
LatexCommand label
name "subsec:Frame"

\end_inset


\end_layout

\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../snippets/frame.cs"
lstparams "language={[Sharp]C},caption={Point cloud with Client ID}"

\end_inset


\end_layout

\end_body
\end_document
diff --git a/midyear report/midyear.lyx b/midyear report/midyear.lyx
index d4b1a14..44065ca 100644
--- a/midyear report/midyear.lyx
+++ b/midyear report/midyear.lyx
@@ -2839,7 +2839,7 @@ Following the development of the two, testing methodologies will be defined
 \begin_inset CommandInset bibtex
 LatexCommand bibtex
 btprint "btPrintCited"
-bibfiles "references"
+bibfiles "/home/andy/uni/dissertation/references"
 options "bibtotoc"
 
 \end_inset
diff --git a/midyear report/references.bib b/references.bib
similarity index 100%
rename from midyear report/references.bib
rename to references.bib