linear-predictive-speech-synth/report/report.lyx
2020-11-06 19:08:42 +00:00

564 lines
13 KiB
Plaintext

#LyX 2.3 created this file. For more info see http://www.lyx.org/
\lyxformat 544
\begin_document
\begin_header
\save_transient_properties true
\origin unavailable
\textclass article
\begin_preamble
\def\changemargin#1#2{\list{}{\rightmargin#2\leftmargin#1}\item[]}
\let\endchangemargin=\endlist
\pagenumbering{roman}
\usepackage{color}
\definecolor{commentgreen}{RGB}{0,94,11}
\end_preamble
\use_default_options true
\begin_modules
customHeadersFooters
minimalistic
todonotes
figs-within-sections
\end_modules
\maintain_unincluded_children false
\language english
\language_package default
\inputencoding auto
\fontencoding global
\font_roman "default" "default"
\font_sans "default" "default"
\font_typewriter "default" "default"
\font_math "auto" "auto"
\font_default_family default
\use_non_tex_fonts false
\font_sc false
\font_osf false
\font_sf_scale 100 100
\font_tt_scale 100 100
\use_microtype true
\use_dash_ligatures true
\graphics default
\default_output_format default
\output_sync 0
\bibtex_command biber
\index_command default
\paperfontsize 11
\spacing onehalf
\use_hyperref true
\pdf_title "Linear Predictive Speech Synthesizer"
\pdf_author "Andy Pack"
\pdf_subject "EEEM030 Speech & Audio Processing & Recognition"
\pdf_keywords "EEEM030"
\pdf_bookmarks true
\pdf_bookmarksnumbered false
\pdf_bookmarksopen false
\pdf_bookmarksopenlevel 1
\pdf_breaklinks false
\pdf_pdfborder true
\pdf_colorlinks false
\pdf_backref false
\pdf_pdfusetitle true
\papersize default
\use_geometry true
\use_package amsmath 1
\use_package amssymb 1
\use_package cancel 1
\use_package esint 1
\use_package mathdots 1
\use_package mathtools 1
\use_package mhchem 1
\use_package stackrel 1
\use_package stmaryrd 1
\use_package undertilde 1
\cite_engine biblatex
\cite_engine_type authoryear
\biblio_style plain
\biblio_options urldate=long
\biblatex_bibstyle ieee
\biblatex_citestyle ieee
\use_bibtopic false
\use_indices false
\paperorientation portrait
\suppress_date true
\justification true
\use_refstyle 1
\use_minted 0
\index Index
\shortcut idx
\color #008000
\end_index
\leftmargin 1.8cm
\topmargin 2cm
\rightmargin 1.8cm
\bottommargin 2cm
\secnumdepth 3
\tocdepth 3
\paragraph_separation skip
\defskip medskip
\is_math_indent 0
\math_numbering_side default
\quotes_style english
\dynamic_quotes 0
\papercolumns 1
\papersides 1
\paperpagestyle fancy
\bullet 1 0 9 -1
\bullet 2 0 24 -1
\tracking_changes false
\output_changes false
\html_math_output 0
\html_css_as_file 0
\html_be_strict false
\end_header
\begin_body
\begin_layout Title
\size giant
Linear Predictive Speech Synthesizer
\end_layout
\begin_layout Author
Andy Pack
\end_layout
\begin_layout Standard
\begin_inset VSpace 15pheight%
\end_inset
\end_layout
\begin_layout Standard
\align center
\begin_inset Graphics
filename surrey.png
lyxscale 15
width 40col%
\end_inset
\end_layout
\begin_layout Standard
\begin_inset VSpace vfill
\end_inset
\end_layout
\begin_layout Standard
\noindent
\align center
EEEM030
\begin_inset Newline newline
\end_inset
November 2020
\size large
\begin_inset Newline newline
\end_inset
Department of Electrical and Electronic Engineering
\begin_inset Newline newline
\end_inset
Faculty of Engineering and Physical Sciences
\begin_inset Newline newline
\end_inset
University of Surrey
\end_layout
\begin_layout Standard
\begin_inset Newpage newpage
\end_inset
\end_layout
\begin_layout Abstract
Abstract
\end_layout
\begin_layout Standard
\begin_inset CommandInset toc
LatexCommand tableofcontents
\end_inset
\end_layout
\begin_layout Standard
\begin_inset Newpage newpage
\end_inset
\end_layout
\begin_layout Standard
\begin_inset FloatList figure
\end_inset
\begin_inset CommandInset toc
LatexCommand lstlistoflistings
\end_inset
\end_layout
\begin_layout Standard
\begin_inset Newpage newpage
\end_inset
\end_layout
\begin_layout Right Footer
Andy Pack / 6420013
\end_layout
\begin_layout Left Footer
November 2020
\end_layout
\begin_layout Left Header
EEEM030 Coursework 1
\end_layout
\begin_layout Standard
\begin_inset ERT
status open
\begin_layout Plain Layout
\backslash
pagenumbering{arabic}
\end_layout
\begin_layout Plain Layout
\backslash
setcounter{page}{1}
\end_layout
\end_inset
\end_layout
\begin_layout Section
Introduction
\end_layout
\begin_layout Section
Brief
\end_layout
\begin_layout Standard
The aim of this report is to demonstrate how digital signal processing technique
s can be used to analyse, model and synthesise speech.
The task will be considered as two areas of concern: modelling
and synthesis.
\end_layout
\begin_layout Standard
The modelling stage will utilise Linear Predictive Coding and the source-filter
model of speech to construct a filter that acts similarly to the vocal
tract's effect on sound produced by the vocal cords.
Comparisons of the frequency response for both the estimated filter and
the original sound will be presented; the effect of different filter orders
will also be demonstrated.
Relevant parameters of the original vowel speech segment will be presented
including the fundamental frequency and formant frequencies.
\end_layout
\begin_layout Standard
The synthesis stage will complete the source-filter model of speech by creating
a suitable periodic sound source to be modulated by the previous filter.
With a complete source-filter model, artificial vowel sounds will be synthesise
d and analysed.
Subjective assessments will be made as to the differences between the original
sound and the final product of the model when system parameters are varied.
\end_layout
\begin_layout Section
Implementation
\end_layout
\begin_layout Standard
The implementation of this system was completed using
\noun on
Matlab
\noun default
with aid from functions in the digital signal processing toolbox among
others.
After loading a vowel sample, a segment of variable length (100 ms was
standard) was clipped for processing.
The clip optionally also underwent pre-emphasis using a high-pass filter.
As speech spectra can tend to have higher energy at lower frequencies,
the use of pre-emphasis can balance the magnitude across the spectrum.
A first-order filter was used and the coefficient varied; over-use could
prove excessive for higher frequencies including fricative sounds.
\end_layout
\begin_layout Subsection
Modelling
\end_layout
\begin_layout Standard
In order to estimate the filter state of the vocal tract, the linear predictive
coding coefficients of varying orders were calculated using the
\begin_inset listings
lstparams "language=Matlab,basicstyle={\ttfamily},tabsize=4"
inline true
status open
\begin_layout Plain Layout
lpc(signal, order)
\end_layout
\end_inset
function.
In order to compare the frequency response of the LPC filter with the original
signal, the Fourier transform of the signal was calculated.
The frequency domain representation of the LPC filter was found using the
\begin_inset listings
lstparams "language=Matlab,basicstyle={\ttfamily},tabsize=4"
inline true
status open
\begin_layout Plain Layout
freqz(b, a, n, f)
\end_layout
\end_inset
function and co-plotted with the original signal.
This frequency plot of the LPC filter constitutes the spectral envelope
of the signal and the vowel formant frequencies can be found at the maxima
of the spectrum.
Due to the smooth profile of the LPC spectrum, formant frequencies were
estimated by identifying the local maxima of the function.
\end_layout
\begin_layout Standard
In order to find the fundamental frequency of the signal, the cepstrum was
used.
The use of a low-pass filter was investigated in order to smooth the cepstrum
before programmatically finding pitch period candidates by applying
\begin_inset Formula $x$
\end_inset
and
\begin_inset Formula $y$
\end_inset
thresholds.
\end_layout
\begin_layout Subsection
Synthesis
\end_layout
\begin_layout Standard
In order to synthesise speech, a periodic impulse train at the identified
fundamental frequency of the original vowel was generated.
The impulse train was sampled at the same frequency as the original sound.
\end_layout
\begin_layout Section
Results
\end_layout
\begin_layout Subsection
LPC Filter
\end_layout
\begin_layout Subsubsection
Order Variation
\end_layout
\begin_layout Subsection
Spectral Analysis
\end_layout
\begin_layout Subsubsection
Fundamental Frequency
\end_layout
\begin_layout Subsubsection
Formant Frequencies
\end_layout
\begin_layout Subsubsection
Cepstrum Smoothing
\end_layout
\begin_layout Subsection
Synthesis
\end_layout
\begin_layout Section
Discussion
\end_layout
\begin_layout Section
Conclusion
\end_layout
\begin_layout Standard
\begin_inset Newpage newpage
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset label
LatexCommand label
name "sec:bibliography"
\end_inset
\begin_inset CommandInset bibtex
LatexCommand bibtex
btprint "btPrintCited"
bibfiles "references"
options "bibtotoc"
\end_inset
\end_layout
\begin_layout Section
\start_of_appendix
Source Code
\begin_inset CommandInset label
LatexCommand label
name "sec:Code"
\end_inset
\end_layout
\begin_layout Standard
While much of the code was developed in individual scripts in order to experimen
t with separate aspects of the system, for collecting results a script which
constitutes the entire system was written,
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open
\begin_layout Plain Layout
lpss.m
\end_layout
\end_inset
.
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../lpss.m"
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram, mfcc, spectro, fft_, autocorr, clip_segment, islocalmax, ms_to_samples, rceps, cceps, ones, audioplayer, play, get_impulse_train, lpc},caption={Main script including source-filter model and spectral analysis},label={main_script}"
\end_inset
\begin_inset Newpage pagebreak
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../func/spectro.m"
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram, ms_to_samples},caption={Spectrogram plotting wrapper function},label={spectrogram_function}"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../func/fft_.m"
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram},caption={Fast Fourier transform wrapper function},label={fft_function}"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../func/autocorr.m"
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram},caption={Autocorrelation plotting wrapper function},label={autocorr_function}"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../func/clip_segment.m"
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram, ms_to_samples},caption={Retrieve a segment of the original speech signal},label={clip_segment_function}"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../func/ms_to_samples.m"
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram},caption={Transform time in milliseconds into the respective number of samples},label={ms_to_samples_function}"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../func/get_impulse_train.m"
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram, ms_to_samples, repmat},caption={Generate an impulse train of given fundamental frequency at a provided sampling frequency for a given length of time},label={get_impulse_train_function}"
\end_inset
\end_layout
\end_body
\end_document