diff --git a/lpss.m b/lpss.m index f02e09f..f295780 100644 --- a/lpss.m +++ b/lpss.m @@ -5,16 +5,16 @@ close all;clear all;clc; SEGMENT_LENGTH = 100; % ms -SEGMENT_OFFSET = 0; % ms from start +SEGMENT_OFFSET = 20; % ms from start -LPC_ORDER = 20; +LPC_ORDER = 25; AC_DISP_SAMPLES = 1000; % autocorrelation display samples WINDOW_NUMBER = 10; % number of windows for spectrogram -WINDOW_OVERLAP = 5; % ms -SYNTH_WINDOW_NUMBER = 100; % number of windows for spectrogram -SYNTH_WINDOW_OVERLAP = 10; % ms +WINDOW_OVERLAP = 10; % ms +SYNTH_WINDOW_NUMBER = 60; % number of windows for spectrogram +SYNTH_WINDOW_OVERLAP = 20; % ms -PREEMPHASIS_COEFFS = [1 -0.8]; % first order zero coeff for pre-emphasis +PREEMPHASIS_COEFFS = [1 -0.9]; % first order zero coeff for pre-emphasis F0 = 60; % low-pitched male speech % F0 = 600; % children @@ -36,8 +36,9 @@ ORIG_LPC_T_COMPARE = false; ORIG_SPECTROGRAM = true; SYNTH_SPECTROGRAM = true; -SYNTHESISED_SOUND_LENGTH = 500; % ms +SYNTHESISED_SOUND_LENGTH = 1000; % ms +WRITE = false; PLAY = false; %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -128,7 +129,7 @@ if PREEMPHASIS prefilter_plot = plot(prefilter_freqs, 20*log10(abs(prefilter_vals)), 'g'); prefilter_plot.Color(4) = 0.8; - prefilter_plot.LineWidth = 1; + prefilter_plot.LineWidth = 1.5; end %% PLOT @@ -191,7 +192,7 @@ maxima_plot.MarkerSize = 8; maxima_plot.LineWidth = 1.5; grid -xlabel('Quefrency') +xlabel('Quefrency (samples)') ylabel('ceps(x[n])') xlim([0 L / 2]) title('Speech Signal Cepstrum') @@ -202,7 +203,7 @@ end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % CEPSTRUM if CEPSTRUM_PLOT && length(cep_maxima_times) >= 1 - pitch_period = cep_maxima_times(c == max(c)); + pitch_period = cep_maxima_times(c == max(c)) fundamental_freq = 1 / (pitch_period / Fs) else disp('pitch periods not identified') @@ -216,7 +217,9 @@ if exist('fundamental_freq') synth_sound = filter(1, a, excitation); + if WRITE audiowrite('out.wav', synth_sound, Fs); + end 
end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% diff --git a/lpss_lpc.m b/lpss_lpc.m index 4442ee4..e44e3ec 100644 --- a/lpss_lpc.m +++ b/lpss_lpc.m @@ -15,8 +15,12 @@ DISPLAY_SAMPLES = min([DISPLAY_SAMPLES L]); % CALCULATE FFT [freq_dom_freqs, freq_dom_vals] = fft_(y, Fs); +index = 1; + for ITER=1:5:ORDER + subplot(4, 2, index); + % LPC a = lpc(y,ITER); % signal, filter order @@ -24,12 +28,56 @@ for ITER=1:5:ORDER [h, filter_freqs] = freqz(1, a, length(freq_dom_freqs), Fs); figure(1) - plot(freq_dom_freqs, 20*log10(freq_dom_vals), 'r', filter_freqs, 20*log10(abs(h)), 'b') + %% SIGNAL FFT RESPONSE + orig_freq_plot = plot(freq_dom_freqs, 20*log10(abs(freq_dom_vals)), 'black'); + orig_freq_plot.Color(4) = 0.1; + orig_freq_plot.LineWidth = 1; + hold on + + %% LPC FILTER RESPONSE + [filter_vals, filter_freqs] = freqz(1, a, length(freq_dom_freqs), Fs); + filter_vals_db = 20*log10(abs(filter_vals)); + + lpc_freq_plot = plot(filter_freqs, filter_vals_db, 'b'); + lpc_freq_plot.LineWidth = 2; + + %% ARYULE FILTER RESPONSE + ary = aryule(y, ITER); + [filter_vals, filter_freqs] = freqz(1, ary, length(freq_dom_freqs), Fs); + filter_vals_db = 20*log10(abs(filter_vals)); + + lpc_freq_plot = plot(filter_freqs, filter_vals_db, 'r'); + lpc_freq_plot.LineWidth = 1; + + %% ARCOV FILTER RESPONSE + arc = arcov(y, ITER); + [filter_vals, filter_freqs] = freqz(1, arc, length(freq_dom_freqs), Fs); + filter_vals_db = 20*log10(abs(filter_vals)); + + lpc_freq_plot = plot(filter_freqs, filter_vals_db, 'g'); + lpc_freq_plot.LineWidth = 1; + +% % MAXIMA +% % estimate formant frequencies from maxima of LPC filter freq response +% maxima = islocalmax(filter_vals_db); +% maxima_freqs = filter_freqs(maxima) +% maxima_db = filter_vals_db(maxima); +% +% if length(maxima_freqs) ~= 0 +% maxima_plot = plot(maxima_freqs, maxima_db, 'rx'); +% maxima_plot.MarkerSize = 12; +% maxima_plot.LineWidth = 2; +% end + + % plot(freq_dom_freqs, 20*log10(freq_dom_vals), 'r', filter_freqs, 
20*log10(abs(h)), 'b') % plot(w/pi, 20*log10(abs(h))) + hold off grid xlabel('Frequency (Hz)') ylabel('Magnitude (dB)') - legend('Original Signal', 'LPC Filter') +% legend('Original Signal', 'LPC Filter', 'Local Maxima') + legend('Original Signal', 'LPC Filter', 'Aryule LPC Filter', 'Arcov LPC Filter') + title(strcat(['LPC Spectra: Order ' num2str(ITER)])); % COMPARE TWO SIGNALS TIME DOMAIN % est_y = filter(0.02, a, y); @@ -43,7 +91,6 @@ for ITER=1:5:ORDER % ylabel('Amplitude') % legend('Original signal','LPC estimate') - - pause(0.5) + index = index + 1; end \ No newline at end of file diff --git a/report/references.bib b/report/references.bib index e69de29..8a31229 100644 --- a/report/references.bib +++ b/report/references.bib @@ -0,0 +1,12 @@ +@article{formant-frequencies, + author = {Scherer, Stefan and Lucas, Gale and Gratch, Jonathan and Rizzo, Albert and Morency, Louis-Philippe}, + doi = {10.1109/TAFFC.2015.2440264}, + journal = {IEEE Transactions on Affective Computing}, + month = {01}, + pages = {1--1}, + title = {Self-Reported Symptoms of Depression and PTSD Are Associated with Reduced Vowel Space in Screening Interviews}, + url = {https://www.researchgate.net/publication/279164505_Self-Reported_Symptoms_of_Depression_and_PTSD_Are_Associated_with_Reduced_Vowel_Space_in_Screening_Interviews}, + volume = {7}, + year = {2015} +} + diff --git a/report/report.lyx b/report/report.lyx index 189720b..8845823 100644 --- a/report/report.lyx +++ b/report/report.lyx @@ -19,7 +19,6 @@ customHeadersFooters minimalistic todonotes -figs-within-sections \end_modules \maintain_unincluded_children false \language english @@ -207,6 +206,17 @@ LatexCommand tableofcontents \end_inset +\end_layout + +\begin_layout Standard +\begin_inset FloatList table + +\end_inset + + +\end_layout + +\begin_layout Standard \begin_inset CommandInset toc LatexCommand lstlistoflistings @@ -231,7 +241,7 @@ November 2020 \end_layout \begin_layout Left Header -EEEM030 Coursework 1 +EEEM030 
Coursework \end_layout \begin_layout Standard @@ -261,6 +271,12 @@ setcounter{page}{1} Introduction \end_layout +\begin_layout Standard +The ability to process and analyse speech signals has facilitated developments + throughout their use in the digital space with applications from data compressi +on to speech recognition. +\end_layout + \begin_layout Section Brief \end_layout @@ -274,8 +290,8 @@ s can be used to analyse, model and synthesise speech. \begin_layout Standard The modelling stage will utilise Linear Predictive Coding and the source-filter - model of speech to construct a filter that acts similarly to the vocal - tract's effect on sound produced by the vocal chords. + model of speech to construct an all-pole filter that acts similarly to + the vocal tract's effect on sound produced by the vocal cords. Comparisons of the frequency response for both the estimated filter and the original sound will be presented, the effect of different filter orders will also be demonstrated. @@ -303,17 +319,51 @@ Matlab \noun default with aid from functions in the digital signal processing toolbox among others. - Following loading a vowel sample, a segment of changing length (100ms was - standard) was clipped for processing. + Following loading a vowel sample, a segment of given length (100ms was + typical) was clipped for processing. The clip optionally also underwent pre-emphasis using a high pass filter. As speech spectra can tend to have higher energy at lower frequencies, the use of pre-emphasis can balance the magnitude across the spectrum. A first order filter was used and the coefficient varied, over-use could prove excessive for higher frequencies including fricative sounds. 
+ The majority of the investigations were conducted on two samples, +\begin_inset listings +lstparams "language=Matlab,basicstyle={\ttfamily},tabsize=4" +inline true +status open + +\begin_layout Plain Layout + +hood_m.wav +\end_layout + +\end_inset + + and +\begin_inset listings +lstparams "language=Matlab,basicstyle={\ttfamily},tabsize=4" +inline true +status open + +\begin_layout Plain Layout + +head_f.wav +\end_layout + +\end_inset + +, any results from other samples are identified as such. \end_layout \begin_layout Subsection Modelling +\begin_inset CommandInset label +LatexCommand label +name "subsec:Modelling" + +\end_inset + + \end_layout \begin_layout Standard @@ -352,14 +402,16 @@ freqz(b, a, n, f) This frequency plot of the LPC filter constitutes the spectral envelope of the signal and the vowel formant frequencies can be found at the maxima of the spectrum. - Due to the smooth profile of the LPC spectrum, formant frequencies were - estimated by identifying the local maxima of the function. + The smooth profile of the LPC spectrum allowed the formant frequencies + to be estimated by identifying the local maxima of the function. \end_layout \begin_layout Standard In order to find the fundamental frequency of the signal, the cepstrum was used. - The use of a low pass filter was investigated in order to smooth the cepstrum + Regular periodic frequencies in the time domain present as a peak in the + quefrency domain, this can also be achieved with an autocorrelation function. + The use of a low-pass filter was investigated in order to smooth the cepstrum before programmatically finding pitch period candidates by applying \begin_inset Formula $x$ \end_inset @@ -369,6 +421,32 @@ In order to find the fundamental frequency of the signal, the cepstrum was \end_inset thresholds. 
+ Firstly, local maxima of the cepstrum function were found using the +\begin_inset listings +lstparams "language=Matlab,basicstyle={\ttfamily}" +inline true +status open + +\begin_layout Plain Layout + +islocalmax(x) +\end_layout + +\end_inset + + function. + A minimum quefrency threshold of 20 was applied to ignore the transient-like + oscillations at small +\begin_inset Formula $x$ +\end_inset + + values. + Lowering the quefrency corresponds to an increase in frequency, thus it + is reasonable to discard these values when 20 samples represents 1200Hz + sampled at 24kHz, a frequency higher than that of the fundamental frequency + being investigated. + Additionally a minimum cepstrum threshold of 0.075 was used, from here the + maximum value was used as the pitch period. \end_layout \begin_layout Subsection @@ -378,7 +456,47 @@ Synthesis \begin_layout Standard In order to synthesise speech, a periodic impulse train at the identified fundamental frequency of the original vowel was generated. - The impulse train was sampled at the same frequency as the original sound. + As the fundamental frequency of speech is far lower than a typical audio + signal would be sampled at, a carrier signal of the same sampling frequency + as the original sound was modulated by the lower frequency impulse train, + see listing +\begin_inset CommandInset ref +LatexCommand ref +reference "get_impulse_train_function" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. + In order to produce the final synthesised speech, the generated impulse + train must be convolved (in the time domain) with the transfer function + of the LPC filter representing the vocal tract. 
+ In +\noun on +Matlab +\noun default + this can be completed with the +\begin_inset listings +lstparams "language=Matlab,basicstyle={\ttfamily}" +inline true +status open + +\begin_layout Plain Layout + +filter(b, a, x) +\end_layout + +\end_inset + + which takes the provided coefficients ( +\begin_inset Formula $a,b$ +\end_inset + +) and applies the transfer function these describe. + This final signal was written to disk and played for comparison to the + original. \end_layout \begin_layout Section @@ -389,34 +507,2366 @@ Results LPC Filter \end_layout +\begin_layout Standard +LPC filter coefficients of varying orders were calculated, the values for + each vowel sample at order 20 can be seen in table +\begin_inset CommandInset ref +LatexCommand ref +reference "tab:Order-20-LPC-Coeffs" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. + The frequency response for the filters these coefficients represent can + be seen in figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:stacked-spectra" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +, as described in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Modelling" +plural "false" +caps "false" +noprefix "false" + +\end_inset + + the local maxima of the filter response were also plotted as red crosses. 
+\end_layout + +\begin_layout Standard +\begin_inset Float table +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center + +\size small +\begin_inset Tabular + + + + + + + + + + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +2 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +3 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +4 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +5 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +6 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +7 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +8 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +9 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +10 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +11 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +head_f +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-1.8275 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +0.6130 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +0.7424 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-0.2264 +\end_layout + 
+\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-1.0744 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +0.8921 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +0.4595 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-0.8184 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-0.3913 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +1.2207 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +hood_m +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +1 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-1.9166 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +0.9014 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +0.1898 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +0.5570 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-0.9309 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-0.4874 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +1.0068 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +0.0966 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-0.4469 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +0.1029 +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Plain Layout + +\size small +\begin_inset VSpace medskip +\end_inset + + 
+\end_layout + +\begin_layout Plain Layout +\noindent +\align center + +\size small +\begin_inset Tabular + + + + + + + + + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +12 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +13 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +14 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +15 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +16 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +17 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +18 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +19 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +20 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +21 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +head_f +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-0.3812 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-0.5842 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +0.2820 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +0.7351 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-0.8951 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +0.1172 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small 
+0.4359 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-0.1220 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-0.3546 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +0.1977 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +hood_m +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-0.6152 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +0.7490 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +0.1002 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-0.3020 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-0.1184 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-0.0494 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +0.6293 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-0.3474 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +-0.2172 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\size small +0.2164 +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption Standard + +\begin_layout Plain Layout +Order 20 LPC coefficients for both investigated samples +\begin_inset CommandInset label +LatexCommand label +name "tab:Order-20-LPC-Coeffs" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align 
center +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset Graphics + filename ../resources/head_f_spect_25.png + lyxscale 10 + width 90col% + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption Standard + +\begin_layout Plain Layout +\begin_inset listings +lstparams "basicstyle={\ttfamily}" +inline true +status open + +\begin_layout Plain Layout + +head_f +\end_layout + +\end_inset + +, order 25 +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset Graphics + filename ../resources/hood_m_spect_25.png + lyxscale 10 + width 90col% + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption Standard + +\begin_layout Plain Layout +\begin_inset listings +lstparams "basicstyle={\ttfamily}" +inline true +status open + +\begin_layout Plain Layout + +hood_m +\end_layout + +\end_inset + +, order 25 +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption Standard + +\begin_layout Plain Layout +LPC filter and vowel segment spectra for both investigated samples +\begin_inset CommandInset label +LatexCommand label +name "fig:stacked-spectra" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +As the spectra are plotted with the same frequency bounds, the peaks of + the filter response corresponding to estimations of the formant frequencies + can be compared between the male and female voices. + In general the male's formant frequencies are lower than for the female's + sample, this can be seen specifically with the first few peaks. 
+ It's worth noting that the first local maximum identified in the +\begin_inset listings +lstparams "basicstyle={\ttfamily}" +inline true +status open + +\begin_layout Plain Layout + +head_f +\end_layout + +\end_inset + + sample does not appear to have identified a peak that would be considered + a formant. +\end_layout + \begin_layout Subsubsection Order Variation \end_layout +\begin_layout Standard +The effect of increasing the order of the LPC filter can be seen in figure + +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:Spectrum-Tile" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +, where the order of the +\begin_inset listings +lstparams "basicstyle={\ttfamily}" +inline true +status open + +\begin_layout Plain Layout + +hood_m +\end_layout + +\end_inset + + filter is repeatedly incremented by 5. + In general, as the order of the filter is increased, the spectral response + of the LPC filter more closely fits the spectrum of the original vowel segment. + At lower orders, the filter's response can smooth over multiple peaks and + valleys in the original signal as can be seen at order 6, whereas by order + 36 the LPC spectrum follows all of the major motions of the speech signal. 
+\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset Graphics + filename ../resources/hood_m_lpc_tile.png + lyxscale 20 + width 100col% + +\end_inset + + +\begin_inset Caption Standard + +\begin_layout Plain Layout +Effect of increasing LPC filter order on the +\begin_inset listings +lstparams "basicstyle={\ttfamily}" +inline true +status open + +\begin_layout Plain Layout + +hood_m +\end_layout + +\end_inset + + sample +\begin_inset CommandInset label +LatexCommand label +name "fig:Spectrum-Tile" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\end_layout + \begin_layout Subsection Spectral Analysis \end_layout \begin_layout Subsubsection -Fundamental Frequency +Formant Frequencies \end_layout -\begin_layout Subsubsection -Formant Frequencies +\begin_layout Standard +As described previously, the smooth profile of the LPC filter spectra makes + the use of the local maxima of this curve reasonable estimations as to + the peaks. + The first three formants for the order 25 filters seen in figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:stacked-spectra" +plural "false" +caps "false" +noprefix "false" + +\end_inset + + can be seen in table +\begin_inset CommandInset ref +LatexCommand ref +reference "tab:formant-frequencies" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +, as described above the first local maxima for the female +\begin_inset listings +lstparams "basicstyle={\ttfamily}" +inline true +status open + +\begin_layout Plain Layout + +head_f +\end_layout + +\end_inset + + sample was not included as +\begin_inset Formula $f_{1}$ +\end_inset + + as it did not refer to a peak in the way that would indicate a formant. 
+\end_layout + +\begin_layout Standard +\begin_inset Float table +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset listings +lstparams "basicstyle={\ttfamily}" +inline true +status open + +\begin_layout Plain Layout + +head_f +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset listings +lstparams "basicstyle={\ttfamily}" +inline true +status open + +\begin_layout Plain Layout + +hood_m +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Formula $f_{1}$ +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +719.4 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +369.7 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Formula $f_{2}$ +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2,218.2 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1,578.7 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Formula $f_{3}$ +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +3,197.3 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2,278.1 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Formula $f_{2}-f_{1}$ +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1,498.8 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1,209 +\end_layout + +\end_inset + + + + +\end_inset 
+ + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption Standard + +\begin_layout Plain Layout +First formant frequencies at order 25, Hz +\begin_inset CommandInset label +LatexCommand label +name "tab:formant-frequencies" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +Table +\begin_inset CommandInset ref +LatexCommand ref +reference "tab:IPA-vowels" +plural "false" +caps "false" +noprefix "false" + +\end_inset + + presents average formant frequencies for the investigated vowel sounds + as displayed in +\begin_inset CommandInset citation +LatexCommand cite +key "formant-frequencies" +literal "false" + +\end_inset + +. + The percentage difference between these averages and the calculated estimations + are also presented. + The female sample was closer to the averages than the male sample. +\end_layout + +\begin_layout Standard +\begin_inset Float table +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset Tabular + + + + + + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +Average Frequency +\begin_inset CommandInset citation +LatexCommand cite +key "formant-frequencies" +literal "false" + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +Measured % Difference +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Sample +\end_layout + +\end_inset + + 
+\begin_inset Text + +\begin_layout Plain Layout +Vowel +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Formula $f_{1}$ +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Formula $f_{2}$ +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Formula $f_{2}-f_{1}$ +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Formula $f_{1}$ +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Formula $f_{2}$ +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset Formula $f_{2}-f_{1}$ +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset listings +lstparams "basicstyle={\ttfamily}" +inline true +status open + +\begin_layout Plain Layout + +head_f +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +/ɛ/ +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +731 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +2,058 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1,327 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1.6 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +7.8 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +12.9 +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset listings +lstparams "basicstyle={\ttfamily}" +inline true +status open + +\begin_layout Plain Layout + +hood_m +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +/ʊ/ 
+\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +469 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +1,122 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +653 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +21.2 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +40.7 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +85.1 +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption Standard + +\begin_layout Plain Layout +Relevant IPA vowels and their average formant frequencies +\begin_inset CommandInset citation +LatexCommand cite +key "formant-frequencies" +literal "false" + +\end_inset + + +\begin_inset CommandInset label +LatexCommand label +name "tab:IPA-vowels" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + \end_layout \begin_layout Subsubsection Cepstrum Smoothing \end_layout +\begin_layout Standard +The effect of smoothing the cepstrum with a low-pass filter is presented + in figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:smoothed-cepstrum" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. + When employing smoothing, the peak corresponding to the pitch period has + been amplified compared to the unsmoothed curve where the pitch period + does not reach far beyond the noise of the rest of the function. + Following this, smoothing was employed when identifying the fundamental + frequency. 
+\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset Graphics + filename ../resources/head_f_rcep_~smooth.png + lyxscale 10 + width 50col% + +\end_inset + + +\begin_inset Graphics + filename ../resources/head_f_rcep_smooth.png + lyxscale 10 + width 50col% + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption Standard + +\begin_layout Plain Layout +Real cepstrum for +\begin_inset listings +lstparams "basicstyle={\ttfamily}" +inline true +status open + +\begin_layout Plain Layout + +head_f +\end_layout + +\end_inset + + with and without low-pass filtering, thresholded local maxima crossed +\begin_inset CommandInset label +LatexCommand label +name "fig:smoothed-cepstrum" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Subsubsection +Fundamental Frequency +\end_layout + +\begin_layout Standard +The fundamental frequency was calculated by identifying the pitch period + in the real cepstrum. + The cepstrums for both samples were thresholded and the resulting candidates can + be seen in figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:cepstrums-w-pitch-period" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. + The identified pitch period, +\begin_inset Formula $t_{p}$ +\end_inset + +, and the corresponding fundamental frequency, +\begin_inset Formula $f_{f}$ +\end_inset + +, can be seen in table +\begin_inset CommandInset ref +LatexCommand ref +reference "tab:fund-freq" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. 
+ +\begin_inset Formula $f_{f}$ +\end_inset + + was calculated using the following where +\begin_inset Formula $f_{s}$ +\end_inset + + is the sample frequency, +\end_layout + +\begin_layout Standard +\begin_inset Formula +\[ +f_{f}=\frac{1}{\nicefrac{t_{p}}{f_{s}}} +\] + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset Graphics + filename ../resources/head_f_rcep_smooth_X.png + lyxscale 10 + width 50col% + +\end_inset + + +\begin_inset Graphics + filename ../resources/hood_m_rcep_smooth_X.png + lyxscale 10 + width 50col% + +\end_inset + + +\begin_inset Caption Standard + +\begin_layout Plain Layout +Real cepstrums with candidate pitch periods highlighted +\begin_inset CommandInset label +LatexCommand label +name "fig:cepstrums-w-pitch-period" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset Float table +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset Tabular + + + + + + + +\begin_inset Text + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset listings +lstparams "basicstyle={\ttfamily}" +inline true +status open + +\begin_layout Plain Layout + +head_f +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +\begin_inset listings +lstparams "basicstyle={\ttfamily}" +inline true +status open + +\begin_layout Plain Layout + +hood_m +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Pitch Period, samples +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +105 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +255 +\end_layout + 
+\end_inset + + + + +\begin_inset Text + +\begin_layout Plain Layout +Fundamental Frequency, Hz +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +228.57 +\end_layout + +\end_inset + + +\begin_inset Text + +\begin_layout Plain Layout +94.12 +\end_layout + +\end_inset + + + + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption Standard + +\begin_layout Plain Layout +Pitch period and fundamental frequency as calculated from the real cepstrum +\begin_inset CommandInset label +LatexCommand label +name "tab:fund-freq" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Subsubsection +Pre-emphasis +\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset Graphics + filename ../resources/hood_m_spect_25_premph_0.9.png + lyxscale 20 + width 80col% + +\end_inset + + +\end_layout + +\begin_layout Plain Layout +\begin_inset Caption Standard + +\begin_layout Plain Layout +LPC spectra for +\begin_inset listings +lstparams "basicstyle={\ttfamily}" +inline true +status open + +\begin_layout Plain Layout + +hood_m +\end_layout + +\end_inset + + following pre-emphasis using coefficients, [1 -0.9] +\begin_inset CommandInset label +LatexCommand label +name "fig:pre-emph-spectrum" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Plain Layout + +\end_layout + +\end_inset + + +\end_layout + \begin_layout Subsection Synthesis \end_layout +\begin_layout Standard +Following the convolution of the impulse train and the LPC filter, spectrograms + of the synthesised sound and the original are presented in figure +\begin_inset CommandInset ref +LatexCommand ref +reference "fig:Spectrograms-synth" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +. 
+ The circled areas highlight similar portions; the formant frequencies can + be seen in both. + Despite being quasi-stationary, the original signal still shows some variation + in time. + The stationary synthesised signal, however, has a flat profile in time. +\end_layout + +\begin_layout Standard +\begin_inset Float figure +wide false +sideways false +status open + +\begin_layout Plain Layout +\noindent +\align center +\begin_inset Graphics + filename ../resources/hood_m_gram.png + lyxscale 10 + width 50col% + +\end_inset + + +\begin_inset Graphics + filename ../resources/hood_m_gram_synth.png + lyxscale 10 + width 50col% + +\end_inset + + +\begin_inset Caption Standard + +\begin_layout Plain Layout +Spectrograms for the original and synthesised vowel segment, areas of comparison + highlighted +\begin_inset CommandInset label +LatexCommand label +name "fig:Spectrograms-synth" + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + +\end_inset + + +\end_layout + \begin_layout Section Discussion \end_layout +\begin_layout Standard +\begin_inset Flex TODO Note (inline) +status open + +\begin_layout Plain Layout +do numbers on compression +\end_layout + +\end_inset + + +\end_layout + \begin_layout Section Conclusion \end_layout @@ -478,6 +2928,10 @@ lpss.m . \end_layout +\begin_layout Standard +Additional helper functions were written to plot and manipulate data. 
+\end_layout + \begin_layout Standard \begin_inset CommandInset include LatexCommand lstinputlisting diff --git a/resources/head_f_rcep_smooth.png b/resources/head_f_rcep_smooth.png new file mode 100644 index 0000000..95d98b2 Binary files /dev/null and b/resources/head_f_rcep_smooth.png differ diff --git a/resources/head_f_rcep_smooth_X.png b/resources/head_f_rcep_smooth_X.png new file mode 100644 index 0000000..6abd528 Binary files /dev/null and b/resources/head_f_rcep_smooth_X.png differ diff --git a/resources/head_f_rcep_~smooth.png b/resources/head_f_rcep_~smooth.png new file mode 100644 index 0000000..58db9fa Binary files /dev/null and b/resources/head_f_rcep_~smooth.png differ diff --git a/resources/head_f_spect_25.png b/resources/head_f_spect_25.png new file mode 100644 index 0000000..38ecdee Binary files /dev/null and b/resources/head_f_spect_25.png differ diff --git a/resources/hood_m_ccep_~smooth.png b/resources/hood_m_ccep_~smooth.png new file mode 100644 index 0000000..363682d Binary files /dev/null and b/resources/hood_m_ccep_~smooth.png differ diff --git a/resources/hood_m_gram.png b/resources/hood_m_gram.png new file mode 100644 index 0000000..a44291c Binary files /dev/null and b/resources/hood_m_gram.png differ diff --git a/resources/hood_m_gram_synth.png b/resources/hood_m_gram_synth.png new file mode 100644 index 0000000..e231d73 Binary files /dev/null and b/resources/hood_m_gram_synth.png differ diff --git a/resources/hood_m_lpc_tile.png b/resources/hood_m_lpc_tile.png new file mode 100644 index 0000000..971f8b2 Binary files /dev/null and b/resources/hood_m_lpc_tile.png differ diff --git a/resources/hood_m_rcep_smooth_X.png b/resources/hood_m_rcep_smooth_X.png new file mode 100644 index 0000000..9a3f076 Binary files /dev/null and b/resources/hood_m_rcep_smooth_X.png differ diff --git a/resources/hood_m_rcep_~smooth.png b/resources/hood_m_rcep_~smooth.png new file mode 100644 index 0000000..a3d159d Binary files /dev/null and 
b/resources/hood_m_rcep_~smooth.png differ diff --git a/resources/hood_m_spect_25.png b/resources/hood_m_spect_25.png new file mode 100644 index 0000000..4e45e1e Binary files /dev/null and b/resources/hood_m_spect_25.png differ diff --git a/resources/hood_m_spect_25_premph_0.9.png b/resources/hood_m_spect_25_premph_0.9.png new file mode 100644 index 0000000..23b120d Binary files /dev/null and b/resources/hood_m_spect_25_premph_0.9.png differ