wrote brief, added cepstrum, added formant freqs

2020-11-04 19:22:05 +00:00 · 2020-11-04 19:22:05 +00:00 · 438f25c7ca
commit 438f25c7ca
parent 5eb36e7af8
8 changed files with 316 additions and 56 deletions
--- a/func/autocorr.m
+++ b/func/autocorr.m
@ -0,0 +1,22 @@
+function [cep_autocorr, cep_lags] = autocorr(signal, max_lags, time, Fs)
+% AUTOCORR Plot the normalised autocorrelation of signal out to +/-max_lags.
+%   time: if true, lags are returned/plotted in ms (uses Fs), else in samples.
+%   max_lags is rounded because xcorr only accepts an integer lag limit.
+max_lags = round(max_lags);
+[cep_autocorr, cep_lags] = xcorr(signal, max_lags, 'coeff');
+
+if time
+    cep_lags = 1000*cep_lags/Fs; % turn samples into ms
+    delay_label = 'Delay (ms)';
+else
+    delay_label = 'Delay (samples)';
+end
+
+plot(cep_lags, cep_autocorr)
+grid
+xlabel(delay_label)
+ylabel('Normalized Autocorrelation')
+title('Autocorrelation')
+xlim([min(cep_lags) max(cep_lags)]);
+end
+
--- a/func/clip_segment.m
+++ b/func/clip_segment.m
@ -0,0 +1,16 @@
+function output = clip_segment(signal, Fs, seg_length, offset)
+% CLIP_SEGMENT Cut a segment out of signal; seg_length and offset are in ms.
+%   The result holds seg_length_samples + 1 samples (both end points are
+%   included) and, for a non-empty signal, always lies inside signal.
+signal_length_samples = length(signal);
+% Round: ms_to_samples can return a fraction, which is not a valid index.
+seg_length_samples = round(ms_to_samples(seg_length, Fs));
+seg_length_samples = min(seg_length_samples, signal_length_samples - 1);
+offset_samples = max(round(ms_to_samples(offset, Fs)), 1);
+if signal_length_samples < seg_length_samples + offset_samples
+    % Window would run past the end: slide it back to end on the last sample.
+    offset_samples = signal_length_samples - seg_length_samples;
+end
+output = signal(offset_samples:offset_samples + seg_length_samples);
+end
+
--- a/func/fft_.m
+++ b/func/fft_.m
@ -5,7 +5,7 @@ L=length(signal);
 Y = fft(signal);
 P2 = abs(Y); % two-sided spectrum
 % P2 = abs(Y/L); % two-sided spectrum
-P1 = P2(1:L/2+1); % single-sided spectrum
+P1 = P2(1:floor(L/2+1)); % single-sided spectrum
 P1(2:end-1) = 2*P1(2:end-1);
 frequencies = sample_frequency*(0:(L/2))/L;
 values = P1;
--- a/func/ms_to_samples.m
+++ b/func/ms_to_samples.m
@ -0,0 +1,3 @@
+function samples = ms_to_samples(time_in, sample_freq)
+    % Convert a time in milliseconds to a sample count at sample_freq (Hz).
+    % The result is not rounded, so it may be fractional.
+    time_s = time_in / 1000;
+    samples = time_s * sample_freq;
+end
--- a/func/spectro.m
+++ b/func/spectro.m
@ -1,6 +1,6 @@
 function spectro(signal, sample_frequency, windows, overlap_interval)

-sample_overlap = (overlap_interval / 1000) * sample_frequency;
+sample_overlap = ms_to_samples(overlap_interval, sample_frequency);

 sample_size = size(signal);
 %window_size = round(sample_size(1) / ((windows + 1)/2))
--- a/lpss.m
+++ b/lpss.m
@ -4,80 +4,159 @@

 close all;clear all;clc;

-LPC_ORDER = 8;
-DISPLAY_SAMPLES = 1000;
+SEGMENT_LENGTH = 100; % ms
+SEGMENT_OFFSET = 0; % ms from start
+
+LPC_ORDER = 20;
+AC_DISP_SAMPLES = 1000; % autocorrelation display samples
 WINDOW_NUMBER = 10;
 WINDOW_OVERLAP = 5; % ms

-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% READ SIGNAL
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-[y, Fs] = audioread('samples/hood_m.wav');
-L = length(y) % number of samples
-DISPLAY_SAMPLES = min([DISPLAY_SAMPLES L]);
+F0 = 60; % low-pitched male speech
+% F0 = 600; % children
+
+% flags for selective running
+FREQ_RESPONSE = ~false;
+AUTOCORRELATION = false;
+CEPSTRUM_PLOT = false;
+CEPSTRUM_ONE_SIDED = true;
+ORIG_LPC_T_COMPARE = false;
+ORIG_SPECTROGRAM = false;
+PLAY = false;

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% LPC
+%% READ SIGNAL
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-a = aryule(y, LPC_ORDER) % signal, filter order
+[y, Fs] = audioread('samples/hood_m.wav');
+y = clip_segment(y, Fs, SEGMENT_LENGTH, SEGMENT_OFFSET);
+
+L = length(y) % number of samples
+
+max_lag = Fs/ F0;
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% LPC
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+a = lpc(y, LPC_ORDER) % signal, filter order
 est_y = filter(0.02, a, y);

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% PREDICTION ERROR
+%% COMPARE ORIGINAL SIGNAL WITH LPC (T DOMAIN)
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-e = y - est_y;
-[acs, lags] = xcorr(e,'coeff');
+if ORIG_LPC_T_COMPARE
+% Overlay the tail of the signal and its LPC estimate, sample by sample.
+% Clamp before building x so x and the slices below have the same length
+% even when the clipped segment is shorter than AC_DISP_SAMPLES.
+AC_DISP_SAMPLES = min([AC_DISP_SAMPLES L]);
+x = 1:AC_DISP_SAMPLES;
+
+figure(1)
+plot(x, y(end-AC_DISP_SAMPLES+1:end), x, est_y(end-AC_DISP_SAMPLES+1:end), '--')
+
+grid
+xlabel('Sample Number')
+ylabel('Amplitude')
+legend('Original signal','LPC estimate')
+end

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% COMPARE TWO SIGNALS
+%% T DOMAIN PREDICTION ERROR
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% x = 1:DISPLAY_SAMPLES;
-% figure(1)
-% plot(x, y(end-DISPLAY_SAMPLES+1:end), x, est_y(end-DISPLAY_SAMPLES+1:end), '--')
+t_domain_err = y - est_y;

-%plot(x, y(end-DISPLAY_SAMPLES+1:end))
-%plot(x, est_y(end-DISPLAY_SAMPLES+1:end))
-
-% grid
-% xlabel('Sample Number')
-% ylabel('Amplitude')
-% legend('Original signal','LPC estimate')
+if AUTOCORRELATION
+figure(2)
+[acs, lags] = autocorr(t_domain_err, max_lag, true, Fs);
+title('Autocorrelation for error in  Time domain')
+end

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% PLOT AUTOCORRELATION
+%% FREQUENCY RESPONSE
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% figure(2)
-% plot(lags, acs)
-% grid
-% xlabel('Lags')
-% ylabel('Normalized Autocorrelation')
-%ylim([-0.1 1.1])
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% PLOT FREQUENCY RESPONSE
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% CALCULATE FFT
-[freq_dom_freqs, freq_dom_vals] = fft_(y, Fs);
- 
-% GET FILTER RESPONSE
-[h, filter_freqs] = freqz(1, a, length(freq_dom_freqs), Fs);
-
+if FREQ_RESPONSE
 figure(3)
-plot(freq_dom_freqs, 20*log10(freq_dom_vals), 'r', filter_freqs, 20*log10(abs(h)), 'b')
-% plot(w/pi, 20*log10(abs(h)))
+
+%% ORIGINAL FFT
+[freq_dom_freqs, freq_dom_vals] = fft_(y, Fs);
+
+orig_freq_plot = plot(freq_dom_freqs, 20*log10(abs(freq_dom_vals)), 'black');
+orig_freq_plot.Color(4) = 0.25;
+orig_freq_plot.LineWidth = 1;
+hold on
+
+%% LPC FILTER RESPONSE
+[filter_vals, filter_freqs] = freqz(1, a, length(freq_dom_freqs), Fs);
+filter_vals_db = 20*log10(abs(filter_vals));
+
+lpc_freq_plot = plot(filter_freqs, filter_vals_db, 'b');
+lpc_freq_plot.LineWidth = 2;
+
+% MAXIMA
+maxima = islocalmax(filter_vals_db);
+maxima_freqs = filter_freqs(maxima)
+maxima_db = filter_vals_db(maxima)
+
+maxima_plot = plot(maxima_freqs, maxima_db, 'rx');
+maxima_plot.MarkerSize = 12;
+maxima_plot.LineWidth = 2;
+
+%% PLOT
+hold off
 grid
 xlabel('Frequency (Hz)')
 ylabel('Magnitude (dB)')
-legend('Original Signal', 'LPC Filter')
+legend('Original Signal', 'LPC Filter', 'LPC Maxima')
+title('Frequency Response For Speech Signal and LPC Filter')
+end

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% PLOT ORIGINAL SPECTROGRAM
+%% CEPSTRUM
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% figure(4)
-% spectro(y, Fs, WINDOW_NUMBER, WINDOW_OVERLAP);
-% colormap bone
+cep = rceps(y);
+% cep = cceps(y);
+
+if CEPSTRUM_PLOT
+ceps_t = (0:L - 1);
+
+figure(4)
+if CEPSTRUM_ONE_SIDED
+    plot(ceps_t(1:L / 2), cep(1:L / 2))
+else
+    plot(ceps_t(1:L), cep(1:L))
+end
+
+grid
+xlabel('Quefrency')
+ylabel('ceps(x[n])')
+if CEPSTRUM_ONE_SIDED
+    xlim([0 L / 2])
+    title('One-sided Speech Signal Cepstrum')
+else
+    xlim([0 L])
+    title('Speech Signal Cepstrum')
+end
+end
+
+%% AUTOCORRELATION
+if AUTOCORRELATION
+figure(5)
+[cep_autocorr, cep_lags] = autocorr(cep(1:L/2), max_lag, true, Fs);
+title('One-sided Cepstrum Autocorrelation')
+end

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% PLAY
+%% PLOT ORIGINAL SPECTROGRAM
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%sound(y, Fs);
+if ORIG_SPECTROGRAM
+figure(6)
+spectro(y, Fs, WINDOW_NUMBER, WINDOW_OVERLAP);
+colormap bone
+title('Speech Signal Spectrogram')
+end
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% PLAY
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+if PLAY
+sound(y, Fs);
+end
--- a/lpss_cepstrum.m
+++ b/lpss_cepstrum.m
@ -4,11 +4,34 @@

 close all;clear all;clc;

+CEPSTRUM_COEFFS = 100;
+
 % READ SIGNAL
 [y, Fs] = audioread('samples/hood_m.wav');
+sample_length = length(y);
+half = sample_length / 2;

-t = (0:length(y) - 1);
+t = (0:sample_length - 1);
 c = rceps(y);
 % c = cceps(y);
-plot(t, c)
-xlabel('Quefrency')
+% plot(t(1:sample_length), c(1:sample_length))
+plot(t(1:half), c(1:half))
+
+xlabel('Quefrency')
+ylabel('ceps(x[n])')
+% xlim([0 sample_length])
+xlim([0 half])
+
+%% PLOT FFT
+
+c = cceps(y);
+c(CEPSTRUM_COEFFS:end) = 0;
+% [cep_freqs, cep_vals] = fft_(c, Fs);
+cep_vals = fft(c);
+cep_vals = cep_vals(1:floor(sample_length/2+1));
+cep_freqs = Fs*(0:(sample_length/2))/sample_length;
+
+figure(2)
+cep_plot = plot(cep_freqs, 20*log10(abs(cep_vals)), 'g');
+cep_plot.LineWidth = 2;
+hold off
--- a/report/report.lyx
+++ b/report/report.lyx
@ -9,6 +9,10 @@
 \def\changemargin#1#2{\list{}{\rightmargin#2\leftmargin#1}\item[]}
 \let\endchangemargin=\endlist 
 \pagenumbering{roman}
+
+\usepackage{color}
+
+\definecolor{commentgreen}{RGB}{0,94,11}
 \end_preamble
 \use_default_options true
 \begin_modules
@ -257,6 +261,53 @@ setcounter{page}{1}
 Introduction
 \end_layout

+\begin_layout Section
+Brief
+\end_layout
+
+\begin_layout Standard
+The aim of this report is to demonstrate how digital signal processing technique
+s can be used to analyse, model and synthesise speech.
+ The task will be considered as two areas of concern, that of
+ modelling and synthesis.
+\end_layout
+
+\begin_layout Standard
+The modelling stage will utilise Linear Predictive Coding and the source-filter
+ model of speech to construct a filter that acts similarly to the vocal
+ tract's effect on sound produced by the vocal cords.
+ Comparisons of the frequency response for both the estimated filter and
+ the original sound will be presented; the effect of different filter orders
+ will also be demonstrated.
+ Relevant parameters of the original vowel speech segment will be presented
+ including the fundamental frequency and further formant frequencies.
+\end_layout
+
+\begin_layout Standard
+The synthesis stage will complete the source-filter model of speech by creating
+ a suitable periodic sound source to be modulated by the previous filter.
+ With a complete source-filter model, artificial vowel sounds will be synthesise
+d and analysed.
+ Subjective assessments will be made as to the differences between the original
+ sound and the final product of the model when system parameters are varied.
+\end_layout
+
+\begin_layout Section
+Implementation
+\end_layout
+
+\begin_layout Section
+Results
+\end_layout
+
+\begin_layout Section
+Discussion
+\end_layout
+
+\begin_layout Section
+Conclusion
+\end_layout
+
 \begin_layout Standard
 \begin_inset Newpage newpage
 \end_inset
@ -285,7 +336,7 @@ options "bibtotoc"

 \begin_layout Section
 \start_of_appendix
-Code
+Source Code
 \begin_inset CommandInset label
 LatexCommand label
 name "sec:Code"
@ -293,6 +344,72 @@ name "sec:Code"
 \end_inset


+\end_layout
+
+\begin_layout Standard
+\begin_inset CommandInset include
+LatexCommand lstinputlisting
+filename "../lpss.m"
+lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram, mfcc, spectro, fft_, autocorr, clip_segment, islocalmax, ms_to_samples},caption={Main script},label={main_script}"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset CommandInset include
+LatexCommand lstinputlisting
+filename "../func/spectro.m"
+lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram, ms_to_samples},caption={Spectrogram plotting wrapper function},label={spectrogram_function}"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset CommandInset include
+LatexCommand lstinputlisting
+filename "../func/fft_.m"
+lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram},caption={Fast Fourier transform wrapper function},label={fft_function}"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset CommandInset include
+LatexCommand lstinputlisting
+filename "../func/autocorr.m"
+lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram},caption={Autocorrelation plotting wrapper function},label={autocorr_function}"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset CommandInset include
+LatexCommand lstinputlisting
+filename "../func/clip_segment.m"
+lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram},caption={Retrieve a segment of the original speech signal},label={clip_segment_function}"
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset CommandInset include
+LatexCommand lstinputlisting
+filename "../func/ms_to_samples.m"
+lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram},caption={Transform time in milliseconds into the respective number of samples},label={ms_to_samples_function}"
+
+\end_inset
+
+
 \end_layout

 \end_body