wrote brief, added cepstrum, added formant freqs

This commit is contained in:
aj 2020-11-04 19:22:05 +00:00
parent 5eb36e7af8
commit 438f25c7ca
8 changed files with 316 additions and 56 deletions

22
func/autocorr.m Normal file
View File

@ -0,0 +1,22 @@
function [cep_autocorr, cep_lags] = autocorr(signal, max_lags, time, Fs)
% AUTOCORR Compute and plot the normalised autocorrelation of a signal.
%
%   [cep_autocorr, cep_lags] = autocorr(signal, max_lags, time, Fs)
%
%   Inputs:
%     signal   - vector to correlate with itself
%     max_lags - largest lag to evaluate, in samples. Rounded to the
%                nearest integer before use because xcorr only accepts
%                an integer lag limit, while callers may derive this
%                value from a division such as Fs/F0.
%     time     - when true, the lag axis is expressed in milliseconds,
%                otherwise in samples
%     Fs       - sample rate in Hz; only read when time is true
%
%   Outputs:
%     cep_autocorr - autocorrelation values using xcorr's 'coeff' scaling
%     cep_lags     - lag axis matching cep_autocorr (ms or samples)
%
%   Side effect: draws the result into the current axes.

    % xcorr rejects a fractional maximum lag
    max_lags = round(max_lags);
    [cep_autocorr, cep_lags] = xcorr(signal, max_lags, 'coeff');

    if time
        cep_lags = 1000*cep_lags/Fs; % turn samples into ms
        delay_label = 'Delay (ms)';
    else
        delay_label = 'Delay (samples)';
    end

    plot(cep_lags, cep_autocorr)
    grid on % explicit state; a bare "grid" toggles and could switch it off
    xlabel(delay_label)
    ylabel('Normalized Autocorrelation')
    title('Autocorrelation')

    % xlim requires strictly increasing limits, so skip it when the lag
    % range has collapsed to a single point (max_lags rounded to 0)
    lag_limits = [min(cep_lags) max(cep_lags)];
    if lag_limits(1) < lag_limits(2)
        xlim(lag_limits);
    end
end

16
func/clip_segment.m Normal file
View File

@ -0,0 +1,16 @@
function output = clip_segment(signal, Fs, seg_length, offset)
% CLIP_SEGMENT Extract a fixed-duration segment from a signal.
%
%   output = clip_segment(signal, Fs, seg_length, offset)
%
%   Inputs:
%     signal     - vector of samples
%     Fs         - sample rate in Hz
%     seg_length - requested segment duration in ms
%     offset     - start of the segment in ms from the start of signal
%
%   Output:
%     output - the requested segment. It holds seg_length worth of samples
%              (rounded to a whole sample count), or the whole signal when
%              the signal is shorter than that. If the segment would run
%              past the end of the signal, the start is pulled back so a
%              full-length segment is still returned.
%
%   NOTE(review): length() and linear indexing are used, so this presumably
%   expects a single-channel vector - confirm against callers.

    signal_length_samples = length(signal);

    % Millisecond values rarely map onto a whole number of samples; round
    % so the results are valid array indices.
    seg_length_samples = round(ms_to_samples(seg_length, Fs));
    seg_length_samples = min(max(seg_length_samples, 0), signal_length_samples);

    % MATLAB indexing is 1-based, so an offset of 0 ms maps to sample 1.
    offset_samples = max(round(ms_to_samples(offset, Fs)), 1);

    % Latest start index that still leaves room for a full segment.
    last_start = signal_length_samples - seg_length_samples + 1;
    offset_samples = min(offset_samples, last_start);

    % The colon range is inclusive at both ends, hence the -1 to return
    % exactly seg_length_samples samples.
    output = signal(offset_samples:offset_samples + seg_length_samples - 1);
end

View File

@ -5,7 +5,7 @@ L=length(signal);
Y = fft(signal);
P2 = abs(Y); % two-sided spectrum
% P2 = abs(Y/L); % two-sided spectrum
P1 = P2(1:L/2+1); % single-sided spectrum
P1 = P2(1:floor(L/2+1)); % single-sided spectrum
P1(2:end-1) = 2*P1(2:end-1);
frequencies = sample_frequency*(0:(L/2))/L;
values = P1;

3
func/ms_to_samples.m Normal file
View File

@ -0,0 +1,3 @@
function samples = ms_to_samples(time_in, sample_freq)
% MS_TO_SAMPLES Convert a duration in milliseconds into a sample count.
%
%   samples = ms_to_samples(time_in, sample_freq)
%
%   time_in     - duration in milliseconds
%   sample_freq - sample rate in Hz
%
%   The result is not rounded, so it may be fractional.

    MS_PER_SECOND = 1000;
    duration_seconds = time_in / MS_PER_SECOND;
    samples = duration_seconds * sample_freq;
end

View File

@ -1,6 +1,6 @@
function spectro(signal, sample_frequency, windows, overlap_interval)
sample_overlap = (overlap_interval / 1000) * sample_frequency;
sample_overlap = ms_to_samples(overlap_interval, sample_frequency);
sample_size = size(signal);
%window_size = round(sample_size(1) / ((windows + 1)/2))

179
lpss.m
View File

@ -4,80 +4,159 @@
close all;clear all;clc;
LPC_ORDER = 8;
DISPLAY_SAMPLES = 1000;
SEGMENT_LENGTH = 100; % ms
SEGMENT_OFFSET = 0; % ms from start
LPC_ORDER = 20;
AC_DISP_SAMPLES = 1000; % autocorrelation display samples
WINDOW_NUMBER = 10;
WINDOW_OVERLAP = 5; % ms
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% READ SIGNAL
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[y, Fs] = audioread('samples/hood_m.wav');
L = length(y) % number of samples
DISPLAY_SAMPLES = min([DISPLAY_SAMPLES L]);
F0 = 60; % low-pitched male speech
% F0 = 600; % children
% flags for selective running
FREQ_RESPONSE = ~false;
AUTOCORRELATION = false;
CEPSTRUM_PLOT = false;
CEPSTRUM_ONE_SIDED = true;
ORIG_LPC_T_COMPARE = false;
ORIG_SPECTROGRAM = false;
PLAY = false;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% LPC
%% READ SIGNAL
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
a = aryule(y, LPC_ORDER) % signal, filter order
[y, Fs] = audioread('samples/hood_m.wav');
y = clip_segment(y, Fs, SEGMENT_LENGTH, SEGMENT_OFFSET);
L = length(y) % number of samples
max_lag = Fs/ F0;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% LPC
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
a = lpc(y, LPC_ORDER) % signal, filter order
est_y = filter(0.02, a, y);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% PREDICTION ERROR
%% COMPARE ORIGINAL SIGNAL WITH LPC (T DOMAIN)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
e = y - est_y;
[acs, lags] = xcorr(e,'coeff');
if ORIG_LPC_T_COMPARE
x = 1:AC_DISP_SAMPLES;
AC_DISP_SAMPLES = min([AC_DISP_SAMPLES L]);
figure(1)
plot(x, y(end-AC_DISP_SAMPLES+1:end), x, est_y(end-AC_DISP_SAMPLES+1:end), '--')
% plot(x, y(end-DISPLAY_SAMPLES+1:end))
% plot(x, est_y(end-DISPLAY_SAMPLES+1:end))
grid
xlabel('Sample Number')
ylabel('Amplitude')
legend('Original signal','LPC estimate')
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% COMPARE TWO SIGNALS
%% T DOMAIN PREDICTION ERROR
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% x = 1:DISPLAY_SAMPLES;
% figure(1)
% plot(x, y(end-DISPLAY_SAMPLES+1:end), x, est_y(end-DISPLAY_SAMPLES+1:end), '--')
t_domain_err = y - est_y;
%plot(x, y(end-DISPLAY_SAMPLES+1:end))
%plot(x, est_y(end-DISPLAY_SAMPLES+1:end))
% grid
% xlabel('Sample Number')
% ylabel('Amplitude')
% legend('Original signal','LPC estimate')
if AUTOCORRELATION
figure(2)
[acs, lags] = autocorr(t_domain_err, max_lag, true, Fs);
title('Autocorrelation for error in Time domain')
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% PLOT AUTOCORRELATION
%% FREQUENCY RESPONSE
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% figure(2)
% plot(lags, acs)
% grid
% xlabel('Lags')
% ylabel('Normalized Autocorrelation')
%ylim([-0.1 1.1])
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% PLOT FREQUENCY RESPONSE
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% CALCULATE FFT
[freq_dom_freqs, freq_dom_vals] = fft_(y, Fs);
% GET FILTER RESPONSE
[h, filter_freqs] = freqz(1, a, length(freq_dom_freqs), Fs);
if FREQ_RESPONSE
figure(3)
plot(freq_dom_freqs, 20*log10(freq_dom_vals), 'r', filter_freqs, 20*log10(abs(h)), 'b')
% plot(w/pi, 20*log10(abs(h)))
%% ORIGINAL FFT
[freq_dom_freqs, freq_dom_vals] = fft_(y, Fs);
orig_freq_plot = plot(freq_dom_freqs, 20*log10(abs(freq_dom_vals)), 'black');
orig_freq_plot.Color(4) = 0.25;
orig_freq_plot.LineWidth = 1;
hold on
%% LPC FILTER RESPONSE
[filter_vals, filter_freqs] = freqz(1, a, length(freq_dom_freqs), Fs);
filter_vals_db = 20*log10(abs(filter_vals));
lpc_freq_plot = plot(filter_freqs, filter_vals_db, 'b');
lpc_freq_plot.LineWidth = 2;
% MAXIMA
maxima = islocalmax(filter_vals_db);
maxima_freqs = filter_freqs(maxima)
maxima_db = filter_vals_db(maxima)
maxima_plot = plot(maxima_freqs, maxima_db, 'rx');
maxima_plot.MarkerSize = 12;
maxima_plot.LineWidth = 2;
%% PLOT
hold off
grid
xlabel('Frequency (Hz)')
ylabel('Magnitude (dB)')
legend('Original Signal', 'LPC Filter')
legend('Original Signal', 'LPC Filter', 'LPC Maxima')
title('Frequency Response For Speech Signal and LPC Filter')
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% PLOT ORIGINAL SPECTROGRAM
%% CEPSTRUM
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% figure(4)
% spectro(y, Fs, WINDOW_NUMBER, WINDOW_OVERLAP);
% colormap bone
cep = rceps(y);
% cep = cceps(y);
if CEPSTRUM_PLOT
ceps_t = (0:L - 1);
figure(4)
if CEPSTRUM_ONE_SIDED
plot(ceps_t(1:L / 2), cep(1:L / 2))
else
plot(ceps_t(1:L), cep(1:L))
end
grid
xlabel('Quefrency')
ylabel('ceps(x[n])')
if CEPSTRUM_ONE_SIDED
xlim([0 L / 2])
title('One-sided Speech Signal Cepstrum')
else
xlim([0 L])
title('Speech Signal Cepstrum')
end
end
%% AUTOCORRELATION
if AUTOCORRELATION
figure(5)
[cep_autocorr, cep_lags] = autocorr(cep(1:L/2), max_lag, true, Fs);
title('One-sided Cepstrum Autocorrelation')
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% PLAY
%% PLOT ORIGINAL SPECTROGRAM
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%sound(y, Fs);
if ORIG_SPECTROGRAM
figure(6)
spectro(y, Fs, WINDOW_NUMBER, WINDOW_OVERLAP);
colormap bone
title('Speech Signal Spectrogram')
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% PLAY
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if PLAY
sound(y, Fs);
end

View File

@ -4,11 +4,34 @@
close all;clear all;clc;
CEPSTRUM_COEFFS = 100;
% READ SIGNAL
[y, Fs] = audioread('samples/hood_m.wav');
sample_length = length(y);
half = sample_length / 2;
t = (0:length(y) - 1);
t = (0:sample_length - 1);
c = rceps(y);
% c = cceps(y);
plot(t, c)
xlabel('Quefrency')
% plot(t(1:sample_length), c(1:sample_length))
plot(t(1:half), c(1:half))
xlabel('Quefrency')
ylabel('ceps(x[n])')
% xlim([0 sample_length])
xlim([0 half])
%% PLOT FFT
c = cceps(y);
c(CEPSTRUM_COEFFS:end) = 0;
% [cep_freqs, cep_vals] = fft_(c, Fs);
cep_vals = fft(c);
cep_vals = cep_vals(1:floor(sample_length/2+1));
cep_freqs = Fs*(0:(sample_length/2))/sample_length;
figure(2)
cep_plot = plot(cep_freqs, 20*log10(abs(cep_vals)), 'g');
cep_plot.LineWidth = 2;
hold off

View File

@ -9,6 +9,10 @@
\def\changemargin#1#2{\list{}{\rightmargin#2\leftmargin#1}\item[]}
\let\endchangemargin=\endlist
\pagenumbering{roman}
\usepackage{color}
\definecolor{commentgreen}{RGB}{0,94,11}
\end_preamble
\use_default_options true
\begin_modules
@ -257,6 +261,53 @@ setcounter{page}{1}
Introduction
\end_layout
\begin_layout Section
Brief
\end_layout
\begin_layout Standard
The aim of this report is to demonstrate how digital signal processing technique
s can be used to analyse, model and synthesise speech.
The task will be considered as two areas of concern, those of
modelling and synthesis.
\end_layout
\begin_layout Standard
The modelling stage will utilise Linear Predictive Coding and the source-filter
model of speech to construct a filter that acts similarly to the vocal
tract's effect on sound produced by the vocal cords.
Comparisons of the frequency response for both the estimated filter and
the original sound will be presented; the effect of different filter orders
will also be demonstrated.
Relevant parameters of the original vowel speech segment will be presented
including the fundamental frequency and further formant frequencies.
\end_layout
\begin_layout Standard
The synthesis stage will complete the source-filter model of speech by creating
a suitable periodic sound source to be modulated by the previous filter.
With a complete source-filter model, artificial vowel sounds will be synthesise
d and analysed.
Subjective assessments will be made as to the differences between the original
sound and the final product of the model when system parameters are varied.
\end_layout
\begin_layout Section
Implementation
\end_layout
\begin_layout Section
Results
\end_layout
\begin_layout Section
Discussion
\end_layout
\begin_layout Section
Conclusion
\end_layout
\begin_layout Standard
\begin_inset Newpage newpage
\end_inset
@ -285,7 +336,7 @@ options "bibtotoc"
\begin_layout Section
\start_of_appendix
Code
Source Code
\begin_inset CommandInset label
LatexCommand label
name "sec:Code"
@ -293,6 +344,72 @@ name "sec:Code"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../lpss.m"
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram, mfcc, spectro, fft_, autocorr, clip_segment, islocalmax, ms_to_samples},caption={Main script},label={main_script}"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../func/spectro.m"
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram, ms_to_samples},caption={Spectrogram plotting wrapper function},label={spectrogram_function}"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../func/fft_.m"
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram},caption={Fast Fourier transform wrapper function},label={fft_function}"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../func/autocorr.m"
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram},caption={Autocorrelation plotting wrapper function},label={autocorr_function}"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../func/clip_segment.m"
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram},caption={Retrieve a segment of the original speech signal},label={clip_segment_function}"
\end_inset
\end_layout
\begin_layout Standard
\begin_inset CommandInset include
LatexCommand lstinputlisting
filename "../func/ms_to_samples.m"
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram},caption={Transform time in milliseconds into the respective number of samples},label={clip_segment_function-1}"
\end_inset
\end_layout
\end_body