wrote brief, added cepstrum, added formant freqs
This commit is contained in:
parent
5eb36e7af8
commit
438f25c7ca
22
func/autocorr.m
Normal file
22
func/autocorr.m
Normal file
@ -0,0 +1,22 @@
|
||||
function [cep_autocorr, cep_lags] = autocorr(signal, max_lags, time, Fs)
% AUTOCORR Compute and plot the normalised autocorrelation of a signal.
%
%   [cep_autocorr, cep_lags] = autocorr(signal, max_lags, time, Fs)
%
%   signal   - signal to correlate with itself
%   max_lags - largest lag (in samples) to evaluate; rounded to the
%              nearest integer before use
%   time     - when true, lags are converted from samples to milliseconds
%   Fs       - sample rate in Hz (only used when time is true)
%
%   Returns the 'coeff'-normalised autocorrelation values and the matching
%   lag axis (in ms when time is true, otherwise in samples). The result is
%   also drawn on the current axes.
%
%   NOTE(review): this function name may shadow a toolbox function of the
%   same name if one is installed - confirm path order is as intended.

% Callers derive max_lags from Fs/F0, which is fractional for many sample
% rates; xcorr expects an integer lag count.
max_lags = round(max_lags);

[cep_autocorr, cep_lags] = xcorr(signal, max_lags, 'coeff');

if time
    cep_lags = 1000*cep_lags/Fs; % turn samples into ms
    delay_label = 'Delay (ms)';
else
    delay_label = 'Delay (samples)';
end

plot(cep_lags, cep_autocorr)
grid
xlabel(delay_label)
ylabel('Normalized Autocorrelation')
title('Autocorrelation')

% xlim rejects a non-increasing range, which happens when only lag 0 exists.
lag_min = min(cep_lags);
lag_max = max(cep_lags);
if lag_min < lag_max
    xlim([lag_min lag_max]);
end

end
|
||||
|
16
func/clip_segment.m
Normal file
16
func/clip_segment.m
Normal file
@ -0,0 +1,16 @@
|
||||
function output = clip_segment(signal, Fs, seg_length, offset)
% CLIP_SEGMENT Extract a fixed-duration segment from a sampled signal.
%
%   output = clip_segment(signal, Fs, seg_length, offset)
%
%   signal     - sampled signal (indexed linearly)
%   Fs         - sample rate in Hz
%   seg_length - requested segment duration in milliseconds
%   offset     - start of the segment in milliseconds from the signal start
%
%   Returns the requested number of samples, limited to the length of the
%   signal. If the segment would run past the end of the signal, the start
%   is moved back so that a full-length segment ending on the last sample
%   is returned instead.

signal_length_samples = length(signal);

% ms_to_samples may return a fractional count; indices must be integers.
seg_length_samples = round(ms_to_samples(seg_length, Fs));
seg_length_samples = max(min(seg_length_samples, signal_length_samples), 0);

% Indexing is 1-based, so an offset of 0 ms maps to the first sample.
offset_samples = max(round(ms_to_samples(offset, Fs)), 1);

% The range below is inclusive, hence the -1: without it the segment would
% contain one sample more than requested.
last_sample = offset_samples + seg_length_samples - 1;

if last_sample > signal_length_samples
    offset_samples = signal_length_samples - seg_length_samples + 1;
    last_sample = signal_length_samples;
end

output = signal(offset_samples:last_sample);

end
|
||||
|
@ -5,7 +5,7 @@ L=length(signal);
|
||||
Y = fft(signal);
|
||||
P2 = abs(Y); % two-sided spectrum
|
||||
% P2 = abs(Y/L); % two-sided spectrum
|
||||
P1 = P2(1:L/2+1); % single-sided spectrum
|
||||
P1 = P2(1:floor(L/2+1)); % single-sided spectrum
|
||||
P1(2:end-1) = 2*P1(2:end-1);
|
||||
frequencies = sample_frequency*(0:(L/2))/L;
|
||||
values = P1;
|
||||
|
3
func/ms_to_samples.m
Normal file
3
func/ms_to_samples.m
Normal file
@ -0,0 +1,3 @@
|
||||
function samples = ms_to_samples(time_in, sample_freq)
% MS_TO_SAMPLES Convert a duration in milliseconds to a number of samples.
%
%   samples = ms_to_samples(time_in, sample_freq)
%
%   time_in     - duration in milliseconds
%   sample_freq - sample rate in Hz
%
%   The result is not rounded, so it can be fractional; callers that need
%   an index or an integer count must round it themselves.

MS_PER_SECOND = 1000;

seconds = time_in / MS_PER_SECOND;
samples = seconds * sample_freq;

end
|
@ -1,6 +1,6 @@
|
||||
function spectro(signal, sample_frequency, windows, overlap_interval)
|
||||
|
||||
sample_overlap = (overlap_interval / 1000) * sample_frequency;
|
||||
sample_overlap = ms_to_samples(overlap_interval, sample_frequency);
|
||||
|
||||
sample_size = size(signal);
|
||||
%window_size = round(sample_size(1) / ((windows + 1)/2))
|
||||
|
179
lpss.m
179
lpss.m
@ -4,80 +4,159 @@
|
||||
|
||||
close all;clear all;clc;
|
||||
|
||||
LPC_ORDER = 8;
|
||||
DISPLAY_SAMPLES = 1000;
|
||||
SEGMENT_LENGTH = 100; % ms
|
||||
SEGMENT_OFFSET = 0; % ms from start
|
||||
|
||||
LPC_ORDER = 20;
|
||||
AC_DISP_SAMPLES = 1000; % autocorrelation display samples
|
||||
WINDOW_NUMBER = 10;
|
||||
WINDOW_OVERLAP = 5; % ms
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% READ SIGNAL
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
[y, Fs] = audioread('samples/hood_m.wav');
|
||||
L = length(y) % number of samples
|
||||
DISPLAY_SAMPLES = min([DISPLAY_SAMPLES L]);
|
||||
F0 = 60; % low-pitched male speech
|
||||
% F0 = 600; % children
|
||||
|
||||
% flags for selective running
|
||||
FREQ_RESPONSE = ~false;
|
||||
AUTOCORRELATION = false;
|
||||
CEPSTRUM_PLOT = false;
|
||||
CEPSTRUM_ONE_SIDED = true;
|
||||
ORIG_LPC_T_COMPARE = false;
|
||||
ORIG_SPECTROGRAM = false;
|
||||
PLAY = false;
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% LPC
|
||||
%% READ SIGNAL
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
a = aryule(y, LPC_ORDER) % signal, filter order
|
||||
[y, Fs] = audioread('samples/hood_m.wav');
|
||||
y = clip_segment(y, Fs, SEGMENT_LENGTH, SEGMENT_OFFSET);
|
||||
|
||||
L = length(y) % number of samples
|
||||
|
||||
max_lag = Fs/ F0;
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%% LPC
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
a = lpc(y, LPC_ORDER) % signal, filter order
|
||||
est_y = filter(0.02, a, y);
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% PREDICTION ERROR
|
||||
%% COMPARE ORIGINAL SIGNAL WITH LPC (T DOMAIN)
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
e = y - est_y;
|
||||
[acs, lags] = xcorr(e,'coeff');
|
||||
if ORIG_LPC_T_COMPARE
    % Time-domain comparison of the original segment against the LPC
    % estimate, drawn over the last AC_DISP_SAMPLES samples.

    % Clamp the display window to the signal length BEFORE building the
    % x axis; otherwise x and the plotted slices have different lengths
    % whenever the signal is shorter than the requested window.
    AC_DISP_SAMPLES = min([AC_DISP_SAMPLES L]);
    x = 1:AC_DISP_SAMPLES;

    figure(1)
    plot(x, y(end-AC_DISP_SAMPLES+1:end), x, est_y(end-AC_DISP_SAMPLES+1:end), '--')

    grid
    xlabel('Sample Number')
    ylabel('Amplitude')
    legend('Original signal','LPC estimate')
end
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% COMPARE TWO SIGNALS
|
||||
%% T DOMAIN PREDICTION ERROR
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% x = 1:DISPLAY_SAMPLES;
|
||||
% figure(1)
|
||||
% plot(x, y(end-DISPLAY_SAMPLES+1:end), x, est_y(end-DISPLAY_SAMPLES+1:end), '--')
|
||||
t_domain_err = y - est_y;
|
||||
|
||||
%plot(x, y(end-DISPLAY_SAMPLES+1:end))
|
||||
%plot(x, est_y(end-DISPLAY_SAMPLES+1:end))
|
||||
|
||||
% grid
|
||||
% xlabel('Sample Number')
|
||||
% ylabel('Amplitude')
|
||||
% legend('Original signal','LPC estimate')
|
||||
if AUTOCORRELATION
|
||||
figure(2)
|
||||
[acs, lags] = autocorr(t_domain_err, max_lag, true, Fs);
|
||||
title('Autocorrelation for error in Time domain')
|
||||
end
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% PLOT AUTOCORRELATION
|
||||
%% FREQUENCY RESPONSE
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% figure(2)
|
||||
% plot(lags, acs)
|
||||
% grid
|
||||
% xlabel('Lags')
|
||||
% ylabel('Normalized Autocorrelation')
|
||||
%ylim([-0.1 1.1])
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% PLOT FREQUENCY RESPONSE
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% CALCULATE FFT
|
||||
[freq_dom_freqs, freq_dom_vals] = fft_(y, Fs);
|
||||
|
||||
% GET FILTER RESPONSE
|
||||
[h, filter_freqs] = freqz(1, a, length(freq_dom_freqs), Fs);
|
||||
|
||||
if FREQ_RESPONSE
|
||||
figure(3)
|
||||
plot(freq_dom_freqs, 20*log10(freq_dom_vals), 'r', filter_freqs, 20*log10(abs(h)), 'b')
|
||||
% plot(w/pi, 20*log10(abs(h)))
|
||||
|
||||
%% ORIGINAL FFT
|
||||
[freq_dom_freqs, freq_dom_vals] = fft_(y, Fs);
|
||||
|
||||
orig_freq_plot = plot(freq_dom_freqs, 20*log10(abs(freq_dom_vals)), 'black');
|
||||
orig_freq_plot.Color(4) = 0.25;
|
||||
orig_freq_plot.LineWidth = 1;
|
||||
hold on
|
||||
|
||||
%% LPC FILTER RESPONSE
|
||||
[filter_vals, filter_freqs] = freqz(1, a, length(freq_dom_freqs), Fs);
|
||||
filter_vals_db = 20*log10(abs(filter_vals));
|
||||
|
||||
lpc_freq_plot = plot(filter_freqs, filter_vals_db, 'b');
|
||||
lpc_freq_plot.LineWidth = 2;
|
||||
|
||||
% MAXIMA
|
||||
maxima = islocalmax(filter_vals_db);
|
||||
maxima_freqs = filter_freqs(maxima)
|
||||
maxima_db = filter_vals_db(maxima)
|
||||
|
||||
maxima_plot = plot(maxima_freqs, maxima_db, 'rx');
|
||||
maxima_plot.MarkerSize = 12;
|
||||
maxima_plot.LineWidth = 2;
|
||||
|
||||
%% PLOT
|
||||
hold off
|
||||
grid
|
||||
xlabel('Frequency (Hz)')
|
||||
ylabel('Magnitude (dB)')
|
||||
legend('Original Signal', 'LPC Filter')
|
||||
legend('Original Signal', 'LPC Filter', 'LPC Maxima')
|
||||
title('Frequency Response For Speech Signal and LPC Filter')
|
||||
end
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% PLOT ORIGINAL SPECTROGRAM
|
||||
%% CEPSTRUM
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% figure(4)
|
||||
% spectro(y, Fs, WINDOW_NUMBER, WINDOW_OVERLAP);
|
||||
% colormap bone
|
||||
cep = rceps(y);
|
||||
% cep = cceps(y);
|
||||
|
||||
if CEPSTRUM_PLOT
|
||||
ceps_t = (0:L - 1);
|
||||
|
||||
figure(4)
|
||||
if CEPSTRUM_ONE_SIDED
|
||||
plot(ceps_t(1:L / 2), cep(1:L / 2))
|
||||
else
|
||||
plot(ceps_t(1:L), cep(1:L))
|
||||
end
|
||||
|
||||
grid
|
||||
xlabel('Quefrency')
|
||||
ylabel('ceps(x[n])')
|
||||
if CEPSTRUM_ONE_SIDED
|
||||
xlim([0 L / 2])
|
||||
title('One-sided Speech Signal Cepstrum')
|
||||
else
|
||||
xlim([0 L])
|
||||
title('Speech Signal Cepstrum')
|
||||
end
|
||||
end
|
||||
|
||||
%% AUTOCORRELATION
|
||||
if AUTOCORRELATION
|
||||
figure(5)
|
||||
[cep_autocorr, cep_lags] = autocorr(cep(1:L/2), max_lag, true, Fs);
|
||||
title('One-sided Cepstrum Autocorrelation')
|
||||
end
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% PLAY
|
||||
%% PLOT ORIGINAL SPECTROGRAM
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%sound(y, Fs);
|
||||
if ORIG_SPECTROGRAM
|
||||
figure(6)
|
||||
spectro(y, Fs, WINDOW_NUMBER, WINDOW_OVERLAP);
|
||||
colormap bone
|
||||
title('Speech Signal Spectrogram')
|
||||
end
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%% PLAY
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
if PLAY
|
||||
sound(y, Fs);
|
||||
end
|
@ -4,11 +4,34 @@
|
||||
|
||||
close all;clear all;clc;
|
||||
|
||||
CEPSTRUM_COEFFS = 100;
|
||||
|
||||
% READ SIGNAL
|
||||
[y, Fs] = audioread('samples/hood_m.wav');
|
||||
sample_length = length(y);
|
||||
half = sample_length / 2;
|
||||
|
||||
t = (0:length(y) - 1);
|
||||
t = (0:sample_length - 1);
|
||||
c = rceps(y);
|
||||
% c = cceps(y);
|
||||
plot(t, c)
|
||||
xlabel('Quefrency')
|
||||
% plot(t(1:sample_length), c(1:sample_length))
|
||||
plot(t(1:half), c(1:half))
|
||||
|
||||
xlabel('Quefrency')
|
||||
ylabel('ceps(x[n])')
|
||||
% xlim([0 sample_length])
|
||||
xlim([0 half])
|
||||
|
||||
%% PLOT FFT
|
||||
|
||||
c = cceps(y);
|
||||
c(CEPSTRUM_COEFFS:end) = 0;
|
||||
% [cep_freqs, cep_vals] = fft_(c, Fs);
|
||||
cep_vals = fft(c);
|
||||
cep_vals = cep_vals(1:floor(sample_length/2+1));
|
||||
cep_freqs = Fs*(0:(sample_length/2))/sample_length;
|
||||
|
||||
figure(2)
|
||||
cep_plot = plot(cep_freqs, 20*log10(abs(cep_vals)), 'g');
|
||||
cep_plot.LineWidth = 2;
|
||||
hold off
|
@ -9,6 +9,10 @@
|
||||
\def\changemargin#1#2{\list{}{\rightmargin#2\leftmargin#1}\item[]}
|
||||
\let\endchangemargin=\endlist
|
||||
\pagenumbering{roman}
|
||||
|
||||
\usepackage{color}
|
||||
|
||||
\definecolor{commentgreen}{RGB}{0,94,11}
|
||||
\end_preamble
|
||||
\use_default_options true
|
||||
\begin_modules
|
||||
@ -257,6 +261,53 @@ setcounter{page}{1}
|
||||
Introduction
|
||||
\end_layout
|
||||
|
||||
\begin_layout Section
|
||||
Brief
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
The aim of this report is to demonstrate how digital signal processing technique
|
||||
s can be used to analyse, model and synthesise speech.
|
||||
The task will be considered as two areas of concern, those of
|
||||
modelling and synthesis.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
The modelling stage will utilise Linear Predictive Coding and the source-filter
|
||||
model of speech to construct a filter that acts similarly to the vocal
|
||||
tract's effect on sound produced by the vocal cords.
|
||||
Comparisons of the frequency response for both the estimated filter and
|
||||
the original sound will be presented; the effect of different filter orders
|
||||
will also be demonstrated.
|
||||
Relevant parameters of the original vowel speech segment will be presented
|
||||
including the fundamental frequency and further formant frequencies.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
The synthesis stage will complete the source-filter model of speech by creating
|
||||
a suitable periodic sound source to be modulated by the previous filter.
|
||||
With a complete source-filter model, artificial vowel sounds will be synthesise
|
||||
d and analysed.
|
||||
Subjective assessments will be made as to the differences between the original
|
||||
sound and the final product of the model when system parameters are varied.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Section
|
||||
Implementation
|
||||
\end_layout
|
||||
|
||||
\begin_layout Section
|
||||
Results
|
||||
\end_layout
|
||||
|
||||
\begin_layout Section
|
||||
Discussion
|
||||
\end_layout
|
||||
|
||||
\begin_layout Section
|
||||
Conclusion
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
\begin_inset Newpage newpage
|
||||
\end_inset
|
||||
@ -285,7 +336,7 @@ options "bibtotoc"
|
||||
|
||||
\begin_layout Section
|
||||
\start_of_appendix
|
||||
Code
|
||||
Source Code
|
||||
\begin_inset CommandInset label
|
||||
LatexCommand label
|
||||
name "sec:Code"
|
||||
@ -293,6 +344,72 @@ name "sec:Code"
|
||||
\end_inset
|
||||
|
||||
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
\begin_inset CommandInset include
|
||||
LatexCommand lstinputlisting
|
||||
filename "../lpss.m"
|
||||
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram, mfcc, spectro, fft_, autocorr, clip_segment, islocalmax, ms_to_samples},caption={Main script},label={main_script}"
|
||||
|
||||
\end_inset
|
||||
|
||||
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
\begin_inset CommandInset include
|
||||
LatexCommand lstinputlisting
|
||||
filename "../func/spectro.m"
|
||||
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram, ms_to_samples},caption={Spectrogram plotting wrapper function},label={spectrogram_function}"
|
||||
|
||||
\end_inset
|
||||
|
||||
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
\begin_inset CommandInset include
|
||||
LatexCommand lstinputlisting
|
||||
filename "../func/fft_.m"
|
||||
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram},caption={Fast Fourier transform wrapper function},label={fft_function}"
|
||||
|
||||
\end_inset
|
||||
|
||||
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
\begin_inset CommandInset include
|
||||
LatexCommand lstinputlisting
|
||||
filename "../func/autocorr.m"
|
||||
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram},caption={Autocorrelation plotting wrapper function},label={autocorr_function}"
|
||||
|
||||
\end_inset
|
||||
|
||||
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
\begin_inset CommandInset include
|
||||
LatexCommand lstinputlisting
|
||||
filename "../func/clip_segment.m"
|
||||
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram},caption={Retrieve a segment of the original speech signal},label={clip_segment_function}"
|
||||
|
||||
\end_inset
|
||||
|
||||
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
\begin_inset CommandInset include
|
||||
LatexCommand lstinputlisting
|
||||
filename "../func/ms_to_samples.m"
|
||||
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram},caption={Transform time in milliseconds into the respective number of samples},label={clip_segment_function-1}"
|
||||
|
||||
\end_inset
|
||||
|
||||
|
||||
\end_layout
|
||||
|
||||
\end_body
|
||||
|
Loading…
Reference in New Issue
Block a user