working end to end code
This commit is contained in:
parent
438f25c7ca
commit
05dd7680b6
@ -1,7 +1,7 @@
|
||||
function [cep_autocorr, cep_lags] = autocorr(signal, max_lags, time, Fs)
|
||||
|
||||
[cep_autocorr, cep_lags] = xcorr(signal, max_lags, 'coeff');
|
||||
% [cep_autocorr, cep_lags] = xcorr(signal, 'coeff');
|
||||
% [cep_autocorr, cep_lags] = xcorr(signal, round(max_lags), 'coeff');
|
||||
[cep_autocorr, cep_lags] = xcorr(signal, 'coeff');
|
||||
|
||||
if time
|
||||
cep_lags = 1000*cep_lags/Fs; % turn samples into ms
|
||||
|
27
func/get_impulse_train.m
Normal file
27
func/get_impulse_train.m
Normal file
@ -0,0 +1,27 @@
|
||||
%% get_impulse_train.m
|
||||
%%
|
||||
%% Generate periodic impulse train for use in speech synth
|
||||
%%
|
||||
%% Signal of pitch fundamental_freq sampled at sampling_freq
|
||||
%% for time length_ms
|
||||
function signal = get_impulse_train(fundamental_freq, sampling_freq, length_ms)
% GET_IMPULSE_TRAIN  Periodic unit-impulse train for use as a synthesis excitation.
%
%   signal = get_impulse_train(fundamental_freq, sampling_freq, length_ms)
%
%   fundamental_freq - pitch of the train (Hz), positive finite scalar
%   sampling_freq    - sampling rate (Hz), positive finite scalar
%   length_ms        - duration in ms, converted to samples by ms_to_samples
%
%   Returns a row vector holding a 1 at the first sample of every pitch
%   period and 0 elsewhere. Returns [] (after printing a message) when the
%   frequencies cannot produce a valid period.
%
%   The pitch period is rounded to a whole number of samples, so the pitch
%   actually realised is sampling_freq / cell_length rather than exactly
%   fundamental_freq.

signal = [];

if fundamental_freq > sampling_freq
    disp('Fundamental frequency greater than sampling_freq')
    return
end

% Zero, negative, NaN or infinite frequencies give an infinite or invalid
% period, which previously surfaced as an allocation error further down.
frequencies_valid = isscalar(fundamental_freq) && isscalar(sampling_freq) ...
    && isfinite(fundamental_freq) && isfinite(sampling_freq) ...
    && fundamental_freq > 0 && sampling_freq > 0;
if ~frequencies_valid
    disp('Fundamental and sampling frequencies must be positive finite scalars')
    return
end

% floor matches what the original colon-indexing trim (1:required_samples)
% did with a fractional sample count
required_samples = floor(ms_to_samples(length_ms, sampling_freq));

pitch_period = 1 / fundamental_freq;
sample_period = 1 / sampling_freq;

% samples per pitch period; >= 1 because fundamental_freq <= sampling_freq
cell_length = round(pitch_period / sample_period);

% one impulse at the start of each period, zeros in between
signal = zeros(1, required_samples);
signal(1:cell_length:end) = 1;
end
|
@ -2,13 +2,16 @@ function spectro(signal, sample_frequency, windows, overlap_interval)
|
||||
|
||||
sample_overlap = ms_to_samples(overlap_interval, sample_frequency);
|
||||
|
||||
sample_size = size(signal);
|
||||
%window_size = round(sample_size(1) / ((windows + 1)/2))
|
||||
|
||||
% Turn windows into window width in samples, take into account overlap
|
||||
window_size = round((sample_size(1) + (windows + 1) * sample_overlap) / (windows+1));
|
||||
window_size = round(...
|
||||
(length(signal) + (windows + 1) * sample_overlap) ...
|
||||
/ ...
|
||||
(windows+1) ...
|
||||
);
|
||||
|
||||
spectrogram(signal, window_size, sample_overlap, [], sample_frequency, 'yaxis');
|
||||
spectrogram(signal, window_size, round(sample_overlap), [], sample_frequency, 'yaxis');
|
||||
|
||||
end
|
||||
|
||||
|
160
lpss.m
160
lpss.m
@ -9,30 +9,52 @@ SEGMENT_OFFSET = 0; % ms from start
|
||||
|
||||
LPC_ORDER = 20;
|
||||
AC_DISP_SAMPLES = 1000; % autocorrelation display samples
|
||||
WINDOW_NUMBER = 10;
|
||||
WINDOW_NUMBER = 10; % number of windows for spectrogram
|
||||
WINDOW_OVERLAP = 5; % ms
|
||||
SYNTH_WINDOW_NUMBER = 100; % number of windows for spectrogram
|
||||
SYNTH_WINDOW_OVERLAP = 10; % ms
|
||||
|
||||
PREEMPHASIS_COEFFS = [1 -0.8]; % first order zero coeff for pre-emphasis
|
||||
|
||||
F0 = 60; % low-pitched male speech
|
||||
% F0 = 600; % children
|
||||
|
||||
% flags for selective running
|
||||
FREQ_RESPONSE = ~false;
|
||||
PREEMPHASIS = false;
|
||||
CEPSTRUM_LOW_PASS = true; % smooth cepstrum for fund. freq. isolation
|
||||
CEPSTRUM_LOW_PASS_COEFFS = [1 -0.7];
|
||||
|
||||
FREQ_RESPONSE = true;
|
||||
AUTOCORRELATION = false;
|
||||
CEPSTRUM_PLOT = false;
|
||||
CEPSTRUM_ONE_SIDED = true;
|
||||
|
||||
CEPSTRUM_COMPLEX = false; % else real cepstrum
|
||||
CEPSTRUM_PLOT = true;
|
||||
CEPSTRUM_THRESHOLD = 0.075; % threshold for isolating peaks in cepstrum
|
||||
|
||||
ORIG_LPC_T_COMPARE = false;
|
||||
ORIG_SPECTROGRAM = false;
|
||||
|
||||
ORIG_SPECTROGRAM = true;
|
||||
SYNTH_SPECTROGRAM = true;
|
||||
|
||||
SYNTHESISED_SOUND_LENGTH = 500; % ms
|
||||
|
||||
PLAY = false;
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%% READ SIGNAL
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
[y, Fs] = audioread('samples/hood_m.wav');
|
||||
[y, Fs] = audioread('samples/head_f.wav');
|
||||
% take segment of sample for processing
|
||||
y = clip_segment(y, Fs, SEGMENT_LENGTH, SEGMENT_OFFSET);
|
||||
y_orig = y;
|
||||
|
||||
L = length(y) % number of samples
|
||||
if PREEMPHASIS
|
||||
y = filter(PREEMPHASIS_COEFFS, 1, y);
|
||||
end
|
||||
|
||||
max_lag = Fs/ F0;
|
||||
L = length(y); % number of samples
|
||||
|
||||
max_lag = Fs/ F0; % for autocorrelation
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%% LPC
|
||||
@ -47,12 +69,11 @@ if ORIG_LPC_T_COMPARE
|
||||
x = 1:AC_DISP_SAMPLES;
|
||||
AC_DISP_SAMPLES = min([AC_DISP_SAMPLES L]);
|
||||
|
||||
% plot t domain for original signal and estimation using LPC coeffs
|
||||
|
||||
figure(1)
|
||||
plot(x, y(end-AC_DISP_SAMPLES+1:end), x, est_y(end-AC_DISP_SAMPLES+1:end), '--')
|
||||
|
||||
% plot(x, y(end-DISPLAY_SAMPLES+1:end))
|
||||
% plot(x, est_y(end-DISPLAY_SAMPLES+1:end))
|
||||
|
||||
grid
|
||||
xlabel('Sample Number')
|
||||
ylabel('Amplitude')
|
||||
@ -62,12 +83,12 @@ end
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%% T DOMAIN PREDICTION ERROR
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
t_domain_err = y - est_y;
|
||||
t_domain_err = y - est_y; % residual?
|
||||
|
||||
if AUTOCORRELATION
|
||||
figure(2)
|
||||
[acs, lags] = autocorr(t_domain_err, max_lag, true, Fs);
|
||||
title('Autocorrelation for error in Time domain')
|
||||
title('Autocorrelation of error in time domain')
|
||||
end
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
@ -92,60 +113,114 @@ lpc_freq_plot = plot(filter_freqs, filter_vals_db, 'b');
|
||||
lpc_freq_plot.LineWidth = 2;
|
||||
|
||||
% MAXIMA
|
||||
% estimate formant frequencies from maxima of LPC filter freq response
|
||||
maxima = islocalmax(filter_vals_db);
|
||||
maxima_freqs = filter_freqs(maxima)
|
||||
maxima_db = filter_vals_db(maxima)
|
||||
maxima_db = filter_vals_db(maxima);
|
||||
|
||||
maxima_plot = plot(maxima_freqs, maxima_db, 'rx');
|
||||
maxima_plot.MarkerSize = 12;
|
||||
maxima_plot.LineWidth = 2;
|
||||
|
||||
%% PRE_FILTER LPC
|
||||
if PREEMPHASIS
|
||||
[prefilter_vals, prefilter_freqs] = freqz(1, lpc(y_orig, LPC_ORDER), length(freq_dom_freqs), Fs);
|
||||
|
||||
prefilter_plot = plot(prefilter_freqs, 20*log10(abs(prefilter_vals)), 'g');
|
||||
prefilter_plot.Color(4) = 0.8;
|
||||
prefilter_plot.LineWidth = 1;
|
||||
end
|
||||
|
||||
%% PLOT
|
||||
hold off
|
||||
grid
|
||||
xlabel('Frequency (Hz)')
|
||||
ylabel('Magnitude (dB)')
|
||||
legend('Original Signal', 'LPC Filter', 'LPC Maxima')
|
||||
if PREEMPHASIS
|
||||
legend('Original Signal', 'LPC Filter', 'LPC Maxima', 'LPC No Pre-emphasis')
|
||||
else
|
||||
legend('Original Signal', 'LPC Filter', 'LPC Maxima')
|
||||
end
|
||||
title('Frequency Response For Speech Signal and LPC Filter')
|
||||
end
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%% CEPSTRUM
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
cep = rceps(y);
|
||||
% cep = cceps(y);
|
||||
if CEPSTRUM_COMPLEX
|
||||
cep = cceps(y);
|
||||
else
|
||||
cep = rceps(y);
|
||||
end
|
||||
cep_filt = filter(1, CEPSTRUM_LOW_PASS_COEFFS, cep);
|
||||
|
||||
if CEPSTRUM_PLOT
|
||||
if CEPSTRUM_PLOT % plot cepstrum in t domain
|
||||
ceps_t = (0:L - 1);
|
||||
|
||||
figure(4)
|
||||
if CEPSTRUM_ONE_SIDED
|
||||
plot(ceps_t(1:L / 2), cep(1:L / 2))
|
||||
if CEPSTRUM_LOW_PASS
|
||||
c = cep_filt;
|
||||
else
|
||||
plot(ceps_t(1:L), cep(1:L))
|
||||
c = cep;
|
||||
end
|
||||
|
||||
figure(4)
|
||||
hold on
|
||||
plot(ceps_t(1:round(L / 2)), c(1:round(L / 2)))
|
||||
|
||||
%% MAXIMA
|
||||
% value threshold
|
||||
c(c < CEPSTRUM_THRESHOLD) = 0;
|
||||
cep_maxima_indexes = islocalmax(c);
|
||||
|
||||
cep_maxima_times = ceps_t(1:round(L / 2));
|
||||
cep_maxima_times = ceps_t(cep_maxima_indexes);
|
||||
c = c(cep_maxima_indexes);
|
||||
|
||||
% quefrency threshold
|
||||
cep_time_indexes = 20 < cep_maxima_times;
|
||||
cep_maxima_times = cep_maxima_times(cep_time_indexes);
|
||||
c = c(cep_time_indexes);
|
||||
|
||||
% 1st half
|
||||
cep_half_indexes = cep_maxima_times <= round(L / 2);
|
||||
cep_maxima_times = cep_maxima_times(cep_half_indexes);
|
||||
c = c(cep_half_indexes);
|
||||
|
||||
maxima_plot = plot(cep_maxima_times, c, 'rx');
|
||||
maxima_plot.MarkerSize = 8;
|
||||
maxima_plot.LineWidth = 1.5;
|
||||
|
||||
grid
|
||||
xlabel('Quefrency')
|
||||
ylabel('ceps(x[n])')
|
||||
if CEPSTRUM_ONE_SIDED
|
||||
xlim([0 L / 2])
|
||||
title('One-sided Speech Signal Cepstrum')
|
||||
else
|
||||
xlim([0 L])
|
||||
title('Speech Signal Cepstrum')
|
||||
end
|
||||
end
|
||||
|
||||
%% AUTOCORRELATION
|
||||
if AUTOCORRELATION
|
||||
figure(5)
|
||||
[cep_autocorr, cep_lags] = autocorr(cep(1:L/2), max_lag, true, Fs);
|
||||
title('One-sided Cepstrum Autocorrelation')
|
||||
xlim([0 L / 2])
|
||||
title('Speech Signal Cepstrum')
|
||||
end
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%% PLOT ORIGINAL SPECTROGRAM
|
||||
%% CALCULATE FUNDAMENTAL FREQUENCY
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% CEPSTRUM
|
||||
% Estimate the fundamental from the strongest cepstral peak that survived
% the amplitude and quefrency thresholds. cep_maxima_times and c only exist
% when the cepstrum section ran, hence the short-circuit on CEPSTRUM_PLOT.
if CEPSTRUM_PLOT && ~isempty(cep_maxima_times)
    % max returns the first index on ties, so pitch_period stays scalar
    % (c == max(c) selected every tied peak and broke the division below)
    [~, strongest_peak] = max(c);
    pitch_period = cep_maxima_times(strongest_peak); % quefrency in samples
    fundamental_freq = 1 / (pitch_period / Fs) % Hz; left unsuppressed to print the estimate
else
    disp('pitch periods not identified')
end
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%% GENERATE SIGNAL
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Synthesise only when a pitch estimate was produced. The 'var' qualifier
% restricts the check to workspace variables; without it exist() also
% matches files and functions of the same name on the path.
if exist('fundamental_freq', 'var')
    excitation = get_impulse_train(fundamental_freq, Fs, SYNTHESISED_SOUND_LENGTH);

    % get_impulse_train returns [] when it rejects its arguments; skip the
    % filter and file write rather than passing an empty signal on
    if ~isempty(excitation)
        % all-pole filter driven by the impulse train
        % NOTE(review): a is presumably the LPC coefficient vector computed
        % in the LPC section, which is not visible here -- confirm
        synth_sound = filter(1, a, excitation);

        audiowrite('out.wav', synth_sound, Fs);
    end
end
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%% SPECTROGRAM
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
if ORIG_SPECTROGRAM
|
||||
figure(6)
|
||||
@ -154,9 +229,20 @@ colormap bone
|
||||
title('Speech Signal Spectrogram')
|
||||
end
|
||||
|
||||
% synth_sound is only created when a fundamental frequency was identified,
% so check for it before plotting instead of failing on an undefined variable
if SYNTH_SPECTROGRAM && exist('synth_sound', 'var')
    figure(7)
    spectro(synth_sound, Fs, SYNTH_WINDOW_NUMBER, SYNTH_WINDOW_OVERLAP);
    colormap bone
    title('Synthesised Vowel Sound Spectrogram')
end
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
%% PLAY
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Play the analysed segment, then the synthesised sound when one was produced
if PLAY
    sound(y, Fs);
    pause(1); % gap before the synthesised sound starts
    % 'var' restricts the check to workspace variables, not files on the path
    if exist('synth_sound', 'var')
        sound(synth_sound, Fs);
    end
end
|
@ -5,6 +5,12 @@
|
||||
close all;clear all;clc;
|
||||
|
||||
CEPSTRUM_COEFFS = 100;
|
||||
CEPSTRUM_THRESHOLD = 0.1;
|
||||
LOW_PASS_COEFF = 0.9;
|
||||
F0 = 60; % low-pitched male speech
|
||||
% F0 = 600; % children
|
||||
|
||||
CEPSTRUM_FFT = false;
|
||||
|
||||
% READ SIGNAL
|
||||
[y, Fs] = audioread('samples/hood_m.wav');
|
||||
@ -21,10 +27,11 @@ xlabel('Quefrency')
|
||||
ylabel('ceps(x[n])')
|
||||
% xlim([0 sample_length])
|
||||
xlim([0 half])
|
||||
title('Cepstrum')
|
||||
|
||||
%% PLOT FFT
|
||||
if CEPSTRUM_FFT
|
||||
|
||||
c = cceps(y);
|
||||
c(CEPSTRUM_COEFFS:end) = 0;
|
||||
% [cep_freqs, cep_vals] = fft_(c, Fs);
|
||||
cep_vals = fft(c);
|
||||
@ -32,6 +39,46 @@ cep_vals = cep_vals(1:floor(sample_length/2+1));
|
||||
cep_freqs = Fs*(0:(sample_length/2))/sample_length;
|
||||
|
||||
figure(2)
|
||||
cep_plot = plot(cep_freqs, 20*log10(abs(cep_vals)), 'g');
|
||||
cep_plot = plot(cep_freqs, 20*log10(abs(cep_vals)));
|
||||
cep_plot.LineWidth = 2;
|
||||
hold off
|
||||
|
||||
end
|
||||
|
||||
%% SMOOTH CEPSTRUM
|
||||
|
||||
% First-order recursive smoother: y[n] = x[n] + LOW_PASS_COEFF * y[n-1]
a = [1 -LOW_PASS_COEFF];
[filter_vals, filter_freqs] = freqz(1, a, 1000, Fs);

figure(3)
% freqz returns a complex response; take the magnitude before converting to
% dB instead of relying on plot to discard the imaginary part of log10
plot(filter_freqs, 20*log10(abs(filter_vals)));
xlabel('Frequency (Hz)')
ylabel('Amplitude (dB)')
title('Low Pass Filter Response')
|
||||
|
||||
c_filt = filter(1, a, c);
|
||||
|
||||
figure(4)
|
||||
plot(t(1:half), c_filt(1:half));
|
||||
xlabel('Quefrency')
|
||||
ylabel('ceps(x[n])')
|
||||
title('Cepstrum Post-Low-Pass')
|
||||
|
||||
%% AUTOCORRELATION
|
||||
figure(5)
|
||||
autocorr(c(1:half), Fs/F0, true, Fs);
|
||||
title('Cepstrum Autocorrelation')
|
||||
|
||||
figure(6)
|
||||
[smooth_cep_autocorr, smooth_cep_lags] = autocorr(c_filt(1:half), Fs/F0, true, Fs);
|
||||
title('Smoothed Cepstrum Autocorrelation')
|
||||
hold on
|
||||
|
||||
smooth_cep_autocorr(smooth_cep_autocorr < CEPSTRUM_THRESHOLD) = 0;
|
||||
|
||||
maxima = islocalmax(smooth_cep_autocorr);
|
||||
maxima_freqs = smooth_cep_lags(maxima)
|
||||
maxima_db = smooth_cep_autocorr(maxima);
|
||||
|
||||
maxima_plot = plot(maxima_freqs, maxima_db, 'rx');
|
||||
maxima_plot.MarkerSize = 8;
|
||||
maxima_plot.LineWidth = 1.5;
|
||||
|
45
lpss_preemph.m
Normal file
45
lpss_preemph.m
Normal file
@ -0,0 +1,45 @@
|
||||
%% lpss_preemph.m
|
||||
%%
|
||||
%% Load wav and play with preemphasis filter
|
||||
|
||||
close all;clear all;clc;
|
||||
|
||||
[y, Fs] = audioread('samples/hood_m.wav');
|
||||
|
||||
% First-order FIR pre-emphasis: y[n] = x[n] - 0.68 * x[n-1]
b = [1 -0.68];

[filter_vals, filter_freqs] = freqz(b, 1, 1000, Fs);

%% PREEMPH FILTER RESPONSE
figure(1)
% freqz returns a complex response; plot its magnitude so the curve matches
% the 'Amplitude' label (plot would otherwise draw only the real part)
plot(filter_freqs, abs(filter_vals));
xlabel('Frequency (Hz)')
ylabel('Amplitude')
|
||||
|
||||
%% ORIGINAL FFT
|
||||
[freq_dom_freqs, freq_dom_vals] = fft_(y, Fs);
|
||||
figure(2)
|
||||
plot(freq_dom_freqs, 20*log10(freq_dom_vals));
|
||||
xlabel('Frequency (Hz)')
|
||||
ylabel('Amplitude')
|
||||
title('Original spectrum')
|
||||
|
||||
%% POST FILTER FFT
|
||||
y_filt = filter(b, 1, y);
|
||||
[freq_dom_freqs_post, freq_dom_vals_post] = fft_(y_filt, Fs);
|
||||
figure(3)
|
||||
plot(freq_dom_freqs_post, 20*log10(freq_dom_vals_post));
|
||||
xlabel('Frequency (Hz)')
|
||||
ylabel('Amplitude')
|
||||
title('Post-filter spectrum')
|
||||
|
||||
%% BOTH
|
||||
% Overlay the original and pre-emphasised spectra for direct comparison
figure(4)
plot(freq_dom_freqs, 20*log10(freq_dom_vals), 'b');
hold on
plot(freq_dom_freqs_post, 20*log10(freq_dom_vals_post), 'r--');
hold off
xlabel('Frequency (Hz)')
ylabel('Magnitude (dB)') % values are plotted as 20*log10(...)
legend('Original Signal', 'Filtered')
% this figure shows both spectra; the old title was copied from figure 3
title('Original and post-filter spectra')
|
11
lpss_synth.m
Normal file
11
lpss_synth.m
Normal file
@ -0,0 +1,11 @@
|
||||
%% lpss_synth.m
|
||||
%%
|
||||
%% Coursework script
|
||||
|
||||
close all;clear all;clc;
|
||||
|
||||
Fs = 24000; % Hz, sampling
|
||||
Ff = 100; % Hz, fundamental
|
||||
sample_length = 1000; % ms
|
||||
|
||||
sample = get_impulse_train(Ff, Fs, sample_length)
|
@ -268,8 +268,8 @@ Brief
|
||||
\begin_layout Standard
|
||||
The aim of this report is to demonstrate how digital signal processing technique
|
||||
s can be used to analyse, model and synthesise speech.
|
||||
The task will take will be considered as two areas of concern, that of
|
||||
modelling and synthesis.
|
||||
The task will be considered as two areas of concern, that of modelling
|
||||
and synthesis.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
@ -280,7 +280,7 @@ The modelling stage will utilise Linear Predictive Coding and the source-filter
|
||||
the original sound will be presented, the effect of different filter orders
|
||||
will also be demonstrated.
|
||||
Relevant parameters of the original vowel speech segment will be presented
|
||||
including the fundamental frequency and further formant frequencies.
|
||||
including the fundamental frequency and formant frequencies.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
@ -296,10 +296,123 @@ d and analysed.
|
||||
Implementation
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
The implementation of this system was completed using
|
||||
\noun on
|
||||
Matlab
|
||||
\noun default
|
||||
with aid from functions in the digital signal processing toolbox among
|
||||
others.
|
||||
Following loading a vowel sample, a segment of variable length (100ms was
|
||||
standard) was clipped for processing.
|
||||
The clip optionally also underwent pre-emphasis using a high pass filter.
|
||||
As speech spectra can tend to have higher energy at lower frequencies,
|
||||
the use of pre-emphasis can balance the magnitude across the spectrum.
|
||||
A first order filter was used and the coefficient varied; over-use could
|
||||
prove excessive for higher frequencies including fricative sounds.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Subsection
|
||||
Modelling
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
In order to estimate the filter state of the vocal tract, the linear predictive
|
||||
coding coefficients of varying orders were calculated using the
|
||||
\begin_inset listings
|
||||
lstparams "language=Matlab,basicstyle={\ttfamily},tabsize=4"
|
||||
inline true
|
||||
status open
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
lpc(signal, order)
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
|
||||
function.
|
||||
In order to compare the frequency response of the LPC filter with the original
|
||||
signal, the Fourier transform of the signal was calculated.
|
||||
The frequency domain representation of the LPC filter was found using the
|
||||
|
||||
\begin_inset listings
|
||||
lstparams "language=Matlab,basicstyle={\ttfamily},tabsize=4"
|
||||
inline true
|
||||
status open
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
freqz(b, a, n, f)
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
|
||||
function and co-plotted with the original signal.
|
||||
This frequency plot of the LPC filter constitutes the spectral envelope
|
||||
of the signal and the vowel formant frequencies can be found at the maxima
|
||||
of the spectrum.
|
||||
Due to the smooth profile of the LPC spectrum, formant frequencies were
|
||||
estimated by identifying the local maxima of the function.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
In order to find the fundamental frequency of the signal, the cepstrum was
|
||||
used.
|
||||
The use of a low pass filter was investigated in order to smooth the cepstrum
|
||||
before programmatically finding pitch period candidates by applying
|
||||
\begin_inset Formula $x$
|
||||
\end_inset
|
||||
|
||||
and
|
||||
\begin_inset Formula $y$
|
||||
\end_inset
|
||||
|
||||
thresholds.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Subsection
|
||||
Synthesis
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
In order to synthesise speech, a periodic impulse train at the identified
|
||||
fundamental frequency of the original vowel was generated.
|
||||
The impulse train was sampled at the same frequency as the original sound.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Section
|
||||
Results
|
||||
\end_layout
|
||||
|
||||
\begin_layout Subsection
|
||||
LPC Filter
|
||||
\end_layout
|
||||
|
||||
\begin_layout Subsubsection
|
||||
Order Variation
|
||||
\end_layout
|
||||
|
||||
\begin_layout Subsection
|
||||
Spectral Analysis
|
||||
\end_layout
|
||||
|
||||
\begin_layout Subsubsection
|
||||
Fundamental Frequency
|
||||
\end_layout
|
||||
|
||||
\begin_layout Subsubsection
|
||||
Formant Frequencies
|
||||
\end_layout
|
||||
|
||||
\begin_layout Subsubsection
|
||||
Cepstrum Smoothing
|
||||
\end_layout
|
||||
|
||||
\begin_layout Subsection
|
||||
Synthesis
|
||||
\end_layout
|
||||
|
||||
\begin_layout Section
|
||||
Discussion
|
||||
\end_layout
|
||||
@ -346,15 +459,38 @@ name "sec:Code"
|
||||
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
While much of the code was developed in individual scripts in order to experimen
|
||||
t with separate aspects of the system, for collecting results a script which
|
||||
constitutes the entire system was written,
|
||||
\begin_inset listings
|
||||
lstparams "basicstyle={\ttfamily}"
|
||||
inline true
|
||||
status open
|
||||
|
||||
\begin_layout Plain Layout
|
||||
|
||||
lpss.m
|
||||
\end_layout
|
||||
|
||||
\end_inset
|
||||
|
||||
.
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
\begin_inset CommandInset include
|
||||
LatexCommand lstinputlisting
|
||||
filename "../lpss.m"
|
||||
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram, mfcc, spectro, fft_, autocorr, clip_segment, islocalmax, ms_to_samples},caption={Main script},label={main_script}"
|
||||
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram, mfcc, spectro, fft_, autocorr, clip_segment, islocalmax, ms_to_samples, rceps, cceps, ones, audioplayer, play, get_impulse_train, lpc},caption={Main script including source-filter model and spectral analysis},label={main_script}"
|
||||
|
||||
\end_inset
|
||||
|
||||
|
||||
\begin_inset Newpage pagebreak
|
||||
\end_inset
|
||||
|
||||
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
@ -394,7 +530,7 @@ lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},comm
|
||||
\begin_inset CommandInset include
|
||||
LatexCommand lstinputlisting
|
||||
filename "../func/clip_segment.m"
|
||||
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram},caption={Retrieve a segment of the original speech signal},label={clip_segment_function}"
|
||||
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram, ms_to_samples},caption={Retrieve a segment of the original speech signal},label={clip_segment_function}"
|
||||
|
||||
\end_inset
|
||||
|
||||
@ -405,7 +541,18 @@ lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},comm
|
||||
\begin_inset CommandInset include
|
||||
LatexCommand lstinputlisting
|
||||
filename "../func/ms_to_samples.m"
|
||||
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram},caption={Transform time in milliseconds into the respective number of samples},label={clip_segment_function-1}"
|
||||
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram},caption={Transform time in milliseconds into the respective number of samples},label={ms_to_samples_function}"
|
||||
|
||||
\end_inset
|
||||
|
||||
|
||||
\end_layout
|
||||
|
||||
\begin_layout Standard
|
||||
\begin_inset CommandInset include
|
||||
LatexCommand lstinputlisting
|
||||
filename "../func/get_impulse_train.m"
|
||||
lstparams "breaklines=true,frame=tb,language=Matlab,basicstyle={\\ttfamily},commentstyle={\\color{commentgreen}\\itshape},keywordstyle={\\color{blue}},emphstyle={\\color{red}},stringstyle={\\color{red}},identifierstyle={\\color{cyan}},morekeywords={audioread, aryule, xcorr, freqz, spectrogram, ms_to_samples, repmat},caption={Generate an impulse train of given fundamental frequency at a provided sampling frequency for a given length of time},label={get_impulse_train_function}"
|
||||
|
||||
\end_inset
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user