linear-predictive-speech-synth/lpss.m

254 lines
6.6 KiB
Matlab

%% lpss.m
%%
%% Coursework script
% Start from a clean slate: no open figures, empty workspace, clear console.
close all;
clear all;
clc;

% ---- input sample and analysis segment ----
NAME = 'hood_m';
% NAME = 'head_f';
SEGMENT_LENGTH = 100;   % ms
SEGMENT_OFFSET = 20;    % ms from start

% ---- analysis parameters ----
LPC_ORDER = 30;
AC_DISP_SAMPLES = 1000; % autocorrelation display samples
WINDOW_NUMBER = 10;     % number of windows for spectrogram
WINDOW_OVERLAP = 10;    % ms
SYNTH_WINDOW_NUMBER = 60;   % number of windows for the synthesised-sound spectrogram
SYNTH_WINDOW_OVERLAP = 20;  % ms
PREEMPHASIS_COEFFS = [1 -0.9];  % first order zero coeff for pre-emphasis
F0 = 60;                % low-pitched male speech; sets the autocorrelation lag limit (Fs / F0)
% F0 = 600;             % children

% ---- flags for selective running ----
PREEMPHASIS = false;
CEPSTRUM_LOW_PASS = true;               % smooth cepstrum for fund. freq. isolation
CEPSTRUM_LOW_PASS_COEFFS = [1 -0.7];    % denominator of the cepstrum smoothing filter
FREQ_RESPONSE = true;
AUTOCORRELATION = false;
CEPSTRUM_COMPLEX = false;               % else real cepstrum
CEPSTRUM_PLOT = true;
CEPSTRUM_THRESHOLD = 0.075;             % cepstrum values below this are zeroed before peak picking
ORIG_LPC_T_COMPARE = false;
ORIG_SPECTROGRAM = true;
SYNTH_SPECTROGRAM = true;
SYNTHESISED_SOUND_LENGTH = 100;         % ms
WRITE = false;                          % write the synthesised sound to disk
PLAY = true;                            % play the original and synthesised sounds
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% READ SIGNAL
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load the selected sample from the samples/ folder.
wav_path = strcat('samples/', NAME, '.wav');
[y, Fs] = audioread(wav_path);

% Cut out the segment to analyse; y_orig keeps it without pre-emphasis
% so the two LPC fits can be compared later.
y_orig = clip_segment(y, Fs, SEGMENT_LENGTH, SEGMENT_OFFSET);
y = y_orig;
if PREEMPHASIS
    % FIR filter with the configured pre-emphasis coefficients
    y = filter(PREEMPHASIS_COEFFS, 1, y);
end

L = length(y);      % number of samples
max_lag = Fs / F0;  % for autocorrelation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% LPC
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Fit a linear predictor of order LPC_ORDER to the segment. The result is
% left unsuppressed so the coefficients are echoed to the console.
a = lpc(y, LPC_ORDER) % signal, filter order
% Forward linear prediction of y from its own past samples:
%   est_y(n) = -a(2)*y(n-1) - ... - a(p+1)*y(n-p)
% The leading zero tap keeps the current sample out of its own estimate,
% so y - est_y is the prediction error. (Filtering y through 1/A(z) with a
% fixed gain, as done previously, does not produce an estimate of y.)
est_y = filter([0 -a(2:end)], 1, y);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% COMPARE ORIGINAL SIGNAL WITH LPC (T DOMAIN)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if ORIG_LPC_T_COMPARE
    % Clamp the display length to the segment length BEFORE building the
    % x axis, so x and the plotted tails always have the same length.
    AC_DISP_SAMPLES = min([AC_DISP_SAMPLES L]);
    x = 1:AC_DISP_SAMPLES;
    % plot t domain for original signal and estimation using LPC coeffs
    % (last AC_DISP_SAMPLES samples of each)
    figure(1)
    plot(x, y(end-AC_DISP_SAMPLES+1:end), x, est_y(end-AC_DISP_SAMPLES+1:end), '--')
    grid on
    xlabel('Sample Number')
    ylabel('Amplitude')
    legend('Original signal','LPC estimate')
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% T DOMAIN PREDICTION ERROR
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Sample-by-sample difference between the segment and est_y.
t_domain_err = y - est_y;
if AUTOCORRELATION
    figure(2)
    % autocorr is not defined in this section; presumably it draws into the
    % current figure, since the title is applied right after -- confirm.
    [acs, lags] = autocorr(t_domain_err, max_lag, true, Fs);
    title('Autocorrelation of error in time domain')
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% FREQUENCY RESPONSE
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if FREQ_RESPONSE
    figure(3)

    %% ORIGINAL FFT
    % Spectrum of the analysed segment in dB, drawn as a faint black line.
    [freq_dom_freqs, freq_dom_vals] = fft_(y, Fs);
    freq_dom_db = 20*log10(abs(freq_dom_vals));
    orig_freq_plot = plot(freq_dom_freqs, freq_dom_db, 'black', 'LineWidth', 1);
    orig_freq_plot.Color(4) = 0.25; % fourth colour element = line transparency
    hold on

    %% LPC FILTER RESPONSE
    % Evaluate 1/A(z) at as many points as the FFT plot has.
    n_points = length(freq_dom_freqs);
    [filter_vals, filter_freqs] = freqz(1, a, n_points, Fs);
    filter_vals_db = 20*log10(abs(filter_vals));
    lpc_freq_plot = plot(filter_freqs, filter_vals_db, 'b', 'LineWidth', 2);

    % MAXIMA
    % estimate formant frequencies from maxima of LPC filter freq response
    % (frequencies are left unsuppressed so they are echoed to the console)
    maxima = islocalmax(filter_vals_db);
    maxima_freqs = filter_freqs(maxima)
    maxima_db = filter_vals_db(maxima);
    maxima_plot = plot(maxima_freqs, maxima_db, 'rx', 'MarkerSize', 12, 'LineWidth', 2);

    % Legend entries follow the order in which the lines were drawn.
    legend_entries = {'Original Signal', 'LPC Filter', 'LPC Maxima'};

    %% PRE_FILTER LPC
    % With pre-emphasis on, overlay the LPC fit of the un-emphasised segment.
    if PREEMPHASIS
        a_no_preemph = lpc(y_orig, LPC_ORDER);
        [prefilter_vals, prefilter_freqs] = freqz(1, a_no_preemph, n_points, Fs);
        prefilter_plot = plot(prefilter_freqs, 20*log10(abs(prefilter_vals)), 'g', 'LineWidth', 1.5);
        prefilter_plot.Color(4) = 0.8;
        legend_entries{end + 1} = 'LPC No Pre-emphasis';
    end

    %% PLOT
    hold off
    grid
    xlabel('Frequency (Hz)')
    ylabel('Magnitude (dB)')
    legend(legend_entries{:})
    title('Frequency Response For Speech Signal and LPC Filter')
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% CEPSTRUM
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Compute the cepstrum of the segment, pick peak candidates from its first
% half, and (optionally) plot both. Peak picking runs regardless of
% CEPSTRUM_PLOT so that the pitch estimate below, and the synthesis that
% depends on it, do not silently disappear when the plot is switched off.
if CEPSTRUM_COMPLEX
    cep = cceps(y);
else
    cep = rceps(y);
end
cep_filt = filter(1, CEPSTRUM_LOW_PASS_COEFFS, cep);
% choose the smoothed or the raw cepstrum for plotting and peak picking
if CEPSTRUM_LOW_PASS
    c = cep_filt;
else
    c = cep;
end
ceps_t = (0:L - 1);         % quefrency axis in samples
half_len = round(L / 2);    % only the first half of the cepstrum is used

%% MAXIMA
% value threshold: zero everything below CEPSTRUM_THRESHOLD
c_thresholded = c;
c_thresholded(c_thresholded < CEPSTRUM_THRESHOLD) = 0;
% local maxima of what is left
cep_maxima_indexes = islocalmax(c_thresholded);
cep_maxima_times = ceps_t(cep_maxima_indexes);
cep_maxima_vals = c_thresholded(cep_maxima_indexes);
% quefrency threshold (more than 20 samples) and 1st half only
cep_keep = (20 < cep_maxima_times) & (cep_maxima_times <= half_len);
cep_maxima_times = cep_maxima_times(cep_keep);
cep_maxima_vals = cep_maxima_vals(cep_keep);

if CEPSTRUM_PLOT % plot cepstrum in t domain
    figure(4)
    hold on
    plot(ceps_t(1:half_len), c(1:half_len))
    % marker properties are passed in the plot call, so nothing has to be
    % set on the returned handle afterwards
    plot(cep_maxima_times, cep_maxima_vals, 'rx', 'MarkerSize', 8, 'LineWidth', 1.5);
    hold off
    grid on
    xlabel('Quefrency (samples)')
    ylabel('ceps(x[n])')
    xlim([0 L / 2])
    title('Speech Signal Cepstrum')
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% CALCULATE FUNDAMENTAL FREQUENCY
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% CEPSTRUM
% The strongest remaining peak gives the pitch period in samples. max()
% with an index output returns a single position even when several peaks
% share the maximum value, so pitch_period is always a scalar.
% Both results are left unsuppressed so they are echoed to the console.
if ~isempty(cep_maxima_times)
    [~, strongest_peak] = max(cep_maxima_vals);
    pitch_period = cep_maxima_times(strongest_peak)
    fundamental_freq = 1 / (pitch_period / Fs)
else
    disp('pitch periods not identified')
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% GENERATE SIGNAL
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Synthesise only when a fundamental frequency was found. The 'var'
% qualifier limits the check to workspace variables, so a file or folder
% that happens to share the name cannot make it pass.
if exist('fundamental_freq', 'var')
    % excitation at the estimated pitch, shaped by the all-pole filter 1/A(z)
    excitation = get_impulse_train(fundamental_freq, Fs, SYNTHESISED_SOUND_LENGTH);
    synth_sound = filter(1, a, excitation);
    if WRITE
        % audiowrite does not create missing folders
        if ~exist('synthed', 'dir')
            mkdir('synthed');
        end
        % file name encodes sample name, LPC order, segment length and offset
        out_path = strcat('synthed/', NAME, '_o', num2str(LPC_ORDER), '_', num2str(SEGMENT_LENGTH), '_', num2str(SEGMENT_OFFSET), 'ms.wav');
        audiowrite(out_path, synth_sound, Fs);
    end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% SPECTROGRAM
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Spectrogram of the analysed segment.
if ORIG_SPECTROGRAM
    figure(6)
    spectro(y, Fs, WINDOW_NUMBER, WINDOW_OVERLAP);
    colormap bone
    title('Speech Signal Spectrogram')
end
% Spectrogram of the synthesised sound. synth_sound only exists when a
% fundamental frequency was identified, so check for it first instead of
% failing on an undefined variable.
if SYNTH_SPECTROGRAM
    if exist('synth_sound', 'var')
        figure(7)
        spectro(synth_sound, Fs, SYNTH_WINDOW_NUMBER, SYNTH_WINDOW_OVERLAP);
        colormap bone
        title('Synthesised Vowel Sound Spectrogram')
    else
        disp('synthesised sound not available, skipping its spectrogram')
    end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% PLAY
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Play the analysed segment, wait a second, then play the synthesised
% sound if one was generated.
if PLAY
    sound(y, Fs);
    pause(1); % gap between the two sounds
    % 'var' restricts the check to workspace variables; without it a file
    % or folder named synth_sound would also satisfy the test
    if exist('synth_sound', 'var')
        sound(synth_sound, Fs);
    end
end