fixed PR, changed pca

This commit is contained in:
aj 2019-11-30 15:55:43 +00:00
parent 521f52a397
commit aaa4feae09
9 changed files with 340 additions and 83 deletions

View File

@ -22,10 +22,10 @@ OUT_FOLDER = 'descriptors';
%% and within that folder, create another folder to hold these descriptors
%% the idea is all your descriptors are in individual folders - within
%% the folder specified as 'OUT_FOLDER'.
% OUT_SUBFOLDER='avgRGB';
OUT_SUBFOLDER='avgRGB';
% OUT_SUBFOLDER='globalRGBhisto';
% OUT_SUBFOLDER='spatialColour';
OUT_SUBFOLDER='spatialColourTexture';
% OUT_SUBFOLDER='spatialColourTexture';
allfiles=dir (fullfile([DATASET_FOLDER,'/Images/*.bmp']));
for filenum=1:length(allfiles)
@ -37,10 +37,10 @@ for filenum=1:length(allfiles)
fout=[OUT_FOLDER,'/',OUT_SUBFOLDER,'/',fname(1:end-4),'.mat'];%replace .bmp with .mat
%% EXTRACT FUNCTION
% F=extractAvgRGB(img);
F=extractAvgRGB(img);
% F=extractGlobalColHist(img);
% F=extractSpatialColour(img);
F=extractSpatialColourTexture(img);
% F=extractSpatialColourTexture(img);
save(fout,'F');
toc
end

View File

@ -89,10 +89,10 @@ NIMG=size(ALLFEAT,1); % number of images in collection
confusion_matrix = zeros(CAT_TOTAL);
AP_values = zeros([1, CAT_TOTAL]);
for run=1:CAT_TOTAL
for iteration=1:CAT_TOTAL
%% 2) Pick an image at random to be the query
queryimg=getRandomCategoryImage(run); % index of a random image
queryimg=getRandomCategoryImage(iteration); % index of a random image
%% 3) Compute the distance of image to the query
dst=[];
@ -107,7 +107,7 @@ for run=1:CAT_TOTAL
dst=[dst ; [thedst i category]];
end
dst=sortrows(dst,1); % sort the results
%% 4) Calculate PR
precision_values=zeros([1, NIMG]);
recall_values=zeros([1, NIMG]);
@ -116,8 +116,10 @@ for run=1:CAT_TOTAL
query_row = dst(1,:);
query_category = query_row(1,3);
fprintf('category was %s, %i, %i\n', CATEGORIES(query_category), query_category, run)
if query_category ~= iteration
dst
end
fprintf('category was %s\n', CATEGORIES(query_category))
%calculate PR for each n
for i=1:NIMG
@ -132,7 +134,7 @@ for run=1:CAT_TOTAL
row = rows(n, :);
category = row(3);
if category == query_category
if category == iteration
correct_results = correct_results + 1;
else
incorrect_results = incorrect_results + 1;
@ -145,7 +147,7 @@ for run=1:CAT_TOTAL
row = rows(i, :);
category = row(3);
if category == query_category
if category == iteration
correct_results = correct_results + 1;
correct_at_n(i) = 1;
else
@ -153,7 +155,7 @@ for run=1:CAT_TOTAL
end
precision = correct_results / i;
recall = correct_results / CAT_HIST(1,query_category);
recall = correct_results / CAT_HIST(1,iteration);
precision_values(i) = precision;
recall_values(i) = recall;
@ -170,9 +172,9 @@ for run=1:CAT_TOTAL
end
sum_P_rel_n = sum(P_rel_n);
average_precision = sum_P_rel_n / CAT_HIST(1,query_category);
average_precision = sum_P_rel_n / CAT_HIST(1,iteration)
AP_values(run) = average_precision;
AP_values(iteration) = average_precision;
@ -189,7 +191,7 @@ for run=1:CAT_TOTAL
%% These may be a little hard to see using imgshow
%% If you have access, try using imshow(outdisplay) or imagesc(outdisplay)
SHOW=20; % Show top 15 results
SHOW=25; % Show top 25 results
dst=dst(1:SHOW,:);
outdisplay=[];
for i=1:size(dst,1)

View File

@ -0,0 +1,244 @@
%% EEE3032 - Computer Vision and Pattern Recognition (ee3.cvpr)
%%
%% cvpr_visualsearch.m
%% Skeleton code provided as part of the coursework assessment
%%
%% This script loads all descriptors pre-computed (by the function
%% cvpr_computedescriptors) from the images in the MSRCv2 dataset, builds
%% an eigenmodel (PCA) of them and projects every descriptor onto the
%% leading principal components.
%%
%% One preselected query image per category (QUERY_INDEXES) is then
%% compared against every image in the collection by calling
%% compareMahalanobis. For each query the script ranks the collection,
%% computes precision/recall and average precision (AP), and accumulates a
%% confusion matrix from the top-ranked results. Finally the mean average
%% precision (MAP) and the standard deviation of the AP values are printed.
%%
%% (c) John Collomosse 2010  (J.Collomosse@surrey.ac.uk)
%% Centre for Vision Speech and Signal Processing (CVSSP)
%% University of Surrey, United Kingdom

close all;
clear all;

%% Edit the following line to the folder you unzipped the MSRCv2 dataset to
DATASET_FOLDER = 'dataset';

%% Folder that holds the results...
DESCRIPTOR_FOLDER = 'descriptors';
%% and within that folder, another folder to hold the descriptors
%% we are interested in working with
DESCRIPTOR_SUBFOLDER='avgRGB';
% DESCRIPTOR_SUBFOLDER='globalRGBhisto';
% DESCRIPTOR_SUBFOLDER='spatialColour';
% DESCRIPTOR_SUBFOLDER='spatialColourTexture';

%% Number of principal components kept after deflating the eigenmodel
PCA_DIMENSIONS = 2;

%% Number of top-ranked results visualised and counted in the confusion matrix
SHOW = 25;

CATEGORIES = ["Farm Animal"
    "Tree"
    "Building"
    "Plane"
    "Cow"
    "Face"
    "Car"
    "Bike"
    "Sheep"
    "Flower"
    "Sign"
    "Bird"
    "Book Shelf"
    "Bench"
    "Cat"
    "Dog"
    "Road"
    "Water Features"
    "Human Figures"
    "Coast"
    ];

%% Index (into the directory listing) of the query image for each category
QUERY_INDEXES=[301 358 384 436 447 476 509 537 572 5 61 80 97 127 179 181 217 266 276 333];
% NOTE(review): presumably the <category>_<image> file-name prefixes of the
% query images above - confirm against the dataset listing
% 1_10 2_16 3_12 4_4 5_15 6_14 7_17 8_15 9_1 10_14 11_8 12_26 13_10 14_10
% 15_8 16_10 17_16 18_5 19_15 20_12

%% 1) Load all the descriptors into "ALLFEAT"
%% each row of ALLFEAT is a descriptor (is an image)
ALLFEAT=[];
allfiles=dir (fullfile([DATASET_FOLDER,'/Images/*.bmp']));
ALLFILES=cell(1,length(allfiles));
ALLCATs=zeros(1,length(allfiles));
for filenum=1:length(allfiles)
    fname=allfiles(filenum).name;

    % identify photo category for PR calculation: it is the number before
    % the first underscore of the file name
    split_string = split(fname, '_');
    ALLCATs(filenum) = str2double(split_string(1));

    % only the pre-computed descriptor is needed here; the image itself is
    % read later, and only for the results that are visualised
    featfile=[DESCRIPTOR_FOLDER,'/',DESCRIPTOR_SUBFOLDER,'/',fname(1:end-4),'.mat'];%replace .bmp with .mat
    load(featfile,'F');
    ALLFILES{filenum}=[DATASET_FOLDER,'/Images/',fname];
    ALLFEAT=[ALLFEAT ; F];
end

% get counts for each category for PR calculation
% (histcounts gives one bin per integer category without opening a figure)
CAT_HIST = histcounts(ALLCATs, 'BinMethod', 'integers');
CAT_TOTAL = length(CAT_HIST);

NIMG=size(ALLFEAT,1);           % number of images in collection

if numel(QUERY_INDEXES) < CAT_TOTAL
    error('QUERY_INDEXES holds %i entries but %i categories were found', numel(QUERY_INDEXES), CAT_TOTAL);
end

%% 2) Compute the eigenmodel and project the data to lower dimensionality
%% This is done ONCE, before the query loop: the model does not depend on
%% the query, and re-projecting already projected data on every iteration
%% would change the descriptors from one query to the next.
E = getEigenModel(ALLFEAT);
E = deflateEigen(E, PCA_DIMENSIONS);
% Subtracting the model mean (E.org) is not required because only the
% difference between two descriptors is ever used, and the mean cancels.
FEAT_PROJ = ALLFEAT * E.vct;     % one projected descriptor per row

confusion_matrix = zeros(CAT_TOTAL);

AP_values = zeros([1, CAT_TOTAL]);
for iteration=1:CAT_TOTAL

    %% 3) Select the preselected query image for this category
    queryimg=QUERY_INDEXES(iteration);
    query=FEAT_PROJ(queryimg,:);

    % take the category from the query itself rather than from the
    % top-ranked result, which need not be the query when distances tie
    query_category = ALLCATs(queryimg);
    if query_category ~= iteration
        warning('Query image %i belongs to category %i but category %i was expected', queryimg, query_category, iteration);
    end
    fprintf('category was %s\n', CATEGORIES(query_category))

    %% 4) Compute the distance of every image to the query
    dst=zeros(NIMG,3);           % columns: distance, image index, category
    for i=1:NIMG
        candidate=FEAT_PROJ(i,:);

        %% COMPARE FUNCTION
        thedst=compareMahalanobis(E, candidate, query);
        dst(i,:)=[thedst i ALLCATs(i)];
    end
    dst=sortrows(dst,1);  % sort the results

    %% 5) Calculate PR for each n
    % a result is relevant when it belongs to the category being iterated
    correct_at_n = double(dst(:,3)' == iteration);
    correct_so_far = cumsum(correct_at_n);  % relevant results within the top n

    precision_values = correct_so_far ./ (1:NIMG);
    recall_values = correct_so_far ./ CAT_HIST(1,iteration);

    %% 6) calculate AP
    % sum of the precision at every rank that holds a relevant result,
    % divided by the number of relevant images in the collection
    sum_P_rel_n = sum(precision_values .* correct_at_n);
    average_precision = sum_P_rel_n / CAT_HIST(1,iteration);

    AP_values(iteration) = average_precision;

    %% 7) plot PR curve
    figure(1)
    plot(recall_values, precision_values);
    hold on;
    title('PR Curve');
    xlabel('Recall');
    ylabel('Precision');

    %% 8) Visualise the results
    %% These may be a little hard to see using imgshow
    %% If you have access, try using imshow(outdisplay) or imagesc(outdisplay)
    top_results=dst(1:min(SHOW,NIMG),:); % never ask for more rows than exist
    outdisplay=[];
    for i=1:size(top_results,1)
        img=imread(ALLFILES{top_results(i,2)});
        img=img(1:2:end,1:2:end,:); % make image a quarter size
        img=img(1:81,:,:); % crop image to uniform size vertically (some MSVC images are different heights)
        outdisplay=[outdisplay img];

        %populate confusion matrix
        confusion_matrix(query_category, top_results(i,3)) = confusion_matrix(query_category, top_results(i,3)) + 1;
    end
    % figure(3)
    % imgshow(outdisplay);
    % axis off;

end

% normalise confusion matrix
norm_confusion_matrix = confusion_matrix ./ sum(confusion_matrix, 'all');

%% 9) Calculate MAP
% figure(4)
% histogram(AP_values);
% title('Average Precision Distribution');
% ylabel('Count');
% xlabel('Average Precision');
% xlim([0, 1]);

MAP = mean(AP_values)
AP_sd = std(AP_values)

% figure(2)
% plot(1:CAT_TOTAL, AP_values);
% title('Average Precision Per Run');
% xlabel('Run');
% ylabel('Average Precision');

View File

@ -54,6 +54,11 @@ CATEGORIES = ["Farm Animal"
"Coast"
];
QUERY_INDEXES=[301 358 384 436 447 476 509 537 572 5 61 80 97 127 179 181 217 266 276 333];
% 1_10 2_16 3_12 4_4 5_15 6_14 7_17 8_15 9_1 10_14 11_8 12_26 13_10 14_10
% 15_8 16_10 17_16 18_5 19_15 20_12
%% 1) Load all the descriptors into "ALLFEAT"
%% each row of ALLFEAT is a descriptor (is an image)
@ -85,55 +90,45 @@ CAT_HIST = histogram(ALLCATs).Values;
CAT_TOTAL = length(CAT_HIST);
NIMG=size(ALLFEAT,1); % number of images in collection
MODEL_SIZE = 10;
confusion_matrix = zeros(CAT_TOTAL);
AP_values = zeros([1, CAT_TOTAL]);
for iterating_category=1:CAT_TOTAL
%% 2) Select descriptors for category and training data
category_training_descriptors = [];
test_descriptors = [];
test_categories = [];
for i=1:NIMG
if (iterating_category == ALLCATs(i)) && (size(category_training_descriptors,1) < MODEL_SIZE)
category_training_descriptors = [ category_training_descriptors ; ALLFEAT(i,:) ];
else
test_descriptors = [ test_descriptors ; ALLFEAT(i,:) ];
test_categories = [ test_categories ; ALLCATs(i) ];
end
end
[eig_vct, eig_val, model_mean] = getEigenModel(category_training_descriptors);
TEST_SIZE = size(test_descriptors,1);
for iteration=1:CAT_TOTAL
%% 2) Pick an image at random to be the query
queryimg=QUERY_INDEXES(iteration); % index of a random image
%% 3) Compute the distance of image to the query
dst=[];
for i=1:TEST_SIZE
candidate=test_descriptors(i,:);
category=test_categories(i);
for i=1:NIMG
candidate=ALLFEAT(i,:);
query=ALLFEAT(queryimg,:);
category=ALLCATs(i);
%% COMPARE FUNCTION
thedst=compareMahalanobis(eig_vct, eig_val, model_mean, candidate);
thedst=compareEuclidean(query, candidate);
dst=[dst ; [thedst i category]];
end
dst=sortrows(dst,1); % sort the results
%% 4) Calculate PR
precision_values=zeros([1, TEST_SIZE]);
recall_values=zeros([1, TEST_SIZE]);
precision_values=zeros([1, NIMG]);
recall_values=zeros([1, NIMG]);
correct_at_n=zeros([1, TEST_SIZE]);
correct_at_n=zeros([1, NIMG]);
query_row = dst(1,:);
query_category = query_row(1,3);
fprintf('category was %s, %i, %i\n', CATEGORIES(query_category), query_category, iterating_category)
if query_category ~= iteration
dst
end
fprintf('category was %s\n', CATEGORIES(query_category))
%calculate PR for each n
for i=1:TEST_SIZE
for i=1:NIMG
rows = dst(1:i, :);
@ -145,7 +140,7 @@ for iterating_category=1:CAT_TOTAL
row = rows(n, :);
category = row(3);
if category == query_category
if category == iteration
correct_results = correct_results + 1;
else
incorrect_results = incorrect_results + 1;
@ -158,7 +153,7 @@ for iterating_category=1:CAT_TOTAL
row = rows(i, :);
category = row(3);
if category == query_category
if category == iteration
correct_results = correct_results + 1;
correct_at_n(i) = 1;
else
@ -166,7 +161,7 @@ for iterating_category=1:CAT_TOTAL
end
precision = correct_results / i;
recall = correct_results / CAT_HIST(1,query_category);
recall = correct_results / CAT_HIST(1,iteration);
precision_values(i) = precision;
recall_values(i) = recall;
@ -174,8 +169,8 @@ for iterating_category=1:CAT_TOTAL
%% 5) calculate AP
P_rel_n = zeros([1, TEST_SIZE]);
for i = 1:TEST_SIZE
P_rel_n = zeros([1, NIMG]);
for i = 1:NIMG
precision = precision_values(i);
i_result_relevant = correct_at_n(i);
@ -183,9 +178,9 @@ for iterating_category=1:CAT_TOTAL
end
sum_P_rel_n = sum(P_rel_n);
average_precision = sum_P_rel_n / CAT_HIST(1,query_category);
average_precision = sum_P_rel_n / CAT_HIST(1,iteration);
AP_values(iterating_category) = average_precision;
AP_values(iteration) = average_precision;
@ -202,7 +197,7 @@ for iterating_category=1:CAT_TOTAL
%% These may be a little hard to see using imgshow
%% If you have access, try using imshow(outdisplay) or imagesc(outdisplay)
SHOW=20; % Show top 15 results
SHOW=25; % Show top 25 results
dst=dst(1:SHOW,:);
outdisplay=[];
for i=1:size(dst,1)

View File

@ -1,7 +1,7 @@
function F=extractSpatialColourTexture(img)
grid_rows = 4;
grid_columns = 4;
grid_rows = 8;
grid_columns = 8;
img_size = size(img);
img_rows = img_size(1);
@ -43,7 +43,7 @@ for i = 1:grid_rows
avg_vals = extractAvgRGB(img_cell);
[mag_img, angle_img] = getEdgeInfo(grey_img_cell);
edge_hist = getEdgeAngleHist(mag_img, angle_img, 6, 0.05);
edge_hist = getEdgeAngleHist(mag_img, angle_img, 8, 0.05);
%concatenate average values into vector
descriptor = [descriptor edge_hist avg_vals(1) avg_vals(2) avg_vals(3)];

View File

@ -1,11 +1,17 @@
function d=compareMahalanobis(E, obs, query)
% COMPAREMAHALANOBIS  Eigenvalue-normalised distance to a query descriptor.
%
%   d = compareMahalanobis(E, obs, query)
%
%   E      eigenmodel struct; only E.val (one eigenvalue per retained
%          component) is read here.
%   obs    N-by-K matrix, one descriptor per row, already projected onto
%          the K components of E.
%   query  1-by-K descriptor expressed in the same projected space.
%
%   Returns d, a 1-by-N row vector where d(n) is
%       sqrt( sum_k ( (obs(n,k) - query(k))^2 / E.val(k) ) ).
%
%   Raises compareMahalanobis:dimensionMismatch when the descriptor length
%   does not match the number of eigenvalues in E.

num_obs = size(obs,1);
eig_vals = E.val(:)';            % row vector, one eigenvalue per component

if size(obs,2) ~= numel(eig_vals) || numel(query) ~= numel(eig_vals)
    error('compareMahalanobis:dimensionMismatch', ...
        'obs has %i columns and query has %i elements, but E holds %i eigenvalues', ...
        size(obs,2), numel(query), numel(eig_vals));
end

eig_vals(eig_vals==0)=1;         % check for eigenvalues of 0 (avoid dividing by zero)

% difference of every observation to the query, one row per observation
obs_translated = obs - repmat(query(:)', num_obs, 1);

% squared difference along each component, scaled by that component's variance
dstsq = obs_translated .* obs_translated;
dst = dstsq ./ repmat(eig_vals, num_obs, 1);

% sum over the components (dimension 2) so a single observation also works
d = sqrt(sum(dst, 2))';

return;

View File

@ -601,10 +601,16 @@ Where
refers to the number of edge histogram bins.
\end_layout
\begin_layout Subsection
\begin_layout Section
Principal Component Analysis
\end_layout
\begin_layout Standard
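Principal component analysis is a process to identify the directions of greatest
variation in a set of data.
The result is a set of orthogonal axes, the eigenvectors of the data's covariance
matrix, ordered by the amount of variance (the eigenvalue) that each one captures.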
\end_layout
\begin_layout Section
Distance Measures
\end_layout
@ -824,22 +830,22 @@ Category Response
\begin_layout Standard
The category response aims to control for a descriptor's varying performance
at each of the dataset's categories by looping through each category and
randomly selecting an image from each as the query image.
using a preselected image from each as the query image.
Each category iteration has precision and recall values calculated for
all
\begin_inset Formula $n$
\end_inset
to allow the mean average precision to be calculated.
This mean value is calculated from 20 iterations for the MSRCv2 dataset.
This mean value is calculated from the 20 category iterations for the MSRCv2
dataset.
\end_layout
\begin_layout Standard
Completing one iteration for each category also allows a confusion matrix
to be constructed.
For each iteration the top 20 results were evaluated, this number was chosen
as this is approximately the mean number of images in each category.
For each iteration the top 25 results were evaluated; this number was chosen
because it is approximately the mean category size.
\end_layout
\begin_layout Standard
@ -847,17 +853,6 @@ The completed confusion matrix allows the main category confusions to be
identified and discussions to be made.
\end_layout
\begin_layout Subsubsection
Random Response
\end_layout
\begin_layout Standard
The random response places emphasis on iteration over controlling for inter-cate
gory response.
Here query images are selected at random from the entire dataset and many
iterations are run in order to identify a mean response.
\end_layout
\begin_layout Section
Results
\end_layout

4
util/deflateEigen.m Normal file
View File

@ -0,0 +1,4 @@
function E=deflateEigen(E, param)
% DEFLATEEIGEN  Keep only the first PARAM components of an eigenmodel.
%
%   E = deflateEigen(E, param)
%
%   E      eigenmodel struct with fields E.val (eigenvalues) and E.vct
%          (one eigenvector per column).
%   param  non-negative integer, the number of components to keep.
%
%   Returns E with E.val cut to its first PARAM entries and E.vct cut to
%   its first PARAM columns; every other field is returned untouched.
%   When PARAM is larger than the number of components available, all of
%   them are kept instead of raising an index error.
%
%   NOTE(review): "first" means the strongest components only if E was
%   sorted by descending eigenvalue beforehand - confirm with the caller.

if ~isscalar(param) || param < 0 || param ~= floor(param)
    error('deflateEigen:invalidParam', 'param must be a non-negative integer');
end

% never ask for more components than the model actually holds
available = min(numel(E.val), size(E.vct,2));
keep = min(param, available);

E.val=E.val(1:keep);
E.vct=E.vct(:,1:keep);

View File

@ -1,12 +1,23 @@
function E=getEigenModel(obs)
% GETEIGENMODEL  Build an eigenmodel (PCA) from a set of observations.
%
%   E = getEigenModel(obs)
%
%   obs  N-by-D matrix, one observation (descriptor) per row.
%
%   Returns a struct with fields
%     E.N    number of observations (rows of obs)
%     E.D    dimensionality of each observation (columns of obs)
%     E.org  1-by-D mean of the observations
%     E.vct  D-by-D matrix, one eigenvector of the covariance per column
%     E.val  D-by-1 eigenvalues, sorted in descending order and matching
%            the columns of E.vct

E.N = size(obs,1);
E.D = size(obs,2);

% average down the rows explicitly: plain mean(obs) would collapse a
% single observation (a 1-by-D row) to one scalar
E.org = mean(obs,1);

obs_translated=obs-repmat(E.org,E.N,1);

% covariance of the mean-centred observations (normalised by N)
C=(1/E.N) * (obs_translated' * obs_translated);

[U, V]=eig(C);

% sort eigenvectors and eigenvalues by eigenvalue size (desc)
[sorted_vals, order]=sort(diag(V),'descend');

E.vct=U(:,order);
E.val=sorted_vals;

return;