added category walk, starting eigenmodel

This commit is contained in:
aj 2019-11-29 00:37:43 +00:00
parent de1d6b3464
commit a2575085de
9 changed files with 691 additions and 64 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@ dataset
descriptors
*~
*#
coursework.pdf

View File

@ -22,8 +22,8 @@ OUT_FOLDER = 'descriptors';
%% and within that folder, create another folder to hold these descriptors
%% the idea is all your descriptors are in individual folders - within
%% the folder specified as 'OUT_FOLDER'.
% OUT_SUBFOLDER='avgRGB';
OUT_SUBFOLDER='globalRGBhisto';
OUT_SUBFOLDER='avgRGB';
% OUT_SUBFOLDER='globalRGBhisto';
% OUT_SUBFOLDER='spatialColour';
% OUT_SUBFOLDER='spatialColourTexture';
@ -37,8 +37,8 @@ for filenum=1:length(allfiles)
fout=[OUT_FOLDER,'/',OUT_SUBFOLDER,'/',fname(1:end-4),'.mat'];%replace .bmp with .mat
%% EXTRACT FUNCTION
% F=extractAvgRGB(img);
F=extractGlobalColHist(img);
F=extractAvgRGB(img);
% F=extractGlobalColHist(img);
% F=extractSpatialColour(img);
% F=extractSpatialColourTexture(img);
save(fout,'F');

View File

@ -27,10 +27,10 @@ DATASET_FOLDER = 'dataset';
DESCRIPTOR_FOLDER = 'descriptors';
%% and within that folder, another folder to hold the descriptors
%% we are interested in working with
% DESCRIPTOR_SUBFOLDER='avgRGB';
DESCRIPTOR_SUBFOLDER='avgRGB';
% DESCRIPTOR_SUBFOLDER='globalRGBhisto';
% DESCRIPTOR_SUBFOLDER='spatialColour';
DESCRIPTOR_SUBFOLDER='spatialColourTexture';
% DESCRIPTOR_SUBFOLDER='spatialColourTexture';
CATEGORIES = ["Farm Animal"
"Tree"
@ -84,13 +84,15 @@ end
CAT_HIST = histogram(ALLCATs).Values;
CAT_TOTAL = length(CAT_HIST);
run_total = 1;
run_total = 50;
AP_values = zeros([1, run_total]);
for run=1:run_total
%% 2) Pick an image at random to be the query
NIMG=size(ALLFEAT,1); % number of images in collection
queryimg=floor(rand()*NIMG); % index of a random image
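% floor(rand()*NIMG) can be 0 when rand() falls below 1/NIMG, but valid MATLAB
% indices start at 1; the guard below maps 0 to 1 (randi(NIMG) would draw a
% valid index in 1..NIMG directly).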
if queryimg == 0
queryimg = 1;
end
%% 3) Compute the distance of image to the query
dst=[];
@ -114,7 +116,7 @@ for run=1:run_total
query_row = dst(1,:);
query_category = query_row(1,3);
fprintf('category was %s', CATEGORIES(query_category))
fprintf('category was %s\n', CATEGORIES(query_category))
%calculate PR for each n
@ -187,39 +189,39 @@ for run=1:run_total
%% These may be a little hard to see using imgshow
%% If you have access, try using imshow(outdisplay) or imagesc(outdisplay)
confusion_matrix = zeros(CAT_TOTAL);
SHOW=15; % Show top 15 results
dst=dst(1:SHOW,:);
outdisplay=[];
for i=1:size(dst,1)
img=imread(ALLFILES{dst(i,2)});
img=img(1:2:end,1:2:end,:); % make image a quarter size
img=img(1:81,:,:); % crop image to uniform size vertically (some MSVC images are different heights)
outdisplay=[outdisplay img];
%populate confusion matrix
confusion_matrix(query_category, dst(i,3)) = confusion_matrix(query_category, dst(i,3)) + 1;
end
figure(3)
imgshow(outdisplay);
axis off;
% confusion_matrix = zeros(CAT_TOTAL);
%
% SHOW=15; % Show top 15 results
% dst=dst(1:SHOW,:);
% outdisplay=[];
% for i=1:size(dst,1)
% img=imread(ALLFILES{dst(i,2)});
% img=img(1:2:end,1:2:end,:); % make image a quarter size
% img=img(1:81,:,:); % crop image to uniform size vertically (some MSVC images are different heights)
% outdisplay=[outdisplay img];
%
% %populate confusion matrix
% confusion_matrix(query_category, dst(i,3)) = confusion_matrix(query_category, dst(i,3)) + 1;
% end
% figure(3)
% imgshow(outdisplay);
% axis off;
end
%% 8 Calculate MAP
figure(4)
histogram(AP_values);
title('Average Precision Distribution');
ylabel('Count');
xlabel('Average Precision');
xlim([0, 1]);
% figure(4)
% histogram(AP_values);
% title('Average Precision Distribution');
% ylabel('Count');
% xlabel('Average Precision');
% xlim([0, 1]);
MAP = mean(AP_values)
AP_sd = std(AP_values)
figure(2)
plot(1:run_total, AP_values);
title('Average Precision Per Run');
xlabel('Run');
ylabel('Average Precision');
% figure(2)
% plot(1:run_total, AP_values);
% title('Average Precision Per Run');
% xlabel('Run');
% ylabel('Average Precision');

View File

@ -0,0 +1,228 @@
%% EEE3032 - Computer Vision and Pattern Recognition (ee3.cvpr)
%%
%% cvpr_visualsearch.m
%% Skeleton code provided as part of the coursework assessment
%%
%% This code will load in all descriptors pre-computed (by the
%% function cvpr_computedescriptors) from the images in the MSRCv2 dataset.
%%
%% It will pick a descriptor at random and compare all other descriptors to
%% it - by calling cvpr_compare. In doing so it will rank the images by
%% similarity to the randomly picked descriptor. Note that initially the
%% function cvpr_compare returns a random number - you need to code it
%% so that it returns the Euclidean distance or some other distance metric
%% between the two descriptors it is passed.
%%
%% (c) John Collomosse 2010 (J.Collomosse@surrey.ac.uk)
%% Centre for Vision Speech and Signal Processing (CVSSP)
%% University of Surrey, United Kingdom
close all;
clear all;
%% Edit the following line to the folder you unzipped the MSRCv2 dataset to
DATASET_FOLDER = 'dataset';
%% Folder that holds the results...
DESCRIPTOR_FOLDER = 'descriptors';
%% and within that folder, another folder to hold the descriptors
%% we are interested in working with
DESCRIPTOR_SUBFOLDER='avgRGB';
% DESCRIPTOR_SUBFOLDER='globalRGBhisto';
% DESCRIPTOR_SUBFOLDER='spatialColour';
% DESCRIPTOR_SUBFOLDER='spatialColourTexture';
CATEGORIES = ["Farm Animal"
"Tree"
"Building"
"Plane"
"Cow"
"Face"
"Car"
"Bike"
"Sheep"
"Flower"
"Sign"
"Bird"
"Book Shelf"
"Bench"
"Cat"
"Dog"
"Road"
"Water Features"
"Human Figures"
"Coast"
];
%% 1) Load all the descriptors into "ALLFEAT"
%% each row of ALLFEAT is a descriptor (is an image)
ALLFEAT=[];
ALLFILES=cell(1,0);
ALLCATs=[];
ctr=1;
allfiles=dir (fullfile([DATASET_FOLDER,'/Images/*.bmp']));
for filenum=1:length(allfiles)
fname=allfiles(filenum).name;
%identify photo category for PR calculation
split_string = split(fname, '_');
ALLCATs(filenum) = str2double(split_string(1));
imgfname_full=([DATASET_FOLDER,'/Images/',fname]);
img=double(imread(imgfname_full))./255;
thesefeat=[];
featfile=[DESCRIPTOR_FOLDER,'/',DESCRIPTOR_SUBFOLDER,'/',fname(1:end-4),'.mat'];%replace .bmp with .mat
load(featfile,'F');
ALLFILES{ctr}=imgfname_full;
ALLFEAT=[ALLFEAT ; F];
ctr=ctr+1;
end
% get counts for each category for PR calculation
CAT_HIST = histogram(ALLCATs).Values;
CAT_TOTAL = length(CAT_HIST);
NIMG=size(ALLFEAT,1); % number of images in collection
confusion_matrix = zeros(CAT_TOTAL);
AP_values = zeros([1, CAT_TOTAL]);
for run=1:CAT_TOTAL
%% 2) Pick an image at random to be the query
queryimg=getRandomCategoryImage(run); % index of a random image
%% 3) Compute the distance of image to the query
dst=[];
for i=1:NIMG
candidate=ALLFEAT(i,:);
query=ALLFEAT(queryimg,:);
category=ALLCATs(i);
%% COMPARE FUNCTION
thedst=compareEuclidean(query, candidate);
dst=[dst ; [thedst i category]];
end
dst=sortrows(dst,1); % sort the results
%% 4) Calculate PR
precision_values=zeros([1, NIMG]);
recall_values=zeros([1, NIMG]);
correct_at_n=zeros([1, NIMG]);
query_row = dst(1,:);
query_category = query_row(1,3);
fprintf('category was %s, %i, %i\n', CATEGORIES(query_category), query_category, run)
%calculate PR for each n
for i=1:NIMG
rows = dst(1:i, :);
correct_results = 0;
incorrect_results = 0;
if i > 1
for n=1:i - 1
row = rows(n, :);
category = row(3);
if category == query_category
correct_results = correct_results + 1;
else
incorrect_results = incorrect_results + 1;
end
end
end
% LAST ROW
row = rows(i, :);
category = row(3);
if category == query_category
correct_results = correct_results + 1;
correct_at_n(i) = 1;
else
incorrect_results = incorrect_results + 1;
end
precision = correct_results / i;
recall = correct_results / CAT_HIST(1,query_category);
precision_values(i) = precision;
recall_values(i) = recall;
end
%% 5) calculate AP
P_rel_n = zeros([1, NIMG]);
for i = 1:NIMG
precision = precision_values(i);
i_result_relevant = correct_at_n(i);
P_rel_n(i) = precision * i_result_relevant;
end
sum_P_rel_n = sum(P_rel_n);
average_precision = sum_P_rel_n / CAT_HIST(1,query_category);
AP_values(run) = average_precision;
%% 6) plot PR curve
figure(1)
plot(recall_values, precision_values);
hold on;
title('PR Curve');
xlabel('Recall');
ylabel('Precision');
%% 7) Visualise the results
%% These may be a little hard to see using imgshow
%% If you have access, try using imshow(outdisplay) or imagesc(outdisplay)
SHOW=20; % Show top 20 results
dst=dst(1:SHOW,:);
outdisplay=[];
for i=1:size(dst,1)
img=imread(ALLFILES{dst(i,2)});
img=img(1:2:end,1:2:end,:); % make image a quarter size
img=img(1:81,:,:); % crop image to uniform size vertically (some MSVC images are different heights)
outdisplay=[outdisplay img];
%populate confusion matrix
confusion_matrix(query_category, dst(i,3)) = confusion_matrix(query_category, dst(i,3)) + 1;
end
% figure(3)
% imgshow(outdisplay);
% axis off;
end
% normalise confusion matrix
norm_confusion_matrix = confusion_matrix ./ sum(confusion_matrix, 'all');
%% 8 Calculate MAP
% figure(4)
% histogram(AP_values);
% title('Average Precision Distribution');
% ylabel('Count');
% xlabel('Average Precision');
% xlim([0, 1]);
MAP = mean(AP_values)
AP_sd = std(AP_values)
% figure(2)
% plot(1:CAT_TOTAL, AP_values);
% title('Average Precision Per Run');
% xlabel('Run');
% ylabel('Average Precision');

249
cvpr_visualsearch_pca.m Normal file
View File

@ -0,0 +1,249 @@
%% EEE3032 - Computer Vision and Pattern Recognition (ee3.cvpr)
%%
%% cvpr_visualsearch.m
%% Skeleton code provided as part of the coursework assessment
%%
%% This code will load in all descriptors pre-computed (by the
%% function cvpr_computedescriptors) from the images in the MSRCv2 dataset.
%%
%% It will pick a descriptor at random and compare all other descriptors to
%% it - by calling cvpr_compare. In doing so it will rank the images by
%% similarity to the randomly picked descriptor. Note that initially the
%% function cvpr_compare returns a random number - you need to code it
%% so that it returns the Euclidean distance or some other distance metric
%% between the two descriptors it is passed.
%%
%% (c) John Collomosse 2010 (J.Collomosse@surrey.ac.uk)
%% Centre for Vision Speech and Signal Processing (CVSSP)
%% University of Surrey, United Kingdom
close all;
clear all;
%% Edit the following line to the folder you unzipped the MSRCv2 dataset to
DATASET_FOLDER = 'dataset';
%% Folder that holds the results...
DESCRIPTOR_FOLDER = 'descriptors';
%% and within that folder, another folder to hold the descriptors
%% we are interested in working with
DESCRIPTOR_SUBFOLDER='avgRGB';
% DESCRIPTOR_SUBFOLDER='globalRGBhisto';
% DESCRIPTOR_SUBFOLDER='spatialColour';
% DESCRIPTOR_SUBFOLDER='spatialColourTexture';
CATEGORIES = ["Farm Animal"
"Tree"
"Building"
"Plane"
"Cow"
"Face"
"Car"
"Bike"
"Sheep"
"Flower"
"Sign"
"Bird"
"Book Shelf"
"Bench"
"Cat"
"Dog"
"Road"
"Water Features"
"Human Figures"
"Coast"
];
%% 1) Load all the descriptors into "ALLFEAT"
%% each row of ALLFEAT is a descriptor (is an image)
ALLFEAT=[];
ALLFILES=cell(1,0);
ALLCATs=[];
ctr=1;
allfiles=dir (fullfile([DATASET_FOLDER,'/Images/*.bmp']));
for filenum=1:length(allfiles)
fname=allfiles(filenum).name;
%identify photo category for PR calculation
split_string = split(fname, '_');
ALLCATs(filenum) = str2double(split_string(1));
imgfname_full=([DATASET_FOLDER,'/Images/',fname]);
img=double(imread(imgfname_full))./255;
thesefeat=[];
featfile=[DESCRIPTOR_FOLDER,'/',DESCRIPTOR_SUBFOLDER,'/',fname(1:end-4),'.mat'];%replace .bmp with .mat
load(featfile,'F');
ALLFILES{ctr}=imgfname_full;
ALLFEAT=[ALLFEAT ; F];
ctr=ctr+1;
end
% get counts for each category for PR calculation
CAT_HIST = histogram(ALLCATs).Values;
CAT_TOTAL = length(CAT_HIST);
NIMG=size(ALLFEAT,1); % number of images in collection
MODEL_SIZE = 10;
confusion_matrix = zeros(CAT_TOTAL);
AP_values = zeros([1, CAT_TOTAL]);
for iterating_category=1:CAT_TOTAL
%% 2) Select descriptors for category and training data
category_training_descriptors = [];
test_descriptors = [];
for i=1:NIMG
if iterating_category == ALLCATs(i)
category_training_descriptors = [ category_training_descriptors ; ALLFEAT(i,:) ];
else
test_descriptors = [ test_descriptors ; ALLFEAT(i,:) ];
end
end
model_descriptors = category_training_descriptors(1:MODEL_SIZE, :);
model_mean = mean(model_descriptors);
model_data_min_mean = model_descriptors - repmat(model_mean, MODEL_SIZE, 1);
C = (model_data_min_mean' * model_data_min_mean) ./ MODEL_SIZE;
[eig_vct, eig_val] = eig(C);
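% The eigenmodel so far holds the category's mean, covariance and its
% eigenvectors/eigenvalues. A possible next step (sketch only, not part of
% this commit's logic) is to keep the strongest E components and project
% mean-subtracted descriptors into that reduced space, e.g.:
%   [~, order] = sort(diag(eig_val), 'descend');  % rank components by eigenvalue
%   principal_vct = eig_vct(:, order(1:E));       % E = chosen model dimension (illustrative)
%   proj_test = (test_descriptors - repmat(model_mean, size(test_descriptors,1), 1)) * principal_vct;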
TEST_SIZE = size(test_descriptors,1);
%% 3) Compute the distance of image to the query
dst=[];
for i=1:TEST_SIZE
candidate=test_descriptors(i,:);
query=ALLFEAT(queryimg,:);
category=ALLCATs(i);
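% NOTE: queryimg is not defined in this script (it is carried over from the
% random-query version) and ALLCATs(i) indexes the full image list while i
% here runs over test_descriptors; both still need wiring up for the
% eigenmodel search.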
%% COMPARE FUNCTION
thedst=compareEuclidean(query, candidate);
dst=[dst ; [thedst i category]];
end
dst=sortrows(dst,1); % sort the results
%% 4) Calculate PR
precision_values=zeros([1, NIMG]);
recall_values=zeros([1, NIMG]);
correct_at_n=zeros([1, NIMG]);
query_row = dst(1,:);
query_category = query_row(1,3);
fprintf('category was %s, %i, %i\n', CATEGORIES(query_category), query_category, iterating_category)
%calculate PR for each n
for i=1:NIMG
rows = dst(1:i, :);
correct_results = 0;
incorrect_results = 0;
if i > 1
for n=1:i - 1
row = rows(n, :);
category = row(3);
if category == query_category
correct_results = correct_results + 1;
else
incorrect_results = incorrect_results + 1;
end
end
end
% LAST ROW
row = rows(i, :);
category = row(3);
if category == query_category
correct_results = correct_results + 1;
correct_at_n(i) = 1;
else
incorrect_results = incorrect_results + 1;
end
precision = correct_results / i;
recall = correct_results / CAT_HIST(1,query_category);
precision_values(i) = precision;
recall_values(i) = recall;
end
%% 5) calculate AP
P_rel_n = zeros([1, NIMG]);
for i = 1:NIMG
precision = precision_values(i);
i_result_relevant = correct_at_n(i);
P_rel_n(i) = precision * i_result_relevant;
end
sum_P_rel_n = sum(P_rel_n);
average_precision = sum_P_rel_n / CAT_HIST(1,query_category);
AP_values(iterating_category) = average_precision;
%% 6) plot PR curve
figure(1)
plot(recall_values, precision_values);
hold on;
title('PR Curve');
xlabel('Recall');
ylabel('Precision');
%% 7) Visualise the results
%% These may be a little hard to see using imgshow
%% If you have access, try using imshow(outdisplay) or imagesc(outdisplay)
SHOW=20; % Show top 20 results
dst=dst(1:SHOW,:);
outdisplay=[];
for i=1:size(dst,1)
img=imread(ALLFILES{dst(i,2)});
img=img(1:2:end,1:2:end,:); % make image a quarter size
img=img(1:81,:,:); % crop image to uniform size vertically (some MSVC images are different heights)
outdisplay=[outdisplay img];
%populate confusion matrix
confusion_matrix(query_category, dst(i,3)) = confusion_matrix(query_category, dst(i,3)) + 1;
end
% figure(3)
% imgshow(outdisplay);
% axis off;
end
% normalise confusion matrix
norm_confusion_matrix = confusion_matrix ./ sum(confusion_matrix, 'all');
%% 8 Calculate MAP
% figure(4)
% histogram(AP_values);
% title('Average Precision Distribution');
% ylabel('Count');
% xlabel('Average Precision');
% xlim([0, 1]);
MAP = mean(AP_values)
AP_sd = std(AP_values)
% figure(2)
% plot(1:CAT_TOTAL, AP_values);
% title('Average Precision Per Run');
% xlabel('Run');
% ylabel('Average Precision');

View File

@ -142,6 +142,13 @@ These measured features can be arranged as a data structure or descriptor
It is an example of content based image retrieval or CBIR.
\end_layout
\begin_layout Standard
Visual search is used in consumer products such as Google Lens and Google
 reverse image search.
 It also appears as smaller features within larger products, for example
 'related products' suggestions.
\end_layout
\begin_layout Subsection
Extraction
\end_layout
@ -169,17 +176,6 @@ Typically a descriptor is a single column vector of numbers calculated about
Methods for calculating the distance will determine how images are ranked.
\end_layout
\begin_layout Subsection
Applications
\end_layout
\begin_layout Standard
Visual search is used in consumer products to generate powerful results
such as Google Lens and Google reverse image search.
It also has applicability as smaller features of products such as 'related
products' results.
\end_layout
\begin_layout Section
Descriptors
\end_layout
@ -605,14 +601,58 @@ Where
refers to the number of edge histogram bins.
\end_layout
\begin_layout Subsection
Principal Component Analysis
\end_layout
\begin_layout Section
Distance Measures
\end_layout
\begin_layout Standard
Once image descriptors are plotted in a feature space a visual search system
compares descriptors by measuring the distance between them.
The method for doing so will affect the ranking of descriptors.
\end_layout
\begin_layout Subsection
L1 Norm
\end_layout
\begin_layout Subsection
L2 Norm
\end_layout
\begin_layout Standard
The L2 norm, or Euclidean distance, is the straight-line distance between
 two points in space; applied to a single vector, it is the vector's magnitude.
 In a three-dimensional Euclidean space the magnitude of a vector,
\begin_inset Formula $x=\left(i,j,k\right)$
\end_inset
, is given by,
\end_layout
\begin_layout Standard
\begin_inset Formula
\[
\left\Vert x\right\Vert _{2}=\sqrt{i^{2}+j^{2}+k^{2}}
\]
\end_inset
\end_layout
\begin_layout Standard
Its intuitive geometric interpretation makes it the most commonly used norm
 in Euclidean space.
\end_layout
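\begin_layout Standard
As a minimal MATLAB sketch (assuming two row-vector descriptors of equal
 length, as produced by the descriptor extraction code; the repository's
 compareEuclidean may differ in detail), the distance used to rank results
 can be computed as:
\end_layout
\begin_layout LyX-Code
function dst = compareEuclidean(F1, F2)
\end_layout
\begin_layout LyX-Code
    d = F1 - F2;              % element-wise difference of the descriptors
\end_layout
\begin_layout LyX-Code
    dst = sqrt(sum(d .^ 2));  % square, sum and square root: the L2 norm
\end_layout
\begin_layout LyX-Code
end
\end_layout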
\begin_layout Subsection
Mahalanobis Distance
\end_layout
\begin_layout Section
Test Methods
\end_layout
@ -645,6 +685,28 @@ noprefix "false"
.
\end_layout
\begin_layout Standard
It is worth noting that there is some similarity and overlap between the
 dataset's categories, which has implications for the results calculated
 when using it.
\end_layout
\begin_layout Standard
For example, category 1 is a collection of images of cows, sheep and horses
 on grass, even though cows and sheep each have their own distinct categories.
 Category 18 also has many similarities to category 20, with both consisting
 mainly of shots of bodies of water and boats of varying sizes.
\end_layout
\begin_layout Standard
During the evaulation of implemented visual search techniques the classification
of each image is done by referencing the group index they are named with.
As such, occurences of false negatives may increase as images that do in
fact look similar as they are both, say, images of cows will be marked
as not similar and measure negatively for the performance of the method.
\end_layout
\begin_layout Subsection
Precision and Recall
\end_layout
@ -737,22 +799,85 @@ Precision Recall Curve
\end_layout
\begin_layout Standard
A way to visualise the response of a system is to calculate both precision
and recall at each
A way to visualise the response of a visual search system is to calculate
both precision and recall for all values of
\begin_inset Formula $n$
\end_inset
and plot both as what is known as a precision-recall curve or PR curve.
and plot the pairs against each other in what is known as a precision-recall
 curve or PR curve.
\end_layout
\begin_layout Subsection
Methods
\end_layout
\begin_layout Standard
In order to evaluate the performance of each descriptor two different tests
were conducted.
\end_layout
\begin_layout Subsubsection
Category Response
\end_layout
\begin_layout Standard
The category response aims to control for a descriptor's varying performance
at each of the dataset's categories by looping through each category and
randomly selecting an image from each as the query image.
Each category iteration has precision and recall values calculated for
all
\begin_inset Formula $n$
\end_inset
so that an average precision can be obtained for that query.
 The mean average precision is then taken over these iterations, 20 in total
 for the MSRCv2 dataset.
\end_layout
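\begin_layout Standard
Matching the accompanying implementation, the average precision for a single
 query is computed as
\end_layout
\begin_layout Standard
\begin_inset Formula
\[
AP=\frac{1}{\left|c\right|}\sum_{n=1}^{N}P\left(n\right)rel\left(n\right)
\]
\end_inset
\end_layout
\begin_layout Standard
where 
\begin_inset Formula $\left|c\right|$
\end_inset
 is the number of dataset images in the query's category, 
\begin_inset Formula $N$
\end_inset
 is the collection size, 
\begin_inset Formula $P\left(n\right)$
\end_inset
 is the precision over the top 
\begin_inset Formula $n$
\end_inset
 results and 
\begin_inset Formula $rel\left(n\right)$
\end_inset
 is 1 when result 
\begin_inset Formula $n$
\end_inset
 is relevant and 0 otherwise; the mean average precision (MAP) is the mean
 of these values over the category iterations.
\end_layout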
\begin_layout Standard
Completing one iteration for each category also allows a confusion matrix
 to be constructed.
 For each iteration the top 20 results were evaluated; this number was chosen
 because it is approximately the mean number of images per category.
\end_layout
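\begin_layout Standard
In the accompanying code the completed matrix is normalised by the total
 number of retrieved results, so that entry 
\begin_inset Formula $\hat{C}_{ij}$
\end_inset
 gives the proportion of all retrievals in which a query of category 
\begin_inset Formula $i$
\end_inset
 returned an image of category 
\begin_inset Formula $j$
\end_inset
:
\end_layout
\begin_layout Standard
\begin_inset Formula
\[
\hat{C}_{ij}=\frac{C_{ij}}{\sum_{k}\sum_{l}C_{kl}}
\]
\end_inset
\end_layout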
\begin_layout Standard
The completed confusion matrix allows the main category confusions to be
 identified and discussed.
\end_layout
\begin_layout Subsubsection
Random Response
\end_layout
\begin_layout Standard
The random response places emphasis on the number of iterations rather than
 on controlling for inter-category response.
 Here query images are selected at random from the entire dataset and many
 iterations are run in order to estimate a mean response.
\end_layout
\begin_layout Section
Results
\end_layout
\begin_layout Subsection
Average RGB
\end_layout
\begin_layout Subsection
Global Colour Histogram
\end_layout
\begin_layout Subsection
Spatial Colour
\end_layout
\begin_layout Subsection
Spatial Colour and Texture
\end_layout
\begin_layout Section
Discussion
\end_layout
@ -782,7 +907,7 @@ options "plain"
\end_layout
\begin_layout Section
MSRC Dataset Classifications
MSRCv2 Dataset Classifications
\begin_inset CommandInset label
LatexCommand label
name "sec:MSRC-Dataset-Classifications"
@ -1073,7 +1198,7 @@ Bird
\begin_inset Text
\begin_layout Plain Layout
Book Shelf
Books
\end_layout
\end_inset

Binary file not shown.

View File

@ -1,12 +1,2 @@
img = double(imread('dataset/Images/10_11_s.bmp'))./255;
% imshow(img);
img = getGreyscale(img);
[mag, angle] = getEdgeInfo(img);
F = getEdgeAngleHist(mag, angle);
imshow(mag > 0.05)
getRandomCategoryImage(7)

View File

@ -0,0 +1,32 @@
function return_index=getRandomCategoryImage(category)
if category > 20
error('number greater than category count');
end
DATASET_FOLDER = 'dataset';
allfiles=dir (fullfile([DATASET_FOLDER,'/Images/*.bmp']));
number_of_images = length(allfiles);
ALLCATs=zeros([1 number_of_images]);
for filenum=1:number_of_images
fname=allfiles(filenum).name;
split_string = split(fname, '_');
ALLCATs(filenum) = str2double(split_string(1));
end
return_index = 0;
while return_index == 0
index = floor(rand() * number_of_images);
if index == 0
index = 1;
end
if ALLCATs(index) == category
return_index = index;
end
end
return;