added category walk, starting eigenmodel

This commit is contained in:
aj 2019-11-29 00:37:43 +00:00
parent de1d6b3464
commit a2575085de
9 changed files with 691 additions and 64 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@ dataset
descriptors
*~
*#
coursework.pdf

View File

@ -22,8 +22,8 @@ OUT_FOLDER = 'descriptors';
%% and within that folder, create another folder to hold these descriptors
%% the idea is all your descriptors are in individual folders - within
%% the folder specified as 'OUT_FOLDER'.
% OUT_SUBFOLDER='avgRGB';
OUT_SUBFOLDER='globalRGBhisto';
OUT_SUBFOLDER='avgRGB';
% OUT_SUBFOLDER='globalRGBhisto';
% OUT_SUBFOLDER='spatialColour';
% OUT_SUBFOLDER='spatialColourTexture';
@ -37,8 +37,8 @@ for filenum=1:length(allfiles)
fout=[OUT_FOLDER,'/',OUT_SUBFOLDER,'/',fname(1:end-4),'.mat'];%replace .bmp with .mat
%% EXTRACT FUNCTION
% F=extractAvgRGB(img);
F=extractGlobalColHist(img);
F=extractAvgRGB(img);
% F=extractGlobalColHist(img);
% F=extractSpatialColour(img);
% F=extractSpatialColourTexture(img);
save(fout,'F');

View File

@ -27,10 +27,10 @@ DATASET_FOLDER = 'dataset';
DESCRIPTOR_FOLDER = 'descriptors';
%% and within that folder, another folder to hold the descriptors
%% we are interested in working with
% DESCRIPTOR_SUBFOLDER='avgRGB';
DESCRIPTOR_SUBFOLDER='avgRGB';
% DESCRIPTOR_SUBFOLDER='globalRGBhisto';
% DESCRIPTOR_SUBFOLDER='spatialColour';
DESCRIPTOR_SUBFOLDER='spatialColourTexture';
% DESCRIPTOR_SUBFOLDER='spatialColourTexture';
CATEGORIES = ["Farm Animal"
"Tree"
@ -84,13 +84,15 @@ end
CAT_HIST = histogram(ALLCATs).Values;
CAT_TOTAL = length(CAT_HIST);
run_total = 1;
run_total = 50;
AP_values = zeros([1, run_total]);
for run=1:run_total
%% 2) Pick an image at random to be the query
NIMG=size(ALLFEAT,1); % number of images in collection
queryimg=floor(rand()*NIMG); % index of a random image
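% floor(rand()*NIMG) can be 0 when rand() falls below 1/NIMG, but valid MATLAB
% indices start at 1; the guard below maps 0 to 1 (randi(NIMG) would draw a
% valid index in 1..NIMG directly).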
if queryimg == 0
queryimg = 1;
end
%% 3) Compute the distance of image to the query
dst=[];
@ -114,7 +116,7 @@ for run=1:run_total
query_row = dst(1,:);
query_category = query_row(1,3);
fprintf('category was %s', CATEGORIES(query_category))
fprintf('category was %s\n', CATEGORIES(query_category))
%calculate PR for each n
@ -187,39 +189,39 @@ for run=1:run_total
%% These may be a little hard to see using imgshow
%% If you have access, try using imshow(outdisplay) or imagesc(outdisplay)
confusion_matrix = zeros(CAT_TOTAL);
SHOW=15; % Show top 15 results
dst=dst(1:SHOW,:);
outdisplay=[];
for i=1:size(dst,1)
img=imread(ALLFILES{dst(i,2)});
img=img(1:2:end,1:2:end,:); % make image a quarter size
img=img(1:81,:,:); % crop image to uniform size vertically (some MSVC images are different heights)
outdisplay=[outdisplay img];
%populate confusion matrix
confusion_matrix(query_category, dst(i,3)) = confusion_matrix(query_category, dst(i,3)) + 1;
end
figure(3)
imgshow(outdisplay);
axis off;
% confusion_matrix = zeros(CAT_TOTAL);
%
% SHOW=15; % Show top 15 results
% dst=dst(1:SHOW,:);
% outdisplay=[];
% for i=1:size(dst,1)
% img=imread(ALLFILES{dst(i,2)});
% img=img(1:2:end,1:2:end,:); % make image a quarter size
% img=img(1:81,:,:); % crop image to uniform size vertically (some MSVC images are different heights)
% outdisplay=[outdisplay img];
%
% %populate confusion matrix
% confusion_matrix(query_category, dst(i,3)) = confusion_matrix(query_category, dst(i,3)) + 1;
% end
% figure(3)
% imgshow(outdisplay);
% axis off;
end
%% 8 Calculate MAP
figure(4)
histogram(AP_values);
title('Average Precision Distribution');
ylabel('Count');
xlabel('Average Precision');
xlim([0, 1]);
% figure(4)
% histogram(AP_values);
% title('Average Precision Distribution');
% ylabel('Count');
% xlabel('Average Precision');
% xlim([0, 1]);
MAP = mean(AP_values)
AP_sd = std(AP_values)
figure(2)
plot(1:run_total, AP_values);
title('Average Precision Per Run');
xlabel('Run');
ylabel('Average Precision');
% figure(2)
% plot(1:run_total, AP_values);
% title('Average Precision Per Run');
% xlabel('Run');
% ylabel('Average Precision');

View File

@ -0,0 +1,228 @@
%% EEE3032 - Computer Vision and Pattern Recognition (ee3.cvpr)
%%
%% cvpr_visualsearch.m
%% Skeleton code provided as part of the coursework assessment
%%
%% This code will load in all descriptors pre-computed (by the
%% function cvpr_computedescriptors) from the images in the MSRCv2 dataset.
%%
%% It will pick a descriptor at random and compare all other descriptors to
%% it - by calling cvpr_compare. In doing so it will rank the images by
%% similarity to the randomly picked descriptor. Note that initially the
%% function cvpr_compare returns a random number - you need to code it
%% so that it returns the Euclidean distance or some other distance metric
%% between the two descriptors it is passed.
%%
%% (c) John Collomosse 2010 (J.Collomosse@surrey.ac.uk)
%% Centre for Vision Speech and Signal Processing (CVSSP)
%% University of Surrey, United Kingdom
close all;
clear all;
%% Edit the following line to the folder you unzipped the MSRCv2 dataset to
DATASET_FOLDER = 'dataset';
%% Folder that holds the results...
DESCRIPTOR_FOLDER = 'descriptors';
%% and within that folder, another folder to hold the descriptors
%% we are interested in working with
DESCRIPTOR_SUBFOLDER='avgRGB';
% DESCRIPTOR_SUBFOLDER='globalRGBhisto';
% DESCRIPTOR_SUBFOLDER='spatialColour';
% DESCRIPTOR_SUBFOLDER='spatialColourTexture';
CATEGORIES = ["Farm Animal"
"Tree"
"Building"
"Plane"
"Cow"
"Face"
"Car"
"Bike"
"Sheep"
"Flower"
"Sign"
"Bird"
"Book Shelf"
"Bench"
"Cat"
"Dog"
"Road"
"Water Features"
"Human Figures"
"Coast"
];
%% 1) Load all the descriptors into "ALLFEAT"
%% each row of ALLFEAT is a descriptor (is an image)
ALLFEAT=[];
ALLFILES=cell(1,0);
ALLCATs=[];
ctr=1;
allfiles=dir (fullfile([DATASET_FOLDER,'/Images/*.bmp']));
for filenum=1:length(allfiles)
fname=allfiles(filenum).name;
%identify photo category for PR calculation
split_string = split(fname, '_');
ALLCATs(filenum) = str2double(split_string(1));
imgfname_full=([DATASET_FOLDER,'/Images/',fname]);
img=double(imread(imgfname_full))./255;
thesefeat=[];
featfile=[DESCRIPTOR_FOLDER,'/',DESCRIPTOR_SUBFOLDER,'/',fname(1:end-4),'.mat'];%replace .bmp with .mat
load(featfile,'F');
ALLFILES{ctr}=imgfname_full;
ALLFEAT=[ALLFEAT ; F];
ctr=ctr+1;
end
% get counts for each category for PR calculation
CAT_HIST = histogram(ALLCATs).Values;
CAT_TOTAL = length(CAT_HIST);
NIMG=size(ALLFEAT,1); % number of images in collection
confusion_matrix = zeros(CAT_TOTAL);
AP_values = zeros([1, CAT_TOTAL]);
for run=1:CAT_TOTAL
%% 2) Pick an image at random to be the query
queryimg=getRandomCategoryImage(run); % index of a random image
%% 3) Compute the distance of image to the query
dst=[];
for i=1:NIMG
candidate=ALLFEAT(i,:);
query=ALLFEAT(queryimg,:);
category=ALLCATs(i);
%% COMPARE FUNCTION
thedst=compareEuclidean(query, candidate);
dst=[dst ; [thedst i category]];
end
dst=sortrows(dst,1); % sort the results
%% 4) Calculate PR
precision_values=zeros([1, NIMG]);
recall_values=zeros([1, NIMG]);
correct_at_n=zeros([1, NIMG]);
query_row = dst(1,:);
query_category = query_row(1,3);
fprintf('category was %s, %i, %i\n', CATEGORIES(query_category), query_category, run)
%calculate PR for each n
for i=1:NIMG
rows = dst(1:i, :);
correct_results = 0;
incorrect_results = 0;
if i > 1
for n=1:i - 1
row = rows(n, :);
category = row(3);
if category == query_category
correct_results = correct_results + 1;
else
incorrect_results = incorrect_results + 1;
end
end
end
% LAST ROW
row = rows(i, :);
category = row(3);
if category == query_category
correct_results = correct_results + 1;
correct_at_n(i) = 1;
else
incorrect_results = incorrect_results + 1;
end
precision = correct_results / i;
recall = correct_results / CAT_HIST(1,query_category);
precision_values(i) = precision;
recall_values(i) = recall;
end
%% 5) calculate AP
P_rel_n = zeros([1, NIMG]);
for i = 1:NIMG
precision = precision_values(i);
i_result_relevant = correct_at_n(i);
P_rel_n(i) = precision * i_result_relevant;
end
sum_P_rel_n = sum(P_rel_n);
average_precision = sum_P_rel_n / CAT_HIST(1,query_category);
AP_values(run) = average_precision;
%% 6) plot PR curve
figure(1)
plot(recall_values, precision_values);
hold on;
title('PR Curve');
xlabel('Recall');
ylabel('Precision');
%% 7) Visualise the results
%% These may be a little hard to see using imgshow
%% If you have access, try using imshow(outdisplay) or imagesc(outdisplay)
SHOW=20; % Show top 20 results
dst=dst(1:SHOW,:);
outdisplay=[];
for i=1:size(dst,1)
img=imread(ALLFILES{dst(i,2)});
img=img(1:2:end,1:2:end,:); % make image a quarter size
img=img(1:81,:,:); % crop image to uniform size vertically (some MSVC images are different heights)
outdisplay=[outdisplay img];
%populate confusion matrix
confusion_matrix(query_category, dst(i,3)) = confusion_matrix(query_category, dst(i,3)) + 1;
end
% figure(3)
% imgshow(outdisplay);
% axis off;
end
% normalise confusion matrix
norm_confusion_matrix = confusion_matrix ./ sum(confusion_matrix, 'all');
%% 8 Calculate MAP
% figure(4)
% histogram(AP_values);
% title('Average Precision Distribution');
% ylabel('Count');
% xlabel('Average Precision');
% xlim([0, 1]);
MAP = mean(AP_values)
AP_sd = std(AP_values)
% figure(2)
% plot(1:CAT_TOTAL, AP_values);
% title('Average Precision Per Run');
% xlabel('Run');
% ylabel('Average Precision');

249
cvpr_visualsearch_pca.m Normal file
View File

@ -0,0 +1,249 @@
%% EEE3032 - Computer Vision and Pattern Recognition (ee3.cvpr)
%%
%% cvpr_visualsearch.m
%% Skeleton code provided as part of the coursework assessment
%%
%% This code will load in all descriptors pre-computed (by the
%% function cvpr_computedescriptors) from the images in the MSRCv2 dataset.
%%
%% It will pick a descriptor at random and compare all other descriptors to
%% it - by calling cvpr_compare. In doing so it will rank the images by
%% similarity to the randomly picked descriptor. Note that initially the
%% function cvpr_compare returns a random number - you need to code it
%% so that it returns the Euclidean distance or some other distance metric
%% between the two descriptors it is passed.
%%
%% (c) John Collomosse 2010 (J.Collomosse@surrey.ac.uk)
%% Centre for Vision Speech and Signal Processing (CVSSP)
%% University of Surrey, United Kingdom
close all;
clear all;
%% Edit the following line to the folder you unzipped the MSRCv2 dataset to
DATASET_FOLDER = 'dataset';
%% Folder that holds the results...
DESCRIPTOR_FOLDER = 'descriptors';
%% and within that folder, another folder to hold the descriptors
%% we are interested in working with
DESCRIPTOR_SUBFOLDER='avgRGB';
% DESCRIPTOR_SUBFOLDER='globalRGBhisto';
% DESCRIPTOR_SUBFOLDER='spatialColour';
% DESCRIPTOR_SUBFOLDER='spatialColourTexture';
CATEGORIES = ["Farm Animal"
"Tree"
"Building"
"Plane"
"Cow"
"Face"
"Car"
"Bike"
"Sheep"
"Flower"
"Sign"
"Bird"
"Book Shelf"
"Bench"
"Cat"
"Dog"
"Road"
"Water Features"
"Human Figures"
"Coast"
];
%% 1) Load all the descriptors into "ALLFEAT"
%% each row of ALLFEAT is a descriptor (is an image)
ALLFEAT=[];
ALLFILES=cell(1,0);
ALLCATs=[];
ctr=1;
allfiles=dir (fullfile([DATASET_FOLDER,'/Images/*.bmp']));
for filenum=1:length(allfiles)
fname=allfiles(filenum).name;
%identify photo category for PR calculation
split_string = split(fname, '_');
ALLCATs(filenum) = str2double(split_string(1));
imgfname_full=([DATASET_FOLDER,'/Images/',fname]);
img=double(imread(imgfname_full))./255;
thesefeat=[];
featfile=[DESCRIPTOR_FOLDER,'/',DESCRIPTOR_SUBFOLDER,'/',fname(1:end-4),'.mat'];%replace .bmp with .mat
load(featfile,'F');
ALLFILES{ctr}=imgfname_full;
ALLFEAT=[ALLFEAT ; F];
ctr=ctr+1;
end
% get counts for each category for PR calculation
CAT_HIST = histogram(ALLCATs).Values;
CAT_TOTAL = length(CAT_HIST);
NIMG=size(ALLFEAT,1); % number of images in collection
MODEL_SIZE = 10;
confusion_matrix = zeros(CAT_TOTAL);
AP_values = zeros([1, CAT_TOTAL]);
for iterating_category=1:CAT_TOTAL
%% 2) Select descriptors for category and training data
category_training_descriptors = [];
test_descriptors = [];
for i=1:NIMG
if iterating_category == ALLCATs(i)
category_training_descriptors = [ category_training_descriptors ; ALLFEAT(i,:) ];
else
test_descriptors = [ test_descriptors ; ALLFEAT(i,:) ];
end
end
model_descriptors = category_training_descriptors(1:MODEL_SIZE, :);
model_mean = mean(model_descriptors);
model_data_min_mean = model_descriptors - repmat(model_mean, MODEL_SIZE, 1);
C = (model_data_min_mean' * model_data_min_mean) ./ MODEL_SIZE;
[eig_vct, eig_val] = eig(C);
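% The eigenmodel so far holds the category's mean, covariance and its
% eigenvectors/eigenvalues. A possible next step (sketch only, not part of
% this commit's logic) is to keep the strongest E components and project
% mean-subtracted descriptors into that reduced space, e.g.:
%   [~, order] = sort(diag(eig_val), 'descend');  % rank components by eigenvalue
%   principal_vct = eig_vct(:, order(1:E));       % E = chosen model dimension (illustrative)
%   proj_test = (test_descriptors - repmat(model_mean, size(test_descriptors,1), 1)) * principal_vct;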
TEST_SIZE = size(test_descriptors,1);
%% 3) Compute the distance of image to the query
dst=[];
for i=1:TEST_SIZE
candidate=test_descriptors(i,:);
query=ALLFEAT(queryimg,:);
category=ALLCATs(i);
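% NOTE: queryimg is not defined in this script (it is carried over from the
% random-query version) and ALLCATs(i) indexes the full image list while i
% here runs over test_descriptors; both still need wiring up for the
% eigenmodel search.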
%% COMPARE FUNCTION
thedst=compareEuclidean(query, candidate);
dst=[dst ; [thedst i category]];
end
dst=sortrows(dst,1); % sort the results
%% 4) Calculate PR
precision_values=zeros([1, NIMG]);
recall_values=zeros([1, NIMG]);
correct_at_n=zeros([1, NIMG]);
query_row = dst(1,:);
query_category = query_row(1,3);
fprintf('category was %s, %i, %i\n', CATEGORIES(query_category), query_category, iterating_category)
%calculate PR for each n
for i=1:NIMG
rows = dst(1:i, :);
correct_results = 0;
incorrect_results = 0;
if i > 1
for n=1:i - 1
row = rows(n, :);
category = row(3);
if category == query_category
correct_results = correct_results + 1;
else
incorrect_results = incorrect_results + 1;
end
end
end
% LAST ROW
row = rows(i, :);
category = row(3);
if category == query_category
correct_results = correct_results + 1;
correct_at_n(i) = 1;
else
incorrect_results = incorrect_results + 1;
end
precision = correct_results / i;
recall = correct_results / CAT_HIST(1,query_category);
precision_values(i) = precision;
recall_values(i) = recall;
end
%% 5) calculate AP
P_rel_n = zeros([1, NIMG]);
for i = 1:NIMG
precision = precision_values(i);
i_result_relevant = correct_at_n(i);
P_rel_n(i) = precision * i_result_relevant;
end
sum_P_rel_n = sum(P_rel_n);
average_precision = sum_P_rel_n / CAT_HIST(1,query_category);
AP_values(iterating_category) = average_precision;
%% 6) plot PR curve
figure(1)
plot(recall_values, precision_values);
hold on;
title('PR Curve');
xlabel('Recall');
ylabel('Precision');
%% 7) Visualise the results
%% These may be a little hard to see using imgshow
%% If you have access, try using imshow(outdisplay) or imagesc(outdisplay)
SHOW=20; % Show top 20 results
dst=dst(1:SHOW,:);
outdisplay=[];
for i=1:size(dst,1)
img=imread(ALLFILES{dst(i,2)});
img=img(1:2:end,1:2:end,:); % make image a quarter size
img=img(1:81,:,:); % crop image to uniform size vertically (some MSVC images are different heights)
outdisplay=[outdisplay img];
%populate confusion matrix
confusion_matrix(query_category, dst(i,3)) = confusion_matrix(query_category, dst(i,3)) + 1;
end
% figure(3)
% imgshow(outdisplay);
% axis off;
end
% normalise confusion matrix
norm_confusion_matrix = confusion_matrix ./ sum(confusion_matrix, 'all');
%% 8 Calculate MAP
% figure(4)
% histogram(AP_values);
% title('Average Precision Distribution');
% ylabel('Count');
% xlabel('Average Precision');
% xlim([0, 1]);
MAP = mean(AP_values)
AP_sd = std(AP_values)
% figure(2)
% plot(1:CAT_TOTAL, AP_values);
% title('Average Precision Per Run');
% xlabel('Run');
% ylabel('Average Precision');

View File

@ -142,6 +142,13 @@ These measured features can be arranged as a data structure or descriptor
It is an example of content based image retrieval or CBIR.
\end_layout
\begin_layout Standard
Visual search is used in consumer products such as Google Lens and Google
 reverse image search.
 It also appears as smaller features within larger products, for example
 'related products' suggestions.
\end_layout
\begin_layout Subsection
Extraction
\end_layout
@ -169,17 +176,6 @@ Typically a descriptor is a single column vector of numbers calculated about
Methods for calculating the distance will determine how images are ranked.
\end_layout
\begin_layout Subsection
Applications
\end_layout
\begin_layout Standard
Visual search is used in consumer products to generate powerful results
such as Google Lens and Google reverse image search.
It also has applicability as smaller features of products such as 'related
products' results.
\end_layout
\begin_layout Section
Descriptors
\end_layout
@ -605,14 +601,58 @@ Where
refers to the number of edge histogram bins.
\end_layout
\begin_layout Subsection
Principal Component Analysis
\end_layout
\begin_layout Section
Distance Measures
\end_layout
\begin_layout Standard
Once image descriptors are plotted in a feature space a visual search system
compares descriptors by measuring the distance between them.
The method for doing so will affect the ranking of descriptors.
\end_layout
\begin_layout Subsection
L1 Norm
\end_layout
\begin_layout Subsection
L2 Norm
\end_layout
\begin_layout Standard
The L2 norm, or Euclidean distance, is the straight-line distance between
 two points in space; applied to a single vector, it is the vector's magnitude.
 In a three-dimensional Euclidean space the magnitude of a vector,
\begin_inset Formula $x=\left(i,j,k\right)$
\end_inset
, is given by,
\end_layout
\begin_layout Standard
\begin_inset Formula
\[
\left\Vert x\right\Vert _{2}=\sqrt{i^{2}+j^{2}+k^{2}}
\]
\end_inset
\end_layout
\begin_layout Standard
Its intuitive geometric interpretation makes it the most commonly used norm
 in Euclidean space.
\end_layout
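\begin_layout Standard
As a minimal MATLAB sketch (assuming two row-vector descriptors of equal
 length, as produced by the descriptor extraction code; the repository's
 compareEuclidean may differ in detail), the distance used to rank results
 can be computed as:
\end_layout
\begin_layout LyX-Code
function dst = compareEuclidean(F1, F2)
\end_layout
\begin_layout LyX-Code
    d = F1 - F2;              % element-wise difference of the descriptors
\end_layout
\begin_layout LyX-Code
    dst = sqrt(sum(d .^ 2));  % square, sum and square root: the L2 norm
\end_layout
\begin_layout LyX-Code
end
\end_layout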
\begin_layout Subsection
Mahalanobis Distance
\end_layout
\begin_layout Section
Test Methods
\end_layout
@ -645,6 +685,28 @@ noprefix "false"
.
\end_layout
\begin_layout Standard
It is worth noting that there is some similarity and overlap between the
 dataset's categories, which has implications for the results calculated
 when using it.
\end_layout
\begin_layout Standard
For example, category 1 is a collection of images of cows, sheep and horses
 on grass, even though cows and sheep each have their own distinct categories.
 Category 18 also has many similarities to category 20, with both consisting
 mainly of shots of bodies of water and boats of varying sizes.
\end_layout
\begin_layout Standard
During the evaulation of implemented visual search techniques the classification
of each image is done by referencing the group index they are named with.
As such, occurences of false negatives may increase as images that do in
fact look similar as they are both, say, images of cows will be marked
as not similar and measure negatively for the performance of the method.
\end_layout
\begin_layout Subsection
Precision and Recall
\end_layout
@ -737,22 +799,85 @@ Precision Recall Curve
\end_layout
\begin_layout Standard
A way to visualise the response of a system is to calculate both precision
and recall at each
A way to visualise the response of a visual search system is to calculate
both precision and recall for all values of
\begin_inset Formula $n$
\end_inset
and plot both as what is known as a precision-recall curve or PR curve.
and plot the pairs against each other in what is known as a precision-recall
 curve or PR curve.
\end_layout
\begin_layout Subsection
Methods
\end_layout
\begin_layout Standard
In order to evaluate the performance of each descriptor two different tests
were conducted.
\end_layout
\begin_layout Subsubsection
Category Response
\end_layout
\begin_layout Standard
The category response aims to control for a descriptor's varying performance
at each of the dataset's categories by looping through each category and
randomly selecting an image from each as the query image.
Each category iteration has precision and recall values calculated for
all
\begin_inset Formula $n$
\end_inset
so that an average precision can be obtained for that query.
 The mean average precision is then taken over these iterations, 20 in total
 for the MSRCv2 dataset.
\end_layout
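\begin_layout Standard
Matching the accompanying implementation, the average precision for a single
 query is computed as
\end_layout
\begin_layout Standard
\begin_inset Formula
\[
AP=\frac{1}{\left|c\right|}\sum_{n=1}^{N}P\left(n\right)rel\left(n\right)
\]
\end_inset
\end_layout
\begin_layout Standard
where 
\begin_inset Formula $\left|c\right|$
\end_inset
 is the number of dataset images in the query's category, 
\begin_inset Formula $N$
\end_inset
 is the collection size, 
\begin_inset Formula $P\left(n\right)$
\end_inset
 is the precision over the top 
\begin_inset Formula $n$
\end_inset
 results and 
\begin_inset Formula $rel\left(n\right)$
\end_inset
 is 1 when result 
\begin_inset Formula $n$
\end_inset
 is relevant and 0 otherwise; the mean average precision (MAP) is the mean
 of these values over the category iterations.
\end_layout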
\begin_layout Standard
Completing one iteration for each category also allows a confusion matrix
 to be constructed.
 For each iteration the top 20 results were evaluated; this number was chosen
 because it is approximately the mean number of images per category.
\end_layout
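\begin_layout Standard
In the accompanying code the completed matrix is normalised by the total
 number of retrieved results, so that entry 
\begin_inset Formula $\hat{C}_{ij}$
\end_inset
 gives the proportion of all retrievals in which a query of category 
\begin_inset Formula $i$
\end_inset
 returned an image of category 
\begin_inset Formula $j$
\end_inset
:
\end_layout
\begin_layout Standard
\begin_inset Formula
\[
\hat{C}_{ij}=\frac{C_{ij}}{\sum_{k}\sum_{l}C_{kl}}
\]
\end_inset
\end_layout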
\begin_layout Standard
The completed confusion matrix allows the main category confusions to be
 identified and discussed.
\end_layout
\begin_layout Subsubsection
Random Response
\end_layout
\begin_layout Standard
The random response places emphasis on the number of iterations rather than
 on controlling for inter-category response.
 Here query images are selected at random from the entire dataset and many
 iterations are run in order to estimate a mean response.
\end_layout
\begin_layout Section
Results
\end_layout
\begin_layout Subsection
Average RGB
\end_layout
\begin_layout Subsection
Global Colour Histogram
\end_layout
\begin_layout Subsection
Spatial Colour
\end_layout
\begin_layout Subsection
Spatial Colour and Texture
\end_layout
\begin_layout Section
Discussion
\end_layout
@ -782,7 +907,7 @@ options "plain"
\end_layout
\begin_layout Section
MSRC Dataset Classifications
MSRCv2 Dataset Classifications
\begin_inset CommandInset label
LatexCommand label
name "sec:MSRC-Dataset-Classifications"
@ -1073,7 +1198,7 @@ Bird
\begin_inset Text
\begin_layout Plain Layout
Book Shelf
Books
\end_layout
\end_inset

Binary file not shown.

View File

@ -1,12 +1,2 @@
img = double(imread('dataset/Images/10_11_s.bmp'))./255;
% imshow(img);
img = getGreyscale(img);
[mag, angle] = getEdgeInfo(img);
F = getEdgeAngleHist(mag, angle);
imshow(mag > 0.05)
getRandomCategoryImage(7)

View File

@ -0,0 +1,32 @@
function return_index=getRandomCategoryImage(category)
if category > 20
error('number greater than category count');
end
DATASET_FOLDER = 'dataset';
allfiles=dir (fullfile([DATASET_FOLDER,'/Images/*.bmp']));
number_of_images = length(allfiles);
ALLCATs=zeros([1 number_of_images]);
for filenum=1:number_of_images
fname=allfiles(filenum).name;
split_string = split(fname, '_');
ALLCATs(filenum) = str2double(split_string(1));
end
return_index = 0;
while return_index == 0
index = floor(rand() * number_of_images);
if index == 0
index = 1;
end
if ALLCATs(index) == category
return_index = index;
end
end
return;