added category walk, starting eigenmodel

2019-11-29 00:37:43 +00:00 · 2019-11-29 00:37:43 +00:00 · a2575085de
commit a2575085de
parent de1d6b3464
9 changed files with 691 additions and 64 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,3 +2,4 @@ dataset
 descriptors
 *~
 *#
 coursework.pdf
--- a/cvpr_computedescriptors.m
+++ b/cvpr_computedescriptors.m
@ -22,8 +22,8 @@ OUT_FOLDER = 'descriptors';
 %% and within that folder, create another folder to hold these descriptors
 %% the idea is all your descriptors are in individual folders - within
 %% the folder specified as 'OUT_FOLDER'.
-% OUT_SUBFOLDER='avgRGB';
+OUT_SUBFOLDER='avgRGB';
-OUT_SUBFOLDER='globalRGBhisto';
+% OUT_SUBFOLDER='globalRGBhisto';
 % OUT_SUBFOLDER='spatialColour';
 % OUT_SUBFOLDER='spatialColourTexture';
@ -37,8 +37,8 @@ for filenum=1:length(allfiles)
    fout=[OUT_FOLDER,'/',OUT_SUBFOLDER,'/',fname(1:end-4),'.mat'];%replace .bmp with .mat
    %% EXTRACT FUNCTION
-%     F=extractAvgRGB(img);
+    F=extractAvgRGB(img);
-    F=extractGlobalColHist(img);
+%     F=extractGlobalColHist(img);
 %     F=extractSpatialColour(img);
 %     F=extractSpatialColourTexture(img);
    save(fout,'F');
--- a/cvpr_visualsearch.m
+++ b/cvpr_visualsearch.m
@ -27,10 +27,10 @@ DATASET_FOLDER = 'dataset';
 DESCRIPTOR_FOLDER = 'descriptors';
 %% and within that folder, another folder to hold the descriptors
 %% we are interested in working with
-% DESCRIPTOR_SUBFOLDER='avgRGB';
+DESCRIPTOR_SUBFOLDER='avgRGB';
 % DESCRIPTOR_SUBFOLDER='globalRGBhisto';
 % DESCRIPTOR_SUBFOLDER='spatialColour';
-DESCRIPTOR_SUBFOLDER='spatialColourTexture';
+% DESCRIPTOR_SUBFOLDER='spatialColourTexture';
 CATEGORIES = ["Farm Animal" 
    "Tree"
@ -84,13 +84,15 @@ end
 CAT_HIST = histogram(ALLCATs).Values;
 CAT_TOTAL = length(CAT_HIST);
-run_total = 1;
+run_total = 50;
 AP_values = zeros([1, run_total]);
 for run=1:run_total
    %% 2) Pick an image at random to be the query
    NIMG=size(ALLFEAT,1);           % number of images in collection
    queryimg=floor(rand()*NIMG);    % index of a random image
-
+    if queryimg == 0
        queryimg = 1;
    end
    %% 3) Compute the distance of image to the query
    dst=[];
@ -114,7 +116,7 @@ for run=1:run_total
    query_row = dst(1,:);
    query_category = query_row(1,3);
-    fprintf('category was %s', CATEGORIES(query_category))
+    fprintf('category was %s\n', CATEGORIES(query_category))
    %calculate PR for each n
@ -187,39 +189,39 @@ for run=1:run_total
    %% These may be a little hard to see using imgshow
    %% If you have access, try using imshow(outdisplay) or imagesc(outdisplay)
-    confusion_matrix = zeros(CAT_TOTAL);
+%     confusion_matrix = zeros(CAT_TOTAL);
-    
+%     
-    SHOW=15; % Show top 15 results
+%     SHOW=15; % Show top 15 results
-    dst=dst(1:SHOW,:);
+%     dst=dst(1:SHOW,:);
-    outdisplay=[];
+%     outdisplay=[];
-    for i=1:size(dst,1)
+%     for i=1:size(dst,1)
-       img=imread(ALLFILES{dst(i,2)});
+%        img=imread(ALLFILES{dst(i,2)});
-       img=img(1:2:end,1:2:end,:); % make image a quarter size
+%        img=img(1:2:end,1:2:end,:); % make image a quarter size
-       img=img(1:81,:,:); % crop image to uniform size vertically (some MSVC images are different heights)
+%        img=img(1:81,:,:); % crop image to uniform size vertically (some MSVC images are different heights)
-       outdisplay=[outdisplay img];
+%        outdisplay=[outdisplay img];
-       
+%        
-       %populate confusion matrix
+%        %populate confusion matrix
-       confusion_matrix(query_category, dst(i,3)) = confusion_matrix(query_category, dst(i,3)) + 1;
+%        confusion_matrix(query_category, dst(i,3)) = confusion_matrix(query_category, dst(i,3)) + 1;
-    end
+%     end
-    figure(3)
+%     figure(3)
-    imgshow(outdisplay);
+%     imgshow(outdisplay);
-    axis off;
+%     axis off;
 end
 %% 8 Calculate MAP
-figure(4)
+% figure(4)
-histogram(AP_values);
+% histogram(AP_values);
-title('Average Precision Distribution');
+% title('Average Precision Distribution');
-ylabel('Count');
+% ylabel('Count');
-xlabel('Average Precision');
+% xlabel('Average Precision');
-xlim([0, 1]);
+% xlim([0, 1]);
 MAP = mean(AP_values)
 AP_sd = std(AP_values)
-figure(2)
+% figure(2)
-plot(1:run_total, AP_values);
+% plot(1:run_total, AP_values);
-title('Average Precision Per Run');
+% title('Average Precision Per Run');
-xlabel('Run');
+% xlabel('Run');
-ylabel('Average Precision');
+% ylabel('Average Precision');
--- a/cvpr_visualsearch_category_walk.m
+++ b/cvpr_visualsearch_category_walk.m
@ -0,0 +1,228 @@
 %% EEE3032 - Computer Vision and Pattern Recognition (ee3.cvpr)
 %%
 %% cvpr_visualsearch_category_walk.m
 %% Skeleton code provided as part of the coursework assessment
 %%
 %% This code will load in all descriptors pre-computed (by the
 %% function cvpr_computedescriptors) from the images in the MSRCv2 dataset.
 %%
 %% It will pick a descriptor at random and compare all other descriptors to
 %% it - by calling cvpr_compare.  In doing so it will rank the images by
 %% similarity to the randomly picked descriptor.  Note that initially the
 %% function cvpr_compare returns a random number - you need to code it
 %% so that it returns the Euclidean distance or some other distance metric
 %% between the two descriptors it is passed.
 %%
 %% (c) John Collomosse 2010  (J.Collomosse@surrey.ac.uk)
 %% Centre for Vision Speech and Signal Processing (CVSSP)
 %% University of Surrey, United Kingdom
 close all;
 clear all;
 %% Edit the following line to the folder you unzipped the MSRCv2 dataset to
 DATASET_FOLDER = 'dataset';
 %% Folder that holds the results...
 DESCRIPTOR_FOLDER = 'descriptors';
 %% and within that folder, another folder to hold the descriptors
 %% we are interested in working with
 DESCRIPTOR_SUBFOLDER='avgRGB';
 % DESCRIPTOR_SUBFOLDER='globalRGBhisto';
 % DESCRIPTOR_SUBFOLDER='spatialColour';
 % DESCRIPTOR_SUBFOLDER='spatialColourTexture';
 CATEGORIES = ["Farm Animal" 
    "Tree"
    "Building"
    "Plane"
    "Cow"
    "Face"
    "Car"
    "Bike"
    "Sheep"
    "Flower"
    "Sign"
    "Bird"
    "Book Shelf"
    "Bench"
    "Cat"
    "Dog"
    "Road"
    "Water Features"
    "Human Figures"
    "Coast"
    ];
 %% 1) Load all the descriptors into "ALLFEAT"
 %% each row of ALLFEAT is a descriptor (is an image)
 ALLFEAT=[];
 ALLFILES=cell(1,0);
 ALLCATs=[];
 ctr=1;
 allfiles=dir (fullfile([DATASET_FOLDER,'/Images/*.bmp']));
 for filenum=1:length(allfiles)
    fname=allfiles(filenum).name;
    %identify photo category for PR calculation
    split_string = split(fname, '_');
    ALLCATs(filenum) = str2double(split_string(1));
    imgfname_full=([DATASET_FOLDER,'/Images/',fname]);
    img=double(imread(imgfname_full))./255;
    thesefeat=[];
    featfile=[DESCRIPTOR_FOLDER,'/',DESCRIPTOR_SUBFOLDER,'/',fname(1:end-4),'.mat'];%replace .bmp with .mat
    load(featfile,'F');
    ALLFILES{ctr}=imgfname_full;
    ALLFEAT=[ALLFEAT ; F];
    ctr=ctr+1;
 end
 % get counts for each category for PR calculation
 CAT_HIST = histogram(ALLCATs).Values;
 CAT_TOTAL = length(CAT_HIST);
 NIMG=size(ALLFEAT,1);           % number of images in collection
 confusion_matrix = zeros(CAT_TOTAL);
 AP_values = zeros([1, CAT_TOTAL]);
 for run=1:CAT_TOTAL
    %% 2) Pick an image at random to be the query
    queryimg=getRandomCategoryImage(run);    % index of a random image
    %% 3) Compute the distance of image to the query
    dst=[];
    for i=1:NIMG
        candidate=ALLFEAT(i,:);
        query=ALLFEAT(queryimg,:);
        category=ALLCATs(i);
        %% COMPARE FUNCTION
        thedst=compareEuclidean(query, candidate);
        dst=[dst ; [thedst i category]];
    end
    dst=sortrows(dst,1);  % sort the results
    %% 4) Calculate PR
    % Ground truth is the query image's own label. Reading it from the
    % top-ranked row instead would be wrong whenever another image ties with
    % the query at the smallest distance and sorts ahead of it.
    query_category = ALLCATs(queryimg);
    fprintf('category was %s, %i, %i\n', CATEGORIES(query_category), query_category, run)
    % correct_at_n(n) is 1 when the n-th ranked result shares the query's
    % category and 0 otherwise (dst is sorted by ascending distance, and its
    % third column holds each result's category)
    correct_at_n = double(dst(:,3)' == query_category);
    % number of relevant results within the top n, for every n in one pass
    correct_so_far = cumsum(correct_at_n);
    % number of images in the collection that belong to the query's category
    relevant_total = CAT_HIST(1,query_category);
    % precision@n = relevant in top n / n ; recall@n = relevant in top n / all relevant
    precision_values = correct_so_far ./ (1:NIMG);
    recall_values = correct_so_far ./ relevant_total;
    %% 5) calculate AP
    % only ranks that hold a relevant result contribute their precision
    P_rel_n = precision_values .* correct_at_n;
    sum_P_rel_n = sum(P_rel_n);
    average_precision = sum_P_rel_n / relevant_total;
    AP_values(run) = average_precision;
    %% 6) plot PR curve
    figure(1)
    plot(recall_values, precision_values);
    hold on;
    title('PR Curve');
    xlabel('Recall');
    ylabel('Precision');
    %% 7) Visualise the results
    %% These may be a little hard to see using imgshow
    %% If you have access, try using imshow(outdisplay) or imagesc(outdisplay)
    SHOW=20; % Show top 20 results
    dst=dst(1:SHOW,:); % keep only the SHOW nearest results (dst is sorted by distance)
    outdisplay=[];
    for i=1:size(dst,1)
       img=imread(ALLFILES{dst(i,2)}); % column 2 of dst is the image's index into ALLFILES
       img=img(1:2:end,1:2:end,:); % make image a quarter size
       img=img(1:81,:,:); % crop image to uniform size vertically (some MSRC images are different heights)
       outdisplay=[outdisplay img]; % append this thumbnail to the right of the strip
       % populate confusion matrix: row = query category, column = category
       % of the retrieved image (column 3 of dst)
       confusion_matrix(query_category, dst(i,3)) = confusion_matrix(query_category, dst(i,3)) + 1;
    end
 %     figure(3)
 %     imgshow(outdisplay);
 %     axis off;
 end
 % normalise confusion matrix
 norm_confusion_matrix = confusion_matrix ./ sum(confusion_matrix, 'all');
 %% 8 Calculate MAP
 % figure(4)
 % histogram(AP_values);
 % title('Average Precision Distribution');
 % ylabel('Count');
 % xlabel('Average Precision');
 % xlim([0, 1]);
 MAP = mean(AP_values)
 AP_sd = std(AP_values)
 % figure(2)
 % plot(1:CAT_TOTAL, AP_values);
 % title('Average Precision Per Run');
 % xlabel('Run');
 % ylabel('Average Precision');
--- a/cvpr_visualsearch_pca.m
+++ b/cvpr_visualsearch_pca.m
@ -0,0 +1,249 @@
 %% EEE3032 - Computer Vision and Pattern Recognition (ee3.cvpr)
 %%
 %% cvpr_visualsearch_pca.m
 %% Skeleton code provided as part of the coursework assessment
 %%
 %% This code will load in all descriptors pre-computed (by the
 %% function cvpr_computedescriptors) from the images in the MSRCv2 dataset.
 %%
 %% It will pick a descriptor at random and compare all other descriptors to
 %% it - by calling cvpr_compare.  In doing so it will rank the images by
 %% similarity to the randomly picked descriptor.  Note that initially the
 %% function cvpr_compare returns a random number - you need to code it
 %% so that it returns the Euclidean distance or some other distance metric
 %% between the two descriptors it is passed.
 %%
 %% (c) John Collomosse 2010  (J.Collomosse@surrey.ac.uk)
 %% Centre for Vision Speech and Signal Processing (CVSSP)
 %% University of Surrey, United Kingdom
 close all;
 clear all;
 %% Edit the following line to the folder you unzipped the MSRCv2 dataset to
 DATASET_FOLDER = 'dataset';
 %% Folder that holds the results...
 DESCRIPTOR_FOLDER = 'descriptors';
 %% and within that folder, another folder to hold the descriptors
 %% we are interested in working with
 DESCRIPTOR_SUBFOLDER='avgRGB';
 % DESCRIPTOR_SUBFOLDER='globalRGBhisto';
 % DESCRIPTOR_SUBFOLDER='spatialColour';
 % DESCRIPTOR_SUBFOLDER='spatialColourTexture';
 CATEGORIES = ["Farm Animal" 
    "Tree"
    "Building"
    "Plane"
    "Cow"
    "Face"
    "Car"
    "Bike"
    "Sheep"
    "Flower"
    "Sign"
    "Bird"
    "Book Shelf"
    "Bench"
    "Cat"
    "Dog"
    "Road"
    "Water Features"
    "Human Figures"
    "Coast"
    ];
 %% 1) Load all the descriptors into "ALLFEAT"
 %% each row of ALLFEAT is a descriptor (is an image)
 ALLFEAT=[];
 ALLFILES=cell(1,0);
 ALLCATs=[];
 ctr=1;
 allfiles=dir (fullfile([DATASET_FOLDER,'/Images/*.bmp']));
 for filenum=1:length(allfiles)
    fname=allfiles(filenum).name;
    %identify photo category for PR calculation
    split_string = split(fname, '_');
    ALLCATs(filenum) = str2double(split_string(1));
    imgfname_full=([DATASET_FOLDER,'/Images/',fname]);
    img=double(imread(imgfname_full))./255;
    thesefeat=[];
    featfile=[DESCRIPTOR_FOLDER,'/',DESCRIPTOR_SUBFOLDER,'/',fname(1:end-4),'.mat'];%replace .bmp with .mat
    load(featfile,'F');
    ALLFILES{ctr}=imgfname_full;
    ALLFEAT=[ALLFEAT ; F];
    ctr=ctr+1;
 end
 % get counts for each category for PR calculation
 CAT_HIST = histogram(ALLCATs).Values;
 CAT_TOTAL = length(CAT_HIST);
 NIMG=size(ALLFEAT,1);           % number of images in collection
 MODEL_SIZE = 10;
 confusion_matrix = zeros(CAT_TOTAL);
 AP_values = zeros([1, CAT_TOTAL]);
 for iterating_category=1:CAT_TOTAL
    %% 2) Select descriptors for category and training data
    % Split the collection: rows labelled with the current category feed the
    % model, every other row becomes a test descriptor.
    category_training_descriptors = [];
    test_descriptors = [];
    for i=1:NIMG
        if iterating_category == ALLCATs(i)
            category_training_descriptors = [ category_training_descriptors ; ALLFEAT(i,:) ];
        else
            test_descriptors = [ test_descriptors ; ALLFEAT(i,:) ];
        end
    end
    % Build the model from the first MODEL_SIZE descriptors of the category.
    % The indexing below errors if the category has fewer than MODEL_SIZE rows.
    model_descriptors = category_training_descriptors(1:MODEL_SIZE, :);
    model_mean = mean(model_descriptors);
    % subtract the mean from every model row
    model_data_min_mean = model_descriptors - repmat(model_mean, MODEL_SIZE, 1);
    % covariance of the mean-centred model data, normalised by MODEL_SIZE
    C = (model_data_min_mean' * model_data_min_mean) ./ MODEL_SIZE;
    % eigenvectors / eigenvalues of the covariance
    % NOTE(review): eig_vct and eig_val are not used by any later line of
    % this script; the distance below is still plain Euclidean.
    [eig_vct, eig_val] = eig(C);
    TEST_SIZE = size(test_descriptors,1);
    %% 3) Compute the distance of image to the query
    dst=[];
    for i=1:TEST_SIZE
        candidate=test_descriptors(i,:);
        % NOTE(review): queryimg is never assigned in this script (the
        % workspace is cleared at the top), so this line will raise an
        % undefined-variable error as written.
        query=ALLFEAT(queryimg,:);
        % NOTE(review): i is a row of test_descriptors, not of ALLFEAT, so
        % ALLCATs(i) and the index stored in dst below do not identify the
        % candidate image - the original row numbers need to be carried
        % through the split above.
        category=ALLCATs(i);
        %% COMPARE FUNCTION
        thedst=compareEuclidean(query, candidate);
        % each dst row is [distance, index, category]
        dst=[dst ; [thedst i category]];
    end
    dst=sortrows(dst,1);  % sort the results
    %% 4) Calculate PR
    precision_values=zeros([1, NIMG]);
    recall_values=zeros([1, NIMG]);
    correct_at_n=zeros([1, NIMG]);
    query_row = dst(1,:);
    query_category = query_row(1,3);
    fprintf('category was %s, %i, %i\n', CATEGORIES(query_category), query_category, iterating_category)
    %calculate PR for each n
    for i=1:NIMG
        rows = dst(1:i, :);
        correct_results = 0;
        incorrect_results = 0;
        if i > 1    
            for n=1:i - 1
                row = rows(n, :);
                category = row(3);
                if category == query_category
                    correct_results = correct_results + 1;
                else
                    incorrect_results = incorrect_results + 1;
                end
            end
        end
        % LAST ROW
        row = rows(i, :);
        category = row(3);
        if category == query_category
            correct_results = correct_results + 1;
            correct_at_n(i) = 1;
        else
            incorrect_results = incorrect_results + 1;
        end
        precision = correct_results / i;
        recall = correct_results / CAT_HIST(1,query_category);
        precision_values(i) = precision;
        recall_values(i) = recall;
    end
    %% 5) calculate AP
    P_rel_n = zeros([1, NIMG]);
    for i = 1:NIMG
        precision = precision_values(i);
        i_result_relevant = correct_at_n(i);
        P_rel_n(i) = precision * i_result_relevant;
    end
    sum_P_rel_n = sum(P_rel_n);
    average_precision = sum_P_rel_n / CAT_HIST(1,query_category);
    AP_values(iterating_category) = average_precision;
    %% 6) plot PR curve
    figure(1)
    plot(recall_values, precision_values);
    hold on;
    title('PR Curve');
    xlabel('Recall');
    ylabel('Precision');
    %% 7) Visualise the results
    %% These may be a little hard to see using imgshow
    %% If you have access, try using imshow(outdisplay) or imagesc(outdisplay)
    SHOW=20; % Show top 15 results
    dst=dst(1:SHOW,:);
    outdisplay=[];
    for i=1:size(dst,1)
       img=imread(ALLFILES{dst(i,2)});
       img=img(1:2:end,1:2:end,:); % make image a quarter size
       img=img(1:81,:,:); % crop image to uniform size vertically (some MSVC images are different heights)
       outdisplay=[outdisplay img];
       %populate confusion matrix
       confusion_matrix(query_category, dst(i,3)) = confusion_matrix(query_category, dst(i,3)) + 1;
    end
 %     figure(3)
 %     imgshow(outdisplay);
 %     axis off;
 end
 % normalise confusion matrix
 norm_confusion_matrix = confusion_matrix ./ sum(confusion_matrix, 'all');
 %% 8 Calculate MAP
 % figure(4)
 % histogram(AP_values);
 % title('Average Precision Distribution');
 % ylabel('Count');
 % xlabel('Average Precision');
 % xlim([0, 1]);
 MAP = mean(AP_values)
 AP_sd = std(AP_values)
 % figure(2)
 % plot(1:CAT_TOTAL, AP_values);
 % title('Average Precision Per Run');
 % xlabel('Run');
 % ylabel('Average Precision');
--- a/report/coursework.lyx
+++ b/report/coursework.lyx
@ -142,6 +142,13 @@ These measured features can be arranged as a data structure or descriptor
 It is an example of content based image retrieval or CBIR.
 \end_layout
 \begin_layout Standard
 Visual search is used in consumer products to generate powerful results
 such as Google Lens and Google reverse image search.
 It also has applicability as smaller features of products such as 'related
 products' results.
 \end_layout
 \begin_layout Subsection
 Extraction
 \end_layout
@ -169,17 +176,6 @@ Typically a descriptor is a single column vector of numbers calculated about
 Methods for calculating the distance will determine how images are ranked.
 \end_layout
 \begin_layout Subsection
 Applications
 \end_layout
 \begin_layout Standard
 Visual search is used in consumer products to generate powerful results
 such as Google Lens and Google reverse image search.
 It also has applicability as smaller features of products such as 'related
 products' results.
 \end_layout
 \begin_layout Section
 Descriptors
 \end_layout
@ -605,14 +601,58 @@ Where
 refers to the number of edge histogram bins.
 \end_layout
 \begin_layout Subsection
 Principal Component Analysis
 \end_layout
 \begin_layout Section
 Distance Measures
 \end_layout
 \begin_layout Standard
 Once image descriptors are plotted in a feature space a visual search system
 compares descriptors by measuring the distance between them.
 The method for doing so will affect the ranking of descriptors.
 \end_layout
 \begin_layout Subsection
 L1 Norm
 \end_layout
 \begin_layout Subsection
 L2 Norm
 \end_layout
 \begin_layout Standard
 The L2 norm, or Euclidean distance, is the shortest distance between two
 points in space; it is also referred to as the magnitude of a vector.
 In a three dimensional Euclidean space the magnitude of a vector, 
 \begin_inset Formula $x=\left(i,j,k\right)$
 \end_inset
 , is given by,
 \end_layout
 \begin_layout Standard
 \begin_inset Formula 
 \[
 \left\Vert x\right\Vert _{2}=\sqrt{i^{2}+j^{2}+k^{2}}
 \]
 \end_inset
 \end_layout
 \begin_layout Standard
 Its intuitive distance measurement makes it the most commonly used norm
 in Euclidean space.
 \end_layout
 \begin_layout Subsection
 Mahalanobis Distance
 \end_layout
 \begin_layout Section
 Test Methods
 \end_layout
@ -645,6 +685,28 @@ noprefix "false"
 .
 \end_layout
 \begin_layout Standard
 Worth noting about the dataset is that there are some similarities and overlap
 between categories which has implications on the results which can be calculate
 d when using it.
 \end_layout
 \begin_layout Standard
 For example category 1 is a collection of images of cows, sheep and horses
 on grass however cows and sheep each have their own distinct categories.
 Category 18 also has many similarities to category 20 with both being mainly
 shots of bodies of water and boats in water of varying sizes.
 \end_layout
 \begin_layout Standard
 During the evaluation of implemented visual search techniques the classification
 of each image is done by referencing the group index they are named with.
 As such, occurrences of false negatives may increase as images that do in
 fact look similar as they are both, say, images of cows will be marked
 as not similar and measure negatively for the performance of the method.
 \end_layout
 \begin_layout Subsection
 Precision and Recall
 \end_layout
@ -737,22 +799,85 @@ Precision Recall Curve
 \end_layout
 \begin_layout Standard
-A way to visualise the response of a system is to calculate both precision
+A way to visualise the response of a visual search system is to calculate
- and recall at each 
+ both precision and recall for all values of 
 \begin_inset Formula $n$
 \end_inset
- and plot both as what is known as a precision-recall curve or PR curve.
+ and plot each pair against each other for what is known as a precision-recall
 curve or PR curve.
 \end_layout
 \begin_layout Subsection
 Methods
 \end_layout
 \begin_layout Standard
 In order to evaluate the performance of each descriptor two different tests
 were conducted.
 \end_layout
 \begin_layout Subsubsection
 Category Response
 \end_layout
 \begin_layout Standard
 The category response aims to control for a descriptor's varying performance
 at each of the dataset's categories by looping through each category and
 randomly selecting an image from each as the query image.
 Each category iteration has precision and recall values calculated for
 all 
 \begin_inset Formula $n$
 \end_inset
 to allow the mean average precision to be calculated.
 This mean value is calculated from 20 iterations for the MSRCv2 dataset.
 \end_layout
 \begin_layout Standard
 Completing one iteration for each category also allows a confusion matrix
 to be constructed.
 For each iteration the top 20 results were evaluated, this number was chosen
 as this is approximately the mean number of images in each category.
 \end_layout
 \begin_layout Standard
 The completed confusion matrix allows the main category confusions to be
 identified and discussions to be made.
 \end_layout
 \begin_layout Subsubsection
 Random Response
 \end_layout
 \begin_layout Standard
 The random response places emphasis on iteration over controlling for inter-cate
 gory response.
 Here query images are selected at random from the entire dataset and many
 iterations are run in order to identify a mean response.
 \end_layout
 \begin_layout Section
 Results
 \end_layout
 \begin_layout Subsection
 Average RGB
 \end_layout
 \begin_layout Subsection
 Global Colour Histogram
 \end_layout
 \begin_layout Subsection
 Spatial Colour
 \end_layout
 \begin_layout Subsection
 Spatial Colour and Texture
 \end_layout
 \begin_layout Section
 Discussion
 \end_layout
@ -782,7 +907,7 @@ options "plain"
 \end_layout
 \begin_layout Section
-MSRC Dataset Classifications
+MSRCv2 Dataset Classifications
 \begin_inset CommandInset label
 LatexCommand label
 name "sec:MSRC-Dataset-Classifications"
@ -1073,7 +1198,7 @@ Bird
 \begin_inset Text
 \begin_layout Plain Layout
-Book Shelf
+Books
 \end_layout
 \end_inset
--- a/report/coursework.pdf
+++ b/report/coursework.pdf
--- a/scratch.m
+++ b/scratch.m
@ -1,12 +1,2 @@
-
+getRandomCategoryImage(7)
 img = double(imread('dataset/Images/10_11_s.bmp'))./255;
 % imshow(img);
 img = getGreyscale(img);
 [mag, angle] = getEdgeInfo(img);
 F = getEdgeAngleHist(mag, angle);
 imshow(mag > 0.05)
--- a/util/getRandomCategoryImage.m
+++ b/util/getRandomCategoryImage.m
@ -0,0 +1,32 @@
 function return_index=getRandomCategoryImage(category)
 %GETRANDOMCATEGORYIMAGE Index of a random dataset image from one category.
 %   return_index = getRandomCategoryImage(category) lists the .bmp files in
 %   dataset/Images, reads each file's category from the number before the
 %   first '_' in its name, and returns the position within that listing of
 %   one image of the requested category, chosen uniformly at random.
 %
 %   An error is raised when category is greater than 20, or when no file
 %   in the listing belongs to the requested category.
 MAX_CATEGORY = 20;
 if category > MAX_CATEGORY
    error('number greater than category count');
 end
 DATASET_FOLDER = 'dataset';
 allfiles=dir (fullfile([DATASET_FOLDER,'/Images/*.bmp']));
 number_of_images = length(allfiles);
 % category label of every file, in listing order
 ALLCATs=zeros([1 number_of_images]);
 for filenum=1:number_of_images
    fname=allfiles(filenum).name;
    split_string = split(fname, '_');
    ALLCATs(filenum) = str2double(split_string(1));
 end
 % Sample directly from the indices that match. Rejection sampling with
 % floor(rand()*N) could never return the last image, returned the first
 % one twice as often, and looped forever when the category was absent.
 candidates = find(ALLCATs == category);
 if isempty(candidates)
    error('no images found for category %d', category);
 end
 return_index = candidates(randi(numel(candidates)));
 return;