added descriptor extraction, visual search query, edge detection
This commit is contained in:
parent
e19f13bbe5
commit
1dea8e54d5
@ -9,16 +9,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<matplotlib.image.AxesImage at 0x7f6a9f8a01d0>"
|
||||
"<matplotlib.image.AxesImage at 0x7f9b3bf4b650>"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
},
|
||||
@ -55,16 +55,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<matplotlib.image.AxesImage at 0x7f6a9c204bd0>"
|
||||
"<matplotlib.image.AxesImage at 0x7f9b3a1cd610>"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
},
|
||||
@ -99,16 +99,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 68,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<matplotlib.image.AxesImage at 0x7f758913b8d0>"
|
||||
"<matplotlib.image.AxesImage at 0x7f9b39fbcc90>"
|
||||
]
|
||||
},
|
||||
"execution_count": 68,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
},
|
||||
@ -142,7 +142,7 @@
|
||||
"dx = np.concatenate((dx_grey, dx_blur[1:-1, 1:-1]), axis=1)\n",
|
||||
"dy = np.concatenate((dy_grey, dy_blur[1:-1, 1:-1]), axis=1)\n",
|
||||
"\n",
|
||||
"x = np.sqrt(pow(dx, 2) + pow(dy, 2))\n",
|
||||
"x = np.sqrt(dx**2 + dy**2)\n",
|
||||
"plt.imshow(x>0.08)"
|
||||
]
|
||||
}
|
||||
|
121
vision.ipynb
121
vision.ipynb
File diff suppressed because one or more lines are too long
@ -1,7 +1,27 @@
|
||||
from vision.model import Image
|
||||
from typing import List
|
||||
import numpy as np
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def extract_average_rgb(pixels: np.ndarray = None,
                        image: Image = None,
                        images: List[Image] = None):
    """Compute an average-colour descriptor from raw pixels or Image objects.

    The descriptor is the mean of the pixel array over its first two axes,
    i.e. one value per channel for a rows x columns x channels array.

    Exactly one source is used, in this priority order: ``images``, then
    ``image``, then ``pixels``.

    Args:
        pixels: Raw pixel array. Only used when neither ``image`` nor
            ``images`` is given; the descriptor is returned.
        image: Single image; its ``descriptor`` attribute is set in place.
        images: Images whose ``descriptor`` attributes are set in place.

    Returns:
        The descriptor array when called with ``pixels``; ``None`` when the
        descriptor(s) were stored on the supplied image object(s).

    Raises:
        KeyError: If none of ``pixels``, ``image`` or ``images`` is given.
    """
    if pixels is None and image is None and images is None:
        raise KeyError('no image provided')

    def extract(i):
        return i.mean(axis=(0, 1))

    if images is not None:
        length = len(images)
        for index, img in enumerate(images):
            logger.debug(f'generating {index} of {length}')
            # Average the pixel array, not the Image wrapper itself; this
            # matches how the spatial extractors read ``image.pixels``.
            img.descriptor = extract(img.pixels)
        return None

    if image is not None:
        image.descriptor = extract(image.pixels)
        return None

    return extract(pixels)
|
||||
|
86
vision/descriptor/spatial.py
Normal file
86
vision/descriptor/spatial.py
Normal file
@ -0,0 +1,86 @@
|
||||
from vision.model import Image
|
||||
from typing import List
|
||||
import numpy as np
|
||||
|
||||
import vision.descriptor.avg_RGB as rgb
|
||||
import vision.util.edge as edge
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def grid_image(height: int, width: int, pixels: np.array):
    """Split a pixel array into a ``height`` x ``width`` grid of cells.

    Cell boundaries are the rounded fractions of the array extent along the
    first (row) and second (column) axes. Cells are returned in row-major
    order: every column of grid row 0, then grid row 1, and so on.

    Args:
        height: Number of grid rows.
        width: Number of grid columns.
        pixels: Array indexed as ``pixels[row, column, channel]``.

    Returns:
        A list of ``height * width`` slices of ``pixels``.
    """
    dims = pixels.shape

    def span(index, parts, axis):
        # Slice covering the ``index``-th of ``parts`` divisions of ``axis``.
        extent = dims[axis]
        return slice(round(index * extent / parts),
                     round((index + 1) * extent / parts))

    return [
        pixels[span(grid_row, height, 0), span(grid_col, width, 1), :]
        for grid_row in range(height)
        for grid_col in range(width)
    ]
|
||||
|
||||
|
||||
def extract_spatial_texture(height: int,
                            width: int,
                            bins: int,
                            threshold: float,
                            pixels: np.array = None,
                            image: Image = None,
                            images: List[Image] = None):
    """Build a gridded edge-orientation descriptor.

    The pixel array is cut into a ``height`` x ``width`` grid with
    ``grid_image``; for every cell the edge information is computed and
    turned into an angle histogram, and the histograms are concatenated in
    grid order into one flat descriptor.

    The source is chosen in priority order ``images``, ``image``, ``pixels``.

    Args:
        height: Number of grid rows.
        width: Number of grid columns.
        bins: Number of angle bins per cell histogram.
        threshold: Edge-magnitude threshold passed to the histogram step.
        pixels: Raw pixel array; the descriptor is returned.
        image: Single image; its ``descriptor`` is set in place.
        images: Images whose ``descriptor`` attributes are set in place.

    Returns:
        The descriptor when called with ``pixels``; otherwise ``None``.

    Raises:
        KeyError: If none of ``pixels``, ``image`` or ``images`` is given.
    """
    nothing_supplied = pixels is None and image is None and images is None
    if nothing_supplied:
        raise KeyError('no image provided')

    def extract(i):
        # Seed with an empty float array so a zero-cell grid still yields
        # an empty descriptor.
        parts = [np.array([])]
        for cell in grid_image(height, width, i):
            cell_edges = edge.get_edge_info(pixels=cell)
            cell_hist = edge.get_edge_angle_hist(cell_edges, bins=bins, threshold=threshold)
            parts.append(np.ravel(cell_hist[0]))
        return np.concatenate(parts)

    if images is not None:
        length = len(images)
        for index, current in enumerate(images):
            logger.debug(f'generating {index} of {length}')
            current.descriptor = extract(current.pixels)
        return None

    if image is not None:
        image.descriptor = extract(image.pixels)
        return None

    return extract(pixels)
|
||||
|
||||
|
||||
def extract_spatial_average_rgb(height: int,
                                width: int,
                                pixels: np.ndarray = None,
                                image: Image = None,
                                images: List[Image] = None):
    """Build a gridded average-colour descriptor.

    The pixel array is cut into a ``height`` x ``width`` grid with
    ``grid_image``; the average colour of every cell is computed with
    ``rgb.extract_average_rgb`` and the results are concatenated in grid
    order into one flat descriptor.

    The source is chosen in priority order ``images``, ``image``, ``pixels``.

    Args:
        height: Number of grid rows.
        width: Number of grid columns.
        pixels: Raw pixel array; the descriptor is returned.
        image: Single image; its ``descriptor`` is set in place.
        images: Images whose ``descriptor`` attributes are set in place.

    Returns:
        The descriptor when called with ``pixels``; otherwise ``None``.

    Raises:
        KeyError: If none of ``pixels``, ``image`` or ``images`` is given.
    """
    if pixels is None and image is None and images is None:
        raise KeyError('no image provided')

    def extract(i):
        # Grid the array handed to this helper. The outer ``pixels``
        # argument is None whenever ``image`` or ``images`` is the source.
        parts = [np.array([])]
        for seg in grid_image(height, width, i):
            parts.append(np.ravel(rgb.extract_average_rgb(pixels=seg)))
        return np.concatenate(parts)

    if images is not None:
        length = len(images)
        for index, img in enumerate(images):
            logger.debug(f'generating {index} of {length}')
            img.descriptor = extract(img.pixels)
        return None

    if image is not None:
        image.descriptor = extract(image.pixels)
        return None

    return extract(pixels)
|
0
vision/maths/__init__.py
Normal file
0
vision/maths/__init__.py
Normal file
38
vision/maths/precision_recall.py
Normal file
38
vision/maths/precision_recall.py
Normal file
@ -0,0 +1,38 @@
|
||||
from typing import List
|
||||
from vision.model import Image
|
||||
|
||||
|
||||
class PrecisionRecall:
    """Container for the precision/recall statistics of one query."""

    def __init__(self, precision, recall, ap):
        # precision[k] / recall[k] are measured over the top k+1 ranked images.
        self.precision = precision
        self.recall = recall
        # Average precision of the query.
        self.ap = ap


def get_precision(images: List[Image], test):
    """Return the fraction of ``images`` whose category equals ``test``.

    Raises:
        ZeroDivisionError: If ``images`` is empty.
    """
    return sum(1 for i in images if i.category == test) / len(images)


def get_recall(images: List[Image], test, category_count):
    """Return the matches in ``images`` as a fraction of ``category_count``.

    Args:
        images: Retrieved images to inspect.
        test: Category counted as a match.
        category_count: Total number of images of that category.

    Raises:
        ZeroDivisionError: If ``category_count`` is zero.
    """
    return sum(1 for i in images if i.category == test) / category_count


def get_pr(images: List[Image], query: Image):
    """Compute precision, recall and average precision for a ranked query.

    ``images`` is ranked by ascending ``distance``. Precision and recall are
    evaluated after each rank, and average precision is the mean of the
    precision values at the ranks where a relevant image (same category as
    ``query``) appears.

    Args:
        images: Candidate images, each with ``distance`` and ``category``.
        query: The query image; only its ``category`` is read.

    Returns:
        A ``PrecisionRecall`` holding the per-rank precision list, the
        per-rank recall list and the average precision. When no candidate
        shares the query's category, recall values and ``ap`` are ``0.0``
        instead of raising ``ZeroDivisionError``.
    """
    ranked = sorted(images, key=lambda x: x.distance)
    relevance = [image.category == query.category for image in ranked]
    query_category_count = sum(1 for flag in relevance if flag)

    p = []
    r = []
    precision_at_hits = []
    hits = 0
    # A running hit count gives the same values as re-counting every prefix
    # of the ranking, in a single pass.
    for rank, is_relevant in enumerate(relevance, start=1):
        if is_relevant:
            hits += 1
        precision = hits / rank
        p.append(precision)
        r.append(hits / query_category_count if query_category_count else 0.0)
        if is_relevant:
            precision_at_hits.append(precision)

    if query_category_count:
        ap = sum(precision_at_hits) / query_category_count
    else:
        ap = 0.0

    return PrecisionRecall(precision=p, recall=r, ap=ap)
|
@ -12,6 +12,12 @@ class Image:
|
||||
self.name = name
|
||||
self.descriptor = descriptor
|
||||
|
||||
self.distance = 0
|
||||
|
||||
def clear(self):
    """Reset the computed state: drop the descriptor and zero the distance."""
    self.descriptor, self.distance = None, 0
|
||||
|
||||
@property
def shape(self):
    """Shape of the underlying pixel array."""
    pixel_array = self.pixels
    return pixel_array.shape
|
||||
|
59
vision/util/edge.py
Normal file
59
vision/util/edge.py
Normal file
@ -0,0 +1,59 @@
|
||||
from vision.model import Image
|
||||
from typing import List
|
||||
import numpy as np
|
||||
from scipy.signal import convolve2d
|
||||
import math as m
|
||||
import cv2
|
||||
|
||||
|
||||
class Edge:
    """Pairs a gradient-magnitude array with its gradient-angle array."""

    def __init__(self, magnitude: np.array, angle: np.array):
        # Both arrays are stored by reference, not copied.
        self.magnitude, self.angle = magnitude, angle
|
||||
|
||||
|
||||
def get_edge_angle_hist(edge: Edge, bins: int, threshold: float):
    """Histogram the gradient angles of pixels whose magnitude is strong.

    Each pixel with ``magnitude > threshold`` is assigned to the angle bin
    ``floor(angle / (2 * pi) * bins)``. Bin ``k`` of the result always
    counts angle bin ``k``, whichever bins happen to be occupied, so
    histograms from different images or grid cells are directly comparable.

    Args:
        edge: Object with 2-D ``magnitude`` and ``angle`` arrays of equal
            shape. Angles are expected in ``[0, 2 * pi)``; values that fall
            outside the bin range are ignored.
        bins: Number of angle bins.
        threshold: Minimum magnitude (exclusive) for a pixel to be counted.

    Returns:
        A ``(hist, bin_edges)`` tuple laid out like ``numpy.histogram``
        output. ``hist`` is normalised to sum to 1, or is all zeros when no
        pixel exceeds the threshold. ``bin_edges`` runs from 0 to ``bins``
        in unit steps.
    """
    strong = edge.magnitude > threshold
    bin_indices = np.floor(edge.angle[strong] / (2 * np.pi) * bins)

    # Fix the range explicitly. Without it numpy derives the bin edges from
    # the min/max of the data, so output bins would not match angle bins.
    counts, bin_edges = np.histogram(bin_indices, bins=bins, range=(0, bins))

    total = counts.sum()
    if total == 0:
        # No edge pixels: return zeros rather than the NaNs a 0/0 density
        # normalisation would produce.
        return np.zeros(bins), bin_edges
    # Bins have unit width, so the density is simply counts / total.
    return counts / total, bin_edges
|
||||
|
||||
|
||||
def get_edge_info(pixels: np.ndarray = None,
                  image: Image = None,
                  images: List[Image] = None,
                  blur: bool = True):
    """Compute gradient magnitude and angle with Sobel kernels.

    The input is converted to greyscale, optionally smoothed with a 3x3 box
    filter, and convolved with horizontal and vertical Sobel kernels (scaled
    by 1/4). Angles are wrapped into ``[0, 2 * pi)``.

    The source is chosen in priority order ``images``, ``image``, ``pixels``.

    Args:
        pixels: Raw colour pixel array. It is converted with
            ``cv2.COLOR_BGR2GRAY``, so BGR channel order is assumed --
            NOTE(review): confirm the loader's channel order.
        image: Single image; the ``Edge`` for its pixels is returned.
        images: Images to process; each ``Edge`` is stored on the image's
            ``descriptor`` attribute.
        blur: When true, smooth the greyscale image before differentiating.

    Returns:
        An ``Edge`` for ``pixels`` or ``image``; ``None`` for ``images``.
        The gradient convolutions use scipy's default 'full' mode, so the
        result arrays are two elements larger than the input along each
        axis.

    Raises:
        KeyError: If none of ``pixels``, ``image`` or ``images`` is given.
    """
    if pixels is None and image is None and images is None:
        raise KeyError('no image provided')

    kx = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]) / 4
    ky = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]]) / 4
    box = np.ones((3, 3)) / 9

    def extract(i):
        grey = cv2.cvtColor(i, cv2.COLOR_BGR2GRAY)

        if blur:
            # Differentiate the smoothed image; previously the blurred
            # result was computed and then discarded. mode='same' keeps the
            # output shape identical to the blur=False case.
            grey = convolve2d(grey, box, mode='same')

        dx = convolve2d(grey, kx)
        dy = convolve2d(grey, ky)

        mag = np.sqrt(dx**2 + dy**2)
        angle = np.arctan2(dy, dx)
        # arctan2 yields (-pi, pi]; shift into [0, 2*pi).
        angle = (angle + 2 * np.pi) % (2 * np.pi)
        return Edge(mag, angle)

    if images is not None:
        for img in images:
            img.descriptor = extract(img.pixels)
        return None

    if image is not None:
        return extract(image.pixels)

    return extract(pixels)
|
46
vision/visualsearch/__init__.py
Normal file
46
vision/visualsearch/__init__.py
Normal file
@ -0,0 +1,46 @@
|
||||
from typing import List
|
||||
import random
|
||||
import numpy as np
|
||||
from vision.model import Image
|
||||
import vision.maths.precision_recall as pr
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class QueryResult:
    """Bundles the outcome of a visual-search query."""

    def __init__(self,
                 sorted_images: List[Image],
                 query_image: Image,
                 precision_recall: pr.PrecisionRecall):
        # Image the query was run for.
        self.query_image = query_image
        # Candidate images handed back by the query.
        self.sorted_images = sorted_images
        # Precision/recall statistics computed for those candidates.
        self.precision_recall = precision_recall
|
||||
|
||||
|
||||
def run_query(images: List[Image], distance_measure=None, query_index=None):
    """Rank ``images`` against a query image and score the ranking.

    Every image's ``distance`` attribute is set to the distance between its
    descriptor and the query's descriptor. The query image itself (matched
    by category and name) is removed, the remaining images are sorted by
    ascending distance, and precision/recall is computed for the ranking.

    Args:
        images: Images with descriptors already extracted.
        distance_measure: Callable applied to the descriptor difference
            ``image.descriptor - query.descriptor``. Defaults to
            ``numpy.linalg.norm``.
        query_index: Index into ``images`` of the query; a random image is
            chosen when omitted.

    Returns:
        A ``QueryResult`` with the distance-sorted candidates, the query
        image and its precision/recall statistics.

    Raises:
        ValueError: If any image has no descriptor.
    """
    logger.info(f'running query on {len(images)} images, query index {query_index}')

    if query_index is not None:
        query_image = images[query_index]
    else:
        query_image = random.choice(images)

    # Test the condition itself, not the truthiness of the Image objects.
    if any(i.descriptor is None for i in images):
        raise ValueError('descriptors required for all images')

    if distance_measure is None:
        distance_measure = np.linalg.norm
    for image in images:
        image.distance = distance_measure(image.descriptor - query_image.descriptor)

    # Drop the query itself, then sort so ``sorted_images`` really is sorted.
    candidates = sorted(
        (i for i in images
         if not (i.category == query_image.category and i.name == query_image.name)),
        key=lambda x: x.distance,
    )

    query_pr = pr.get_pr(candidates, query=query_image)

    results = QueryResult(sorted_images=candidates,
                          query_image=query_image,
                          precision_recall=query_pr)
    logger.info(f'query finished AP: {results.precision_recall.ap}')
    return results
|
Loading…
Reference in New Issue
Block a user