added descriptor extraction, visual search query, edge detection

This commit is contained in:
aj 2019-12-08 14:42:30 +00:00
parent e19f13bbe5
commit 1dea8e54d5
9 changed files with 386 additions and 16 deletions

View File

@ -9,16 +9,16 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.image.AxesImage at 0x7f6a9f8a01d0>"
"<matplotlib.image.AxesImage at 0x7f9b3bf4b650>"
]
},
"execution_count": 3,
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
},
@ -55,16 +55,16 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.image.AxesImage at 0x7f6a9c204bd0>"
"<matplotlib.image.AxesImage at 0x7f9b3a1cd610>"
]
},
"execution_count": 4,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
},
@ -99,16 +99,16 @@
},
{
"cell_type": "code",
"execution_count": 68,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.image.AxesImage at 0x7f758913b8d0>"
"<matplotlib.image.AxesImage at 0x7f9b39fbcc90>"
]
},
"execution_count": 68,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
},
@ -142,7 +142,7 @@
"dx = np.concatenate((dx_grey, dx_blur[1:-1, 1:-1]), axis=1)\n",
"dy = np.concatenate((dy_grey, dy_blur[1:-1, 1:-1]), axis=1)\n",
"\n",
"x = np.sqrt(pow(dx, 2) + pow(dy, 2))\n",
"x = np.sqrt(dx**2 + dy**2)\n",
"plt.imshow(x>0.08)"
]
}

File diff suppressed because one or more lines are too long

View File

@ -1,7 +1,27 @@
from vision.model import Image
from typing import List
import numpy as np
import logging
logger = logging.getLogger(__name__)
def extract_average_rgb(images: List[Image]):
    """Store the average colour of each image as its descriptor.

    For every image the pixel array is averaged over its first two axes and
    the result is written to ``image.descriptor``. Nothing is returned.

    Args:
        images: images to process; each must expose a ``pixels`` array.
    """
    for image in images:
        # Average the pixel array rather than the Image wrapper: the wrapper
        # keeps its data in ``pixels`` (see ``Image.shape``).
        image.descriptor = image.pixels.mean(axis=(0, 1))
def extract_average_rgb(pixels: np.array = None,
                        image: Image = None,
                        images: List[Image] = None):
    """Compute the average-colour descriptor from one of three sources.

    The sources are checked in the order ``images``, ``image``, ``pixels``
    and only the first one provided is used.

    Args:
        pixels: raw pixel array; its mean over the first two axes is returned.
        image: single image whose ``descriptor`` is set in place.
        images: list of images whose ``descriptor`` is set in place.

    Returns:
        The mean over axes ``(0, 1)`` when only ``pixels`` is used, otherwise
        ``None`` (the result is stored on the image objects).

    Raises:
        KeyError: if none of ``pixels``, ``image`` or ``images`` is given.
    """
    if pixels is None and image is None and images is None:
        raise KeyError('no image provided')

    def extract(array):
        return array.mean(axis=(0, 1))

    if images is not None:
        length = len(images)
        for index, item in enumerate(images):
            logger.debug('generating %s of %s', index, length)
            # Average the pixel data, not the Image wrapper itself; the
            # other extractors also read ``image.pixels``.
            item.descriptor = extract(item.pixels)
        return None

    if image is not None:
        image.descriptor = extract(image.pixels)
        return None

    return extract(pixels)

View File

@ -0,0 +1,86 @@
from vision.model import Image
from typing import List
import numpy as np
import vision.descriptor.avg_RGB as rgb
import vision.util.edge as edge
import logging
logger = logging.getLogger(__name__)
def grid_image(height: int, width: int, pixels: np.array):
    """Split a pixel array into a ``height`` x ``width`` grid of segments.

    Cell boundaries are the rounded multiples of ``rows / height`` and
    ``columns / width``, so cells may differ by a pixel when the image size
    is not divisible by the grid size.

    Args:
        height: number of grid rows.
        width: number of grid columns.
        pixels: array with at least two dimensions (rows, columns, ...).
            Any trailing axes (e.g. colour channels) are kept whole.

    Returns:
        A list of ``height * width`` slices of ``pixels`` in row-major order.
    """
    rows, columns = pixels.shape[:2]
    segments = []
    for i in range(height):
        # Row bounds only depend on ``i``; compute them once per grid row.
        row_start = round(i * rows / height)
        row_end = round((i + 1) * rows / height)
        for j in range(width):
            column_start = round(j * columns / width)
            column_end = round((j + 1) * columns / width)
            # Indexing only the first two axes keeps this valid for both
            # greyscale (2-D) and multi-channel (3-D) arrays.
            segments.append(pixels[row_start:row_end, column_start:column_end])
    return segments
def extract_spatial_texture(height: int,
                            width: int,
                            bins: int,
                            threshold: float,
                            pixels: np.array = None,
                            image: Image = None,
                            images: List[Image] = None):
    """Build a gridded edge-orientation descriptor.

    The pixel array is cut into a ``height`` x ``width`` grid; for every cell
    the edge information is computed and an edge-angle histogram with
    ``bins`` bins (edges weaker than ``threshold`` ignored) is taken. The
    per-cell histograms are joined, in grid order, into one flat array.

    Sources are checked in the order ``images``, ``image``, ``pixels``. For
    ``images``/``image`` the descriptor is stored on the image object(s) and
    ``None`` is returned; for ``pixels`` the descriptor is returned.

    Raises:
        KeyError: if none of ``pixels``, ``image`` or ``images`` is given.
    """
    if all(source is None for source in (pixels, image, images)):
        raise KeyError('no image provided')

    def extract(i):
        histograms = []
        for cell in grid_image(height, width, i):
            cell_edges = edge.get_edge_info(pixels=cell)
            cell_hist = edge.get_edge_angle_hist(cell_edges, bins=bins, threshold=threshold)
            # Element 0 is the histogram itself; element 1 holds bin edges.
            histograms.append(cell_hist[0])
        # Leading empty array keeps the "no cells" result an empty array.
        return np.concatenate([np.array([])] + histograms)

    if images is not None:
        length = len(images)
        for index, member in enumerate(images):
            logger.debug(f'generating {index} of {length}')
            member.descriptor = extract(member.pixels)
        return None

    if image is not None:
        image.descriptor = extract(image.pixels)
        return None

    return extract(pixels)
def extract_spatial_average_rgb(height: int,
                                width: int,
                                pixels: np.array = None,
                                image: Image = None,
                                images: List[Image] = None):
    """Build a gridded average-colour descriptor.

    The pixel array is cut into a ``height`` x ``width`` grid and the average
    colour of every cell is appended, in grid order, to one flat array.

    Sources are checked in the order ``images``, ``image``, ``pixels``. For
    ``images``/``image`` the descriptor is stored on the image object(s) and
    ``None`` is returned; for ``pixels`` the descriptor is returned.

    Raises:
        KeyError: if none of ``pixels``, ``image`` or ``images`` is given.
    """
    if pixels is None and image is None and images is None:
        raise KeyError('no image provided')

    def extract(i):
        # Grid the array handed to this helper. The previous code gridded the
        # outer ``pixels`` argument, which is None whenever ``image`` or
        # ``images`` is the source.
        segments = grid_image(height, width, i)
        descriptor = np.array([])
        for seg in segments:
            descriptor = np.append(descriptor, rgb.extract_average_rgb(pixels=seg))
        return descriptor

    if images is not None:
        length = len(images)
        for index, item in enumerate(images):
            logger.debug('generating %s of %s', index, length)
            item.descriptor = extract(item.pixels)
        return None

    if image is not None:
        image.descriptor = extract(image.pixels)
        return None

    return extract(pixels)

0
vision/maths/__init__.py Normal file
View File

View File

@ -0,0 +1,38 @@
from typing import List
from vision.model import Image
class PrecisionRecall:
    """Container for the precision/recall figures of one query.

    The three values are stored exactly as given; ``get_pr`` fills them with
    a list of precisions, a list of recalls and the average precision.
    """

    def __init__(self, precision, recall, ap):
        self.precision, self.recall, self.ap = precision, recall, ap
def get_precision(images: List[Image], test):
    """Return the fraction of ``images`` whose ``category`` equals ``test``."""
    matching = sum(1 for candidate in images if candidate.category == test)
    return matching / len(images)
def get_recall(images: List[Image], test, category_count):
    """Return matches in ``images`` for ``test`` divided by ``category_count``."""
    matching = sum(1 for candidate in images if candidate.category == test)
    return matching / category_count
def get_pr(images: List[Image], query: Image):
    """Compute precision, recall and average precision for a ranked query.

    ``images`` is ranked by ascending ``distance``. At every rank the
    precision (hits so far / rank) and recall (hits so far / total relevant)
    are recorded, where a hit is an image whose ``category`` equals the
    query's. Average precision is the sum of the precisions at the hit ranks
    divided by the total number of relevant images.

    Args:
        images: candidate images, each with ``distance`` and ``category``.
        query: the query image; only its ``category`` is read.

    Returns:
        A ``PrecisionRecall`` holding the per-rank precision list, the
        per-rank recall list and the average precision. When no candidate
        shares the query's category, recall and average precision are 0.0
        instead of raising ``ZeroDivisionError``.
    """
    ranked = sorted(images, key=lambda x: x.distance)
    relevant_total = sum(1 for i in ranked if i.category == query.category)

    precision = []
    recall = []
    precision_at_hits = []
    hits = 0
    # A running hit count gives the same ratios as rescanning every prefix,
    # in a single pass.
    for rank, image in enumerate(ranked, start=1):
        is_hit = image.category == query.category
        if is_hit:
            hits += 1
        precision.append(hits / rank)
        recall.append(hits / relevant_total if relevant_total else 0.0)
        if is_hit:
            precision_at_hits.append(hits / rank)

    ap = sum(precision_at_hits) / relevant_total if relevant_total else 0.0
    return PrecisionRecall(precision=precision, recall=recall, ap=ap)

View File

@ -12,6 +12,12 @@ class Image:
self.name = name
self.descriptor = descriptor
self.distance = 0
def clear(self):
    """Discard the stored descriptor and reset the distance to zero."""
    self.distance = 0
    self.descriptor = None
@property
def shape(self):
    """Shape of the image's pixel array."""
    pixel_array = self.pixels
    return pixel_array.shape

59
vision/util/edge.py Normal file
View File

@ -0,0 +1,59 @@
from vision.model import Image
from typing import List
import numpy as np
from scipy.signal import convolve2d
import math as m
import cv2
class Edge:
    """Per-pixel edge information: gradient magnitude and gradient angle.

    Both arrays are stored as given, without copying or validation.
    """

    def __init__(self, magnitude: np.array, angle: np.array):
        self.angle = angle
        self.magnitude = magnitude
def get_edge_angle_hist(edge: Edge, bins: int, threshold: float):
    """Histogram the orientations of the strong edges in ``edge``.

    Pixels whose magnitude exceeds ``threshold`` are assigned the bin index
    ``floor(angle / (2 * pi) * bins)`` and the indices are counted into
    ``bins`` unit-width bins covering ``[0, bins]``.

    Args:
        edge: object with ``magnitude`` and ``angle`` arrays of equal shape;
            angles are expected in ``[0, 2 * pi)``. Indices that fall outside
            ``[0, bins]`` are ignored by the histogram.
        bins: number of orientation bins.
        threshold: minimum magnitude for a pixel to be counted.

    Returns:
        A ``(hist, bin_edges)`` tuple like ``np.histogram``. ``hist`` holds
        the fraction of counted pixels per bin (summing to 1), or all zeros
        when no pixel is counted.
    """
    strong = edge.magnitude > threshold
    bin_vals = np.floor((edge.angle[strong] / (2 * np.pi)) * bins)

    # Fix the range explicitly. Without it np.histogram stretches the bins
    # over the min/max of the data, so bin k would stand for a different
    # orientation on every call.
    counts, bin_edges = np.histogram(bin_vals, bins=bins, range=(0, bins))

    # Bins are one unit wide, so the density is simply counts / total.
    # Normalising by hand avoids the 0/0 NaN result when nothing is counted.
    total = counts.sum()
    hist = counts / total if total else np.zeros(bins)
    return hist, bin_edges
def get_edge_info(pixels: np.array = None,
                  image: Image = None,
                  images: List[Image] = None,
                  blur: bool = True):
    """Compute gradient magnitude and angle for an image.

    The input is converted to greyscale with ``cv2.COLOR_BGR2GRAY``,
    optionally smoothed with a 3x3 mean filter, and convolved with a pair of
    3x3 Sobel-style kernels. The angle is normalised into ``[0, 2 * pi)``.

    Sources are checked in the order ``images``, ``image``, ``pixels``.

    Args:
        pixels: colour pixel array to analyse.
        image: single image; its ``pixels`` are analysed.
        images: list of images; each gets its ``descriptor`` set to the
            resulting ``Edge``.
        blur: smooth the greyscale image before taking gradients.

    Returns:
        An ``Edge`` for ``image`` or ``pixels``; ``None`` for ``images``.

    Raises:
        KeyError: if none of ``pixels``, ``image`` or ``images`` is given.
    """
    if pixels is None and image is None and images is None:
        raise KeyError('no image provided')

    def extract(i):
        kx = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]) / 4
        ky = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]]) / 4

        grey = cv2.cvtColor(i, cv2.COLOR_BGR2GRAY)

        if blur:
            # The blurred image was previously stored in an unused variable,
            # so ``blur`` had no effect. mode='same' keeps the greyscale
            # size, so the output shape matches what callers got before.
            grey = convolve2d(grey, np.ones((3, 3)) / 9, mode='same')

        dx = convolve2d(grey, kx)
        dy = convolve2d(grey, ky)

        mag = np.sqrt(dx**2 + dy**2)
        angle = np.arctan2(dy, dx)
        # arctan2 yields (-pi, pi]; shift into [0, 2*pi).
        angle = (angle + 2 * np.pi) % (2 * np.pi)

        return Edge(mag, angle)

    if images is not None:
        for item in images:
            item.descriptor = extract(item.pixels)
        return None

    if image is not None:
        return extract(image.pixels)

    return extract(pixels)

View File

@ -0,0 +1,46 @@
from typing import List
import random
import numpy as np
from vision.model import Image
import vision.maths.precision_recall as pr
import logging
logger = logging.getLogger(__name__)
class QueryResult:
    """Outcome of a single visual-search query.

    Holds the candidate images, the image that was used as the query and
    the precision/recall figures computed for it. Values are stored as given.
    """

    def __init__(self,
                 sorted_images: List[Image],
                 query_image: Image,
                 precision_recall: pr.PrecisionRecall):
        self.precision_recall = precision_recall
        self.query_image = query_image
        self.sorted_images = sorted_images
def run_query(images: List[Image], distance_measure=None, query_index=None):
    """Run a visual-search query over ``images`` and score the ranking.

    One image is used as the query (``images[query_index]``, or a random
    choice when ``query_index`` is None). Every image's ``distance`` is set
    from the difference between its descriptor and the query's. The query
    itself (same ``category`` and ``name``) is then removed, the remaining
    images are ranked by ascending distance and precision/recall is computed.

    Args:
        images: images with descriptors already extracted.
        distance_measure: optional callable taking the descriptor difference
            and returning a distance; defaults to ``np.linalg.norm``.
        query_index: index of the query image, or None to pick at random.

    Returns:
        A ``QueryResult`` with the ranked images, the query image and its
        precision/recall figures.

    Raises:
        ValueError: if any image has no descriptor.
    """
    logger.info('running query on %s images, query index %s', len(images), query_index)

    if query_index is not None:
        query_image = images[query_index]
    else:
        query_image = random.choice(images)

    # Test the descriptor directly rather than the truthiness of the image.
    if any(i.descriptor is None for i in images):
        raise ValueError('descriptors required for all images')

    measure = np.linalg.norm if distance_measure is None else distance_measure
    for image in images:
        image.distance = measure(image.descriptor - query_image.descriptor)

    # Drop the query and rank the rest. The result previously carried the
    # unsorted list in ``sorted_images`` because only get_pr sorted its copy.
    ranked = sorted(
        (i for i in images
         if not (i.category == query_image.category and i.name == query_image.name)),
        key=lambda x: x.distance,
    )

    query_pr = pr.get_pr(ranked, query=query_image)

    results = QueryResult(sorted_images=ranked,
                          query_image=query_image,
                          precision_recall=query_pr)
    logger.info('query finished AP: %s', results.precision_recall.ap)
    return results