Shift image processing and downloading into a separate package; OpenCV is no longer required for basic network operations

This commit is contained in:
aj 2020-07-17 12:40:31 +01:00
parent 948179ed5b
commit c02fcb117f
5 changed files with 207 additions and 129 deletions

View File

@ -2,33 +2,18 @@ from bs4 import BeautifulSoup
import requests import requests
from datetime import date from datetime import date
from fmframework.model import Album, Artist, Image from fmframework.model import Album, Artist
from fmframework.net.network import Network, LastFMNetworkException from fmframework.net.network import Network, LastFMNetworkException
import fmframework.image
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def get_album_chart_image(net: Network,
                          username: str,
                          from_date: date,
                          to_date: date,
                          limit: int = 20,
                          image_size: Image.Size = None,
                          image_width: int = 5):
    """Build an image grid for a user's album chart over a date range.

    The chart is produced by get_populated_album_chart and then handed to
    fmframework.image.get_image_grid_from_objects, whose result is returned
    unchanged.

    :param net: network used both to populate the chart and to fetch images
    :param username: last.fm user whose chart is requested
    :param from_date: start of the chart range
    :param to_date: end of the chart range
    :param limit: number of albums requested for the chart
    :param image_size: image size forwarded to the grid builder (None is passed through)
    :param image_width: grid width forwarded to the grid builder
    """
    populated = get_populated_album_chart(net=net,
                                          username=username,
                                          from_date=from_date,
                                          to_date=to_date,
                                          limit=limit)
    grid_options = {
        'net': net,
        'objects': populated,
        'image_size': image_size,
        'image_width': image_width,
    }
    return fmframework.image.get_image_grid_from_objects(**grid_options)
def get_populated_album_chart(net: Network, username: str, from_date: date, to_date: date, limit: int): def get_populated_album_chart(net: Network, username: str, from_date: date, to_date: date, limit: int):
chart = get_scraped_album_chart(username, from_date, to_date, limit) """Scrape chart from last.fm frontend before pulling each from the backend for a complete object"""
chart = get_scraped_album_chart(username or net.username, from_date, to_date, limit)
logger.info('populating scraped albums') logger.info('populating scraped albums')
albums = [] albums = []
for counter, scraped in enumerate(chart): for counter, scraped in enumerate(chart):
@ -42,6 +27,8 @@ def get_populated_album_chart(net: Network, username: str, from_date: date, to_d
def get_scraped_album_chart(username: str, from_date: date, to_date: date, limit: int): def get_scraped_album_chart(username: str, from_date: date, to_date: date, limit: int):
"""Scrape 'light' objects from last.fm frontend based on date range and limit"""
logger.info(f'scraping album chart from {from_date} to {to_date} for {username}') logger.info(f'scraping album chart from {from_date} to {to_date} for {username}')
pages = int(limit / 50) pages = int(limit / 50)
@ -58,6 +45,8 @@ def get_scraped_album_chart(username: str, from_date: date, to_date: date, limit
def get_scraped_album_chart_page(username: str, from_date: date, to_date: date, page: int): def get_scraped_album_chart_page(username: str, from_date: date, to_date: date, page: int):
"""Scrape 'light' objects single page of last.fm frontend based on date range"""
logger.debug(f'loading page {page} from {from_date} to {to_date} for {username}') logger.debug(f'loading page {page} from {from_date} to {to_date} for {username}')
html = requests.get(f'https://www.last.fm/user/{username}/library/albums' html = requests.get(f'https://www.last.fm/user/{username}/library/albums'

View File

@ -1,6 +1,10 @@
import numpy as np import numpy as np
from typing import List from typing import List
from fmframework.net.network import Network, ImageSizeNotAvailableException from datetime import date
from fmframework.net.network import Network
from fmframework.chart import get_populated_album_chart
from fmframework.image.downloader import Downloader, ImageSizeNotAvailableException
from fmframework.model import Image from fmframework.model import Image
import logging import logging
@ -35,18 +39,39 @@ def arrange_cover_grid(images: List[np.array], width: int = 5):
return final_img return final_img
def get_image_grid_from_objects(net: Network, objects, image_size=None, final_scale=(300, 300), image_width: int = 5): def get_image_grid_from_objects(objects,
logger.debug(f'getting {image_size.name if image_size is not None else "best"} image grid of {len(objects)} objects at width {image_width}') image_size=None,
final_scale=(300, 300),
image_width: int = 5,
overlay_count: bool = False,
loader=None,
check_cache=True,
cache=True):
logger.debug(f'getting {image_size.name if image_size is not None else "best"} image grid '
f'of {len(objects)} objects at width {image_width}')
if loader is None:
loader = Downloader()
images = [] images = []
for counter, iter_object in enumerate(objects): for counter, iter_object in enumerate(objects):
logger.debug(f'downloading image {counter+1} of {len(objects)}') logger.debug(f'downloading image {counter+1} of {len(objects)}')
try: try:
if image_size is None: if image_size is None:
downloaded = net.download_best_image(iter_object, final_scale=final_scale) downloaded = loader.download_best_image(iter_object,
final_scale=final_scale,
check_cache=check_cache,
cache=cache)
else: else:
downloaded = net.download_image_by_size(iter_object, size=image_size) downloaded = loader.download_image_by_size(iter_object,
size=image_size,
check_cache=check_cache,
cache=cache)
if downloaded is not None: if downloaded is not None:
if overlay_count:
loader.add_scrobble_count_to_image(downloaded, iter_object.user_scrobbles)
images.append(downloaded) images.append(downloaded)
else: else:
images.append(get_blank_image(final_scale[0], final_scale[1])) images.append(get_blank_image(final_scale[0], final_scale[1]))
@ -63,10 +88,47 @@ def chunk(l, n):
yield l[i:i+n] yield l[i:i+n]
def generate_album_chart_grid(net: Network, class AlbumChartCollage:
@staticmethod
def from_relative_range(net: Network,
chart_range: Network.Range, chart_range: Network.Range,
image_size: Image.Size = None, username: str = None,
limit: int = 20, limit: int = 20,
image_width: int = 5): overlay_count: bool = False,
chart = net.get_top_albums(period=chart_range, limit=limit) image_size: Image.Size = None,
return get_image_grid_from_objects(net=net, objects=chart, image_size=image_size, image_width=image_width) image_width: int = 5,
check_cache=True,
cache=True):
chart = net.get_top_albums(username=username,
period=chart_range,
limit=limit)
return get_image_grid_from_objects(objects=chart,
image_size=image_size,
image_width=image_width,
overlay_count=overlay_count,
check_cache=check_cache,
cache=cache)
@staticmethod
def from_dates(net: Network,
               from_date: date,
               to_date: date,
               username: str = None,
               limit: int = 20,
               overlay_count: bool = False,
               image_size: Image.Size = None,
               image_width: int = 5,
               check_cache=True,
               cache=True):
    """Build an album collage for an explicit date range.

    The chart comes from get_populated_album_chart; the remaining options
    are forwarded untouched to get_image_grid_from_objects, whose result is
    returned.

    :param net: network handed to get_populated_album_chart
    :param from_date: start of the chart range
    :param to_date: end of the chart range
    :param username: user whose chart is requested (None is passed through)
    :param limit: number of albums requested for the chart
    :param overlay_count: forwarded to the grid builder
    :param image_size: forwarded to the grid builder
    :param image_width: forwarded to the grid builder
    :param check_cache: forwarded to the grid builder
    :param cache: forwarded to the grid builder
    """
    albums = get_populated_album_chart(net=net,
                                       username=username,
                                       from_date=from_date,
                                       to_date=to_date,
                                       limit=limit)
    grid_options = {
        'image_size': image_size,
        'image_width': image_width,
        'overlay_count': overlay_count,
        'check_cache': check_cache,
        'cache': cache,
    }
    return get_image_grid_from_objects(objects=albums, **grid_options)

View File

@ -0,0 +1,120 @@
import logging
import os
from typing import Union
import requests
import cv2
import numpy as np
from fmframework.model import Album, Artist, Image, Track
from fmframework import config_directory
logger = logging.getLogger(__name__)
class ImageSizeNotAvailableException(Exception):
    """Raised when an object has no image of the requested size."""
class Downloader:
    """Download artwork for fmframework objects as OpenCV images.

    Requests go through one shared ``requests.Session``. Decoded images can
    be written to, and read back from, the ``cache`` folder inside the
    fmframework config directory.
    """

    def __init__(self):
        self.rsession = requests.Session()
        self.cache_path = os.path.join(config_directory, 'cache')

    def download_image_by_size(self,
                               fm_object: Union[Track, Album, Artist],
                               size: Image.Size,
                               check_cache=True,
                               cache=True):
        """Download the image of ``fm_object`` that has exactly ``size``.

        :param fm_object: object whose ``images`` are searched
        :param size: required image size
        :param check_cache: read from the on-disk cache when possible
        :param cache: write a freshly downloaded image to the cache
        :return: the result of download_image, or None when ``fm_object``
            has no usable ``images`` attribute
        :raises ImageSizeNotAvailableException: no image of ``size`` exists
        """
        # Only the image lookup is guarded, so an AttributeError raised while
        # downloading is not misreported as "has no images".
        try:
            images = fm_object.images
            image_pointer = next((i for i in images if i.size == size), None)
        except AttributeError:
            logger.error(f'{fm_object} has no images')
            return None

        if image_pointer is None:
            logger.error(f'image of size {size.name} not found')
            raise ImageSizeNotAvailableException

        return self.download_image(image_pointer=image_pointer, check_cache=check_cache, cache=cache)

    def download_best_image(self,
                            fm_object: Union[Track, Album, Artist],
                            final_scale=None,
                            check_cache=True,
                            cache=True):
        """Download the largest image of ``fm_object`` that can be retrieved.

        Images are tried from the highest ``size.value`` downwards; the first
        one that downloads successfully is returned.

        :param fm_object: object whose ``images`` are tried
        :param final_scale: optional ``(width, height)`` passed to
            ``cv2.resize`` when the image does not already have that size
        :param check_cache: read from the on-disk cache when possible
        :param cache: write a freshly downloaded image to the cache
        :return: the image, or None when nothing could be downloaded or
            ``fm_object`` has no usable ``images`` attribute
        """
        try:
            images = sorted(fm_object.images, key=lambda x: x.size.value, reverse=True)
        except AttributeError:
            logger.error(f'{fm_object} has no images')
            return None

        for image in images:
            downloaded = self.download_image(image_pointer=image, check_cache=check_cache, cache=cache)
            if downloaded is None:
                logger.error('null image returned, iterating')
                continue

            if final_scale is not None:
                # shape is (rows, cols[, channels]) while final_scale is a
                # cv2 dsize (width, height); comparing the raw tuples is
                # always unequal and would resize every image.
                height, width = downloaded.shape[:2]
                if (width, height) != tuple(final_scale):
                    downloaded = cv2.resize(downloaded, final_scale)
            return downloaded

        return None

    @staticmethod
    def add_scrobble_count_to_image(image, count: int):
        """Draw ``count`` (with thousands separators) onto ``image`` in place.

        Two black copies are drawn slightly offset beneath a white copy.

        :param image: image array modified in place by ``cv2.putText``
        :param count: number rendered as text
        """
        text = f'{count:,}'
        black = (0, 0, 0)
        white = (255, 255, 255)
        # Order matters: the white text must be drawn last, on top of the black copies.
        for origin, colour in (((11, 36), black), ((11, 38), black), ((9, 35), white)):
            cv2.putText(image,
                        text,
                        origin,
                        cv2.FONT_HERSHEY_DUPLEX,
                        1,
                        colour,
                        2)

    def download_image(self, image_pointer: Image, check_cache=True, cache=True):
        """Perform network action to download Image object

        :param image_pointer: image whose ``link`` is downloaded
        :param check_cache: return the cached copy when one can be read
        :param cache: write the decoded image to the cache folder
        :return: the decoded image, or None on an invalid link, an HTTP
            error status or undecodable data
        """
        logger.info(f'downloading {image_pointer.size.name} image - {image_pointer.link}')

        # Check for valid link to download
        if not image_pointer.link:
            logger.error('invalid image url')
            return None

        # Cache file name is the last two URL segments joined; slicing keeps
        # this safe for a link that contains no '/'.
        url_split = image_pointer.link.split('/')
        file_path = os.path.join(self.cache_path, ''.join(url_split[-2:]))

        if check_cache and os.path.exists(file_path):
            cached = cv2.imread(file_path)
            if cached is not None:
                return cached
            # imread yields None for an unreadable file; fall through and
            # download again instead of reporting a missing image.
            logger.warning('failed to read cached image, downloading')

        # No stream=True: the body is always consumed, so the connection goes
        # back to the session pool on the error path as well.
        resp = self.rsession.get(image_pointer.link)

        if not 200 <= resp.status_code < 300:
            logger.error(f'http error {resp.status_code}')
            return None

        image = None
        if resp.content:
            image = cv2.imdecode(np.frombuffer(resp.content, dtype=np.uint8), cv2.IMREAD_COLOR)

        if image is None:
            logger.error('failed to decode image')
            return None

        if cache:
            os.makedirs(self.cache_path, exist_ok=True)
            if not cv2.imwrite(filename=file_path, img=image):
                logger.error('failed to dump to cache')

        return image

View File

@ -1,25 +1,17 @@
import requests import requests
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, List, Union from typing import Optional, List
from copy import deepcopy from copy import deepcopy
import logging import logging
import os
from enum import Enum from enum import Enum
from datetime import datetime, date, time, timedelta from datetime import datetime, date, time, timedelta
import numpy as np
import cv2
from fmframework.model import Album, Artist, Image, Wiki, WeeklyChart, Scrobble, Track from fmframework.model import Album, Artist, Image, Wiki, WeeklyChart, Scrobble, Track
from fmframework import config_directory
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class ImageSizeNotAvailableException(Exception):
    """Raised when no image of the requested size is found on an object."""
@dataclass @dataclass
class LastFMNetworkException(Exception): class LastFMNetworkException(Exception):
http_code: int http_code: int
@ -276,93 +268,6 @@ class Network:
return [self.parse_artist(i) for i in iterator.items] return [self.parse_artist(i) for i in iterator.items]
def download_image_by_size(self, fm_object: Union[Track, Album, Artist], size: Image.Size):
    """Download the image of ``fm_object`` whose size equals ``size``.

    :return: the result of download_image; None when an AttributeError is
        raised anywhere in the lookup or download (logged as "has no images")
    :raises ImageSizeNotAvailableException: no image of ``size`` exists
    """
    try:
        for candidate in fm_object.images:
            if candidate.size == size:
                return self.download_image(image_pointer=candidate)

        logger.error(f'image of size {size.name} not found')
        raise ImageSizeNotAvailableException
    except AttributeError:
        logger.error(f'{fm_object} has no images')
def download_best_image(self, fm_object: Union[Track, Album, Artist], final_scale=None, add_count: bool = False):
    """Download the largest image of ``fm_object`` that can be retrieved.

    Images are tried from the highest ``size.value`` downwards; the first
    one that downloads successfully is returned.

    :param fm_object: object whose ``images`` are tried
    :param final_scale: optional ``(width, height)`` passed to ``cv2.resize``
        when the image does not already have that size
    :param add_count: draw ``fm_object.user_scrobbles`` onto the image
    :return: the image, or None when nothing could be downloaded or an
        AttributeError was raised (logged as "has no images")
    """
    # The try stays around the whole body on purpose: it also covers the
    # fm_object.user_scrobbles access for objects without that attribute.
    try:
        images = sorted(fm_object.images, key=lambda x: x.size.value, reverse=True)

        for image in images:
            downloaded = self.download_image(image_pointer=image)
            if downloaded is None:
                logger.error('null image returned, iterating')
                continue

            if final_scale is not None:
                # shape is (rows, cols[, channels]) while final_scale is a
                # cv2 dsize (width, height); comparing the raw tuples is
                # always unequal and would resize every image.
                height, width = downloaded.shape[:2]
                if (width, height) != tuple(final_scale):
                    downloaded = cv2.resize(downloaded, final_scale)

            if add_count:
                self.add_scrobble_count_to_image(downloaded, fm_object.user_scrobbles)

            return downloaded
    except AttributeError:
        logger.error(f'{fm_object} has no images')

    return None
@staticmethod
def add_scrobble_count_to_image(image, count: int):
    """Draw ``count`` (with thousands separators) onto ``image`` in place.

    Two black copies are drawn slightly offset beneath a white copy.
    """
    label = f'{count:,}'
    dark = (0, 0, 0)
    light = (255, 255, 255)
    # Same three draws, in the same order: the white text goes on last.
    layers = (
        ((11, 36), dark),
        ((11, 38), dark),
        ((9, 35), light),
    )
    for position, colour in layers:
        cv2.putText(image, label, position, cv2.FONT_HERSHEY_SIMPLEX, 1, colour, 2)
@staticmethod
def download_image(image_pointer: Image, cache=True):
    """Download ``image_pointer.link`` and decode it with OpenCV.

    A readable copy in the ``cache`` folder of the config directory is
    returned in preference to downloading.

    :param image_pointer: image whose ``link`` is downloaded
    :param cache: write the decoded image to the cache folder
    :return: the decoded image, or None on an invalid link, an HTTP error
        status or undecodable data
    """
    logger.info(f'downloading {image_pointer.size.name} image - {image_pointer.link}')

    if not image_pointer.link:
        logger.error('invalid image url')
        return None

    # Cache file name is the last two URL segments joined; slicing keeps
    # this safe for a link that contains no '/'.
    url_split = image_pointer.link.split('/')
    cache_path = os.path.join(config_directory, 'cache')
    file_path = os.path.join(cache_path, ''.join(url_split[-2:]))

    if os.path.exists(file_path):
        cached = cv2.imread(file_path)
        if cached is not None:
            return cached
        # imread yields None for an unreadable file; download again instead.
        logger.warning('failed to read cached image, downloading')

    # No stream=True: the body is always consumed, so the connection is
    # released on the error path as well.
    resp = requests.get(image_pointer.link)

    if not 200 <= resp.status_code < 300:
        logger.error(f'http error {resp.status_code}')
        return None

    image = None
    if resp.content:
        image = cv2.imdecode(np.frombuffer(resp.content, dtype=np.uint8), cv2.IMREAD_COLOR)

    if image is None:
        # Previously an undecodable body reached cv2.imwrite as None.
        logger.error('failed to decode image')
        return None

    if cache:
        os.makedirs(cache_path, exist_ok=True)
        if not cv2.imwrite(filename=file_path, img=image):
            logger.error('failed to dump to cache')

    return image
def get_weekly_charts(self, username: str = None): def get_weekly_charts(self, username: str = None):
logger.info('getting weekly chart list') logger.info('getting weekly chart list')

View File

@ -1,7 +1,9 @@
beautifulsoup4==4.9.1
certifi==2020.6.20 certifi==2020.6.20
chardet==3.0.4 chardet==3.0.4
idna==2.10 idna==2.10
numpy==1.19.0 numpy==1.19.0
opencv-python==4.2.0.34 opencv-python==4.3.0.36
requests==2.24.0 requests==2.24.0
soupsieve==2.0.1
urllib3==1.25.9 urllib3==1.25.9