Use an enum for the time range; add scrobble count to returned tracks

This commit is contained in:
aj 2020-08-11 18:28:37 +01:00
parent ebe2fc94bf
commit cc67a08402

View File

@ -1,4 +1,5 @@
from datetime import date, datetime
from typing import Union
from bs4 import BeautifulSoup
from requests import Session
@ -11,9 +12,27 @@ import logging
logger = logging.getLogger(__name__)
class LibraryScraper:
rsession = Session()
@staticmethod
def api_date_range_to_url_string(period: Network.Range):
    """Return the date-preset string that corresponds to *period*.

    The given value is compared, in order, against the ``Network.Range``
    members WEEK, MONTH, QUARTER, HALFYEAR, YEAR and OVERALL; the string
    paired with the first member that compares equal is returned.

    Raises:
        TypeError: if *period* compares equal to none of those members.
    """
    # Ordered (member, preset string) pairs; the first equal member wins.
    preset_pairs = (
        (Network.Range.WEEK, 'LAST_7_DAYS'),
        (Network.Range.MONTH, 'LAST_30_DAYS'),
        (Network.Range.QUARTER, 'LAST_90_DAYS'),
        (Network.Range.HALFYEAR, 'LAST_180_DAYS'),
        (Network.Range.YEAR, 'LAST_365_DAYS'),
        (Network.Range.OVERALL, 'ALL'),
    )
    for range_member, preset_string in preset_pairs:
        if period == range_member:
            return preset_string

    raise TypeError(f'invalid period provided, {period} / {type(period)}')
@staticmethod
def get_scrobbled_tracks(username: str, artist: str, net: Network = None, whole_track=True,
from_date: datetime = None, to_date: datetime = None,
@ -79,7 +98,6 @@ class LibraryScraper:
else:
logger.error(f'no tracks returned for page 1 of {artist} / {username}')
@staticmethod
def get_scrobbled_albums(username: str, artist: str, net: Network = None, whole_album=True,
from_date: datetime = None, to_date: datetime = None,
@ -292,7 +310,6 @@ class LibraryScraper:
scrobble_datetime = None
logger.error(f'{len(timestamp_parts)} timestamp parts found, {timestamp_parts}')
track_objects.append(Scrobble(track=Track(name=name_cell.string,
artist=Artist(name=artist),
album=Album(name=album_cell.string,
@ -301,6 +318,10 @@ class LibraryScraper:
time=scrobble_datetime)
)
length = len(track_objects)
for scrobble in track_objects:
scrobble.track.user_scrobbles = length
return track_objects
else:
logger.error(f'no scrobbles returned for page 1 of {track} / {artist} / {username}')
@ -314,7 +335,7 @@ class LibraryScraper:
include_pages=False,
from_date: datetime = None, to_date: datetime = None,
date_preset: str = None):
date_preset: Union[str, Network.Range] = None):
logger.debug(f'loading page {page} of {artist} for {username}')
url = f'https://www.last.fm/user/{username}/library/music/{parse.quote_plus(artist)}'
@ -332,13 +353,19 @@ class LibraryScraper:
if from_date and to_date:
url += f'&from={from_date.strftime("%Y-%m-%d")}&to={to_date.strftime("%Y-%m-%d")}'
elif date_preset:
if isinstance(date_preset, str):
date_preset = date_preset.strip().upper()
if date_preset not in ['LAST_7_DAYS', 'LAST_30_DAYS', 'LAST_90_DAYS',
'LAST_180_DAYS', 'LAST_365_DAYS', 'ALL']:
raise ValueError(f'date range {date_preset} not of allowed value')
url += f'&date_preset={date_preset}'
elif isinstance(date_preset, Network.Range):
url += f'&date_preset={LibraryScraper.api_date_range_to_url_string(date_preset)}'
else:
raise TypeError(f'invalid period provided, {date_preset} / {type(date_preset)}')
html = LibraryScraper.rsession.get(url)
if 200 <= html.status_code < 300:
@ -350,7 +377,7 @@ class LibraryScraper:
objs = [i for i in list_section.tbody.find_all('tr') if i.find('td', class_='chartlist-name')]
if include_pages:
return (objs, len(parser.find_all('li', class_='pagination-page')))
return objs, len(parser.find_all('li', class_='pagination-page'))
else:
return objs
else:
@ -359,6 +386,7 @@ class LibraryScraper:
else:
logger.error(f'HTTP error occurred {html.status_code}')
class UserScraper:
rsession = Session()