85 lines
3.0 KiB
Python
85 lines
3.0 KiB
Python
from bs4 import BeautifulSoup
|
|
import requests
|
|
from datetime import date
|
|
|
|
from fmframework.model import Album, Artist
|
|
from fmframework.net.network import Network, LastFMNetworkException
|
|
|
|
import logging
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def get_populated_album_chart(net: Network, username: str, from_date: date, to_date: date, limit: int):
    """Return a chart of fully populated albums for a user and date range.

    The chart is first scraped from the last.fm frontend (using
    ``net.username`` when ``username`` is falsy); each scraped entry is then
    looked up through ``net.get_album`` by album and artist name. Entries whose
    lookup raises ``LastFMNetworkException`` are logged and left out of the
    result.
    """
    scraped_chart = get_scraped_album_chart(username or net.username, from_date, to_date, limit)
    logger.info('populating scraped albums')

    populated = []
    for position, entry in enumerate(scraped_chart, start=1):
        logger.debug(f'populating {position} of {len(scraped_chart)}')
        try:
            full_album = net.get_album(name=entry.name, artist=entry.artist.name)
        except LastFMNetworkException:
            # best effort: a failed lookup only drops this one album
            logger.exception('error occured during album retrieval')
        else:
            populated.append(full_album)

    return populated
|
|
|
|
|
|
def get_scraped_album_chart(username: str, from_date: date, to_date: date, limit: int):
    """Scrape 'light' objects from last.fm frontend based on date range and limit.

    Pages are fetched one at a time through ``get_scraped_album_chart_page``
    and concatenated in page order.

    :param username: last.fm user whose album library is scraped
    :param from_date: start of the date range
    :param to_date: end of the date range
    :param limit: maximum number of albums to return
    :return: list of at most ``limit`` scraped albums; empty when ``limit``
        is not positive
    """
    logger.info(f'scraping album chart from {from_date} to {to_date} for {username}')

    # A non-positive limit asks for nothing. Without this guard a negative
    # limit would still fetch a page and then slice it from the wrong end.
    if limit <= 0:
        return []

    page_size = 50  # the page count assumes 50 albums per frontend page
    pages = -(-limit // page_size)  # integer ceiling division, no float round-trip

    albums = []
    for page in range(1, pages + 1):
        scraped_albums = get_scraped_album_chart_page(username, from_date, to_date, page)
        if scraped_albums is None:
            # this page could not be loaded; keep going with the next one
            continue
        if not scraped_albums:
            # a page without rows means the chart is exhausted, so requesting
            # further pages would only produce more empty results
            break
        albums += scraped_albums

    return albums[:limit]
|
|
|
|
|
|
def get_scraped_album_chart_page(username: str, from_date: date, to_date: date, page: int):
    """Scrape 'light' objects single page of last.fm frontend based on date range.

    :param username: last.fm user whose album library is scraped
    :param from_date: start of the date range, sent as ``YYYY-MM-DD``
    :param to_date: end of the date range, sent as ``YYYY-MM-DD``
    :param page: page number to load
    :return: list of ``Album`` objects built from the chart rows (empty when
        the page holds no chart), or ``None`` when the response status is
        not 2xx
    """
    logger.debug(f'loading page {page} from {from_date} to {to_date} for {username}')

    response = requests.get(f'https://www.last.fm/user/{username}/library/albums'
                            f'?from={from_date.strftime("%Y-%m-%d")}'
                            f'&to={to_date.strftime("%Y-%m-%d")}'
                            f'&page={page}',
                            timeout=30)  # requests waits forever unless a timeout is given

    if not 200 <= response.status_code < 300:
        logger.error(f'HTTP error occurred {response.status_code}')
        return None

    parser = BeautifulSoup(response.content, 'html.parser')

    chart_section = parser.find('section', id='top-albums-section')
    if chart_section is None:
        # find() returns None when the section is absent; treat the page as
        # holding no albums rather than failing on the lookup below
        logger.warning(f'no album chart section found on page {page}')
        return []

    albums = []
    for row in chart_section.find_all('tr', 'chartlist-row'):
        names = row.find_all('a', title=True)
        if len(names) < 2:
            # the album and artist names are read from the first two titled links
            logger.error('album or artist name missing from chart row')
            continue

        album_name = names[0]['title']
        artist_name = names[1]['title']

        artist = Artist(name=artist_name)
        album = Album(name=album_name, artist=artist, user_scrobbles=_parse_scrobble_count(row))
        albums.append(album)

    return albums


def _parse_scrobble_count(row) -> int:
    """Return the scrobble count displayed in a chart row, or 0 if it cannot be read."""
    scrobble_tag = row.find('span', {"class": "chartlist-count-bar-value"})
    if scrobble_tag is not None and scrobble_tag.contents:
        text = str(scrobble_tag.contents[0])
    else:
        text = ''

    # NOTE(review): larger counts appear to be rendered with a thousands
    # separator (e.g. "1,234 scrobbles"), which str.isdigit() rejects, so the
    # commas are dropped before the check - confirm against the live markup.
    tokens = (word.replace(',', '') for word in text.split())
    counts = [int(token) for token in tokens if token.isdigit()]

    if len(counts) != 1:
        logger.error('no scrobble count integers found')
        return 0
    return counts[0]
|