pyfmframework/fmframework/chart/__init__.py

85 lines
3.0 KiB
Python

from bs4 import BeautifulSoup
import requests
from datetime import date
from fmframework.model import Album, Artist
from fmframework.net.network import Network, LastFMNetworkException
import logging
logger = logging.getLogger(__name__)
def get_populated_album_chart(net: Network, username: str, from_date: date, to_date: date, limit: int):
"""Scrape chart from last.fm frontend before pulling each from the backend for a complete object"""
chart = get_scraped_album_chart(username or net.username, from_date, to_date, limit)
logger.info('populating scraped albums')
albums = []
for counter, scraped in enumerate(chart):
logger.debug(f'populating {counter+1} of {len(chart)}')
try:
albums.append(net.get_album(name=scraped.name, artist=scraped.artist.name))
except LastFMNetworkException:
logger.exception(f'error occured during album retrieval')
return albums
def get_scraped_album_chart(username: str, from_date: date, to_date: date, limit: int):
"""Scrape 'light' objects from last.fm frontend based on date range and limit"""
logger.info(f'scraping album chart from {from_date} to {to_date} for {username}')
pages = int(limit / 50)
if limit % 50 != 0:
pages += 1
albums = []
for i in range(pages):
scraped_albums = get_scraped_album_chart_page(username, from_date, to_date, i + 1)
if scraped_albums is not None:
albums += scraped_albums
return albums[:limit]
def get_scraped_album_chart_page(username: str, from_date: date, to_date: date, page: int):
"""Scrape 'light' objects single page of last.fm frontend based on date range"""
logger.debug(f'loading page {page} from {from_date} to {to_date} for {username}')
html = requests.get(f'https://www.last.fm/user/{username}/library/albums'
f'?from={from_date.strftime("%Y-%m-%d")}'
f'&to={to_date.strftime("%Y-%m-%d")}'
f'&page={page}')
if 200 <= html.status_code < 300:
parser = BeautifulSoup(html.content, 'html.parser')
chart_section = parser.find('section', id='top-albums-section')
rows = chart_section.find_all('tr', 'chartlist-row')
albums = []
for row in rows:
names = row.find_all('a', title=True)
album_name = names[0]['title']
artist_name = names[1]['title']
scrobble_tag = row.find('span', {"class": "chartlist-count-bar-value"})
scrobble_count = [int(s) for s in scrobble_tag.contents[0].split() if s.isdigit()]
if len(scrobble_count) != 1:
logger.error('no scrobble count integers found')
scrobble_count = 0
else:
scrobble_count = scrobble_count[0]
artist = Artist(name=artist_name)
album = Album(name=album_name, artist=artist, user_scrobbles=scrobble_count)
albums.append(album)
return albums
else:
logger.error(f'HTTP error occurred {html.status_code}')