pyfmframework/fmframework/chart/__init__.py

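"""Scrape album charts from the last.fm web frontend.

These helpers build 'light' Album/Artist objects for an arbitrary date range by
parsing the user's library pages, and can optionally populate each scraped album
via the backend Network object for a complete object.
"""
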
import logging
from datetime import date

import requests
from bs4 import BeautifulSoup

from fmframework.model import Album, Artist
from fmframework.net.network import Network, LastFMNetworkException

logger = logging.getLogger(__name__)


def get_populated_album_chart(net: Network, username: str, from_date: date, to_date: date, limit: int):
    """Scrape chart from the last.fm frontend before pulling each album from the backend for a complete object"""
    chart = get_scraped_album_chart(username or net.username, from_date, to_date, limit)
    logger.info('populating scraped albums')

    albums = []
    for counter, scraped in enumerate(chart):
        logger.debug(f'populating {counter + 1} of {len(chart)}')
        try:
            albums.append(net.get_album(name=scraped.name, artist=scraped.artist.name))
        except LastFMNetworkException:
            logger.exception(f'error occurred retrieving {scraped.name} / {scraped.artist.name}')

    return albums


def get_scraped_album_chart(username: str, from_date: date, to_date: date, limit: int):
    """Scrape 'light' objects from the last.fm frontend based on date range and limit"""
    logger.info(f'scraping album chart from {from_date} to {to_date} for {username}')

    # the frontend lists 50 albums per page, so round the page count up to cover the limit
    pages = limit // 50
    if limit % 50 != 0:
        pages += 1

    albums = []
    for i in range(pages):
        scraped_albums = get_scraped_album_chart_page(username, from_date, to_date, i + 1)
        if scraped_albums is not None:
            albums += scraped_albums

    return albums[:limit]


def get_scraped_album_chart_page(username: str, from_date: date, to_date: date, page: int):
    """Scrape 'light' objects from a single page of the last.fm frontend based on date range"""
    logger.debug(f'loading page {page} from {from_date} to {to_date} for {username}')

    response = requests.get(f'https://www.last.fm/user/{username}/library/albums'
                            f'?from={from_date.strftime("%Y-%m-%d")}'
                            f'&to={to_date.strftime("%Y-%m-%d")}'
                            f'&page={page}')

    if 200 <= response.status_code < 300:
        parser = BeautifulSoup(response.content, 'html.parser')
        chart_section = parser.find('section', id='top-albums-section')
        rows = chart_section.find_all('tr', 'chartlist-row')

        albums = []
        for row in rows:
            # the first two titled anchors in a chart row are the album and artist names
            names = row.find_all('a', title=True)
            album_name = names[0]['title']
            artist_name = names[1]['title']

            # scrobble count is rendered as text such as '40 scrobbles'; extract the integer
            scrobble_tag = row.find('span', {"class": "chartlist-count-bar-value"})
            scrobble_count = [int(s) for s in scrobble_tag.contents[0].split() if s.isdigit()]
            if len(scrobble_count) != 1:
                logger.error('no scrobble count integers found')
                scrobble_count = 0
            else:
                scrobble_count = scrobble_count[0]

            artist = Artist(name=artist_name)
            album = Album(name=album_name, artist=artist, user_scrobbles=scrobble_count)
            albums.append(album)

        return albums
    else:
        logger.error(f'HTTP error occurred {response.status_code}')
        return None
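

# Example usage: a minimal sketch for exercising the scraper directly. The username
# and date range below are placeholders, not values from this module; only the
# scraping path is shown here since it needs no Network/API setup, unlike
# get_populated_album_chart.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    scraped = get_scraped_album_chart(username='some-lastfm-user',  # placeholder username
                                      from_date=date(2020, 1, 1),
                                      to_date=date(2020, 12, 31),
                                      limit=10)
    for album in scraped:
        print(f'{album.name} / {album.artist.name}: {album.user_scrobbles} scrobbles')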