restructured, added notebooks, refreshed data, poetry
This commit is contained in:
parent
7a9f1fab27
commit
8ef8536213
4
.gitignore
vendored
4
.gitignore
vendored
@ -4,4 +4,6 @@ __pycache__
|
||||
.idea
|
||||
.fm
|
||||
scratch.py
|
||||
service.json
|
||||
service.json
|
||||
cache.json
|
||||
.env
|
14
README.md
14
README.md
@ -1,4 +1,12 @@
|
||||
listening analysis
|
||||
==================
|
||||
# Listening Analysis
|
||||
|
||||
performing analysis on listening habits using last.fm and spotify data
|
||||
Notebooks, [analysis](analysis.ipynb) and other [stats](stats.ipynb).
|
||||
|
||||
Combining Spotify & Last.fm data for exploring habits and trends
|
||||
Uses two data sources,
|
||||
|
||||
1. Last.fm scrobbles
|
||||
2. Spotify audio features
|
||||
|
||||
The two are joined by searching for Last.fm tracks on Spotify to get a Uri; the track name and artist name are provided for the query.
|
||||
These Uris can be used to retrieve Spotify feature descriptors. `all_joined()` runs a BigQuery query that joins the scrobble time series with their audio features and returns the result as a pandas DataFrame.
|
407
analysis.ipynb
Normal file
407
analysis.ipynb
Normal file
File diff suppressed because one or more lines are too long
@ -0,0 +1,27 @@
|
||||
import logging

import pandas as pd

# audio feature columns that hold floats in [0, 1]
float_headers = ["acousticness", "danceability", "energy", "instrumentalness", "liveness", "speechiness", "valence"]
# full set of Spotify audio feature descriptor columns
descriptor_headers = ["duration_ms", "mode", "loudness", "key", "tempo", "time_signature"] + float_headers


def init_log():
    """Configure package loggers on one shared stream handler.

    The 'listening' logger is verbose (DEBUG); the framework loggers
    are limited to WARNING so their chatter doesn't drown analysis output.
    """
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(levelname)s %(name)s:%(funcName)s - %(message)s'))

    for logger_name, level in [('listening', 'DEBUG'),
                               ('spotframework', 'WARNING'),
                               ('fmframework', 'WARNING'),
                               ('spotfm', 'WARNING')]:
        log = logging.getLogger(logger_name)
        log.setLevel(level)
        log.addHandler(handler)
|
@ -8,13 +8,17 @@ logger = logging.getLogger(__name__)
|
||||
class Cache:
|
||||
def __init__(self, cache):
|
||||
self.cache = cache
|
||||
# dictionary indexed by artist name followed by track name
|
||||
|
||||
def set_track(self, name, artist, uri=None, audio_features=None):
|
||||
name = str(name).lower()
|
||||
artist = str(artist).lower()
|
||||
|
||||
# ARTIST
|
||||
if self.cache['cache'].get(artist) is None:
|
||||
self.cache['cache'][artist] = {name: {}}
|
||||
|
||||
# TRACK
|
||||
if self.cache['cache'][artist].get(name) is None:
|
||||
self.cache['cache'][artist][name] = {}
|
||||
|
||||
@ -39,7 +43,7 @@ def load_cache_from_storage(path: str = '.', name: str = 'cache.json'):
|
||||
return Cache(json.loads(file.read()))
|
||||
else:
|
||||
logger.error(f'{os.path.join(path, name)} does not exist')
|
||||
return {'cache': {}}
|
||||
return Cache({'cache': {}})
|
||||
|
||||
|
||||
def write_cache_to_storage(cache: Cache, path: str = '.', name: str = 'cache.json'):
|
||||
|
27
analysis/net.py
Normal file
27
analysis/net.py
Normal file
@ -0,0 +1,27 @@
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from spotframework.model.track import PlaylistTrack
|
||||
from spotframework.net.network import Network as SpotNet, NetworkUser
|
||||
|
||||
def get_spotnet():
    """Build an authenticated Spotify network client.

    Credentials come from the SPOT_CLIENT / SPOT_SECRET / SPOT_REFRESH
    environment variables; the access token is refreshed before returning.
    """
    user = NetworkUser(client_id=os.environ['SPOT_CLIENT'],
                       client_secret=os.environ['SPOT_SECRET'],
                       refresh_token=os.environ['SPOT_REFRESH'])
    return SpotNet(user).refresh_access_token()
|
||||
|
||||
def get_playlist(name: str, spotnet: SpotNet):
    """Retrieve the user's playlist called *name*, with its tracks populated.

    Raises:
        ValueError: if no playlist with that name exists. (Previously this
            surfaced as an opaque IndexError from ``[...][0]``.)
    """
    playlists = spotnet.playlists()
    playlist = next((i for i in playlists if i.name == name), None)
    if playlist is None:
        raise ValueError(f'playlist {name!r} not found')

    playlist.tracks = spotnet.playlist_tracks(uri=playlist.uri)
    return playlist
|
||||
|
||||
def track_frame(tracks: List[PlaylistTrack]):
    """Flatten playlist tracks into a DataFrame of track/artist name pairs.

    Only the first artist of each track is kept.
    """
    rows = [
        [item.track.name, item.track.artists[0].name]
        for item in tracks
    ]
    return pd.DataFrame(rows, columns=["track", "artist"])
|
102
analysis/prep/feature.py
Normal file
102
analysis/prep/feature.py
Normal file
@ -0,0 +1,102 @@
|
||||
import datetime
|
||||
import logging
|
||||
from csv import DictWriter
|
||||
|
||||
from google.cloud import bigquery
|
||||
|
||||
from analysis.cache import Cache
|
||||
|
||||
from fmframework.net import Network as FMNetwork
|
||||
from spotframework.net.network import Network as SpotNetwork
|
||||
from spotframework.model.uri import Uri
|
||||
|
||||
logger = logging.getLogger('listening')
|
||||
|
||||
def prepare_features(spotnet: SpotNetwork,
                     fmnet: FMNetwork,
                     cache: Cache,
                     limit: int = None):
    """Pull audio features for scrobbled tracks and dump them to CSV.

    Thin wrapper: collect via populated_features(), persist via
    save_features().
    """
    save_features(populated_features(spotnet=spotnet,
                                     fmnet=fmnet,
                                     cache=cache,
                                     limit=limit))
|
||||
|
||||
def populated_features(spotnet: SpotNetwork,
                       fmnet: FMNetwork,
                       cache: Cache,
                       limit: int = None):
    """Collect audio feature dicts for every scrobbled track with a known uri.

    Distinct uris are read from the scrobbles BigQuery dataset; feature sets
    already in the cache are reused, the remainder are pulled from Spotify
    and written back to the cache.

    Args:
        spotnet: authenticated Spotify client used for pulling tracks/features.
        fmnet: Last.fm client (unused here, kept for interface symmetry).
        cache: uri/feature cache.
        limit: optional cap on the number of uris queried.

    Returns:
        list of audio-feature dicts.
    """
    client = bigquery.Client()

    query = (
        'SELECT '
        ' DISTINCT uri, track, album, artist '
        'FROM `sarsooxyz.scrobbles.*` '
        'WHERE '
        ' uri IS NOT NULL '
        'ORDER BY artist '
    )

    if limit is not None:
        query += f'LIMIT {limit} '

    logger.info('querying uris')
    rows = client.query(query).result()

    features = []
    for_pulling = []

    # HIT CACHE: EAFP — a cache miss shows up as KeyError (no 'features' key)
    # or TypeError (entry is None)
    logger.info('polling cache')
    for row in rows:
        cache_entry = cache.get_track(row.track, row.artist)

        try:
            features.append(cache_entry['features'])
        except (KeyError, TypeError):
            for_pulling.append(row)

    # GET SPOTIFY TRACKS for everything the cache couldn't answer
    logger.info('pulling tracks')
    tracks = spotnet.tracks(uris=[i.uri for i in for_pulling])

    if tracks is not None:
        logger.info('populating features')
        tracks = spotnet.populate_track_audio_features(tracks)
        features += [i.audio_features.to_dict() for i in tracks if i.audio_features is not None]

        # cache what was pulled; this pass is guarded by the None check above
        # so a failed pull cannot raise TypeError iterating `tracks`
        logger.info('caching pulled')
        for cacheable in for_pulling:
            track = next((i for i in tracks if str(i.uri) == cacheable.uri), None)
            if track is not None and track.audio_features is not None:
                cache.set_track(name=cacheable.track,
                                artist=cacheable.artist,
                                audio_features=track.audio_features.to_dict())

    return features
|
||||
|
||||
# column order for the tab-separated feature dump
FEATURE_HEADERS = ['acousticness',
                   'analysis_url',
                   'danceability',
                   'duration_ms',
                   'energy',
                   'uri',
                   'instrumentalness',
                   'key',
                   'liveness',
                   'loudness',
                   'mode',
                   'speechiness',
                   'tempo',
                   'time_signature',
                   'track_href',
                   'valence']


def save_features(features):
    """Dump audio-feature dicts to a timestamped, tab-separated CSV.

    The file is written to the working directory as
    ``<timestamp>_features.csv`` (colons replaced so the name is valid on
    Windows). Unknown keys in a feature dict are ignored and missing keys
    are written as empty cells, so one irregular row no longer aborts the
    whole dump with ValueError.
    """
    date = str(datetime.datetime.now()).replace(':', '.')
    with open(f'{date}_features.csv', 'w', newline='', encoding='UTF-8') as fileobj:
        writer = DictWriter(fileobj, fieldnames=FEATURE_HEADERS,
                            dialect='excel-tab',
                            extrasaction='ignore', restval='')
        writer.writeheader()

        for feature in features:
            writer.writerow(feature)
|
80
analysis/prep/scrobble.py
Normal file
80
analysis/prep/scrobble.py
Normal file
@ -0,0 +1,80 @@
|
||||
import datetime
|
||||
import logging
|
||||
from csv import DictWriter
|
||||
|
||||
from analysis.cache import Cache
|
||||
|
||||
from fmframework.net import Network as FMNetwork
|
||||
from spotframework.net.network import Network as SpotNetwork
|
||||
from spotframework.model.uri import Uri
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def prepare_scrobbles(spotnet: SpotNetwork,
                      fmnet: FMNetwork,
                      cache: Cache,
                      from_date: datetime.datetime = None,
                      to_date: datetime.datetime = None,
                      limit: int = None):
    """Fetch scrobbles for a date range, resolve Spotify uris, dump to CSV.

    Thin wrapper: collect via populated_scrobbles(), persist via
    save_scrobbles().
    """
    save_scrobbles(populated_scrobbles(spotnet=spotnet,
                                       fmnet=fmnet,
                                       cache=cache,
                                       from_date=from_date,
                                       to_date=to_date,
                                       limit=limit))
|
||||
|
||||
def populated_scrobbles(spotnet: SpotNetwork,
                        fmnet: FMNetwork,
                        cache: Cache,
                        from_date: datetime.datetime = None,
                        to_date: datetime.datetime = None,
                        limit: int = None):
    """Fetch Last.fm scrobbles and attach a Spotify uri to each.

    Uris are taken from the cache where present; otherwise a Spotify
    search on track + artist name is made and the winner cached for next
    time. Scrobbles that cannot be resolved get ``uri = None``.
    """
    # get all scrobbles for the date range
    scrobbles = fmnet.recent_tracks(limit=limit, from_time=from_date, to_time=to_date, page_limit=200)

    # populate with uris
    for scrobble in scrobbles:
        cache_entry = cache.get_track(name=scrobble.track.name.lower(),
                                      artist=scrobble.track.artist.name.lower())

        if cache_entry is not None and cache_entry.get('uri'):
            # uri is cached — no network round trip needed
            scrobble.uri = cache_entry.get('uri')
            continue

        # cache missed or entry has no uri — search Spotify
        logger.info(f'pulling {scrobble.track}')
        search_hits = spotnet.search(query_types=[Uri.ObjectType.track],
                                     track=scrobble.track.name,
                                     artist=scrobble.track.artist.name,
                                     response_limit=5).tracks

        if search_hits:
            found_uri = str(search_hits[0].uri)
            cache.set_track(name=scrobble.track.name.lower(),
                            artist=scrobble.track.artist.name.lower(),
                            uri=found_uri)
            scrobble.uri = found_uri
        else:
            logger.debug('no search tracks returned')
            scrobble.uri = None

    return scrobbles
|
||||
|
||||
def save_scrobbles(scrobbles):
    """Dump scrobbles to a timestamped, tab-separated CSV.

    Written to the working directory as ``<timestamp>_scrobbles.csv``
    (colons replaced so the name is valid on Windows); one row per
    scrobble with Last.fm mbids and the resolved Spotify uri.
    """
    stamp = str(datetime.datetime.now()).replace(':', '.')
    headers = ['track', 'album', 'artist', 'time', 'track id', 'album id', 'artist id', 'uri']

    with open(f'{stamp}_scrobbles.csv', 'w', newline='', encoding='UTF-8') as fileobj:
        writer = DictWriter(fileobj, fieldnames=headers, dialect='excel-tab')
        writer.writeheader()

        for scrobble in scrobbles:
            track = scrobble.track
            writer.writerow({
                'track': track.name,
                'album': track.album.name,
                'artist': track.artist.name,
                'time': scrobble.time,
                'track id': track.mbid,
                'album id': track.album.mbid,
                'artist id': track.artist.mbid,
                'uri': '' if scrobble.uri is None else str(scrobble.uri),
            })
|
23
analysis/query.py
Normal file
23
analysis/query.py
Normal file
@ -0,0 +1,23 @@
|
||||
|
||||
from google.cloud import bigquery
|
||||
|
||||
client = bigquery.Client()
|
||||
|
||||
def all_joined(limit: int = 200):
    """Join scrobbles with their Spotify audio features via BigQuery.

    Returns a pandas DataFrame of the scrobble time series inner-joined on
    uri with the audio features table. A non-negative *limit* caps the row
    count; pass a negative limit to fetch everything.
    """
    select_columns = (
        ' Scrobbles.track, Scrobbles.album, Scrobbles.artist, Scrobbles.time, Scrobbles.uri, '
        ' Features.acousticness, Features.danceability, Features.duration_ms, '
        ' Features.energy, Features.instrumentalness, Features.key, Features.liveness, '
        ' Features.loudness, Features.mode, Features.speechiness, Features.tempo, '
        ' Features.time_signature, Features.valence '
    )

    query = (
        'SELECT '
        + select_columns +
        'FROM `sarsooxyz.scrobbles.*` AS Scrobbles '
        'INNER JOIN `sarsooxyz.audio_features.features` AS Features '
        'ON Scrobbles.uri = Features.uri '
    )

    if limit >= 0:
        query += f' LIMIT {limit}'

    # module-level BigQuery client executes the query
    return client.query(query).to_dataframe()
|
@ -1,93 +0,0 @@
|
||||
from fmframework.net.network import Network as FmNet
|
||||
|
||||
from spotframework.net.network import Network as SpotNet
|
||||
from spotframework.net.user import NetworkUser
|
||||
from spotframework.model.uri import Uri
|
||||
from google.cloud import bigquery
|
||||
|
||||
from csv import DictWriter
|
||||
|
||||
import datetime
|
||||
import os
|
||||
from log import logger
|
||||
|
||||
import analysis.cache
|
||||
|
||||
spotnet = SpotNet(NetworkUser(client_id=os.environ['SPOT_CLIENT'],
|
||||
client_secret=os.environ['SPOT_SECRET'],
|
||||
refresh_token=os.environ['SPOT_REFRESH']).refresh_access_token())
|
||||
fmnet = FmNet(username='sarsoo', api_key=os.environ['FM_CLIENT'])
|
||||
|
||||
cache = analysis.cache.load_cache_from_storage()
|
||||
|
||||
client = bigquery.Client()
|
||||
|
||||
# Perform a query.
|
||||
QUERY = (
|
||||
'SELECT '
|
||||
' DISTINCT uri, track, album, artist '
|
||||
'FROM `sarsooxyz.scrobbles.*` '
|
||||
'WHERE '
|
||||
' uri IS NOT NULL '
|
||||
'ORDER BY artist '
|
||||
)
|
||||
logger.info('querying uris')
|
||||
query_job = client.query(QUERY)
|
||||
rows = query_job.result()
|
||||
|
||||
features = []
|
||||
for_pulling = []
|
||||
|
||||
logger.info('polling cache')
|
||||
for row in rows:
|
||||
cache_entry = cache.get_track(row.track, row.artist)
|
||||
|
||||
if cache_entry is not None:
|
||||
if cache_entry.get('features') is None:
|
||||
features.append(cache_entry)
|
||||
continue
|
||||
|
||||
for_pulling.append(row)
|
||||
|
||||
logger.info('pulling tracks')
|
||||
tracks = spotnet.get_tracks(uri_strings=[i.uri for i in for_pulling])
|
||||
|
||||
if tracks is not None:
|
||||
logger.info('populating features')
|
||||
tracks = spotnet.populate_track_audio_features(tracks)
|
||||
features += [i.audio_features.to_dict() for i in tracks if i.audio_features is not None]
|
||||
|
||||
logger.info('caching pulled')
|
||||
for cacheable in for_pulling:
|
||||
track = next((i for i in tracks if str(i.uri) == cacheable.uri), None)
|
||||
if track is not None and track.audio_features is not None:
|
||||
cache.set_track(name=cacheable.track, artist=cacheable.artist, audio_features=track.audio_features.to_dict())
|
||||
|
||||
logger.info('dumping')
|
||||
date = str(datetime.date.today())
|
||||
with open(f'{date}_features.csv', 'w', newline='') as fileobj:
|
||||
|
||||
headers = ['acousticness',
|
||||
'analysis_url',
|
||||
'danceability',
|
||||
'duration_ms',
|
||||
'energy',
|
||||
'uri',
|
||||
'instrumentalness',
|
||||
'key',
|
||||
'key_code',
|
||||
'liveness',
|
||||
'loudness',
|
||||
'mode',
|
||||
'speechiness',
|
||||
'tempo',
|
||||
'time_signature',
|
||||
'track_href',
|
||||
'valence']
|
||||
writer = DictWriter(fileobj, fieldnames=headers, dialect='excel-tab')
|
||||
writer.writeheader()
|
||||
|
||||
for feature in features:
|
||||
writer.writerow(feature)
|
||||
|
||||
analysis.cache.write_cache_to_storage(cache)
|
@ -1,32 +0,0 @@
|
||||
import os, json, pprint
|
||||
|
||||
uri_cache_name = 'cache.json'
|
||||
if os.path.isfile(uri_cache_name):
|
||||
with open(uri_cache_name, 'r') as uri_cache:
|
||||
uris = json.loads(uri_cache.read())
|
||||
|
||||
new_cache = {
|
||||
'cache': {}
|
||||
}
|
||||
|
||||
for uri in uris:
|
||||
|
||||
try:
|
||||
new_cache['cache'][uri['artist']]
|
||||
except KeyError:
|
||||
new_cache['cache'][uri['artist']] = {}
|
||||
|
||||
try:
|
||||
new_cache['cache'][uri['artist']][uri['name']]
|
||||
except KeyError:
|
||||
new_cache['cache'][uri['artist']][uri['name']] = {}
|
||||
|
||||
new_cache['cache'][uri['artist']][uri['name']]['uri'] = uri['uri']
|
||||
|
||||
pprint.pprint(new_cache)
|
||||
|
||||
with open(uri_cache_name, 'w') as uri_cache:
|
||||
uri_cache.write(json.dumps(new_cache))
|
||||
|
||||
|
||||
|
2123
poetry.lock
generated
Normal file
2123
poetry.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
27
prep-audio-features.py
Normal file
27
prep-audio-features.py
Normal file
@ -0,0 +1,27 @@
|
||||
import datetime
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from fmframework.net.network import Network as FMNet
|
||||
from spotframework.net.network import Network as SpotNet, NetworkUser
|
||||
from spotframework.model.uri import Uri
|
||||
|
||||
from analysis.prep.feature import prepare_features
|
||||
from analysis.cache import load_cache_from_storage, write_cache_to_storage
|
||||
from analysis import init_log
|
||||
|
||||
# load secrets from .env and configure package log levels/handlers
load_dotenv()
init_log()

# Spotify client authenticated from SPOT_* environment variables
spotnet = SpotNet(NetworkUser(client_id=os.environ['SPOT_CLIENT'],
                              client_secret=os.environ['SPOT_SECRET'],
                              refresh_token=os.environ['SPOT_REFRESH'])).refresh_access_token()
# Last.fm client; FM_CLIENT holds the api key
fmnet = FMNet(username='sarsoo', api_key=os.environ['FM_CLIENT'])
# uri/feature cache persisted as cache.json in the working directory
cache = load_cache_from_storage()

try:
    # pull audio features for every scrobbled track and dump to CSV
    prepare_features(spotnet, fmnet, cache)
except Exception as e:
    # NOTE(review): "Occured" is a typo in this user-facing message
    print(f"Error Occured: {e}")
finally:
    # always persist the cache, even on failure, to keep pulled data
    write_cache_to_storage(cache)
|
33
prep-scrobbles.py
Normal file
33
prep-scrobbles.py
Normal file
@ -0,0 +1,33 @@
|
||||
import datetime
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from fmframework.net.network import Network as FMNet
|
||||
from spotframework.net.network import Network as SpotNet, NetworkUser
|
||||
from spotframework.model.uri import Uri
|
||||
|
||||
from analysis.prep.scrobble import prepare_scrobbles, populated_scrobbles
|
||||
from analysis.cache import load_cache_from_storage, write_cache_to_storage
|
||||
from analysis import init_log
|
||||
|
||||
# load secrets from .env and configure package log levels/handlers
load_dotenv()
init_log()

# Spotify client authenticated from SPOT_* environment variables
spotnet = SpotNet(NetworkUser(client_id=os.environ['SPOT_CLIENT'],
                              client_secret=os.environ['SPOT_SECRET'],
                              refresh_token=os.environ['SPOT_REFRESH'])).refresh_access_token()
# Last.fm client; FM_CLIENT holds the api key
fmnet = FMNet(username='sarsoo', api_key=os.environ['FM_CLIENT'])
# uri cache persisted as cache.json in the working directory
cache = load_cache_from_storage()

try:
    # dump one CSV of scrobbles per calendar year, 2017-2020 inclusive
    for year in range(2017, 2021):
        from_date = datetime.datetime(year=year, month=1, day=1)
        to_date = datetime.datetime(year=year + 1, month=1, day=1)

        print(f"Getting {year}")

        prepare_scrobbles(spotnet, fmnet, cache, from_date, to_date)
except Exception as e:
    # NOTE(review): "Occured" is a typo in this user-facing message
    print(f"Error Occured: {e}")
finally:
    # always persist the cache, even on failure, to keep resolved uris
    write_cache_to_storage(cache)
|
29
pyproject.toml
Normal file
29
pyproject.toml
Normal file
@ -0,0 +1,29 @@
|
||||
[tool.poetry]
|
||||
name = "listening-analysis"
|
||||
version = "0.1.0"
|
||||
description = "Analysing listening habits using Spotify & Last.fm data"
|
||||
authors = ["andy <andy@sarsoo.xyz>"]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.8,<3.10"
|
||||
spotframework = {path = "../spotframework"}
|
||||
fmframework = {path = "../fmframework"}
|
||||
numpy = "^1.20.0"
|
||||
pandas = "^1.2.1"
|
||||
opencv-python = "^4.5.1"
|
||||
ipykernel = "^5.4.3"
|
||||
jupyterlab = {version = "^3.0.6", optional = true}
|
||||
google-cloud-bigquery = "^2.7.0"
|
||||
python-dotenv = "^0.15.0"
|
||||
matplotlib = "^3.3.4"
|
||||
pyarrow = "^3.0.0"
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
pylint = "^2.6.0"
|
||||
|
||||
[tool.poetry.extras]
|
||||
jupyter = ["jupyterlab"]
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
build-backend = "poetry.core.masonry.api"
|
@ -1,22 +0,0 @@
|
||||
cachetools==4.0.0
|
||||
certifi==2019.11.28
|
||||
chardet==3.0.4
|
||||
Click==7.0
|
||||
google-api-core==1.16.0
|
||||
google-auth==1.11.0
|
||||
google-cloud-bigquery==1.23.1
|
||||
google-cloud-core==1.2.0
|
||||
google-resumable-media==0.5.0
|
||||
googleapis-common-protos==1.51.0
|
||||
idna==2.8
|
||||
numpy==1.18.1
|
||||
opencv-python==4.1.2.30
|
||||
protobuf==3.11.2
|
||||
pyasn1==0.4.8
|
||||
pyasn1-modules==0.2.8
|
||||
pytz==2019.3
|
||||
requests==2.22.0
|
||||
rsa==4.0
|
||||
six==1.14.0
|
||||
tabulate==0.8.6
|
||||
urllib3==1.25.7
|
@ -1,70 +0,0 @@
|
||||
from fmframework.net.network import Network as FmNet
|
||||
|
||||
from spotframework.net.network import Network as SpotNet
|
||||
from spotframework.net.user import NetworkUser
|
||||
from spotframework.model.uri import Uri
|
||||
|
||||
from csv import DictWriter
|
||||
|
||||
import os
|
||||
import datetime
|
||||
from log import logger
|
||||
|
||||
import analysis.cache
|
||||
|
||||
spotnet = SpotNet(NetworkUser(client_id=os.environ['SPOT_CLIENT'],
|
||||
client_secret=os.environ['SPOT_SECRET'],
|
||||
refresh_token=os.environ['SPOT_REFRESH']).refresh_access_token())
|
||||
fmnet = FmNet(username='sarsoo', api_key=os.environ['FM_CLIENT'])
|
||||
|
||||
# initialise cache
|
||||
cache = analysis.cache.load_cache_from_storage()
|
||||
|
||||
# scrobble range
|
||||
from_date = datetime.datetime(year=2019, month=1, day=1)
|
||||
to_date = datetime.datetime(year=2020, month=1, day=1)
|
||||
|
||||
scrobbles = fmnet.get_recent_tracks(from_time=from_date, to_time=to_date, page_limit=200)
|
||||
|
||||
# populate with uris
|
||||
for scrobble in scrobbles:
|
||||
|
||||
cache_entry = cache.get_track(name=scrobble.track.name.lower(), artist=scrobble.track.artist.name.lower())
|
||||
|
||||
if cache_entry is not None and cache_entry.get('uri'):
|
||||
scrobble.uri = cache_entry.get('uri')
|
||||
else:
|
||||
logger.info(f'pulling {scrobble.track}')
|
||||
spotify_search = spotnet.search(query_types=[Uri.ObjectType.track],
|
||||
track=scrobble.track.name,
|
||||
artist=scrobble.track.artist.name,
|
||||
response_limit=5).tracks
|
||||
if len(spotify_search) > 0:
|
||||
cache.set_track(name=scrobble.track.name.lower(),
|
||||
artist=scrobble.track.artist.name.lower(),
|
||||
uri=str(spotify_search[0].uri))
|
||||
scrobble.uri = str(spotify_search[0].uri)
|
||||
else:
|
||||
logger.debug('no search tracks returned')
|
||||
scrobble.uri = None
|
||||
|
||||
date = str(datetime.date.today())
|
||||
with open(f'{date}_scrobbles.csv', 'w', newline='') as fileobj:
|
||||
|
||||
headers = ['track', 'album', 'artist', 'time', 'track id', 'album id', 'artist id', 'uri']
|
||||
writer = DictWriter(fileobj, fieldnames=headers, dialect='excel-tab')
|
||||
writer.writeheader()
|
||||
|
||||
for scrobble in scrobbles:
|
||||
writer.writerow({
|
||||
'track': scrobble.track.name,
|
||||
'album': scrobble.track.album.name,
|
||||
'artist': scrobble.track.artist.name,
|
||||
'time': scrobble.time,
|
||||
'track id': scrobble.track.mbid,
|
||||
'album id': scrobble.track.album.mbid,
|
||||
'artist id': scrobble.track.artist.mbid,
|
||||
'uri': str(scrobble.uri) if scrobble.uri is not None else ''
|
||||
})
|
||||
|
||||
analysis.cache.write_cache_to_storage(cache)
|
103
stats.ipynb
Normal file
103
stats.ipynb
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user