added cache, added audio feature prep

aj 2020-01-25 20:20:17 +00:00
parent d46f441313
commit 7a9f1fab27
5 changed files with 169 additions and 29 deletions

analysis/__init__.py Normal file (empty)

analysis/cache.py Normal file

@@ -0,0 +1,47 @@
import os
import logging
import json

logger = logging.getLogger(__name__)


class Cache:
    """Nested dict cache of track metadata, keyed by artist then track name."""

    def __init__(self, cache):
        self.cache = cache

    def set_track(self, name, artist, uri=None, audio_features=None):
        name = str(name).lower()
        artist = str(artist).lower()

        if self.cache['cache'].get(artist) is None:
            self.cache['cache'][artist] = {name: {}}

        if self.cache['cache'][artist].get(name) is None:
            self.cache['cache'][artist][name] = {}

        if uri is not None:
            self.cache['cache'][artist][name]['uri'] = uri

        if audio_features is not None:
            self.cache['cache'][artist][name]['features'] = audio_features

    def get_track(self, name, artist):
        # normalise the same way as set_track, or cached entries are never found
        name = str(name).lower()
        artist = str(artist).lower()
        try:
            return self.cache['cache'][artist][name]
        except KeyError:
            return None


def load_cache_from_storage(path: str = '.', name: str = 'cache.json'):
    if os.path.exists(os.path.join(path, name)):
        with open(os.path.join(path, name), 'r') as file:
            return Cache(json.loads(file.read()))
    else:
        logger.error(f'{os.path.join(path, name)} does not exist')
        # wrap the empty dict so callers always receive a Cache object
        return Cache({'cache': {}})


def write_cache_to_storage(cache: Cache, path: str = '.', name: str = 'cache.json'):
    with open(os.path.join(path, name), 'w') as file:
        file.write(json.dumps(cache.cache))
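A minimal usage sketch of the new Cache round-trip (not part of the commit); the track name, artist and URI values below are placeholders:

# minimal sketch: load, update, query and persist the cache
from analysis.cache import load_cache_from_storage, write_cache_to_storage

cache = load_cache_from_storage()  # reads ./cache.json if present

# store a URI (and optionally an audio-features dict) for a track
cache.set_track(name='Karma Police', artist='Radiohead',
                uri='spotify:track:...')

entry = cache.get_track(name='Karma Police', artist='Radiohead')
if entry is not None:
    print(entry.get('uri'))

write_cache_to_storage(cache)  # dumps back to ./cache.json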


@@ -3,17 +3,91 @@ from fmframework.net.network import Network as FmNet
from spotframework.net.network import Network as SpotNet
from spotframework.net.user import NetworkUser
from spotframework.model.uri import Uri
from google.cloud import bigquery
from csv import DictWriter
import os
import datetime
import json
from log import logger
import analysis.cache

spotnet = SpotNet(NetworkUser(client_id=os.environ['SPOT_CLIENT'],
                              client_secret=os.environ['SPOT_SECRET'],
                              refresh_token=os.environ['SPOT_REFRESH']).refresh_access_token())
fmnet = FmNet(username='sarsoo', api_key=os.environ['FM_CLIENT'])

cache = analysis.cache.load_cache_from_storage()
client = bigquery.Client()

# pull every distinct scrobbled track that already has a Spotify URI
QUERY = (
    'SELECT '
    ' DISTINCT uri, track, album, artist '
    'FROM `sarsooxyz.scrobbles.*` '
    'WHERE '
    ' uri IS NOT NULL '
    'ORDER BY artist '
)

logger.info('querying uris')
query_job = client.query(QUERY)
rows = query_job.result()

features = []
for_pulling = []

logger.info('polling cache')
for row in rows:
    cache_entry = cache.get_track(row.track, row.artist)
    # use cached audio features where available, otherwise queue the row for a Spotify pull
    if cache_entry is not None and cache_entry.get('features') is not None:
        features.append(cache_entry['features'])
        continue
    for_pulling.append(row)

logger.info('pulling tracks')
tracks = spotnet.get_tracks(uri_strings=[i.uri for i in for_pulling])
if tracks is not None:
    logger.info('populating features')
    tracks = spotnet.populate_track_audio_features(tracks)
    features += [i.audio_features.to_dict() for i in tracks if i.audio_features is not None]

    logger.info('caching pulled')
    for cacheable in for_pulling:
        track = next((i for i in tracks if str(i.uri) == cacheable.uri), None)
        if track is not None and track.audio_features is not None:
            cache.set_track(name=cacheable.track,
                            artist=cacheable.artist,
                            audio_features=track.audio_features.to_dict())

logger.info('dumping')
date = str(datetime.date.today())
with open(f'{date}_features.csv', 'w', newline='') as fileobj:
    headers = ['acousticness',
               'analysis_url',
               'danceability',
               'duration_ms',
               'energy',
               'uri',
               'instrumentalness',
               'key',
               'key_code',
               'liveness',
               'loudness',
               'mode',
               'speechiness',
               'tempo',
               'time_signature',
               'track_href',
               'valence']

    writer = DictWriter(fileobj, fieldnames=headers, dialect='excel-tab')
    writer.writeheader()

    for feature in features:
        writer.writerow(feature)

analysis.cache.write_cache_to_storage(cache)
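For reference, a minimal sketch (not part of the commit) of reading the tab-separated dump back with the standard library; it assumes the script's f'{date}_features.csv' naming:

# minimal sketch: read the excel-tab dialect dump back into dicts
import csv
import datetime

date = str(datetime.date.today())
with open(f'{date}_features.csv', newline='') as fileobj:
    reader = csv.DictReader(fileobj, dialect='excel-tab')
    rows = list(reader)

print(f'{len(rows)} tracks with features')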

cache-transform.py Normal file

@@ -0,0 +1,32 @@
import os
import json
import pprint

uri_cache_name = 'cache.json'

# one-off migration: flat list of {name, artist, uri} dicts -> nested artist/name mapping
if os.path.isfile(uri_cache_name):
    with open(uri_cache_name, 'r') as uri_cache:
        uris = json.loads(uri_cache.read())

    new_cache = {
        'cache': {}
    }

    for uri in uris:
        artist = new_cache['cache'].setdefault(uri['artist'], {})
        track = artist.setdefault(uri['name'], {})
        track['uri'] = uri['uri']

    pprint.pprint(new_cache)

    with open(uri_cache_name, 'w') as uri_cache:
        uri_cache.write(json.dumps(new_cache))
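For illustration (not part of the commit), the transform reshapes the flat URI list into the nested layout that Cache expects; all values below are placeholders:

# before: flat list of track dicts
old_shape = [
    {'name': 'karma police', 'artist': 'radiohead', 'uri': 'spotify:track:...'},
]

# after: artist -> track name -> entry, under a top-level 'cache' key
new_shape = {
    'cache': {
        'radiohead': {
            'karma police': {'uri': 'spotify:track:...'},
        },
    },
}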


@@ -8,58 +8,46 @@ from csv import DictWriter
 import os
 import datetime
 import json
 from log import logger
+import analysis.cache

 spotnet = SpotNet(NetworkUser(client_id=os.environ['SPOT_CLIENT'],
                               client_secret=os.environ['SPOT_SECRET'],
                               refresh_token=os.environ['SPOT_REFRESH']).refresh_access_token())
 fmnet = FmNet(username='sarsoo', api_key=os.environ['FM_CLIENT'])

-# initialise cache
-uri_cache_name = 'uris.json'
-if os.path.isfile(uri_cache_name):
-    with open(uri_cache_name, 'r') as uri_cache:
-        uris = json.loads(uri_cache.read())
-else:
-    uris = []
+cache = analysis.cache.load_cache_from_storage()

 # scrobble range
-from_date = datetime.datetime(year=2018, month=1, day=1)
-to_date = datetime.datetime(year=2019, month=1, day=1)
+from_date = datetime.datetime(year=2019, month=1, day=1)
+to_date = datetime.datetime(year=2020, month=1, day=1)

 scrobbles = fmnet.get_recent_tracks(from_time=from_date, to_time=to_date, page_limit=200)

 # populate with uris
 for scrobble in scrobbles:
-    cache_entry = [i for i in uris if
-                   i['name'] == scrobble.track.name.lower() and
-                   i['artist'] == scrobble.track.artist.name.lower()]
+    cache_entry = cache.get_track(name=scrobble.track.name.lower(), artist=scrobble.track.artist.name.lower())

     # check cache
-    if len(cache_entry) == 0:
+    if cache_entry is not None and cache_entry.get('uri'):
+        scrobble.uri = cache_entry.get('uri')
+    else:
         logger.info(f'pulling {scrobble.track}')
         spotify_search = spotnet.search(query_types=[Uri.ObjectType.track],
                                         track=scrobble.track.name,
                                         artist=scrobble.track.artist.name,
                                         response_limit=5).tracks

         if len(spotify_search) > 0:
-            uris.append({
-                'name': scrobble.track.name.lower(),
-                'artist': scrobble.track.artist.name.lower(),
-                'uri': str(spotify_search[0].uri)
-            })
-            scrobble.uri = spotify_search[0].uri
+            cache.set_track(name=scrobble.track.name.lower(),
+                            artist=scrobble.track.artist.name.lower(),
+                            uri=str(spotify_search[0].uri))
+            scrobble.uri = str(spotify_search[0].uri)
         else:
             logger.debug('no search tracks returned')
             scrobble.uri = None
-    # cache entry available
-    else:
-        # logger.info(f'{scrobble.track} found in cache')
-        scrobble.uri = cache_entry[0]['uri']

 date = str(datetime.date.today())
 with open(f'{date}_scrobbles.csv', 'w', newline='') as fileobj:
@@ -79,5 +67,4 @@ with open(f'{date}_scrobbles.csv', 'w', newline='') as fileobj:
         'uri': str(scrobble.uri) if scrobble.uri is not None else ''
     })

-with open(uri_cache_name, 'w') as uri_cache:
-    uri_cache.write(json.dumps(uris))
+analysis.cache.write_cache_to_storage(cache)