added cache, added audio feature prep

parent d46f441313
commit 7a9f1fab27

analysis/__init__.py (new file, 0 lines)
analysis/cache.py (new file, 47 lines)
@@ -0,0 +1,47 @@
import os
import logging
import json

logger = logging.getLogger(__name__)


class Cache:

    def __init__(self, cache):
        self.cache = cache

    def set_track(self, name, artist, uri=None, audio_features=None):
        # keys are normalised to lower case on write
        name = str(name).lower()
        artist = str(artist).lower()

        if self.cache['cache'].get(artist) is None:
            self.cache['cache'][artist] = {name: {}}
        if self.cache['cache'][artist].get(name) is None:
            self.cache['cache'][artist][name] = {}

        if uri is not None:
            self.cache['cache'][artist][name]['uri'] = uri
        if audio_features is not None:
            self.cache['cache'][artist][name]['features'] = audio_features

    def get_track(self, name, artist):
        # lower-case to match the keys written by set_track,
        # otherwise mixed-case lookups always miss
        name = str(name).lower()
        artist = str(artist).lower()
        try:
            return self.cache['cache'][artist][name]
        except KeyError:
            return None


def load_cache_from_storage(path: str = '.', name: str = 'cache.json'):

    if os.path.exists(os.path.join(path, name)):
        with open(os.path.join(path, name), 'r') as file:
            return Cache(json.loads(file.read()))
    else:
        logger.error(f'{os.path.join(path, name)} does not exist')
        # wrap in Cache so callers always get the same interface
        return Cache({'cache': {}})


def write_cache_to_storage(cache: Cache, path: str = '.', name: str = 'cache.json'):
    with open(os.path.join(path, name), 'w') as file:
        file.write(json.dumps(cache.cache))
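
A minimal usage sketch of the new cache module; the track, artist and uri values here are made up:

    # round-trip a track through the cache (hypothetical values)
    import analysis.cache

    cache = analysis.cache.load_cache_from_storage()

    # stored under lower-cased artist/name keys
    cache.set_track(name='Some Track', artist='Some Artist',
                    uri='spotify:track:xyz')

    # lookup normalises the same way, so case does not matter
    entry = cache.get_track(name='some track', artist='SOME ARTIST')
    print(entry)  # -> {'uri': 'spotify:track:xyz'}

    analysis.cache.write_cache_to_storage(cache)  # persists to ./cache.json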

@@ -3,17 +3,91 @@ from fmframework.net.network import Network as FmNet
from spotframework.net.network import Network as SpotNet
from spotframework.net.user import NetworkUser
from spotframework.model.uri import Uri
from google.cloud import bigquery

from csv import DictWriter

import os
import datetime
import json
from log import logger

import analysis.cache

spotnet = SpotNet(NetworkUser(client_id=os.environ['SPOT_CLIENT'],
                              client_secret=os.environ['SPOT_SECRET'],
                              refresh_token=os.environ['SPOT_REFRESH']).refresh_access_token())
fmnet = FmNet(username='sarsoo', api_key=os.environ['FM_CLIENT'])

cache = analysis.cache.load_cache_from_storage()

client = bigquery.Client()

# pull distinct scrobbled tracks that already have a spotify uri
QUERY = (
    'SELECT '
    ' DISTINCT uri, track, album, artist '
    'FROM `sarsooxyz.scrobbles.*` '
    'WHERE '
    ' uri IS NOT NULL '
    'ORDER BY artist '
)
logger.info('querying uris')
query_job = client.query(QUERY)
rows = query_job.result()

features = []
for_pulling = []

# split rows into cached features and tracks still needing a pull
logger.info('polling cache')
for row in rows:
    cache_entry = cache.get_track(row.track, row.artist)

    if cache_entry is not None:
        # reuse cached audio features and skip the network pull
        if cache_entry.get('features') is not None:
            features.append(cache_entry['features'])
            continue

    for_pulling.append(row)

logger.info('pulling tracks')
tracks = spotnet.get_tracks(uri_strings=[i.uri for i in for_pulling])

if tracks is not None:
    logger.info('populating features')
    tracks = spotnet.populate_track_audio_features(tracks)
    features += [i.audio_features.to_dict() for i in tracks if i.audio_features is not None]

    # write the freshly pulled features back into the cache
    logger.info('caching pulled')
    for cacheable in for_pulling:
        track = next((i for i in tracks if str(i.uri) == cacheable.uri), None)
        if track is not None and track.audio_features is not None:
            cache.set_track(name=cacheable.track,
                            artist=cacheable.artist,
                            audio_features=track.audio_features.to_dict())

logger.info('dumping')
date = str(datetime.date.today())
with open(f'{date}_features.csv', 'w', newline='') as fileobj:

    headers = ['acousticness',
               'analysis_url',
               'danceability',
               'duration_ms',
               'energy',
               'uri',
               'instrumentalness',
               'key',
               'key_code',
               'liveness',
               'loudness',
               'mode',
               'speechiness',
               'tempo',
               'time_signature',
               'track_href',
               'valence']
    writer = DictWriter(fileobj, fieldnames=headers, dialect='excel-tab')
    writer.writeheader()

    for feature in features:
        writer.writerow(feature)

analysis.cache.write_cache_to_storage(cache)
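
The dump above uses the excel-tab dialect, so reading it back needs the same dialect; a quick sketch, assuming a file produced today:

    # read a dumped features file back (filename is illustrative)
    import csv
    import datetime

    date = str(datetime.date.today())
    with open(f'{date}_features.csv', newline='') as fileobj:
        reader = csv.DictReader(fileobj, dialect='excel-tab')
        for row in reader:
            # DictReader yields strings; cast before doing arithmetic
            print(row['uri'], float(row['energy']))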

cache-transform.py (new file, 32 lines)

@@ -0,0 +1,32 @@
import os, json, pprint

uri_cache_name = 'cache.json'
if os.path.isfile(uri_cache_name):
    with open(uri_cache_name, 'r') as uri_cache:
        uris = json.loads(uri_cache.read())

    # migrate the old flat list of uri entries into the nested
    # artist -> name -> {uri} layout used by analysis.cache
    new_cache = {
        'cache': {}
    }

    for uri in uris:

        try:
            new_cache['cache'][uri['artist']]
        except KeyError:
            new_cache['cache'][uri['artist']] = {}

        try:
            new_cache['cache'][uri['artist']][uri['name']]
        except KeyError:
            new_cache['cache'][uri['artist']][uri['name']] = {}

        new_cache['cache'][uri['artist']][uri['name']]['uri'] = uri['uri']

    pprint.pprint(new_cache)

    with open(uri_cache_name, 'w') as uri_cache:
        uri_cache.write(json.dumps(new_cache))
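
For reference, the two shapes involved in the transform, with example values:

    # old flat format: a list of entries
    old = [
        {'name': 'some track', 'artist': 'some artist', 'uri': 'spotify:track:xyz'}
    ]

    # new nested format: keyed by artist, then track name
    new = {
        'cache': {
            'some artist': {
                'some track': {'uri': 'spotify:track:xyz'}
            }
        }
    }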

@@ -8,58 +8,46 @@ from csv import DictWriter
 
 import os
 import datetime
 import json
 from log import logger
 
+import analysis.cache
 
 spotnet = SpotNet(NetworkUser(client_id=os.environ['SPOT_CLIENT'],
                               client_secret=os.environ['SPOT_SECRET'],
                               refresh_token=os.environ['SPOT_REFRESH']).refresh_access_token())
 fmnet = FmNet(username='sarsoo', api_key=os.environ['FM_CLIENT'])
 
 # initialise cache
-uri_cache_name = 'uris.json'
-if os.path.isfile(uri_cache_name):
-    with open(uri_cache_name, 'r') as uri_cache:
-        uris = json.loads(uri_cache.read())
-else:
-    uris = []
+cache = analysis.cache.load_cache_from_storage()
 
 # scrobble range
-from_date = datetime.datetime(year=2018, month=1, day=1)
-to_date = datetime.datetime(year=2019, month=1, day=1)
+from_date = datetime.datetime(year=2019, month=1, day=1)
+to_date = datetime.datetime(year=2020, month=1, day=1)
 
 scrobbles = fmnet.get_recent_tracks(from_time=from_date, to_time=to_date, page_limit=200)
 
 # populate with uris
 for scrobble in scrobbles:
 
-    cache_entry = [i for i in uris if
-                   i['name'] == scrobble.track.name.lower() and
-                   i['artist'] == scrobble.track.artist.name.lower()]
+    cache_entry = cache.get_track(name=scrobble.track.name.lower(), artist=scrobble.track.artist.name.lower())
 
     # check cache
-    if len(cache_entry) == 0:
+    if cache_entry is not None and cache_entry.get('uri'):
+        scrobble.uri = cache_entry.get('uri')
+    else:
         logger.info(f'pulling {scrobble.track}')
         spotify_search = spotnet.search(query_types=[Uri.ObjectType.track],
                                         track=scrobble.track.name,
                                         artist=scrobble.track.artist.name,
                                         response_limit=5).tracks
         if len(spotify_search) > 0:
-            uris.append({
-                'name': scrobble.track.name.lower(),
-                'artist': scrobble.track.artist.name.lower(),
-                'uri': str(spotify_search[0].uri)
-            })
-            scrobble.uri = spotify_search[0].uri
+            cache.set_track(name=scrobble.track.name.lower(),
+                            artist=scrobble.track.artist.name.lower(),
+                            uri=str(spotify_search[0].uri))
+            scrobble.uri = str(spotify_search[0].uri)
         else:
             logger.debug('no search tracks returned')
             scrobble.uri = None
-
-    # cache entry available
-    else:
-        # logger.info(f'{scrobble.track} found in cache')
-        scrobble.uri = cache_entry[0]['uri']
 
 date = str(datetime.date.today())
 with open(f'{date}_scrobbles.csv', 'w', newline='') as fileobj:

@@ -79,5 +67,4 @@ with open(f'{date}_scrobbles.csv', 'w', newline='') as fileobj:
             'uri': str(scrobble.uri) if scrobble.uri is not None else ''
         })
 
-with open(uri_cache_name, 'w') as uri_cache:
-    uri_cache.write(json.dumps(uris))
+analysis.cache.write_cache_to_storage(cache)
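
The change above inverts the lookup order: the cache is consulted through get_track before any Spotify search, and misses are written back with set_track. As a standalone sketch of that get-or-search pattern (resolve_uri and search_fn are illustrative names, not part of the codebase):

    # cache-first uri resolution
    def resolve_uri(cache, name, artist, search_fn):
        entry = cache.get_track(name=name.lower(), artist=artist.lower())
        if entry is not None and entry.get('uri'):
            return entry['uri']  # cache hit, no network round trip

        results = search_fn(name, artist)  # hypothetical search callable
        if results:
            uri = str(results[0].uri)
            cache.set_track(name=name.lower(), artist=artist.lower(), uri=uri)
            return uri
        return None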