fixed query numbers, quick cache for playlists

aj 2021-02-03 16:08:06 +00:00
parent 0202649cfa
commit 5e703b011f
7 changed files with 580 additions and 114 deletions

album.ipynb (Normal file, 309 lines changed)
File diff suppressed because one or more lines are too long

@@ -10,7 +10,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.8.4-final"
+"version": "3.8.6-final"
 },
 "orig_nbformat": 2,
 "kernelspec": {
@@ -291,7 +291,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 2,
+"execution_count": 1,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -324,11 +324,11 @@
 },
 {
 "cell_type": "code",
-"execution_count": 3,
+"execution_count": 2,
 "metadata": {},
 "outputs": [],
 "source": [
-"scrobbles = get_query()"
+"scrobbles = get_query(cache=cache)"
 ]
 },
 {
@@ -340,11 +340,11 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 3,
 "metadata": {},
 "outputs": [],
 "source": [
-"scrobbles.to_csv(cache, sep='\\t')"
+"scrobbles.reset_index().to_csv(cache, sep='\\t')"
 ]
 }
 ]
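The two notebook edits above form a quick local cache round-trip: scrobbles are read through get_query(cache=cache) and, after pulling, written back to the same path. A minimal sketch of that flow, assuming get_query is imported earlier in the notebook and that cache is a TSV path such as 'query.csv' (both assumptions):

cache = 'query.csv'  # assumed; the notebook defines this in an earlier cell

# read the local cache, or pull the query if the file is missing
scrobbles = get_query(cache=cache)

# reset_index() moves the datetime index back into a 'time' column, so the next
# read_csv(index_col=0) -> to_datetime('time') -> set_index('time') in get_query round-trips
scrobbles.reset_index().to_csv(cache, sep='\t')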

@@ -1,6 +1,9 @@
 from datetime import datetime
 import logging
 import pandas as pd
+from dotenv import load_dotenv
+load_dotenv()
 
 float_headers = ["acousticness", "danceability", "energy", "instrumentalness", "liveness", "speechiness", "valence"]
 spotify_descriptor_headers = ["duration_ms", "mode", "loudness", "key", "tempo", "time_signature"] + float_headers
@@ -28,5 +31,6 @@ def init_log():
     spotfm_logger.addHandler(stream_handler)
 
 def days_since(in_date):
-    now = datetime.now()
+    # only using up to end of 2020 in dataset at the moment
+    now = datetime(year=2021, month=1, day=1)
     return now - in_date
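Pinning 'now' to 2021-01-01 makes ages reproducible against a dataset that stops at the end of 2020, rather than drifting with the wall clock. A small worked illustration of the fixed cut-off (input dates made up):

from datetime import datetime

# mirrors days_since with the pinned cut-off above
cutoff = datetime(year=2021, month=1, day=1)
print(cutoff - datetime(year=2020, month=12, day=1))  # 31 days
print(cutoff - datetime(year=2020, month=1, day=1))   # 366 days, 2020 was a leap year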

@@ -15,11 +15,19 @@ def get_spotnet():
 def get_fmnet():
     return FMNet(username='sarsoo', api_key=os.environ['FM_CLIENT'])
 
+playlist_cache = dict() # low-tech caches for repeated pulling
+all_playlists = list()
 def get_playlist(name: str, spotnet: SpotNet):
-    playlists = spotnet.playlists()
-    playlist = [i for i in playlists if i.name == name][0]
-    playlist.tracks = spotnet.playlist_tracks(uri=playlist.uri)
-    return playlist
+    global all_playlists
+    try:
+        return playlist_cache[name]
+    except KeyError:
+        if len(all_playlists) == 0:
+            all_playlists = spotnet.playlists()
+        playlist = [i for i in all_playlists if i.name == name][0]
+        playlist.tracks = spotnet.playlist_tracks(uri=playlist.uri)
+        playlist_cache[name] = playlist
+        return playlist
 
 def track_frame(tracks: List[PlaylistTrack]):
     return pd.DataFrame(
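A short usage sketch of the memoised lookup: the first call lists every playlist once, a repeat call for the same name is served from playlist_cache, and a different name reuses the cached listing and only fetches that playlist's tracks. The module name and playlist names below are made up:

from playlist import get_spotnet, get_playlist  # module name is a guess

spotnet = get_spotnet()

mixtape = get_playlist('mixtape', spotnet)  # lists all playlists once, then pulls this playlist's tracks
mixtape = get_playlist('mixtape', spotnet)  # served from playlist_cache, no API calls
other = get_playlist('other', spotnet)      # reuses all_playlists, only pulls tracks for 'other'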

@@ -6,7 +6,7 @@ client = bigquery.Client()
 def all_joined(limit: int = 200):
     query = (
-        'SELECT '
+        'SELECT DISTINCT'
         ' Scrobbles.track, Scrobbles.album, Scrobbles.artist, Scrobbles.time, Scrobbles.uri, '
         ' Features.acousticness, Features.danceability, Features.duration_ms, '
         ' Features.energy, Features.instrumentalness, Features.key, Features.liveness, '
@@ -14,7 +14,7 @@ def all_joined(limit: int = 200):
         ' Features.time_signature, Features.valence '
         'FROM `sarsooxyz.scrobbles.*` AS Scrobbles '
-        'INNER JOIN `sarsooxyz.audio_features.features` AS Features '
+        'LEFT JOIN `sarsooxyz.audio_features.features` AS Features '
         'ON Scrobbles.uri = Features.uri '
     )
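Read together, the two query edits change the join semantics: DISTINCT collapses rows where a scrobble matched the same features row more than once, and LEFT JOIN keeps scrobbles whose uri has no audio-features entry instead of dropping them (their feature columns come back NULL, i.e. NaN once loaded into pandas). A rough pandas analogy with made-up toy data:

import pandas as pd

scrobbles = pd.DataFrame({'track': ['a', 'b'], 'uri': ['uri1', 'uri2']})
features = pd.DataFrame({'uri': ['uri1'], 'energy': [0.7]})

old = scrobbles.merge(features, on='uri', how='inner')                   # 'b' has no features row and is dropped
new = scrobbles.merge(features, on='uri', how='left').drop_duplicates()  # 'b' kept with NaN energy; duplicate matches collapsed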
@ -27,7 +27,11 @@ def get_query(pull=False, cache="query.csv"):
if pull:
scrobbles = all_joined(limit=-1) # load dataset as panda frame
else:
scrobbles = pd.read_csv(cache, sep='\t', index_col=0)
try:
scrobbles = pd.read_csv(cache, sep='\t', index_col=0)
except FileNotFoundError:
print(f'{cache} not found, pulling')
scrobbles = all_joined(limit=-1) # load dataset as panda frame
scrobbles['time'] = pd.to_datetime(scrobbles['time'])
scrobbles = scrobbles.set_index('time')
return scrobbles
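A minimal usage sketch of the new fallback behaviour, assuming the module is importable (module name is a guess):

from query import get_query  # module name is a guess

# force a fresh BigQuery pull regardless of any local cache
scrobbles = get_query(pull=True)

# read the local TSV cache; if 'query.csv' is missing, get_query now pulls
# from BigQuery instead of raising FileNotFoundError
scrobbles = get_query(cache='query.csv')

# either way the frame comes back indexed by a parsed datetime 'time' column
print(scrobbles.index.min(), scrobbles.index.max())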

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long