fixed query numbers, quick cache for playlists
This commit is contained in:
parent
0202649cfa
commit
5e703b011f
309
album.ipynb
Normal file
309
album.ipynb
Normal file
File diff suppressed because one or more lines are too long
@ -10,7 +10,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.4-final"
|
||||
"version": "3.8.6-final"
|
||||
},
|
||||
"orig_nbformat": 2,
|
||||
"kernelspec": {
|
||||
@ -291,7 +291,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -324,11 +324,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"scrobbles = get_query()"
|
||||
"scrobbles = get_query(cache=cache)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -340,11 +340,11 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"scrobbles.to_csv(cache, sep='\\t')"
|
||||
"scrobbles.reset_index().to_csv(cache, sep='\\t')"
|
||||
]
|
||||
}
|
||||
]
|
||||
|
@ -1,6 +1,9 @@
|
||||
from datetime import datetime
|
||||
import logging
|
||||
import pandas as pd
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
float_headers = ["acousticness", "danceability", "energy", "instrumentalness", "liveness", "speechiness", "valence"]
|
||||
spotify_descriptor_headers = ["duration_ms", "mode", "loudness", "key", "tempo", "time_signature"] + float_headers
|
||||
@ -28,5 +31,6 @@ def init_log():
|
||||
spotfm_logger.addHandler(stream_handler)
|
||||
|
||||
def days_since(in_date):
|
||||
now = datetime.now()
|
||||
# only using up to end of 2020 in dataset at the moment
|
||||
now = datetime(year=2021, month=1, day=1)
|
||||
return now - in_date
|
@ -15,11 +15,19 @@ def get_spotnet():
|
||||
def get_fmnet():
|
||||
return FMNet(username='sarsoo', api_key=os.environ['FM_CLIENT'])
|
||||
|
||||
playlist_cache = dict() # low-tech caches for repeated pulling
|
||||
all_playlists = list()
|
||||
def get_playlist(name: str, spotnet: SpotNet):
|
||||
playlists = spotnet.playlists()
|
||||
playlist = [i for i in playlists if i.name == name][0]
|
||||
playlist.tracks = spotnet.playlist_tracks(uri=playlist.uri)
|
||||
return playlist
|
||||
global all_playlists
|
||||
try:
|
||||
return playlist_cache[name]
|
||||
except KeyError:
|
||||
if len(all_playlists) == 0:
|
||||
all_playlists = spotnet.playlists()
|
||||
playlist = [i for i in all_playlists if i.name == name][0]
|
||||
playlist.tracks = spotnet.playlist_tracks(uri=playlist.uri)
|
||||
playlist_cache[name] = playlist
|
||||
return playlist
|
||||
|
||||
def track_frame(tracks: List[PlaylistTrack]):
|
||||
return pd.DataFrame(
|
||||
|
@ -6,7 +6,7 @@ client = bigquery.Client()
|
||||
|
||||
def all_joined(limit: int = 200):
|
||||
query = (
|
||||
'SELECT '
|
||||
'SELECT DISTINCT'
|
||||
' Scrobbles.track, Scrobbles.album, Scrobbles.artist, Scrobbles.time, Scrobbles.uri, '
|
||||
' Features.acousticness, Features.danceability, Features.duration_ms, '
|
||||
' Features.energy, Features.instrumentalness, Features.key, Features.liveness, '
|
||||
@ -14,7 +14,7 @@ def all_joined(limit: int = 200):
|
||||
' Features.time_signature, Features.valence '
|
||||
|
||||
'FROM `sarsooxyz.scrobbles.*` AS Scrobbles '
|
||||
'INNER JOIN `sarsooxyz.audio_features.features` AS Features '
|
||||
'LEFT JOIN `sarsooxyz.audio_features.features` AS Features '
|
||||
'ON Scrobbles.uri = Features.uri '
|
||||
)
|
||||
|
||||
@ -27,7 +27,11 @@ def get_query(pull=False, cache="query.csv"):
|
||||
if pull:
|
||||
scrobbles = all_joined(limit=-1) # load dataset as panda frame
|
||||
else:
|
||||
scrobbles = pd.read_csv(cache, sep='\t', index_col=0)
|
||||
try:
|
||||
scrobbles = pd.read_csv(cache, sep='\t', index_col=0)
|
||||
except FileNotFoundError:
|
||||
print(f'{cache} not found, pulling')
|
||||
scrobbles = all_joined(limit=-1) # load dataset as panda frame
|
||||
scrobbles['time'] = pd.to_datetime(scrobbles['time'])
|
||||
scrobbles = scrobbles.set_index('time')
|
||||
return scrobbles
|
146
artist.ipynb
146
artist.ipynb
File diff suppressed because one or more lines are too long
195
playlist.ipynb
195
playlist.ipynb
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user