# listening-analysis/analysis/query.py
#
# 33 lines
# 1.1 KiB
# Python

from google.cloud import bigquery
import pandas as pd
# Shared BigQuery client, constructed once when this module is imported and
# reused by all_joined() for every query it issues.
# NOTE(review): Client() is called without arguments, so the project and
# credentials presumably come from the environment — confirm.
client = bigquery.Client()
def all_joined(limit: int = 200):
    """Return scrobbles joined with their audio features.

    Builds a single SQL statement that inner-joins every table matched by
    `sarsooxyz.scrobbles.*` with `sarsooxyz.audio_features.features` on the
    ``uri`` column, runs it through the module-level BigQuery ``client`` and
    converts the result with ``to_dataframe()``.

    Args:
        limit: Maximum number of rows to request. A negative value or
            ``None`` omits the ``LIMIT`` clause, i.e. every joined row is
            requested.

    Returns:
        Whatever ``client.query(...).to_dataframe()`` produces for the
        selected scrobble and audio-feature columns.

    Raises:
        TypeError: If ``limit`` cannot be compared with ``0``.
        ValueError: If ``limit`` is not an integer (e.g. a float), so that
            arbitrary text can never be spliced into the SQL statement.
    """
    query = (
        'SELECT '
        '  Scrobbles.track, Scrobbles.album, Scrobbles.artist, Scrobbles.time, Scrobbles.uri, '
        '  Features.acousticness, Features.danceability, Features.duration_ms, '
        '  Features.energy, Features.instrumentalness, Features.key, Features.liveness, '
        '  Features.loudness, Features.mode, Features.speechiness, Features.tempo, '
        '  Features.time_signature, Features.valence '
        'FROM `sarsooxyz.scrobbles.*` AS Scrobbles '
        'INNER JOIN `sarsooxyz.audio_features.features` AS Features '
        'ON Scrobbles.uri = Features.uri '
    )

    # None is treated like a negative limit ("no limit") instead of failing
    # on the comparison below.
    if limit is not None and limit >= 0:
        # The ``d`` format code only accepts integers, which keeps anything
        # else out of the query text.
        query += f'LIMIT {limit:d}'

    return client.query(query).to_dataframe()
def get_query(pull=False, cache="query.csv"):
    """Load the joined scrobble data indexed by its ``time`` column.

    Args:
        pull: When true, fetch the data with ``all_joined(limit=-1)`` (a
            negative limit, so no ``LIMIT`` clause is added). Otherwise read
            the local cache file.
        cache: Path of a tab-separated file whose first column is the stored
            row index and which contains a ``time`` column. Only read when
            ``pull`` is false.

    Returns:
        The loaded frame with ``time`` set as its index.
    """
    if not pull:
        cached = pd.read_csv(cache, sep='\t', index_col=0)
        # Values read back from the file are converted to datetimes before
        # the column becomes the index.
        # NOTE(review): assumes only the cached path needs this conversion;
        # confirm the pulled frame already carries datetimes in ``time``.
        cached['time'] = pd.to_datetime(cached['time'])
        return cached.set_index('time')

    # Load the full dataset as a pandas frame.
    return all_joined(limit=-1).set_index('time')