# listening-analysis/analysis/query.py


from google.cloud import bigquery
import pandas as pd

# Module-level BigQuery client, constructed at import time with no explicit
# arguments; all_joined() runs its query through this instance.
client = bigquery.Client()

def all_joined(limit: int = 200):
    """Fetch distinct scrobbles joined with their audio features.

    Assembles a ``SELECT DISTINCT`` over ``sarsooxyz.scrobbles.*`` left-joined
    to ``sarsooxyz.audio_features.features`` on ``uri`` and runs it through
    the module-level BigQuery ``client``.

    Args:
        limit: Row cap appended as a ``LIMIT`` clause. A negative value
            leaves the clause out, so the whole result set is requested.

    Returns:
        Whatever ``to_dataframe()`` yields for the finished query job.
    """
    # Each fragment carries its own leading/trailing spaces, so the pieces
    # are glued together with an empty separator.
    fragments = [
        'SELECT DISTINCT',
        '   Scrobbles.track, Scrobbles.album, Scrobbles.artist, Scrobbles.time, Scrobbles.uri, ',
        '   Features.acousticness, Features.danceability, Features.duration_ms, ',
        '   Features.energy, Features.instrumentalness, Features.key, Features.liveness, ',
        '   Features.loudness, Features.mode, Features.speechiness, Features.tempo, ',
        '   Features.time_signature, Features.valence ',
        'FROM `sarsooxyz.scrobbles.*` AS Scrobbles ',
        'LEFT JOIN `sarsooxyz.audio_features.features` AS Features ',
        'ON Scrobbles.uri = Features.uri ',
    ]

    # Only non-negative limits produce a LIMIT clause.
    if limit >= 0:
        fragments.append(f' LIMIT {limit}')

    job = client.query(''.join(fragments))
    return job.to_dataframe()

def get_query(pull=False, cache="query.csv"):
    """Return the joined scrobble data indexed by scrobble time.

    Args:
        pull: When true, skip the cache and fetch everything through
            ``all_joined(limit=-1)``.
        cache: Path of a tab-separated cache file whose first column is the
            row index. If the file does not exist a notice is printed and
            the data is pulled instead.

    Returns:
        The frame with its ``time`` column parsed by ``pd.to_datetime`` and
        then promoted to the index.
    """
    frame = None

    if not pull:
        try:
            frame = pd.read_csv(cache, sep='\t', index_col=0)
        except FileNotFoundError:
            print(f'{cache} not found, pulling')

    # Reached either because a pull was requested or the cache was missing.
    if frame is None:
        frame = all_joined(limit=-1)  # unlimited pull of the full dataset

    frame['time'] = pd.to_datetime(frame['time'])
    return frame.set_index('time')
# Revision history recovered from the blame view this file was extracted from.
# The view repeated the module's code line by line next to these commit
# messages; the per-line attribution could not be reliably recovered.
#   2021-02-01 01:37:22 +00:00  restructured, added notebooks, refreshed data, poetry
#   2021-02-01 21:43:27 +00:00  added playlist and artists books
#   2021-02-03 16:08:06 +00:00  fixed query numbers, quick cache for playlists