Add deduplication script and catch now-playing (scrobble-while-listening) entries

This commit is contained in:
aj 2020-07-20 14:59:39 +01:00
parent c02fcb117f
commit 478f2eaa4d
4 changed files with 142 additions and 9 deletions

2
.gitignore vendored
View File

@ -1,6 +1,8 @@
venv venv
__pycache__ __pycache__
*.csv *.csv
*.build/
*.dist/
.idea .idea
.fm .fm
scratch.py scratch.py

114
duplicate.py Normal file
View File

@ -0,0 +1,114 @@
from fmframework.net.network import Network, LastFMNetworkException
from csv import DictWriter
import os
import logging
# Last.fm account whose recent scrobbles are inspected by this script.
username = 'sarsoo'

# Directory that receives the log file; make it if it is not there yet.
directory = '.fm'
if not os.path.exists(directory):
    os.makedirs(directory)

logger = logging.getLogger('fmframework')

# File output: timestamped records in <directory>/deduplicate.log.
file_handler = logging.FileHandler(f"{directory}/deduplicate.log")
file_handler.setFormatter(
    logging.Formatter('%(asctime)s %(levelname)s %(name)s - %(funcName)s - %(message)s')
)
logger.addHandler(file_handler)

# Console output: same records in a shorter format without the timestamp.
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(
    logging.Formatter('%(levelname)s %(name)s:%(funcName)s - %(message)s')
)
logger.addHandler(stream_handler)
def neighbouring_scrobbles(scrobbles, sample_size):
    """Yield a sliding window of neighbouring scrobbles for each start index.

    For every position ``i`` in ``scrobbles`` a slice beginning at ``i`` and
    holding at most ``sample_size`` items is produced, so the first element
    of each window can be compared against the entries that follow it.
    Windows near the end of the sequence are shorter; the final window
    contains only the last scrobble.

    Args:
        scrobbles: Sliceable sequence of scrobbles.
        sample_size: Maximum number of items per window.

    Yields:
        Slices of ``scrobbles``, one per start index.
    """
    total = len(scrobbles)
    if total < sample_size:
        logger.warning(f'less scrobbles than provided sample size {total}/{sample_size}')

    for start_idx in range(total):
        # Slicing clamps to the end of the sequence on its own. The previous
        # manual cap of ``len(scrobbles) - 1`` excluded the last scrobble
        # from every window because the slice end is exclusive.
        yield scrobbles[start_idx:start_idx + sample_size]
def _entity_name(entity):
    """Return ``entity.name``, or an empty string when the entity is None."""
    return entity.name if entity is not None else ''


def _track_library_url(track):
    """Build the Last.fm library URL of ``track`` for the configured user."""
    from urllib.parse import quote_plus

    # quote_plus turns spaces into '+' (as before) and additionally escapes
    # characters such as '/', '?' and '&' that would otherwise break the URL.
    artist_part = quote_plus(_entity_name(track.artist))
    track_part = quote_plus(track.name)
    return f'https://www.last.fm/user/{username}/library/music/{artist_part}/_/{track_part}'


def _scrobbles_library_url(first, last):
    """Build the library URL listing scrobbles between two scrobbles' dates."""
    return (f'https://www.last.fm/user/{username}/library'
            f'?from={first.time.strftime("%Y-%m-%d")}'
            f'&to={last.time.strftime("%Y-%m-%d")}')


def check_for_duplicates(fmkey, retrieval_limit, sample_size=7):
    """Find repeated tracks among neighbouring scrobbles and report them.

    Recent scrobbles are fetched for the module-level ``username``. Each
    scrobble is compared with the ones following it inside a window of
    ``sample_size`` entries; pairs with equal tracks are printed to stdout
    and written to ``duplicates.csv`` (UTF-16) in the working directory.

    Args:
        fmkey: Last.fm API key.
        retrieval_limit: Passed as ``limit`` to ``get_recent_tracks``
            (the caller passes None for "no limit").
        sample_size: Size of the neighbourhood window; defaults to 7.

    Returns:
        None. A retrieval error or an empty result is logged and ends the
        call early without writing the CSV.
    """
    net = Network(username=username, api_key=fmkey)
    # NOTE(review): the Network retry logic visible elsewhere in this commit
    # retries only while retry_counter < 5 and resets the counter after a
    # successful call, so 20 presumably suppresses retries until the first
    # success - confirm this is intended.
    net.retry_counter = 20

    # Only the retrieval can raise the network exception; keep the try tight.
    try:
        scrobbles = net.get_recent_tracks(limit=retrieval_limit, page_limit=200)
    except LastFMNetworkException:
        logger.exception('error during scrobble retrieval')
        return

    if not scrobbles:
        logger.error('No scrobbles returned')
        return

    # Each entry: (reference scrobble, matching neighbour, distance in scrobbles).
    duplicates_found = []
    for scrobble_group in neighbouring_scrobbles(scrobbles, sample_size):
        if len(scrobble_group) < 2:
            continue  # nothing to compare against
        reference = scrobble_group[0]
        for distance, to_check in enumerate(scrobble_group[1:], start=1):
            if reference.track == to_check.track:
                duplicates_found.append((reference, to_check, distance))

    # NOTE(review): the 'initial'/'duplicate' labels below assume scrobbles
    # arrive newest-first, i.e. the neighbour is the earlier play - confirm
    # against Network.get_recent_tracks.
    print(f'Found {len(duplicates_found)} duplicates')
    print()
    for later, earlier, _ in duplicates_found:
        print(f'{earlier.time} - {later.time}, {later.track}')
        print(_track_library_url(later.track))
        # from/to now use the same order as the CSV row (they were swapped here).
        print(_scrobbles_library_url(earlier, later))
        print()

    headers = ['initial', 'duplicate', 'scrobble difference', 'difference minutes', 'track',
               'album', 'artist', 'track url', 'scrobbles url']
    with open('duplicates.csv', 'w', newline='', encoding='utf-16') as fileobj:
        writer = DictWriter(fileobj, fieldnames=headers)
        writer.writeheader()

        for later, earlier, distance in duplicates_found:
            writer.writerow({
                'initial': earlier.time,
                'duplicate': later.time,
                'scrobble difference': distance,
                'difference minutes': (later.time - earlier.time).total_seconds() / 60,
                'track': later.track.name,
                # Track.album / Track.artist default to None; avoid AttributeError.
                'album': _entity_name(later.track.album),
                'artist': _entity_name(later.track.artist),
                'track url': _track_library_url(later.track),
                'scrobbles url': _scrobbles_library_url(earlier, later),
            })
if __name__ == '__main__':
    # Take the API key from the FMKEY environment variable, else prompt for it.
    key = os.environ.get('FMKEY')
    if key is None:
        key = input('enter Last.fm key: ')

    raw_limit = input('limit? (0 for none): ').strip()
    # isdecimal() accepts exactly the digit characters int() can parse;
    # isdigit() also accepts e.g. superscript digits, for which int() raises
    # ValueError.
    if raw_limit.isdecimal():
        limit = int(raw_limit) or None  # 0 means "no limit"
    else:
        print('not a number, setting to none')
        limit = None

    check_for_duplicates(key, limit)
    # Keep the console window open until the user acknowledges.
    input('done, hit key to quit...')

View File

@ -47,21 +47,29 @@ class LastFM:
return self.name return self.name
@dataclass @dataclass(eq=False)
class Artist(LastFM): class Artist(LastFM):
def __str__(self): def __str__(self):
return f'{self.name}' return f'{self.name}'
def __eq__(self, other):
    """Artists are equal when they share a class and a name."""
    if self.__class__ != other.__class__:
        return False
    return self.name == other.name
@dataclass
@dataclass(eq=False)
class Album(LastFM): class Album(LastFM):
artist: Artist = None artist: Artist = None
def __str__(self): def __str__(self):
return f'{self.name} / {self.artist}' return f'{self.name} / {self.artist}'
def __eq__(self, other):
    """Albums are equal when class, name and artist all match."""
    if self.__class__ != other.__class__:
        return False
    mine = (self.name, self.artist)
    theirs = (other.name, other.artist)
    return mine == theirs
@dataclass
@dataclass(eq=False)
class Track(LastFM): class Track(LastFM):
album: Album = None album: Album = None
artist: Artist = None artist: Artist = None
@ -69,6 +77,11 @@ class Track(LastFM):
def __str__(self): def __str__(self):
return f'{self.name} / {self.album} / {self.artist}' return f'{self.name} / {self.album} / {self.artist}'
def __eq__(self, other):
    """Tracks are equal when class, name, album and artist all match.

    The right-hand tuple previously used ``self.album``, which compared
    the album with itself, so tracks that differed only by album were
    reported as equal.
    """
    return self.__class__ == other.__class__ \
        and \
        (self.name, self.album, self.artist) == (other.name, other.album, other.artist)
class WeeklyChart: class WeeklyChart:
def __init__(self, from_time, to_time): def __init__(self, from_time, to_time):
@ -94,3 +107,8 @@ class Scrobble:
def __str__(self): def __str__(self):
return self.track return self.track
def __eq__(self, other):
    """Scrobbles are equal when class, track and time all match.

    The right-hand tuple previously used ``self.time``, which compared
    the time with itself, so two plays of the same track at different
    times were reported as equal.
    """
    return self.__class__ == other.__class__ \
        and \
        (self.track, self.time) == (other.track, other.time)

View File

@ -3,6 +3,7 @@ from dataclasses import dataclass
from typing import Optional, List from typing import Optional, List
from copy import deepcopy from copy import deepcopy
import logging import logging
from time import sleep
from enum import Enum from enum import Enum
from datetime import datetime, date, time, timedelta from datetime import datetime, date, time, timedelta
@ -59,6 +60,7 @@ class Network:
if 200 <= response.status_code < 300: if 200 <= response.status_code < 300:
logger.debug(f'{http_method} {method} {response.status_code}') logger.debug(f'{http_method} {method} {response.status_code}')
self.retry_counter = 0
return resp return resp
code = resp.get('error', None) code = resp.get('error', None)
@ -68,6 +70,7 @@ class Network:
if code in [8, 11, 16]: if code in [8, 11, 16]:
if self.retry_counter < 5: if self.retry_counter < 5:
self.retry_counter += 1 self.retry_counter += 1
sleep(2)
logger.warning(f'{method} {response.status_code} {code} {message} retyring') logger.warning(f'{method} {response.status_code} {code} {message} retyring')
return self.net_call(http_method=http_method, return self.net_call(http_method=http_method,
method=method, method=method,
@ -134,11 +137,7 @@ class Network:
items = iterator.items items = iterator.items
if len(items) >= 1: return [self.parse_scrobble(i) for i in items[:limit] if i.get('date')]
if items[0].get('@attr', {}).get('nowplaying', None):
items.pop(0)
return [self.parse_scrobble(i) for i in items[:limit]]
def get_scrobbles_from_date(self, def get_scrobbles_from_date(self,
input_date: date, input_date: date,
@ -423,7 +422,7 @@ class PageCollection:
self.method = method self.method = method
self.params = params self.params = params
self.pages: List[Page] = [] self.pages: List[Page] = []
self.page_limit = page_limit self.page_limit = min(page_limit, 200)
self.response_limit = response_limit self.response_limit = response_limit
self.counter = 0 self.counter = 0