deduplicate script and scrobble while listening catching
This commit is contained in:
parent
c02fcb117f
commit
478f2eaa4d
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,6 +1,8 @@
|
||||
venv
|
||||
__pycache__
|
||||
*.csv
|
||||
*.build/
|
||||
*.dist/
|
||||
.idea
|
||||
.fm
|
||||
scratch.py
|
114
duplicate.py
Normal file
114
duplicate.py
Normal file
@ -0,0 +1,114 @@
|
||||
from fmframework.net.network import Network, LastFMNetworkException
|
||||
|
||||
from csv import DictWriter
|
||||
import os
|
||||
import logging
|
||||
|
||||
# Last.fm account whose scrobbles are inspected and whose library URLs are built.
username = 'sarsoo'

# Handlers below are attached to the 'fmframework' logger.
logger = logging.getLogger('fmframework')

# Directory that holds the log file written by this script.
directory = '.fm'

# exist_ok=True creates the directory only when it is missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(directory, exist_ok=True)

# Timestamped records go to a log file inside the working directory.
file_handler = logging.FileHandler(f"{directory}/deduplicate.log")
file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(name)s - %(funcName)s - %(message)s'))
logger.addHandler(file_handler)

# A shorter format (no timestamp) is used for the console stream.
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(logging.Formatter('%(levelname)s %(name)s:%(funcName)s - %(message)s'))
logger.addHandler(stream_handler)
|
||||
|
||||
|
||||
# chunk scrobbles into successive groups of sample size
def neighbouring_scrobbles(scrobbles, sample_size):
    """Yield a sliding window of ``scrobbles`` starting at every index.

    For each position ``i`` the generator yields
    ``scrobbles[i:i + sample_size]``. Windows close to the end of the
    sequence are shorter; the final window contains only the last item.

    Args:
        scrobbles: Sliceable sequence supporting ``len()``.
        sample_size: Maximum number of items per window.

    Yields:
        Slices of ``scrobbles`` of at most ``sample_size`` items.
    """
    if len(scrobbles) < sample_size:
        logger.warning(f'less scrobbles than provided sample size {len(scrobbles)}/{sample_size}')

    # Slice ends are exclusive and clamp to the sequence length, so no
    # explicit upper bound is needed. Capping the end at len - 1 (as before)
    # silently dropped the last item from every window.
    for start_idx in range(len(scrobbles)):
        yield scrobbles[start_idx:start_idx + sample_size]
|
||||
|
||||
|
||||
def _track_url(track):
    """Return the Last.fm library URL of ``track`` for the configured user."""
    return (f'https://www.last.fm/user/{username}/library/music/'
            f'{track.artist.name.replace(" ", "+")}/_/'
            f'{track.name.replace(" ", "+")}')


def _scrobbles_url(from_time, to_time):
    """Return the user's library URL filtered to the dates of the two times."""
    return (f'https://www.last.fm/user/{username}/library'
            f'?from={from_time.strftime("%Y-%m-%d")}'
            f'&to={to_time.strftime("%Y-%m-%d")}')


def check_for_duplicates(fmkey, retrieval_limit):
    """Find repeated tracks among neighbouring scrobbles and report them.

    Recent tracks are fetched for ``username``. Each scrobble is compared
    with the scrobbles that follow it in a window of 7, and every pair whose
    tracks compare equal is recorded. The pairs are printed and written to
    ``duplicates.csv`` (UTF-16) in the current working directory.

    Args:
        fmkey: Last.fm API key passed to ``Network``.
        retrieval_limit: Value passed as ``limit`` to
            ``Network.get_recent_tracks``; ``None`` is what the entry point
            passes for "no limit".

    Returns:
        None. A ``LastFMNetworkException`` raised while retrieving scrobbles
        is logged and not re-raised.
    """
    net = Network(username=username, api_key=fmkey)
    # NOTE(review): Network.net_call appears to retry only while
    # retry_counter < 5, so starting at 20 may disable retries - confirm intent.
    net.retry_counter = 20

    # Keep the try body to the one call that talks to the network.
    try:
        scrobbles = net.get_recent_tracks(limit=retrieval_limit, page_limit=200)
    except LastFMNetworkException:
        logger.exception('error during scrobble retrieval')
        return

    if not scrobbles:
        logger.error('No scrobbles returned')
        return

    # Each entry is (window head, matching later entry, distance in scrobbles).
    duplicates_found = []
    for scrobble_group in neighbouring_scrobbles(scrobbles, 7):
        for distance, to_check in enumerate(scrobble_group[1:], start=1):
            if scrobble_group[0].track == to_check.track:
                duplicates_found.append((scrobble_group[0], to_check, distance))

    print(f'Found {len(duplicates_found)} duplicates')
    print()

    # Naming follows the CSV columns below: the window head is the
    # 'duplicate' scrobble and the matched entry is the 'initial' one.
    for repeat, initial, _ in duplicates_found:
        print(f'{initial.time} - {repeat.time}, {repeat.track}')
        print(_track_url(repeat.track))
        # from=initial, to=repeat, matching the CSV 'scrobbles url' column
        # (the printed link previously had the two dates swapped).
        print(_scrobbles_url(initial.time, repeat.time))
        print()

    headers = ['initial', 'duplicate', 'scrobble difference', 'difference minutes', 'track',
               'album', 'artist', 'track url', 'scrobbles url']
    with open('duplicates.csv', 'w', newline='', encoding='utf-16') as fileobj:
        writer = DictWriter(fileobj, fieldnames=headers)
        writer.writeheader()

        for repeat, initial, distance in duplicates_found:
            track = repeat.track
            writer.writerow({
                'initial': initial.time,
                'duplicate': repeat.time,
                'scrobble difference': distance,
                'difference minutes': (repeat.time - initial.time).total_seconds() / 60,
                'track': track.name,
                # Track.album defaults to None; DictWriter writes None as ''.
                'album': track.album.name if track.album is not None else None,
                'artist': track.artist.name,
                'track url': _track_url(track),
                'scrobbles url': _scrobbles_url(initial.time, repeat.time),
            })
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Take the API key from the FMKEY environment variable, prompting only
    # when the variable is not set.
    key = os.environ['FMKEY'] if 'FMKEY' in os.environ else input('enter Last.fm key: ')

    answer = input('limit? (0 for none): ')

    if not answer.isdigit():
        print('not a number, setting to none')
        limit = None
    else:
        # Zero is mapped to None, as announced by the prompt ("0 for none").
        limit = int(answer) or None

    check_for_duplicates(key, limit)
    # Wait for input before exiting.
    input('done, hit key to quit...')
|
@ -47,21 +47,29 @@ class LastFM:
|
||||
return self.name
|
||||
|
||||
|
||||
@dataclass
|
||||
@dataclass(eq=False)
class Artist(LastFM):
    """Artist entity; equality is decided by class and ``name``."""

    def __str__(self):
        """Return the artist's name formatted as a string."""
        return f'{self.name}'

    def __eq__(self, other):
        """Return True when ``other`` has the same class and the same name."""
        if not self.__class__ == other.__class__:
            return False
        return self.name == other.name
|
||||
|
||||
@dataclass
|
||||
|
||||
@dataclass(eq=False)
class Album(LastFM):
    """Album entity; equality is decided by class, ``name`` and ``artist``."""

    # Artist the album is attributed to; None when not supplied.
    artist: Artist = None

    def __str__(self):
        """Return ``"<name> / <artist>"``."""
        return f'{self.name} / {self.artist}'

    def __eq__(self, other):
        """Return True when ``other`` has the same class, name and artist."""
        if not self.__class__ == other.__class__:
            return False
        own_key = (self.name, self.artist)
        other_key = (other.name, other.artist)
        return own_key == other_key
|
||||
|
||||
@dataclass
|
||||
|
||||
@dataclass(eq=False)
|
||||
class Track(LastFM):
|
||||
album: Album = None
|
||||
artist: Artist = None
|
||||
@ -69,6 +77,11 @@ class Track(LastFM):
|
||||
def __str__(self):
    """Return ``"<name> / <album> / <artist>"`` for this track."""
    description = f'{self.name} / {self.album} / {self.artist}'
    return description
|
||||
|
||||
def __eq__(self, other):
    """Return True when ``other`` has the same class, name, album and artist.

    The album of ``other`` is compared as well; previously ``self.album``
    was compared with itself, so tracks that differed only by album were
    reported as equal.
    """
    if not self.__class__ == other.__class__:
        return False
    return (self.name, self.album, self.artist) == (other.name, other.album, other.artist)
|
||||
|
||||
|
||||
class WeeklyChart:
|
||||
def __init__(self, from_time, to_time):
|
||||
@ -94,3 +107,8 @@ class Scrobble:
|
||||
|
||||
def __str__(self):
    """Return the string form of the scrobbled track.

    ``__str__`` must return a ``str``; returning ``self.track`` itself makes
    ``str(scrobble)`` raise ``TypeError`` whenever the track is not a string.
    """
    return str(self.track)
|
||||
|
||||
def __eq__(self, other):
    """Return True when ``other`` has the same class, track and time.

    The time of ``other`` is compared as well; previously ``self.time`` was
    compared with itself, so scrobbles of one track at different times were
    reported as equal.
    """
    if not self.__class__ == other.__class__:
        return False
    return (self.track, self.time) == (other.track, other.time)
|
||||
|
@ -3,6 +3,7 @@ from dataclasses import dataclass
|
||||
from typing import Optional, List
|
||||
from copy import deepcopy
|
||||
import logging
|
||||
from time import sleep
|
||||
from enum import Enum
|
||||
from datetime import datetime, date, time, timedelta
|
||||
|
||||
@ -59,6 +60,7 @@ class Network:
|
||||
|
||||
if 200 <= response.status_code < 300:
|
||||
logger.debug(f'{http_method} {method} {response.status_code}')
|
||||
self.retry_counter = 0
|
||||
return resp
|
||||
|
||||
code = resp.get('error', None)
|
||||
@ -68,6 +70,7 @@ class Network:
|
||||
if code in [8, 11, 16]:
|
||||
if self.retry_counter < 5:
|
||||
self.retry_counter += 1
|
||||
sleep(2)
|
||||
logger.warning(f'{method} {response.status_code} {code} {message} retyring')
|
||||
return self.net_call(http_method=http_method,
|
||||
method=method,
|
||||
@ -134,11 +137,7 @@ class Network:
|
||||
|
||||
items = iterator.items
|
||||
|
||||
if len(items) >= 1:
|
||||
if items[0].get('@attr', {}).get('nowplaying', None):
|
||||
items.pop(0)
|
||||
|
||||
return [self.parse_scrobble(i) for i in items[:limit]]
|
||||
return [self.parse_scrobble(i) for i in items[:limit] if i.get('date')]
|
||||
|
||||
def get_scrobbles_from_date(self,
|
||||
input_date: date,
|
||||
@ -423,7 +422,7 @@ class PageCollection:
|
||||
self.method = method
|
||||
self.params = params
|
||||
self.pages: List[Page] = []
|
||||
self.page_limit = page_limit
|
||||
self.page_limit = min(page_limit, 200)
|
||||
self.response_limit = response_limit
|
||||
self.counter = 0
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user