deduplicate script and scrobble while listening catching
This commit is contained in:
parent
c02fcb117f
commit
478f2eaa4d
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,6 +1,8 @@
|
|||||||
venv
|
venv
|
||||||
__pycache__
|
__pycache__
|
||||||
*.csv
|
*.csv
|
||||||
|
*.build/
|
||||||
|
*.dist/
|
||||||
.idea
|
.idea
|
||||||
.fm
|
.fm
|
||||||
scratch.py
|
scratch.py
|
114
duplicate.py
Normal file
114
duplicate.py
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
from fmframework.net.network import Network, LastFMNetworkException
|
||||||
|
|
||||||
|
from csv import DictWriter
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
|
||||||
|
# Last.fm user name passed to Network and embedded in the generated library URLs.
username = 'sarsoo'

# Folder that holds the log file; created when it does not exist yet.
directory = '.fm'
if not os.path.exists(directory):
    os.makedirs(directory)

logger = logging.getLogger('fmframework')

# File log: timestamped records written to <directory>/deduplicate.log.
file_handler = logging.FileHandler(f"{directory}/deduplicate.log")
file_handler.setFormatter(
    logging.Formatter('%(asctime)s %(levelname)s %(name)s - %(funcName)s - %(message)s')
)

# Console log: shorter format without the timestamp.
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(
    logging.Formatter('%(levelname)s %(name)s:%(funcName)s - %(message)s')
)

# Attach in the same order as before: file first, then console.
for handler in (file_handler, stream_handler):
    logger.addHandler(handler)
|
||||||
|
|
||||||
|
|
||||||
|
def neighbouring_scrobbles(scrobbles, sample_size):
    """Yield a sliding window of scrobbles for every start position.

    For each index of ``scrobbles`` the slice beginning at that index and
    holding at most ``sample_size`` items is produced. Windows close to the
    end are shorter; the last one holds only the final item, so every
    scrobble (including the last) takes part in a comparison.

    Args:
        scrobbles: sliceable sequence supporting ``len()``.
        sample_size: maximum number of items per window.

    Yields:
        Slices of ``scrobbles``, one per start index, in order.
    """
    if len(scrobbles) < sample_size:
        logger.warning(f'less scrobbles than provided sample size {len(scrobbles)}/{sample_size}')

    for start_idx in range(len(scrobbles)):
        # Slice ends are exclusive and clamp at the sequence length, so no
        # manual "len - 1" bound is needed (that bound dropped the last item).
        yield scrobbles[start_idx:start_idx + sample_size]
|
||||||
|
|
||||||
|
|
||||||
|
def _track_library_url(track):
    """Build the Last.fm library URL of ``track`` for the configured user."""
    return (f'https://www.last.fm/user/{username}/library/music/'
            f'{track.artist.name.replace(" ", "+")}/_/'
            f'{track.name.replace(" ", "+")}')


def _scrobbles_library_url(initial, repeat):
    """Build the library URL filtered from the day of ``initial`` to the day of ``repeat``."""
    return (f'https://www.last.fm/user/{username}/library'
            f'?from={initial.time.strftime("%Y-%m-%d")}'
            f'&to={repeat.time.strftime("%Y-%m-%d")}')


def check_for_duplicates(fmkey, retrieval_limit, window_size=7):
    """Report scrobbles of the same track that occur close together.

    Recent scrobbles of the module-level ``username`` are fetched, each
    scrobble is compared by track with the ones following it inside a
    sliding window, and every match is printed and written to
    ``duplicates.csv`` (UTF-16) in the current working directory.

    Args:
        fmkey: Last.fm API key handed to ``Network``.
        retrieval_limit: maximum number of scrobbles to request; ``None``
            is passed through unchanged.
        window_size: number of neighbouring scrobbles per comparison window
            (defaults to the previously hard-coded 7).

    Returns:
        None. A ``LastFMNetworkException`` raised by the retrieval is logged
        and swallowed; an empty result is logged as an error.
    """
    net = Network(username=username, api_key=fmkey)
    # NOTE(review): the net_call hunk in this commit retries only while
    # retry_counter < 5, so starting at 20 looks like it disables retries
    # until the first successful call resets it - confirm intent.
    net.retry_counter = 20

    # Only the retrieval talks to the network, so only it is guarded.
    try:
        scrobbles = net.get_recent_tracks(limit=retrieval_limit, page_limit=200)
    except LastFMNetworkException:
        logger.exception('error during scrobble retrieval')
        return

    if not scrobbles:
        logger.error('No scrobbles returned')
        return

    # Each entry: (window head, matching neighbour, distance between them).
    duplicates_found = []
    for scrobble_group in neighbouring_scrobbles(scrobbles, window_size):
        if not scrobble_group:
            # Nothing to compare in an empty window.
            continue
        head = scrobble_group[0]
        for distance, to_check in enumerate(scrobble_group[1:], start=1):
            if head.track == to_check.track:
                duplicates_found.append((head, to_check, distance))

    print(f'Found {len(duplicates_found)} duplicates')
    print()

    # The CSV labels the neighbour as 'initial' and the window head as
    # 'duplicate'; the same naming is used below.
    for repeat, initial, _ in duplicates_found:
        print(f'{initial.time} - {repeat.time}, {repeat.track}')
        print(_track_library_url(repeat.track))
        # from/to now follow the same order as the CSV column (they were swapped here).
        print(_scrobbles_library_url(initial, repeat))
        print()

    headers = ['initial', 'duplicate', 'scrobble difference', 'difference minutes', 'track',
               'album', 'artist', 'track url', 'scrobbles url']
    with open('duplicates.csv', 'w', newline='', encoding='utf-16') as fileobj:
        writer = DictWriter(fileobj, fieldnames=headers)
        writer.writeheader()

        for repeat, initial, distance in duplicates_found:
            track = repeat.track
            writer.writerow({
                'initial': initial.time,
                'duplicate': repeat.time,
                'scrobble difference': distance,
                'difference minutes': (repeat.time - initial.time).total_seconds() / 60,
                'track': track.name,
                # Track.album defaults to None, so guard the attribute access.
                'album': track.album.name if track.album is not None else '',
                'artist': track.artist.name,
                'track url': _track_library_url(track),
                'scrobbles url': _scrobbles_library_url(initial, repeat),
            })
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # The API key comes from the FMKEY environment variable, else from a prompt.
    key = os.environ.get('FMKEY')
    if key is None:
        key = input('enter Last.fm key: ')

    # A non-numeric answer or an explicit 0 both mean "no limit" (None).
    limit = input('limit? (0 for none): ')
    if not limit.isdigit():
        print('not a number, setting to none')
        limit = None
    else:
        limit = int(limit) or None

    check_for_duplicates(key, limit)

    # Wait for input before exiting.
    input('done, hit key to quit...')
|
@ -47,21 +47,29 @@ class LastFM:
|
|||||||
return self.name
|
return self.name
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass(eq=False)
|
||||||
class Artist(LastFM):
|
class Artist(LastFM):
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return f'{self.name}'
|
return f'{self.name}'
|
||||||
|
|
||||||
|
def __eq__(self, other):
    """Return True when ``other`` has the same class and the same name."""
    if self.__class__ != other.__class__:
        return False
    return self.name == other.name
|
||||||
|
|
||||||
@dataclass
|
|
||||||
|
@dataclass(eq=False)
|
||||||
class Album(LastFM):
|
class Album(LastFM):
|
||||||
artist: Artist = None
|
artist: Artist = None
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return f'{self.name} / {self.artist}'
|
return f'{self.name} / {self.artist}'
|
||||||
|
|
||||||
|
def __eq__(self, other):
    """Return True when ``other`` has the same class, name and artist."""
    if self.__class__ != other.__class__:
        return False
    own_key = (self.name, self.artist)
    other_key = (other.name, other.artist)
    return own_key == other_key
|
||||||
|
|
||||||
@dataclass
|
|
||||||
|
@dataclass(eq=False)
|
||||||
class Track(LastFM):
|
class Track(LastFM):
|
||||||
album: Album = None
|
album: Album = None
|
||||||
artist: Artist = None
|
artist: Artist = None
|
||||||
@ -69,6 +77,11 @@ class Track(LastFM):
|
|||||||
def __str__(self):
|
def __str__(self):
|
||||||
return f'{self.name} / {self.album} / {self.artist}'
|
return f'{self.name} / {self.album} / {self.artist}'
|
||||||
|
|
||||||
|
def __eq__(self, other):
    """Return True when ``other`` has the same class, name, album and artist.

    The album of ``other`` is compared too; previously ``self.album`` was
    compared with itself, so tracks differing only by album were equal.
    """
    return self.__class__ == other.__class__ \
        and \
        (self.name, self.album, self.artist) == (other.name, other.album, other.artist)
|
||||||
|
|
||||||
|
|
||||||
class WeeklyChart:
|
class WeeklyChart:
|
||||||
def __init__(self, from_time, to_time):
|
def __init__(self, from_time, to_time):
|
||||||
@ -94,3 +107,8 @@ class Scrobble:
|
|||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.track
|
return self.track
|
||||||
|
|
||||||
|
def __eq__(self, other):
    """Return True when ``other`` has the same class, track and time.

    The time of ``other`` is compared too; previously ``self.time`` was
    compared with itself, so scrobbles of one track at different times
    were equal.
    """
    return self.__class__ == other.__class__ \
        and \
        (self.track, self.time) == (other.track, other.time)
|
||||||
|
@ -3,6 +3,7 @@ from dataclasses import dataclass
|
|||||||
from typing import Optional, List
|
from typing import Optional, List
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
import logging
|
import logging
|
||||||
|
from time import sleep
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from datetime import datetime, date, time, timedelta
|
from datetime import datetime, date, time, timedelta
|
||||||
|
|
||||||
@ -59,6 +60,7 @@ class Network:
|
|||||||
|
|
||||||
if 200 <= response.status_code < 300:
|
if 200 <= response.status_code < 300:
|
||||||
logger.debug(f'{http_method} {method} {response.status_code}')
|
logger.debug(f'{http_method} {method} {response.status_code}')
|
||||||
|
self.retry_counter = 0
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
code = resp.get('error', None)
|
code = resp.get('error', None)
|
||||||
@ -68,6 +70,7 @@ class Network:
|
|||||||
if code in [8, 11, 16]:
|
if code in [8, 11, 16]:
|
||||||
if self.retry_counter < 5:
|
if self.retry_counter < 5:
|
||||||
self.retry_counter += 1
|
self.retry_counter += 1
|
||||||
|
sleep(2)
|
||||||
logger.warning(f'{method} {response.status_code} {code} {message} retyring')
|
logger.warning(f'{method} {response.status_code} {code} {message} retyring')
|
||||||
return self.net_call(http_method=http_method,
|
return self.net_call(http_method=http_method,
|
||||||
method=method,
|
method=method,
|
||||||
@ -134,11 +137,7 @@ class Network:
|
|||||||
|
|
||||||
items = iterator.items
|
items = iterator.items
|
||||||
|
|
||||||
if len(items) >= 1:
|
return [self.parse_scrobble(i) for i in items[:limit] if i.get('date')]
|
||||||
if items[0].get('@attr', {}).get('nowplaying', None):
|
|
||||||
items.pop(0)
|
|
||||||
|
|
||||||
return [self.parse_scrobble(i) for i in items[:limit]]
|
|
||||||
|
|
||||||
def get_scrobbles_from_date(self,
|
def get_scrobbles_from_date(self,
|
||||||
input_date: date,
|
input_date: date,
|
||||||
@ -423,7 +422,7 @@ class PageCollection:
|
|||||||
self.method = method
|
self.method = method
|
||||||
self.params = params
|
self.params = params
|
||||||
self.pages: List[Page] = []
|
self.pages: List[Page] = []
|
||||||
self.page_limit = page_limit
|
self.page_limit = min(page_limit, 200)
|
||||||
self.response_limit = response_limit
|
self.response_limit = response_limit
|
||||||
self.counter = 0
|
self.counter = 0
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user