Selector/Selector.Data/HistoryPersister.cs

211 lines
6.3 KiB
C#
Raw Normal View History

2022-10-08 17:07:50 +01:00
using System.Text.Json;
using Microsoft.Extensions.Logging;
2022-10-10 11:47:50 +01:00
using Selector.Cache;
2022-10-08 17:07:50 +01:00
using Selector.Model;
2022-10-10 11:47:50 +01:00
using static SpotifyAPI.Web.PlaylistRemoveItemsRequest;
namespace Selector.Data;
/// <summary>
/// Options controlling a single <see cref="HistoryPersister"/> import run.
/// </summary>
public class HistoryPersisterConfig
{
    /// <summary>
    /// User name the imported listening history is attributed to.
    /// </summary>
    public string Username { get; set; }

    /// <summary>
    /// When set, previously stored listens for the user up to the newest imported
    /// timestamp are removed before the import. Enabled by default.
    /// </summary>
    public bool InitialClear { get; set; } = true;

    /// <summary>
    /// When set, plays are only kept if they pass the persister's duration check.
    /// Disabled by default.
    /// </summary>
    public bool Apply50PercentRule { get; set; }
}
public class HistoryPersister
{
private HistoryPersisterConfig Config { get; set; }
private ApplicationDbContext Db { get; set; }
private DataJsonContext Json { get; set; }
2022-10-10 11:47:50 +01:00
private DurationPuller DurationPuller { get; set; }
private ILogger<HistoryPersister> Logger { get; set; }
2022-10-10 11:47:50 +01:00
private readonly Dictionary<string, int> Durations;
/// <summary>
/// Creates a persister that imports listening history for the user named in <paramref name="config"/>.
/// </summary>
/// <param name="db">Database context the listens are read from and written to.</param>
/// <param name="json">JSON serializer context providing the <c>EndSongArray</c> type info.</param>
/// <param name="config">Import options; must not be null.</param>
/// <param name="durationPuller">Track duration source; required when <c>config.Apply50PercentRule</c> is set.</param>
/// <param name="logger">Optional logger; may be null.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="config"/> is null, or the 50% rule is enabled without a <paramref name="durationPuller"/>.
/// </exception>
public HistoryPersister(
    ApplicationDbContext db,
    DataJsonContext json,
    HistoryPersisterConfig config,
    DurationPuller durationPuller = null,
    ILogger<HistoryPersister> logger = null)
{
    ArgumentNullException.ThrowIfNull(config);

    // The 50% rule cannot be evaluated without a way to look up track durations,
    // so fail fast here and report the actual parameter name to the caller.
    if (config.Apply50PercentRule && durationPuller is null)
    {
        throw new ArgumentNullException(nameof(durationPuller));
    }

    Config = config;
    Db = db;
    Json = json;
    DurationPuller = durationPuller;
    Logger = logger;

    Durations = new();
}
/// <summary>
/// Deserializes <paramref name="input"/> with <c>Json.EndSongArray</c> and imports the result,
/// blocking the calling thread until the import has finished.
/// </summary>
/// <param name="input">JSON text to deserialize.</param>
public void Process(string input)
{
    var songs = JsonSerializer.Deserialize(input, Json.EndSongArray);

    // Synchronous entry point: wait on the asynchronous import rather than awaiting it.
    var import = Process(songs);
    import.Wait();
}
/// <summary>
/// Asynchronously deserializes <paramref name="input"/> with <c>Json.EndSongArray</c>
/// and imports the result.
/// </summary>
/// <param name="input">Stream containing the JSON to deserialize.</param>
/// <returns>A task that completes when the import has finished.</returns>
public async Task Process(Stream input)
{
    await Process(await JsonSerializer.DeserializeAsync(input, Json.EndSongArray));
}
/// <summary>
/// Deserializes every stream in <paramref name="input"/> with <c>Json.EndSongArray</c>,
/// combines the results in stream order and imports them as a single batch.
/// </summary>
/// <param name="input">Streams containing the JSON to deserialize.</param>
/// <returns>A task that completes when the combined import has finished.</returns>
public async Task Process(IEnumerable<Stream> input)
{
    var songs = new List<EndSong>();

    foreach (var singleInput in input)
    {
        var parsed = await JsonSerializer.DeserializeAsync(singleInput, Json.EndSongArray);

        // A stream holding the JSON literal `null` deserializes to null; there is
        // nothing to import from it, so skip it instead of failing the whole batch.
        if (parsed is null)
        {
            continue;
        }

        songs.AddRange(parsed);

        Logger?.LogDebug("Parsed {:n0} items for {}", parsed.Length, Config.Username);
    }

    await Process(songs);
}
/// <summary>
/// Imports the given plays as <c>SpotifyListen</c> rows for the configured user.
/// </summary>
/// <remarks>
/// Steps, in order:
/// <list type="number">
/// <item>Looks up the user whose <c>UserName</c> equals <c>Config.Username</c>.</item>
/// <item>If <c>Config.InitialClear</c> is set and there is input, removes the user's stored
/// listens with a timestamp up to the newest <c>ts</c> in the input.</item>
/// <item>Keeps plays longer than 30000 <c>ms_played</c> with a non-blank track name,
/// de-duplicated on (<c>offline_timestamp</c>, <c>ts</c>, <c>spotify_track_uri</c>).</item>
/// <item>Adds the remaining plays in chunks of 1000, optionally applying the 50% rule, then saves.</item>
/// </list>
/// </remarks>
/// <param name="input">Plays to import; must not be null.</param>
/// <returns>A task that completes when the changes have been saved.</returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
public async Task Process(IEnumerable<EndSong> input)
{
    ArgumentNullException.ThrowIfNull(input);

    // The input is read for both the clear cut-off and the filter; materialise it
    // once so a lazy sequence is not enumerated twice.
    var songs = input as IReadOnlyCollection<EndSong> ?? input.ToArray();

    var user = Db.Users.Single(u => u.UserName == Config.Username);

    // With no input there is no newest timestamp, hence nothing to clear.
    if (Config.InitialClear && songs.Count > 0)
    {
        var latestTime = songs.Max(x => x.ts);
        var time = DateTime.Parse(latestTime).ToUniversalTime();

        Db.SpotifyListen.RemoveRange(Db.SpotifyListen.Where(x => x.UserId == user.Id && x.Timestamp <= time));
    }

    var filtered = songs
        .Where(x => x.ms_played > 30000
                    && !string.IsNullOrWhiteSpace(x.master_metadata_track_name))
        .DistinctBy(x => (x.offline_timestamp, x.ts, x.spotify_track_uri))
        .ToArray();

    // The logger is optional, so every call goes through the null-conditional operator.
    Logger?.LogInformation("{:n0} items after filtering", filtered.Length);

    var processedCounter = 0; // filtered items examined so far (progress reporting)
    var addedCounter = 0;     // rows actually queued for insertion

    foreach (var chunk in filtered.Chunk(1000))
    {
        IEnumerable<EndSong> toPopulate = chunk;

        if (Config.Apply50PercentRule)
        {
            Logger?.LogDebug("Validating tracks {:n0}/{:n0}", processedCounter + 1, filtered.Length);

            toPopulate = Passes50PcRule(toPopulate);
        }

        // Materialise so the number of rows that survived the rule is known.
        var listens = toPopulate.Select(x => new SpotifyListen()
        {
            TrackName = x.master_metadata_track_name,
            AlbumName = x.master_metadata_album_album_name,
            ArtistName = x.master_metadata_album_artist_name,

            Timestamp = DateTime.Parse(x.ts).ToUniversalTime(),
            PlayedDuration = x.ms_played,
            TrackUri = x.spotify_track_uri,
            UserId = user.Id
        }).ToArray();

        Db.SpotifyListen.AddRange(listens);

        processedCounter += chunk.Length;
        addedCounter += listens.Length;
    }

    Logger?.LogInformation("Saving {:n0} historical items for {}", addedCounter, user.UserName);

    await Db.SaveChangesAsync();

    Logger?.LogInformation("Added {:n0} historical items for {}", addedCounter, user.UserName);
}
private const int FOUR_MINUTES = 4 * 60 * 1000;
/// <summary>
/// Decides whether a single play should be kept under the 50% rule.
/// </summary>
/// <remarks>
/// Plays without a track URI, and plays whose duration cannot be obtained, are kept.
/// Durations are cached per URI in <c>Durations</c> so each track is pulled at most once.
/// </remarks>
/// <param name="song">The play to evaluate.</param>
/// <returns>
/// <c>true</c> when the play should be kept; otherwise the result of
/// <see cref="CheckDuration"/> for the track's duration.
/// </returns>
public async Task<bool> Passes50PcRule(EndSong song)
{
    var uri = song.spotify_track_uri;

    // Nothing to look the duration up by, so give the play the benefit of the doubt.
    if (string.IsNullOrWhiteSpace(uri))
    {
        return true;
    }

    if (!Durations.TryGetValue(uri, out var duration))
    {
        var pulledDuration = await DurationPuller.Get(uri);

        if (pulledDuration is not int d)
        {
            // The logger is optional, so guard the call.
            Logger?.LogDebug("No duration returned for {}/{}", song.master_metadata_track_name, song.master_metadata_album_artist_name);
            return true; // if can't get duration, just pass
        }

        duration = d;

        // Indexer rather than Add: another call for the same URI may have filled
        // the cache while this one was awaiting, and Add would throw on the duplicate key.
        Durations[uri] = duration;
    }

    return CheckDuration(song, duration);
}
2022-10-10 11:47:50 +01:00
/// <summary>
/// Lazily filters a batch of plays under the 50% rule.
/// </summary>
/// <remarks>
/// Plays are yielded in two passes: first those that can be decided locally (no track URI,
/// or a duration already cached in <c>Durations</c>), then those whose duration had to be
/// pulled in one batched request. Plays for which no duration comes back are kept.
/// </remarks>
/// <param name="inputTracks">Plays to evaluate.</param>
/// <returns>The plays that should be kept, each yielded exactly once.</returns>
public IEnumerable<EndSong> Passes50PcRule(IEnumerable<EndSong> inputTracks)
{
    var toPullOverWire = new List<EndSong>();

    // quick return items from local cache
    foreach (var track in inputTracks)
    {
        if (string.IsNullOrWhiteSpace(track.spotify_track_uri))
        {
            yield return track;

            // Must not fall through: a null key makes TryGetValue throw, and a blank
            // one would be queued for pulling and yielded a second time.
            continue;
        }

        if (!Durations.TryGetValue(track.spotify_track_uri, out var duration))
        {
            toPullOverWire.Add(track);
        }
        else if (CheckDuration(track, duration))
        {
            yield return track;
        }
    }

    // Everything was answered locally; avoid a pointless remote call.
    if (toPullOverWire.Count == 0)
    {
        yield break;
    }

    // Iterator methods cannot await, so the batched pull is waited on synchronously.
    var pulledDuration = DurationPuller.Get(toPullOverWire.Select(x => x.spotify_track_uri)).Result;

    // apply results to cache
    foreach ((var uri, var dur) in pulledDuration)
    {
        Durations[uri] = dur;
    }

    // return acceptable tracks from pulled; tracks with no returned duration pass
    foreach (var track in toPullOverWire)
    {
        if (!pulledDuration.TryGetValue(track.spotify_track_uri, out var duration)
            || CheckDuration(track, duration))
        {
            yield return track;
        }
    }
}
2022-10-10 11:47:50 +01:00
/// <summary>
/// Tests whether a play was long enough to count: its <c>ms_played</c> is at least half of
/// <paramref name="duration"/> (integer division) or at least <c>FOUR_MINUTES</c>.
/// </summary>
/// <param name="song">The play to test.</param>
/// <param name="duration">Track duration; presumably in milliseconds like <c>ms_played</c> (confirm against the duration source).</param>
/// <returns><c>true</c> when either threshold is met.</returns>
public bool CheckDuration(EndSong song, int duration)
{
    var halfOfTrack = duration / 2;

    if (song.ms_played >= halfOfTrack)
    {
        return true;
    }

    return song.ms_played >= FOUR_MINUTES;
}
}