skeleton of spotify history model and insert

This commit is contained in:
Andy Pack 2022-10-07 18:29:33 +01:00
parent 207ed8bb23
commit 2b8e0a2735
Signed by: sarsoo
GPG Key ID: A55BA3536A5E0ED7
20 changed files with 609 additions and 6 deletions

View File

@ -0,0 +1,91 @@
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Logging;
using Selector.CLI.Extensions;
using Selector.Data;
using Selector.Model;
using System;
using System.IO;
using System.CommandLine;
using System.CommandLine.Invocation;
using System.Linq;
using System.Collections.Generic;
namespace Selector.CLI
{
/// <summary>
/// CLI command that reads Spotify history export files from a directory and hands them to
/// <see cref="HistoryPersister"/> to be stored for a single user.
/// </summary>
public class SpotifyHistoryCommand : Command
{
    /// <summary>
    /// Registers the <c>--connection</c>/<c>-c</c>, <c>--path</c>/<c>-i</c> and <c>--username</c>/<c>-u</c>
    /// options and wires the handler to <see cref="Execute"/>.
    /// </summary>
    /// <param name="name">Command name as typed on the command line.</param>
    /// <param name="description">Optional help text for the command.</param>
    public SpotifyHistoryCommand(string name, string description = null) : base(name, description)
    {
        var connectionOption = new Option<string>("--connection", "database to insert history into");
        connectionOption.AddAlias("-c");
        AddOption(connectionOption);

        var pathOption = new Option<string>("--path", "path to find data");
        pathOption.AddAlias("-i");
        AddOption(pathOption);

        var usernameOption = new Option<string>("--username", "user to insert history for");
        usernameOption.AddAlias("-u");
        AddOption(usernameOption);

        // Handler parameter names must match the option names so the values are bound by name.
        Handler = CommandHandler.Create((string connectionString, string path, string username) => Execute(connectionString, path, username));
    }

    /// <summary>
    /// Finds the "endsong_" files under <paramref name="path"/>, asks for confirmation on the console and,
    /// if confirmed, parses and persists them for <paramref name="username"/>.
    /// </summary>
    /// <param name="connectionString">Database connection string passed to the command context.</param>
    /// <param name="path">Directory containing the exported history files.</param>
    /// <param name="username">User name the history is inserted for.</param>
    /// <returns>0 on success or when the user declines; 1 if any exception was thrown.</returns>
    public static int Execute(string connectionString, string path, string username)
    {
        var streams = new List<FileStream>();

        try
        {
            var context = new CommandContext().WithLogger().WithDb(connectionString).WithLastfmApi();
            var logger = context.Logger.CreateLogger("Scrobble");

            using var db = new ApplicationDbContext(context.DatabaseConfig.Options, context.Logger.CreateLogger<ApplicationDbContext>());

            var historyPersister = new HistoryPersister(db, new DataJsonContext(), new()
            {
                Username = username
            }, context.Logger.CreateLogger<HistoryPersister>());

            logger.LogInformation("Preparing to parse from {} for {}", path, username);

            // Match on the file name only, so a parent directory whose name happens to contain
            // "endsong_" does not pull in every file beneath it.
            var endSongs = Directory.EnumerateFiles(path)
                .Where(f => Path.GetFileName(f).Contains("endsong_"))
                .ToArray();

            Console.WriteLine("Parse {0} historical data files? (y/n) ", endSongs.Length);

            // ReadLine returns null when stdin is closed or redirected to an empty source;
            // treat that the same as declining instead of crashing on Trim().
            var input = Console.ReadLine();
            var confirmed = string.Equals(input?.Trim(), "y", StringComparison.OrdinalIgnoreCase);

            if (confirmed)
            {
                logger.LogInformation("Parsing files");

                // Only open the files once the user has agreed to process them.
                foreach (var file in endSongs)
                {
                    streams.Add(File.OpenRead(file));
                }

                // GetResult() rethrows the original exception rather than an AggregateException wrapper,
                // so the catch below prints the real failure.
                historyPersister.Process(streams).GetAwaiter().GetResult();
            }
            else
            {
                logger.LogInformation("Exiting");
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex);
            return 1;
        }
        finally
        {
            foreach (var stream in streams)
            {
                stream.Dispose();
            }
        }

        return 0;
    }
}
}

View File

@ -119,6 +119,7 @@ namespace Selector.CLI.Extensions
);
services.AddTransient<IScrobbleRepository, ScrobbleRepository>();
services.AddTransient<ISpotifyListenRepository, SpotifyListenRepository>();
services.AddTransient<IScrobbleMappingRepository, ScrobbleMappingRepository>();
services.AddHostedService<MigratorService>();

View File

@ -0,0 +1,39 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Microsoft.Extensions.Logging;
using SpotifyAPI.Web;
namespace Selector.CLI.Extensions
{
/// <summary>
/// Extension helpers for <see cref="ISpotifyClient"/>.
/// </summary>
public static class SpotifyExtensions
{
    /// <summary>
    /// Fetches a playlist and then pages through its tracks, returning both.
    /// </summary>
    /// <param name="client">Spotify client used for the requests.</param>
    /// <param name="playlistId">Id of the playlist to fetch.</param>
    /// <param name="logger">Optional logger; API failures are logged at debug level before being rethrown.</param>
    /// <returns>The playlist together with the full list of its paginated track items.</returns>
    /// <exception cref="APIUnauthorizedException">Rethrown after logging.</exception>
    /// <exception cref="APITooManyRequestsException">Rethrown after logging.</exception>
    /// <exception cref="APIException">Rethrown after logging.</exception>
    public static async Task<(FullPlaylist, IEnumerable<PlaylistTrack<IPlayableItem>>)> GetPopulated(this ISpotifyClient client, string playlistId, ILogger logger = null)
    {
        try
        {
            var playlist = await client.Playlists.Get(playlistId);
            var items = await client.Paginate(playlist.Tracks).ToListAsync();

            return (playlist, items);
        }
        catch (APIUnauthorizedException e)
        {
            logger?.LogDebug("Unauthorised error: [{message}] (should be refreshed and retried?)", e.Message);
            // Bare throw keeps the original stack trace; "throw e" would reset it to this line.
            throw;
        }
        catch (APITooManyRequestsException e)
        {
            logger?.LogDebug("Too many requests error: [{message}]", e.Message);
            throw;
        }
        catch (APIException e)
        {
            logger?.LogDebug("API error: [{message}]", e.Message);
            throw;
        }
    }
}
}

View File

@ -9,6 +9,7 @@ namespace Selector.CLI
var cmd = new HostRootCommand();
cmd.AddCommand(new ScrobbleCommand("scrobble", "Manipulate scrobbles"));
cmd.AddCommand(new MigrateCommand("migrate", "Migrate database"));
cmd.AddCommand(new SpotifyHistoryCommand("history", "Insert Spotify history"));
cmd.Invoke(args);
}

View File

@ -31,6 +31,7 @@
<ProjectReference Include="..\Selector\Selector.csproj" />
<ProjectReference Include="..\Selector.Model\Selector.Model.csproj" />
<ProjectReference Include="..\Selector.Cache\Selector.Cache.csproj" />
<ProjectReference Include="..\Selector.Data\Selector.Data.csproj" />
<ProjectReference Include="..\Selector.Event\Selector.Event.csproj" />
</ItemGroup>

View File

@ -0,0 +1,14 @@
using System;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Text.Json.Serialization.Metadata;
namespace Selector.Data;
/// <summary>
/// System.Text.Json serializer context declaring <see cref="EndSong"/> and <c>EndSong[]</c>
/// as the types it serves.
/// </summary>
/// <remarks>
/// The class body is intentionally empty and the type is partial; the remaining members
/// (such as the <c>EndSongArray</c> type info read by <c>HistoryPersister</c>) are presumably
/// supplied by the System.Text.Json source generator at build time.
/// </remarks>
[JsonSerializable(typeof(EndSong))]
[JsonSerializable(typeof(EndSong[]))]
public partial class DataJsonContext : JsonSerializerContext
{
}

27
Selector.Data/EndSong.cs Normal file
View File

@ -0,0 +1,27 @@
namespace Selector.Data;
/// <summary>
/// One record from a Spotify history export file, as deserialized through <c>DataJsonContext</c>.
/// </summary>
/// <remarks>
/// Property names are deliberately snake_case: they are the names the JSON is bound by, so
/// renaming any of them would change what is read from the files.
/// </remarks>
public record struct EndSong
{
public string conn_country { get; set; }
public string episode_name { get; set; }
public string episode_show_name { get; set; }
public bool? incognito_mode { get; set; }
public string ip_addr_decrypted { get; set; }
// Album, artist and track names are copied onto the stored listen by HistoryPersister.
public string master_metadata_album_album_name { get; set; }
public string master_metadata_album_artist_name { get; set; }
// HistoryPersister skips records where this is null, empty or whitespace.
public string master_metadata_track_name { get; set; }
// Stored as the listen's PlayedDuration; presumably milliseconds, going by the name.
public int ms_played { get; set; }
public bool? offline { get; set; }
public long? offline_timestamp { get; set; }
public string platform { get; set; }
public string reason_end { get; set; }
public string reason_start { get; set; }
public bool shuffle { get; set; }
public bool? skipped { get; set; }
public string spotify_episode_uri { get; set; }
// Stored as the listen's TrackUri.
public string spotify_track_uri { get; set; }
// Kept as text here; HistoryPersister parses it into a DateTime and converts it to UTC.
public string ts { get; set; }
public string user_agent_decrypted { get; set; }
public string username { get; set; }
}

View File

@ -0,0 +1,96 @@
using System;
using System.Text.Json;
using Selector.Model;
using Microsoft.Extensions.Logging;
using System.Diagnostics.Metrics;
namespace Selector.Data;
/// <summary>
/// Options controlling how <see cref="HistoryPersister"/> inserts history.
/// </summary>
public class HistoryPersisterConfig
{
/// <summary>User name used to look up the user whose history is being inserted.</summary>
public string Username { get; set; }
/// <summary>
/// When true (the default), existing Spotify listens for the user are removed before the new ones are added.
/// </summary>
public bool InitialClear { get; set; } = true;
/// <summary>
/// Defaults to false. Not read by <see cref="HistoryPersister"/> in its current form.
/// </summary>
public bool Apply50PercentRule { get; set; } = false;
}
/// <summary>
/// Parses Spotify history export JSON into <see cref="EndSong"/> records and stores the ones that
/// have a track name as <c>SpotifyListen</c> rows for the configured user.
/// </summary>
public class HistoryPersister
{
    private HistoryPersisterConfig Config { get; set; }
    private ApplicationDbContext Db { get; set; }
    private DataJsonContext Json { get; set; }
    private ILogger<HistoryPersister> Logger { get; set; }

    /// <summary>Creates a persister bound to a database context, JSON context and configuration.</summary>
    /// <param name="db">Database context the listens are written to.</param>
    /// <param name="json">Serializer context supplying the <see cref="EndSong"/> array type info.</param>
    /// <param name="config">Options, including the user name to insert for.</param>
    /// <param name="logger">Optional logger; all logging is skipped when null.</param>
    public HistoryPersister(ApplicationDbContext db, DataJsonContext json, HistoryPersisterConfig config, ILogger<HistoryPersister> logger = null)
    {
        Config = config;
        Db = db;
        Json = json;
        Logger = logger;
    }

    /// <summary>
    /// Parses a JSON string holding an array of records and persists it, blocking until done.
    /// </summary>
    /// <param name="input">JSON text containing an array of history records.</param>
    public void Process(string input)
    {
        var parsed = JsonSerializer.Deserialize(input, Json.EndSongArray);

        // A JSON literal "null" deserializes to a null array; treat it as no records.
        // The blocking Wait() is kept because this overload is synchronous by signature and
        // existing callers may rely on failures surfacing as AggregateException.
        Process(parsed ?? Array.Empty<EndSong>()).Wait();
    }

    /// <summary>Parses a single stream holding a JSON array of records and persists it.</summary>
    /// <param name="input">Readable stream positioned at the start of the JSON.</param>
    public async Task Process(Stream input)
    {
        var parsed = await JsonSerializer.DeserializeAsync(input, Json.EndSongArray);
        await Process(parsed ?? Array.Empty<EndSong>());
    }

    /// <summary>
    /// Parses every stream in turn, gathers all records and persists them in one pass.
    /// </summary>
    /// <param name="input">Streams each holding a JSON array of records.</param>
    public async Task Process(IEnumerable<Stream> input)
    {
        ArgumentNullException.ThrowIfNull(input);

        var songs = new List<EndSong>();

        foreach (var singleInput in input)
        {
            var parsed = await JsonSerializer.DeserializeAsync(singleInput, Json.EndSongArray);

            if (parsed is null)
            {
                // A stream whose content is the JSON literal "null" carries no records;
                // skip it rather than failing the whole import.
                Logger?.LogWarning("Skipping input with no records for {username}", Config.Username);
                continue;
            }

            songs.AddRange(parsed);
            Logger?.LogDebug("Parsed {} items for {}", parsed.Length, Config.Username);
        }

        await Process(songs);
    }

    /// <summary>
    /// Stores the given records for the configured user, optionally clearing that user's existing
    /// listens first, and saves the changes.
    /// </summary>
    /// <param name="input">Records to store; those without a track name are skipped.</param>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// Thrown by <c>Single</c> when there is not exactly one user with the configured user name.
    /// </exception>
    public async Task Process(IEnumerable<EndSong> input)
    {
        ArgumentNullException.ThrowIfNull(input);

        // Resolve the user first so an unknown user name fails before any deletions are queued.
        var user = Db.Users.Single(u => u.UserName == Config.Username);

        if (Config.InitialClear)
        {
            Db.SpotifyListen.RemoveRange(Db.SpotifyListen.Where(x => x.User.UserName == Config.Username));
        }

        // NOTE(review): the model configuration keys SpotifyListen on Timestamp alone, so two records
        // with an identical timestamp will presumably conflict when added - confirm against real data.
        var counter = 0;
        foreach (var item in input)
        {
            if (string.IsNullOrWhiteSpace(item.master_metadata_track_name))
            {
                continue;
            }

            Db.SpotifyListen.Add(new()
            {
                TrackName = item.master_metadata_track_name,
                AlbumName = item.master_metadata_album_album_name,
                ArtistName = item.master_metadata_album_artist_name,

                // The timestamp is machine-written, so parse it independently of the host culture.
                Timestamp = DateTime.Parse(item.ts, System.Globalization.CultureInfo.InvariantCulture).ToUniversalTime(),
                PlayedDuration = item.ms_played,
                TrackUri = item.spotify_track_uri,
                UserId = user.Id
            });
            counter++;
        }

        await Db.SaveChangesAsync();

        // Log only after the save succeeded, so the message never reports rows that were not written.
        Logger?.LogInformation("Added {} historical items for {}", counter, user.UserName);
    }
}

View File

@ -0,0 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\Selector.Model\Selector.Model.csproj" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="Microsoft.EntityFrameworkCore" Version="6.0.9" />
</ItemGroup>
</Project>

View File

@ -24,6 +24,8 @@ namespace Selector.Model
public DbSet<AlbumLastfmSpotifyMapping> AlbumMapping { get; set; }
public DbSet<ArtistLastfmSpotifyMapping> ArtistMapping { get; set; }
public DbSet<SpotifyListen> SpotifyListen { get; set; }
public ApplicationDbContext(
DbContextOptions<ApplicationDbContext> options,
ILogger<ApplicationDbContext> logger
@ -85,9 +87,15 @@ namespace Selector.Model
.Property(s => s.LastfmArtistName)
.UseCollation("case_insensitive");
modelBuilder.Entity<ArtistLastfmSpotifyMapping>().HasKey(s => s.SpotifyUri);
modelBuilder.Entity<ArtistLastfmSpotifyMapping>()
.Property(s => s.LastfmArtistName)
modelBuilder.Entity<SpotifyListen>().HasKey(s => s.Timestamp);
modelBuilder.Entity<SpotifyListen>()
.Property(s => s.TrackName)
.UseCollation("case_insensitive");
modelBuilder.Entity<SpotifyListen>()
.Property(s => s.AlbumName)
.UseCollation("case_insensitive");
modelBuilder.Entity<SpotifyListen>()
.Property(s => s.ArtistName)
.UseCollation("case_insensitive");
SeedData.Seed(modelBuilder);

View File

@ -0,0 +1,21 @@
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
namespace Selector.Model
{
/// <summary>
/// Storage abstraction for <see cref="SpotifyListen"/> records, addressed by timestamp.
/// </summary>
public interface ISpotifyListenRepository
{
    /// <summary>Queues a single listen for insertion.</summary>
    void Add(SpotifyListen item);

    /// <summary>Queues several listens for insertion.</summary>
    void AddRange(IEnumerable<SpotifyListen> item);

    /// <summary>
    /// Returns the listens matching every filter that was supplied; filters left null are not applied.
    /// </summary>
    IEnumerable<SpotifyListen> GetAll(
        string include = null,
        string userId = null,
        string username = null,
        string trackName = null,
        string albumName = null,
        string artistName = null,
        DateTime? from = null,
        DateTime? to = null);

    /// <summary>Looks up a listen by its timestamp key.</summary>
    SpotifyListen Find(DateTime key, string include = null);

    /// <summary>Removes the listen stored under the given timestamp key.</summary>
    void Remove(DateTime key);

    /// <summary>Removes the given listen.</summary>
    void Remove(SpotifyListen scrobble);

    /// <summary>Removes the given listens.</summary>
    void RemoveRange(IEnumerable<SpotifyListen> scrobbles);

    /// <summary>Marks the given listen as updated.</summary>
    void Update(SpotifyListen item);

    /// <summary>Persists pending changes.</summary>
    /// <returns>A task yielding an integer result from the save operation.</returns>
    Task<int> Save();

    /// <summary>
    /// Counts the listens matching every filter that was supplied; takes the same filters as <see cref="GetAll"/>.
    /// </summary>
    int Count(
        string include = null,
        string userId = null,
        string username = null,
        string trackName = null,
        string albumName = null,
        string artistName = null,
        DateTime? from = null,
        DateTime? to = null);
}
}

View File

@ -0,0 +1,14 @@
using System;
namespace Selector.Model;
/// <summary>
/// A single Spotify play stored in the database; adds Spotify-specific and ownership fields
/// to the track/album/artist/timestamp fields of <see cref="Listen"/>.
/// </summary>
public class SpotifyListen: Listen
{
/// <summary>How long the track was played; filled from the export's <c>ms_played</c> value on import.</summary>
public int? PlayedDuration { get; set; }
/// <summary>Spotify URI of the track; filled from the export's <c>spotify_track_uri</c> value on import.</summary>
public string TrackUri { get; set; }
/// <summary>Id of the owning user.</summary>
public string UserId { get; set; }
/// <summary>The owning user.</summary>
public ApplicationUser User { get; set; }
}
}

View File

@ -0,0 +1,129 @@
using Microsoft.Extensions.Logging;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Microsoft.EntityFrameworkCore;
namespace Selector.Model
{
/// <summary>
/// <see cref="ISpotifyListenRepository"/> implementation that works directly against
/// <see cref="ApplicationDbContext"/>.
/// </summary>
public class SpotifyListenRepository : ISpotifyListenRepository
{
    private readonly ApplicationDbContext db;

    /// <summary>Creates a repository over the given database context.</summary>
    public SpotifyListenRepository(ApplicationDbContext context) => db = context;

    /// <inheritdoc />
    public void Add(SpotifyListen item) => db.SpotifyListen.Add(item);

    /// <inheritdoc />
    public void AddRange(IEnumerable<SpotifyListen> item) => db.SpotifyListen.AddRange(item);

    /// <summary>
    /// Returns the first listen whose timestamp equals <paramref name="key"/>, or null if there is none.
    /// </summary>
    /// <param name="key">Timestamp to look for.</param>
    /// <param name="include">Optional navigation path to load along with the listen.</param>
    public SpotifyListen Find(DateTime key, string include = null)
    {
        var matches = db.SpotifyListen.Where(s => s.Timestamp == key);

        return string.IsNullOrWhiteSpace(include)
            ? matches.FirstOrDefault()
            : matches.Include(include).FirstOrDefault();
    }

    /// <summary>
    /// Builds the filtered query shared by <see cref="GetAll"/> and <see cref="Count"/>.
    /// Each filter is applied only when a non-blank / non-null value is supplied.
    /// </summary>
    private IQueryable<SpotifyListen> GetAllQueryable(string include = null, string userId = null, string username = null, string trackName = null, string albumName = null, string artistName = null, DateTime? from = null, DateTime? to = null)
    {
        static bool Given(string value) => !string.IsNullOrWhiteSpace(value);

        var query = db.SpotifyListen.AsQueryable();

        if (Given(include))
        {
            query = query.Include(include);
        }

        if (Given(userId))
        {
            query = query.Where(s => s.UserId == userId);
        }

        if (Given(username))
        {
            // The user name is resolved to a user first, compared in upper-cased form.
            var upperCased = username.ToUpperInvariant();
            var owner = db.Users.AsNoTracking().FirstOrDefault(u => u.NormalizedUserName == upperCased);

            // An unknown user name yields an empty in-memory result instead of an unfiltered one.
            query = owner is null
                ? Enumerable.Empty<SpotifyListen>().AsQueryable()
                : query.Where(s => s.UserId == owner.Id);
        }

        if (Given(trackName))
        {
            query = query.Where(s => s.TrackName == trackName);
        }

        if (Given(albumName))
        {
            query = query.Where(s => s.AlbumName == albumName);
        }

        if (Given(artistName))
        {
            query = query.Where(s => s.ArtistName == artistName);
        }

        // Lower bound is inclusive, upper bound is exclusive.
        if (from.HasValue)
        {
            query = query.Where(u => u.Timestamp >= from.Value);
        }

        if (to.HasValue)
        {
            query = query.Where(u => u.Timestamp < to.Value);
        }

        return query;
    }

    /// <inheritdoc />
    public IEnumerable<SpotifyListen> GetAll(string include = null, string userId = null, string username = null, string trackName = null, string albumName = null, string artistName = null, DateTime? from = null, DateTime? to = null)
    {
        var query = GetAllQueryable(include, userId, username, trackName, albumName, artistName, from, to);
        return query.AsEnumerable();
    }

    /// <summary>Finds the listen stored under <paramref name="key"/> and removes it.</summary>
    public void Remove(DateTime key)
    {
        var target = Find(key);
        Remove(target);
    }

    /// <inheritdoc />
    public void Remove(SpotifyListen scrobble) => db.SpotifyListen.Remove(scrobble);

    /// <inheritdoc />
    public void RemoveRange(IEnumerable<SpotifyListen> scrobbles) => db.SpotifyListen.RemoveRange(scrobbles);

    /// <inheritdoc />
    public void Update(SpotifyListen item) => db.SpotifyListen.Update(item);

    /// <summary>Saves pending changes on the underlying context.</summary>
    public Task<int> Save() => db.SaveChangesAsync();

    /// <inheritdoc />
    public int Count(string include = null, string userId = null, string username = null, string trackName = null, string albumName = null, string artistName = null, DateTime? from = null, DateTime? to = null)
    {
        var query = GetAllQueryable(include, userId, username, trackName, albumName, artistName, from, to);
        return query.Count();
    }
}
}

View File

@ -30,6 +30,10 @@
<ItemGroup>
<Folder Include="Migrations\" />
<Folder Include="Listen\" />
</ItemGroup>
<ItemGroup>
<None Remove="Listen\" />
</ItemGroup>
</Project>

View File

@ -24,6 +24,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Selector.Cache", "Selector.
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Selector.Event", "Selector.Event\Selector.Event.csproj", "{C2FF1673-CB1A-43B7-A814-07BB3CB3A0D6}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Selector.Data", "Selector.Data\Selector.Data.csproj", "{CB62ACCB-94F1-4B78-A195-8B108B9E800D}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@ -58,6 +60,10 @@ Global
{C2FF1673-CB1A-43B7-A814-07BB3CB3A0D6}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C2FF1673-CB1A-43B7-A814-07BB3CB3A0D6}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C2FF1673-CB1A-43B7-A814-07BB3CB3A0D6}.Release|Any CPU.Build.0 = Release|Any CPU
{CB62ACCB-94F1-4B78-A195-8B108B9E800D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{CB62ACCB-94F1-4B78-A195-8B108B9E800D}.Debug|Any CPU.Build.0 = Debug|Any CPU
{CB62ACCB-94F1-4B78-A195-8B108B9E800D}.Release|Any CPU.ActiveCfg = Release|Any CPU
{CB62ACCB-94F1-4B78-A195-8B108B9E800D}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View File

@ -0,0 +1,13 @@
using System;
namespace Selector;
/// <summary>
/// Common shape of a played track: when it was played and its track, album and artist names.
/// </summary>
public interface IListen
{
/// <summary>When the track was played.</summary>
DateTime Timestamp { get; set; }
/// <summary>Name of the track.</summary>
string TrackName { get; set; }
/// <summary>Name of the album.</summary>
string AlbumName { get; set; }
/// <summary>Name of the artist.</summary>
string ArtistName { get; set; }
}

13
Selector/Listen/Listen.cs Normal file
View File

@ -0,0 +1,13 @@
using System;
namespace Selector;
/// <summary>
/// Plain implementation of <see cref="IListen"/> with auto-properties and no extra behaviour.
/// </summary>
public class Listen: IListen
{
/// <summary>Name of the track.</summary>
public string TrackName { get; set; }
/// <summary>Name of the album.</summary>
public string AlbumName { get; set; }
/// <summary>Name of the artist.</summary>
public string ArtistName { get; set; }
/// <summary>When the track was played.</summary>
public DateTime Timestamp { get; set; }
}

View File

@ -11,7 +11,7 @@ namespace Selector
public static class Resampler
{
public static IEnumerable<CountSample> Resample(this IEnumerable<Scrobble> scrobbles, TimeSpan window)
public static IEnumerable<CountSample> Resample(this IEnumerable<IListen> scrobbles, TimeSpan window)
{
var sortedScrobbles = scrobbles.OrderBy(s => s.Timestamp).ToList();
@ -68,7 +68,7 @@ namespace Selector
}
}
public static IEnumerable<CountSample> ResampleByMonth(this IEnumerable<Scrobble> scrobbles)
public static IEnumerable<CountSample> ResampleByMonth(this IEnumerable<IListen> scrobbles)
{
var sortedScrobbles = scrobbles.OrderBy(s => s.Timestamp).ToList();

View File

@ -2,7 +2,7 @@
namespace Selector
{
public class Scrobble
public class Scrobble: IListen
{
public DateTime Timestamp { get; set; }
public string TrackName { get; set; }

109
spotify-data.ipynb Normal file
View File

@ -0,0 +1,109 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"from datetime import datetime\n",
"import random\n",
"from pprint import pprint"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total: 445582\n",
"Total Populated: 442034 (99.20%)\n",
"Total New: 130411 (29.27%)\n",
"{'conn_country': 'GB',\n",
" 'episode_name': None,\n",
" 'episode_show_name': None,\n",
" 'incognito_mode': False,\n",
" 'ip_addr_decrypted': '82.132.244.90',\n",
" 'master_metadata_album_album_name': 'Tales Told By Dead Friends',\n",
" 'master_metadata_album_artist_name': 'Mayday Parade',\n",
" 'master_metadata_track_name': 'Three Cheers For Five Years',\n",
" 'ms_played': 33274,\n",
" 'offline': False,\n",
" 'offline_timestamp': 0,\n",
" 'platform': 'iOS 9.2.1 (iPhone7,1)',\n",
" 'reason_end': 'fwdbtn',\n",
" 'reason_start': 'fwdbtn',\n",
" 'shuffle': True,\n",
" 'skipped': True,\n",
" 'spotify_episode_uri': None,\n",
" 'spotify_track_uri': 'spotify:track:1aw8gphDUzqalHEi9Z8M38',\n",
" 'ts': '2016-02-11T08:13:55Z',\n",
" 'user_agent_decrypted': 'unknown',\n",
" 'username': 'sarsoo'}\n"
]
}
],
"source": [
"data = []\n",
"data_with_names = []\n",
"new_data = []\n",
"\n",
"folder = '/Users/andy/lab/backups/spotify-2022-03-07'\n",
"\n",
"for i in os.listdir(folder):\n",
" if i.startswith('endsong_'):\n",
" with open(f'{folder}/{i}') as f:\n",
" data += json.loads(f.read())\n",
"\n",
"data.sort(key = lambda a: a['ts'])\n",
"data_with_names = [i for i in data if i['master_metadata_track_name'] is not None]\n",
"new_data = [i for i in data_with_names if datetime.fromisoformat(i['ts'].split('T')[0]) < datetime(2017, 11, 3)]\n",
"\n",
"print(f'Total: {len(data)}')\n",
"print(f'Total Populated: {len(data_with_names)} ({len(data_with_names)/len(data)*100:.2f}%)')\n",
"print(f'Total New: {len(new_data)} ({len(new_data)/len(data)*100:.2f}%)')\n",
"\n",
"pprint(random.choice(new_data))\n",
"# print(min(i[0] for i in data))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"interpreter": {
"hash": "a0a5145e6c304e2a9afaf5b930a2955b950bd4b81fe94f7c42930f43f42762eb"
},
"kernelspec": {
"display_name": "Python 3.10.7 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}