listening-analysis/playlist-classifier.ipynb

453 lines
333 KiB
Plaintext
Raw Normal View History

2021-02-04 13:34:25 +00:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
2021-02-04 13:34:25 +00:00
"source": [
"# Playlist Classifier\n",
"\n",
"Given a list of playlists, can unknown tracks be correctly classified?"
]
2021-02-04 13:34:25 +00:00
},
{
"cell_type": "code",
"execution_count": 12,
2021-02-04 13:34:25 +00:00
"metadata": {},
"outputs": [],
"source": [
"# playlist_names = [\"RAP\", \"EDM\", \"ROCK\", \"METAL\", \"JAZZ\", \"POP\"] # super-genres (inactive: this list was always overwritten by the ALL RAP variant on the next line)\n",
"playlist_names = [\"ALL RAP\", \"EDM\", \"ROCK\", \"METAL\", \"JAZZ\", \"POP\"] # super-genres\n",
2021-02-13 11:53:15 +00:00
"# playlist_names = [\"RAP\", \"EDM\", \"ROCK\", \"METAL\", \"JAZZ\"] # super-genres without POP\n",
2021-02-04 13:34:25 +00:00
"# playlist_names = [\"DNB\", \"HOUSE\", \"TECHNO\", \"GARAGE\", \"DUBSTEP\", \"BASS\"] # EDM playlists\n",
"# playlist_names = [\"20s rap\", \"10s rap\", \"00s rap\", \"90s rap\", \"80s rap\"] # rap decades\n",
"# playlist_names = [\"UK RAP\", \"US RAP\"] # UK/US split\n",
"# playlist_names = [\"uk rap\", \"grime\", \"drill\", \"afro bash\"] # british rap playlists\n",
"# playlist_names = [\"20s rap\", \"10s rap\", \"00s rap\", \"90s rap\", \"80s rap\", \"trap\", \"gangsta rap\", \"industrial rap\", \"weird rap\", \"jazz rap\", \"boom bap\", \"trap metal\"] # american rap playlists\n",
"# playlist_names = [\"rock\", \"indie\", \"punk\", \"pop rock\", \"bluesy rock\", \"hard rock\", \"chilled rock\", \"emo\", \"pop punk\", \"stoner rock/metal\", \"post-hardcore\", \"melodic hardcore\", \"art rock\", \"post-rock\", \"classic pop punk\", \"90s rock & grunge\", \"90s indie & britpop\", \"psych\"] # rock playlists\n",
"# playlist_names = [\"metal\", \"metalcore\", \"mathcore\", \"hardcore\", \"black metal\", \"death metal\", \"doom metal\", \"sludge metal\", \"classic metal\", \"industrial\", \"nu metal\", \"calm metal\", \"thrash metal\"] # metal playlists\n",
"\n",
"# headers = float_headers + [\"duration_ms\", \"mode\", \"loudness\", \"tempo\"]\n",
"headers = float_headers"
]
},
{
"cell_type": "markdown",
"metadata": {},
2021-02-04 13:34:25 +00:00
"source": [
"Pull and process playlist information.\n",
"\n",
"1. Get live playlist track information from Spotify\n",
"2. Filter listening history for these tracks\n",
"\n",
"Filter out tracks without features and drop duplicates before taking only the descriptor parameters"
]
2021-02-04 13:34:25 +00:00
},
{
"cell_type": "code",
"execution_count": 13,
2021-02-04 13:34:25 +00:00
"metadata": {},
"outputs": [],
"source": [
"# 1) pull the live track listing of every configured playlist\n",
"playlists = [get_playlist(name, spotnet) for name in playlist_names]\n",
"\n",
"# 2) join each playlist with the listening history on track/artist name,\n",
"#    drop rows without a uri, keep one row per uri (distinct on uri),\n",
"#    then select only the descriptor float columns listed in headers\n",
"filtered_playlists = [\n",
"    pd.merge(track_frame(playlist.tracks), scrobbles, on=['track', 'artist'])\n",
"      .dropna(subset=['uri'])\n",
"      .drop_duplicates(['uri'])\n",
"      .loc[:, headers]\n",
"    for playlist in playlists\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
2021-02-04 13:34:25 +00:00
"source": [
"Construct the dataset with associated labels before splitting into a train and test set."
]
2021-02-04 13:34:25 +00:00
},
{
"cell_type": "code",
"execution_count": 14,
2021-02-04 13:34:25 +00:00
"metadata": {},
"outputs": [],
"source": [
"# stack the feature rows of every playlist into a single frame\n",
"dataset = pd.concat(filtered_playlists)\n",
"# one integer class label per row: the position of the row's playlist in filtered_playlists\n",
"labels = np.concatenate([\n",
"    np.full(len(frame), class_idx)\n",
"    for class_idx, frame in enumerate(filtered_playlists)\n",
"])\n",
"\n",
"# stratify: maintains class proportions in test and train set\n",
"data_train, data_test, labels_train, labels_test = train_test_split(\n",
"    dataset,\n",
"    labels,\n",
"    test_size=0.25,\n",
"    random_state=70,\n",
"    stratify=labels,\n",
")"
2021-02-04 13:34:25 +00:00
]
},
{
"cell_type": "markdown",
"metadata": {},
2021-02-04 13:34:25 +00:00
"source": [
"# SVM\n",
"Support Vector Machine"
]
2021-02-04 13:34:25 +00:00
},
{
"cell_type": "code",
"execution_count": 15,
2021-02-04 13:34:25 +00:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>uw-rbf</th>\n",
" <th>w-rbf</th>\n",
" <th>linear</th>\n",
" <th>poly</th>\n",
" <th>sigmoid</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>accuracy %</th>\n",
" <td>75.03</td>\n",
" <td>71.0</td>\n",
" <td>68.98</td>\n",
" <td>71.37</td>\n",
" <td>34.3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
2021-02-13 11:53:15 +00:00
"text/plain": [
" uw-rbf w-rbf linear poly sigmoid\n",
"accuracy % 75.03 71.0 68.98 71.37 34.3"
]
2021-02-04 13:34:25 +00:00
},
"execution_count": 15,
2021-02-13 11:53:15 +00:00
"metadata": {},
"output_type": "execute_result"
2021-02-04 13:34:25 +00:00
}
],
"source": [
"### TRAIN ###\n",
"# SVC with default settings and unweighted classes; fit() returns the fitted estimator\n",
"clf = svm.SVC().fit(data_train, labels_train)\n",
"\n",
2021-02-13 11:53:15 +00:00
"# class_weight='balanced': weight classes based on prevalence\n",
"wclf = svm.SVC(class_weight='balanced').fit(data_train, labels_train)\n",
"\n",
"# same class weighting, one classifier per alternative kernel\n",
"lclf = svm.SVC(kernel='linear', class_weight='balanced').fit(data_train, labels_train)\n",
"\n",
"pclf = svm.SVC(kernel='poly', degree=3, class_weight='balanced').fit(data_train, labels_train)\n",
"\n",
"sclf = svm.SVC(kernel='sigmoid', class_weight='balanced').fit(data_train, labels_train)\n",
2021-02-04 13:34:25 +00:00
"\n",
"### EVALUATE ###\n",
2021-02-13 11:53:15 +00:00
"# display name -> fitted classifier, in the order the results are shown\n",
"models = {\n",
"    'uw-rbf': clf,\n",
"    'w-rbf': wclf,\n",
"    'linear': lclf,\n",
"    'poly': pclf,\n",
"    'sigmoid': sclf,\n",
"}\n",
"# score of each classifier on the held-out test split, as a fraction\n",
"accuracy = {name: model.score(data_test, labels_test) for name, model in models.items()}\n",
"\n",
"# one-row table of percentages, rounded to two decimals\n",
"pd.DataFrame(accuracy, index=['accuracy %']).mul(100).round(decimals=2)"
]
},
{
"cell_type": "code",
"execution_count": 16,
2021-02-13 11:53:15 +00:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA8UAAADRCAYAAADlnRB8AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAABJ0AAASdAHeZh94AABN/0lEQVR4nO3dd5gb1dnG4d/r3sDGGINpLjRjiukdbAIECAQIvQanACGB8CWEhBQiFEoghFBCCSEBU0PvLYRi05sB0zvGdHA3Btz2fH+cI1aWpd3VrqQz0jz3de219mgkPStNe2fOOWPOOURERERERETSqFPsACIiIiIiIiKxqCgWERERERGR1FJRLCIiIiIiIqmlolhERERERERSS0WxiIiIiIiIpJaKYhEREREREUktFcUiIiIiIiKSWiqKRUREREREJLVUFIuIiIiIiEhqqSgWERERERGR1FJRLCIiIiIiIqmVuqLYzE40M2dmo8t83kZm9j8zmxKe/3xVAopUgJZzSaKwTI2LnUOk0ZjZ2LB+DYmdRdLJzMaEZXBM7CwdYWajw99xYhnPadcxlyRLu4rivAVmXAvzDAnzTGpvuKQwsyWBO4FNgGuALPCPMl/j4vB5fGlm/SqfUmIysw3D9/tkiccPCI87Mxta5PGeZvZ1WD66Vz9x0YxazqVsueU6do6k0johOXn7gNzPwnAC8gEzOzB2vrYysy3z/obDY+eR6jCzzmZ2mJmNN7NpZjbfzD4zsxfM7F9mtlvsjI3AzHqZ2YywPl0dO0+adYkdoE5sAgwEfu+cO7XcJ5vZEsD+gAN6AgcD51U0ocT2HDAd2NDMlnTOzSp4fDv892/At4B/Fzy+JdAd+J9zbm61w5ag5VyqbU3gy9ghakXrhJSQDb+7AsOB3YFtzWwj59wv48Vqs1wh7MK//xkxi1SBmXUG7gB2AmbgT5h/AHQD1gIOxC+7t+U97WbgCeDjWmatgqfw+6opNXq//YC++PVpTzNb2jk3tUbvLXlS13y6nZYPvz9q5/MPBPoAZwHzgMMqEUqSwznXBIwDOgOjiszyrfD41PDvYo8D3F+FeG2l5Vyqyjn3mnNucuwcNaR1QhbjnDsx/PzeObcXsCP+gPj/kt78ObR22Ad4E7gJfyJ4/aihpBoOwBfEE4EhzrmDnXPHO+d+6ZzbERgAnJD/BOfczLCNnxkhb8U4574Mf0etiuLDgSbgDPzFkUNr9L5SoKZFsZkdEZoHHFYw/Qd5zcu6Fzz2ZGhW2rON7zEp/CxpZn8L/55frG+AmR1qZs+Z2VehScglZrZc3uNDQrPAy8KkS/OaDI0p408/DL/Anw3cDqxrZpu28DcMD1kmmdnckO1hMzuyPfPmNWUfW+L9xhU2f8zvU2Fmm5jZnaH5zDd9lsxsWzP7p5m9Ymazwuf4kpllzKxHiffqbGY/MbNHzWxmeM5boSnOamGeP4f3KbphsOamyneU+gwjyRW0ixS94fMaGh4fD2xb5LllFcVazrWc1yMr0u3G8vpimdneZvaU+X3BNDO7xsxWKPFa/cNn+Gr4fGea2f1m9u0i8/Y1s+PMN1P9wMzmmdnnZnabmW3eUlYzWy58bx+ab+46pow/WetE8+tqnSjBOXc/8Bq+JdHGuenh770xfLdzzew9M7vAzAa19pph+XBm9mAL87xofr/R6uvlORjf6mFs+IHmK8fF3qOXmf3GzJ4xs9lm9kVYZ881s2XbM2+x5TbvsaL9Wq2VfaaZLW9mfwzL5ydhG/GRmV1tZiNa+Ps2MbNrw/Zhrpl9bGb3mtm+4fFqfQ/VtkX4PbZYkRsKx0X+plKffXhsx/DZzgnbk1vCZ7NYX/j87ZaZrWJmN5jZ1LBM3Gtma4f5lgnboY/N1wlPm1mx46vcPuDPZvZ6mHe6mf3XzLYvMu8328Aij21oZveELLPM7D4rsQ9pi/C3bIY/9jsdf/L0x608Zz/z+7pp4W+ZZGb/MbON2jOvtd
Af2krsQ/K+t2FmdrT5JvVfWdi/m1k3MzvKzO4yv92aGzLcZ2Y7t/C3rRjW9zfD600zf0xwQni8s5m9Hz77PiVe4+8h294tfY7F1PpKce6Af7uC6bn/9wS+WbjMrC+wIfC4c+6rMt6nG/AAsAdwL3AO8G7BPL/A95eciD9geR34AfCYmS0T5pmBb+Z0a/j/reH/WeD5tgQxfwZ1Q+B+59z7tLITMbNdgGfxZ4peBv4G3Ii/Avnr9s7bAZsDDwM9gEvwhdO88NhvgG/jP4uLgH+Fx04E7jbf/CY/bzfgbuBCYCXgauBcYALwPXwTYsJrNVF6R3tE+F1Wf9caeCD8LrV8PwA8CAzK38ma78u7Eb759bNlvJ+Wcy3njeSnwJXAJOB84CV8s7L7bPGTpYPxn+fxwOf4z+hafJO3e6zgxGuYfgr+874Tvwz9D38y6iEz26lEpv745oCb4a+KnQd82pY/RuvEInm1TrTOwm8HYGa7Ao8B3wXuw3/HrwNHAs9YkbEp8jnnXsPvb0ab2eqLvZnZFsDawK3OuXKau+ZO9FwO3AN8AhxoZr2LvMdS4W84Dd9i4hL8MvAqfj+0Znvm7YCW9pnb4LcnM/Dr0ln4dX9v4CkzG1nk7zssZN4j/D4Tv30ZiN+eVfN7qLZc893FMpfLzPbHr//rA9fj1/OlgMeBIS08dQjwJLAsfvt5L7A9MM78ibQn8CeRrgWuA0bitz8rF7x/P/z3czwwE38sdCN+G3evmR1BG4Tv6uGQ4W78/mAevhVgyZOdrcht68Y656bhT56uaWZbF3l/C8XpNcC6+H3SWSHT1sCu7Zm3g84BTgJeDP9+NEzvH/6/BH5f+zd8U/v1gbvMbLHCPxTqE4Gj8S0WzwWuAmbj9y045xYCF4fXPaDIa+S6KX1C8zFt2znnyv4BRuM33ONamGdImGdSwfT3gM8Ay5v2Eb5gXgiclDd99/AaJ5SRbVJ4zn1A7yKPnxgenwesX/DYWeGxfxdMHxOmj2nHZ/WP8NwDwv+74PtbfAEsWTDvAPwKOw8YVeS1VmznvLnvYmyJjOP8olD0O3bAESWeNyz/e8ybflJ43n4F008N028Duhc81h1YJu//d4R51y6Ybwn8CjIZ6Nye5beaP2FZbir4W3IrdRd8XxwHHJX3+HfDtJu0nGs5r4flvCCrK/xcW5l3XIlldRawTsFjV4fH9i3yXTYB+xdM74cv1L4Cls2b3hcYUGz5wa+zr5b6u/AH/13a8blonUjpOtHK8u+KTN8+LM9NwGB8UTgVf0y0dcG8vwmvc2/B9LFh+pC8aXuHaX8t8p65+XcoI/9m4Tn/zZv21zDtR0Xmz62/FwKdCh7rA/Rt57yLLbd5j42hyH6M1veZA4ElikwfiV9n7y6YPgKYD0wD1iryvPz1sKLfQ42W1fXx25gm4ApgT2BwK89Z7LMP6+10YC4wsmD+02je1uQvt0Pypv++4DknhOnT8NvYTnmPHRIeO6vgOReF6RexaO2xGn5bOrfg/UeH+U/Mm2b41hwO2L3g9Y/Jyzu6jM+4R/g7ZgA9w7Rdw+tcUWT+w8NjT+WvD+GxzsCgds57YqnslNiH5C23HwJDizyve/46kDe9L/6E97Tc3xymd8OfoHLAga2sT4Pw694zLSyDp7RruW/nypJbYMa1ME/ug5xUMP3SMH3d8P8R4f9HAk8Dj+XNe254bIsysk0KzxlZ4vHcl//vIo/1DQvnV+TtuGlnsQD0xh/ozQB65E3P7USOLJj/2DD9nDa8djnzFl2o8x4fR+kDo+fasXz0D8+9JG9a5/A5fAks34bX2CW8xt8Lph8Rpv+xPctutX/wOw9H3kE8/qD7rrz/f0peAUxzkfqzMt5Hy/ni82o5j/BD5Yrik4vMvy0FB5P4g1QHXF/iPXYPj/+0jZly+5mVi2SdCwxsx2eidaJ5WurWiRb+Nhd+Tgw/pwA3AAvC9L+F+Q4K/7+6yGt0ofngceW86WNZvLjogt//TG
HRbX2/8H28RZGTGy3k/zd5J3rCtLXDtCcL5h2IL+o/okgR2t55Sy23eY+NoeWieGQ7vrfbgK+BrnnT/h5e7xdteH5
"text/plain": [
"<Figure size 1200x240 with 5 Axes>"
]
2021-02-13 11:53:15 +00:00
},
"metadata": {},
"output_type": "display_data"
2021-02-13 11:53:15 +00:00
}
],
"source": [
"fig, ax = plt.subplots(nrows=1, ncols=len(models))\n",
"fig.set_figwidth(2 * len(models))\n",
"fig.set_figheight(2)\n",
2021-02-04 13:34:25 +00:00
"\n",
2021-02-13 11:53:15 +00:00
"# one pie per model: green slice = accuracy, red slice = remainder.\n",
"# The loop variable is pie_ax rather than ax so the loop does not rebind ax\n",
"# (the array of axes being iterated) to a single Axes object.\n",
"for (name, acc), pie_ax in zip(accuracy.items(), ax):\n",
"    pie_ax.pie([acc, 1 - acc], colors=['g', 'r'], startangle=90, counterclock=False)\n",
"    pie_ax.set_title(f\"{name.capitalize()} Accuracy\")"
2021-02-04 13:34:25 +00:00
]
},
{
"cell_type": "code",
"execution_count": 17,
2021-02-04 13:34:25 +00:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA/sAAAHUCAYAAABlHTjwAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAABJ0AAASdAHeZh94AAEAAElEQVR4nOzdd3gUVdvA4d/ZZDe9N0pooYRexC4oVQVUbNixYPe1Y9fX+vrZewVsYG/YUVAUKYKK9BYgCSWd9F5293x/zG6Sze6mQBIwPPd15UJn5szO2Zmd8zwzZ84orTVCCCGEEEIIIYToOEwHewOEEEIIIYQQQgjRuiTZF0IIIYQQQgghOhhJ9oUQQgghhBBCiA5Gkn0hhBBCCCGEEKKDkWRfCCGEEEIIIYToYCTZF0IIIYQQQgghOhhJ9oUQQgghhBBCiA5Gkn0hhBBCCCGEEKKDkWRfCCGEEEIIIYToYCTZF0IIIYQQQgghOhhJ9oUQQgghhBBCiA5Gkn0hxH5RSmml1JJWWM8SpZRuhU1qE0qphx11HXOwt0UIIYQ4FCilejraxvdaYV27lFK7Dnyr2oZS6j1HXXse7G0RoqUk2ReiDSilxjSVDNdrKHe135Z1DK11oaGtKKUSlVJzlFI7lVKVSqkypVSqUmqRUupBpVScY7mrHXX5rBnrvM+x7EuO/3ceP1opVaqUCvFSTimlkustO6Y16yqEEKL9KaVGOs7pf3qZf2G9834vD/MDHO1TuVLKr+23uPW05oWGtqKUOkop9aFSardSqkopVexoi79TSt2llApyLPe4oy5PN2Odsx3L3ub4/zH19nGqUkp5KRfs+Hznsj1btbLikCbJvhBifw0ALj3YG3GoUUqNA9YBVwE5wFvAS8DvQE/gEWCIY/GPgVJgqlIqupF1KuBKx//ObjDbCgQBF3opPh5IcCwnhBCiY1gLFAAjlVKhHuaPB5y95sZ5mH8C4Acs11pX7cfnp2PEAffuR9kOTSl1CbAKo13eAbwBvAb8CQwDngI6OxZ/C2M/XaqUMjeyziDgAqAKmNtgthUjvpjopfgFQAgSBxyWJNkXQuwXrfU2rfWeg70dh6BZgD9wudb6eK31jVrr+7TWl2ut+2E09EkAWutSjITfQuMXTsZhJOx/aK03N5j3D5AFXO2l7NUYwcHP+1shIYQQhxattR1YAvgAJ3lYZJxjfh6ek33ntMX7+fk1jjggc3/Kd1RKqUCMxF4DE7XWE7TWt2qt79VaX6S17o5xoSUXQGudCvwCxAGnN7JqZ8L+pdY6v8G8XzDa+cbigEyMeEEcZiTZF+IQU/8ZcaXUuUqpvxzd7PKVUp8opbo2WP5jx/J9G0yf65i+uMH0EKVUjVJqqYfPvlAp9ZtSqtDRvW+rUuoBT138vHWlV0p1Vkq9q5TKUUpVKKXWKaUuq9fd7GEv9fZ1dFXf4ejytlcp9ZRSylJvmctV3fP9J9Xrkua2XqXUMUqpL5RSWUqpasf6Zimlunj5/JFKqZ+UUiWO7m6/KKWO87SsN0qpWKAPUKS1bnjlHQCt9Qat9d56k5x36q9qZNXOBrzhXX0wrtS/CxyplBrWYHuigTOBL4GGwYEQQoh/N2f77pLMO7pp93LM/x0Y66GsW7LvaIdvUEqtcrSD5UqptUqpG5VSLjlDY13plVL9lFJfKqUKlPEY2x9KqSnONlwpdbmnyiilgpRSzyil9jjigJ1Kqbvrd093tPWpjv+9rEEccHmD9Z2ilFqglMp1rC/Zsf5wL58/QSm1zLHN+Uqpr5VS/T0t24jBQCiwSWvt8UKK1voPrXVhvUnOtt1bsl5/nqc4IA+Yj9FLMKb+DKXUUOBojDhB7uwfhiTZF+LQdQPwAbAL4yrxJuB84JcGybezMRnfoLzz/49XSvnXm34S4EuDq/lKqXeAjzCS1S8dn5kPPAb8pJTybWqDHcnuSuByYCvwIkZXw9eBW5oo/hFwE7AMo8tbBXAXxp
1yp3UY3eABdjv+2/m3pN52zABWAJOA3xzbsRojoV6tlOreYLuPd3zuBOBH4FWg2rHOY5qqdz1FGI1psFKqc1MLA2itVzvqNUApdULD+UqpKIyEvQjw9my/sxtgw0DhMoxeA3Oasy1CCCH+VX51/Out/f8Vow3srJQa6JypjG7/R2I8BrDGMc0MfI/R9odjtMmzMXKFV3DvOu6RIzleBZyN0Q6/BOwBvsJoy7wxAwuBczDa4beAAOBJ4MF6yy1xrBNgPa5xwLp62/EQ8BNGG/4D8DKwE7gDWKEaPPqglDrX8flHAp9jxB5RGDGN25gHjchz/NtFOZ7Lb4ZvMB77O7lhfOLYtsGOemzXWv/uZR1zML7DyxpMvxojPni7mdsiOhqttfzJn/y18h8wBuPkuqSRZXo6ltnVYPrDjunFwJAG8z5yzDuv3rQEx7TP601LdExb5Ph3fL15Lzimja437XLHtPlAgJftuaXBdLf6YTQmGniqwfRhGF3MNPBwg3lLHNP/ASLrTQ/CaJhtQKemPrvevH4YifpOoGuDeeMd6/uq3jQFbHOsc2qD5W9xTNfAmGbu+y8cyydjBBXHAIFNlLneUeY9D/Nuc8x71cvxs9zx/79gBG4B9ZbZihEcgHHhqNn1kD/5kz/5k79D/w/IAOxATL1pHwIlGBf2BznO/TfWm3+6s82vN83Z1r8C+NSb7lOvbZ9ab7qzDXqvwfYsdky/vsH0SfXa08sbzNvlmL6gQRsWCxQ6/sxNfXa9+WMd8/8AwhvMu9wx74V604IxkvQa4MgGyztjJg30bMb+UMBfjuXXAf8BRgCWJso9hYcYyTHvJce8OxpMH+OY/oHjc3cA2+rND3DEBT87/n95c+shfx3nT+7sC3HoellrvbHBNOcd2qOdE7TWKRgN5dh6Xd2cV/UfxEhu61/1Hw+UYVx5d7oF4470DK11RYPPfAyjEby4sY11dLe/EOMO9P/qz9NarwfmNVYeuFvXew5Na12GEbCYMK60N9f1GFe3b9FapzfYjsXAt8Dpqm70+uMxLo4s1Vp/02Bdr2Ik7S1xNcZFk17AMxjfc4lSar1S6n/KMRJ/Ax9i7JNpDe82UNe931PXvfrmYNyNmQaglBoN9Me4OyKEEKJjWoyR6NXvqj8WWKa1tmpjnJccXLv6u3Thd3TRvwlj/JfbtNY254KO/56JkSQ2FQd0c6x7J6698tBa/4hxUboxN9ePQbTWORh3vcMw2unmutnx79Xatbs8Wuv3MJLw+nWZCkQCH2mjt119D2PENc2itdbAuRg3MoZhxBFrgFKl1J+OxxI8Dag4B+M7vqL+IxOOnpyXYNzEeK+Jz30LSFRKneiYfC5GXCC9+w5jTXbLFUIcNA0bHADns94RDab/CswAhmN0mx8HZGqtVyml/sGR7Due5RoMLNJa1zimBWI0SLnArcrzm1uqMEbdbUwixlXk1VrrEg/zl9P4c+ktqW9jnM/Zn6SUOsrD/FiMOxX9MHoTHOGY7tY1TmttU0otB3o398O11gXAOY5nJk/BuFBxFDDU8Xe9UupUrfXf9coUK6U+xdiHF2M8xuB8vGAg8JfWekMTH/0Vxj68GuPCyjUYdynea+62CyGE+Nf5FSMZHAd8ppQagDHS+wv1llkCTFRKmbQxsF/D5/X7YSS7O4AHvMQBFTQdBwx3/LvS8TkNLcd4XM6TIq31Tg/T9zcOqMG4gD7Nw3wLEKOUitJa59F4HFCklFqH50EQPdLG4MVjHftiIkYccHS9vxuUUmO0MTifs8xOpdRvGPvmFIxHGcB4rCES+ExrndvER7+HcYPmamApRhyQC3zd3G0XHY8k+0K0DWcj11jvGec8Tw0iGN3WGnIOruLTYPpijERxvFJqPcZV/QX15t2llArDaEQUrs/rRzimxQAPNbK9TQlz/JvtZb636QA0vPru4K2+jYly/HtnE8sFO/5taruzWvDZtbTWuzDubMwCUErFY4xdcDrGVfbhDYrMwd
iHV+FI9mn+XX201tVKqXnA7Y6BBc8FvnXcGRFCCNExNRy3p/7z+k5LgPOAEUqpPRivf03XWm9zzHe2m31pPA4IbmQ
"text/plain": [
"<Figure size 1080x480 with 2 Axes>"
]
2021-02-04 13:34:25 +00:00
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
2021-02-04 13:34:25 +00:00
}
],
"source": [
2021-02-13 11:53:15 +00:00
"fig, ax = plt.subplots(nrows=1, ncols=2)\n",
"fig.set_figwidth(9)\n",
"\n",
"# keyword arguments shared by the confusion matrix plots\n",
"args = dict(\n",
"    normalize='true',\n",
"    colorbar=False,\n",
"    display_labels=playlist_names,\n",
")\n",
"\n",
"# left: unweighted classifier, right: class-weighted classifier\n",
"for axis, (model, title) in zip(ax, [(clf, 'Unweighted SVM'), (wclf, 'Weighted SVM')]):\n",
"    plot_confusion_matrix(model, data_test, labels_test, ax=axis, **args)\n",
"    axis.set_title(title)\n",
"\n",
"fig.tight_layout()"
2021-02-04 13:34:25 +00:00
]
},
{
"cell_type": "markdown",
"metadata": {},
2021-02-04 13:34:25 +00:00
"source": [
2021-02-13 11:53:15 +00:00
"## Unweighted Classes\n",
"\n",
"From the above unweighted scenario, it is clear that the Pop playlist was not effective for classifying similar tracks. This is likely primarily due to the larger size of the Rap (\\~800), EDM (\\~1,300) and Rock (\\~700) playlists compared to Pop (\\~125). Additionally, there is overlap with other genres such as Rap and EDM where much of the confusion occurred. Also not helping is that one of the sub-playlists, electropop, is shared across EDM and Pop. As EDM is already a much larger playlist, it is unsurprising that this performance was poor. The overlap with Rock is understandable as Pop contains an Indie Pop sub-playlist which could have caused some confusion. Quite surprising was the confusion for Jazz as I wouldn't have thought there would be much overlap here.\n",
2021-02-13 11:53:15 +00:00
"\n",
"The other major confusion was with Rock and Metal, specifically classing Metal tracks as Rock. This could be expected due to the similarity in tone.\n",
2021-02-04 13:34:25 +00:00
"\n",
2021-02-13 11:53:15 +00:00
"## Weighted Classes\n",
"\n",
"When weighting the classes by prevalence in the dataset, the model is generally better at classification. The clearest difference is the ability to classify Pop songs. Without weighting, no songs were correctly classified as Pop but were instead mis-identified as Rap, EDM, Rock and Jazz. When re-weighting, the Pop playlist was now correctly classified almost 60% of the time. Mis-identification as Rap, EDM and Rock dropped from a combined 85% to just 20%. Interestingly, the mis-classification of Pop as Jazz increased from 15% to 21%.\n",
"\n",
"The improved accuracy of the Pop model reduced the accuracy of some others. The accuracy of Rap, EDM and Rock decreased as some tracks were instead classified as Pop. EDM and Rock were worse affected than Rap with around 15% Pop error rate compared to Rap's 9%. As discussed previously, this could be due to the overlap in aural tone. The overall accuracy of Rap was not significantly affected by this Pop error rate as, to compensate, the EDM error rate dropped from 12% to just 3%."
]
2021-02-04 13:34:25 +00:00
},
2021-02-13 11:53:15 +00:00
{
"cell_type": "code",
"execution_count": 18,
2021-02-13 11:53:15 +00:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA/sAAAOzCAYAAADncxY7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAABJ0AAASdAHeZh94AAEAAElEQVR4nOzdd3xTVf/A8c/pSPde7A1lyXDLkq0MRRTcCC4cj3tv8VEf9/w5UFQEtyJuVBRBAQFF9iqrBbr3bpM2Ob8/btI2TdIBLcXyfb9eeRXuvefmnOTmnn2u0lojhBBCCCGEEEKI1sOrpSMghBBCCCGEEEKIpiWVfSGEEEIIIYQQopWRyr4QQgghhBBCCNHKSGVfCCGEEEIIIYRoZaSyL4QQQgghhBBCtDJS2RdCCCGEEEIIIVoZqewLIYQQQgghhBCtjFT2hRBCCCGEEEKIVkYq+0IIIYQQQgghRCsjlX0hhBBCCCGEEKKVkcq+EEIIIYQQQgjRykhlX4gjpJTSSqkVLR2P1kopNdL+Gc9pgnMd09+VUmqFUkq3dDyEEEIcm5RS79vzsi4tHZemppRKUkolNcF5junPSCk1yx6/WS0dF9H6SWVfCDfsN2GpdAFKqQvsn8dnHvbfb99frpTyd7O/q33//uaPbdNqyoaG5qKUGqeU+koplaqUsiil8pRSu5VSXyilblFKKftxH9nTcmMDzrnUfuxU+/8dBROtlPqjjnBdlFI2+f0IIY5nNe6XjpdVKZWtlPpNKXVpS8fPHaWUt1IqXylVoZQKcbO/bY30XOnhHL/b949o/hg3raZqaGguSql2SqmXlFI7lFKlSqkypdRB+2f+pFKqu/24cfbv4K8GnPNS+7Hf1Njm+I5tjnN6CLu8xrGzmiSRollIZV+II9cHuKKlI9GMlgM2YKSj4ljLGEADfsBQD/sBfj3M9/8L4zN+7TDDt1pKqQeApcBk4B/gVeBtYAtwJvAK4G0/fJ797zX1nLMLMBZIA76rtbsSGK6UivcQ/BpA2Y8TQojj3WP219PA78AI4COl1IstGis3tNZWYAXggxHP2hx5uQZG196plAoETgdKgDWHGY0xNd5H2Cml+gNbgdsw8tcFwIvAz0A48AAwyn74r0AicIpS6oR6Tn2t/e/btbZXYuTlV3uIT09gJJLX/ytIZV+II6S13qW1PtjS8WguWutcYDMQC/SvuU8p5QcMAb7CaBBwKQDU2LbsMN+/1P4ZZx9O+NZKKdUZ+C9QCJyotT5Ha32X1vperfU0oA1wNmAF0FqvAHYDg5VSJ9Zx6qsxMvn5WuvaGfn39r8uDQZKKW/gSuBvIOOwEyaEEK2E1nqO/fWg1voC4CyMyvJtx+gQ89/sfz3l5WXAEqorljUNA0zASq11xeG8udZ6n9Z63+GEbeVeBiKBOVrrAVrrG+zX1LVa64FAd2AVgNZaA+/Yw13r9myAUqoHRqfAIeDHWrszgPXAlUopHzfBHWWA2h0C4hgklX0hjpC7eeBKqTn27SOVUtOUUn/Zh13lKqU+VUq193CuSKXUU0qpnfYhWgVKqWVKqfFujg1TSt1tHxaYbB/CnaWU+lYpdUZdcVVKtVFKvaOUSrEPL5xVTzIdFfXaBYAzgABgMbDJzX4wCgWa6kIESqlAZQz/36SUKlFKFSul1iilLnETZ49D6ZVSp9iHnBcppQqVUr8qpc6o+fl7+ByilVJvK6XSlFJmpdT22sMSlVLvY4xqAHi01pDMkbWOvcQ+pC1fGdMZdiqlHrI3hrh7/4uVUv/Yv+NMpdQHSql27o6tw2kYvfbLtdZba+/UWtu01j/bM34HR+++2wJAjQp7zcJCTdsxemxmKqV8a+2bBLSr8R5CCCFq0FovA3ZhNKie4tiulDpJKfWlPT8wK6UOKKXeUEq1re+cSqne9nxpeR3HbFXG8Pz6zucpr3dsW43Rm9zezQgvtw37SqmzlFJLlDGNwayU2qeUek4pFe4mnm
6H0tvLOy/byzrlSqldSqk7lFLd7Gl/31OClFLX2dNfrpTKsOf9YTX2j1TGtLPOQOdaef37tc7VWxnrARyyl7kylFIfu/ksHMf3UMaUujx7WedPpdQkT3GtwxD731fc7dRa79da76qx6T2MXvfLlZvplXaOkXjvaq1tbvbPw+g0mFxzoz3vnwX8CexoaAJEy5HKvhDN60bgQyAJeB3YBlwE/Fq7IqiMntp/gPuALGAu8BnGEPaflFK1K2h9gCcxetR/wBjS9QtGhvuHUupsD3GKBNZiDLdbjDE8vr6eWEdFvfbwujE19i8HTlY15voppfphZBZbtdZZ9m3hGC3Q/8PodX4PY0haDPCxUuqJeuLiOPcIYCVGY8ISezrK7PE4tY6g4RgFljOARfb3bge8p5SaWeO4r+37wBh++ViNV1KNeLwHfAz0AL7E+J5zgccxvjenVnGl1O3AJ0A3YCEwHzgBI+OMaEja7XLsf7vZK+kNsQCwAJcoY8hlbROA9sCvWutED+eYh/FdTam1/VqgGCNtQggh3HNMh9MASqnJGPf/czCGYL8IJAA3AOuVUl3rOpm9krccY6pdL5c3U2oIxqi8b7TWafWcaztGeWCgUiqqxjm6Y1SGHXk9eC4PLKsR7lHgJ4zG6R8wpprtBe4CViulQuuKj/0c/vb3vRXIxKjwrgAeBF6oJ/iz9tdmjLw5BSOv+qrGMUkY+XqB/VUzr/+6RjzOBjYAl2GMYHvZntbzgb9UrRFzyhjqvhaYhtFI/gqQbD/n+fWluxZHfu/y/bqjtU7HGIkXAVxQe7+9XDKT6jKYO59gTMmoPZLvXIyRntKw/2+htZaXvORV64WRCetGHLui1rY59u2FwAm19n1s33dhre0rMCruF9faHo7Ra14GxNXYHgZEu4lPByAV2OkpXRiVTJ9GfB5BGJXEfMC7xvbVwC77vyfZzz25xv6b7dterLHtffu2e2q9hz9GocAGDKqxfaT9+Dk1tnkBe+zbJ9Q6z/U10jnSQ/rfqZWOvhit4DtqHe/y3rX2z7LvXwwEeLgGbq2xrYv9c8wFutRKz5eNvO6CMAopGvgDuAroVzNdHsJ9Zg8zy82+b+z7pnlI5xP29y0Afq6xv73985tn/39yQ9MhL3nJS16t7eXpXo6xHorN/uoMBGNU5KzA8FrH3ms/z9Ja2x15aJca26bZtz3v5j0dx49rYNwdZZRpNbZda992OkZjRSawqMb+MHsekA0o+zbHqL4/gfBa7+HIU16qtT0JSKq17WH7sZ84zm3f3hGjY0QD73tI80GgU43tPvb8UgOn1vfeNfZFAHn29PWtta8/RkP3hlrbl1KrDGDfPoXqssisBn4nz9uPTwcexVhTIbSeMBNwUz6175tq3/e9h2s32f7vd+zfa4ca+3/CKAMEYpQJGpwOebXMS3r2hWher2rXIdaO1tCq3mel1ECMuVNfaq0/rXmw1jof4+buT40WWq11gXYzj11rnYzRY91bKdXJTZwswF3adT62R1rrEmAdRoZ+kj3OwRjDEB29/isxCiw1h/85Deuz9xRcDqzXWj9b6z3KMQo3CqhvteIhGD3py7XWteeavY0xN92TUuAObSxG5HjvHRgNF33s6WqoWzEywqu01mW19j2OUYi7rMa2ywBf4P+01kk13t8G3I1RAGwQ+3dyLkZD0HDgXYyRI0XKWJ33Rg/TCBwL8Ti11tuHd07EKMR9UztQrff9GBinquecXoUxpUBa+oUQws4+pWyOMlZLX4RRUVLAy1rrAxgVv0jgM631ylrBX8CogI7zkJfX9DXGoqqzat737SPpLgT20fBFct0N5R8NFGHk3Rqjc6Lmor0jqZ5Wpu3bbrH/vdZejqmitX4fI++qmT96MhMjb7y/xrnRWh/C6F2vy391jTWV7OWe+fb/1jUCsLYrMDpeHrWXF6porbdh5H2DlVJ9AZRSHYBxGAvlvVbr+G8wRgs2xoP294jC6Ej4Hci3T2d4WSnVzU
2Yn4EDwJn2UQY1OfL/2gvz1TYP43u9CqpGoI4DPtJalzYyDaKFuFt0QQjRdNa72XbI/rfmkG3HHPsw5f4xbzH2v31
"text/plain": [
"<Figure size 1080x960 with 4 Axes>"
]
2021-02-13 11:53:15 +00:00
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
2021-02-13 11:53:15 +00:00
}
],
"source": [
"fig, ax = plt.subplots(nrows=2, ncols=2)\n",
"fig.set_figwidth(9)\n",
"fig.set_figheight(8)\n",
"\n",
"# (classifier, title) pairs in row-major grid order:\n",
"# top-left, top-right, bottom-left, bottom-right\n",
"weighted_models = [\n",
"    (lclf, 'Linear Weighted SVM'),\n",
"    (pclf, 'Poly Weighted SVM'),\n",
"    (wclf, 'RBF Weighted SVM'),\n",
"    (sclf, 'Sigmoid Weighted SVM'),\n",
"]\n",
"for axis, (model, title) in zip(ax.flat, weighted_models):\n",
"    plot_confusion_matrix(model, data_test, labels_test, ax=axis, **args)\n",
"    axis.set_title(title)\n",
"\n",
"fig.tight_layout()"
2021-02-13 11:53:15 +00:00
]
},
2021-02-04 13:34:25 +00:00
{
"cell_type": "markdown",
"metadata": {},
2021-02-04 13:34:25 +00:00
"source": [
"## Other Tests\n",
"\n",
"Take a handful of other tracks which I don't listen to and aren't in any playlists to see if they can also be classified"
]
2021-02-04 13:34:25 +00:00
},
{
"cell_type": "code",
"execution_count": 19,
2021-02-04 13:34:25 +00:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
2021-02-04 13:34:25 +00:00
"text": [
"Top Of The World (Five Finger Death Punch) could be ROCK ✓\n",
"Aston Martin Music (Rick Ross) could be ALL RAP ✗\n",
"On The Sunny Side Of The Street (Dizzy Gillespie) could be JAZZ ✓\n",
"Vibez (ZAYN) could be ALL RAP ✗\n",
"Shot In The Dark (AC/DC) could be ROCK ✓\n",
"To Hell and Back (Sabaton) could be ROCK ✗\n",
"Withstand The Fall Of Time (Immortal) could be METAL ✓\n",
"Alone Together - Rudy Van Gelder Remaster (Kenny Dorham) could be JAZZ ✓\n",
"Feel No Ways (Drake) could be ALL RAP ✗\n",
"BO$$ (Fifth Harmony) could be EDM ✗\n",
"\n",
"50.00% Accurate\n"
2021-02-04 13:34:25 +00:00
]
}
],
"source": [
"### PREPARE ###\n",
"test_uris = [\n",
"    \"spotify:track:53yqxU2EKKzbuQZEUEVtxc\",\n",
"    \"spotify:track:5W7xC99N2Zzfh69r7I7zWK\",\n",
"    \"spotify:track:38R2EViAkYOFG8ZkG3GLtW\",\n",
"    \"spotify:track:6T6D9CIrHkALcHPafDFA6L\",\n",
"    \"spotify:track:0sfdiwck2xr4PteGOdyOfz\",\n",
"    \"spotify:track:1BrgjqSg9du0lj3TUMLluL\",\n",
"    \"spotify:track:5nCnSnLtotQ8eB4E189U91\",\n",
"    \"spotify:track:3GOZbK2epuHzCt5YvvVFHO\",\n",
"    \"spotify:track:3cjF2OFRmip8spwZYQRKxP\",\n",
"    \"spotify:track:1COvXs6jaykXC73h9OSBVM\",\n",
"]\n",
"# inferring what playlists these would go in.\n",
"# Each label must equal an entry of playlist_names exactly: a prediction is scored by\n",
"# string equality with playlist_names[predicted index]. The rap playlist is named\n",
"# \"ALL RAP\" there, so a bare \"RAP\" label could never be scored as correct.\n",
"test_labels = [\"ROCK\", \"ALL RAP\", \"JAZZ\", \"POP\", \"ROCK\", \"METAL\", \"METAL\", \"JAZZ\", \"ALL RAP\", \"POP\"]\n",
"\n",
"test_tracks = spotnet.tracks(uris=test_uris)\n",
"spotnet.populate_track_audio_features(tracks=test_tracks)\n",
"\n",
"# filter each track's audio features down to the descriptor columns\n",
"test_features = [\n",
"    {key: value for key, value in track.audio_features.to_dict().items() if key in headers}\n",
"    for track in test_tracks\n",
"]\n",
"\n",
"### PREDICT ###\n",
"# Select the columns in headers order, the same selection used to build the training\n",
"# frames (.loc[:, headers]). Without this the column order would follow the key order\n",
"# of audio_features.to_dict(), which is not guaranteed to match the training order.\n",
"# A descriptor missing from the audio features raises KeyError here.\n",
"predictable_frame = pd.DataFrame(test_features).loc[:, headers]\n",
"\n",
"predicted_labels = clf.predict(predictable_frame)\n",
2021-02-13 11:53:15 +00:00
"# predicted_labels = wclf.predict(predictable_frame)\n",
2021-02-04 13:34:25 +00:00
"# a prediction is correct when the predicted playlist name equals the expected label\n",
"labels_correct = [expected == playlist_names[predicted_labels[pos]] for pos, expected in enumerate(test_labels)]\n",
"\n",
"### EVALUATE ###\n",
"for track, label, is_correct in zip(test_tracks, predicted_labels, labels_correct):\n",
"    print(f'{track.name} ({track.artists[0].name}) could be {playlist_names[label]} {\"✓\" if is_correct else \"✗\"}')\n",
"\n",
"# share of test tracks whose prediction matched the expected label\n",
"fraction_correct = sum(labels_correct) / len(labels_correct)\n",
"print(f'\\n{fraction_correct*100:.2f}% Accurate')"
]
},
{
"cell_type": "markdown",
"metadata": {},
2021-02-04 13:34:25 +00:00
"source": [
"# Imports & Setup"
]
2021-02-04 13:34:25 +00:00
},
{
"cell_type": "code",
2021-02-13 11:53:15 +00:00
"execution_count": 2,
2021-02-04 13:34:25 +00:00
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime\n",
"\n",
"from google.cloud import bigquery\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib as mpl\n",
"mpl.rcParams['figure.dpi'] = 120\n",
"\n",
"from analysis.net import get_spotnet, get_playlist, track_frame\n",
"from analysis.query import *\n",
"from analysis import spotify_descriptor_headers, float_headers, days_since\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"from sklearn import svm\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import plot_confusion_matrix\n",
"\n",
"client = bigquery.Client()\n",
"spotnet = get_spotnet()\n",
"cache = 'query.csv'\n",
"first_day = datetime(year=2017, month=11, day=3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
2021-02-04 13:34:25 +00:00
"source": [
"## Read Scrobble Frame"
]
2021-02-04 13:34:25 +00:00
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"scrobbles = get_query(cache=cache)"
]
},
{
"cell_type": "markdown",
"metadata": {},
2021-02-04 13:34:25 +00:00
"source": [
"## Write Scrobble Frame"
]
2021-02-04 13:34:25 +00:00
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"scrobbles.reset_index().to_csv(cache, sep='\\t')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.4"
2021-02-04 13:34:25 +00:00
}
},
"nbformat": 4,
"nbformat_minor": 4
}