{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Playlist Neural Network\n",
"\n",
"Given a list of playlists, can unknown tracks be correctly classified?"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# playlist_names = [\"RAP\", \"EDM\", \"ROCK\", \"METAL\", \"JAZZ\", \"POP\"] # super-genres\n",
"# playlist_names = [\"ALL RAP\", \"EDM\", \"ROCK\", \"METAL\", \"JAZZ\", \"POP\"] # super-genres\n",
"# playlist_names = [\"RAP\", \"EDM\", \"ROCK\", \"METAL\", \"JAZZ\"] # super-genres without POP\n",
"# playlist_names = [\"ALL RAP\", \"EDM\", \"ROCK\", \"METAL\", \"JAZZ\"] # super-genres without POP\n",
"playlist_names = [\"ALL RAP\", \"DNB\", \"4/4\", \"cRock\", \"METAL\", \"cJazz\"] # super-genres with decomposed EDM\n",
"# playlist_names = [\"DNB\", \"HOUSE\", \"TECHNO\", \"GARAGE\", \"DUBSTEP\", \"BASS\"] # EDM playlists\n",
"# playlist_names = [\"20s rap\", \"10s rap\", \"00s rap\", \"90s rap\", \"80s rap\"] # rap decades\n",
"# playlist_names = [\"UK RAP\", \"US RAP\"] # UK/US split\n",
"# playlist_names = [\"uk rap\", \"grime\", \"drill\", \"afro bash\"] # british rap playlists\n",
"# playlist_names = [\"20s rap\", \"10s rap\", \"00s rap\", \"90s rap\", \"80s rap\", \"trap\", \"gangsta rap\", \"industrial rap\", \"weird rap\", \"jazz rap\", \"boom bap\", \"trap metal\"] # american rap playlists\n",
"# playlist_names = [\"rock\", \"indie\", \"punk\", \"pop rock\", \"bluesy rock\", \"hard rock\", \"chilled rock\", \"emo\", \"pop punk\", \"stoner rock/metal\", \"post-hardcore\", \"melodic hardcore\", \"art rock\", \"post-rock\", \"classic pop punk\", \"90s rock & grunge\", \"90s indie & britpop\", \"psych\"] # rock playlists\n",
"# playlist_names = [\"metal\", \"metalcore\", \"mathcore\", \"hardcore\", \"black metal\", \"death metal\", \"doom metal\", \"sludge metal\", \"classic metal\", \"industrial\", \"nu metal\", \"calm metal\", \"thrash metal\"] # metal playlists\n",
"\n",
"# headers = float_headers + [\"duration_ms\", \"mode\", \"loudness\", \"tempo\"]\n",
"headers = float_headers + [\"mode\", \"loudness\", \"tempo\"]\n",
"# headers = float_headers\n",
"\n",
"BALANCED_WEIGHTS = True"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Pull and process playlist information.\n",
"\n",
"1. Get live playlist track information from Spotify\n",
"2. Filter listening history for these tracks\n",
"\n",
"Filter out tracks without features (rows with a null `uri`) and drop duplicates on `uri` before keeping only the descriptor columns listed in `headers`."
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# 1) pull the live track listing of every configured playlist\n",
"playlists = [get_playlist(name, spotnet) for name in playlist_names]\n",
"\n",
"# 2) per playlist: join its tracks with the scrobble history on track/artist name,\n",
"#    keep rows that have a uri, keep one row per uri, then keep only the descriptor columns\n",
"filtered_playlists = [\n",
"    pd.merge(track_frame(plst.tracks), scrobbles, on=['track', 'artist'])\n",
"      .loc[lambda frame: pd.notnull(frame[\"uri\"])]\n",
"      .drop_duplicates(['uri'])\n",
"      [headers]\n",
"    for plst in playlists\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Construct the dataset with associated labels before splitting into a train and test set."
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"dataset = pd.concat(filtered_playlists)\n",
"# label every row with the index of the playlist it came from\n",
"labels = np.concatenate([np.full(len(plst), idx) for idx, plst in enumerate(filtered_playlists)])\n",
"\n",
"# stratify: maintains class proportions in test and train set\n",
"data_train, data_test, labels_train, labels_test = train_test_split(dataset, labels,\n",
"                                                                    test_size=0.1,\n",
"#                                                                     random_state=70,\n",
"                                                                    stratify=labels\n",
"                                                                   )\n",
"\n",
"# compute_class_weight returns one weight per entry of `classes`, in that order, so the dict\n",
"# is keyed by those class ids rather than by range(len(filtered_playlists)): the two differ\n",
"# when a playlist contributes no rows, which would silently attach weights to the wrong classes\n",
"present_classes = np.unique(labels_train)\n",
"class_weights = class_weight.compute_class_weight('balanced',\n",
"                                                  classes=present_classes,\n",
"                                                  y=labels_train)\n",
"class_weights = {int(cls): weight for cls, weight in zip(present_classes, class_weights)}\n",
"\n",
"# one-hot encode the integer labels, one column per playlist\n",
"labels_train = tf.one_hot(labels_train, len(filtered_playlists))\n",
"labels_test = tf.one_hot(labels_test, len(filtered_playlists))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"def tensorboard_callback(path='tensorboard-logs', prefix=''):\n",
"    \"\"\"Create a TensorBoard callback that logs to a timestamped run directory.\n",
"\n",
"    The log directory is `path` joined with `prefix` followed by the current\n",
"    local time formatted as YYYYmmdd-HHMMSS; histograms are written every epoch.\n",
"    \"\"\"\n",
"    run_name = prefix + datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n",
"    log_dir = os.path.normpath(os.path.join(path, run_name))\n",
"    return tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"def get_model(hidden_nodes=128,\n",
"              layers=2,\n",
"              classes=None,\n",
"              activation=lambda: 'sigmoid',\n",
"              weight_init=lambda: 'glorot_uniform'):\n",
"    \"\"\"Build an (uncompiled) fully-connected softmax classifier.\n",
"\n",
"    Args:\n",
"        hidden_nodes: number of units in each hidden Dense layer.\n",
"        layers: number of hidden Dense layers.\n",
"        classes: number of output units; when None (the default) it is\n",
"            len(filtered_playlists), looked up at call time so that re-running\n",
"            the playlist cell is picked up without re-defining this function.\n",
"        activation: zero-argument callable returning the hidden-layer activation.\n",
"        weight_init: zero-argument callable returning the kernel initializer;\n",
"            called once per Dense layer.\n",
"\n",
"    Returns:\n",
"        A tf.keras Sequential model: Input -> Hidden1..HiddenN -> Output.\n",
"    \"\"\"\n",
"    if classes is None:\n",
"        classes = len(filtered_playlists)\n",
"\n",
"    # per-sample feature shape taken from the frame's shape: no full array copy,\n",
"    # and it still works when data_train has no rows\n",
"    network = [tf.keras.layers.InputLayer(input_shape=data_train.shape[1:], name='Input')]\n",
"\n",
"    for i in range(layers):\n",
"        network.append(\n",
"            tf.keras.layers.Dense(hidden_nodes,\n",
"                                  activation=activation(),\n",
"                                  kernel_initializer=weight_init(),\n",
"                                  name=f'Hidden{i+1}')\n",
"        )\n",
"\n",
"    network.append(tf.keras.layers.Dense(classes,\n",
"                                         activation='softmax',\n",
"                                         kernel_initializer=weight_init(),\n",
"                                         name='Output'))\n",
"\n",
"    return tf.keras.models.Sequential(network)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Single Model"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: \"sequential_10\"\n",
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"Hidden1 (Dense) (None, 64) 704 \n",
"_________________________________________________________________\n",
"Output (Dense) (None, 6) 390 \n",
"=================================================================\n",
"Total params: 1,094\n",
"Trainable params: 1,094\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n"
]
}
],
"source": [
"# single hidden layer of 64 units, trained as a multi-class softmax classifier\n",
"model = get_model(hidden_nodes=64, layers=1)\n",
"\n",
"optimiser = tf.keras.optimizers.Adam(learning_rate=0.01)\n",
"# optimiser = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)\n",
"\n",
"model.compile(\n",
"    optimizer=optimiser,\n",
"    loss='categorical_crossentropy',\n",
"    metrics=['accuracy'],\n",
")\n",
"model.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Train"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# class weighting is opt-in via BALANCED_WEIGHTS; None is passed to fit otherwise\n",
"cw = class_weights if BALANCED_WEIGHTS else None\n",
"\n",
"history = model.fit(\n",
"    data_train.to_numpy(),\n",
"    labels_train,\n",
"    epochs=50,\n",
"    validation_split=0.11,\n",
"    class_weight=cw,\n",
"    callbacks=[tensorboard_callback()],\n",
"    verbose=0,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
"