neural net updates, better pandas

2021-05-10 00:18:57 +01:00 · 2021-05-10 00:18:57 +01:00 · a33060c0d3
commit a33060c0d3
parent ac81f17248
8 changed files with 788 additions and 736 deletions
--- a/README.md
+++ b/README.md
@ -2,7 +2,7 @@

 Notebooks:
 * [analysis](analysis.ipynb) for an intro to the dataset and premise
-* [artist](artist.ipynb), [album](./album.ipynb) & [playlist](playlist.ipynb) investigations
+* [artist](artist.ipynb), [album](./album.ipynb), [track](./track.ipynb) & [playlist](playlist.ipynb) investigations
 * [stats](stats.ipynb) for high-level stats about the dataset (Spotify feature miss ratio)
 * [playlist SVM](./playlist-svm.ipynb) using Scikit to classify tracks using the contents of playlists as models
 * [playlist NN](./playlist-nn.ipynb) using a multi-layer perceptron to classify tracks using the contents of playlists as models
--- a/album.ipynb
+++ b/album.ipynb
--- a/analysis/nn.py
+++ b/analysis/nn.py
@ -0,0 +1,34 @@
+import tensorflow as tf
+
+def ensem_classify(models, test_data, test_labels, round_predictions=True):
+    """Classify ``test_data`` with an ensemble of models and score the result.
+
+    Every model is called on ``test_data.to_numpy()``. The per-model outputs
+    are summed element-wise twice: once after rounding each output with
+    ``tf.math.round`` and once unrounded. The ensemble's class for a sample is
+    the argmax of whichever sum ``round_predictions`` selects.
+
+    Args:
+        models: Sized iterable of callables; each is called with the NumPy
+            form of ``test_data``. Presumably trained Keras models whose
+            outputs are per-class scores in [0, 1] -- confirm against caller.
+        test_data: Object providing ``to_numpy()`` and ``len()``
+            (presumably a pandas DataFrame of features -- confirm).
+        test_labels: Iterable of per-sample label vectors; the ground-truth
+            class is taken as the argmax of each (assumes one-hot encoding).
+        round_predictions: If true, pick the class from the sum of rounded
+            outputs; otherwise from the sum of raw outputs.
+
+    Returns:
+        A 5-tuple ``(classes, predictions, ensemble_accuracy,
+        agreement_when_correct, individual_accuracy)``:
+
+        * ``classes``: list with the ensemble's argmax class per sample.
+        * ``predictions``: list with each model's raw output.
+        * ``ensemble_accuracy``: correct ensemble predictions divided by
+          ``len(test_data)``.
+        * ``agreement_when_correct``: over correctly classified samples, the
+          mean fraction of models whose rounded output selected the
+          ground-truth class.
+        * ``individual_accuracy``: the same fraction averaged over
+          ``len(test_data)``.
+
+        Each ratio is ``0.0`` when its denominator is zero (empty test set,
+        or no correct ensemble prediction) instead of raising
+        ``ZeroDivisionError``.
+    """
+    # Convert once; every model receives the same array.
+    features = test_data.to_numpy()
+    predictions = [model(features) for model in models]
+
+    # Sum over all ensemble members, both rounded and raw; the argmax of the
+    # selected sum is the ensemble's predicted class.
+    rounded_sum = sum(tf.math.round(pred) for pred in predictions)
+    unrounded_sum = sum(predictions)
+    selected_sum = rounded_sum if round_predictions else unrounded_sum
+
+    num_models = len(models)
+    correct = 0  # number of correct ensemble predictions
+    correct_num_models = 0  # summed per-sample agreement, correct samples only
+    individual_accuracy = 0  # summed per-sample agreement, all samples
+    classes = []
+
+    # votes = rounded sum, scores = sum used for the decision, gt = ground truth
+    for votes, scores, gt in zip(rounded_sum, selected_sum, test_labels):
+        gt_argmax = tf.math.argmax(gt)
+        predicted = tf.math.argmax(scores)
+        classes.append(predicted)
+
+        # Always use the rounded sum here so the value divides nicely.
+        correct_models = votes[gt_argmax] / num_models
+        individual_accuracy += correct_models
+
+        if predicted == gt_argmax:  # ensemble evaluated here
+            correct += 1
+            correct_num_models += correct_models
+
+    num_samples = len(test_data)
+    # Guard each ratio: a zero denominator would otherwise raise.
+    ensemble_accuracy = correct / num_samples if num_samples else 0.0
+    agreement_when_correct = correct_num_models / correct if correct else 0.0
+    mean_individual_accuracy = (
+        individual_accuracy / num_samples if num_samples else 0.0
+    )
+
+    return (
+        classes,
+        predictions,
+        ensemble_accuracy,
+        agreement_when_correct,
+        mean_individual_accuracy,
+    )
+    
--- a/artist.ipynb
+++ b/artist.ipynb
--- a/playlist-nn.ipynb
+++ b/playlist-nn.ipynb
--- a/playlist-svm.ipynb
+++ b/playlist-svm.ipynb
--- a/playlist.ipynb
+++ b/playlist.ipynb
--- a/track.ipynb
+++ b/track.ipynb