Add data and writing; add LR range for exp2
Binary graph images: 41 added, 11 modified (PNG files under graphs/).

New files:
  graphs/exp1-test2-10-error-rate-curves.png (86 KiB)
  graphs/exp1-test2-10-error-rate-std.png (91 KiB)
  graphs/exp1-test2-10-test-train-error-rate-std.png (124 KiB)
  graphs/exp1-test2-10-test-train-error-rate.png (104 KiB)
  graphs/exp1-test2-11-error-rate-curves.png (48 KiB)
  graphs/exp1-test2-11-error-rate-std.png (56 KiB)
  graphs/exp1-test2-11-test-train-error-rate-std.png (92 KiB)
  graphs/exp1-test2-11-test-train-error-rate.png (79 KiB)
  graphs/exp1-test2-12-error-rate-curves.png (86 KiB)
  graphs/exp1-test2-12-error-rate-std.png (103 KiB)
  graphs/exp1-test2-12-test-train-error-rate-std.png (126 KiB)
  graphs/exp1-test2-12-test-train-error-rate.png (105 KiB)
  graphs/exp1-test2-13-error-rate-curves.png (66 KiB)
  graphs/exp1-test2-13-error-rate-std.png (85 KiB)
  graphs/exp1-test2-13-test-train-error-rate-std.png (109 KiB)
  graphs/exp1-test2-13-test-train-error-rate.png (97 KiB)
  graphs/exp1-test2-14-error-rate-curves.png (89 KiB)
  graphs/exp1-test2-14-error-rate-std.png (98 KiB)
  graphs/exp1-test2-14-test-train-error-rate-std.png (122 KiB)
  graphs/exp1-test2-14-test-train-error-rate.png (107 KiB)
  graphs/exp1-test2-4-error-rate-curves.png (71 KiB)
  graphs/exp1-test2-4-test-train-error-rate-std.png (107 KiB)
  graphs/exp1-test2-4-test-train-error-rate.png (96 KiB)
  graphs/exp1-test2-5-error-rate-curves.png (108 KiB)
  graphs/exp1-test2-5-test-train-error-rate-std.png (133 KiB)
  graphs/exp1-test2-5-test-train-error-rate.png (109 KiB)
  graphs/exp1-test2-6-error-rate-curves.png (60 KiB)
  graphs/exp1-test2-6-test-train-error-rate-std.png (100 KiB)
  graphs/exp1-test2-6-test-train-error-rate.png (87 KiB)
  graphs/exp1-test2-7-error-rate-curves.png (84 KiB)
  graphs/exp1-test2-7-error-rate-std.png (93 KiB)
  graphs/exp1-test2-7-test-train-error-rate-std.png (128 KiB)
  graphs/exp1-test2-7-test-train-error-rate.png (112 KiB)
  graphs/exp1-test2-8-error-rate-curves.png (54 KiB)
  graphs/exp1-test2-8-error-rate-std.png (62 KiB)
  graphs/exp1-test2-8-test-train-error-rate-std.png (104 KiB)
  graphs/exp1-test2-8-test-train-error-rate.png (80 KiB)
  graphs/exp1-test2-9-error-rate-curves.png (48 KiB)
  graphs/exp1-test2-9-error-rate-std.png (53 KiB)
  graphs/exp1-test2-9-test-train-error-rate-std.png (95 KiB)
  graphs/exp1-test2-9-test-train-error-rate.png (74 KiB)

Modified: 11 existing graphs re-rendered with changed sizes (e.g. 110 KiB -> 115 KiB).
Changed files:

 nncw.ipynb | 183 lines changed
 nncw.py    | 349 lines changed
nncw.py:

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # coding: utf-8
 
-# In[1]:
+# In[2]:
 
 
 import numpy as np
@@ -32,7 +32,7 @@ fig_dpi = 70
 # 
 # Read CSVs dumped from MatLab and parse into Pandas DataFrames
 
-# In[2]:
+# In[3]:
 
 
 data = pd.read_csv('features.csv', header=None).T
@@ -62,17 +62,19 @@ labels.astype(bool).sum(axis=0)
 # 
 # Using a 50/50 split
 
-# In[3]:
+# In[4]:
 
 
-data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.5, stratify=labels)
+data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.5
+                                                                    # , stratify=labels
+                                                                    )
 
 
 # ## Generate & Retrieve Model
 # 
 # Get a shallow model with a single hidden layer of varying nodes
 
-# In[4]:
+# In[5]:
 
 
 def get_model(hidden_nodes=9, activation=lambda: 'sigmoid', weight_init=lambda: 'glorot_uniform'):
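The hunk above drops stratified sampling from the 50/50 split. For reference, a minimal sketch of what `stratify` changes in scikit-learn's `train_test_split`; the data here is a synthetic stand-in, not the notebook's cancer dataset:

```python
# Sketch: effect of the stratify argument in sklearn's train_test_split.
# Synthetic stand-in data; NOT the notebook's features.csv/labels.csv.
import numpy as np
from sklearn.model_selection import train_test_split

rng = np.random.default_rng(0)
data = rng.normal(size=(699, 9))                # roughly cancer-dataset-shaped features
labels = (rng.random(699) < 0.34).astype(int)   # imbalanced binary labels

# Without stratify, the class balance of each half drifts from run to run;
# with stratify=labels, both halves keep the full-set class ratio.
_, _, y_tr_plain, y_te_plain = train_test_split(data, labels, test_size=0.5)
_, _, y_tr_strat, y_te_strat = train_test_split(data, labels, test_size=0.5, stratify=labels)

print(labels.mean(), y_te_plain.mean(), y_te_strat.mean())
```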
@@ -86,12 +88,13 @@ def get_model(hidden_nodes=9, activation=lambda: 'sigmoid', weight_init=lambda:
 
 # Get a Keras Tensorboard callback for dumping data for later analysis
 
-# In[5]:
+# In[6]:
 
 
 def tensorboard_callback(path='tensorboard-logs', prefix=''):
-    return tf.keras.callbacks.TensorBoard(log_dir=os.path.normpath(os.path.join(path, prefix + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))),
-                                          histogram_freq=1)
+    return tf.keras.callbacks.TensorBoard(
+        log_dir=os.path.normpath(os.path.join(path, prefix + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))), histogram_freq=1
+    )
 
 
 # # Example Training
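For reference, a hedged sketch of how this helper plugs into `Model.fit`; it assumes the notebook's `model`, `data_train`, and `labels_train` are in scope:

```python
# Sketch: wiring the notebook's tensorboard_callback() helper into a fit() call.
# `model`, `data_train`, `labels_train` are assumed to exist as in the notebook.
history = model.fit(
    data_train.to_numpy(),
    labels_train.to_numpy(),
    epochs=10,
    batch_size=35,
    callbacks=[tensorboard_callback(prefix='example-')],  # logs under tensorboard-logs/
)
# Inspect afterwards with: tensorboard --logdir tensorboard-logs
```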
@@ -144,7 +147,7 @@ model.metrics[1].result()
 # (Hint2: as epochs increases you can expect the test error rate to reach a minimum and then start increasing, you may need to set the stopping criteria to achieve the desired number of epochs - Hint 3: to find classification error rates for train and test set, you need to check the code from E2, to determine how you may obtain the train and test set patterns)
 # 
 
-# In[16]:
+# In[194]:
 
 
 # hidden_nodes = [2, 8, 16, 24, 32]
@@ -156,6 +159,7 @@ def evaluate_parameters(hidden_nodes=hidden_nodes,
                         epochs=epochs,
                         batch_size=128,
                         optimizer=lambda: 'sgd',
+                        weight_init=lambda: 'glorot_uniform',
                         loss=lambda: 'categorical_crossentropy',
                         metrics=['accuracy'],
                         callbacks=None,
@@ -176,7 +180,7 @@ def evaluate_parameters(hidden_nodes=hidden_nodes,
         if print_params:
             print(f"Nodes: {hn}, Epochs: {e}")
 
-        model = get_model(hn)
+        model = get_model(hn, weight_init=weight_init)
         model.compile(
             optimizer=optimizer(),
             loss=loss(),
@@ -266,7 +270,7 @@ ax.set_title("Training vs Validation Accuracy")
 ax.plot(single_history['accuracy'], label="train", lw=2)
 ax.plot(single_history['val_accuracy'], label="validation", lw=2, c=(1,0,0))
 ax.set_xlabel("Epochs")
-ax.set_ylim(0, 1)
+# ax.set_ylim(0, 1)
 ax.grid()
 ax.legend()
 
@@ -296,7 +300,7 @@ ax.set_xlabel('Epochs')
 ax.set_ylabel('Hidden Nodes')
 ax.set_zlabel('Accuracy')
 ax.view_init(30, -110)
-ax.set_zlim([0, 1])
+# ax.set_zlim([0, 1])
 fig.colorbar(surf, shrink=0.3, aspect=6)
 
 plt.tight_layout()
@@ -348,31 +352,42 @@ plt.show()
 # 
 # ### Pickle Results
 # 
-# | test | learning rate | momentum | batch size | hidden nodes | epochs |
-# | --- | --- | --- | --- | --- | --- |
-# |1|0.01|0|128|2, 8, 12, 16, 24, 32, 64, 128, 256|1, 2, 4, 8, 16, 32, 64, 100, 150, 200|
-# |2|0.5|0.1|128|2, 8, 12, 16, 24, 32, 64, 128|1, 2, 4, 8, 16, 32, 64, 100|
-# |3|1|0.3|20|2, 8, 12, 16, 24, 32, 64, 128|1, 2, 4, 8, 16, 32, 64, 100|
-# |4|0.6|0.1|20|2, 8, 16, 24, 32|1, 2, 4, 8, 16, 32, 64, 100, 150, 200|
-# |5|0.05|0.01|20|2, 8, 16, 24, 32|1, 2, 4, 8, 16, 32, 64, 100, 150, 200|
-# |6|1.5|0.5|20|2, 8, 16, 24, 32|1, 2, 4, 8, 16, 32, 64, 100, 150, 200|
+# | test | learning rate | momentum | batch size | hidden nodes | epochs | stratified |
+# | --- | --- | --- | --- | --- | --- | --- |
+# |1|0.01|0|128|2, 8, 12, 16, 24, 32, 64, 128, 256|1, 2, 4, 8, 16, 32, 64, 100, 150, 200| |
+# |2|0.5|0.1|128|2, 8, 12, 16, 24, 32, 64, 128|1, 2, 4, 8, 16, 32, 64, 100| |
+# |3|1|0.3|20|2, 8, 12, 16, 24, 32, 64, 128|1, 2, 4, 8, 16, 32, 64, 100| |
+# |4|0.6|0.1|20|2, 8, 16, 24, 32|1, 2, 4, 8, 16, 32, 64, 100, 150, 200| |
+# |5|0.05|0.01|20|2, 8, 16, 24, 32|1, 2, 4, 8, 16, 32, 64, 100, 150, 200| |
+# |6|1.5|0.5|20|2, 8, 16, 24, 32|1, 2, 4, 8, 16, 32, 64, 100, 150, 200| |
+# |2-1|0.01|0|35|2, 8, 16, 24, 32|1, 2, 4, 8, 16, 32, 64, 100, 150, 200| n |
+# |2-2|0.1|0|35|2, 16, 32|1, 2, 4, 8, 16, 32, 64, 100| n |
+# |2-3|0.15|0|35|2, 16, 32|1, 2, 4, 8, 16, 32, 64, 100| n |
+# |2-4|0.08|0.9|35|1, 2, 8, 16, 32, 64|1, 2, 4, 8, 16, 32, 64, 100| n |
+# |2-5|0.08|0.2|35|1, 2, 8, 16, 32, 64|1, 2, 4, 8, 16, 32, 64, 100| n |
+# |2-6|0.01|0.1|35|2, 8, 16, 24, 32|1, 2, 4, 8, 16, 32, 64, 100, 150, 200| n |
 
-# In[30]:
+# In[214]:
 
 
 multi_param_results = list()
 multi_iterations = 30
 for i in range(multi_iterations):
     print(f"Iteration {i+1}/{multi_iterations}")
-    data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.5, stratify=labels)
+    data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.5
+                                                                        # , stratify=labels
+                                                                        )
     multi_param_results.append(list(evaluate_parameters(dtrain=data_train,
                                                         dtest=data_test,
                                                         ltrain=labels_train,
                                                         ltest=labels_test,
-                                                        optimizer=lambda: tf.keras.optimizers.SGD(learning_rate=1.5, momentum=0.5),
+                                                        hidden_nodes=[2, 16, 32],
+                                                        epochs=[1, 2, 4, 8, 16, 32, 64, 100],
+                                                        optimizer=lambda: tf.keras.optimizers.SGD(learning_rate=0.15, momentum=0.0),
+                                                        weight_init=lambda: 'random_uniform',
                                                         return_model=False,
                                                         print_params=False,
-                                                        batch_size=20)))
+                                                        batch_size=35)))
 
 
 # ### Accuracy Tensor
@@ -381,7 +396,7 @@ for i in range(multi_iterations):
 # 
 # (Iterations x [Test/Train] x Number of nodes x Number of epochs)
 
-# In[173]:
+# In[301]:
 
 
 multi_param_epochs = sorted(list({i["epochs"] for i in multi_param_results[0]}))
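The accuracy tensor stacks one (test/train x nodes x epochs) block per iteration, so reducing over axis 0 gives the mean and standard-deviation surfaces plotted further down. A small shape-checking sketch with stand-in data (the dimensions are illustrative, not the notebook's exact run):

```python
# Sketch: reducing the (iterations x test/train x nodes x epochs) accuracy tensor.
import numpy as np

iterations, splits, n_nodes, n_epochs = 30, 2, 3, 8
accuracy_tensor = np.random.rand(iterations, splits, n_nodes, n_epochs)  # stand-in data

mean_param_accuracy = np.mean(accuracy_tensor, axis=0)  # shape (2, n_nodes, n_epochs)
std_param_accuracy = np.std(accuracy_tensor, axis=0)

# Test error-rate curve for the first hidden-node count:
test_error = 1 - mean_param_accuracy[0, 0, :]
print(test_error.shape)  # (n_epochs,)
```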
@@ -405,20 +420,32 @@ print(f'Epochs: {multi_param_epochs}')
 print()
 print(f'Loss: {multi_param_results[0][0]["loss"]}')
 print(f'LR: {multi_param_results[0][0]["optimizer"]["learning_rate"]:.3}')
-print(f'Momentum: {multi_param_results[0][0]["optimizer"]["momentum"]:.3}')
+print(f'Momentum: {multi_param_results[0][0]["optimizer"]["momentum"]:}')
 
 
 # #### Export/Import Test Sets
 # 
 # Export mean and standard deviations for retrieval and visualisation
-pickle.dump(multi_param_results, open("result.p", "wb"))
-exp1_testname = 'exp1-test5'
-multi_param_results = pickle.load(open(f"results/{exp1_testname}.p", "rb"))
+
+# In[215]:
+
+
+pickle.dump(multi_param_results, open("results/exp1-test2-3.p", "wb"))
+
+
+# In[300]:
+
+
+exp1_testname = 'exp1-test1'
+multi_param_results = pickle.load(open(f"results/{exp1_testname}.p", "rb"))
+
 np.savetxt("exp1-mean.csv", mean_param_accuracy, delimiter=',')
 np.savetxt("exp1-std.csv", std_param_accuracy, delimiter=',')
 mean_param_accuracy = np.loadtxt("results/test1-exp1-mean.csv", delimiter=',')
 std_param_accuracy = np.loadtxt("results/test1-exp1-std.csv", delimiter=',')
 # multi_iterations = 30
 
 # ### Best Results
 
-# In[141]:
+# In[302]:
 
 
 best_param_accuracy_idx = np.unravel_index(np.argmax(mean_param_accuracy[0, :, :]), mean_param_accuracy.shape)
@@ -431,7 +458,7 @@ print(f'Nodes: {best_param_accuracy_nodes}, Epochs: {best_param_accuracy_epochs}
 
 # ### Test Accuracy Surface
 
-# In[174]:
+# In[303]:
 
 
 X, Y = np.meshgrid(multi_param_epochs, multi_param_nodes)
@@ -447,7 +474,7 @@ ax.set_xlabel('Epochs')
 ax.set_ylabel('Hidden Nodes')
 ax.set_zlabel('Accuracy')
 ax.view_init(30, -110)
-ax.set_zlim([0, 1])
+# ax.set_zlim([0, 1])
 fig.colorbar(surf, shrink=0.3, aspect=6)
 
 plt.tight_layout()
@@ -457,15 +484,15 @@ plt.show()
 
 # ### Test Error Rate Curves
 
-# In[175]:
+# In[313]:
 
 
-# fig = plt.figure(figsize=(7, 5))
-fig = plt.figure()
+fig = plt.figure(figsize=(6, 5))
+# fig = plt.figure()
 fig.set_dpi(fig_dpi)
 
-for idx, layer in enumerate(mean_param_accuracy[0, :, :]):
-    # plt.errorbar(epochs, 1- layer, yerr=std_param_accuracy[idx], label=f'{hidden_nodes[idx]} Nodes')
+for idx, (layer, std) in enumerate(zip(mean_param_accuracy[0, :, :], std_param_accuracy[0, :, :])):
+    # plt.errorbar(multi_param_epochs, 1 - layer, yerr=std, capsize=4, label=f'{multi_param_nodes[idx]} Nodes')
     plt.plot(multi_param_epochs, 1 - layer, '-', label=f'{multi_param_nodes[idx]} Nodes', lw=2)
 
 plt.legend()
@@ -473,19 +500,19 @@ plt.grid()
 plt.title(f"Test error rates for different epochs and hidden nodes")
 plt.xlabel("Epochs")
 plt.ylabel("Error Rate")
-plt.ylim(0)
+plt.ylim(0, 0.6)
 
 plt.tight_layout()
-# plt.savefig(f'graphs/{exp1_testname}-error-rate-curves.png')
+plt.savefig(f'graphs/{exp1_testname}-error-rate-curves.png')
 plt.show()
 
 
 # ### Test/Train Error Over Nodes
 
-# In[169]:
+# In[314]:
 
 
-fig, axes = plt.subplots(math.ceil(len(multi_param_nodes) / 2), 2, figsize=(8, 8*math.ceil(len(multi_param_nodes) / 2)/3))
+fig, axes = plt.subplots(math.ceil(len(multi_param_nodes) / 2), 2, figsize=(6, 6*math.ceil(len(multi_param_nodes) / 2)/3))
 fig.set_dpi(fig_dpi)
 
 for idx, (nodes, ax) in enumerate(zip(multi_param_nodes, axes.flatten())):
@@ -499,13 +526,13 @@ for idx, (nodes, ax) in enumerate(zip(multi_param_nodes, axes.flatten())):
     ax.grid()
 
 fig.tight_layout()
-# fig.savefig(f'graphs/{exp1_testname}-test-train-error-rate.png')
+fig.savefig(f'graphs/{exp1_testname}-test-train-error-rate.png')
 
 
-# In[170]:
+# In[315]:
 
 
-fig, axes = plt.subplots(math.ceil(len(multi_param_nodes) / 2), 2, figsize=(8, 8*math.ceil(len(multi_param_nodes) / 2)/3))
+fig, axes = plt.subplots(math.ceil(len(multi_param_nodes) / 2), 2, figsize=(6, 6*math.ceil(len(multi_param_nodes) / 2)/3))
 fig.set_dpi(fig_dpi)
 
 for idx, (nodes, ax) in enumerate(zip(multi_param_nodes, axes.flatten())):
@@ -517,7 +544,7 @@ for idx, (nodes, ax) in enumerate(zip(multi_param_nodes, axes.flatten())):
     ax.grid()
 
 fig.tight_layout()
-# fig.savefig(f'graphs/{exp1_testname}-test-train-error-rate-std.png')
+fig.savefig(f'graphs/{exp1_testname}-test-train-error-rate-std.png')
 
 
 # # Experiment 2
@@ -527,7 +554,7 @@ fig.tight_layout()
 # (Hint4: to implement majority vote you need to determine the predicted class labels -probably easier to implement yourself rather than use the ensemble matlab functions)
 # 
 
-# In[6]:
+# In[113]:
 
 
 num_models=[1, 3, 9, 15, 25]
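Per Hint 4, the ensemble decision is a majority vote over predicted class labels. A generic sketch of such a vote and the agreement measure, not necessarily the exact implementation inside `evaluate_ensemble_vote`:

```python
# Sketch of majority voting over per-model predicted class labels (Hint 4).
import numpy as np

def hard_predictions(models, x):
    # one row per model: predicted class labels, shape (n_models, n_samples)
    return np.stack([np.argmax(m.predict(x), axis=1) for m in models])

def majority_vote(votes):
    # per sample, the most frequent label wins
    return np.apply_along_axis(lambda col: np.bincount(col).argmax(), 0, votes)

def agreement(votes, decision):
    # fraction of individual votes that match the ensemble decision
    return np.mean(votes == decision[np.newaxis, :])
```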
@@ -559,7 +586,29 @@ def evaluate_ensemble_vote(hidden_nodes=16,
         if print_params:
             print(f"Models: {m}")
 
+        response = {"epochs": list(),
+                    "num_models": m}
+
+        ###################
+        ## GET MODELS
+        ###################
+        if isinstance(hidden_nodes, tuple): # for range of hidden nodes, calculate value per model
+            if m == 1:
+                models = [get_model(int(np.mean(hidden_nodes)), weight_init=weight_init)]
+                response["nodes"] = [int(np.mean(hidden_nodes))]
+
+            else:
+                models = [get_model(int(i), weight_init=weight_init)
+                          for i in np.linspace(*hidden_nodes, num=m)]
+                response["nodes"] = [int(i) for i in np.linspace(*hidden_nodes, num=m)]
+
+        elif hidden_nodes == 'm':
+            models = [get_model(i+1, weight_init=weight_init) for i in range(m)]
+            response["nodes"] = [i+1 for i in range(m)]
+        else: # not a range of epochs, just set to given value
             models = [get_model(hidden_nodes, weight_init=weight_init) for _ in range(m)]
+            response["nodes"] = hidden_nodes
 
         for model in models:
             model.compile(
                 optimizer=optimizer(),
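The new branch above spreads a `(low, high)` tuple of hidden nodes across the ensemble with `np.linspace`. A quick sketch of the node counts each member receives under that logic (values illustrative):

```python
# Sketch: per-model hidden-node counts from a (low, high) tuple, mirroring the
# new code path above.
import numpy as np

hidden_nodes = (50, 100)
for m in [1, 3, 5]:
    if m == 1:
        nodes = [int(np.mean(hidden_nodes))]  # single model gets the midpoint
    else:
        nodes = [int(i) for i in np.linspace(*hidden_nodes, num=m)]
    print(m, nodes)
# 1 [75]
# 3 [50, 75, 100]
# 5 [50, 62, 75, 87, 100]
```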
@@ -567,12 +616,6 @@ def evaluate_ensemble_vote(hidden_nodes=16,
                 metrics=metrics
             )
 
-
-        response = {"nodes": hidden_nodes,
-                    "epochs": list(),
-                    "num_models": m}
-
-
         if tboard:
             if callbacks is not None:
                 cb = [i() for i in callbacks] + [tensorboard_callback(prefix=f'exp{exp}-{m}-')]
@@ -584,13 +627,13 @@ def evaluate_ensemble_vote(hidden_nodes=16,
         ###################
         histories = list()
         for idx, model in enumerate(models):
-            if isinstance(epochs, tuple):
+            if isinstance(epochs, tuple): # for range of epochs, calculate value per model
                 if m == 1:
-                    e = (epochs[0] + epochs[1]) / 2 # average, not lower bound if single model
+                    e = np.mean(epochs) # average, not lower bound if single model
                 else:
-                    e = np.linspace(epochs[0], epochs[1], num=m)[idx]
+                    e = np.linspace(*epochs, num=m)[idx]
                 e = int(e)
-            else:
+            else: # not a range of epochs, just set to given value
                 e = epochs
 
             # print(m, e) # debug
@@ -669,15 +712,19 @@ def evaluate_ensemble_vote(hidden_nodes=16,
 # ## Single Iteration
 # Run a single iteration of ensemble model investigations
 
-# In[11]:
+# In[224]:
 
 
 single_ensem_results = list()
-for test in evaluate_ensemble_vote(epochs=(5, 300), optimizer=lambda: tf.keras.optimizers.SGD(learning_rate=0.02)):
+# for test in evaluate_ensemble_vote(epochs=(5, 300), optimizer=lambda: tf.keras.optimizers.SGD(learning_rate=0.02)):
+for test in evaluate_ensemble_vote(hidden_nodes=(1, 400),
+                                   epochs=20,
+                                   optimizer=lambda: tf.keras.optimizers.SGD(learning_rate=0.02)):
     single_ensem_results.append(test)
+    print(test["nodes"], test["epochs"])
 
 
-# In[16]:
+# In[225]:
|
|
||||||
|
|
||||||
fig = plt.figure(figsize=(8, 5))
|
fig = plt.figure(figsize=(8, 5))
|
||||||
@ -685,15 +732,15 @@ fig.set_dpi(fig_dpi)
|
|||||||
|
|
||||||
ensem_x = [i["num_models"] for i in single_ensem_results]
|
ensem_x = [i["num_models"] for i in single_ensem_results]
|
||||||
|
|
||||||
plt.plot(ensem_x, [i["accuracy"] for i in single_ensem_results], 'x-', label='Ensemble Accuracy')
|
plt.plot(ensem_x, 1 - np.array([i["accuracy"] for i in single_ensem_results]), 'x-', label='Ensemble Test')
|
||||||
plt.plot(ensem_x, [i["individual_accuracy"] for i in single_ensem_results], 'x-', label='Individual Accuracy')
|
plt.plot(ensem_x, 1 - np.array([i["individual_accuracy"] for i in single_ensem_results]), 'x-', label='Individual Test')
|
||||||
plt.plot(ensem_x, [i["agreement"] for i in single_ensem_results], 'x-', label='Agreement')
|
plt.plot(ensem_x, 1 - np.array([i["agreement"] for i in single_ensem_results]), 'x-', label='Disagreement')
|
||||||
|
|
||||||
plt.title("Test Accuracy for Horizontal Model Ensembles")
|
plt.title("Test Error Rates for Horizontal Model Ensembles")
|
||||||
plt.ylim(0, 1)
|
plt.ylim(0)
|
||||||
plt.grid()
|
plt.grid()
|
||||||
plt.legend()
|
plt.legend()
|
||||||
plt.ylabel("Accuracy")
|
plt.ylabel("Error Rate")
|
||||||
plt.xlabel("Number of Models")
|
plt.xlabel("Number of Models")
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
@ -710,24 +757,58 @@ plt.show()
|
|||||||
#
|
#
|
||||||
# ### Pickle Results
|
# ### Pickle Results
|
||||||
#
|
#
|
||||||
# | test | learning rate | momentum | batch size | hidden nodes | epochs | models |
|
# | test | learning rate | momentum | batch size | hidden nodes | epochs | models | stratify |
|
||||||
# | --- | --- | --- | --- | --- | --- | --- |
|
# | --- | --- | --- | --- | --- | --- | --- | --- |
|
||||||
# |3|0.06|0.05|35|16|1 - 300|1, 3, 9, 15, 25|
|
# |3|0.06|0.05|35|16|1 - 300|1, 3, 9, 15, 25| |
|
||||||
|
# |4|0.06|0.05|35|1 - 50|50|1, 3, 9, 15, 25| |
|
||||||
|
# |5|0.06|0.05|35|1 - 300|50|1, 3, 9, 15, 25| |
|
||||||
|
# |6|0.001|0.01|35|1 - 400|50|1, 3, 9, 15, 25| |
|
||||||
|
# |7|0.01|0.01|35|1 - 400|30 - 150|1, 3, 9, 15, 25| |
|
||||||
|
# |8|0.03|0.01|35|1 - 400|5 - 100|1, 3, 9, 15, 25| |
|
||||||
|
# |9|0.1|0.01|35|1 - 400|20|1, 3, 9, 15, 25| |
|
||||||
|
# |10|0.15|0.01|35|1 - 400|20|1, 3, 9, 15, 25, 35, 45| |
|
||||||
|
# |11|0.15|0.01|35|1 - 400|10|1, 3, 9, 15, 25, 35, 45| |
|
||||||
|
# |12|0.02|0.01|35|m|50|1, 3, 9, 15, 25, 35, 45| |
|
||||||
|
# |13|0.01 exp 0.98, 1|0.01|35|1 - 200|50|1, 3, 9, 15, 25, 35, 45| n |
|
||||||
|
# |14|0.01|0.01|35|1 - 200|50|1, 3, 9, 15, 25, 35, 45| n |
|
||||||
|
# |15|0.01|0.9|35|50 - 100|50|1, 3, 5, 7, 9, 15, 25, 35, 45| n |
|
||||||
|
# |16|0.01|0.1|35|50 - 100|50|1, 3, 5, 7, 9, 15, 25, 35, 45| n |
|
||||||
|
# |17|0.1|0.1|35|50 - 100|50 - 100|1, 3, 5, 7, 9, 15, 25, 35, 45| n |
|
||||||
|
|
||||||
# In[24]:
|
# In[335]:
|
||||||
|
|
||||||
|
|
||||||
|
batch_size=35
|
||||||
|
test_size=0.5
|
||||||
|
epochs=50
|
||||||
|
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(0.01,
|
||||||
|
decay_steps=1,
|
||||||
|
decay_rate=0.98)
|
||||||
|
|
||||||
|
plt.plot(range(epochs+1), [lr_schedule(i) for i in range(epochs+1)])
|
||||||
|
plt.grid()
|
||||||
|
plt.ylim(0)
|
||||||
|
plt.xlabel('Epochs')
|
||||||
|
plt.ylabel('Learning Rate')
|
||||||
|
plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
# In[357]:
|
||||||
|
|
||||||
|
|
||||||
multi_ensem_results = list()
|
multi_ensem_results = list()
|
||||||
multi_ensem_iterations = 2
|
multi_ensem_iterations = 30
|
||||||
for i in range(multi_ensem_iterations):
|
for i in range(multi_ensem_iterations):
|
||||||
print(f"Iteration {i+1}/{multi_ensem_iterations}")
|
print(f"Iteration {i+1}/{multi_ensem_iterations}")
|
||||||
data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.5, stratify=labels)
|
data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=test_size,
|
||||||
multi_ensem_results.append(list(evaluate_ensemble_vote(epochs=(1, 100),
|
# stratify=labels
|
||||||
hidden_nodes=16,
|
)
|
||||||
nmodels=[1, 3, 7, 11, 15],
|
multi_ensem_results.append(list(evaluate_ensemble_vote(epochs=(50, 100),
|
||||||
optimizer=lambda: tf.keras.optimizers.SGD(learning_rate=0.05, momentum=0.01),
|
hidden_nodes=(50, 100),
|
||||||
|
nmodels=[1, 3, 5, 7, 9, 15, 25, 35, 45],
|
||||||
|
optimizer=lambda: tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.1),
|
||||||
weight_init=lambda: 'random_uniform',
|
weight_init=lambda: 'random_uniform',
|
||||||
batch_size=35,
|
batch_size=batch_size,
|
||||||
dtrain=data_train,
|
dtrain=data_train,
|
||||||
dtest=data_test,
|
dtest=data_test,
|
||||||
ltrain=labels_train,
|
ltrain=labels_train,
|
||||||
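Test 13 in the table above uses the exponentially decaying learning rate built in this hunk. A small worked sketch of what `ExponentialDecay(0.01, decay_steps=1, decay_rate=0.98)` evaluates to; note that when the schedule is attached to an optimizer it advances per update step (per batch), not per epoch, so the notebook's per-epoch plot is an illustration:

```python
# Sketch: values produced by ExponentialDecay(0.01, decay_steps=1, decay_rate=0.98),
# i.e. lr(step) = 0.01 * 0.98 ** (step / 1).
for step in [0, 1, 10, 50]:
    print(step, 0.01 * 0.98 ** step)
# 0  -> 0.01
# 1  -> 0.0098
# 10 -> ~0.00817
# 50 -> ~0.00364
```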
@@ -748,7 +829,17 @@ for i in range(multi_ensem_iterations):
 # 2. Individual Accuracy
 # 3. Agreement
 
-# In[25]:
+# In[322]:
+
+
+def test_tensor_data(test):
+    return [test["accuracy"],
+            np.mean([i["accuracy"][-1] for i in test["history"]]), # avg train acc
+            test["individual_accuracy"],
+            test["agreement"]]
+
+
+# In[362]:
 
 
 multi_ensem_models = sorted(list({i["num_models"] for i in multi_ensem_results[0]}))
@@ -759,10 +850,7 @@ for iter_idx, iteration in enumerate(multi_ensem_results):
     for single_test in iteration:
 
         ensem_models_idx = multi_ensem_models.index(single_test['num_models'])
-        accuracy_ensem_tensor[iter_idx, :, ensem_models_idx] = [single_test["accuracy"],
-                                                                np.mean([i["accuracy"][-1] for i in single_test["history"]]),
-                                                                single_test["individual_accuracy"],
-                                                                single_test["agreement"]]
+        accuracy_ensem_tensor[iter_idx, :, ensem_models_idx] = test_tensor_data(single_test)
 
 mean_ensem_accuracy = np.mean(accuracy_ensem_tensor, axis=0)
 std_ensem_accuracy = np.std(accuracy_ensem_tensor, axis=0)
@@ -778,18 +866,26 @@ print(f'Momentum: {multi_ensem_results[0][0]["optimizer"]["momentum"]:.3}')
 # #### Export/Import Test Sets
 # 
 # Export mean and standard deviations for retrieval and visualisation
-pickle.dump(multi_ensem_results, open("result.p", "wb"))
 
-# In[22]:
+# In[358]:
 
 
-multi_ensem_results = pickle.load(open("results/exp2-test3.p", "rb"))
+exp2_testname = 'exp2-test17'
+pickle.dump(multi_ensem_results, open(f"results/{exp2_testname}.p", "wb"))
+
+
+# In[349]:
+
+
+exp2_testname = 'exp2-test16'
+multi_ensem_results = pickle.load(open(f"results/{exp2_testname}.p", "rb"))
+
 np.savetxt("exp2-mean.csv", mean_ensem_accuracy, delimiter=',')
 np.savetxt("exp2-std.csv", std_ensem_accuracy, delimiter=',')
 mean_ensem_accuracy = np.loadtxt("results/test1-exp2-mean.csv", delimiter=',')
 std_ensem_accuracy = np.loadtxt("results/test1-exp2-std.csv", delimiter=',')
 
 # ### Best Results
 
-# In[26]:
+# In[363]:
 
 
 best_ensem_accuracy_idx = np.unravel_index(np.argmax(mean_ensem_accuracy[0, :]), mean_ensem_accuracy.shape)
@@ -801,10 +897,10 @@ print(f'Models: {best_ensem_accuracy_models}, {best_ensem_accuracy * 100:.3}% Ac
 
 # ### Test/Train Error Over Model Numbers
 
-# In[29]:
+# In[364]:
 
 
-fig = plt.figure(figsize=(8, 5))
+fig = plt.figure(figsize=(6, 4))
 fig.set_dpi(fig_dpi)
 
 # plt.plot(multi_ensem_models, 1 - mean_ensem_accuracy[0, :], 'x-', label='Ensemble Test')
|
|||||||
plt.errorbar(multi_ensem_models, 1 - mean_ensem_accuracy[3, :], yerr=std_ensem_accuracy[3, :], capsize=4, label='Disagreement')
|
plt.errorbar(multi_ensem_models, 1 - mean_ensem_accuracy[3, :], yerr=std_ensem_accuracy[3, :], capsize=4, label='Disagreement')
|
||||||
|
|
||||||
plt.title(f"Error Rate for Horizontal Ensemble Models")
|
plt.title(f"Error Rate for Horizontal Ensemble Models")
|
||||||
# plt.ylim(0, 1)
|
# plt.ylim(0, 0.2)
|
||||||
plt.ylim(0, np.max(1 - mean_ensem_accuracy + std_ensem_accuracy) + 0.05)
|
# plt.ylim(0, np.max(1 - mean_ensem_accuracy + std_ensem_accuracy) + 0.05)
|
||||||
plt.grid()
|
plt.grid()
|
||||||
plt.legend()
|
plt.legend()
|
||||||
plt.xlabel("Number of Models")
|
plt.xlabel("Number of Models")
|
||||||
plt.ylabel("Error Rate")
|
plt.ylabel("Error Rate")
|
||||||
|
|
||||||
|
plt.tight_layout()
|
||||||
|
plt.savefig(f'graphs/{exp2_testname}-error-rate-curves.png')
|
||||||
|
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
|
|
||||||
@@ -831,7 +931,7 @@ plt.show()
 # 
 # Repeat Exp 2) for cancer dataset with two different optimisers of your choice e.g. 'trainlm' and 'trainrp'. Comment and discuss the result and decide which is more appropriate training algorithm for the problem. In your discussion, include in your description a detailed account of how the training algorithms (optimisations) work.
 
-# In[7]:
+# In[127]:
 
 
 def evaluate_optimisers(optimizers=[(lambda: 'sgd', 'sgd'),
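Experiment 3 runs the same ensemble sweep under three optimisers. A sketch of optimizer factories in the style of the `multi_optims` list defined below; the hyperparameter values are illustrative, not a specific test row:

```python
# Sketch: SGD/Adam/RMSprop factories sharing lr, momentum and epsilon,
# mirroring the multi_optims list below (values illustrative).
import tensorflow.keras.optimizers as tf_optim

lr, mom, eps = 0.05, 0.9, 1e-8
optims = [
    (lambda: tf_optim.SGD(learning_rate=lr, momentum=mom), 'sgd'),
    (lambda: tf_optim.Adam(learning_rate=lr, epsilon=eps), 'adam'),
    (lambda: tf_optim.RMSprop(learning_rate=lr, momentum=mom, epsilon=eps), 'rmsprop'),
]
for factory, name in optims:
    print(name, factory().get_config()["learning_rate"])
```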
@@ -868,17 +968,25 @@ for test in evaluate_optimisers(epochs=(5, 300), nmodels=[1, 3, 5]):
 # 
 # ### Pickle Results
 # 
-# | test | optim1 | optim2 | optim3 | batch size | hidden nodes | epochs | models |
-# | --- | --- | --- | --- | --- | --- | --- | --- |
+# | test | optim1 | optim2 | optim3 | lr | momentum | epsilon | batch size | hidden nodes | epochs | models | stratified |
+# | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
+# | 1 | SGD | Adam | RMSprop | 0.1 | 0.0 | 1e7 | 35 | 16 | 1 - 100 | 1, 3, 9, 15, 25 | y |
+# | 2 | SGD | Adam | RMSprop | 0.05 | 0.01 | 1e7 | 35 | 16 | 1 - 100 | 1, 3, 9, 15, 25 | y |
+# | 3 | SGD | Adam | RMSprop | 0.1 | 0.01 | 1e7 | 35 | 1 - 400 | 20 | 1, 3, 9, 15, 25, 35, 45 | y |
+# | 4 | SGD | Adam | RMSprop | 0.075 | 0.01 | 1e7 | 35 | 1 - 400 | 20 | 1, 3, 9, 15, 25, 35, 45 | y |
+# | 5 | SGD | Adam | RMSprop | 0.05 | 0.01 | 1e7 | 35 | 1 - 400 | 20 | 1, 3, 9, 15, 25, 35, 45 | n |
+# | 6 | SGD | Adam | RMSprop | 0.02 | 0.01 | 1e7 | 35 | m | 50 | 1, 3, 9, 15, 25, 35, 45 | n |
+# | 7 | SGD | Adam | RMSprop | 0.1 | 0.9 | 1e-8 | 35 | 1 - 400 | 50 - 100 | 1, 3, 5, 7, 9, 15, 25 | n |
+# | 8 | SGD | Adam | RMSprop | 0.05 | 0.9 | 1e-8 | 35 | 1 - 400 | 50 - 100 | 1, 3, 5, 7, 9, 15, 25 | n |
 
-# In[36]:
+# In[27]:
 
 
 multi_optim_results = list()
-multi_optim_iterations = 5
+multi_optim_iterations = 30
 
-multi_optim_lr = 0.01
-multi_optim_mom = 0.0
+multi_optim_lr = 0.05
+multi_optim_mom = 0.01
 multi_optim_eps = 1e-07
 multi_optims = [(lambda: tf_optim.SGD(learning_rate=multi_optim_lr,
                                       momentum=multi_optim_mom), 'sgd'),
@@ -890,10 +998,12 @@ multi_optims = [(lambda: tf_optim.SGD(learning_rate=multi_optim_lr,
 
 for i in range(multi_optim_iterations):
     print(f"Iteration {i+1}/{multi_optim_iterations}")
-    data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.5, stratify=labels)
-    multi_optim_results.append(list(evaluate_optimisers(epochs=(1, 100),
-                                                        hidden_nodes=16,
-                                                        nmodels=[1, 3, 5, 7],
+    data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.5,
+                                                                        # stratify=labels
+                                                                        )
+    multi_optim_results.append(list(evaluate_optimisers(epochs=(50, 100),
+                                                        hidden_nodes=(1, 400),
+                                                        nmodels=[1, 3, 9, 15, 25],
                                                         optimizers=multi_optims,
                                                         weight_init=lambda: 'random_uniform',
                                                         batch_size=35,
|
|||||||
# 2. Individual Accuracy
|
# 2. Individual Accuracy
|
||||||
# 3. Agreement
|
# 3. Agreement
|
||||||
|
|
||||||
# In[37]:
|
# In[467]:
|
||||||
|
|
||||||
|
|
||||||
multi_optim_results_dict = dict() # indexed by optimiser name
|
multi_optim_results_dict = dict() # indexed by optimiser name
|
||||||
@@ -950,10 +1060,8 @@ for optim, optim_results in multi_optim_results_dict.items():
         for single_test in iteration:
 
             optim_models_idx = multi_optim_models.index(single_test['num_models'])
-            accuracy_optim_tensor[iter_idx, :, optim_models_idx] = [single_test["accuracy"],
-                                                                    np.mean([i["accuracy"][-1] for i in single_test["history"]]),
-                                                                    single_test["individual_accuracy"],
-                                                                    single_test["agreement"]]
+            accuracy_optim_tensor[iter_idx, :, optim_models_idx] = test_tensor_data(single_test)
     optim_tensors[optim] = {
         "accuracy": accuracy_optim_tensor,
         "mean": np.mean(accuracy_optim_tensor, axis=0),
@@ -970,10 +1078,23 @@ print(f'Loss: {multi_optim_results[0][0][0]["loss"]}')
 # #### Export/Import Test Sets
 # 
 # Export mean and standard deviations for retrieval and visualisation
-pickle.dump(multi_optim_results, open("result.p", "wb"))
-multi_optim_results = pickle.load(open("results/exp3-test1.p", "rb"))
+
+# In[28]:
+
+
+pickle.dump(multi_optim_results, open("results/exp3-test5.p", "wb"))
+
+
+# In[466]:
+
+
+exp3_testname = 'exp3-test8'
+multi_optim_results = pickle.load(open(f"results/{exp3_testname}.p", "rb"))
+
 
 # ### Best Results
 
-# In[38]:
+# In[468]:
 
 
 for optim, optim_results in optim_tensors.items():
@@ -986,10 +1107,10 @@ for optim, optim_results in optim_tensors.items():
 
 # ### Optimiser Error Rates
 
-# In[40]:
+# In[469]:
 
 
-fig, axes = plt.subplots(1, 3, figsize=(24, 5))
+fig, axes = plt.subplots(1, 3, figsize=(12, 3))
 fig.set_dpi(fig_dpi)
 
 for idx, ((optimiser_name, tensors_dict), ax) in enumerate(zip(optim_tensors.items(), axes.flatten())):
|
|||||||
# ax.errorbar(multi_optim_models, 1 - tensors_dict["mean"][3, :], yerr=tensors_dict["std"][3, :], capsize=4, label='Disagreement')
|
# ax.errorbar(multi_optim_models, 1 - tensors_dict["mean"][3, :], yerr=tensors_dict["std"][3, :], capsize=4, label='Disagreement')
|
||||||
|
|
||||||
ax.set_title(f"{optimiser_name} Error Rate for Ensemble Models")
|
ax.set_title(f"{optimiser_name} Error Rate for Ensemble Models")
|
||||||
# ax.set_ylim(0, 1)
|
ax.set_ylim(0, 0.1)
|
||||||
ax.set_ylim(0, np.max([np.max(1 - i["mean"] + i["std"]) for i in optim_tensors.values()]) + 0.03)
|
# ax.set_ylim(0, np.max([np.max(1 - i["mean"] + i["std"]) for i in optim_tensors.values()]) + 0.03)
|
||||||
ax.grid()
|
ax.grid()
|
||||||
|
# if idx > 0:
|
||||||
ax.legend()
|
ax.legend()
|
||||||
ax.set_xlabel("Number of Models")
|
ax.set_xlabel("Number of Models")
|
||||||
ax.set_ylabel("Error Rate")
|
ax.set_ylabel("Error Rate")
|
||||||
|
|
||||||
|
# axes[0].set_ylim(0, 0.4)
|
||||||
|
axes[1].legend()
|
||||||
|
axes[2].legend()
|
||||||
|
|
||||||
|
plt.tight_layout()
|
||||||
|
plt.savefig(f'graphs/{exp3_testname}-error-rate-curves.png')
|
||||||
|
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@ description = ""
|
|||||||
authors = ["andy <andy@sarsoo.xyz>"]
|
authors = ["andy <andy@sarsoo.xyz>"]
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = "3.8"
|
python = "~3.8"
|
||||||
tensorflow = "^2.4.1"
|
tensorflow = "^2.4.1"
|
||||||
pandas = "^1.2.4"
|
pandas = "^1.2.4"
|
||||||
matplotlib = "^3.4.1"
|
matplotlib = "^3.4.1"
|
||||||
|
Bibliography (.bib):

@@ -52,3 +52,58 @@
     year = {2012}
 }
 
+@misc{tf.keras.optimizers.SGD,
+    author = {TensorFlow},
+    title = {tf.keras.optimizers.SGD - Documentation},
+    url = {https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/SGD},
+    urldate = {2021-05-02},
+    year = {2019}
+}
+
+@misc{paperspace-mom-rmsprop-adam,
+    author = {Kathuria, Ayoosh},
+    title = {Intro to optimization in deep learning: Momentum, RMSProp and Adam},
+    url = {https://blog.paperspace.com/intro-to-optimization-momentum-rmsprop-adam},
+    urldate = {2021-05-02},
+    year = {2018}
+}
+
+@misc{understanding-rmsprop,
+    author = {Bushaev, Vitaly},
+    month = sep,
+    organization = {Towards Data Science},
+    title = {Understanding RMSprop — faster neural network learning},
+    url = {https://towardsdatascience.com/understanding-rmsprop-faster-neural-network-learning-62e116fcf29a},
+    urldate = {2021-05-02},
+    year = {2018}
+}
+
+@misc{rmsprop-hinton,
+    author = {Tieleman, T. and Hinton, G.},
+    howpublished = {COURSERA: Neural Networks for Machine Learning},
+    title = {Lecture 6.5—RmsProp: Divide the gradient by a running average of its recent magnitude},
+    url = {https://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf},
+    urldate = {2021-05-02},
+    year = {2012}
+}
+
+@article{adam-paper,
+    author = {Kingma, Diederik and Ba, Jimmy},
+    journal = {International Conference on Learning Representations},
+    month = {12},
+    title = {Adam: A Method for Stochastic Optimization},
+    url = {https://www.researchgate.net/publication/269935079_Adam_A_Method_for_Stochastic_Optimization},
+    urldate = {2021-05-02},
+    year = {2014}
+}
+
+@misc{tds-adam,
+    author = {Bushaev, Vitaly},
+    month = oct,
+    organization = {Towards Data Science},
+    title = {Adam — latest trends in deep learning optimization.},
+    url = {https://towardsdatascience.com/adam-latest-trends-in-deep-learning-optimization-6be9a291375c},
+    urldate = {2021-05-02},
+    year = {2018}
+}