adding data, writing, added LR range for exp2

This commit is contained in:
andy 2021-05-03 17:48:15 +01:00
parent 176e2f0945
commit b17cf9a469
684 changed files with 2168 additions and 262 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 110 KiB

After

Width:  |  Height:  |  Size: 115 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 86 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 91 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 124 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 92 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 79 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 86 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 126 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 105 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 85 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 109 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 97 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 89 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 98 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 122 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 107 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 71 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 107 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 96 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 108 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 133 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 109 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 60 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 100 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 87 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 84 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 93 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 128 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 62 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 104 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 80 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 95 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 74 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 90 KiB

After

Width:  |  Height:  |  Size: 94 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 22 KiB

After

Width:  |  Height:  |  Size: 67 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 56 KiB

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 27 KiB

After

Width:  |  Height:  |  Size: 86 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 28 KiB

After

Width:  |  Height:  |  Size: 83 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

After

Width:  |  Height:  |  Size: 95 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

After

Width:  |  Height:  |  Size: 59 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 68 KiB

After

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

After

Width:  |  Height:  |  Size: 68 KiB

File diff suppressed because one or more lines are too long

349
nncw.py
View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
# coding: utf-8
# In[1]:
# In[2]:
import numpy as np
@ -32,7 +32,7 @@ fig_dpi = 70
#
# Read CSVs dumped from MatLab and parse into Pandas DataFrames
# In[2]:
# In[3]:
data = pd.read_csv('features.csv', header=None).T
@ -62,17 +62,19 @@ labels.astype(bool).sum(axis=0)
#
# Using a 50/50 split
# In[3]:
# In[4]:
data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.5, stratify=labels)
data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.5
# , stratify=labels
)
# ## Generate & Retrieve Model
#
# Get a shallow model with a single hidden layer of varying nodes
# In[4]:
# In[5]:
def get_model(hidden_nodes=9, activation=lambda: 'sigmoid', weight_init=lambda: 'glorot_uniform'):
@ -86,12 +88,13 @@ def get_model(hidden_nodes=9, activation=lambda: 'sigmoid', weight_init=lambda:
# Get a Keras Tensorboard callback for dumping data for later analysis
# In[5]:
# In[6]:
def tensorboard_callback(path='tensorboard-logs', prefix=''):
return tf.keras.callbacks.TensorBoard(log_dir=os.path.normpath(os.path.join(path, prefix + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))),
histogram_freq=1)
return tf.keras.callbacks.TensorBoard(
log_dir=os.path.normpath(os.path.join(path, prefix + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))), histogram_freq=1
)
# # Example Training
@ -144,7 +147,7 @@ model.metrics[1].result()
# (Hint2: as epochs increases you can expect the test error rate to reach a minimum and then start increasing, you may need to set the stopping criteria to achieve the desired number of epochs - Hint 3: to find classification error rates for train and test set, you need to check the code from E2, to determine how you may obtain the train and test set patterns)
#
# In[16]:
# In[194]:
# hidden_nodes = [2, 8, 16, 24, 32]
@ -156,6 +159,7 @@ def evaluate_parameters(hidden_nodes=hidden_nodes,
epochs=epochs,
batch_size=128,
optimizer=lambda: 'sgd',
weight_init=lambda: 'glorot_uniform',
loss=lambda: 'categorical_crossentropy',
metrics=['accuracy'],
callbacks=None,
@ -176,7 +180,7 @@ def evaluate_parameters(hidden_nodes=hidden_nodes,
if print_params:
print(f"Nodes: {hn}, Epochs: {e}")
model = get_model(hn)
model = get_model(hn, weight_init=weight_init)
model.compile(
optimizer=optimizer(),
loss=loss(),
@ -266,7 +270,7 @@ ax.set_title("Training vs Validation Accuracy")
ax.plot(single_history['accuracy'], label="train", lw=2)
ax.plot(single_history['val_accuracy'], label="validation", lw=2, c=(1,0,0))
ax.set_xlabel("Epochs")
ax.set_ylim(0, 1)
# ax.set_ylim(0, 1)
ax.grid()
ax.legend()
@ -296,7 +300,7 @@ ax.set_xlabel('Epochs')
ax.set_ylabel('Hidden Nodes')
ax.set_zlabel('Accuracy')
ax.view_init(30, -110)
ax.set_zlim([0, 1])
# ax.set_zlim([0, 1])
fig.colorbar(surf, shrink=0.3, aspect=6)
plt.tight_layout()
@ -348,31 +352,42 @@ plt.show()
#
# ### Pickle Results
#
# | test | learning rate | momentum | batch size | hidden nodes | epochs |
# | --- | --- | --- | --- | --- | --- |
# |1|0.01|0|128|2, 8, 12, 16, 24, 32, 64, 128, 256|1, 2, 4, 8, 16, 32, 64, 100, 150, 200|
# |2|0.5|0.1|128|2, 8, 12, 16, 24, 32, 64, 128|1, 2, 4, 8, 16, 32, 64, 100|
# |3|1|0.3|20|2, 8, 12, 16, 24, 32, 64, 128|1, 2, 4, 8, 16, 32, 64, 100|
# |4|0.6|0.1|20|2, 8, 16, 24, 32|1, 2, 4, 8, 16, 32, 64, 100, 150, 200|
# |5|0.05|0.01|20|2, 8, 16, 24, 32|1, 2, 4, 8, 16, 32, 64, 100, 150, 200|
# |6|1.5|0.5|20|2, 8, 16, 24, 32|1, 2, 4, 8, 16, 32, 64, 100, 150, 200|
# | test | learning rate | momentum | batch size | hidden nodes | epochs | stratified |
# | --- | --- | --- | --- | --- | --- | --- |
# |1|0.01|0|128|2, 8, 12, 16, 24, 32, 64, 128, 256|1, 2, 4, 8, 16, 32, 64, 100, 150, 200| |
# |2|0.5|0.1|128|2, 8, 12, 16, 24, 32, 64, 128|1, 2, 4, 8, 16, 32, 64, 100| |
# |3|1|0.3|20|2, 8, 12, 16, 24, 32, 64, 128|1, 2, 4, 8, 16, 32, 64, 100| |
# |4|0.6|0.1|20|2, 8, 16, 24, 32|1, 2, 4, 8, 16, 32, 64, 100, 150, 200| |
# |5|0.05|0.01|20|2, 8, 16, 24, 32|1, 2, 4, 8, 16, 32, 64, 100, 150, 200| |
# |6|1.5|0.5|20|2, 8, 16, 24, 32|1, 2, 4, 8, 16, 32, 64, 100, 150, 200| |
# |2-1|0.01|0|35|2, 8, 16, 24, 32|1, 2, 4, 8, 16, 32, 64, 100, 150, 200| n |
# |2-2|0.1|0|35|2, 16, 32|1, 2, 4, 8, 16, 32, 64, 100| n |
# |2-3|0.15|0|35|2, 16, 32|1, 2, 4, 8, 16, 32, 64, 100| n |
# |2-4|0.08|0.9|35|1, 2, 8, 16, 32, 64|1, 2, 4, 8, 16, 32, 64, 100| n |
# |2-5|0.08|0.2|35|1, 2, 8, 16, 32, 64|1, 2, 4, 8, 16, 32, 64, 100| n |
# |2-6|0.01|0.1|35|2, 8, 16, 24, 32|1, 2, 4, 8, 16, 32, 64, 100, 150, 200| n |
# In[30]:
# In[214]:
multi_param_results = list()
multi_iterations = 30
for i in range(multi_iterations):
print(f"Iteration {i+1}/{multi_iterations}")
data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.5, stratify=labels)
data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.5
# , stratify=labels
)
multi_param_results.append(list(evaluate_parameters(dtrain=data_train,
dtest=data_test,
ltrain=labels_train,
ltest=labels_test,
optimizer=lambda: tf.keras.optimizers.SGD(learning_rate=1.5, momentum=0.5),
hidden_nodes=[2, 16, 32],
epochs=[1, 2, 4, 8, 16, 32, 64, 100],
optimizer=lambda: tf.keras.optimizers.SGD(learning_rate=0.15, momentum=0.0),
weight_init=lambda: 'random_uniform',
return_model=False,
print_params=False,
batch_size=20)))
batch_size=35)))
# ### Accuracy Tensor
@ -381,7 +396,7 @@ for i in range(multi_iterations):
#
# (Iterations x [Test/Train] x Number of nodes x Number of epochs)
# In[173]:
# In[301]:
multi_param_epochs = sorted(list({i["epochs"] for i in multi_param_results[0]}))
@ -405,20 +420,32 @@ print(f'Epochs: {multi_param_epochs}')
print()
print(f'Loss: {multi_param_results[0][0]["loss"]}')
print(f'LR: {multi_param_results[0][0]["optimizer"]["learning_rate"]:.3}')
print(f'Momentum: {multi_param_results[0][0]["optimizer"]["momentum"]:.3}')
print(f'Momentum: {multi_param_results[0][0]["optimizer"]["momentum"]:}')
# #### Export/Import Test Sets
#
# Export mean and standard deviations for retrieval and visualisation
pickle.dump(multi_param_results, open("result.p", "wb"))exp1_testname = 'exp1-test5'
multi_param_results = pickle.load(open(f"results/{exp1_testname}.p", "rb"))np.savetxt("exp1-mean.csv", mean_param_accuracy, delimiter=',')
# In[215]:
pickle.dump(multi_param_results, open("results/exp1-test2-3.p", "wb"))
# In[300]:
exp1_testname = 'exp1-test1'
multi_param_results = pickle.load(open(f"results/{exp1_testname}.p", "rb"))
np.savetxt("exp1-mean.csv", mean_param_accuracy, delimiter=',')
np.savetxt("exp1-std.csv", std_param_accuracy, delimiter=',')mean_param_accuracy = np.loadtxt("results/test1-exp1-mean.csv", delimiter=',')
std_param_accuracy = np.loadtxt("results/test1-exp1-std.csv", delimiter=',')
# multi_iterations = 30
# ### Best Results
# In[141]:
# In[302]:
best_param_accuracy_idx = np.unravel_index(np.argmax(mean_param_accuracy[0, :, :]), mean_param_accuracy.shape)
@ -431,7 +458,7 @@ print(f'Nodes: {best_param_accuracy_nodes}, Epochs: {best_param_accuracy_epochs}
# ### Test Accuracy Surface
# In[174]:
# In[303]:
X, Y = np.meshgrid(multi_param_epochs, multi_param_nodes)
@ -447,7 +474,7 @@ ax.set_xlabel('Epochs')
ax.set_ylabel('Hidden Nodes')
ax.set_zlabel('Accuracy')
ax.view_init(30, -110)
ax.set_zlim([0, 1])
# ax.set_zlim([0, 1])
fig.colorbar(surf, shrink=0.3, aspect=6)
plt.tight_layout()
@ -457,15 +484,15 @@ plt.show()
# ### Test Error Rate Curves
# In[175]:
# In[313]:
# fig = plt.figure(figsize=(7, 5))
fig = plt.figure()
fig = plt.figure(figsize=(6, 5))
# fig = plt.figure()
fig.set_dpi(fig_dpi)
for idx, layer in enumerate(mean_param_accuracy[0, :, :]):
# plt.errorbar(epochs, 1- layer, yerr=std_param_accuracy[idx], label=f'{hidden_nodes[idx]} Nodes')
for idx, (layer, std) in enumerate(zip(mean_param_accuracy[0, :, :], std_param_accuracy[0, :, :])):
# plt.errorbar(multi_param_epochs, 1 - layer, yerr=std, capsize=4, label=f'{multi_param_nodes[idx]} Nodes')
plt.plot(multi_param_epochs, 1 - layer, '-', label=f'{multi_param_nodes[idx]} Nodes', lw=2)
plt.legend()
@ -473,19 +500,19 @@ plt.grid()
plt.title(f"Test error rates for different epochs and hidden nodes")
plt.xlabel("Epochs")
plt.ylabel("Error Rate")
plt.ylim(0)
plt.ylim(0, 0.6)
plt.tight_layout()
# plt.savefig(f'graphs/{exp1_testname}-error-rate-curves.png')
plt.savefig(f'graphs/{exp1_testname}-error-rate-curves.png')
plt.show()
# ### Test/Train Error Over Nodes
# In[169]:
# In[314]:
fig, axes = plt.subplots(math.ceil(len(multi_param_nodes) / 2), 2, figsize=(8, 8*math.ceil(len(multi_param_nodes) / 2)/3))
fig, axes = plt.subplots(math.ceil(len(multi_param_nodes) / 2), 2, figsize=(6, 6*math.ceil(len(multi_param_nodes) / 2)/3))
fig.set_dpi(fig_dpi)
for idx, (nodes, ax) in enumerate(zip(multi_param_nodes, axes.flatten())):
@ -499,13 +526,13 @@ for idx, (nodes, ax) in enumerate(zip(multi_param_nodes, axes.flatten())):
ax.grid()
fig.tight_layout()
# fig.savefig(f'graphs/{exp1_testname}-test-train-error-rate.png')
fig.savefig(f'graphs/{exp1_testname}-test-train-error-rate.png')
# In[170]:
# In[315]:
fig, axes = plt.subplots(math.ceil(len(multi_param_nodes) / 2), 2, figsize=(8, 8*math.ceil(len(multi_param_nodes) / 2)/3))
fig, axes = plt.subplots(math.ceil(len(multi_param_nodes) / 2), 2, figsize=(6, 6*math.ceil(len(multi_param_nodes) / 2)/3))
fig.set_dpi(fig_dpi)
for idx, (nodes, ax) in enumerate(zip(multi_param_nodes, axes.flatten())):
@ -517,7 +544,7 @@ for idx, (nodes, ax) in enumerate(zip(multi_param_nodes, axes.flatten())):
ax.grid()
fig.tight_layout()
# fig.savefig(f'graphs/{exp1_testname}-test-train-error-rate-std.png')
fig.savefig(f'graphs/{exp1_testname}-test-train-error-rate-std.png')
# # Experiment 2
@ -527,7 +554,7 @@ fig.tight_layout()
# (Hint4: to implement majority vote you need to determine the predicted class labels -probably easier to implement yourself rather than use the ensemble matlab functions)
#
# In[6]:
# In[113]:
num_models=[1, 3, 9, 15, 25]
@ -559,7 +586,29 @@ def evaluate_ensemble_vote(hidden_nodes=16,
if print_params:
print(f"Models: {m}")
response = {"epochs": list(),
"num_models": m}
###################
## GET MODELS
###################
if isinstance(hidden_nodes, tuple): # for range of hidden nodes, calculate value per model
if m == 1:
models = [get_model(int(np.mean(hidden_nodes)), weight_init=weight_init)]
response["nodes"] = [int(np.mean(hidden_nodes))]
else:
models = [get_model(int(i), weight_init=weight_init)
for i in np.linspace(*hidden_nodes, num=m)]
response["nodes"] = [int(i) for i in np.linspace(*hidden_nodes, num=m)]
elif hidden_nodes == 'm':
models = [get_model(i+1, weight_init=weight_init) for i in range(m)]
response["nodes"] = [i+1 for i in range(m)]
else: # not a range of hidden nodes, just set to given value
models = [get_model(hidden_nodes, weight_init=weight_init) for _ in range(m)]
response["nodes"] = hidden_nodes
for model in models:
model.compile(
optimizer=optimizer(),
@ -567,12 +616,6 @@ def evaluate_ensemble_vote(hidden_nodes=16,
metrics=metrics
)
response = {"nodes": hidden_nodes,
"epochs": list(),
"num_models": m}
if tboard:
if callbacks is not None:
cb = [i() for i in callbacks] + [tensorboard_callback(prefix=f'exp{exp}-{m}-')]
@ -584,13 +627,13 @@ def evaluate_ensemble_vote(hidden_nodes=16,
###################
histories = list()
for idx, model in enumerate(models):
if isinstance(epochs, tuple):
if isinstance(epochs, tuple): # for range of epochs, calculate value per model
if m == 1:
e = (epochs[0] + epochs[1]) / 2 # average, not lower bound if single model
e = np.mean(epochs) # average, not lower bound if single model
else:
e = np.linspace(epochs[0], epochs[1], num=m)[idx]
e = np.linspace(*epochs, num=m)[idx]
e = int(e)
else:
else: # not a range of epochs, just set to given value
e = epochs
# print(m, e) # debug
@ -669,15 +712,19 @@ def evaluate_ensemble_vote(hidden_nodes=16,
# ## Single Iteration
# Run a single iteration of ensemble model investigations
# In[11]:
# In[224]:
single_ensem_results = list()
for test in evaluate_ensemble_vote(epochs=(5, 300), optimizer=lambda: tf.keras.optimizers.SGD(learning_rate=0.02)):
# for test in evaluate_ensemble_vote(epochs=(5, 300), optimizer=lambda: tf.keras.optimizers.SGD(learning_rate=0.02)):
for test in evaluate_ensemble_vote(hidden_nodes=(1, 400),
epochs=20,
optimizer=lambda: tf.keras.optimizers.SGD(learning_rate=0.02)):
single_ensem_results.append(test)
print(test["nodes"], test["epochs"])
# In[16]:
# In[225]:
fig = plt.figure(figsize=(8, 5))
@ -685,15 +732,15 @@ fig.set_dpi(fig_dpi)
ensem_x = [i["num_models"] for i in single_ensem_results]
plt.plot(ensem_x, [i["accuracy"] for i in single_ensem_results], 'x-', label='Ensemble Accuracy')
plt.plot(ensem_x, [i["individual_accuracy"] for i in single_ensem_results], 'x-', label='Individual Accuracy')
plt.plot(ensem_x, [i["agreement"] for i in single_ensem_results], 'x-', label='Agreement')
plt.plot(ensem_x, 1 - np.array([i["accuracy"] for i in single_ensem_results]), 'x-', label='Ensemble Test')
plt.plot(ensem_x, 1 - np.array([i["individual_accuracy"] for i in single_ensem_results]), 'x-', label='Individual Test')
plt.plot(ensem_x, 1 - np.array([i["agreement"] for i in single_ensem_results]), 'x-', label='Disagreement')
plt.title("Test Accuracy for Horizontal Model Ensembles")
plt.ylim(0, 1)
plt.title("Test Error Rates for Horizontal Model Ensembles")
plt.ylim(0)
plt.grid()
plt.legend()
plt.ylabel("Accuracy")
plt.ylabel("Error Rate")
plt.xlabel("Number of Models")
plt.show()
@ -710,24 +757,58 @@ plt.show()
#
# ### Pickle Results
#
# | test | learning rate | momentum | batch size | hidden nodes | epochs | models |
# | --- | --- | --- | --- | --- | --- | --- |
# |3|0.06|0.05|35|16|1 - 300|1, 3, 9, 15, 25|
# | test | learning rate | momentum | batch size | hidden nodes | epochs | models | stratify |
# | --- | --- | --- | --- | --- | --- | --- | --- |
# |3|0.06|0.05|35|16|1 - 300|1, 3, 9, 15, 25| |
# |4|0.06|0.05|35|1 - 50|50|1, 3, 9, 15, 25| |
# |5|0.06|0.05|35|1 - 300|50|1, 3, 9, 15, 25| |
# |6|0.001|0.01|35|1 - 400|50|1, 3, 9, 15, 25| |
# |7|0.01|0.01|35|1 - 400|30 - 150|1, 3, 9, 15, 25| |
# |8|0.03|0.01|35|1 - 400|5 - 100|1, 3, 9, 15, 25| |
# |9|0.1|0.01|35|1 - 400|20|1, 3, 9, 15, 25| |
# |10|0.15|0.01|35|1 - 400|20|1, 3, 9, 15, 25, 35, 45| |
# |11|0.15|0.01|35|1 - 400|10|1, 3, 9, 15, 25, 35, 45| |
# |12|0.02|0.01|35|m|50|1, 3, 9, 15, 25, 35, 45| |
# |13|0.01 exp 0.98, 1|0.01|35|1 - 200|50|1, 3, 9, 15, 25, 35, 45| n |
# |14|0.01|0.01|35|1 - 200|50|1, 3, 9, 15, 25, 35, 45| n |
# |15|0.01|0.9|35|50 - 100|50|1, 3, 5, 7, 9, 15, 25, 35, 45| n |
# |16|0.01|0.1|35|50 - 100|50|1, 3, 5, 7, 9, 15, 25, 35, 45| n |
# |17|0.1|0.1|35|50 - 100|50 - 100|1, 3, 5, 7, 9, 15, 25, 35, 45| n |
# In[24]:
# In[335]:
batch_size=35
test_size=0.5
epochs=50
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(0.01,
decay_steps=1,
decay_rate=0.98)
plt.plot(range(epochs+1), [lr_schedule(i) for i in range(epochs+1)])
plt.grid()
plt.ylim(0)
plt.xlabel('Epochs')
plt.ylabel('Learning Rate')
plt.show()
# In[357]:
multi_ensem_results = list()
multi_ensem_iterations = 2
multi_ensem_iterations = 30
for i in range(multi_ensem_iterations):
print(f"Iteration {i+1}/{multi_ensem_iterations}")
data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.5, stratify=labels)
multi_ensem_results.append(list(evaluate_ensemble_vote(epochs=(1, 100),
hidden_nodes=16,
nmodels=[1, 3, 7, 11, 15],
optimizer=lambda: tf.keras.optimizers.SGD(learning_rate=0.05, momentum=0.01),
data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=test_size,
# stratify=labels
)
multi_ensem_results.append(list(evaluate_ensemble_vote(epochs=(50, 100),
hidden_nodes=(50, 100),
nmodels=[1, 3, 5, 7, 9, 15, 25, 35, 45],
optimizer=lambda: tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.1),
weight_init=lambda: 'random_uniform',
batch_size=35,
batch_size=batch_size,
dtrain=data_train,
dtest=data_test,
ltrain=labels_train,
@ -748,7 +829,17 @@ for i in range(multi_ensem_iterations):
# 2. Individual Accuracy
# 3. Agreement
# In[25]:
# In[322]:
# Collect the four per-test values that are written into one
# [iteration, :, model-count] slice of the accuracy tensors, in this order:
#   0. the test's "accuracy" entry
#   1. the mean, over the items in "history", of each item's last "accuracy" value
#   2. the test's "individual_accuracy" entry
#   3. the test's "agreement" entry
# `test` is a single result dict; the keys above are the only ones read here.
def test_tensor_data(test):
return [test["accuracy"],
np.mean([i["accuracy"][-1] for i in test["history"]]), # avg train acc (final "accuracy" value of each history, averaged)
test["individual_accuracy"],
test["agreement"]]
# In[362]:
multi_ensem_models = sorted(list({i["num_models"] for i in multi_ensem_results[0]}))
@ -759,10 +850,7 @@ for iter_idx, iteration in enumerate(multi_ensem_results):
for single_test in iteration:
ensem_models_idx = multi_ensem_models.index(single_test['num_models'])
accuracy_ensem_tensor[iter_idx, :, ensem_models_idx] = [single_test["accuracy"],
np.mean([i["accuracy"][-1] for i in single_test["history"]]),
single_test["individual_accuracy"],
single_test["agreement"]]
accuracy_ensem_tensor[iter_idx, :, ensem_models_idx] = test_tensor_data(single_test)
mean_ensem_accuracy = np.mean(accuracy_ensem_tensor, axis=0)
std_ensem_accuracy = np.std(accuracy_ensem_tensor, axis=0)
@ -778,18 +866,26 @@ print(f'Momentum: {multi_ensem_results[0][0]["optimizer"]["momentum"]:.3}')
# #### Export/Import Test Sets
#
# Export mean and standard deviations for retrieval and visualisation
pickle.dump(multi_ensem_results, open("result.p", "wb"))
# In[22]:
# In[358]:
multi_ensem_results = pickle.load(open("results/exp2-test3.p", "rb"))
exp2_testname = 'exp2-test17'
pickle.dump(multi_ensem_results, open(f"results/{exp2_testname}.p", "wb"))
# In[349]:
exp2_testname = 'exp2-test16'
multi_ensem_results = pickle.load(open(f"results/{exp2_testname}.p", "rb"))
np.savetxt("exp2-mean.csv", mean_ensem_accuracy, delimiter=',')
np.savetxt("exp2-std.csv", std_ensem_accuracy, delimiter=',')mean_ensem_accuracy = np.loadtxt("results/test1-exp2-mean.csv", delimiter=',')
std_ensem_accuracy = np.loadtxt("results/test1-exp2-std.csv", delimiter=',')
# ### Best Results
# In[26]:
# In[363]:
best_ensem_accuracy_idx = np.unravel_index(np.argmax(mean_ensem_accuracy[0, :]), mean_ensem_accuracy.shape)
@ -801,10 +897,10 @@ print(f'Models: {best_ensem_accuracy_models}, {best_ensem_accuracy * 100:.3}% Ac
# ### Test/Train Error Over Model Numbers
# In[29]:
# In[364]:
fig = plt.figure(figsize=(8, 5))
fig = plt.figure(figsize=(6, 4))
fig.set_dpi(fig_dpi)
# plt.plot(multi_ensem_models, 1 - mean_ensem_accuracy[0, :], 'x-', label='Ensemble Test')
@ -818,12 +914,16 @@ plt.errorbar(multi_ensem_models, 1 - mean_ensem_accuracy[1, :], yerr=std_ensem_a
plt.errorbar(multi_ensem_models, 1 - mean_ensem_accuracy[3, :], yerr=std_ensem_accuracy[3, :], capsize=4, label='Disagreement')
plt.title(f"Error Rate for Horizontal Ensemble Models")
# plt.ylim(0, 1)
plt.ylim(0, np.max(1 - mean_ensem_accuracy + std_ensem_accuracy) + 0.05)
# plt.ylim(0, 0.2)
# plt.ylim(0, np.max(1 - mean_ensem_accuracy + std_ensem_accuracy) + 0.05)
plt.grid()
plt.legend()
plt.xlabel("Number of Models")
plt.ylabel("Error Rate")
plt.tight_layout()
plt.savefig(f'graphs/{exp2_testname}-error-rate-curves.png')
plt.show()
@ -831,7 +931,7 @@ plt.show()
#
# Repeat Exp 2) for cancer dataset with two different optimisers of your choice e.g. 'trainlm' and 'trainrp'. Comment and discuss the result and decide which is more appropriate training algorithm for the problem. In your discussion, include in your description a detailed account of how the training algorithms (optimisations) work.
# In[7]:
# In[127]:
def evaluate_optimisers(optimizers=[(lambda: 'sgd', 'sgd'),
@ -868,17 +968,25 @@ for test in evaluate_optimisers(epochs=(5, 300), nmodels=[1, 3, 5]):
#
# ### Pickle Results
#
# | test | optim1 | optim2 | optim3 | batch size | hidden nodes | epochs | models |
# | --- | --- | --- | --- | --- | --- | --- | --- |
# | test | optim1 | optim2 | optim3 | lr | momentum | epsilon | batch size | hidden nodes | epochs | models | stratified |
# | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
# | 1 | SGD | Adam | RMSprop | 0.1 | 0.0 | 1e-7 | 35 | 16 | 1 - 100 | 1, 3, 9, 15, 25 | y |
# | 2 | SGD | Adam | RMSprop | 0.05 | 0.01 | 1e-7 | 35 | 16 | 1 - 100 | 1, 3, 9, 15, 25 | y |
# | 3 | SGD | Adam | RMSprop | 0.1 | 0.01 | 1e-7 | 35 | 1 - 400 | 20 | 1, 3, 9, 15, 25, 35, 45 | y |
# | 4 | SGD | Adam | RMSprop | 0.075 | 0.01 | 1e-7 | 35 | 1 - 400 | 20 | 1, 3, 9, 15, 25, 35, 45 | y |
# | 5 | SGD | Adam | RMSprop | 0.05 | 0.01 | 1e-7 | 35 | 1 - 400 | 20 | 1, 3, 9, 15, 25, 35, 45 | n |
# | 6 | SGD | Adam | RMSprop | 0.02 | 0.01 | 1e-7 | 35 | m | 50 | 1, 3, 9, 15, 25, 35, 45 | n |
# | 7 | SGD | Adam | RMSprop | 0.1 | 0.9 | 1e-8 | 35 | 1 - 400 | 50 - 100 | 1, 3, 5, 7, 9, 15, 25 | n |
# | 8 | SGD | Adam | RMSprop | 0.05 | 0.9 | 1e-8 | 35 | 1 - 400 | 50 - 100 | 1, 3, 5, 7, 9, 15, 25 | n |
# In[36]:
# In[27]:
multi_optim_results = list()
multi_optim_iterations = 5
multi_optim_iterations = 30
multi_optim_lr = 0.01
multi_optim_mom = 0.0
multi_optim_lr = 0.05
multi_optim_mom = 0.01
multi_optim_eps = 1e-07
multi_optims = [(lambda: tf_optim.SGD(learning_rate=multi_optim_lr,
momentum=multi_optim_mom), 'sgd'),
@ -890,10 +998,12 @@ multi_optims = [(lambda: tf_optim.SGD(learning_rate=multi_optim_lr,
for i in range(multi_optim_iterations):
print(f"Iteration {i+1}/{multi_optim_iterations}")
data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.5, stratify=labels)
multi_optim_results.append(list(evaluate_optimisers(epochs=(1, 100),
hidden_nodes=16,
nmodels=[1, 3, 5, 7],
data_train, data_test, labels_train, labels_test = train_test_split(data, labels, test_size=0.5,
# stratify=labels
)
multi_optim_results.append(list(evaluate_optimisers(epochs=(50, 100),
hidden_nodes=(1, 400),
nmodels=[1, 3, 9, 15, 25],
optimizers=multi_optims,
weight_init=lambda: 'random_uniform',
batch_size=35,
@ -917,7 +1027,7 @@ for i in range(multi_optim_iterations):
# 2. Individual Accuracy
# 3. Agreement
# In[37]:
# In[467]:
multi_optim_results_dict = dict() # indexed by optimiser name
@ -950,10 +1060,8 @@ for optim, optim_results in multi_optim_results_dict.items():
for single_test in iteration:
optim_models_idx = multi_optim_models.index(single_test['num_models'])
accuracy_optim_tensor[iter_idx, :, optim_models_idx] = [single_test["accuracy"],
np.mean([i["accuracy"][-1] for i in single_test["history"]]),
single_test["individual_accuracy"],
single_test["agreement"]]
accuracy_optim_tensor[iter_idx, :, optim_models_idx] = test_tensor_data(single_test)
optim_tensors[optim] = {
"accuracy": accuracy_optim_tensor,
"mean": np.mean(accuracy_optim_tensor, axis=0),
@ -970,10 +1078,23 @@ print(f'Loss: {multi_optim_results[0][0][0]["loss"]}')
# #### Export/Import Test Sets
#
# Export mean and standard deviations for retrieval and visualisation
pickle.dump(multi_optim_results, open("result.p", "wb"))multi_optim_results = pickle.load(open("results/exp3-test1.p", "rb"))
# In[28]:
pickle.dump(multi_optim_results, open("results/exp3-test5.p", "wb"))
# In[466]:
exp3_testname = 'exp3-test8'
multi_optim_results = pickle.load(open(f"results/{exp3_testname}.p", "rb"))
# ### Best Results
# In[38]:
# In[468]:
for optim, optim_results in optim_tensors.items():
@ -986,10 +1107,10 @@ for optim, optim_results in optim_tensors.items():
# ### Optimiser Error Rates
# In[40]:
# In[469]:
fig, axes = plt.subplots(1, 3, figsize=(24, 5))
fig, axes = plt.subplots(1, 3, figsize=(12, 3))
fig.set_dpi(fig_dpi)
for idx, ((optimiser_name, tensors_dict), ax) in enumerate(zip(optim_tensors.items(), axes.flatten())):
@ -1004,13 +1125,21 @@ for idx, ((optimiser_name, tensors_dict), ax) in enumerate(zip(optim_tensors.ite
# ax.errorbar(multi_optim_models, 1 - tensors_dict["mean"][3, :], yerr=tensors_dict["std"][3, :], capsize=4, label='Disagreement')
ax.set_title(f"{optimiser_name} Error Rate for Ensemble Models")
# ax.set_ylim(0, 1)
ax.set_ylim(0, np.max([np.max(1 - i["mean"] + i["std"]) for i in optim_tensors.values()]) + 0.03)
ax.set_ylim(0, 0.1)
# ax.set_ylim(0, np.max([np.max(1 - i["mean"] + i["std"]) for i in optim_tensors.values()]) + 0.03)
ax.grid()
# if idx > 0:
ax.legend()
ax.set_xlabel("Number of Models")
ax.set_ylabel("Error Rate")
# axes[0].set_ylim(0, 0.4)
axes[1].legend()
axes[2].legend()
plt.tight_layout()
plt.savefig(f'graphs/{exp3_testname}-error-rate-curves.png')
plt.show()

View File

@ -5,7 +5,7 @@ description = ""
authors = ["andy <andy@sarsoo.xyz>"]
[tool.poetry.dependencies]
python = "3.8"
python = "~3.8"
tensorflow = "^2.4.1"
pandas = "^1.2.4"
matplotlib = "^3.4.1"

View File

@ -52,3 +52,58 @@
year = {2012}
}
@misc{tf.keras.optimizers.SGD,
author = {TensorFlow},
title = {tf.keras.optimizers.SGD - Documentation},
url = {https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/SGD},
urldate = {2021-05-02},
year = {2019}
}
@misc{paperspace-mom-rmsprop-adam,
author = {Kathuria, Ayoosh},
title = {Intro to optimization in deep learning: Momentum, RMSProp and Adam},
url = {https://blog.paperspace.com/intro-to-optimization-momentum-rmsprop-adam},
urldate = {2021-05-02},
year = {2018}
}
@misc{understanding-rmsprop,
author = {Bushaev, Vitaly},
month = sep,
organization = {Towards Data Science},
title = {Understanding RMSprop — faster neural network learning},
url = {https://towardsdatascience.com/understanding-rmsprop-faster-neural-network-learning-62e116fcf29a},
urldate = {2021-05-02},
year = {2018}
}
@misc{rmsprop-hinton,
author = {Tieleman, T. and Hinton, G.},
howpublished = {COURSERA: Neural Networks for Machine Learning},
title = {Lecture 6.5—RmsProp: Divide the gradient by a running average of its recent magnitude},
url = {https://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf},
urldate = {2021-05-02},
year = {2012}
}
@article{adam-paper,
author = {Kingma, Diederik and Ba, Jimmy},
journal = {International Conference on Learning Representations},
month = {12},
title = {Adam: A Method for Stochastic Optimization},
url = {https://www.researchgate.net/publication/269935079_Adam_A_Method_for_Stochastic_Optimization},
urldate = {2021-05-02},
year = {2014}
}
@misc{tds-adam,
author = {Bushaev, Vitaly},
month = oct,
organization = {Towards Data Science},
title = {Adam — latest trends in deep learning optimization.},
url = {https://towardsdatascience.com/adam-latest-trends-in-deep-learning-optimization-6be9a291375c},
urldate = {2021-05-02},
year = {2018}
}

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More