# Activation functions and their derivatives.
def tanh(x):
    """Return the element-wise hyperbolic tangent of ``x``.

    Delegates to ``np.tanh``, so scalars and arrays are both accepted.
    """
    activated = np.tanh(x)
    return activated
def tanh_derivative(x):
    """Return the derivative of tanh evaluated at ``x``.

    Uses the identity d/dx tanh(x) = 1 - tanh(x)^2, applied element-wise.
    """
    activated = np.tanh(x)
    return 1 - activated ** 2
def sigmoid(x):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-x))``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def sigmoid_derivative(x):
    """Return the derivative of the sigmoid evaluated at ``x``.

    Uses the identity s'(x) = s(x) * (1 - s(x)), where ``s`` is ``sigmoid``.
    """
    activated = sigmoid(x)
    return activated * (1 - activated)
# Loss function and its derivative.
def mse(y_true, y_pred):
    """Return the mean squared error between ``y_pred`` and ``y_true``.

    The mean is taken over every element of the difference.
    """
    residual = y_pred - y_true
    squared = np.power(residual, 2)
    return np.mean(squared)
def mse_derivative(y_true, y_pred):
    """Return the gradient of ``mse`` with respect to ``y_pred``.

    Computes ``2 * (y_pred - y_true) / N``, where ``N`` is ``y_true.size``.
    """
    doubled_residual = 2 * (y_pred - y_true)
    return doubled_residual / y_true.size
# define the basic layer.
class FC_Layer:
    """Fully connected (dense) layer computing ``y = x . W + b``.

    Attributes:
        input_size: Number of input neurons.
        output_size: Number of output neurons.
        weights: Array of shape ``(input_size, output_size)`` drawn
            uniformly from ``[-0.5, 0.5)``.
        bias: Array of shape ``(1, output_size)`` drawn the same way.
        input: Input cached by the most recent forward pass; read by
            ``backward_propagation``.
    """

    def __init__(self, input_size, output_size):
        """Create the layer and randomly initialise its parameters.

        Args:
            input_size: Number of input neurons.
            output_size: Number of output neurons.
        """
        self.input_size = input_size
        self.output_size = output_size
        self.weights = np.random.rand(self.input_size, self.output_size) - 0.5
        self.bias = np.random.rand(1, self.output_size) - 0.5

    def forward_propagation(self, input_x):
        """Return the layer output for ``input_x`` and cache the input.

        Args:
            input_x: Input whose last axis has length ``input_size``.

        Returns:
            ``np.dot(input_x, weights) + bias``.
        """
        # Cache the input with at least two dimensions. A bare 1-D sample
        # would otherwise make ``self.input.T`` a no-op and break the
        # weight-gradient product in backward_propagation. The forward result
        # is unchanged because the (1, output_size) bias already broadcasts a
        # 1-D product to a row.
        self.input = np.atleast_2d(input_x)
        return np.dot(self.input, self.weights) + self.bias

    def backward_propagation(self, output_error, learning_rate):
        """Apply one gradient-descent step and return the input error.

        Args:
            output_error: dE/dY, the loss gradient with respect to this
                layer's output.
            learning_rate: Step size for the parameter update.

        Returns:
            dE/dX, the loss gradient with respect to this layer's input.
        """
        # dE/dX must use the weights from before the update below.
        input_error = np.dot(output_error, self.weights.T)
        # dE/dW = X^T . dE/dY
        weights_gradient = np.dot(self.input.T, output_error)
        # dE/dB sums the output error over the first (sample) axis.
        bias_gradient = np.sum(output_error, axis=0, keepdims=True)

        # Update parameters with plain gradient descent.
        self.weights = self.weights - learning_rate * weights_gradient
        self.bias = self.bias - learning_rate * bias_gradient
        return input_error
# define the activation layer
class Activation_Layer:
    """Element-wise activation layer with no trainable parameters."""

    def __init__(self, activation_function, activation_derivative):
        """Store the activation callable and its derivative callable."""
        self.activation = activation_function
        self.activation_derivative = activation_derivative

    def forward_propagation(self, input_data):
        """Return the activated input, caching both input and output."""
        self.input = input_data
        activated = self.activation(input_data)
        self.output = activated
        return activated

    def backward_propagation(self, output_error):
        """Return the input error dE/dX for a given ``output_error`` dE/dY.

        The derivative is evaluated at the input cached by the last
        forward pass and multiplied with ``output_error``.
        """
        local_slope = self.activation_derivative(self.input)
        return local_slope * output_error
class NN:
    """Feed-forward network with three hidden layers.

    The architecture is fixed: four ``FC_Layer`` instances with an
    ``Activation_Layer`` after each of the first three. The output of the
    last fully connected layer is returned without an activation.
    """

    def __init__(self, input_size, output_size, hidden_layers=None, activation_func=sigmoid,
                 activation_func_grad=sigmoid_derivative, loss_func=mse, loss_func_gradient=mse_derivative):
        """Build the layers.

        Args:
            input_size: Number of input features.
            output_size: Number of output features.
            hidden_layers: Sizes of the hidden layers; only the first three
                entries are used. Defaults to ``[64, 32, 64]``.
            activation_func: Activation applied after each hidden layer.
            activation_func_grad: Derivative of ``activation_func``.
            loss_func: Callable ``(y_true, y_pred) -> loss``.
            loss_func_gradient: Callable ``(y_true, y_pred) -> dE/dY``.
        """
        # A list literal as the default would be shared by every instance,
        # so a fresh list is created per call instead.
        if hidden_layers is None:
            hidden_layers = [64, 32, 64]
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_layers = hidden_layers
        self.activation_func = activation_func
        self.activation_func_grad = activation_func_grad
        self.loss_function = loss_func
        self.loss_gradient = loss_func_gradient
        self.layer1 = FC_Layer(self.input_size, self.hidden_layers[0])
        self.layer2 = FC_Layer(self.hidden_layers[0], self.hidden_layers[1])
        self.layer3 = FC_Layer(self.hidden_layers[1], self.hidden_layers[2])
        self.layer4 = FC_Layer(self.hidden_layers[2], self.output_size)
        self.activation1 = Activation_Layer(self.activation_func, self.activation_func_grad)
        self.activation2 = Activation_Layer(self.activation_func, self.activation_func_grad)
        self.activation3 = Activation_Layer(self.activation_func, self.activation_func_grad)

    def forward(self, input_data):
        """Run a forward pass and return the network prediction.

        Each layer caches its input, so a later backward pass refers to
        the most recent call of this method.
        """
        hidden = input_data
        for layer, activation in (
            (self.layer1, self.activation1),
            (self.layer2, self.activation2),
            (self.layer3, self.activation3),
        ):
            hidden = activation.forward_propagation(layer.forward_propagation(hidden))
        return self.layer4.forward_propagation(hidden)

    def _backward(self, y_true, y_pred, learning_rate):
        """Back-propagate the loss gradient, updating every FC layer."""
        error = self.layer4.backward_propagation(self.loss_gradient(y_true, y_pred), learning_rate)
        for layer, activation in (
            (self.layer3, self.activation3),
            (self.layer2, self.activation2),
            (self.layer1, self.activation1),
        ):
            error = layer.backward_propagation(activation.backward_propagation(error), learning_rate)

    def training(self, x_train, y_train, epochs, learning_rate):
        """Train the network one sample at a time.

        Args:
            x_train: Indexable collection of input samples, sample axis first.
            y_train: Targets, indexed in step with ``x_train``.
            epochs: Number of passes over the training data.
            learning_rate: Step size handed to every layer update.

        Prints the mean per-sample loss after each epoch.
        """
        # Sample dimension first.
        samples = len(x_train)
        for i in range(epochs):
            err = 0
            for j in range(samples):
                # Give each sample a leading axis of length 1 so the layers
                # see a single-row batch.
                input_data = np.expand_dims(x_train[j], axis=0)
                y_true = np.expand_dims(y_train[j], axis=0)
                y_pred = self.forward(input_data)
                # Accumulate the loss (for display purposes only).
                err += self.loss_function(y_true, y_pred)
                self._backward(y_true, y_pred, learning_rate)
            # Average error over all samples.
            err /= samples
            print('epoch %d/%d error=%f' % (i+1, epochs, err))
# Evaluate the model and plot a histogram of reconstruction errors.
# `net`, `data_x` and `plt` are defined outside this section; presumably
# `net` is a trained NN instance and `plt` is matplotlib.pyplot — confirm.
data_pred = net.forward(data_x)
# Mean squared difference between input and prediction, reduced over axis 1
# (assumes data_x is 2-D with one sample per row — confirm against the loader).
reconstruction_error = np.mean((data_x - data_pred)**2, axis=1)
plt.figure(figsize=(8, 4))
# 100-bin, semi-transparent green histogram of the error values.
plt.hist(reconstruction_error, bins=100, alpha=0.6, color='g')
plt.xlabel('Error')
plt.ylabel('Samples')
plt.title('Reconstruction error after model training')
plt.show()