# Activation functions and their derivatives.
def tanh(x):
    """Hyperbolic-tangent activation, evaluated element-wise by NumPy."""
    activated = np.tanh(x)
    return activated
def tanh_derivative(x):
    """Derivative of tanh at ``x``: ``1 - tanh(x)**2`` (element-wise)."""
    t = np.tanh(x)
    return 1 - t**2
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated element-wise.

    Uses the identity ``sigmoid(x) = exp(-log(1 + exp(-x)))`` with
    ``np.logaddexp(0, -x)`` for the inner logarithm.  The naive form
    overflows inside ``np.exp(-x)`` for large negative ``x`` and emits a
    RuntimeWarning; this form stays finite over the whole range and
    returns the same values.
    """
    return np.exp(-np.logaddexp(0, -x))
def sigmoid_derivative(x):
    """Derivative of the sigmoid at ``x``: ``s * (1 - s)`` with ``s = sigmoid(x)``."""
    s = sigmoid(x)
    return s * (1 - s)
# Loss function
def mse(y_true, y_pred):
    """Mean squared error: the mean over all elements of ``(y_pred - y_true)**2``."""
    squared_error = np.power(y_pred - y_true, 2)
    return np.mean(squared_error)
def mse_derivative(y_true, y_pred):
    """Gradient of ``mse`` with respect to ``y_pred``.

    Computed as ``2 * (y_pred - y_true) / y_true.size``; ``y_true`` must
    expose a ``.size`` attribute (e.g. a NumPy array).
    """
    residual = y_pred - y_true
    return 2 * residual / y_true.size
# define the basic layer.
class FC_Layer:
    """Fully connected layer computing ``Y = X . W + B``.

    ``weights`` has shape ``(input_size, output_size)`` and ``bias`` has
    shape ``(1, output_size)``; both are drawn uniformly from
    ``[-0.5, 0.5)``.
    """

    def __init__(self, input_size, output_size):
        # input_size: number of input neurons
        # output_size: number of output neurons
        self.input_size = input_size
        self.output_size = output_size
        self.weights = np.random.rand(self.input_size, self.output_size) - 0.5
        self.bias = np.random.rand(1, self.output_size) - 0.5

    def forward_propagation(self, input_x):
        """Return the layer output for ``input_x``.

        The input is cached on ``self.input`` because the backward pass
        needs it to compute the weight gradient.
        """
        self.input = input_x
        return np.dot(self.input, self.weights) + self.bias

    def backward_propagation(self, output_error, learning_rate):
        """Back-propagate ``output_error = dE/dY`` and update the parameters.

        Must be called after ``forward_propagation`` (it reads the cached
        input).  Gradients:

            dE/dX = dE/dY . W^T
            dE/dW = X^T . dE/dY
            dE/dB = dE/dY summed over the sample axis

        Args:
            output_error: dE/dY, shaped like the forward output.
            learning_rate: gradient-descent step size.

        Returns:
            input_error = dE/dX, to be passed to the previous layer.
        """
        # Promote 1-D vectors to a single-row batch so the matrix products
        # and the bias reduction below have well-defined shapes.
        output_error = np.atleast_2d(output_error)
        layer_input = np.atleast_2d(self.input)

        # dE/dX must use the weights from the forward pass, so compute it
        # before the update below.
        input_error = np.dot(output_error, self.weights.T)
        weights_gradient = np.dot(layer_input.T, output_error)
        # Equals output_error itself for a single sample; sums over rows
        # when several samples are propagated at once.
        bias_gradient = np.sum(output_error, axis=0, keepdims=True)

        # update parameters based on Gradient Descent.
        self.weights = self.weights - learning_rate * weights_gradient
        self.bias = self.bias - learning_rate * bias_gradient
        return input_error
# define the activation layer
class Activation_Layer:
    """Element-wise activation layer with no trainable parameters.

    Args:
        activation_function: callable applied to the input in the forward pass.
        activation_derivative: callable returning the derivative of the
            activation, evaluated at the forward-pass input.
    """

    def __init__(self, activation_function, activation_derivative):
        self.activation = activation_function
        self.activation_derivative = activation_derivative

    def forward_propagation(self, input_data):
        """Return the activated input; caches input and output on the layer."""
        self.input = input_data
        self.output = self.activation(self.input)
        return self.output

    def backward_propagation(self, output_error, learning_rate=None):
        """Return input_error = dE/dX for a given output_error = dE/dY.

        ``learning_rate`` is accepted only so this method can be called
        with the same arguments as ``FC_Layer.backward_propagation``; it
        is ignored because the layer has nothing to update.
        """
        return self.activation_derivative(self.input) * output_error
# define the neural network: FC layers with an activation after each hidden layer.
class NN:
    """Feed-forward network with three hidden FC layers and a linear output layer.

    The data flow is
    ``layer1 -> activation1 -> layer2 -> activation2 -> layer3 ->
    activation3 -> layer4``; no activation is applied after ``layer4``.

    Args:
        input_size: number of input features.
        output_size: number of outputs.
        hidden_layers: sizes of the hidden layers; the first three entries
            are used.  Defaults to ``[64, 32, 64]``.
        activation_func: activation applied after each hidden layer.
        activation_func_grad: derivative of ``activation_func``.
        loss_func: callable ``loss(y_true, y_pred)``.
        loss_func_gradient: callable returning d(loss)/d(y_pred).
    """

    def __init__(
        self,
        input_size,
        output_size,
        hidden_layers=None,
        activation_func=sigmoid,
        activation_func_grad=sigmoid_derivative,
        loss_func=mse,
        loss_func_gradient=mse_derivative,
    ):
        # A list literal as the default would be one object shared by every
        # instance, so the default is created per call instead.
        if hidden_layers is None:
            hidden_layers = [64, 32, 64]
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_layers = hidden_layers
        self.activation_func = activation_func
        self.activation_func_grad = activation_func_grad
        # Both spellings of the loss attributes are kept so existing code
        # reading either name continues to work.
        self.loss_func = loss_func
        self.loss_func_grad = loss_func_gradient
        self.loss_function = loss_func
        self.loss_gradient = loss_func_gradient
        self.layer1 = FC_Layer(self.input_size, self.hidden_layers[0])
        self.layer2 = FC_Layer(self.hidden_layers[0], self.hidden_layers[1])
        self.layer3 = FC_Layer(self.hidden_layers[1], self.hidden_layers[2])
        self.layer4 = FC_Layer(self.hidden_layers[2], self.output_size)
        self.activation1 = Activation_Layer(self.activation_func, self.activation_func_grad)
        self.activation2 = Activation_Layer(self.activation_func, self.activation_func_grad)
        self.activation3 = Activation_Layer(self.activation_func, self.activation_func_grad)

    def forward(self, input_data):
        """Run the forward pass and return the prediction for ``input_data``."""
        hidden = self.activation1.forward_propagation(
            self.layer1.forward_propagation(input_data)
        )
        hidden = self.activation2.forward_propagation(
            self.layer2.forward_propagation(hidden)
        )
        hidden = self.activation3.forward_propagation(
            self.layer3.forward_propagation(hidden)
        )
        pred = self.layer4.forward_propagation(hidden)
        return pred

    def backward(self, output_error, learning_rate):
        """Back-propagate ``output_error`` (dE/dY of the prediction).

        Walks the layers in reverse order of ``forward``; each FC layer
        updates its own parameters with ``learning_rate``.  Returns the
        error with respect to the network input.
        """
        error = self.layer4.backward_propagation(output_error, learning_rate)
        error = self.activation3.backward_propagation(error)
        error = self.layer3.backward_propagation(error, learning_rate)
        error = self.activation2.backward_propagation(error)
        error = self.layer2.backward_propagation(error, learning_rate)
        error = self.activation1.backward_propagation(error)
        error = self.layer1.backward_propagation(error, learning_rate)
        return error

    # train the network
    def training(self, x_train, y_train, epochs, learning_rate):
        """Train with per-sample gradient descent and print the mean loss per epoch.

        Args:
            x_train: training inputs, sample dimension first.
            y_train: training targets, indexed in step with ``x_train``.
            epochs: number of passes over the training set.
            learning_rate: gradient-descent step size.
        """
        # sample dimension first
        samples = len(x_train)
        # training loop
        for epoch in range(epochs):
            err = 0
            for j in range(samples):
                # Index by the sample counter j (not the epoch counter) and
                # add a leading batch axis of size 1 for the layers.
                input_data = np.expand_dims(x_train[j], axis=0)
                y_true = np.expand_dims(y_train[j], axis=0)
                # forward propagation
                y_pred = self.forward(input_data)
                # compute loss (for display purpose only)
                err += self.loss_function(y_true, y_pred)
                # backward propagation: update every layer from this sample
                self.backward(self.loss_gradient(y_true, y_pred), learning_rate)
            # calculate average error on all samples
            err /= samples
            print('epoch %d/%d error=%f' % (epoch + 1, epochs, err))