"""
FullyConnected represents a deep fullyConnected artificial network (dff). At
initialisation a list where each elements represents the number of neurons
in each layer. With the evaluate function a given input can be evaluated and
with train the network can be trained
"""
import numpy as np

from activationFunction import ActivationFunction


class FullyConnected:
    """
    FullyConnected represents a deep fully connected feed-forward
    artificial neural network (DFF). At initialisation a list is given
    in which each element represents the number of neurons in the
    corresponding layer. With the evaluate method a given input can be
    evaluated, and with the train method the network can be trained.
    """
    shape = None
    size = None
    weights = None
    activation = None

    def __init__(self, shape, activation=ActivationFunction.tanh):
        """
        Initialise the network with a shape list, where each element
        represents the number of fully connected nodes in a specific
        layer. The weights of every link are drawn at random from a
        standard normal distribution.
        """
        self.shape = np.array(shape, ndmin=2)
        self.size = len(shape)
        self.activation = activation
        # weights[i] connects layer i to layer i + 1 and has shape
        # (neurons in layer i + 1, neurons in layer i)
        self.weights = [np.random.normal(0, 1, size=(y, x))
                        for x, y in zip(shape[:-1], shape[1:])]
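
    # For illustration (hypothetical numbers, not from the original code):
    # FullyConnected([3, 4, 2]) creates two weight matrices, weights[0]
    # with shape (4, 3) and weights[1] with shape (2, 4), so that
    # np.dot(weights[i], layerVector) maps layer i to layer i + 1.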

    def evaluate(self, inputVector, getLayerValues=False):
        """
        Takes a vector with shape (1, n) as an input, reshapes it to
        shape (n, 1) and evaluates it in the neural network. Returns
        either the output layer vector of shape (m, 1) or, if
        getLayerValues is True (default is False), a tuple containing
        the output vector and a list of all layers' node values (as
        vectors, used in the training method).
        """
        layerVector = inputVector.reshape(-1, 1)
        # Store the reshaped input so every entry has shape (n, 1)
        networkLayerValues = [layerVector]
        # For every layer n (except the input layer), multiply the weights
        # connecting layer n - 1 and layer n with the output of layer
        # n - 1 and pass the result as the new input to the next layer
        for layerWeights in self.weights:
            # Sum up all inputs * weights for every node in layer n
            summed = np.dot(layerWeights, layerVector)
            # Apply the activation function to the summed input in order
            # to get the output of layer n
            if self.activation == ActivationFunction.sigmoid:
                layerVector = self.sigmoid(summed)
            elif self.activation == ActivationFunction.tanh:
                layerVector = self.tanh(summed)
            else:
                raise ValueError("Activation function not found")
            networkLayerValues.append(layerVector)
        # Return either a tuple or just the output vector
        if getLayerValues:
            return (layerVector, networkLayerValues)
        return layerVector

    def train(self, inputs, labels, learningRate=0.5):
        """
        inputs is the input layer vector and labels is a vector holding
        the associated labels. A delta error is calculated and the
        weights are updated. An optional learningRate can be given
        (default is 0.5). Returns the squared error of the output layer.
        """
        # Bring the vectors into the right shape: (n, 1)
        inputs = inputs.reshape(-1, 1)
        labels = labels.reshape(-1, 1)
        # Get the (output vector, layer values) tuple of the network
        networkOutputs = self.evaluate(inputs, getLayerValues=True)
        networkErrors = self.backpropagate(networkOutputs[0], labels)
        # Iterate backwards over the network, calculate the delta error
        # and update the weights
        for index in range(self.size - 1):
            errorL0 = networkErrors[len(networkErrors) - 1 - index]
            outputL0 = networkOutputs[1][len(networkOutputs[1]) - 1 - index]
            outputL1 = networkOutputs[1][len(networkOutputs[1]) - 2 - index]
            # dE formula: np.dot(-errorL0 * f'(outputL0), outputL1.T),
            # where f' is the derivative of the activation function
            # expressed through the layer output
            if self.activation == ActivationFunction.sigmoid:
                deltaWeight = learningRate * np.dot(-errorL0 * outputL0 * (1.0 - outputL0), outputL1.T)
            elif self.activation == ActivationFunction.tanh:
                deltaWeight = learningRate * np.dot(-errorL0 * (1.0 - np.square(outputL0)), outputL1.T)
            else:
                raise ValueError("Activation function not found")
            # self.weights[-1 - index] would be equivalent; plain
            # self.weights[-index] does not work because -0 == 0, which
            # addresses the first layer instead of the last
            self.weights[len(self.weights) - 1 - index] -= deltaWeight
        # Return the cost
        return (networkOutputs[0] - labels)**2
        # TODO: Check whether performance improved with the new weights.
        # If so keep the new weights, otherwise restore the old ones.
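
    # Where the update rule comes from (a sketch of the derivation, not in
    # the original code): for the squared error E = (label - out)**2 / 2
    # of a single output, dE/dW = np.dot(-(label - out) * f'(net), prev.T),
    # and with errorL0 = label - out that is exactly the dot product above.
    # For the sigmoid f'(net) = out * (1 - out) and for tanh
    # f'(net) = 1 - out**2, so both derivatives can be written in terms of
    # the layer output itself. The same rule is then applied to every
    # layer using the backpropagated errors.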

    def backpropagate(self, outputs, labels):
        """
        Takes the outputs vector and the labels vector of the output
        layer, computes the error of the output and backpropagates it,
        returning a list containing the error of each node.
        """
        # Calculate the error at the output layer
        error = np.array(labels - outputs)
        # List containing arrays with the errors of all nodes, ordered
        # from the input layer to the output layer
        errorVector = [error]
        # Start at the output layer and move backwards to the input layer
        for layerWeights in reversed(self.weights):
            # Get the error of this layer's nodes
            error = np.dot(layerWeights.T, error)
            # Prepend the error (since we start at the output layer and
            # move towards the input layer)
            errorVector.insert(0, error)
        # Return a plain list: the per-layer error vectors have different
        # lengths, so they cannot be stacked into one rectangular array
        return errorVector
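
    # How the errors travel backwards (an illustrative note, not from the
    # original code): with e = labels - outputs at the output layer, each
    # earlier layer receives np.dot(layerWeights.T, e), i.e. every node
    # collects the errors of the next layer weighted by its outgoing
    # links. For a hypothetical [3, 4, 2] network the returned list holds
    # vectors of shapes (3, 1), (4, 1) and (2, 1).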

    def sigmoid(self, z):
        """
        Applies the sigmoid function elementwise to the vector z with
        shape (1, n) or (n, 1) and returns a vector of the same shape.
        """
        # Cast to an extended-precision float, otherwise np.exp overflows
        # for large negative z (np.longdouble is available on every
        # platform, unlike np.float128)
        z = z.astype(np.longdouble)
        return 1.0 / (1.0 + np.exp(-z))

    def tanh(self, z):
        """
        Applies the tanh function elementwise to the vector z with shape
        (1, n) or (n, 1) and returns a vector of the same shape.
        """
        # Cast to an extended-precision float; np.tanh itself saturates
        # safely, but the manual exp-based formulas below would overflow
        # without the cast
        z = z.astype(np.longdouble)
        return np.tanh(z)
        # Manual alternatives, kept for reference:
        # ez = np.exp(z)
        # enz = np.exp(-z)
        # a = ez - enz
        # b = ez + enz
        # return np.divide(a, b, out=np.zeros_like(a), where=b != 0)
        # return (np.exp(z) - np.exp(-z)) / (np.exp(z) + np.exp(-z))
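

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module): trains a small
# network on XOR. It assumes only what this file already imports, i.e. an
# activationFunction module providing the ActivationFunction enum; the
# seed, epoch count and learning rate are illustrative guesses and may
# need tuning for convergence.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    np.random.seed(42)
    network = FullyConnected([2, 4, 1], activation=ActivationFunction.tanh)
    samples = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
    targets = np.array([[0], [1], [1], [0]], dtype=float)
    for epoch in range(2000):
        for sample, target in zip(samples, targets):
            network.train(sample, target, learningRate=0.1)
    for sample, target in zip(samples, targets):
        prediction = network.evaluate(sample)
        print(sample, "->", round(prediction.item(), 3),
              "expected", target.item())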