@@ -5,6 +5,52 @@ import (
	mat "gonum.org/v1/gonum/mat"
)
+// NeuralNetwork is a simple neural network implementation.
+//
+// Matrix: A
+// Description: A is the set of calculated neuron activations after sigmoid correction.
+// Format:
+//	   0           n           N
+//	⎡A[0] ⎤ ... ⎡A[0] ⎤ ... ⎡A[0] ⎤
+//	⎢A[1] ⎥ ... ⎢A[1] ⎥ ... ⎢A[1] ⎥
+//	⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
+//	⎢A[i] ⎥ ... ⎢A[i] ⎥ ... ⎢A[i] ⎥
+//	⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
+//	⎣A[s] ⎦ ... ⎣A[s] ⎦ ... ⎣A[s] ⎦
+// Where s = Sizes[n], N = len(Sizes).
+//
+// Matrix: Z
+// Description: Z is the set of calculated raw neuron activations.
+// Format:
+//	   0           n           N
+//	⎡Z[0] ⎤ ... ⎡Z[0] ⎤ ... ⎡Z[0] ⎤
+//	⎢Z[1] ⎥ ... ⎢Z[1] ⎥ ... ⎢Z[1] ⎥
+//	⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
+//	⎢Z[i] ⎥ ... ⎢Z[i] ⎥ ... ⎢Z[i] ⎥
+//	⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
+//	⎣Z[s] ⎦ ... ⎣Z[s] ⎦ ... ⎣Z[s] ⎦
+// Where s = Sizes[n], N = len(Sizes).
+//
+// Matrix: Biases
+// Description: Biases is the set of biases per layer, except layer L0.
+// Format:
+//	⎡b[0] ⎤
+//	⎢b[1] ⎥
+//	⎢ ... ⎥
+//	⎢b[i] ⎥
+//	⎢ ... ⎥
+//	⎣b[s] ⎦
+// Where s = Sizes[n].
+//
+// Matrix: Weights
+// Description: Weights is the set of weights per layer, except layer L0.
+// Format:
+//	⎡w[0,0] ... w[0,j] ... w[0,s']⎤
+//	⎢w[1,0] ... w[1,j] ... w[1,s']⎥
+//	⎢ ...                         ⎥
+//	⎢w[i,0] ... w[i,j] ... w[i,s']⎥
+//	⎢ ...                         ⎥
+//	⎣w[s,0] ... w[s,j] ... w[s,s']⎦
+// Where s = Sizes[n], s' = Sizes[n-1].
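+//
+// As a purely illustrative example (these sizes are hypothetical, not part
+// of this code): with Sizes = []int{3, 4, 2}, Weights[1] is a 4x3 matrix,
+// Biases[1] is a 4x1 column, A[1] = σ(Weights[1]*A[0]+Biases[1]) is a 4x1
+// column, and Weights[2] is 2x4, so the output A[2] is a 2x1 column.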
+
type NeuralNetwork struct {
	Count int
	Sizes []int
@@ -73,15 +119,19 @@ func (nn *NeuralNetwork) forward(aIn mat.Matrix) {
		aSrc := nn.A[i-1]
		aDst := nn.A[i]

-		// r, c := nn.Weights[i].Dims()
-		// fmt.Printf("r: %v,c: %v\n", r, c)
-
-		// r, c = aSrc.Dims()
-		// fmt.Printf("src r: %v,c: %v\n\n\n", r, c)
+		// Each iteration implements the formula below for the neuron activation values:
+		// A[l] = σ(W[l]*A[l−1] + B[l])
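+		//
+		// For a single neuron with illustrative (made-up) numbers w = 0.5,
+		// a = 1.0, b = 0.1: z = 0.5*1.0 + 0.1 = 0.6 and σ(0.6) ≈ 0.646.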
+		// W[l]*A[l−1]
		aDst.Mul(nn.Weights[i], aSrc)
+
+		// W[l]*A[l−1] + B[l]
		aDst.Add(aDst, nn.Biases[i])
+
+		// Save the raw activation value for backpropagation
		nn.Z[i] = mat.DenseCopyOf(aDst)
+
+		// σ(W[l]*A[l−1] + B[l])
		aDst.Apply(applySigmoid, aDst)
	}
}
@@ -91,44 +141,106 @@ func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) {

	lastLayerNum := nn.Count - 1

-	//Initial error
+	// To calculate the new values of the weights and biases,
+	// the following formulas are used:
+	// W[l] = A[l−1]*δ[l]
+	// B[l] = δ[l]
+
+	// For the last layer, the δ value is calculated as follows:
+	// δ = (A[L]−y)⊙σ'(Z[L])
+
+	// Calculate the initial error for the last layer L:
+	// error = A[L]−y
+	// where y is the expected set of activations.
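+	//
+	// Illustrative (made-up) numbers for a single output neuron:
+	// A[L] = 0.8, y = 1.0, Z[L] = 1.4, so error = −0.2 and
+	// δ = −0.2 * σ'(1.4) = −0.2 * σ(1.4)*(1−σ(1.4)) ≈ −0.2*0.159 ≈ −0.032.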
	err := &mat.Dense{}
	err.Sub(nn.result(), aOut)

+	// Calculate the sigmoid primes σ'(Z[L]) for the last layer L
	sigmoidsPrime := &mat.Dense{}
	sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[lastLayerNum])

+	// (A[L]−y)⊙σ'(Z[L])
	delta := &mat.Dense{}
	delta.MulElem(err, sigmoidsPrime)

+	// B[L] = δ[L]
	biases := mat.DenseCopyOf(delta)

+	// W[L] = A[L−1]*δ[L]
	weights := &mat.Dense{}
	weights.Mul(delta, nn.A[lastLayerNum-1].T())

+	// Initialize the new weights and biases with the last layer's values
	newBiases := []*mat.Dense{makeBackGradien(biases, nn.Biases[lastLayerNum], nn.alpha)}
	newWeights := []*mat.Dense{makeBackGradien(weights, nn.Weights[lastLayerNum], nn.alpha)}

+	// Save the calculated delta value in the temporary error variable
	err = delta
-	for i := nn.Count - 2; i > 0; i-- {
+
+	// For each remaining layer, the Weights and Biases are calculated
+	// using the same formulas:
+	// W[l] = A[l−1]*δ[l]
+	// B[l] = δ[l]
+
+	// But δ[l] is calculated using a different formula:
+	// δ[l] = (Wt[l+1]*δ[l+1])⊙σ'(Z[l])
+	// where Wt[l+1] is the transposed matrix of the actual Weights from
+	// the forward step.
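+
+	// Illustrative (made-up) shapes: if layer l has 4 neurons and layer l+1
+	// has 2, then Weights[l+1] is 2x4, so Wt[l+1]*δ[l+1] is (4x2)*(2x1) = 4x1,
+	// matching σ'(Z[l]) for the element-wise product.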
+	for l := nn.Count - 2; l > 0; l-- {
+		// Calculate the sigmoid primes σ'(Z[l]) for layer l
		sigmoidsPrime := &mat.Dense{}
-		sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[i])
+		sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[l])

+		// Wt[l+1]*δ[l+1]
+		// err below is the delta from the previous step (l+1)
		delta := &mat.Dense{}
		wdelta := &mat.Dense{}
-		wdelta.Mul(nn.Weights[i+1].T(), err)
+		wdelta.Mul(nn.Weights[l+1].T(), err)

+		// Calculate the new delta and store it in the temporary variable err
+		// δ[l] = (Wt[l+1]*δ[l+1])⊙σ'(Z[l])
		delta.MulElem(wdelta, sigmoidsPrime)
		err = delta

+		// B[l] = δ[l]
		biases := mat.DenseCopyOf(delta)

+		// W[l] = A[l−1]*δ[l]
+		// At this point it is necessary to explain an inaccuracy in
+		// the formula.
+
+		// Multiplying the activations matrix for layer l-1 by δ[l] is
+		// impossible, because the matrices have the following shapes:
+		//	 A[l-1]      δ[l]
+		//	⎡A[0]  ⎤    ⎡δ[0] ⎤
+		//	⎢A[1]  ⎥    ⎢δ[1] ⎥
+		//	⎢ ...  ⎥    ⎢ ... ⎥
+		//	⎢A[i]  ⎥ X  ⎢δ[i] ⎥
+		//	⎢ ...  ⎥    ⎢ ... ⎥
+		//	⎣A[s'] ⎦    ⎣δ[s] ⎦
+		// So we need to modify these matrices so that the multiplication
+		// applies and yields a Weights matrix of the following shape:
+		//	⎡w[0,0] ... w[0,j] ... w[0,s']⎤
+		//	⎢w[1,0] ... w[1,j] ... w[1,s']⎥
+		//	⎢ ...                         ⎥
+		//	⎢w[i,0] ... w[i,j] ... w[i,s']⎥
+		//	⎢ ...                         ⎥
+		//	⎣w[s,0] ... w[s,j] ... w[s,s']⎦
+		// So we swap the operands and transpose A[l-1] to get a valid
+		// multiplication of the following shape:
+		//	 δ[l]          A[l-1]
+		//	⎡δ[0] ⎤ x [A[0] A[1] ... A[i] ... A[s']]
+		//	⎢δ[1] ⎥
+		//	⎢ ... ⎥
+		//	⎢δ[i] ⎥
+		//	⎢ ... ⎥
+		//	⎣δ[s] ⎦
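+		// As an illustrative (made-up) check: with 4 neurons in layer l
+		// and 3 in layer l-1, δ[l] is 4x1 and At[l-1] is 1x3, so
+		// δ[l]*At[l-1] is the expected 4x3 Weights matrix.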
		weights := &mat.Dense{}
-		weights.Mul(delta, nn.A[i-1].T())
+		weights.Mul(delta, nn.A[l-1].T())

+		// Prepend (not append!) the new Biases and Weights, since we
+		// iterate from the last layer to the first.
		// Scale down
-		newBiases = append([]*mat.Dense{makeBackGradien(biases, nn.Biases[i], nn.alpha)}, newBiases...)
-		newWeights = append([]*mat.Dense{makeBackGradien(weights, nn.Weights[i], nn.alpha)}, newWeights...)
+		newBiases = append([]*mat.Dense{makeBackGradien(biases, nn.Biases[l], nn.alpha)}, newBiases...)
+		newWeights = append([]*mat.Dense{makeBackGradien(weights, nn.Weights[l], nn.alpha)}, newWeights...)
	}
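+
+	// Note: index 0 of newBiases/newWeights is filled with an empty
+	// placeholder below, since layer 0 has no biases or weights.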

	newBiases = append([]*mat.Dense{&mat.Dense{}}, newBiases...)