@@ -0,0 +1,345 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2019 Alexey Edelev <semlanik@gmail.com>
+ *
+ * This file is part of NeuralNetwork project https://git.semlanik.org/semlanik/NeuralNetwork
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of this
+ * software and associated documentation files (the "Software"), to deal in the Software
+ * without restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+ * to permit persons to whom the Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies
+ * or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+ * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
+ * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package neuralnetworkbase
+
+import (
+	"fmt"
+	"io"
+
+	teach "../teach"
+	mat "gonum.org/v1/gonum/mat"
+)
+
+// RProp is a simple neural network implementation
+//
+// Resources:
+// http://neuralnetworksanddeeplearning.com
+// https://www.youtube.com/watch?v=fNk_zzaMoSs
+//
+// Matrix: A
+// Description: A is the set of neuron activations calculated after sigmoid correction
+// Format:      0            l            L
+//          ⎡A[0] ⎤ ...  ⎡A[0] ⎤ ...  ⎡A[0] ⎤
+//          ⎢A[1] ⎥ ...  ⎢A[1] ⎥ ...  ⎢A[1] ⎥
+//          ⎢ ... ⎥ ...  ⎢ ... ⎥ ...  ⎢ ... ⎥
+//          ⎢A[i] ⎥ ...  ⎢A[i] ⎥ ...  ⎢A[i] ⎥
+//          ⎢ ... ⎥ ...  ⎢ ... ⎥ ...  ⎢ ... ⎥
+//          ⎣A[s] ⎦ ...  ⎣A[s] ⎦ ...  ⎣A[s] ⎦
+// Where s = Sizes[l] - Neural network layer size
+//       L = len(Sizes) - Number of neural network layers
+//
+// Matrix: Z
+// Description: Z is the set of calculated raw neuron activations
+// Format:      0            l            L
+//          ⎡Z[0] ⎤ ...  ⎡Z[0] ⎤ ...  ⎡Z[0] ⎤
+//          ⎢Z[1] ⎥ ...  ⎢Z[1] ⎥ ...  ⎢Z[1] ⎥
+//          ⎢ ... ⎥ ...  ⎢ ... ⎥ ...  ⎢ ... ⎥
+//          ⎢Z[i] ⎥ ...  ⎢Z[i] ⎥ ...  ⎢Z[i] ⎥
+//          ⎢ ... ⎥ ...  ⎢ ... ⎥ ...  ⎢ ... ⎥
+//          ⎣Z[s] ⎦ ...  ⎣Z[s] ⎦ ...  ⎣Z[s] ⎦
+// Where s = Sizes[l] - Neural network layer size
+//       L = len(Sizes) - Number of neural network layers
+//
+// Matrix: Biases
+// Description: Biases is the set of biases per layer, except l0
+// NOTE: l0 is always an empty Dense because the first layer
+// has no connections to a previous layer
+// Format:      1            l            L
+//          ⎡b[0] ⎤ ...  ⎡b[0] ⎤ ...  ⎡b[0] ⎤
+//          ⎢b[1] ⎥ ...  ⎢b[1] ⎥ ...  ⎢b[1] ⎥
+//          ⎢ ... ⎥ ...  ⎢ ... ⎥ ...  ⎢ ... ⎥
+//          ⎢b[i] ⎥ ...  ⎢b[i] ⎥ ...  ⎢b[i] ⎥
+//          ⎢ ... ⎥ ...  ⎢ ... ⎥ ...  ⎢ ... ⎥
+//          ⎣b[s] ⎦ ...  ⎣b[s] ⎦ ...  ⎣b[s] ⎦
+// Where s = Sizes[l] - Neural network layer size
+//       L = len(Sizes) - Number of neural network layers
+//
+// Matrix: Weights
+// Description: Weights is the set of weights per layer, except l0
+// NOTE: l0 is always an empty Dense because the first layer
+// has no connections to a previous layer
+// Format:                 1                                       l                                       L
+//  ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
+//  ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
+//  ⎢            ...              ⎥ ... ⎢            ...              ⎥ ... ⎢            ...              ⎥
+//  ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
+//  ⎢            ...              ⎥ ... ⎢            ...              ⎥ ... ⎢            ...              ⎥
+//  ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
+// Where s = Sizes[l] - Neural network layer size
+//       s' = Sizes[l-1] - Previous neural network layer size
+//       L = len(Sizes) - Number of neural network layers
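+//
+// For example (illustrative values, not from this file): with
+// Sizes = []int{4, 3, 2} the layout above implies
+//
+//	Weights[1]: 3x4    Biases[1]: 3x1
+//	Weights[2]: 2x3    Biases[2]: 2x1
+//
+// while Weights[0] and Biases[0] stay unused, as noted above.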
+
+type RProp struct {
+	Count          int
+	Sizes          []int
+	Biases         []*mat.Dense
+	Weights        []*mat.Dense
+	A              []*mat.Dense
+	Z              []*mat.Dense
+	alpha          float64
+	trainingCycles int
+}
+
+func NewRProp(sizes []int, nu float64, trainingCycles int) (nn *RProp, err error) {
+	if len(sizes) < 3 {
+		return nil, fmt.Errorf("invalid network configuration: %v", sizes)
+	}
+
+	for i := 0; i < len(sizes); i++ {
+		if sizes[i] < 2 {
+			return nil, fmt.Errorf("invalid network configuration: %v", sizes)
+		}
+	}
+
+	if nu <= 0.0 || nu > 1.0 {
+		return nil, fmt.Errorf("invalid η value: %v", nu)
+	}
+
+	if trainingCycles <= 0 {
+		return nil, fmt.Errorf("invalid training cycles number: %v", trainingCycles)
+	}
+
+	if trainingCycles < 100 {
+		fmt.Println("Training cycles number is probably too small")
+	}
+
+	nn = &RProp{}
+	nn.Sizes = sizes
+	nn.Count = len(sizes)
+	nn.Weights = make([]*mat.Dense, nn.Count)
+	nn.Biases = make([]*mat.Dense, nn.Count)
+	nn.A = make([]*mat.Dense, nn.Count)
+	nn.Z = make([]*mat.Dense, nn.Count)
+	nn.alpha = nu / float64(nn.Sizes[0])
+	nn.trainingCycles = trainingCycles
+
+	for i := 1; i < nn.Count; i++ {
+		nn.Weights[i] = generateRandomDense(nn.Sizes[i], nn.Sizes[i-1])
+		nn.Biases[i] = generateRandomDense(nn.Sizes[i], 1)
+	}
+	return
+}
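+
+// generateRandomDense is defined elsewhere in this package. A minimal
+// sketch of what it is assumed to do (uniform random initialization;
+// the actual implementation may differ):
+//
+//	func generateRandomDense(rows, cols int) *mat.Dense {
+//		data := make([]float64, rows*cols)
+//		for i := range data {
+//			data[i] = rand.Float64() // requires math/rand
+//		}
+//		return mat.NewDense(rows, cols, data)
+//	}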
+
+func (nn *RProp) Copy() (out *RProp) {
+	out = &RProp{}
+	out.Sizes = nn.Sizes
+	out.Count = nn.Count
+	out.Weights = make([]*mat.Dense, nn.Count)
+	out.Biases = make([]*mat.Dense, nn.Count)
+	out.A = make([]*mat.Dense, nn.Count)
+	out.Z = make([]*mat.Dense, nn.Count)
+	out.alpha = nn.alpha
+	out.trainingCycles = nn.trainingCycles
+
+	for i := 1; i < out.Count; i++ {
+		out.Weights[i] = mat.DenseCopyOf(nn.Weights[i])
+		out.Biases[i] = mat.DenseCopyOf(nn.Biases[i])
+	}
+	return
+}
+
+func (nn *RProp) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
+	r, _ := aIn.Dims()
+	if r != nn.Sizes[0] {
+		fmt.Printf("Invalid number of rows in input matrix: %v\n", r)
+		return -1, 0.0
+	}
+
+	nn.forward(aIn)
+	result := nn.result()
+	r, _ = result.Dims()
+	max = 0.0
+	maxIndex = 0
+	for i := 0; i < r; i++ {
+		if result.At(i, 0) > max {
+			max = result.At(i, 0)
+			maxIndex = i
+		}
+	}
+	return
+}
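+
+// Usage sketch for Predict (input and pixelData are hypothetical names;
+// the input must be a column vector with Sizes[0] rows):
+//
+//	input := mat.NewDense(nn.Sizes[0], 1, pixelData)
+//	index, confidence := nn.Predict(input)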
+
+func (nn *RProp) Teach(teacher teach.Teacher) {
+	for i := 0; i < nn.trainingCycles; i++ {
+		for teacher.NextData() {
+			nn.backward(teacher.GetData())
+		}
+	}
+}
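+
+// The teach.Teacher interface lives in the sibling teach package. Based
+// only on its use in Teach and backward here, it presumably looks like:
+//
+//	type Teacher interface {
+//		// NextData advances to the next training sample, reporting
+//		// whether one is available; it presumably rewinds between
+//		// training cycles, since Teach never resets it explicitly.
+//		NextData() bool
+//		// GetData returns the input activations and the expected
+//		// output activations for the current sample.
+//		GetData() (mat.Matrix, mat.Matrix)
+//	}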
+
+// SaveState is a stub; persisting the network state is not implemented yet.
+func (nn *RProp) SaveState(writer io.Writer) {
+}
+
+// LoadState is a stub; restoring the network state is not implemented yet.
+func (nn *RProp) LoadState(reader io.Reader) {
+}
+
+func (nn *RProp) forward(aIn mat.Matrix) {
+	nn.A[0] = mat.DenseCopyOf(aIn)
+
+	for i := 1; i < nn.Count; i++ {
+		nn.A[i] = mat.NewDense(nn.Sizes[i], 1, nil)
+		aSrc := nn.A[i-1]
+		aDst := nn.A[i]
+
+		// Each iteration implements the formula below for neuron activation values
+		// A[l] = σ(W[l]*A[l−1] + B[l])
+
+		// W[l]*A[l−1]
+		aDst.Mul(nn.Weights[i], aSrc)
+
+		// W[l]*A[l−1]+B[l]
+		aDst.Add(aDst, nn.Biases[i])
+
+		// Save raw activation value for back propagation
+		nn.Z[i] = mat.DenseCopyOf(aDst)
+
+		// σ(W[l]*A[l−1]+B[l])
+		aDst.Apply(applySigmoid, aDst)
+	}
+}
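+
+// applySigmoid and applySigmoidPrime are defined elsewhere in this
+// package. A minimal sketch, assuming the standard logistic function
+// (both match the callback signature of mat.Dense.Apply):
+//
+//	func sigmoid(z float64) float64 {
+//		return 1.0 / (1.0 + math.Exp(-z)) // requires math
+//	}
+//
+//	func applySigmoid(i, j int, z float64) float64 {
+//		return sigmoid(z)
+//	}
+//
+//	// σ'(z) = σ(z)·(1−σ(z))
+//	func applySigmoidPrime(i, j int, z float64) float64 {
+//		return sigmoid(z) * (1.0 - sigmoid(z))
+//	}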
+
+func (nn *RProp) backward(aIn, aOut mat.Matrix) {
+	nn.forward(aIn)
+
+	lastLayerNum := nn.Count - 1
+
+	// To calculate new values of weights and biases
+	// the following formulas are used:
+	// W[l] = A[l−1]*δ[l]
+	// B[l] = δ[l]
+
+	// For the last layer the δ value is calculated as follows:
+	// δ = (A[L]−y)⊙σ'(Z[L])
+
+	// Calculate the initial error for the last layer L
+	// error = A[L]-y
+	// Where y is the expected set of activations
+	err := &mat.Dense{}
+	err.Sub(nn.result(), aOut)
+
+	// Calculate sigmoid prime σ'(Z[L]) for the last layer L
+	sigmoidsPrime := &mat.Dense{}
+	sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[lastLayerNum])
+
+	// (A[L]−y)⊙σ'(Z[L])
+	delta := &mat.Dense{}
+	delta.MulElem(err, sigmoidsPrime)
+
+	// B[L] = δ[L]
+	biases := mat.DenseCopyOf(delta)
+
+	// W[L] = A[L−1]*δ[L]
+	weights := &mat.Dense{}
+	weights.Mul(delta, nn.A[lastLayerNum-1].T())
+
+	// Initialize new weights and biases values with last layer values
+	newBiases := []*mat.Dense{makeBackGradient(biases, nn.Biases[lastLayerNum], nn.alpha)}
+	newWeights := []*mat.Dense{makeBackGradient(weights, nn.Weights[lastLayerNum], nn.alpha)}
+
+	// Save the calculated delta value in the temporary error variable
+	err = delta
+
+	// The Weights and Biases of the remaining layers are calculated
+	// using the same formulas:
+	// W[l] = A[l−1]*δ[l]
+	// B[l] = δ[l]
+
+	// But δ[l] is calculated using a different formula:
+	// δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
+	// Where Wt[l+1] is the transposed Weights matrix from the
+	// forward step
+	for l := nn.Count - 2; l > 0; l-- {
+		// Calculate sigmoid prime σ'(Z[l]) for layer l
+		sigmoidsPrime := &mat.Dense{}
+		sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[l])
+
+		// (Wt[l+1])*δ[l+1]
+		// err below is the delta from the previous step (l+1)
+		delta := &mat.Dense{}
+		wdelta := &mat.Dense{}
+		wdelta.Mul(nn.Weights[l+1].T(), err)
+
+		// Calculate the new delta and store it in the temporary variable err
+		// δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
+		delta.MulElem(wdelta, sigmoidsPrime)
+		err = delta
+
+		// B[l] = δ[l]
+		biases := mat.DenseCopyOf(delta)
+
+		// W[l] = A[l−1]*δ[l]
+		// At this point an explanation is required for the inaccuracy
+		// in the formula
+
+		// Multiplying the activations matrix of layer l-1 by δ[l] is impossible,
+		// because the matrices have the following shapes:
+		//  A[l-1]       δ[l]
+		// ⎡A[0]  ⎤   ⎡δ[0] ⎤
+		// ⎢A[1]  ⎥   ⎢δ[1] ⎥
+		// ⎢ ...  ⎥   ⎢ ... ⎥
+		// ⎢A[i]  ⎥ X ⎢δ[i] ⎥
+		// ⎢ ...  ⎥   ⎢ ... ⎥
+		// ⎣A[s'] ⎦   ⎣δ[s] ⎦
+		// So we need to modify these matrices to make the multiplication valid
+		// and get a Weights matrix of the following shape:
+		// ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
+		// ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
+		// ⎢            ...              ⎥
+		// ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
+		// ⎢            ...              ⎥
+		// ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
+		// So we swap the operands and transpose A[l-1] to get a valid
+		// multiplication of the following shape:
+		//  δ[l]        A[l-1]
+		// ⎡δ[0] ⎤ x [A[0] A[1] ... A[i] ... A[s']]
+		// ⎢δ[1] ⎥
+		// ⎢ ... ⎥
+		// ⎢δ[i] ⎥
+		// ⎢ ... ⎥
+		// ⎣δ[s] ⎦
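+		//
+		// Example (illustrative sizes, not from this file): if Sizes[l-1] = 4
+		// and Sizes[l] = 3, then δ[l] is 3x1, A[l-1].T() is 1x4, and
+		// δ[l]*A[l-1].T() is the 3x4 gradient matching the shape of Weights[l].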
+		weights := &mat.Dense{}
+		weights.Mul(delta, nn.A[l-1].T())
+
+		// Prepend (not append!) the new Biases and Weights
+		newBiases = append([]*mat.Dense{makeBackGradient(biases, nn.Biases[l], nn.alpha)}, newBiases...)
+		newWeights = append([]*mat.Dense{makeBackGradient(weights, nn.Weights[l], nn.alpha)}, newWeights...)
+	}
+
+	// Layer 0 has no weights or biases; keep empty placeholders at index 0
+	newBiases = append([]*mat.Dense{&mat.Dense{}}, newBiases...)
+	newWeights = append([]*mat.Dense{&mat.Dense{}}, newWeights...)
+
+	nn.Biases = newBiases
+	nn.Weights = newWeights
+}
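+
+// makeBackGradient is defined elsewhere in this package. A plain
+// gradient-descent sketch of the update it is assumed to perform
+// (the real implementation may differ, e.g. a genuine resilient
+// propagation step, as the type name RProp suggests):
+//
+//	func makeBackGradient(gradient mat.Matrix, current *mat.Dense, alpha float64) *mat.Dense {
+//		scaled := &mat.Dense{}
+//		scaled.Scale(alpha, gradient) // alpha·∇
+//		result := &mat.Dense{}
+//		result.Sub(current, scaled) // current − alpha·∇
+//		return result
+//	}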
+
+func (nn *RProp) result() *mat.Dense {
+	return nn.A[nn.Count-1]
+}