@@ -1,345 +0,0 @@
-/*
- * MIT License
- *
- * Copyright (c) 2019 Alexey Edelev <semlanik@gmail.com>
- *
- * This file is part of NeuralNetwork project https://git.semlanik.org/semlanik/NeuralNetwork
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy of this
- * software and associated documentation files (the "Software"), to deal in the Software
- * without restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software, and
- * to permit persons to whom the Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be included in all copies
- * or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
- * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
- * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
- * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-package neuralnetworkbase
-
-import (
-    "fmt"
-    "io"
-
-    teach "../teach"
-    mat "gonum.org/v1/gonum/mat"
-)
-
-// NeuralNetwork is a simple neural network implementation.
-//
-// Resources:
-// http://neuralnetworksanddeeplearning.com
-// https://www.youtube.com/watch?v=fNk_zzaMoSs
-//
-// Matrix: A
-// Description: A is the set of calculated neuron activations after the
-//              sigmoid function is applied
-// Format:    0           l           L
-//         ⎡A[0] ⎤ ... ⎡A[0] ⎤ ... ⎡A[0] ⎤
-//         ⎢A[1] ⎥ ... ⎢A[1] ⎥ ... ⎢A[1] ⎥
-//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
-//         ⎢A[i] ⎥ ... ⎢A[i] ⎥ ... ⎢A[i] ⎥
-//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
-//         ⎣A[s] ⎦ ... ⎣A[s] ⎦ ... ⎣A[s] ⎦
-// Where s = Sizes[l]   - Neural network layer size
-//       L = len(Sizes) - Number of neural network layers
-//
-// Matrix: Z
-// Description: Z is the set of calculated raw neuron activations
-// Format:    0           l           L
-//         ⎡Z[0] ⎤ ... ⎡Z[0] ⎤ ... ⎡Z[0] ⎤
-//         ⎢Z[1] ⎥ ... ⎢Z[1] ⎥ ... ⎢Z[1] ⎥
-//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
-//         ⎢Z[i] ⎥ ... ⎢Z[i] ⎥ ... ⎢Z[i] ⎥
-//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
-//         ⎣Z[s] ⎦ ... ⎣Z[s] ⎦ ... ⎣Z[s] ⎦
-// Where s = Sizes[l]   - Neural network layer size
-//       L = len(Sizes) - Number of neural network layers
-//
-// Matrix: Biases
-// Description: Biases is the set of biases per layer, except l0
-//              NOTE: l0 is always an empty Dense because the first layer
-//              doesn't have connections to a previous layer
-// Format:    1           l           L
-//         ⎡b[0] ⎤ ... ⎡b[0] ⎤ ... ⎡b[0] ⎤
-//         ⎢b[1] ⎥ ... ⎢b[1] ⎥ ... ⎢b[1] ⎥
-//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
-//         ⎢b[i] ⎥ ... ⎢b[i] ⎥ ... ⎢b[i] ⎥
-//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
-//         ⎣b[s] ⎦ ... ⎣b[s] ⎦ ... ⎣b[s] ⎦
-// Where s = Sizes[l]   - Neural network layer size
-//       L = len(Sizes) - Number of neural network layers
-//
-// Matrix: Weights
-// Description: Weights is the set of weights per layer, except l0
-//              NOTE: l0 is always an empty Dense because the first layer
-//              doesn't have connections to a previous layer
-// Format:                   1                 ...                l                 ...                L
-//         ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
-//         ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
-//         ⎢ ...                         ⎥ ... ⎢ ...                         ⎥ ... ⎢ ...                         ⎥
-//         ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
-//         ⎢ ...                         ⎥ ... ⎢ ...                         ⎥ ... ⎢ ...                         ⎥
-//         ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
-// Where s  = Sizes[l]   - Neural network layer size
-//       s' = Sizes[l-1] - Previous neural network layer size
-//       L  = len(Sizes) - Number of neural network layers
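-//
-// Example (illustrative values, not from the original source): for
-// Sizes = [4, 3, 2], Weights[1] is a 3x4 matrix, Weights[2] is a 2x3 matrix,
-// Biases[1] is 3x1, Biases[2] is 2x1, and each A[l] and Z[l] is a Sizes[l]x1
-// column vector; index 0 of Weights and Biases stays unused.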
-
-type RProp struct {
-    Count          int
-    Sizes          []int
-    Biases         []*mat.Dense
-    Weights        []*mat.Dense
-    A              []*mat.Dense
-    Z              []*mat.Dense
-    alpha          float64
-    trainingCycles int
-}
-
-func NewRProp(sizes []int, nu float64, trainingCycles int) (nn *RProp, err error) {
-    if len(sizes) < 3 {
-        return nil, fmt.Errorf("invalid network configuration: %v", sizes)
-    }
-
-    for i := 0; i < len(sizes); i++ {
-        if sizes[i] < 2 {
-            return nil, fmt.Errorf("invalid network configuration: %v", sizes)
-        }
-    }
-
-    if nu <= 0.0 || nu > 1.0 {
-        return nil, fmt.Errorf("invalid η value: %v", nu)
-    }
-
-    if trainingCycles <= 0 {
-        return nil, fmt.Errorf("invalid training cycles number: %v", trainingCycles)
-    }
-
-    if trainingCycles < 100 {
-        fmt.Println("Training cycles number is probably too small")
-    }
-
-    nn = &RProp{}
-    nn.Sizes = sizes
-    nn.Count = len(sizes)
-    nn.Weights = make([]*mat.Dense, nn.Count)
-    nn.Biases = make([]*mat.Dense, nn.Count)
-    nn.A = make([]*mat.Dense, nn.Count)
-    nn.Z = make([]*mat.Dense, nn.Count)
-    nn.alpha = nu / float64(nn.Sizes[0])
-    nn.trainingCycles = trainingCycles
-
-    for i := 1; i < nn.Count; i++ {
-        nn.Weights[i] = generateRandomDense(nn.Sizes[i], nn.Sizes[i-1])
-        nn.Biases[i] = generateRandomDense(nn.Sizes[i], 1)
-    }
-    return
-}
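-
-// A minimal usage sketch (illustrative values, not from the original source):
-// build a 4-3-2 network and classify a single input packed into a 4x1 column
-// vector.
-//
-//    nn, err := NewRProp([]int{4, 3, 2}, 0.1, 1000)
-//    if err != nil {
-//        log.Fatal(err)
-//    }
-//    input := mat.NewDense(4, 1, []float64{0.1, 0.9, 0.2, 0.7})
-//    index, activation := nn.Predict(input)
-//    fmt.Println(index, activation)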
-
-// Copy returns a deep copy of the network: weights and biases are cloned,
-// activation buffers are reallocated.
-func (nn *RProp) Copy() (out *RProp) {
-    out = &RProp{}
-    out.Sizes = nn.Sizes
-    out.Count = nn.Count
-    out.Weights = make([]*mat.Dense, nn.Count)
-    out.Biases = make([]*mat.Dense, nn.Count)
-    out.A = make([]*mat.Dense, nn.Count)
-    out.Z = make([]*mat.Dense, nn.Count)
-    out.alpha = nn.alpha
-    out.trainingCycles = nn.trainingCycles
-
-    for i := 1; i < out.Count; i++ {
-        // Clone weights and biases from the source network into the copy
-        out.Weights[i] = mat.DenseCopyOf(nn.Weights[i])
-        out.Biases[i] = mat.DenseCopyOf(nn.Biases[i])
-    }
-    return
-}
-
-func (nn *RProp) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
-    r, _ := aIn.Dims()
-    if r != nn.Sizes[0] {
-        fmt.Printf("Invalid number of rows in input matrix: %v\n", r)
-        return -1, 0.0
-    }
-
-    nn.forward(aIn)
-    result := nn.result()
-    r, _ = result.Dims()
-    max = 0.0
-    maxIndex = 0
-    for i := 0; i < r; i++ {
-        if result.At(i, 0) > max {
-            max = result.At(i, 0)
-            maxIndex = i
-        }
-    }
-    return
-}
-
-func (nn *RProp) Teach(teacher teach.Teacher) {
-    for i := 0; i < nn.trainingCycles; i++ {
-        for teacher.NextData() {
-            nn.backward(teacher.GetData())
-        }
-    }
-}
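-
-// teach.Teacher comes from the teach package; inferred from its use above, it
-// provides at least the following (an inference, not the package's actual
-// definition):
-//
-//    type Teacher interface {
-//        NextData() bool
-//        GetData() (mat.Matrix, mat.Matrix)
-//    }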
-
-// SaveState is not implemented yet.
-func (nn *RProp) SaveState(writer io.Writer) {
-}
-
-// LoadState is not implemented yet.
-func (nn *RProp) LoadState(reader io.Reader) {
-}
-
-func (nn *RProp) forward(aIn mat.Matrix) {
-    nn.A[0] = mat.DenseCopyOf(aIn)
-
-    for i := 1; i < nn.Count; i++ {
-        nn.A[i] = mat.NewDense(nn.Sizes[i], 1, nil)
-        aSrc := nn.A[i-1]
-        aDst := nn.A[i]
-
-        // Each iteration implements the formula below for neuron activation values:
-        // A[l] = σ(W[l]*A[l−1] + B[l])
-
-        // W[l]*A[l−1]
-        aDst.Mul(nn.Weights[i], aSrc)
-
-        // W[l]*A[l−1] + B[l]
-        aDst.Add(aDst, nn.Biases[i])
-
-        // Save the raw activation value for backpropagation
-        nn.Z[i] = mat.DenseCopyOf(aDst)
-
-        // σ(W[l]*A[l−1] + B[l])
-        aDst.Apply(applySigmoid, aDst)
-    }
-}
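-
-// applySigmoid and applySigmoidPrime are defined elsewhere in this package.
-// Assuming the standard logistic function σ(z) = 1/(1+e^(-z)), sketches with
-// the element-wise signature expected by (*mat.Dense).Apply would look like
-// this (an assumption, not the original implementation):
-//
-//    func applySigmoid(i, j int, v float64) float64 {
-//        return 1.0 / (1.0 + math.Exp(-v)) // σ(v)
-//    }
-//
-//    func applySigmoidPrime(i, j int, v float64) float64 {
-//        s := 1.0 / (1.0 + math.Exp(-v))
-//        return s * (1.0 - s) // σ'(v) = σ(v)(1−σ(v))
-//    }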
-
-func (nn *RProp) backward(aIn, aOut mat.Matrix) {
-    nn.forward(aIn)
-
-    lastLayerNum := nn.Count - 1
-
-    // To calculate new values of weights and biases
-    // the following formulas are used:
-    // W[l] = A[l−1]*δ[l]
-    // B[l] = δ[l]
-
-    // For the last layer the δ value is calculated as follows:
-    // δ = (A[L]−y)⊙σ'(Z[L])
-
-    // Calculate the initial error for the last layer L:
-    // error = A[L]-y
-    // Where y is the expected activations set
-    err := &mat.Dense{}
-    err.Sub(nn.result(), aOut)
-
-    // Calculate the sigmoid prime σ'(Z[L]) for the last layer L
-    sigmoidsPrime := &mat.Dense{}
-    sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[lastLayerNum])
-
-    // (A[L]−y)⊙σ'(Z[L])
-    delta := &mat.Dense{}
-    delta.MulElem(err, sigmoidsPrime)
-
-    // B[L] = δ[L]
-    biases := mat.DenseCopyOf(delta)
-
-    // W[L] = A[L−1]*δ[L]
-    weights := &mat.Dense{}
-    weights.Mul(delta, nn.A[lastLayerNum-1].T())
-
-    // Initialize the new weights and biases values with the last layer values
-    newBiases := []*mat.Dense{makeBackGradient(biases, nn.Biases[lastLayerNum], nn.alpha)}
-    newWeights := []*mat.Dense{makeBackGradient(weights, nn.Weights[lastLayerNum], nn.alpha)}
-
-    // Save the calculated delta value to the temporary error variable
-    err = delta
-
-    // The next layer's Weights and Biases are calculated using the same formulas:
-    // W[l] = A[l−1]*δ[l]
-    // B[l] = δ[l]
-
-    // But δ[l] is calculated using a different formula:
-    // δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
-    // Where Wt[l+1] is the transposed Weights matrix from the
-    // forward step
-    for l := nn.Count - 2; l > 0; l-- {
-        // Calculate the sigmoid prime σ'(Z[l]) for layer l
-        sigmoidsPrime := &mat.Dense{}
-        sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[l])
-
-        // (Wt[l+1])*δ[l+1]
-        // err below is the delta from the previous step (l+1)
-        delta := &mat.Dense{}
-        wdelta := &mat.Dense{}
-        wdelta.Mul(nn.Weights[l+1].T(), err)
-
-        // Calculate the new delta and store it in the temporary variable err:
-        // δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
-        delta.MulElem(wdelta, sigmoidsPrime)
-        err = delta
-
-        // B[l] = δ[l]
-        biases := mat.DenseCopyOf(delta)
-
-        // W[l] = A[l−1]*δ[l]
-        // At this point an explanation of the inaccuracy in the formula
-        // above is required.
-
-        // Multiplying the activations matrix for layer l-1 by δ[l] directly is
-        // impossible, because the shapes of the matrices are the following:
-        // A[l-1]        δ[l]
-        // ⎡A[0] ⎤      ⎡δ[0] ⎤
-        // ⎢A[1] ⎥      ⎢δ[1] ⎥
-        // ⎢ ... ⎥      ⎢ ... ⎥
-        // ⎢A[i] ⎥  X   ⎢δ[i] ⎥
-        // ⎢ ... ⎥      ⎢ ... ⎥
-        // ⎣A[s'] ⎦     ⎣δ[s] ⎦
-        // So we need to rearrange these matrices to make the multiplication
-        // valid and get a Weights matrix of the following shape:
-        // ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
-        // ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
-        // ⎢ ...                         ⎥
-        // ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
-        // ⎢ ...                         ⎥
-        // ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
-        // So we swap the operands and transpose A[l-1] to get a valid
-        // multiplication of the following shape:
-        // δ[l]        A[l-1]
-        // ⎡δ[0] ⎤  x  [A[0] A[1] ... A[i] ... A[s']]
-        // ⎢δ[1] ⎥
-        // ⎢ ... ⎥
-        // ⎢δ[i] ⎥
-        // ⎢ ... ⎥
-        // ⎣δ[s] ⎦
-        weights := &mat.Dense{}
-        weights.Mul(delta, nn.A[l-1].T())
-
-        // !Prepend! the new Biases and Weights
-        newBiases = append([]*mat.Dense{makeBackGradient(biases, nn.Biases[l], nn.alpha)}, newBiases...)
-        newWeights = append([]*mat.Dense{makeBackGradient(weights, nn.Weights[l], nn.alpha)}, newWeights...)
-    }
-
-    newBiases = append([]*mat.Dense{&mat.Dense{}}, newBiases...)
-    newWeights = append([]*mat.Dense{&mat.Dense{}}, newWeights...)
-
-    nn.Biases = newBiases
-    nn.Weights = newWeights
-}
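-
-// makeBackGradient is defined elsewhere in this package. Judging only from
-// its call sites above (gradient matrix, current parameter matrix, learning
-// rate alpha), a plain gradient-descent sketch of such a helper could be
-// (an assumption, not the original implementation):
-//
-//    func makeBackGradient(gradient, current mat.Matrix, alpha float64) *mat.Dense {
-//        scaled := &mat.Dense{}
-//        scaled.Scale(alpha, gradient) // α*∇
-//        next := &mat.Dense{}
-//        next.Sub(current, scaled) // W' = W − α*∇
-//        return next
-//    }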
-
-// result returns the activations of the last (output) layer.
-func (nn *RProp) result() *mat.Dense {
-    return nn.A[nn.Count-1]
-}