/* * MIT License * * Copyright (c) 2019 Alexey Edelev , Tatyana Borisova * * This file is part of NeuralNetwork project https://git.semlanik.org/semlanik/NeuralNetwork * * Permission is hereby granted, free of charge, to any person obtaining a copy of this * software and associated documentation files (the "Software"), to deal in the Software * without restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, and * to permit persons to whom the Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be included in all copies * or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ package neuralnetworkbase import ( "encoding/binary" "errors" "fmt" "io" teach "../teach" mat "gonum.org/v1/gonum/mat" ) // NeuralNetwork is simple neural network implementation // // Resources: // http://neuralnetworksanddeeplearning.com // https://www.youtube.com/watch?v=fNk_zzaMoSs // // Matrix: A // Description: A is set of calculated neuron activations after sigmoid correction // Format: 0 l L // ⎡A[0] ⎤ ... ⎡A[0] ⎤ ... ⎡A[0] ⎤ // ⎢A[1] ⎥ ... ⎢A[1] ⎥ ... ⎢A[1] ⎥ // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥ // ⎢A[i] ⎥ ... ⎢A[i] ⎥ ... ⎢A[i] ⎥ // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥ // ⎣A[s] ⎦ ... ⎣A[s] ⎦ ... ⎣A[s] ⎦ // Where s = Sizes[l] - Neural network layer size // L = len(Sizes) - Number of neural network layers // // Matrix: Z // Description: Z is set of calculated raw neuron activations // Format: 0 l L // ⎡Z[0] ⎤ ... ⎡Z[0] ⎤ ... ⎡Z[0] ⎤ // ⎢Z[1] ⎥ ... ⎢Z[1] ⎥ ... ⎢Z[1] ⎥ // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥ // ⎢Z[i] ⎥ ... ⎢Z[i] ⎥ ... ⎢Z[i] ⎥ // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥ // ⎣Z[s] ⎦ ... ⎣Z[s] ⎦ ... ⎣Z[s] ⎦ // Where s = Sizes[l] - Neural network layer size // L = len(Sizes) - Number of neural network layers // // Matrix: Biases // Description: Biases is set of biases per layer except l0 // NOTE: l0 is always empty Dense because first layer // doesn't have connections to previous layer // Format: 1 l L // ⎡b[0] ⎤ ... ⎡b[0] ⎤ ... ⎡b[0] ⎤ // ⎢b[1] ⎥ ... ⎢b[1] ⎥ ... ⎢b[1] ⎥ // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥ // ⎢b[i] ⎥ ... ⎢b[i] ⎥ ... ⎢b[i] ⎥ // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥ // ⎣b[s] ⎦ ... ⎣b[s] ⎦ ... ⎣b[s] ⎦ // Where s = Sizes[l] - Neural network layer size // L = len(Sizes) - Number of neural network layers // // Matrix: Weights // Description: Weights is set of weights per layer except l0 // NOTE: l0 is always empty Dense because first layer // doesn't have connections to previous layer // Format: 1 l L // ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ // ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥ // ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥ // ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ // Where s = Sizes[l] - Neural network layer size // s' = Sizes[l-1] - Previous neural network layer size // L = len(Sizes) - Number of neural network layers type BackProp struct { Count int Sizes []int Biases []*mat.Dense Weights []*mat.Dense A []*mat.Dense Z []*mat.Dense alpha float64 trainingCycles int } func NewBackProp(sizes []int, nu float64, trainingCycles int) (nn *BackProp, err error) { err = nil if len(sizes) < 3 { fmt.Printf("Invalid network configuration: %v\n", sizes) return nil, errors.New("Invalid network configuration: %v\n") } for i := 0; i < len(sizes); i++ { if sizes[i] < 2 { fmt.Printf("Invalid network configuration: %v\n", sizes) return nil, errors.New("Invalid network configuration: %v\n") } } if nu <= 0.0 || nu > 1.0 { fmt.Printf("Invalid η value: %v\n", nu) return nil, errors.New("Invalid η value: %v\n") } if trainingCycles <= 0 { fmt.Printf("Invalid training cycles number: %v\n", trainingCycles) return nil, errors.New("Invalid training cycles number: %v\n") } if trainingCycles < 100 { fmt.Println("Training cycles number probably is too small") } nn = &BackProp{} nn.Sizes = sizes nn.Count = len(sizes) nn.Weights = make([]*mat.Dense, nn.Count) nn.Biases = make([]*mat.Dense, nn.Count) nn.A = make([]*mat.Dense, nn.Count) nn.Z = make([]*mat.Dense, nn.Count) nn.alpha = nu / float64(nn.Sizes[0]) nn.trainingCycles = trainingCycles for i := 1; i < nn.Count; i++ { nn.Weights[i] = generateRandomDense(nn.Sizes[i], nn.Sizes[i-1]) nn.Biases[i] = generateRandomDense(nn.Sizes[i], 1) } return } func (nn *BackProp) Copy() (out *BackProp) { out = &BackProp{} out.Sizes = nn.Sizes out.Count = nn.Count out.Weights = make([]*mat.Dense, nn.Count) out.Biases = make([]*mat.Dense, nn.Count) out.A = make([]*mat.Dense, nn.Count) out.Z = make([]*mat.Dense, nn.Count) out.alpha = nn.alpha out.trainingCycles = nn.trainingCycles for i := 1; i < out.Count; i++ { nn.Weights[i] = mat.DenseCopyOf(out.Weights[i]) nn.Biases[i] = mat.DenseCopyOf(out.Biases[i]) } return } func (nn *BackProp) Predict(aIn mat.Matrix) (maxIndex int, max float64) { r, _ := aIn.Dims() if r != nn.Sizes[0] { fmt.Printf("Invalid rows number of input matrix size: %v\n", r) return -1, 0.0 } nn.forward(aIn) result := nn.result() r, _ = result.Dims() max = 0.0 maxIndex = 0 for i := 0; i < r; i++ { if result.At(i, 0) > max { max = result.At(i, 0) maxIndex = i } } return } func (nn *BackProp) Teach(teacher teach.Teacher) { for i := 0; i < nn.trainingCycles; i++ { for teacher.NextData() { nn.backward(teacher.GetData()) } } } func (nn *BackProp) SaveState(writer io.Writer) { //save input array count bufferSize := make([]byte, 4) binary.LittleEndian.PutUint32(bufferSize[0:], uint32(nn.Count)) _, err := writer.Write(bufferSize) check(err) fmt.Printf("wrote value %d\n", uint32(nn.Count)) // save an input array buffer := make([]byte, nn.Count*4) for i := 0; i < nn.Count; i++ { binary.LittleEndian.PutUint32(buffer[i*4:], uint32(nn.Sizes[i])) } _, err = writer.Write(buffer) check(err) // fmt.Printf("wrote buffer %d bytes\n", n2) //save biases //////////////////////// for i := 1; i < nn.Count; i++ { saveDense(writer, nn.Biases[i]) } //save weights //////////////////////// for i := 1; i < nn.Count; i++ { saveDense(writer, nn.Weights[i]) } } func (nn *BackProp) LoadState(reader io.Reader) { // Reade count nn.Count = readInt(reader) // Read an input array sizeBuffer := readByteArray(reader, nn.Count*4) nn.Sizes = make([]int, nn.Count) for i := 0; i < nn.Count; i++ { nn.Sizes[i] = int(binary.LittleEndian.Uint32(sizeBuffer[i*4:])) // fmt.Printf("LoadState: nn.Sizes[%d] %d \n", i, nn.Sizes[i]) } nn.Weights = []*mat.Dense{&mat.Dense{}} nn.Biases = []*mat.Dense{&mat.Dense{}} // read Biases nn.Biases[0] = &mat.Dense{} for i := 1; i < nn.Count; i++ { nn.Biases = append(nn.Biases, &mat.Dense{}) nn.Biases[i] = readDense(reader, nn.Biases[i]) } // read Weights nn.Weights[0] = &mat.Dense{} for i := 1; i < nn.Count; i++ { nn.Weights = append(nn.Weights, &mat.Dense{}) nn.Weights[i] = readDense(reader, nn.Weights[i]) } nn.A = make([]*mat.Dense, nn.Count) nn.Z = make([]*mat.Dense, nn.Count) // fmt.Printf("\nLoadState end\n") } func (nn *BackProp) forward(aIn mat.Matrix) { nn.A[0] = mat.DenseCopyOf(aIn) for i := 1; i < nn.Count; i++ { nn.A[i] = mat.NewDense(nn.Sizes[i], 1, nil) aSrc := nn.A[i-1] aDst := nn.A[i] // Each iteration implements formula bellow for neuron activation values // A[l]=σ(W[l]*A[l−1]+B[l]) // W[l]*A[l−1] aDst.Mul(nn.Weights[i], aSrc) // W[l]*A[l−1]+B[l] aDst.Add(aDst, nn.Biases[i]) // Save raw activation value for back propagation nn.Z[i] = mat.DenseCopyOf(aDst) // σ(W[l]*A[l−1]+B[l]) aDst.Apply(applySigmoid, aDst) } } func (nn *BackProp) backward(aIn, aOut mat.Matrix) { nn.forward(aIn) lastLayerNum := nn.Count - 1 // To calculate new values of weights and biases // following formulas are used: // W[l] = A[l−1]*δ[l] // B[l] = δ[l] // For last layer δ value is calculated by following: // δ = (A[L]−y)⊙σ'(Z[L]) // Calculate initial error for last layer L // error = A[L]-y // Where y is expected activations set err := &mat.Dense{} err.Sub(nn.result(), aOut) // Calculate sigmoids prime σ'(Z[L]) for last layer L sigmoidsPrime := &mat.Dense{} sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[lastLayerNum]) // (A[L]−y)⊙σ'(Z[L]) delta := &mat.Dense{} delta.MulElem(err, sigmoidsPrime) // B[L] = δ[L] biases := mat.DenseCopyOf(delta) // W[L] = A[L−1]*δ[L] weights := &mat.Dense{} weights.Mul(delta, nn.A[lastLayerNum-1].T()) // Initialize new weights and biases values with last layer values newBiases := []*mat.Dense{makeBackGradient(biases, nn.Biases[lastLayerNum], nn.alpha)} newWeights := []*mat.Dense{makeBackGradient(weights, nn.Weights[lastLayerNum], nn.alpha)} // Save calculated delta value temporary error variable err = delta // Next layer Weights and Biases are calculated using same formulas: // W[l] = A[l−1]*δ[l] // B[l] = δ[l] // But δ[l] is calculated using different formula: // δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l]) // Where Wt[l+1] is transposed matrix of actual Weights from // forward step for l := nn.Count - 2; l > 0; l-- { // Calculate sigmoids prime σ'(Z[l]) for last layer l sigmoidsPrime := &mat.Dense{} sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[l]) // (Wt[l+1])*δ[l+1] // err bellow is delta from previous step(l+1) delta := &mat.Dense{} wdelta := &mat.Dense{} wdelta.Mul(nn.Weights[l+1].T(), err) // Calculate new delta and store it to temporary variable err // δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l]) delta.MulElem(wdelta, sigmoidsPrime) err = delta // B[l] = δ[l] biases := mat.DenseCopyOf(delta) // W[l] = A[l−1]*δ[l] // At this point it's required to give explanation for inaccuracy // in the formula // Multiplying of activations matrix for layer l-1 and δ[l] is imposible // because view of matrices are following: // A[l-1] δ[l] // ⎡A[0] ⎤ ⎡δ[0] ⎤ // ⎢A[1] ⎥ ⎢δ[1] ⎥ // ⎢ ... ⎥ ⎢ ... ⎥ // ⎢A[i] ⎥ X ⎢δ[i] ⎥ // ⎢ ... ⎥ ⎢ ... ⎥ // ⎣A[s'] ⎦ ⎣δ[s] ⎦ // So we need to modify these matrices to apply mutiplications and got // Weights matrix of following view: // ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ // ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ // ⎢ ... ⎥ // ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ // ⎢ ... ⎥ // ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ // So we swap matrices and transpose A[l-1] to get valid multiplication // of following view: // δ[l] A[l-1] // ⎡δ[0] ⎤ x [A[0] A[1] ... A[i] ... A[s']] // ⎢δ[1] ⎥ // ⎢ ... ⎥ // ⎢δ[i] ⎥ // ⎢ ... ⎥ // ⎣δ[s] ⎦ weights := &mat.Dense{} weights.Mul(delta, nn.A[l-1].T()) // !Prepend! new Biases and Weights newBiases = append([]*mat.Dense{makeBackGradient(biases, nn.Biases[l], nn.alpha)}, newBiases...) newWeights = append([]*mat.Dense{makeBackGradient(weights, nn.Weights[l], nn.alpha)}, newWeights...) } newBiases = append([]*mat.Dense{&mat.Dense{}}, newBiases...) newWeights = append([]*mat.Dense{&mat.Dense{}}, newWeights...) nn.Biases = newBiases nn.Weights = newWeights } func (nn *BackProp) result() *mat.Dense { return nn.A[nn.Count-1] }