123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435 |
- /*
- * MIT License
- *
- * Copyright (c) 2019 Alexey Edelev <semlanik@gmail.com>, Tatyana Borisova <tanusshhka@mail.ru>
- *
- * This file is part of NeuralNetwork project https://git.semlanik.org/semlanik/NeuralNetwork
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy of this
- * software and associated documentation files (the "Software"), to deal in the Software
- * without restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software, and
- * to permit persons to whom the Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be included in all copies
- * or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
- * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
- * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
- * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
- package neuralnetworkbase
- import (
- "encoding/binary"
- "errors"
- "fmt"
- "io"
- "runtime"
- "sync"
- teach "../teach"
- mat "gonum.org/v1/gonum/mat"
- )
- // NeuralNetwork is simple neural network implementation
- //
- // Resources:
- // http://neuralnetworksanddeeplearning.com
- // https://www.youtube.com/watch?v=fNk_zzaMoSs
- // http://www.inf.fu-berlin.de/lehre/WS06/Musterererkennung/Paper/rprop.pdf
- //
- // Matrix: A
- // Description: A is set of calculated neuron activations after sigmoid correction
- // Format: 0 l L
- // ⎡A[0] ⎤ ... ⎡A[0] ⎤ ... ⎡A[0] ⎤
- // ⎢A[1] ⎥ ... ⎢A[1] ⎥ ... ⎢A[1] ⎥
- // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
- // ⎢A[i] ⎥ ... ⎢A[i] ⎥ ... ⎢A[i] ⎥
- // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
- // ⎣A[s] ⎦ ... ⎣A[s] ⎦ ... ⎣A[s] ⎦
- // Where s = Sizes[l] - Neural network layer size
- // L = len(Sizes) - Number of neural network layers
- //
- // Matrix: Z
- // Description: Z is set of calculated raw neuron activations
- // Format: 0 l L
- // ⎡Z[0] ⎤ ... ⎡Z[0] ⎤ ... ⎡Z[0] ⎤
- // ⎢Z[1] ⎥ ... ⎢Z[1] ⎥ ... ⎢Z[1] ⎥
- // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
- // ⎢Z[i] ⎥ ... ⎢Z[i] ⎥ ... ⎢Z[i] ⎥
- // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
- // ⎣Z[s] ⎦ ... ⎣Z[s] ⎦ ... ⎣Z[s] ⎦
- // Where s = Sizes[l] - Neural network layer size
- // L = len(Sizes) - Number of neural network layers
- //
- // Matrix: Biases
- // Description: Biases is set of biases per layer except l0
- // NOTE: l0 is always empty Dense because first layer
- // doesn't have connections to previous layer
- // Format: 1 l L
- // ⎡b[0] ⎤ ... ⎡b[0] ⎤ ... ⎡b[0] ⎤
- // ⎢b[1] ⎥ ... ⎢b[1] ⎥ ... ⎢b[1] ⎥
- // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
- // ⎢b[i] ⎥ ... ⎢b[i] ⎥ ... ⎢b[i] ⎥
- // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
- // ⎣b[s] ⎦ ... ⎣b[s] ⎦ ... ⎣b[s] ⎦
- // Where s = Sizes[l] - Neural network layer size
- // L = len(Sizes) - Number of neural network layers
- //
- // Matrix: Weights
- // Description: Weights is set of weights per layer except l0
- // NOTE: l0 is always empty Dense because first layer
- // doesn't have connections to previous layer
- // Format: 1 l L
- // ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
- // ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
- // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
- // ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
- // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
- // ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
- // Where s = Sizes[l] - Neural network layer size
- // s' = Sizes[l-1] - Previous neural network layer size
- // L = len(Sizes) - Number of neural network layers
- type NeuralNetwork struct {
- LayerCount int
- Sizes []int
- Biases []*mat.Dense
- Weights []*mat.Dense
- BGradient []interface{}
- WGradient []interface{}
- gradientDescentInitializer GradientDescentInitializer
- }
- func NewNeuralNetwork(sizes []int, gradientDescentInitializer GradientDescentInitializer) (nn *NeuralNetwork, err error) {
- err = nil
- if len(sizes) < 3 {
- fmt.Printf("Invalid network configuration: %v\n", sizes)
- return nil, errors.New("Invalid network configuration: %v\n")
- }
- for i := 0; i < len(sizes); i++ {
- if sizes[i] < 2 {
- fmt.Printf("Invalid network configuration: %v\n", sizes)
- return nil, errors.New("Invalid network configuration: %v\n")
- }
- }
- nn = &NeuralNetwork{}
- nn.Sizes = sizes
- nn.LayerCount = len(sizes)
- nn.Biases = make([]*mat.Dense, nn.LayerCount)
- nn.Weights = make([]*mat.Dense, nn.LayerCount)
- nn.BGradient = make([]interface{}, nn.LayerCount)
- nn.WGradient = make([]interface{}, nn.LayerCount)
- nn.gradientDescentInitializer = gradientDescentInitializer
- for l := 1; l < nn.LayerCount; l++ {
- nn.Biases[l] = generateRandomDense(nn.Sizes[l], 1)
- nn.Weights[l] = generateRandomDense(nn.Sizes[l], nn.Sizes[l-1])
- nn.BGradient[l] = nn.gradientDescentInitializer(nn, l, BiasGradient)
- nn.WGradient[l] = nn.gradientDescentInitializer(nn, l, WeightGradient)
- }
- return
- }
- func (nn *NeuralNetwork) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
- r, _ := aIn.Dims()
- if r != nn.Sizes[0] {
- fmt.Printf("Invalid rows number of input matrix size: %v\n", r)
- return -1, 0.0
- }
- A, _ := nn.forward(aIn)
- result := A[nn.LayerCount-1]
- r, _ = result.Dims()
- max = 0.0
- maxIndex = 0
- for i := 0; i < r; i++ {
- if result.At(i, 0) > max {
- max = result.At(i, 0)
- maxIndex = i
- }
- }
- return
- }
- func (nn *NeuralNetwork) Teach(teacher teach.Teacher, epocs int) {
- if _, ok := nn.WGradient[nn.LayerCount-1].(OnlineGradientDescent); ok {
- nn.TeachOnline(teacher, epocs)
- } else if _, ok := nn.WGradient[nn.LayerCount-1].(BatchGradientDescent); ok {
- nn.TeachBatch(teacher, epocs)
- } else {
- panic("Invalid gradient descent type")
- }
- }
- func (nn *NeuralNetwork) TeachOnline(teacher teach.Teacher, epocs int) {
- for t := 0; t < epocs; t++ {
- for teacher.NextData() {
- dB, dW := nn.backward(teacher.GetData())
- for l := 1; l < nn.LayerCount; l++ {
- bGradient, ok := nn.BGradient[l].(OnlineGradientDescent)
- if !ok {
- panic("bGradient is not a OnlineGradientDescent")
- }
- wGradient, ok := nn.WGradient[l].(OnlineGradientDescent)
- if !ok {
- panic("wGradient is not a OnlineGradientDescent")
- }
- nn.Biases[l] = bGradient.ApplyDelta(nn.Biases[l], dB[l])
- nn.Weights[l] = wGradient.ApplyDelta(nn.Weights[l], dW[l])
- }
- }
- teacher.Reset()
- }
- }
- func (nn *NeuralNetwork) TeachBatch(teacher teach.Teacher, epocs int) {
- for t := 0; t < epocs; t++ {
- batchWorkers := nn.runBatchWorkers(runtime.NumCPU(), teacher)
- for l := 1; l < nn.LayerCount; l++ {
- bGradient, ok := nn.BGradient[l].(BatchGradientDescent)
- if !ok {
- panic("bGradient is not a BatchGradientDescent")
- }
- wGradient, ok := nn.WGradient[l].(BatchGradientDescent)
- if !ok {
- panic("wGradient is not a BatchGradientDescent")
- }
- for _, bw := range batchWorkers {
- dB, dW := bw.Result(l)
- bGradient.AccumGradients(dB)
- wGradient.AccumGradients(dW)
- }
- nn.Biases[l] = bGradient.ApplyDelta(nn.Biases[l])
- nn.Weights[l] = wGradient.ApplyDelta(nn.Weights[l])
- }
- }
- }
- func (nn *NeuralNetwork) runBatchWorkers(threadCount int, teacher teach.Teacher) (workers []*batchWorker) {
- wg := sync.WaitGroup{}
- chunkSize := teacher.GetDataCount() / threadCount
- workers = make([]*batchWorker, threadCount)
- for i, _ := range workers {
- workers[i] = newBatchWorker(nn)
- wg.Add(1)
- s := i
- go func() {
- workers[s].Run(teacher, s*chunkSize, (s+1)*chunkSize)
- wg.Done()
- }()
- }
- wg.Wait()
- return
- }
- func (nn *NeuralNetwork) SaveState(writer io.Writer) {
- //save input array count
- bufferSize := make([]byte, 4)
- binary.LittleEndian.PutUint32(bufferSize[0:], uint32(nn.LayerCount))
- _, err := writer.Write(bufferSize)
- check(err)
- fmt.Printf("wrote value %d\n", uint32(nn.LayerCount))
- // save an input array
- buffer := make([]byte, nn.LayerCount*4)
- for i := 0; i < nn.LayerCount; i++ {
- binary.LittleEndian.PutUint32(buffer[i*4:], uint32(nn.Sizes[i]))
- }
- _, err = writer.Write(buffer)
- check(err)
- // fmt.Printf("wrote buffer %d bytes\n", n2)
- //save biases
- ////////////////////////
- for i := 1; i < nn.LayerCount; i++ {
- saveDense(writer, nn.Biases[i])
- }
- //save weights
- ////////////////////////
- for i := 1; i < nn.LayerCount; i++ {
- saveDense(writer, nn.Weights[i])
- }
- }
- func (nn *NeuralNetwork) LoadState(reader io.Reader) {
- // Reade count
- nn.LayerCount = readInt(reader)
- // Read an input array
- sizeBuffer := readByteArray(reader, nn.LayerCount*4)
- nn.Sizes = make([]int, nn.LayerCount)
- for i := 0; i < nn.LayerCount; i++ {
- nn.Sizes[i] = int(binary.LittleEndian.Uint32(sizeBuffer[i*4:]))
- // fmt.Printf("LoadState: nn.Sizes[%d] %d \n", i, nn.Sizes[i])
- }
- nn.Weights = []*mat.Dense{&mat.Dense{}}
- nn.Biases = []*mat.Dense{&mat.Dense{}}
- // read Biases
- nn.Biases[0] = &mat.Dense{}
- for i := 1; i < nn.LayerCount; i++ {
- nn.Biases = append(nn.Biases, &mat.Dense{})
- nn.Biases[i] = readDense(reader, nn.Biases[i])
- }
- // read Weights
- nn.Weights[0] = &mat.Dense{}
- for i := 1; i < nn.LayerCount; i++ {
- nn.Weights = append(nn.Weights, &mat.Dense{})
- nn.Weights[i] = readDense(reader, nn.Weights[i])
- }
- // fmt.Printf("\nLoadState end\n")
- }
- func (nn NeuralNetwork) forward(aIn mat.Matrix) (A, Z []*mat.Dense) {
- A = make([]*mat.Dense, nn.LayerCount)
- Z = make([]*mat.Dense, nn.LayerCount)
- A[0] = mat.DenseCopyOf(aIn)
- for l := 1; l < nn.LayerCount; l++ {
- A[l] = mat.NewDense(nn.Sizes[l], 1, nil)
- aSrc := A[l-1]
- aDst := A[l]
- // Each iteration implements formula bellow for neuron activation values
- // A[l]=σ(W[l]*A[l−1]+B[l])
- // W[l]*A[l−1]
- aDst.Mul(nn.Weights[l], aSrc)
- // W[l]*A[l−1]+B[l]
- aDst.Add(aDst, nn.Biases[l])
- // Save raw activation value for back propagation
- Z[l] = mat.DenseCopyOf(aDst)
- // σ(W[l]*A[l−1]+B[l])
- aDst.Apply(applySigmoid, aDst)
- }
- return
- }
- // Function returns calculated bias and weights derivatives for each
- // layer arround aIn/aOut datasets
- func (nn NeuralNetwork) backward(aIn, aOut mat.Matrix) (dB, dW []*mat.Dense) {
- A, Z := nn.forward(aIn)
- lastLayerNum := nn.LayerCount - 1
- dB = make([]*mat.Dense, nn.LayerCount)
- dW = make([]*mat.Dense, nn.LayerCount)
- // To calculate new values of weights and biases
- // following formulas are used:
- // ∂E/∂W[l] = A[l−1]*δ[l]
- // ∂E/∂B[l] = δ[l]
- // For last layer δ value is calculated by following:
- // δ = (A[L]−y)⊙σ'(Z[L])
- // Calculate initial error for last layer L
- // error = A[L]-y
- // Where y is expected activations set
- err := &mat.Dense{}
- err.Sub(A[nn.LayerCount-1], aOut)
- // Calculate sigmoids prime σ'(Z[L]) for last layer L
- sigmoidsPrime := &mat.Dense{}
- sigmoidsPrime.Apply(applySigmoidPrime, Z[lastLayerNum])
- // (A[L]−y)⊙σ'(Z[L])
- delta := &mat.Dense{}
- delta.MulElem(err, sigmoidsPrime)
- // ∂E/∂B[L] = δ[L]
- biases := mat.DenseCopyOf(delta)
- // ∂E/∂W[L] = A[L−1]*δ[L]
- weights := &mat.Dense{}
- weights.Mul(delta, A[lastLayerNum-1].T())
- // Initialize new weights and biases values with last layer values
- dB[lastLayerNum] = biases
- dW[lastLayerNum] = weights
- // Next layer derivatives of Weights and Biases are calculated using same formulas:
- // ∂E/∂W[l] = A[l−1]*δ[l]
- // ∂E/∂B[l] = δ[l]
- // But δ[l] is calculated using different formula:
- // δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
- // Where Wt[l+1] is transposed matrix of actual Weights from
- // forward step
- for l := nn.LayerCount - 2; l > 0; l-- {
- // Calculate sigmoids prime σ'(Z[l]) for last layer l
- sigmoidsPrime := &mat.Dense{}
- sigmoidsPrime.Apply(applySigmoidPrime, Z[l])
- // (Wt[l+1])*δ[l+1]
- // err bellow is delta from previous step(l+1)
- wdelta := &mat.Dense{}
- wdelta.Mul(nn.Weights[l+1].T(), delta)
- // Calculate new delta and store it to temporary variable err
- // δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
- delta = &mat.Dense{}
- delta.MulElem(wdelta, sigmoidsPrime)
- // ∂E/∂B[l] = δ[l]
- biases := mat.DenseCopyOf(delta)
- // ∂E/∂W[l] = A[l−1]*δ[l]
- // At this point it's required to give explanation for inaccuracy
- // in the formula
- // Multiplying of activations matrix for layer l-1 and δ[l] is imposible
- // because view of matrices are following:
- // A[l-1] δ[l]
- // ⎡A[0] ⎤ ⎡δ[0] ⎤
- // ⎢A[1] ⎥ ⎢δ[1] ⎥
- // ⎢ ... ⎥ ⎢ ... ⎥
- // ⎢A[i] ⎥ X ⎢δ[i] ⎥
- // ⎢ ... ⎥ ⎢ ... ⎥
- // ⎣A[s'] ⎦ ⎣δ[s] ⎦
- // So we need to modify these matrices to apply mutiplications and got
- // Weights matrix of following view:
- // ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
- // ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
- // ⎢ ... ⎥
- // ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
- // ⎢ ... ⎥
- // ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
- // So we swap matrices and transpose A[l-1] to get valid multiplication
- // of following view:
- // δ[l] A[l-1]
- // ⎡δ[0] ⎤ x [A[0] A[1] ... A[i] ... A[s']]
- // ⎢δ[1] ⎥
- // ⎢ ... ⎥
- // ⎢δ[i] ⎥
- // ⎢ ... ⎥
- // ⎣δ[s] ⎦
- weights := &mat.Dense{}
- weights.Mul(delta, A[l-1].T())
- dB[l] = biases
- dW[l] = weights
- }
- return
- }
|