- package neuralnetworkbase
- import (
- teach "../teach"
- mat "gonum.org/v1/gonum/mat"
- )
// NeuralNetwork is a simple neural network implementation.
//
// Matrix: A
// Description: A is the set of calculated neuron activations after sigmoid correction
// Format:    0            n            N
//         ⎡A[0] ⎤  ...  ⎡A[0] ⎤  ...  ⎡A[0] ⎤
//         ⎢A[1] ⎥  ...  ⎢A[1] ⎥  ...  ⎢A[1] ⎥
//         ⎢ ... ⎥  ...  ⎢ ... ⎥  ...  ⎢ ... ⎥
//         ⎢A[i] ⎥  ...  ⎢A[i] ⎥  ...  ⎢A[i] ⎥
//         ⎢ ... ⎥  ...  ⎢ ... ⎥  ...  ⎢ ... ⎥
//         ⎣A[s] ⎦  ...  ⎣A[s] ⎦  ...  ⎣A[s] ⎦
// Where s = Sizes[n], N = len(Sizes)
//
// Matrix: Z
// Description: Z is the set of calculated raw (pre-sigmoid) neuron activations
// Format:    0            n            N
//         ⎡Z[0] ⎤  ...  ⎡Z[0] ⎤  ...  ⎡Z[0] ⎤
//         ⎢Z[1] ⎥  ...  ⎢Z[1] ⎥  ...  ⎢Z[1] ⎥
//         ⎢ ... ⎥  ...  ⎢ ... ⎥  ...  ⎢ ... ⎥
//         ⎢Z[i] ⎥  ...  ⎢Z[i] ⎥  ...  ⎢Z[i] ⎥
//         ⎢ ... ⎥  ...  ⎢ ... ⎥  ...  ⎢ ... ⎥
//         ⎣Z[s] ⎦  ...  ⎣Z[s] ⎦  ...  ⎣Z[s] ⎦
// Where s = Sizes[n], N = len(Sizes)
//
// Matrix: Biases
// Description: Biases is the set of biases per layer, except layer 0
// Format:
//         ⎡b[0] ⎤
//         ⎢b[1] ⎥
//         ⎢ ... ⎥
//         ⎢b[i] ⎥
//         ⎢ ... ⎥
//         ⎣b[s] ⎦
// Where s = Sizes[n]
//
// Matrix: Weights
// Description: Weights is the set of weight matrices per layer, except layer 0
// Format:
//         ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
//         ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
//         ⎢              ...            ⎥
//         ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
//         ⎢              ...            ⎥
//         ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
// Where s = Sizes[n], s' = Sizes[n-1]
type NeuralNetwork struct {
	Count          int          // number of layers, len(Sizes)
	Sizes          []int        // neuron count per layer; Sizes[0] is the input layer
	Biases         []*mat.Dense // per-layer bias column vectors; index 0 is unused
	Weights        []*mat.Dense // per-layer weight matrices; index 0 is unused
	A              []*mat.Dense // activations from the most recent forward pass
	Z              []*mat.Dense // raw (pre-sigmoid) activations from the most recent forward pass
	alpha          float64      // learning rate, pre-scaled by the input layer size
	trainingCycles int          // passes over the training data performed by Teach
}
- func NewNeuralNetwork(Sizes []int, nu float64, trainingCycles int) (nn *NeuralNetwork) {
- nn = &NeuralNetwork{}
- nn.Sizes = Sizes
- nn.Count = len(Sizes)
- nn.Weights = make([]*mat.Dense, nn.Count)
- nn.Biases = make([]*mat.Dense, nn.Count)
- nn.A = make([]*mat.Dense, nn.Count)
- nn.Z = make([]*mat.Dense, nn.Count)
- nn.alpha = nu / float64(nn.Sizes[0])
- nn.trainingCycles = trainingCycles
- for i := 1; i < nn.Count; i++ {
- nn.Weights[i] = generateRandomDense(nn.Sizes[i], nn.Sizes[i-1])
- nn.Biases[i] = generateRandomDense(nn.Sizes[i], 1)
- }
- return
- }
- func (nn *NeuralNetwork) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
- nn.forward(aIn)
- result := nn.result()
- r, _ := result.Dims()
- max = 0.0
- maxIndex = 0
- for i := 0; i < r; i++ {
- if result.At(i, 0) > max {
- max = result.At(i, 0)
- maxIndex = i
- }
- }
- return
- }
// Teach trains the network by running one backpropagation step per
// training example, repeated for the configured number of training
// cycles.
//
// NOTE(review): cycles after the first only see data if teacher.Next()
// restarts iteration once it has returned false — confirm against the
// teach.Teacher implementation; otherwise a Reset call is missing here.
func (nn *NeuralNetwork) Teach(teacher teach.Teacher) {
	for i := 0; i < nn.trainingCycles; i++ {
		for teacher.Next() {
			nn.backward(teacher.GetData(), teacher.GetExpect())
		}
	}
}
// SaveState is intended to persist the network weights and biases to
// filename. Not yet implemented.
func (nn *NeuralNetwork) SaveState(filename string) {
}
// LoadState is intended to restore network weights and biases from
// filename. Not yet implemented.
func (nn *NeuralNetwork) LoadState(filename string) {
}
- func (nn *NeuralNetwork) forward(aIn mat.Matrix) {
- nn.A[0] = mat.DenseCopyOf(aIn)
- for i := 1; i < nn.Count; i++ {
- nn.A[i] = mat.NewDense(nn.Sizes[i], 1, nil)
- aSrc := nn.A[i-1]
- aDst := nn.A[i]
- //Each iteration implements formula bellow for neuron activation values
- //A[l]=σ(W[l]*A[l−1]+B[l])
- //W[l]*A[l−1]
- aDst.Mul(nn.Weights[i], aSrc)
- //W[l]*A[l−1]+B[l]
- aDst.Add(aDst, nn.Biases[i])
- //Save raw activation value for back propagation
- nn.Z[i] = mat.DenseCopyOf(aDst)
- //σ(W[l]*A[l−1]+B[l])
- aDst.Apply(applySigmoid, aDst)
- }
- }
// backward performs one backpropagation step for a single training
// example: it runs a forward pass on aIn, walks the layers from the
// output back to layer 1 computing gradients, and replaces
// nn.Weights/nn.Biases with the adjusted values.
//
// Gradients are derived from the per-layer deltas:
//
//	W[l] = δ[l]*(A[l−1])ᵀ
//	B[l] = δ[l]
//
// For the last layer L the delta is
//
//	δ[L] = (A[L]−y)⊙σ'(Z[L])
//
// and for earlier layers
//
//	δ[l] = ((W[l+1])ᵀ*δ[l+1])⊙σ'(Z[l])
func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) {
	nn.forward(aIn)
	lastLayerNum := nn.Count - 1
	// Initial error for the last layer L: error = A[L]−y,
	// where y (aOut) is the expected activation set.
	err := &mat.Dense{}
	err.Sub(nn.result(), aOut)
	// Sigmoid prime σ'(Z[L]) for the last layer L.
	sigmoidsPrime := &mat.Dense{}
	sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[lastLayerNum])
	// δ[L] = (A[L]−y)⊙σ'(Z[L])
	delta := &mat.Dense{}
	delta.MulElem(err, sigmoidsPrime)
	// B[L] = δ[L]
	biases := mat.DenseCopyOf(delta)
	// W[L] = δ[L]*(A[L−1])ᵀ
	weights := &mat.Dense{}
	weights.Mul(delta, nn.A[lastLayerNum-1].T())
	// Seed the new per-layer slices with the last layer's adjusted
	// values. NOTE(review): makeBackGradien is assumed to combine the
	// gradient with the current value scaled by nn.alpha — confirm in
	// its definition.
	newBiases := []*mat.Dense{makeBackGradien(biases, nn.Biases[lastLayerNum], nn.alpha)}
	newWeights := []*mat.Dense{makeBackGradien(weights, nn.Weights[lastLayerNum], nn.alpha)}
	// Carry the current delta in err so the loop below can use it as
	// δ[l+1].
	err = delta
	// Hidden layers, from Count-2 down to 1 (layer 0 is the input and
	// has no weights or biases). Same W/B formulas, but
	// δ[l] = ((W[l+1])ᵀ*δ[l+1])⊙σ'(Z[l]),
	// where W[l+1] is the transposed weight matrix from the forward
	// step.
	for l := nn.Count - 2; l > 0; l-- {
		// σ'(Z[l]) for the current layer l.
		sigmoidsPrime := &mat.Dense{}
		sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[l])
		// (W[l+1])ᵀ*δ[l+1]; err below holds δ[l+1] from the previous
		// iteration.
		delta := &mat.Dense{}
		wdelta := &mat.Dense{}
		wdelta.Mul(nn.Weights[l+1].T(), err)
		// δ[l] = ((W[l+1])ᵀ*δ[l+1])⊙σ'(Z[l]); stash it back into err
		// for the next iteration.
		delta.MulElem(wdelta, sigmoidsPrime)
		err = delta
		// B[l] = δ[l]
		biases := mat.DenseCopyOf(delta)
		// W[l] = δ[l]*(A[l−1])ᵀ.
		// A[l−1] and δ[l] are both column vectors, so they cannot be
		// multiplied directly:
		//	 A[l-1]      δ[l]
		//	⎡A[0] ⎤    ⎡δ[0] ⎤
		//	⎢A[1] ⎥    ⎢δ[1] ⎥
		//	⎢ ... ⎥    ⎢ ... ⎥
		//	⎢A[i] ⎥ X  ⎢δ[i] ⎥
		//	⎢ ... ⎥    ⎢ ... ⎥
		//	⎣A[s']⎦    ⎣δ[s] ⎦
		// Transposing A[l−1] into a row vector makes the product valid
		// and yields the Sizes[l] x Sizes[l-1] weight matrix:
		//	δ[l] x [A[0] A[1] ... A[i] ... A[s']]
		weights := &mat.Dense{}
		weights.Mul(delta, nn.A[l-1].T())
		// Prepend (not append) so the slices end up ordered by layer
		// index.
		newBiases = append([]*mat.Dense{makeBackGradien(biases, nn.Biases[l], nn.alpha)}, newBiases...)
		newWeights = append([]*mat.Dense{makeBackGradien(weights, nn.Weights[l], nn.alpha)}, newWeights...)
	}
	// Layer 0 carries no weights or biases; prepend empty matrices so
	// per-layer indexing stays aligned with nn.Sizes.
	newBiases = append([]*mat.Dense{&mat.Dense{}}, newBiases...)
	newWeights = append([]*mat.Dense{&mat.Dense{}}, newWeights...)
	nn.Biases = newBiases
	nn.Weights = newWeights
}
- func (nn *NeuralNetwork) result() *mat.Dense {
- return nn.A[nn.Count-1]
- }