
Non-working implementation of rprop

Alexey Edelev 5 years ago
parent
commit
22144ee7cf

+ 2 - 3
neuralnetwork/main.go

@@ -11,8 +11,7 @@ import (
 
 func main() {
 	sizes := []int{13, 14, 14, 3}
-	var nn neuralnetwork.NeuralNetwork
-	nn, _ = neuralnetwork.NewBackProp(sizes, 0.1, 481)
+	nn, _ := neuralnetwork.NewNeuralNetwork(sizes, 0.1, 481)
 
 	// for i := 0; i < nn.Count; i++ {
 	// 	if i > 0 {
@@ -55,7 +54,7 @@ func main() {
 	}
 	fmt.Printf("Fail count: %v\n\n", failCount)
 
-	nn = &neuralnetwork.BackProp{}
+	nn = &neuralnetwork.NeuralNetwork{}
 	inFile, err := os.Open("./data")
 	if err != nil {
 		log.Fatal(err)

+ 102 - 0
neuralnetwork/neuralnetworkbase/gradients.go

@@ -0,0 +1,102 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2019 Alexey Edelev <semlanik@gmail.com>
+ *
+ * This file is part of NeuralNetwork project https://git.semlanik.org/semlanik/NeuralNetwork
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of this
+ * software and associated documentation files (the "Software"), to deal in the Software
+ * without restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+ * to permit persons to whom the Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies
+ * or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+ * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
+ * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package neuralnetworkbase
+
+import (
+	"math"
+
+	mat "gonum.org/v1/gonum/mat"
+)
+
+type RPropGradient struct {
+	Gradients *mat.Dense
+	Deltas    *mat.Dense
+}
+
+func NewRPropGradient(r, c int) (g *RPropGradient) {
+	g = &RPropGradient{}
+
+	deltas := make([]float64, r*c)
+
+	for j, _ := range deltas {
+		deltas[j] = 0.1
+	}
+
+	g.Gradients = mat.NewDense(r, c, nil)
+	g.Deltas = mat.NewDense(r, c, deltas)
+	return
+}
+
+func (g *RPropGradient) ApplyDelta(m mat.Matrix, derivative mat.Matrix) (result *mat.Dense) {
+	//TODO: move this hardcoded parameters to separate config for gradient
+	nuPlus := 1.2
+	nuMinus := 0.5
+
+	deltaMax := 50.0
+	deltaMin := 0.000001
+
+	result = &mat.Dense{}
+
+	result.Apply(func(i, j int, v float64) (outV float64) {
+		gradientSign := g.Gradients.At(i, j) * derivative.At(i, j)
+		if gradientSign > 0 {
+			g.Deltas.Set(i, j, math.Min(nuPlus*g.Deltas.At(i, j), deltaMax))
+			outV = v - sign(derivative.At(i, j))*g.Deltas.At(i, j)
+
+			g.Gradients.Set(i, j, derivative.At(i, j))
+		} else if gradientSign < 0 {
+			outV = v + sign(g.Gradients.At(i, j))*g.Deltas.At(i, j)
+			g.Deltas.Set(i, j, math.Max(nuMinus*g.Deltas.At(i, j), deltaMin))
+
+			g.Gradients.Set(i, j, 0.0)
+		} else {
+			outV = v - sign(derivative.At(i, j))*g.Deltas.At(i, j)
+
+			g.Gradients.Set(i, j, derivative.At(i, j))
+		}
+		return
+	}, m)
+	return result
+}
+
+//Simple backpropagation with constant value η
+type BackPropGradient struct {
+	alpha float64
+}
+
+func (g *BackPropGradient) ApplyDelta(m mat.Matrix, derivative mat.Matrix) (result *mat.Dense) {
+	// Gradient change of actual matrix using:
+	// m[l]′ = m[l] − η * ∂C/∂m
+	// Where ∂E/∂m is `in` matrix
+	scaled := &mat.Dense{}
+	result = &mat.Dense{}
+
+	// η * ∂E/∂m
+	scaled.Scale(g.alpha, derivative)
+	// m[l] − η * ∂E/∂m
+	result.Sub(m, scaled)
+	return result
+}
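
The update above follows the RProp scheme: each weight keeps its own step size in Deltas, which grows by nuPlus while the partial derivative keeps its sign, shrinks by nuMinus when the sign flips, and the stored gradient is zeroed after a flip so the next call falls into the unchanged branch. A minimal sketch of that behaviour on a single weight (written for this review, not part of the commit; it assumes it sits next to gradients.go in package neuralnetworkbase, and the expected numbers come from the hard-coded constants above):

package neuralnetworkbase

import (
	"math"
	"testing"

	mat "gonum.org/v1/gonum/mat"
)

// Sketch: the same-signed derivative applied twice makes the per-weight step
// grow from the initial 0.1 to 0.12 (nuPlus = 1.2), so 0.5 → 0.4 → 0.28.
func TestRPropStepGrowth(t *testing.T) {
	g := NewRPropGradient(1, 1)
	w := mat.NewDense(1, 1, []float64{0.5})
	d := mat.NewDense(1, 1, []float64{0.3}) // positive ∂E/∂w on both calls

	w = g.ApplyDelta(w, d) // stored gradient is 0 → unchanged-sign branch, step 0.1
	w = g.ApplyDelta(w, d) // same sign again → step grows to 0.12

	if got := w.At(0, 0); math.Abs(got-0.28) > 1e-12 {
		t.Errorf("w = %v, want 0.28", got)
	}
}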

+ 2 - 8
neuralnetwork/neuralnetworkbase/interface.go

@@ -26,15 +26,9 @@
 package neuralnetworkbase
 
 import (
-	"io"
-
-	teach "../teach"
 	mat "gonum.org/v1/gonum/mat"
 )
 
-type NeuralNetwork interface {
-	Teach(teacher teach.Teacher)
-	Predict(aIn mat.Matrix) (maxIndex int, max float64)
-	SaveState(io.Writer)
-	LoadState(io.Reader)
+type Gradient interface {
+	ApplyDelta(aIn mat.Matrix, gradient mat.Matrix) *mat.Dense
 }
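
Any update rule that can rewrite a weight or bias matrix from its current value and the error derivative now fits behind this interface. Purely to illustrate that decoupling, here is a hypothetical momentum-SGD implementation sketched for this review (not part of the commit, and not used anywhere in the repository):

package neuralnetworkbase

import (
	mat "gonum.org/v1/gonum/mat"
)

// MomentumGradient is a hypothetical Gradient implementation shown only as an
// illustration of the interface.
type MomentumGradient struct {
	alpha    float64    // learning rate η
	momentum float64    // momentum coefficient, e.g. 0.9
	velocity *mat.Dense // running velocity, lazily sized on first use
}

func (g *MomentumGradient) ApplyDelta(m mat.Matrix, derivative mat.Matrix) *mat.Dense {
	r, c := derivative.Dims()
	if g.velocity == nil {
		g.velocity = mat.NewDense(r, c, nil)
	}

	// v = momentum*v + η*∂E/∂m
	scaled := &mat.Dense{}
	scaled.Scale(g.alpha, derivative)
	g.velocity.Scale(g.momentum, g.velocity)
	g.velocity.Add(g.velocity, scaled)

	// m′ = m − v
	result := &mat.Dense{}
	result.Sub(m, g.velocity)
	return result
}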

+ 8 - 13
neuralnetwork/neuralnetworkbase/mathcommon.go

@@ -55,19 +55,14 @@ func sigmoid(x float64) float64 {
 }
 
 func sigmoidPrime(x float64) float64 {
-	return sigmoid(x) * (1 - sigmoid(x))
+	sig := sigmoid(x)
+	return sig * (1 - sig)
 }
 
-func makeBackGradient(in mat.Matrix, actual mat.Matrix, alpha float64) *mat.Dense {
-	// Gradient change of actual matrix using:
-	// m[l]′ = m[l] − η * ∂C/∂m
-	// Where ∂C/∂m is `in` matrix
-	scaled := &mat.Dense{}
-	result := &mat.Dense{}
-
-	// η * ∂C/∂m
-	scaled.Scale(alpha, in)
-	// m[l] − η * ∂C/∂m
-	result.Sub(actual, scaled)
-	return result
+func sign(v float64) float64 {
+	if v == 0 {
+		return 0
+	}
+	// fmt.Printf("%v / math.Abs(%v) = %v\n", v, math.Abs(v), v/math.Abs(v))
+	return v / math.Abs(v)
 }
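
The division form returns exactly ±1 for any finite non-zero input and 0 for zero, which is all the RProp update needs. A quick check of that contract (a sketch assuming it lives in the same package as mathcommon.go, not part of the commit):

package neuralnetworkbase

import "testing"

// Sketch: sign should map positives to 1, negatives to -1 and zero to 0.
func TestSign(t *testing.T) {
	cases := map[float64]float64{2.5: 1, -0.3: -1, 0: 0}
	for in, want := range cases {
		if got := sign(in); got != want {
			t.Errorf("sign(%v) = %v, want %v", in, got, want)
		}
	}
}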

+ 53 - 35
neuralnetwork/neuralnetworkbase/backprop.go → neuralnetwork/neuralnetworkbase/neuralnetwork.go

@@ -40,6 +40,7 @@ import (
 // Resources:
 // http://neuralnetworksanddeeplearning.com
 // https://www.youtube.com/watch?v=fNk_zzaMoSs
+// http://www.inf.fu-berlin.de/lehre/WS06/Musterererkennung/Paper/rprop.pdf
 //
 // Matrix: A
 // Description: A is set of calculated neuron activations after sigmoid correction
@@ -94,18 +95,19 @@ import (
 //       s' = Sizes[l-1] - Previous neural network layer size
 //       L = len(Sizes) - Number of neural network layers
 
-type BackProp struct {
+type NeuralNetwork struct {
 	Count          int
 	Sizes          []int
 	Biases         []*mat.Dense
 	Weights        []*mat.Dense
 	A              []*mat.Dense
 	Z              []*mat.Dense
-	alpha          float64
+	WGradient      []Gradient
+	BGradient      []Gradient
 	trainingCycles int
 }
 
-func NewBackProp(sizes []int, nu float64, trainingCycles int) (nn *BackProp, err error) {
+func NewNeuralNetwork(sizes []int, nu float64, trainingCycles int) (nn *NeuralNetwork, err error) {
 	err = nil
 	if len(sizes) < 3 {
 		fmt.Printf("Invalid network configuration: %v\n", sizes)
@@ -133,42 +135,48 @@ func NewBackProp(sizes []int, nu float64, trainingCycles int) (nn *BackProp, err
 		fmt.Println("Training cycles number probably is too small")
 	}
 
-	nn = &BackProp{}
+	nn = &NeuralNetwork{}
 	nn.Sizes = sizes
 	nn.Count = len(sizes)
 	nn.Weights = make([]*mat.Dense, nn.Count)
 	nn.Biases = make([]*mat.Dense, nn.Count)
+	nn.WGradient = make([]Gradient, nn.Count)
+	nn.BGradient = make([]Gradient, nn.Count)
+
 	nn.A = make([]*mat.Dense, nn.Count)
 	nn.Z = make([]*mat.Dense, nn.Count)
-	nn.alpha = nu / float64(nn.Sizes[0])
 	nn.trainingCycles = trainingCycles
 
+	alpha := nu / float64(nn.Sizes[0])
 	for i := 1; i < nn.Count; i++ {
 		nn.Weights[i] = generateRandomDense(nn.Sizes[i], nn.Sizes[i-1])
 		nn.Biases[i] = generateRandomDense(nn.Sizes[i], 1)
+		nn.WGradient[i] = &BackPropGradient{alpha}
+		nn.BGradient[i] = &BackPropGradient{alpha}
+		// nn.WGradient[i] = NewRPropGradient(nn.Sizes[i], nn.Sizes[i-1])
+		// nn.BGradient[i] = NewRPropGradient(nn.Sizes[i], 1)
 	}
 	return
 }
 
-func (nn *BackProp) Copy() (out *BackProp) {
-	out = &BackProp{}
+func (nn *NeuralNetwork) Copy() (out *NeuralNetwork) {
+	out = &NeuralNetwork{}
 	out.Sizes = nn.Sizes
 	out.Count = nn.Count
 	out.Weights = make([]*mat.Dense, nn.Count)
 	out.Biases = make([]*mat.Dense, nn.Count)
 	out.A = make([]*mat.Dense, nn.Count)
 	out.Z = make([]*mat.Dense, nn.Count)
-	out.alpha = nn.alpha
 	out.trainingCycles = nn.trainingCycles
 
 	for i := 1; i < out.Count; i++ {
-		nn.Weights[i] = mat.DenseCopyOf(out.Weights[i])
-		nn.Biases[i] = mat.DenseCopyOf(out.Biases[i])
+		out.Weights[i] = mat.DenseCopyOf(nn.Weights[i])
+		out.Biases[i] = mat.DenseCopyOf(nn.Biases[i])
 	}
 	return
 }
 
-func (nn *BackProp) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
+func (nn *NeuralNetwork) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
 	r, _ := aIn.Dims()
 	if r != nn.Sizes[0] {
 		fmt.Printf("Invalid rows number of input matrix size: %v\n", r)
@@ -189,7 +197,7 @@ func (nn *BackProp) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
 	return
 }
 
-func (nn *BackProp) Teach(teacher teach.Teacher) {
+func (nn *NeuralNetwork) Teach(teacher teach.Teacher) {
 	for i := 0; i < nn.trainingCycles; i++ {
 		for teacher.NextData() {
 			nn.backward(teacher.GetData())
@@ -197,7 +205,7 @@ func (nn *BackProp) Teach(teacher teach.Teacher) {
 	}
 }
 
-func (nn *BackProp) SaveState(writer io.Writer) {
+func (nn *NeuralNetwork) SaveState(writer io.Writer) {
 	//save input array count
 	bufferSize := make([]byte, 4)
 	binary.LittleEndian.PutUint32(bufferSize[0:], uint32(nn.Count))
@@ -229,7 +237,7 @@ func (nn *BackProp) SaveState(writer io.Writer) {
 	}
 }
 
-func (nn *BackProp) LoadState(reader io.Reader) {
+func (nn *NeuralNetwork) LoadState(reader io.Reader) {
 	// Reade count
 	nn.Count = readInt(reader)
 
@@ -265,7 +273,7 @@ func (nn *BackProp) LoadState(reader io.Reader) {
 	// fmt.Printf("\nLoadState end\n")
 }
 
-func (nn *BackProp) forward(aIn mat.Matrix) {
+func (nn *NeuralNetwork) forward(aIn mat.Matrix) {
 	nn.A[0] = mat.DenseCopyOf(aIn)
 
 	for i := 1; i < nn.Count; i++ {
@@ -290,15 +298,15 @@ func (nn *BackProp) forward(aIn mat.Matrix) {
 	}
 }
 
-func (nn *BackProp) backward(aIn, aOut mat.Matrix) {
+func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) {
 	nn.forward(aIn)
 
 	lastLayerNum := nn.Count - 1
 
 	// To calculate new values of weights and biases
 	// following formulas are used:
-	// W[l] = A[l−1]*δ[l]
-	// B[l] = δ[l]
+	// ∂E/∂W[l] = A[l−1]*δ[l]
+	// ∂E/∂B[l] = δ[l]
 
 	// For last layer δ value is calculated by following:
 	// δ = (A[L]−y)⊙σ'(Z[L])
@@ -317,23 +325,31 @@ func (nn *BackProp) backward(aIn, aOut mat.Matrix) {
 	delta := &mat.Dense{}
 	delta.MulElem(err, sigmoidsPrime)
 
-	// B[L] = δ[L]
+	// ∂E/∂B[L] = δ[L]
 	biases := mat.DenseCopyOf(delta)
 
-	// W[L] = A[L−1]*δ[L]
+	// ∂E/∂W[L] = A[L−1]*δ[L]
 	weights := &mat.Dense{}
 	weights.Mul(delta, nn.A[lastLayerNum-1].T())
 
-	// Initialize new weights and biases values with last layer values
-	newBiases := []*mat.Dense{makeBackGradient(biases, nn.Biases[lastLayerNum], nn.alpha)}
-	newWeights := []*mat.Dense{makeBackGradient(weights, nn.Weights[lastLayerNum], nn.alpha)}
+	// fmt.Printf("Prev biases[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.Biases[lastLayerNum], mat.Prefix(""), mat.Excerpt(0)))
+	// fmt.Printf("Prev weights[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.Weights[lastLayerNum], mat.Prefix(""), mat.Excerpt(0)))
 
-	// Save calculated delta value temporary error variable
-	err = delta
+	// fmt.Printf("Expect[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(aOut, mat.Prefix(""), mat.Excerpt(0)))
+	// fmt.Printf("Result[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.result(), mat.Prefix(""), mat.Excerpt(0)))
+	// fmt.Printf("nn.Z[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.Z[lastLayerNum], mat.Prefix(""), mat.Excerpt(0)))
+	// fmt.Printf("sigmoidsPrime[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(sigmoidsPrime, mat.Prefix(""), mat.Excerpt(0)))
+	// fmt.Printf("Err[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(err, mat.Prefix(""), mat.Excerpt(0)))
+	// fmt.Printf("Biases gradient[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(biases, mat.Prefix(""), mat.Excerpt(0)))
+	// fmt.Printf("Weights gradient[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(weights, mat.Prefix(""), mat.Excerpt(0)))
+
+	// Initialize new weights and biases values with last layer values
+	newBiases := []*mat.Dense{nn.BGradient[lastLayerNum].ApplyDelta(nn.Biases[lastLayerNum], biases)}
+	newWeights := []*mat.Dense{nn.WGradient[lastLayerNum].ApplyDelta(nn.Weights[lastLayerNum], weights)}
 
 	// Next layer Weights and Biases are calculated using same formulas:
-	// W[l] = A[l−1]*δ[l]
-	// B[l] = δ[l]
+	// ∂E/∂W[l] = A[l−1]*δ[l]
+	// ∂E/∂B[l] = δ[l]
 
 	// But δ[l] is calculated using different formula:
 	// δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
@@ -346,19 +362,18 @@ func (nn *BackProp) backward(aIn, aOut mat.Matrix) {
 
 		// (Wt[l+1])*δ[l+1]
 		// err bellow is delta from previous step(l+1)
-		delta := &mat.Dense{}
 		wdelta := &mat.Dense{}
-		wdelta.Mul(nn.Weights[l+1].T(), err)
+		wdelta.Mul(nn.Weights[l+1].T(), delta)
 
 		// Calculate new delta and store it to temporary variable err
 		// δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
+		delta = &mat.Dense{}
 		delta.MulElem(wdelta, sigmoidsPrime)
-		err = delta
 
-		// B[l] = δ[l]
+		// ∂E/∂B[l] = δ[l]
 		biases := mat.DenseCopyOf(delta)
 
-		// W[l] = A[l−1]*δ[l]
+		// ∂E/∂W[l] = A[l−1]*δ[l]
 		// At this point it's required to give explanation for inaccuracy
 		// in the formula
 
@@ -391,9 +406,12 @@ func (nn *BackProp) backward(aIn, aOut mat.Matrix) {
 		weights := &mat.Dense{}
 		weights.Mul(delta, nn.A[l-1].T())
 
+		// fmt.Printf("Weights gradient[%v]:\n%v\n\n", l, mat.Formatted(weights, mat.Prefix(""), mat.Excerpt(0)))
+		// fmt.Printf("Biases gradient[%v]:\n%v\n\n", l, mat.Formatted(biases, mat.Prefix(""), mat.Excerpt(0)))
+
 		// !Prepend! new Biases and Weights
-		newBiases = append([]*mat.Dense{makeBackGradient(biases, nn.Biases[l], nn.alpha)}, newBiases...)
-		newWeights = append([]*mat.Dense{makeBackGradient(weights, nn.Weights[l], nn.alpha)}, newWeights...)
+		newBiases = append([]*mat.Dense{nn.BGradient[l].ApplyDelta(nn.Biases[l], biases)}, newBiases...)
+		newWeights = append([]*mat.Dense{nn.WGradient[l].ApplyDelta(nn.Weights[l], weights)}, newWeights...)
 	}
 
 	newBiases = append([]*mat.Dense{&mat.Dense{}}, newBiases...)
@@ -403,6 +421,6 @@ func (nn *BackProp) backward(aIn, aOut mat.Matrix) {
 	nn.Weights = newWeights
 }
 
-func (nn *BackProp) result() *mat.Dense {
+func (nn *NeuralNetwork) result() *mat.Dense {
 	return nn.A[nn.Count-1]
 }
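
As wired above, NewNeuralNetwork still installs BackPropGradient for every layer and keeps the RProp variant commented out, which matches the commit title: the RProp path exists but is not enabled. Switching a network over would roughly mean changing the constructor loop as sketched below (an assumption based on the commented-out lines, not code from this commit); since RProp keeps its own per-weight step sizes, η then no longer influences the updates:

	// Inside NewNeuralNetwork, per layer:
	for i := 1; i < nn.Count; i++ {
		nn.Weights[i] = generateRandomDense(nn.Sizes[i], nn.Sizes[i-1])
		nn.Biases[i] = generateRandomDense(nn.Sizes[i], 1)
		// One step-size entry per weight/bias, so shapes must match the layer.
		nn.WGradient[i] = NewRPropGradient(nn.Sizes[i], nn.Sizes[i-1])
		nn.BGradient[i] = NewRPropGradient(nn.Sizes[i], 1)
	}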

+ 8 - 8
neuralnetwork/neuralnetworkbase/backprop_test.go → neuralnetwork/neuralnetworkbase/neuralnetwork_test.go

@@ -6,40 +6,40 @@ import (
 	"gonum.org/v1/gonum/mat"
 )
 
-func TestNewBackProp(t *testing.T) {
-	nn, err := NewBackProp([]int{}, 0.1, 500)
+func TestNewNeuralNetwork(t *testing.T) {
+	nn, err := NewNeuralNetwork([]int{}, 0.1, 500)
 	if nn != nil || err == nil {
 		t.Error("nn initialized, but shouldn't ", err)
 	}
 
-	nn, err = NewBackProp([]int{0, 0, 0, 0}, 0.1, 500)
+	nn, err = NewNeuralNetwork([]int{0, 0, 0, 0}, 0.1, 500)
 	if nn != nil || err == nil {
 		t.Error("nn initialized, but shouldn't ", err)
 	}
 
-	nn, err = NewBackProp([]int{1, 1, 1, 1}, 0.1, 500)
+	nn, err = NewNeuralNetwork([]int{1, 1, 1, 1}, 0.1, 500)
 	if nn != nil || err == nil {
 		t.Error("nn initialized, but shouldn't ", err)
 	}
 
-	nn, err = NewBackProp([]int{5, 5}, 0.1, 500)
+	nn, err = NewNeuralNetwork([]int{5, 5}, 0.1, 500)
 	if nn != nil || err == nil {
 		t.Error("nn initialized, but shouldn't ", err)
 	}
 
-	nn, err = NewBackProp([]int{5, 1, 5, 5}, 0.1, 500)
+	nn, err = NewNeuralNetwork([]int{5, 1, 5, 5}, 0.1, 500)
 	if nn != nil || err == nil {
 		t.Error("nn initialized, but shouldn't ", err)
 	}
 
-	nn, err = NewBackProp([]int{5, 4, 4, 5}, 0.1, 500)
+	nn, err = NewNeuralNetwork([]int{5, 4, 4, 5}, 0.1, 500)
 	if nn == nil || err != nil {
 		t.Error("nn is not initialized, but should be ", err)
 	}
 }
 
 func TestNeuralNetworkPredict(t *testing.T) {
-	nn, _ := NewBackProp([]int{3, 4, 4, 2}, 0.1, 500)
+	nn, _ := NewNeuralNetwork([]int{3, 4, 4, 2}, 0.1, 500)
 
 	aIn := &mat.Dense{}
 	index, max := nn.Predict(aIn)
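
The renamed tests only cover construction and Predict; the new Gradient implementations appear untested in this commit. A possible test for the plain back-propagation rule m′ = m − η·∂E/∂m (a sketch written for this review, assuming it sits in package neuralnetworkbase so the unexported alpha field is reachable):

package neuralnetworkbase

import (
	"math"
	"testing"

	mat "gonum.org/v1/gonum/mat"
)

// Sketch: ApplyDelta should subtract alpha-scaled derivatives element-wise.
func TestBackPropGradientApplyDelta(t *testing.T) {
	g := &BackPropGradient{alpha: 0.5}
	m := mat.NewDense(1, 2, []float64{1.0, -2.0})
	derivative := mat.NewDense(1, 2, []float64{0.4, 0.2})

	result := g.ApplyDelta(m, derivative)

	want := []float64{1.0 - 0.5*0.4, -2.0 - 0.5*0.2} // {0.8, -2.1}
	for j, w := range want {
		if got := result.At(0, j); math.Abs(got-w) > 1e-12 {
			t.Errorf("result[0][%d] = %v, want %v", j, got, w)
		}
	}
}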

+ 0 - 345
neuralnetwork/neuralnetworkbase/rprop.go

@@ -1,345 +0,0 @@
-/*
- * MIT License
- *
- * Copyright (c) 2019 Alexey Edelev <semlanik@gmail.com>
- *
- * This file is part of NeuralNetwork project https://git.semlanik.org/semlanik/NeuralNetwork
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy of this
- * software and associated documentation files (the "Software"), to deal in the Software
- * without restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software, and
- * to permit persons to whom the Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be included in all copies
- * or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
- * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
- * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
- * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-package neuralnetworkbase
-
-import (
-	"errors"
-	"fmt"
-	"io"
-
-	teach "../teach"
-	mat "gonum.org/v1/gonum/mat"
-)
-
-// NeuralNetwork is simple neural network implementation
-//
-// Resources:
-// http://neuralnetworksanddeeplearning.com
-// https://www.youtube.com/watch?v=fNk_zzaMoSs
-//
-// Matrix: A
-// Description: A is set of calculated neuron activations after sigmoid correction
-// Format:    0          l           L
-//         ⎡A[0] ⎤ ... ⎡A[0] ⎤ ... ⎡A[0] ⎤
-//         ⎢A[1] ⎥ ... ⎢A[1] ⎥ ... ⎢A[1] ⎥
-//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
-//         ⎢A[i] ⎥ ... ⎢A[i] ⎥ ... ⎢A[i] ⎥
-//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
-//         ⎣A[s] ⎦ ... ⎣A[s] ⎦ ... ⎣A[s] ⎦
-// Where s = Sizes[l] - Neural network layer size
-//       L = len(Sizes) - Number of neural network layers
-//
-// Matrix: Z
-// Description: Z is set of calculated raw neuron activations
-// Format:    0          l           L
-//         ⎡Z[0] ⎤ ... ⎡Z[0] ⎤ ... ⎡Z[0] ⎤
-//         ⎢Z[1] ⎥ ... ⎢Z[1] ⎥ ... ⎢Z[1] ⎥
-//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
-//         ⎢Z[i] ⎥ ... ⎢Z[i] ⎥ ... ⎢Z[i] ⎥
-//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
-//         ⎣Z[s] ⎦ ... ⎣Z[s] ⎦ ... ⎣Z[s] ⎦
-// Where s = Sizes[l] - Neural network layer size
-//       L = len(Sizes) - Number of neural network layers
-//
-// Matrix: Biases
-// Description: Biases is set of biases per layer except l0
-//              NOTE: l0 is always empty Dense because first layer
-//              doesn't have connections to previous layer
-// Format:    1          l           L
-//         ⎡b[0] ⎤ ... ⎡b[0] ⎤ ... ⎡b[0] ⎤
-//         ⎢b[1] ⎥ ... ⎢b[1] ⎥ ... ⎢b[1] ⎥
-//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
-//         ⎢b[i] ⎥ ... ⎢b[i] ⎥ ... ⎢b[i] ⎥
-//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
-//         ⎣b[s] ⎦ ... ⎣b[s] ⎦ ... ⎣b[s] ⎦
-// Where s = Sizes[l] - Neural network layer size
-//       L = len(Sizes) - Number of neural network layers
-//
-// Matrix: Weights
-// Description: Weights is set of weights per layer except l0
-//              NOTE: l0 is always empty Dense because first layer
-//              doesn't have connections to previous layer
-// Format:               1                                   l                                   L
-//         ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
-//         ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
-//         ⎢              ...            ⎥ ... ⎢              ...            ⎥ ... ⎢              ...            ⎥
-//         ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
-//         ⎢              ...            ⎥ ... ⎢              ...            ⎥ ... ⎢              ...            ⎥
-//         ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
-// Where s = Sizes[l] - Neural network layer size
-//       s' = Sizes[l-1] - Previous neural network layer size
-//       L = len(Sizes) - Number of neural network layers
-
-type RProp struct {
-	Count          int
-	Sizes          []int
-	Biases         []*mat.Dense
-	Weights        []*mat.Dense
-	A              []*mat.Dense
-	Z              []*mat.Dense
-	alpha          float64
-	trainingCycles int
-}
-
-func NewRProp(sizes []int, nu float64, trainingCycles int) (nn *RProp, err error) {
-	err = nil
-	if len(sizes) < 3 {
-		fmt.Printf("Invalid network configuration: %v\n", sizes)
-		return nil, errors.New("Invalid network configuration: %v\n")
-	}
-
-	for i := 0; i < len(sizes); i++ {
-		if sizes[i] < 2 {
-			fmt.Printf("Invalid network configuration: %v\n", sizes)
-			return nil, errors.New("Invalid network configuration: %v\n")
-		}
-	}
-
-	if nu <= 0.0 || nu > 1.0 {
-		fmt.Printf("Invalid η value: %v\n", nu)
-		return nil, errors.New("Invalid η value: %v\n")
-	}
-
-	if trainingCycles <= 0 {
-		fmt.Printf("Invalid training cycles number: %v\n", trainingCycles)
-		return nil, errors.New("Invalid training cycles number: %v\n")
-	}
-
-	if trainingCycles < 100 {
-		fmt.Println("Training cycles number probably is too small")
-	}
-
-	nn = &RProp{}
-	nn.Sizes = sizes
-	nn.Count = len(sizes)
-	nn.Weights = make([]*mat.Dense, nn.Count)
-	nn.Biases = make([]*mat.Dense, nn.Count)
-	nn.A = make([]*mat.Dense, nn.Count)
-	nn.Z = make([]*mat.Dense, nn.Count)
-	nn.alpha = nu / float64(nn.Sizes[0])
-	nn.trainingCycles = trainingCycles
-
-	for i := 1; i < nn.Count; i++ {
-		nn.Weights[i] = generateRandomDense(nn.Sizes[i], nn.Sizes[i-1])
-		nn.Biases[i] = generateRandomDense(nn.Sizes[i], 1)
-	}
-	return
-}
-
-func (nn *RProp) Copy() (out *RProp) {
-	out = &RProp{}
-	out.Sizes = nn.Sizes
-	out.Count = nn.Count
-	out.Weights = make([]*mat.Dense, nn.Count)
-	out.Biases = make([]*mat.Dense, nn.Count)
-	out.A = make([]*mat.Dense, nn.Count)
-	out.Z = make([]*mat.Dense, nn.Count)
-	out.alpha = nn.alpha
-	out.trainingCycles = nn.trainingCycles
-
-	for i := 1; i < out.Count; i++ {
-		nn.Weights[i] = mat.DenseCopyOf(out.Weights[i])
-		nn.Biases[i] = mat.DenseCopyOf(out.Biases[i])
-	}
-	return
-}
-
-func (nn *RProp) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
-	r, _ := aIn.Dims()
-	if r != nn.Sizes[0] {
-		fmt.Printf("Invalid rows number of input matrix size: %v\n", r)
-		return -1, 0.0
-	}
-
-	nn.forward(aIn)
-	result := nn.result()
-	r, _ = result.Dims()
-	max = 0.0
-	maxIndex = 0
-	for i := 0; i < r; i++ {
-		if result.At(i, 0) > max {
-			max = result.At(i, 0)
-			maxIndex = i
-		}
-	}
-	return
-}
-
-func (nn *RProp) Teach(teacher teach.Teacher) {
-	for i := 0; i < nn.trainingCycles; i++ {
-		for teacher.NextData() {
-			nn.backward(teacher.GetData())
-		}
-	}
-}
-
-func (nn *RProp) SaveState(writer io.Writer) {
-}
-
-func (nn *RProp) LoadState(reader io.Reader) {
-}
-
-func (nn *RProp) forward(aIn mat.Matrix) {
-	nn.A[0] = mat.DenseCopyOf(aIn)
-
-	for i := 1; i < nn.Count; i++ {
-		nn.A[i] = mat.NewDense(nn.Sizes[i], 1, nil)
-		aSrc := nn.A[i-1]
-		aDst := nn.A[i]
-
-		// Each iteration implements formula bellow for neuron activation values
-		// A[l]=σ(W[l]*A[l−1]+B[l])
-
-		// W[l]*A[l−1]
-		aDst.Mul(nn.Weights[i], aSrc)
-
-		// W[l]*A[l−1]+B[l]
-		aDst.Add(aDst, nn.Biases[i])
-
-		// Save raw activation value for back propagation
-		nn.Z[i] = mat.DenseCopyOf(aDst)
-
-		// σ(W[l]*A[l−1]+B[l])
-		aDst.Apply(applySigmoid, aDst)
-	}
-}
-
-func (nn *RProp) backward(aIn, aOut mat.Matrix) {
-	nn.forward(aIn)
-
-	lastLayerNum := nn.Count - 1
-
-	// To calculate new values of weights and biases
-	// following formulas are used:
-	// W[l] = A[l−1]*δ[l]
-	// B[l] = δ[l]
-
-	// For last layer δ value is calculated by following:
-	// δ = (A[L]−y)⊙σ'(Z[L])
-
-	// Calculate initial error for last layer L
-	// error = A[L]-y
-	// Where y is expected activations set
-	err := &mat.Dense{}
-	err.Sub(nn.result(), aOut)
-
-	// Calculate sigmoids prime σ'(Z[L]) for last layer L
-	sigmoidsPrime := &mat.Dense{}
-	sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[lastLayerNum])
-
-	// (A[L]−y)⊙σ'(Z[L])
-	delta := &mat.Dense{}
-	delta.MulElem(err, sigmoidsPrime)
-
-	// B[L] = δ[L]
-	biases := mat.DenseCopyOf(delta)
-
-	// W[L] = A[L−1]*δ[L]
-	weights := &mat.Dense{}
-	weights.Mul(delta, nn.A[lastLayerNum-1].T())
-
-	// Initialize new weights and biases values with last layer values
-	newBiases := []*mat.Dense{makeBackGradient(biases, nn.Biases[lastLayerNum], nn.alpha)}
-	newWeights := []*mat.Dense{makeBackGradient(weights, nn.Weights[lastLayerNum], nn.alpha)}
-
-	// Save calculated delta value temporary error variable
-	err = delta
-
-	// Next layer Weights and Biases are calculated using same formulas:
-	// W[l] = A[l−1]*δ[l]
-	// B[l] = δ[l]
-
-	// But δ[l] is calculated using different formula:
-	// δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
-	// Where Wt[l+1] is transposed matrix of actual Weights from
-	// forward step
-	for l := nn.Count - 2; l > 0; l-- {
-		// Calculate sigmoids prime σ'(Z[l]) for last layer l
-		sigmoidsPrime := &mat.Dense{}
-		sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[l])
-
-		// (Wt[l+1])*δ[l+1]
-		// err bellow is delta from previous step(l+1)
-		delta := &mat.Dense{}
-		wdelta := &mat.Dense{}
-		wdelta.Mul(nn.Weights[l+1].T(), err)
-
-		// Calculate new delta and store it to temporary variable err
-		// δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
-		delta.MulElem(wdelta, sigmoidsPrime)
-		err = delta
-
-		// B[l] = δ[l]
-		biases := mat.DenseCopyOf(delta)
-
-		// W[l] = A[l−1]*δ[l]
-		// At this point it's required to give explanation for inaccuracy
-		// in the formula
-
-		// Multiplying of activations matrix for layer l-1 and δ[l] is imposible
-		// because view of matrices are following:
-		//          A[l-1]       δ[l]
-		//         ⎡A[0]  ⎤     ⎡δ[0] ⎤
-		//         ⎢A[1]  ⎥     ⎢δ[1] ⎥
-		//         ⎢ ...  ⎥     ⎢ ... ⎥
-		//         ⎢A[i]  ⎥  X  ⎢δ[i] ⎥
-		//         ⎢ ...  ⎥     ⎢ ... ⎥
-		//         ⎣A[s'] ⎦     ⎣δ[s] ⎦
-		// So we need to modify these matrices to apply mutiplications and got
-		// Weights matrix of following view:
-		//         ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
-		//         ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
-		//         ⎢              ...            ⎥
-		//         ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
-		//         ⎢              ...            ⎥
-		//         ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
-		// So we swap matrices and transpose A[l-1] to get valid multiplication
-		// of following view:
-		//           δ[l]               A[l-1]
-		//         ⎡δ[0] ⎤ x [A[0] A[1] ... A[i] ... A[s']]
-		//         ⎢δ[1] ⎥
-		//         ⎢ ... ⎥
-		//         ⎢δ[i] ⎥
-		//         ⎢ ... ⎥
-		//         ⎣δ[s] ⎦
-		weights := &mat.Dense{}
-		weights.Mul(delta, nn.A[l-1].T())
-
-		// !Prepend! new Biases and Weights
-		newBiases = append([]*mat.Dense{makeBackGradient(biases, nn.Biases[l], nn.alpha)}, newBiases...)
-		newWeights = append([]*mat.Dense{makeBackGradient(weights, nn.Weights[l], nn.alpha)}, newWeights...)
-	}
-
-	newBiases = append([]*mat.Dense{&mat.Dense{}}, newBiases...)
-	newWeights = append([]*mat.Dense{&mat.Dense{}}, newWeights...)
-
-	nn.Biases = newBiases
-	nn.Weights = newWeights
-}
-
-func (nn *RProp) result() *mat.Dense {
-	return nn.A[nn.Count-1]
-}