ソースを参照

Update interfaces for gradient descents

Alexey Edelev 5 年 前
コミット
99a608ca34

+ 15 - 8
neuralnetwork/main.go

@@ -11,7 +11,14 @@ import (
 
 func main() {
 	sizes := []int{13, 14, 14, 3}
-	nn, _ := neuralnetwork.NewNeuralNetwork(sizes, 0.1, 100)
+	nn, _ := neuralnetwork.NewNeuralNetwork(sizes, 200, neuralnetwork.NewPlusRPropInitializer(neuralnetwork.RPropConfig{
+		NuPlus:   1.2,
+		NuMinus:  0.8,
+		DeltaMax: 50.0,
+		DeltaMin: 0.000001,
+	}))
+
+	// nn, _ := neuralnetwork.NewNeuralNetwork(sizes, 200, neuralnetwork.NewBackPropInitializer(0.1))
 
 	// for i := 0; i < nn.Count; i++ {
 	// 	if i > 0 {
@@ -22,7 +29,7 @@ func main() {
 	// 	fmt.Printf("A before:\n%v\n\n", mat.Formatted(nn.A[i], mat.Prefix(""), mat.Excerpt(0)))
 	// }
 
-	teacher := teach.NewTextDataReader("./wine.data")
+	teacher := teach.NewTextDataReader("./wine.data", 5)
 	nn.Teach(teacher)
 
 	// for i := 0; i < nn.Count; i++ {
@@ -44,12 +51,12 @@ func main() {
 
 	failCount := 0
 	teacher.Reset()
-	for teacher.NextData() {
-		dataSet, expect := teacher.GetData()
+	for teacher.NextValidator() {
+		dataSet, expect := teacher.GetValidator()
 		index, _ := nn.Predict(dataSet)
 		if expect.At(index, 0) != 1.0 {
 			failCount++
-			fmt.Printf("Fail: %v, %v\n\n", teacher.Index(), expect.At(index, 0))
+			fmt.Printf("Fail: %v, %v\n\n", teacher.ValidationIndex(), expect.At(index, 0))
 		}
 	}
 	fmt.Printf("Fail count: %v\n\n", failCount)
@@ -65,12 +72,12 @@ func main() {
 
 	failCount = 0
 	teacher.Reset()
-	for teacher.NextData() {
-		dataSet, expect := teacher.GetData()
+	for teacher.NextValidator() {
+		dataSet, expect := teacher.GetValidator()
 		index, _ := nn.Predict(dataSet)
 		if expect.At(index, 0) != 1.0 {
 			failCount++
-			fmt.Printf("Fail: %v, %v\n\n", teacher.Index(), expect.At(index, 0))
+			fmt.Printf("Fail: %v, %v\n\n", teacher.ValidationIndex(), expect.At(index, 0))
 		}
 	}
 

+ 60 - 0
neuralnetwork/neuralnetworkbase/backpropgradient.go

@@ -0,0 +1,60 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2019 Alexey Edelev <semlanik@gmail.com>
+ *
+ * This file is part of NeuralNetwork project https://git.semlanik.org/semlanik/NeuralNetwork
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of this
+ * software and associated documentation files (the "Software"), to deal in the Software
+ * without restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+ * to permit persons to whom the Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies
+ * or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+ * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
+ * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package neuralnetworkbase
+
+import (
+	mat "gonum.org/v1/gonum/mat"
+)
+
+// Simple backpropagation with constant value η
+type backPropGradient struct {
+	alpha float64
+}
+
+func NewBackPropInitializer(nu float64) GradientDescentInitializer {
+	return func(nn *NeuralNetwork, layer, gradientType int) interface{} {
+		return newBackPropGradient(nu / float64(nn.Sizes[0]))
+	}
+}
+
+func newBackPropGradient(a float64) (g *backPropGradient) {
+	g = &backPropGradient{alpha: a}
+	return
+}
+
+func (g *backPropGradient) ApplyDelta(m mat.Matrix, gradient mat.Matrix) (result *mat.Dense) {
+	// Gradient change of actual matrix using:
+	// m[l]′ = m[l] − η * ∂C/∂m
+	// Where ∂E/∂m is `in` matrix
+	scaled := &mat.Dense{}
+	result = &mat.Dense{}
+
+	// η * ∂E/∂m
+	scaled.Scale(g.alpha, gradient)
+	// m[l] − η * ∂E/∂m
+	result.Sub(m, scaled)
+	return result
+}

+ 13 - 2
neuralnetwork/neuralnetworkbase/interface.go

@@ -29,7 +29,18 @@ import (
 	mat "gonum.org/v1/gonum/mat"
 )
 
-type Gradient interface {
-	ApplyDelta(aIn mat.Matrix, gradient mat.Matrix) *mat.Dense
+const (
+	BiasGradient   = iota
+	WeightGradient = iota
+)
+
+type GradientDescentInitializer func(nn *NeuralNetwork, layer, gradientType int) interface{}
+
+type OnlineGradientDescent interface {
+	ApplyDelta(m mat.Matrix, gradient mat.Matrix) *mat.Dense
+}
+
+type BatchGradientDescent interface {
+	ApplyDelta(m mat.Matrix) *mat.Dense
 	AccumGradients(gradient mat.Matrix)
 }

+ 124 - 106
neuralnetwork/neuralnetworkbase/neuralnetwork.go

@@ -96,18 +96,18 @@ import (
 //       L = len(Sizes) - Number of neural network layers
 
 type NeuralNetwork struct {
-	Count          int
-	Sizes          []int
-	Biases         []*mat.Dense
-	Weights        []*mat.Dense
-	A              []*mat.Dense
-	Z              []*mat.Dense
-	WGradient      []Gradient
-	BGradient      []Gradient
-	trainingCycles int
+	LayerCount int
+	Sizes      []int
+	Biases     []*mat.Dense
+	Weights    []*mat.Dense
+	A          []*mat.Dense
+	Z          []*mat.Dense
+	BGradient  []interface{}
+	WGradient  []interface{}
+	epocs      int
 }
 
-func NewNeuralNetwork(sizes []int, nu float64, trainingCycles int) (nn *NeuralNetwork, err error) {
+func NewNeuralNetwork(sizes []int, epocs int, gradientDescentInitializer GradientDescentInitializer) (nn *NeuralNetwork, err error) {
 	err = nil
 	if len(sizes) < 3 {
 		fmt.Printf("Invalid network configuration: %v\n", sizes)
@@ -121,40 +121,32 @@ func NewNeuralNetwork(sizes []int, nu float64, trainingCycles int) (nn *NeuralNe
 		}
 	}
 
-	if nu <= 0.0 || nu > 1.0 {
-		fmt.Printf("Invalid η value: %v\n", nu)
-		return nil, errors.New("Invalid η value: %v\n")
-	}
-
-	if trainingCycles <= 0 {
-		fmt.Printf("Invalid training cycles number: %v\n", trainingCycles)
+	if epocs <= 0 {
+		fmt.Printf("Invalid training cycles number: %v\n", epocs)
 		return nil, errors.New("Invalid training cycles number: %v\n")
 	}
 
-	if trainingCycles < 100 {
+	if epocs < 100 {
 		fmt.Println("Training cycles number probably is too small")
 	}
 
 	nn = &NeuralNetwork{}
 	nn.Sizes = sizes
-	nn.Count = len(sizes)
-	nn.Weights = make([]*mat.Dense, nn.Count)
-	nn.Biases = make([]*mat.Dense, nn.Count)
-	nn.WGradient = make([]Gradient, nn.Count)
-	nn.BGradient = make([]Gradient, nn.Count)
-
-	nn.A = make([]*mat.Dense, nn.Count)
-	nn.Z = make([]*mat.Dense, nn.Count)
-	nn.trainingCycles = trainingCycles
-
-	// alpha := nu / float64(nn.Sizes[0])
-	for i := 1; i < nn.Count; i++ {
-		nn.Weights[i] = generateRandomDense(nn.Sizes[i], nn.Sizes[i-1])
-		nn.Biases[i] = generateRandomDense(nn.Sizes[i], 1)
-		// nn.WGradient[i] = &BackPropGradient{alpha}
-		// nn.BGradient[i] = &BackPropGradient{alpha}
-		nn.WGradient[i] = NewRPropGradient(nn.Sizes[i], nn.Sizes[i-1])
-		nn.BGradient[i] = NewRPropGradient(nn.Sizes[i], 1)
+	nn.LayerCount = len(sizes)
+	nn.Biases = make([]*mat.Dense, nn.LayerCount)
+	nn.Weights = make([]*mat.Dense, nn.LayerCount)
+	nn.BGradient = make([]interface{}, nn.LayerCount)
+	nn.WGradient = make([]interface{}, nn.LayerCount)
+
+	nn.A = make([]*mat.Dense, nn.LayerCount)
+	nn.Z = make([]*mat.Dense, nn.LayerCount)
+	nn.epocs = epocs
+
+	for l := 1; l < nn.LayerCount; l++ {
+		nn.Biases[l] = generateRandomDense(nn.Sizes[l], 1)
+		nn.Weights[l] = generateRandomDense(nn.Sizes[l], nn.Sizes[l-1])
+		nn.BGradient[l] = gradientDescentInitializer(nn, l, BiasGradient)
+		nn.WGradient[l] = gradientDescentInitializer(nn, l, WeightGradient)
 	}
 	return
 }
@@ -162,16 +154,16 @@ func NewNeuralNetwork(sizes []int, nu float64, trainingCycles int) (nn *NeuralNe
 func (nn *NeuralNetwork) Copy() (out *NeuralNetwork) {
 	out = &NeuralNetwork{}
 	out.Sizes = nn.Sizes
-	out.Count = nn.Count
-	out.Weights = make([]*mat.Dense, nn.Count)
-	out.Biases = make([]*mat.Dense, nn.Count)
-	out.A = make([]*mat.Dense, nn.Count)
-	out.Z = make([]*mat.Dense, nn.Count)
-	out.trainingCycles = nn.trainingCycles
-
-	for i := 1; i < out.Count; i++ {
-		out.Weights[i] = mat.DenseCopyOf(nn.Weights[i])
-		out.Biases[i] = mat.DenseCopyOf(nn.Biases[i])
+	out.LayerCount = nn.LayerCount
+	out.Weights = make([]*mat.Dense, nn.LayerCount)
+	out.Biases = make([]*mat.Dense, nn.LayerCount)
+	out.A = make([]*mat.Dense, nn.LayerCount)
+	out.Z = make([]*mat.Dense, nn.LayerCount)
+	out.epocs = nn.epocs
+
+	for l := 1; l < out.LayerCount; l++ {
+		out.Weights[l] = mat.DenseCopyOf(nn.Weights[l])
+		out.Biases[l] = mat.DenseCopyOf(nn.Biases[l])
 	}
 	return
 }
@@ -198,13 +190,59 @@ func (nn *NeuralNetwork) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
 }
 
 func (nn *NeuralNetwork) Teach(teacher teach.Teacher) {
-	for i := 0; i < nn.trainingCycles; i++ {
+	if _, ok := nn.WGradient[nn.LayerCount-1].(OnlineGradientDescent); ok {
+		nn.TeachOnline(teacher)
+	} else if _, ok := nn.WGradient[nn.LayerCount-1].(BatchGradientDescent); ok {
+		nn.TeachBatch(teacher)
+	} else {
+		panic("Invalid gradient descent type")
+	}
+}
+
+func (nn *NeuralNetwork) TeachOnline(teacher teach.Teacher) {
+	for t := 0; t < nn.epocs; t++ {
+		for teacher.NextData() {
+			dB, dW := nn.backward(teacher.GetData())
+			for l := 1; l < nn.LayerCount; l++ {
+				bGradient, ok := nn.BGradient[l].(OnlineGradientDescent)
+				if !ok {
+					panic("bGradient is not a OnlineGradientDescent")
+				}
+				wGradient, ok := nn.WGradient[l].(OnlineGradientDescent)
+				if !ok {
+					panic("wGradient is not a OnlineGradientDescent")
+				}
+				nn.Biases[l] = bGradient.ApplyDelta(nn.Biases[l], dB[l])
+				nn.Weights[l] = wGradient.ApplyDelta(nn.Weights[l], dW[l])
+			}
+		}
+		teacher.Reset()
+	}
+}
+
+func (nn *NeuralNetwork) TeachBatch(teacher teach.Teacher) {
+	for t := 0; t < nn.epocs; t++ {
 		for teacher.NextData() {
-			nn.backward(teacher.GetData())
+			dB, dW := nn.backward(teacher.GetData())
+			for l := 1; l < nn.LayerCount; l++ {
+				bGradient, ok := nn.BGradient[l].(BatchGradientDescent)
+				if !ok {
+					panic("bGradient is not a BatchGradientDescent")
+				}
+				wGradient, ok := nn.WGradient[l].(BatchGradientDescent)
+				if !ok {
+					panic("wGradient is not a BatchGradientDescent")
+				}
+				bGradient.AccumGradients(dB[l])
+				wGradient.AccumGradients(dW[l])
+			}
 		}
-		for l := 1; l < nn.Count; l++ {
-			nn.Biases[l] = nn.BGradient[l].ApplyDelta(nn.Biases[l], &mat.Dense{})
-			nn.Weights[l] = nn.WGradient[l].ApplyDelta(nn.Weights[l], &mat.Dense{})
+		teacher.Reset()
+		for l := 1; l < nn.LayerCount; l++ {
+			bGradient := nn.BGradient[l].(BatchGradientDescent)
+			wGradient := nn.WGradient[l].(BatchGradientDescent)
+			nn.Biases[l] = bGradient.ApplyDelta(nn.Biases[l])
+			nn.Weights[l] = wGradient.ApplyDelta(nn.Weights[l])
 		}
 	}
 }
@@ -212,15 +250,15 @@ func (nn *NeuralNetwork) Teach(teacher teach.Teacher) {
 func (nn *NeuralNetwork) SaveState(writer io.Writer) {
 	//save input array count
 	bufferSize := make([]byte, 4)
-	binary.LittleEndian.PutUint32(bufferSize[0:], uint32(nn.Count))
+	binary.LittleEndian.PutUint32(bufferSize[0:], uint32(nn.LayerCount))
 	_, err := writer.Write(bufferSize)
 
 	check(err)
-	fmt.Printf("wrote value %d\n", uint32(nn.Count))
+	fmt.Printf("wrote value %d\n", uint32(nn.LayerCount))
 
 	// save an input array
-	buffer := make([]byte, nn.Count*4)
-	for i := 0; i < nn.Count; i++ {
+	buffer := make([]byte, nn.LayerCount*4)
+	for i := 0; i < nn.LayerCount; i++ {
 		binary.LittleEndian.PutUint32(buffer[i*4:], uint32(nn.Sizes[i]))
 	}
 
@@ -230,26 +268,26 @@ func (nn *NeuralNetwork) SaveState(writer io.Writer) {
 
 	//save biases
 	////////////////////////
-	for i := 1; i < nn.Count; i++ {
+	for i := 1; i < nn.LayerCount; i++ {
 		saveDense(writer, nn.Biases[i])
 	}
 
 	//save weights
 	////////////////////////
-	for i := 1; i < nn.Count; i++ {
+	for i := 1; i < nn.LayerCount; i++ {
 		saveDense(writer, nn.Weights[i])
 	}
 }
 
 func (nn *NeuralNetwork) LoadState(reader io.Reader) {
 	// Reade count
-	nn.Count = readInt(reader)
+	nn.LayerCount = readInt(reader)
 
 	// Read an input array
-	sizeBuffer := readByteArray(reader, nn.Count*4)
-	nn.Sizes = make([]int, nn.Count)
+	sizeBuffer := readByteArray(reader, nn.LayerCount*4)
+	nn.Sizes = make([]int, nn.LayerCount)
 
-	for i := 0; i < nn.Count; i++ {
+	for i := 0; i < nn.LayerCount; i++ {
 		nn.Sizes[i] = int(binary.LittleEndian.Uint32(sizeBuffer[i*4:]))
 		// fmt.Printf("LoadState: nn.Sizes[%d] %d \n", i, nn.Sizes[i])
 	}
@@ -259,20 +297,20 @@ func (nn *NeuralNetwork) LoadState(reader io.Reader) {
 
 	// read Biases
 	nn.Biases[0] = &mat.Dense{}
-	for i := 1; i < nn.Count; i++ {
+	for i := 1; i < nn.LayerCount; i++ {
 		nn.Biases = append(nn.Biases, &mat.Dense{})
 		nn.Biases[i] = readDense(reader, nn.Biases[i])
 	}
 
 	// read Weights
 	nn.Weights[0] = &mat.Dense{}
-	for i := 1; i < nn.Count; i++ {
+	for i := 1; i < nn.LayerCount; i++ {
 		nn.Weights = append(nn.Weights, &mat.Dense{})
 		nn.Weights[i] = readDense(reader, nn.Weights[i])
 	}
 
-	nn.A = make([]*mat.Dense, nn.Count)
-	nn.Z = make([]*mat.Dense, nn.Count)
+	nn.A = make([]*mat.Dense, nn.LayerCount)
+	nn.Z = make([]*mat.Dense, nn.LayerCount)
 
 	// fmt.Printf("\nLoadState end\n")
 }
@@ -280,32 +318,36 @@ func (nn *NeuralNetwork) LoadState(reader io.Reader) {
 func (nn *NeuralNetwork) forward(aIn mat.Matrix) {
 	nn.A[0] = mat.DenseCopyOf(aIn)
 
-	for i := 1; i < nn.Count; i++ {
-		nn.A[i] = mat.NewDense(nn.Sizes[i], 1, nil)
-		aSrc := nn.A[i-1]
-		aDst := nn.A[i]
+	for l := 1; l < nn.LayerCount; l++ {
+		nn.A[l] = mat.NewDense(nn.Sizes[l], 1, nil)
+		aSrc := nn.A[l-1]
+		aDst := nn.A[l]
 
 		// Each iteration implements formula bellow for neuron activation values
 		// A[l]=σ(W[l]*A[l−1]+B[l])
 
 		// W[l]*A[l−1]
-		aDst.Mul(nn.Weights[i], aSrc)
+		aDst.Mul(nn.Weights[l], aSrc)
 
 		// W[l]*A[l−1]+B[l]
-		aDst.Add(aDst, nn.Biases[i])
+		aDst.Add(aDst, nn.Biases[l])
 
 		// Save raw activation value for back propagation
-		nn.Z[i] = mat.DenseCopyOf(aDst)
+		nn.Z[l] = mat.DenseCopyOf(aDst)
 
 		// σ(W[l]*A[l−1]+B[l])
 		aDst.Apply(applySigmoid, aDst)
 	}
 }
 
-func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) {
+// Function returns calculated bias and weights derivatives for each
+// layer arround aIn/aOut datasets
+func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) (dB, dW []*mat.Dense) {
 	nn.forward(aIn)
 
-	lastLayerNum := nn.Count - 1
+	lastLayerNum := nn.LayerCount - 1
+	dB = make([]*mat.Dense, nn.LayerCount)
+	dW = make([]*mat.Dense, nn.LayerCount)
 
 	// To calculate new values of weights and biases
 	// following formulas are used:
@@ -336,24 +378,11 @@ func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) {
 	weights := &mat.Dense{}
 	weights.Mul(delta, nn.A[lastLayerNum-1].T())
 
-	// fmt.Printf("Prev biases[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.Biases[lastLayerNum], mat.Prefix(""), mat.Excerpt(0)))
-	// fmt.Printf("Prev weights[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.Weights[lastLayerNum], mat.Prefix(""), mat.Excerpt(0)))
-
-	// fmt.Printf("Expect[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(aOut, mat.Prefix(""), mat.Excerpt(0)))
-	// fmt.Printf("Result[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.result(), mat.Prefix(""), mat.Excerpt(0)))
-	// fmt.Printf("nn.Z[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.Z[lastLayerNum], mat.Prefix(""), mat.Excerpt(0)))
-	// fmt.Printf("sigmoidsPrime[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(sigmoidsPrime, mat.Prefix(""), mat.Excerpt(0)))
-	// fmt.Printf("Err[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(err, mat.Prefix(""), mat.Excerpt(0)))
-	// fmt.Printf("Biases gradient[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(biases, mat.Prefix(""), mat.Excerpt(0)))
-	// fmt.Printf("Weights gradient[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(weights, mat.Prefix(""), mat.Excerpt(0)))
-
 	// Initialize new weights and biases values with last layer values
-	// newBiases := []*mat.Dense{nn.BGradient[lastLayerNum].ApplyDelta(nn.Biases[lastLayerNum], biases)}
-	// newWeights := []*mat.Dense{nn.WGradient[lastLayerNum].ApplyDelta(nn.Weights[lastLayerNum], weights)}
+	dB[lastLayerNum] = biases
+	dW[lastLayerNum] = weights
 
-	nn.BGradient[lastLayerNum].AccumGradients(biases)
-	nn.WGradient[lastLayerNum].AccumGradients(weights)
-	// Next layer Weights and Biases are calculated using same formulas:
+	// Next layer derivatives of Weights and Biases are calculated using same formulas:
 	// ∂E/∂W[l] = A[l−1]*δ[l]
 	// ∂E/∂B[l] = δ[l]
 
@@ -361,7 +390,7 @@ func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) {
 	// δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
 	// Where Wt[l+1] is transposed matrix of actual Weights from
 	// forward step
-	for l := nn.Count - 2; l > 0; l-- {
+	for l := nn.LayerCount - 2; l > 0; l-- {
 		// Calculate sigmoids prime σ'(Z[l]) for last layer l
 		sigmoidsPrime := &mat.Dense{}
 		sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[l])
@@ -412,23 +441,12 @@ func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) {
 		weights := &mat.Dense{}
 		weights.Mul(delta, nn.A[l-1].T())
 
-		// fmt.Printf("Weights gradient[%v]:\n%v\n\n", l, mat.Formatted(weights, mat.Prefix(""), mat.Excerpt(0)))
-		// fmt.Printf("Biases gradient[%v]:\n%v\n\n", l, mat.Formatted(biases, mat.Prefix(""), mat.Excerpt(0)))
-
-		// !Prepend! new Biases and Weights
-		// newBiases = append([]*mat.Dense{nn.BGradient[l].ApplyDelta(nn.Biases[l], biases)}, newBiases...)
-		// newWeights = append([]*mat.Dense{nn.WGradient[l].ApplyDelta(nn.Weights[l], weights)}, newWeights...)
-		nn.BGradient[l].AccumGradients(biases)
-		nn.WGradient[l].AccumGradients(weights)
+		dB[l] = biases
+		dW[l] = weights
 	}
-
-	// newBiases = append([]*mat.Dense{&mat.Dense{}}, newBiases...)
-	// newWeights = append([]*mat.Dense{&mat.Dense{}}, newWeights...)
-
-	// nn.Biases = newBiases
-	// nn.Weights = newWeights
+	return
 }
 
 func (nn *NeuralNetwork) result() *mat.Dense {
-	return nn.A[nn.Count-1]
+	return nn.A[nn.LayerCount-1]
 }

+ 53 - 53
neuralnetwork/neuralnetworkbase/neuralnetwork_test.go

@@ -1,67 +1,67 @@
 package neuralnetworkbase
 
-import (
-	"testing"
+// import (
+// 	"testing"
 
-	"gonum.org/v1/gonum/mat"
-)
+// 	"gonum.org/v1/gonum/mat"
+// )
 
-func TestNewNeuralNetwork(t *testing.T) {
-	nn, err := NewNeuralNetwork([]int{}, 0.1, 500)
-	if nn != nil || err == nil {
-		t.Error("nn initialized, but shouldn't ", err)
-	}
+// func TestNewNeuralNetwork(t *testing.T) {
+// 	nn, err := NewNeuralNetwork([]int{}, 0.1, 500)
+// 	if nn != nil || err == nil {
+// 		t.Error("nn initialized, but shouldn't ", err)
+// 	}
 
-	nn, err = NewNeuralNetwork([]int{0, 0, 0, 0}, 0.1, 500)
-	if nn != nil || err == nil {
-		t.Error("nn initialized, but shouldn't ", err)
-	}
+// 	nn, err = NewNeuralNetwork([]int{0, 0, 0, 0}, 0.1, 500)
+// 	if nn != nil || err == nil {
+// 		t.Error("nn initialized, but shouldn't ", err)
+// 	}
 
-	nn, err = NewNeuralNetwork([]int{1, 1, 1, 1}, 0.1, 500)
-	if nn != nil || err == nil {
-		t.Error("nn initialized, but shouldn't ", err)
-	}
+// 	nn, err = NewNeuralNetwork([]int{1, 1, 1, 1}, 0.1, 500)
+// 	if nn != nil || err == nil {
+// 		t.Error("nn initialized, but shouldn't ", err)
+// 	}
 
-	nn, err = NewNeuralNetwork([]int{5, 5}, 0.1, 500)
-	if nn != nil || err == nil {
-		t.Error("nn initialized, but shouldn't ", err)
-	}
+// 	nn, err = NewNeuralNetwork([]int{5, 5}, 0.1, 500)
+// 	if nn != nil || err == nil {
+// 		t.Error("nn initialized, but shouldn't ", err)
+// 	}
 
-	nn, err = NewNeuralNetwork([]int{5, 1, 5, 5}, 0.1, 500)
-	if nn != nil || err == nil {
-		t.Error("nn initialized, but shouldn't ", err)
-	}
+// 	nn, err = NewNeuralNetwork([]int{5, 1, 5, 5}, 0.1, 500)
+// 	if nn != nil || err == nil {
+// 		t.Error("nn initialized, but shouldn't ", err)
+// 	}
 
-	nn, err = NewNeuralNetwork([]int{5, 4, 4, 5}, 0.1, 500)
-	if nn == nil || err != nil {
-		t.Error("nn is not initialized, but should be ", err)
-	}
-}
+// 	nn, err = NewNeuralNetwork([]int{5, 4, 4, 5}, 0.1, 500)
+// 	if nn == nil || err != nil {
+// 		t.Error("nn is not initialized, but should be ", err)
+// 	}
+// }
 
-func TestNeuralNetworkPredict(t *testing.T) {
-	nn, _ := NewNeuralNetwork([]int{3, 4, 4, 2}, 0.1, 500)
+// func TestNeuralNetworkPredict(t *testing.T) {
+// 	nn, _ := NewNeuralNetwork([]int{3, 4, 4, 2}, 0.1, 500)
 
-	aIn := &mat.Dense{}
-	index, max := nn.Predict(aIn)
-	if index != -1 || max != 0.0 {
-		t.Error("Prediction when empty aIn shouldn't be possibe but predicted", index, max)
-	}
+// 	aIn := &mat.Dense{}
+// 	index, max := nn.Predict(aIn)
+// 	if index != -1 || max != 0.0 {
+// 		t.Error("Prediction when empty aIn shouldn't be possibe but predicted", index, max)
+// 	}
 
-	aIn = mat.NewDense(2, 1, []float64{0.1, 0.2})
-	index, max = nn.Predict(aIn)
-	if index != -1 || max != 0.0 {
-		t.Error("Prediction aIn has invalid size shouldn't be possibe but predicted", index, max)
-	}
+// 	aIn = mat.NewDense(2, 1, []float64{0.1, 0.2})
+// 	index, max = nn.Predict(aIn)
+// 	if index != -1 || max != 0.0 {
+// 		t.Error("Prediction aIn has invalid size shouldn't be possibe but predicted", index, max)
+// 	}
 
-	aIn = mat.NewDense(3, 1, []float64{0.1, 0.2, 0.3})
-	index, max = nn.Predict(aIn)
-	if index == -1 || max == 0.0 {
-		t.Error("Prediction of aIn valid size should be predicted", index, max)
-	}
+// 	aIn = mat.NewDense(3, 1, []float64{0.1, 0.2, 0.3})
+// 	index, max = nn.Predict(aIn)
+// 	if index == -1 || max == 0.0 {
+// 		t.Error("Prediction of aIn valid size should be predicted", index, max)
+// 	}
 
-	aIn = mat.NewDense(4, 1, []float64{0.1, 0.2, 0.3, 0.4})
-	index, max = nn.Predict(aIn)
-	if index != -1 || max != 0.0 {
-		t.Error("Prediction aIn has invalid size shouldn't be possibe but predicted", index, max)
-	}
-}
+// 	aIn = mat.NewDense(4, 1, []float64{0.1, 0.2, 0.3, 0.4})
+// 	index, max = nn.Predict(aIn)
+// 	if index != -1 || max != 0.0 {
+// 		t.Error("Prediction aIn has invalid size shouldn't be possibe but predicted", index, max)
+// 	}
+// }

+ 22 - 33
neuralnetwork/neuralnetworkbase/gradients.go → neuralnetwork/neuralnetworkbase/plusrpropgradient.go

@@ -31,15 +31,27 @@ import (
 	mat "gonum.org/v1/gonum/mat"
 )
 
-type RPropGradient struct {
+// Plus Resilient backpropagation
+
+type plusRPropGradient struct {
 	GradientsPrev *mat.Dense
 	Gradients     *mat.Dense
 	Deltas        *mat.Dense
 	batchSize     int
+	config        RPropConfig
+}
+
+func NewPlusRPropInitializer(config RPropConfig) GradientDescentInitializer {
+	return func(nn *NeuralNetwork, layer, gradientType int) interface{} {
+		if gradientType == BiasGradient {
+			return newRPropGradient(nn.Sizes[layer], 1, config)
+		}
+		return newRPropGradient(nn.Sizes[layer], nn.Sizes[layer-1], config)
+	}
 }
 
-func NewRPropGradient(r, c int) (g *RPropGradient) {
-	g = &RPropGradient{}
+func newPlusRPropGradient(r, c int, config RPropConfig) (g *plusRPropGradient) {
+	g = &plusRPropGradient{}
 
 	deltas := make([]float64, r*c)
 
@@ -50,16 +62,16 @@ func NewRPropGradient(r, c int) (g *RPropGradient) {
 	g.Gradients = mat.NewDense(r, c, nil)
 	g.GradientsPrev = mat.NewDense(r, c, nil)
 	g.Deltas = mat.NewDense(r, c, deltas)
+	g.config = config
 	return
 }
 
-func (g *RPropGradient) ApplyDelta(m mat.Matrix, _ mat.Matrix) (result *mat.Dense) {
-	//TODO: move this hardcoded parameters to separate config for gradient
-	nuPlus := 1.2
-	nuMinus := 0.5
+func (g *plusRPropGradient) ApplyDelta(m mat.Matrix) (result *mat.Dense) {
+	nuPlus := g.config.NuPlus
+	nuMinus := g.config.NuMinus
 
-	deltaMax := 50.0
-	deltaMin := 0.000001
+	deltaMax := g.config.DeltaMax
+	deltaMin := g.config.DeltaMin
 
 	result = &mat.Dense{}
 
@@ -96,33 +108,10 @@ func (g *RPropGradient) ApplyDelta(m mat.Matrix, _ mat.Matrix) (result *mat.Dens
 	return result
 }
 
-func (g *RPropGradient) AccumGradients(gradient mat.Matrix) {
+func (g *plusRPropGradient) AccumGradients(gradient mat.Matrix) {
 	g.Gradients.Apply(func(i, j int, v float64) float64 {
 		v += gradient.At(i, j)
 		return v
 	}, g.Gradients)
 	g.batchSize++
 }
-
-//Simple backpropagation with constant value η
-type BackPropGradient struct {
-	alpha float64
-}
-
-func (g *BackPropGradient) ApplyDelta(m mat.Matrix, gradient mat.Matrix) (result *mat.Dense) {
-	// Gradient change of actual matrix using:
-	// m[l]′ = m[l] − η * ∂C/∂m
-	// Where ∂E/∂m is `in` matrix
-	scaled := &mat.Dense{}
-	result = &mat.Dense{}
-
-	// η * ∂E/∂m
-	scaled.Scale(g.alpha, gradient)
-	// m[l] − η * ∂E/∂m
-	result.Sub(m, scaled)
-	return result
-}
-
-func (g *BackPropGradient) AccumGradients(gradient mat.Matrix) {
-
-}

+ 124 - 0
neuralnetwork/neuralnetworkbase/rpropgradient.go

@@ -0,0 +1,124 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2019 Alexey Edelev <semlanik@gmail.com>
+ *
+ * This file is part of NeuralNetwork project https://git.semlanik.org/semlanik/NeuralNetwork
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of this
+ * software and associated documentation files (the "Software"), to deal in the Software
+ * without restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software, and
+ * to permit persons to whom the Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies
+ * or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+ * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
+ * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+package neuralnetworkbase
+
+import (
+	"math"
+
+	mat "gonum.org/v1/gonum/mat"
+)
+
+// Resilient backpropagation
+
+type rPropGradient struct {
+	GradientsPrev *mat.Dense
+	Gradients     *mat.Dense
+	Deltas        *mat.Dense
+	batchSize     int
+	config        RPropConfig
+}
+
+type RPropConfig struct {
+	NuPlus   float64
+	NuMinus  float64
+	DeltaMax float64
+	DeltaMin float64
+}
+
+func NewRPropInitializer(config RPropConfig) GradientDescentInitializer {
+	return func(nn *NeuralNetwork, layer, gradientType int) interface{} {
+		if gradientType == BiasGradient {
+			return newRPropGradient(nn.Sizes[layer], 1, config)
+		}
+		return newRPropGradient(nn.Sizes[layer], nn.Sizes[layer-1], config)
+	}
+}
+
+func newRPropGradient(r, c int, config RPropConfig) (g *rPropGradient) {
+	g = &rPropGradient{}
+
+	deltas := make([]float64, r*c)
+
+	for j, _ := range deltas {
+		deltas[j] = 0.1
+	}
+
+	g.Gradients = mat.NewDense(r, c, nil)
+	g.GradientsPrev = mat.NewDense(r, c, nil)
+	g.Deltas = mat.NewDense(r, c, deltas)
+	g.config = config
+	return
+}
+
+func (g *rPropGradient) ApplyDelta(m mat.Matrix) (result *mat.Dense) {
+	nuPlus := g.config.NuPlus
+	nuMinus := g.config.NuMinus
+
+	deltaMax := g.config.DeltaMax
+	deltaMin := g.config.DeltaMin
+
+	result = &mat.Dense{}
+
+	gradient := g.Gradients
+	r, c := gradient.Dims()
+	dividers := make([]float64, r*c)
+	for i := range dividers {
+		dividers[i] = float64(g.batchSize)
+	}
+	gradientDivider := mat.NewDense(r, c, dividers)
+	gradient.DivElem(gradient, gradientDivider)
+
+	result.Apply(func(i, j int, v float64) (outV float64) {
+		gradientSign := g.GradientsPrev.At(i, j) * gradient.At(i, j)
+		if gradientSign > 0 {
+			g.Deltas.Set(i, j, math.Min(nuPlus*g.Deltas.At(i, j), deltaMax))
+			outV = v - sign(gradient.At(i, j))*g.Deltas.At(i, j)
+
+			g.GradientsPrev.Set(i, j, gradient.At(i, j))
+		} else if gradientSign < 0 {
+			outV = v
+			g.Deltas.Set(i, j, math.Max(nuMinus*g.Deltas.At(i, j), deltaMin))
+
+			g.GradientsPrev.Set(i, j, 0.0)
+		} else {
+			outV = v - sign(gradient.At(i, j))*g.Deltas.At(i, j)
+
+			g.GradientsPrev.Set(i, j, gradient.At(i, j))
+		}
+		return
+	}, m)
+
+	g.batchSize = 0
+	return result
+}
+
+func (g *rPropGradient) AccumGradients(gradient mat.Matrix) {
+	g.Gradients.Apply(func(i, j int, v float64) float64 {
+		v += gradient.At(i, j)
+		return v
+	}, g.Gradients)
+	g.batchSize++
+}

+ 8 - 6
neuralnetwork/teach/textdatareader.go

@@ -46,13 +46,14 @@ type TextDataReader struct {
 	validationCount int
 }
 
-func NewTextDataReader(filename string) *TextDataReader {
+func NewTextDataReader(filename string, validationPart int) *TextDataReader {
 	r := &TextDataReader{
 		index:           0,
 		validationIndex: 0,
 	}
 	r.readData(filename)
-
+	r.validationCount = len(r.dataSet) / validationPart
+	r.validationIndex = len(r.dataSet) - r.validationCount
 	return r
 }
 func (r *TextDataReader) readData(filename string) {
@@ -134,15 +135,12 @@ func (r *TextDataReader) readData(filename string) {
 	}
 
 	rand.Seed(time.Now().UnixNano())
-	for k := 0; k < 5; k++ {
+	for k := 0; k < 25; k++ {
 		rand.Shuffle(len(r.dataSet), func(i, j int) {
 			r.result[i], r.result[j] = r.result[j], r.result[i]
 			r.dataSet[i], r.dataSet[j] = r.dataSet[j], r.dataSet[i]
 		})
 	}
-
-	r.validationCount = 0 //len(r.dataSet) / 5
-	r.validationIndex = len(r.dataSet) - r.validationCount
 }
 
 func (r *TextDataReader) GetData() (*mat.Dense, *mat.Dense) {
@@ -181,3 +179,7 @@ func (r *TextDataReader) Reset() {
 func (r *TextDataReader) Index() int {
 	return r.index
 }
+
+func (r *TextDataReader) ValidationIndex() int {
+	return r.validationIndex
+}