Implement batch learning for RProp

Alexey Edelev, 5 years ago
parent
commit 62b295ac3d

+ 1 - 1
neuralnetwork/main.go

@@ -11,7 +11,7 @@ import (
 
 func main() {
 	sizes := []int{13, 14, 14, 3}
-	nn, _ := neuralnetwork.NewNeuralNetwork(sizes, 0.1, 481)
+	nn, _ := neuralnetwork.NewNeuralNetwork(sizes, 0.1, 100)
 
 	// for i := 0; i < nn.Count; i++ {
 	// 	if i > 0 {
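A side note on the changed call: presumably the batched RProp updates introduced below converge in fewer passes, hence the drop from 481 to 100 cycles. For readability, the same call with the arguments spelled out (the names are illustrative only, and the discarded error is worth checking):

    sizes := []int{13, 14, 14, 3} // input layer, two hidden layers, output layer
    nu := 0.1                     // learning rate; unused once RProp replaces BackProp below
    trainingCycles := 100         // full passes over the training set
    nn, err := neuralnetwork.NewNeuralNetwork(sizes, nu, trainingCycles)
    if err != nil {
        panic(err) // sketch only; handle properly in real code
    }
    _ = nn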

+ 38 - 12
neuralnetwork/neuralnetworkbase/gradients.go

@@ -32,8 +32,10 @@ import (
 )
 
 type RPropGradient struct {
-	Gradients *mat.Dense
-	Deltas    *mat.Dense
+	GradientsPrev *mat.Dense
+	Gradients     *mat.Dense
+	Deltas        *mat.Dense
+	batchSize     int
 }
 
 func NewRPropGradient(r, c int) (g *RPropGradient) {
@@ -46,11 +48,12 @@ func NewRPropGradient(r, c int) (g *RPropGradient) {
 	}
 
 	g.Gradients = mat.NewDense(r, c, nil)
+	g.GradientsPrev = mat.NewDense(r, c, nil)
 	g.Deltas = mat.NewDense(r, c, deltas)
 	return
 }
 
-func (g *RPropGradient) ApplyDelta(m mat.Matrix, derivative mat.Matrix) (result *mat.Dense) {
+func (g *RPropGradient) ApplyDelta(m mat.Matrix, _ mat.Matrix) (result *mat.Dense) {
 	//TODO: move this hardcoded parameters to separate config for gradient
 	nuPlus := 1.2
 	nuMinus := 0.5
@@ -60,34 +63,53 @@ func (g *RPropGradient) ApplyDelta(m mat.Matrix, derivative mat.Matrix) (result
 
 	result = &mat.Dense{}
 
+	gradient := g.Gradients
+	r, c := gradient.Dims()
+	dividers := make([]float64, r*c)
+	for i := range dividers {
+		dividers[i] = float64(g.batchSize)
+	}
+	gradientDivider := mat.NewDense(r, c, dividers)
+	gradient.DivElem(gradient, gradientDivider)
+
 	result.Apply(func(i, j int, v float64) (outV float64) {
-		gradientSign := g.Gradients.At(i, j) * derivative.At(i, j)
+		gradientSign := g.GradientsPrev.At(i, j) * gradient.At(i, j)
 		if gradientSign > 0 {
 			g.Deltas.Set(i, j, math.Min(nuPlus*g.Deltas.At(i, j), deltaMax))
-			outV = v - sign(derivative.At(i, j))*g.Deltas.At(i, j)
+			outV = v - sign(gradient.At(i, j))*g.Deltas.At(i, j)
 
-			g.Gradients.Set(i, j, derivative.At(i, j))
+			g.GradientsPrev.Set(i, j, gradient.At(i, j))
 		} else if gradientSign < 0 {
-			outV = v + sign(g.Gradients.At(i, j))*g.Deltas.At(i, j)
+			outV = v + sign(g.GradientsPrev.At(i, j))*g.Deltas.At(i, j)
 			g.Deltas.Set(i, j, math.Max(nuMinus*g.Deltas.At(i, j), deltaMin))
 
-			g.Gradients.Set(i, j, 0.0)
+			g.GradientsPrev.Set(i, j, 0.0)
 		} else {
-			outV = v - sign(derivative.At(i, j))*g.Deltas.At(i, j)
+			outV = v - sign(gradient.At(i, j))*g.Deltas.At(i, j)
 
-			g.Gradients.Set(i, j, derivative.At(i, j))
+			g.GradientsPrev.Set(i, j, gradient.At(i, j))
 		}
 		return
 	}, m)
+
+	g.batchSize = 0
 	return result
 }
 
+func (g *RPropGradient) AccumGradients(gradient mat.Matrix) {
+	g.Gradients.Apply(func(i, j int, v float64) float64 {
+		v += gradient.At(i, j)
+		return v
+	}, g.Gradients)
+	g.batchSize++
+}
+
 //Simple backpropagation with constant value η
 type BackPropGradient struct {
 	alpha float64
 }
 
-func (g *BackPropGradient) ApplyDelta(m mat.Matrix, derivative mat.Matrix) (result *mat.Dense) {
+func (g *BackPropGradient) ApplyDelta(m mat.Matrix, gradient mat.Matrix) (result *mat.Dense) {
 	// Gradient change of actual matrix using:
 	// m[l]′ = m[l] − η * ∂C/∂m
 	// Where ∂E/∂m is `in` matrix
@@ -95,8 +117,12 @@ func (g *BackPropGradient) ApplyDelta(m mat.Matrix, derivative mat.Matrix) (resu
 	result = &mat.Dense{}
 
 	// η * ∂E/∂m
-	scaled.Scale(g.alpha, derivative)
+	scaled.Scale(g.alpha, gradient)
 	// m[l] − η * ∂E/∂m
 	result.Sub(m, scaled)
 	return result
 }
+
+func (g *BackPropGradient) AccumGradients(gradient mat.Matrix) {
+
+}
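The accumulate-then-average flow can be hard to follow inside the Apply callbacks above. A self-contained sketch of the same arithmetic using gonum/mat directly (the sample gradients are made up; note that Scale by 1/batchSize is equivalent to the DivElem-by-a-constant-matrix used in ApplyDelta):

    package main

    import (
        "fmt"

        "gonum.org/v1/gonum/mat"
    )

    func main() {
        // Two hypothetical per-sample gradients for a 2x2 weight matrix.
        samples := []*mat.Dense{
            mat.NewDense(2, 2, []float64{0.2, -0.4, 0.1, 0.3}),
            mat.NewDense(2, 2, []float64{0.4, -0.2, 0.3, 0.1}),
        }

        // AccumGradients: element-wise sum, counting the batch size.
        sum := mat.NewDense(2, 2, nil)
        batchSize := 0
        for _, g := range samples {
            sum.Add(sum, g)
            batchSize++
        }

        // ApplyDelta's first step: average the accumulated sum.
        avg := mat.NewDense(2, 2, nil)
        avg.Scale(1/float64(batchSize), sum)

        // Prints the mean gradient: [0.3 -0.3; 0.2 0.2]
        fmt.Println(mat.Formatted(avg))
    }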

+ 1 - 0
neuralnetwork/neuralnetworkbase/interface.go

@@ -31,4 +31,5 @@ import (
 
 type Gradient interface {
 	ApplyDelta(aIn mat.Matrix, gradient mat.Matrix) *mat.Dense
+	AccumGradients(gradient mat.Matrix)
 }
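With AccumGradients added to the interface, every strategy must now provide both methods. A sketch (not part of the commit) of compile-time conformance checks and of what a minimal new strategy would look like, assuming it lives in neuralnetworkbase and a recent gonum:

    // Compile-time assertions that both existing strategies satisfy Gradient.
    var _ Gradient = (*RPropGradient)(nil)
    var _ Gradient = (*BackPropGradient)(nil)

    // A hypothetical strategy only needs the two methods; per-sample
    // optimizers can leave AccumGradients empty, as BackPropGradient does.
    type identityGradient struct{}

    func (identityGradient) ApplyDelta(aIn mat.Matrix, gradient mat.Matrix) *mat.Dense {
        out := &mat.Dense{}
        out.CloneFrom(aIn) // no-op update: return the matrix unchanged
        return out
    }

    func (identityGradient) AccumGradients(gradient mat.Matrix) {}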

+ 21 - 13
neuralnetwork/neuralnetworkbase/neuralnetwork.go

@@ -147,14 +147,14 @@ func NewNeuralNetwork(sizes []int, nu float64, trainingCycles int) (nn *NeuralNe
 	nn.Z = make([]*mat.Dense, nn.Count)
 	nn.trainingCycles = trainingCycles
 
-	alpha := nu / float64(nn.Sizes[0])
+	// alpha := nu / float64(nn.Sizes[0])
 	for i := 1; i < nn.Count; i++ {
 		nn.Weights[i] = generateRandomDense(nn.Sizes[i], nn.Sizes[i-1])
 		nn.Biases[i] = generateRandomDense(nn.Sizes[i], 1)
-		nn.WGradient[i] = &BackPropGradient{alpha}
-		nn.BGradient[i] = &BackPropGradient{alpha}
-		// nn.WGradient[i] = NewRPropGradient(nn.Sizes[i], nn.Sizes[i-1])
-		// nn.BGradient[i] = NewRPropGradient(nn.Sizes[i], 1)
+		// nn.WGradient[i] = &BackPropGradient{alpha}
+		// nn.BGradient[i] = &BackPropGradient{alpha}
+		nn.WGradient[i] = NewRPropGradient(nn.Sizes[i], nn.Sizes[i-1])
+		nn.BGradient[i] = NewRPropGradient(nn.Sizes[i], 1)
 	}
 	return
 }
@@ -202,6 +202,10 @@ func (nn *NeuralNetwork) Teach(teacher teach.Teacher) {
 		for teacher.NextData() {
 			nn.backward(teacher.GetData())
 		}
+		for l := 1; l < nn.Count; l++ {
+			nn.Biases[l] = nn.BGradient[l].ApplyDelta(nn.Biases[l], &mat.Dense{})
+			nn.Weights[l] = nn.WGradient[l].ApplyDelta(nn.Weights[l], &mat.Dense{})
+		}
 	}
 }
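Teach now splits each cycle into an accumulate phase and an apply phase; the &mat.Dense{} passed to ApplyDelta is just a placeholder, since the RProp variant ignores its second parameter. A sketch of one full cycle (the outer loop over trainingCycles is an assumption; only the inner loops appear in this hunk):

    for c := 0; c < nn.trainingCycles; c++ {
        // Phase 1: accumulate gradients over the whole training set.
        for teacher.NextData() {
            nn.backward(teacher.GetData())
        }
        // Phase 2: one averaged RProp step per layer.
        for l := 1; l < nn.Count; l++ {
            nn.Biases[l] = nn.BGradient[l].ApplyDelta(nn.Biases[l], &mat.Dense{})
            nn.Weights[l] = nn.WGradient[l].ApplyDelta(nn.Weights[l], &mat.Dense{})
        }
        // (How the teacher rewinds between cycles is not shown in this hunk.)
    }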
 
@@ -344,9 +348,11 @@ func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) {
 	// fmt.Printf("Weights gradient[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(weights, mat.Prefix(""), mat.Excerpt(0)))
 
 	// Initialize new weights and biases values with last layer values
-	newBiases := []*mat.Dense{nn.BGradient[lastLayerNum].ApplyDelta(nn.Biases[lastLayerNum], biases)}
-	newWeights := []*mat.Dense{nn.WGradient[lastLayerNum].ApplyDelta(nn.Weights[lastLayerNum], weights)}
+	// newBiases := []*mat.Dense{nn.BGradient[lastLayerNum].ApplyDelta(nn.Biases[lastLayerNum], biases)}
+	// newWeights := []*mat.Dense{nn.WGradient[lastLayerNum].ApplyDelta(nn.Weights[lastLayerNum], weights)}
 
+	nn.BGradient[lastLayerNum].AccumGradients(biases)
+	nn.WGradient[lastLayerNum].AccumGradients(weights)
 	// Next layer Weights and Biases are calculated using same formulas:
 	// ∂E/∂W[l] = A[l−1]*δ[l]
 	// ∂E/∂B[l] = δ[l]
@@ -410,15 +416,17 @@ func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) {
 		// fmt.Printf("Biases gradient[%v]:\n%v\n\n", l, mat.Formatted(biases, mat.Prefix(""), mat.Excerpt(0)))
 
 		// !Prepend! new Biases and Weights
-		newBiases = append([]*mat.Dense{nn.BGradient[l].ApplyDelta(nn.Biases[l], biases)}, newBiases...)
-		newWeights = append([]*mat.Dense{nn.WGradient[l].ApplyDelta(nn.Weights[l], weights)}, newWeights...)
+		// newBiases = append([]*mat.Dense{nn.BGradient[l].ApplyDelta(nn.Biases[l], biases)}, newBiases...)
+		// newWeights = append([]*mat.Dense{nn.WGradient[l].ApplyDelta(nn.Weights[l], weights)}, newWeights...)
+		nn.BGradient[l].AccumGradients(biases)
+		nn.WGradient[l].AccumGradients(weights)
 	}
 
-	newBiases = append([]*mat.Dense{&mat.Dense{}}, newBiases...)
-	newWeights = append([]*mat.Dense{&mat.Dense{}}, newWeights...)
+	// newBiases = append([]*mat.Dense{&mat.Dense{}}, newBiases...)
+	// newWeights = append([]*mat.Dense{&mat.Dense{}}, newWeights...)
 
-	nn.Biases = newBiases
-	nn.Weights = newWeights
+	// nn.Biases = newBiases
+	// nn.Weights = newWeights
 }
 
 func (nn *NeuralNetwork) result() *mat.Dense {
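After this change, backward() no longer mutates weights or biases at all; it only feeds AccumGradients, and RPropGradient carries all state (GradientsPrev, Deltas, batchSize) between cycles. For reference, the element-wise rule that ApplyDelta implements, written out as comments (this restates the code's branches; nuPlus = 1.2 and nuMinus = 0.5 per the diff, while the deltaMax/deltaMin clamp values are defined outside the shown context):

    // For each element (i, j), with g = averaged batch gradient and
    // gPrev = gradient remembered from the previous batch:
    //
    //   if g * gPrev > 0:  delta = min(nuPlus * delta, deltaMax)
    //                      w    -= sign(g) * delta;  gPrev = g
    //   if g * gPrev < 0:  w    += sign(gPrev) * delta   // step back
    //                      delta = max(nuMinus * delta, deltaMin);  gPrev = 0
    //   otherwise:         w    -= sign(g) * delta;  gPrev = g
    //
    // Zeroing gPrev after a sign change sends the next update through the
    // "otherwise" branch, so the shrunken delta is not punished twice.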