@@ -32,8 +32,10 @@ import (
 )
 
 type RPropGradient struct {
-	Gradients *mat.Dense
-	Deltas    *mat.Dense
+	GradientsPrev *mat.Dense
+	Gradients     *mat.Dense
+	Deltas        *mat.Dense
+	batchSize     int
 }
 
 func NewRPropGradient(r, c int) (g *RPropGradient) {
@@ -46,11 +48,12 @@ func NewRPropGradient(r, c int) (g *RPropGradient) {
 	}
 
 	g.Gradients = mat.NewDense(r, c, nil)
+	g.GradientsPrev = mat.NewDense(r, c, nil)
 	g.Deltas = mat.NewDense(r, c, deltas)
 	return
 }
 
-func (g *RPropGradient) ApplyDelta(m mat.Matrix, derivative mat.Matrix) (result *mat.Dense) {
+func (g *RPropGradient) ApplyDelta(m mat.Matrix, _ mat.Matrix) (result *mat.Dense) {
 	//TODO: move this hardcoded parameters to separate config for gradient
 	nuPlus := 1.2
 	nuMinus := 0.5
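// Reference sketch, not part of the patch: the scalar form of the Rprop rule
// that the next hunk applies element-wise via result.Apply. The nuPlus/nuMinus
// values mirror the code above; deltaMax and deltaMin are assumed standard
// Rprop defaults, since their actual values sit in elided lines. The rpropStep
// name and the standalone form are purely illustrative.
package rpropsketch

import "math"

// sign mirrors the package's sign helper used in the diff.
func sign(v float64) float64 {
	if v > 0 {
		return 1
	}
	if v < 0 {
		return -1
	}
	return 0
}

// rpropStep returns the updated weight, the gradient to remember for the next
// step, and the adapted per-weight step size.
func rpropStep(w, grad, prevGrad, delta float64) (newW, newPrev, newDelta float64) {
	const nuPlus, nuMinus, deltaMax, deltaMin = 1.2, 0.5, 50.0, 1e-6 // deltaMax/deltaMin assumed
	switch {
	case prevGrad*grad > 0: // same sign: grow the step and move against the gradient
		newDelta = math.Min(nuPlus*delta, deltaMax)
		return w - sign(grad)*newDelta, grad, newDelta
	case prevGrad*grad < 0: // sign flip: undo the previous step, shrink, forget the gradient
		return w + sign(prevGrad)*delta, 0, math.Max(nuMinus*delta, deltaMin)
	default: // a zero gradient on either side: plain signed step, keep the step size
		return w - sign(grad)*delta, grad, delta
	}
}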
@@ -60,34 +63,53 @@ func (g *RPropGradient) ApplyDelta(m mat.Matrix, derivative mat.Matrix) (result
 
 	result = &mat.Dense{}
 
+	gradient := g.Gradients
+	r, c := gradient.Dims()
+	dividers := make([]float64, r*c)
+	for i := range dividers {
+		dividers[i] = float64(g.batchSize)
+	}
+	gradientDivider := mat.NewDense(r, c, dividers)
+	gradient.DivElem(gradient, gradientDivider)
+
 	result.Apply(func(i, j int, v float64) (outV float64) {
-		gradientSign := g.Gradients.At(i, j) * derivative.At(i, j)
+		gradientSign := g.GradientsPrev.At(i, j) * gradient.At(i, j)
 		if gradientSign > 0 {
 			g.Deltas.Set(i, j, math.Min(nuPlus*g.Deltas.At(i, j), deltaMax))
-			outV = v - sign(derivative.At(i, j))*g.Deltas.At(i, j)
+			outV = v - sign(gradient.At(i, j))*g.Deltas.At(i, j)
 
-			g.Gradients.Set(i, j, derivative.At(i, j))
+			g.GradientsPrev.Set(i, j, gradient.At(i, j))
 		} else if gradientSign < 0 {
-			outV = v + sign(g.Gradients.At(i, j))*g.Deltas.At(i, j)
+			outV = v + sign(g.GradientsPrev.At(i, j))*g.Deltas.At(i, j)
 			g.Deltas.Set(i, j, math.Max(nuMinus*g.Deltas.At(i, j), deltaMin))
 
-			g.Gradients.Set(i, j, 0.0)
+			g.GradientsPrev.Set(i, j, 0.0)
 		} else {
-			outV = v - sign(derivative.At(i, j))*g.Deltas.At(i, j)
+			outV = v - sign(gradient.At(i, j))*g.Deltas.At(i, j)
 
-			g.Gradients.Set(i, j, derivative.At(i, j))
+			g.GradientsPrev.Set(i, j, gradient.At(i, j))
 		}
 		return
 	}, m)
+
+	g.batchSize = 0
 	return result
 }
 
+func (g *RPropGradient) AccumGradients(gradient mat.Matrix) {
+	g.Gradients.Apply(func(i, j int, v float64) float64 {
+		v += gradient.At(i, j)
+		return v
+	}, g.Gradients)
+	g.batchSize++
+}
+
 //Simple backpropagation with constant value η
 type BackPropGradient struct {
 	alpha float64
 }
 
-func (g *BackPropGradient) ApplyDelta(m mat.Matrix, derivative mat.Matrix) (result *mat.Dense) {
+func (g *BackPropGradient) ApplyDelta(m mat.Matrix, gradient mat.Matrix) (result *mat.Dense) {
 	// Gradient change of actual matrix using:
 	// m[l]′ = m[l] − η * ∂C/∂m
 	// Where ∂E/∂m is `in` matrix
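// Sketch of the call pattern the new AccumGradients/batchSize pieces enable,
// not code from the patch: sum per-sample gradients, then let ApplyDelta divide
// the sum by batchSize and take one Rprop step for the whole batch. The nn
// package name, the sample type and computeGradient are hypothetical stand-ins
// and assume this sits next to the RPropGradient type defined above.
package nn

import "gonum.org/v1/gonum/mat"

type sample struct{ in, target []float64 }

// computeGradient is a placeholder for the caller's per-sample backpropagation.
func computeGradient(weights *mat.Dense, s sample) mat.Matrix {
	r, c := weights.Dims()
	return mat.NewDense(r, c, nil) // real code would return ∂E/∂w for this sample
}

func trainBatch(g *RPropGradient, weights *mat.Dense, batch []sample) *mat.Dense {
	for _, s := range batch {
		g.AccumGradients(computeGradient(weights, s)) // adds into g.Gradients, bumps batchSize
	}
	// The averaged gradient lives inside g, so the second argument is ignored.
	// Note that ApplyDelta as written resets batchSize but does not zero
	// g.Gradients, so a caller reusing g across batches keeps adding on top of
	// the previously averaged values.
	return g.ApplyDelta(weights, nil)
}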
@@ -95,8 +117,12 @@ func (g *BackPropGradient) ApplyDelta(m mat.Matrix, derivative mat.Matrix) (resu
 	result = &mat.Dense{}
 
 	// η * ∂E/∂m
-	scaled.Scale(g.alpha, derivative)
+	scaled.Scale(g.alpha, gradient)
 	// m[l] − η * ∂E/∂m
 	result.Sub(m, scaled)
 	return result
 }
+
+func (g *BackPropGradient) AccumGradients(gradient mat.Matrix) {
+
+}
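// Minimal illustration, not from the patch: the constant-rate update that
// BackPropGradient.ApplyDelta performs, m[l]′ = m[l] − η * ∂E/∂m, on a 1×1
// matrix. With η = 0.1, weight 0.5 and gradient 0.2 the new weight is ≈ 0.48.
// The package main wrapper and the literal values exist only for this example.
package main

import (
	"fmt"

	"gonum.org/v1/gonum/mat"
)

func main() {
	eta := 0.1
	m := mat.NewDense(1, 1, []float64{0.5})    // current weight m[l]
	grad := mat.NewDense(1, 1, []float64{0.2}) // ∂E/∂m

	scaled := &mat.Dense{}
	scaled.Scale(eta, grad) // η * ∂E/∂m

	result := &mat.Dense{}
	result.Sub(m, scaled) // m[l] − η * ∂E/∂m

	fmt.Println(result.At(0, 0)) // ≈ 0.48
}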