@@ -40,6 +40,7 @@ import (
 // Resources:
 // http://neuralnetworksanddeeplearning.com
 // https://www.youtube.com/watch?v=fNk_zzaMoSs
+// http://www.inf.fu-berlin.de/lehre/WS06/Musterererkennung/Paper/rprop.pdf
 //
 // Matrix: A
 // Description: A is set of calculated neuron activations after sigmoid correction
@@ -94,18 +95,19 @@ import (
 // s' = Sizes[l-1] - Previous neural network layer size
 // L = len(Sizes) - Number of neural network layers
 
-type BackProp struct {
+type NeuralNetwork struct {
 	Count          int
 	Sizes          []int
 	Biases         []*mat.Dense
 	Weights        []*mat.Dense
 	A              []*mat.Dense
 	Z              []*mat.Dense
-	alpha          float64
+	WGradient      []Gradient
+	BGradient      []Gradient
 	trainingCycles int
 }
 
-func NewBackProp(sizes []int, nu float64, trainingCycles int) (nn *BackProp, err error) {
+func NewNeuralNetwork(sizes []int, nu float64, trainingCycles int) (nn *NeuralNetwork, err error) {
 	err = nil
 	if len(sizes) < 3 {
 		fmt.Printf("Invalid network configuration: %v\n", sizes)
@@ -133,42 +135,48 @@ func NewBackProp(sizes []int, nu float64, trainingCycles int) (nn *BackProp, err
 		fmt.Println("Training cycles number probably is too small")
 	}
 
-	nn = &BackProp{}
+	nn = &NeuralNetwork{}
 	nn.Sizes = sizes
 	nn.Count = len(sizes)
 	nn.Weights = make([]*mat.Dense, nn.Count)
 	nn.Biases = make([]*mat.Dense, nn.Count)
+	nn.WGradient = make([]Gradient, nn.Count)
+	nn.BGradient = make([]Gradient, nn.Count)
+
 	nn.A = make([]*mat.Dense, nn.Count)
 	nn.Z = make([]*mat.Dense, nn.Count)
-	nn.alpha = nu / float64(nn.Sizes[0])
 	nn.trainingCycles = trainingCycles
 
+	alpha := nu / float64(nn.Sizes[0])
 	for i := 1; i < nn.Count; i++ {
 		nn.Weights[i] = generateRandomDense(nn.Sizes[i], nn.Sizes[i-1])
 		nn.Biases[i] = generateRandomDense(nn.Sizes[i], 1)
+		nn.WGradient[i] = &BackPropGradient{alpha}
+		nn.BGradient[i] = &BackPropGradient{alpha}
+		// nn.WGradient[i] = NewRPropGradient(nn.Sizes[i], nn.Sizes[i-1])
+		// nn.BGradient[i] = NewRPropGradient(nn.Sizes[i], 1)
 	}
 	return
 }
 
-func (nn *BackProp) Copy() (out *BackProp) {
-	out = &BackProp{}
+func (nn *NeuralNetwork) Copy() (out *NeuralNetwork) {
+	out = &NeuralNetwork{}
 	out.Sizes = nn.Sizes
 	out.Count = nn.Count
 	out.Weights = make([]*mat.Dense, nn.Count)
 	out.Biases = make([]*mat.Dense, nn.Count)
 	out.A = make([]*mat.Dense, nn.Count)
 	out.Z = make([]*mat.Dense, nn.Count)
-	out.alpha = nn.alpha
 	out.trainingCycles = nn.trainingCycles
 
 	for i := 1; i < out.Count; i++ {
-		nn.Weights[i] = mat.DenseCopyOf(out.Weights[i])
-		nn.Biases[i] = mat.DenseCopyOf(out.Biases[i])
+		out.Weights[i] = mat.DenseCopyOf(nn.Weights[i])
+		out.Biases[i] = mat.DenseCopyOf(nn.Biases[i])
 	}
 	return
 }
 
-func (nn *BackProp) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
+func (nn *NeuralNetwork) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
 	r, _ := aIn.Dims()
 	if r != nn.Sizes[0] {
 		fmt.Printf("Invalid rows number of input matrix size: %v\n", r)
@@ -189,7 +197,7 @@ func (nn *BackProp) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
 	return
 }
 
-func (nn *BackProp) Teach(teacher teach.Teacher) {
+func (nn *NeuralNetwork) Teach(teacher teach.Teacher) {
 	for i := 0; i < nn.trainingCycles; i++ {
 		for teacher.NextData() {
 			nn.backward(teacher.GetData())
@@ -197,7 +205,7 @@ func (nn *BackProp) Teach(teacher teach.Teacher) {
 	}
 }
 
-func (nn *BackProp) SaveState(writer io.Writer) {
+func (nn *NeuralNetwork) SaveState(writer io.Writer) {
 	//save input array count
 	bufferSize := make([]byte, 4)
 	binary.LittleEndian.PutUint32(bufferSize[0:], uint32(nn.Count))
@@ -229,7 +237,7 @@ func (nn *BackProp) SaveState(writer io.Writer) {
 	}
 }
 
-func (nn *BackProp) LoadState(reader io.Reader) {
-	// Reade count
+func (nn *NeuralNetwork) LoadState(reader io.Reader) {
+	// Read count
 	nn.Count = readInt(reader)
 
@@ -265,7 +273,7 @@ func (nn *BackProp) LoadState(reader io.Reader) {
 	// fmt.Printf("\nLoadState end\n")
 }
 
-func (nn *BackProp) forward(aIn mat.Matrix) {
+func (nn *NeuralNetwork) forward(aIn mat.Matrix) {
 	nn.A[0] = mat.DenseCopyOf(aIn)
 
 	for i := 1; i < nn.Count; i++ {
@@ -290,15 +298,15 @@ func (nn *BackProp) forward(aIn mat.Matrix) {
 	}
 }
 
-func (nn *BackProp) backward(aIn, aOut mat.Matrix) {
+func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) {
 	nn.forward(aIn)
 
 	lastLayerNum := nn.Count - 1
 
 	// To calculate new values of weights and biases
 	// following formulas are used:
-	// W[l] = A[l−1]*δ[l]
-	// B[l] = δ[l]
+	// ∂E/∂W[l] = A[l−1]*δ[l]
+	// ∂E/∂B[l] = δ[l]
 
 	// For last layer δ value is calculated by following:
 	// δ = (A[L]−y)⊙σ'(Z[L])
@@ -317,23 +325,31 @@ func (nn *BackProp) backward(aIn, aOut mat.Matrix) {
 	delta := &mat.Dense{}
 	delta.MulElem(err, sigmoidsPrime)
 
-	// B[L] = δ[L]
+	// ∂E/∂B[L] = δ[L]
 	biases := mat.DenseCopyOf(delta)
 
-	// W[L] = A[L−1]*δ[L]
+	// ∂E/∂W[L] = A[L−1]*δ[L]
 	weights := &mat.Dense{}
 	weights.Mul(delta, nn.A[lastLayerNum-1].T())
 
-	// Initialize new weights and biases values with last layer values
-	newBiases := []*mat.Dense{makeBackGradient(biases, nn.Biases[lastLayerNum], nn.alpha)}
-	newWeights := []*mat.Dense{makeBackGradient(weights, nn.Weights[lastLayerNum], nn.alpha)}
+	// fmt.Printf("Prev biases[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.Biases[lastLayerNum], mat.Prefix(""), mat.Excerpt(0)))
+	// fmt.Printf("Prev weights[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.Weights[lastLayerNum], mat.Prefix(""), mat.Excerpt(0)))
 
-	// Save calculated delta value temporary error variable
-	err = delta
+	// fmt.Printf("Expect[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(aOut, mat.Prefix(""), mat.Excerpt(0)))
+	// fmt.Printf("Result[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.result(), mat.Prefix(""), mat.Excerpt(0)))
+	// fmt.Printf("nn.Z[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.Z[lastLayerNum], mat.Prefix(""), mat.Excerpt(0)))
+	// fmt.Printf("sigmoidsPrime[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(sigmoidsPrime, mat.Prefix(""), mat.Excerpt(0)))
+	// fmt.Printf("Err[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(err, mat.Prefix(""), mat.Excerpt(0)))
+	// fmt.Printf("Biases gradient[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(biases, mat.Prefix(""), mat.Excerpt(0)))
+	// fmt.Printf("Weights gradient[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(weights, mat.Prefix(""), mat.Excerpt(0)))
+
+	// Initialize new weights and biases values with last layer values
+	newBiases := []*mat.Dense{nn.BGradient[lastLayerNum].ApplyDelta(nn.Biases[lastLayerNum], biases)}
+	newWeights := []*mat.Dense{nn.WGradient[lastLayerNum].ApplyDelta(nn.Weights[lastLayerNum], weights)}
 
 	// Next layer Weights and Biases are calculated using same formulas:
-	// W[l] = A[l−1]*δ[l]
-	// B[l] = δ[l]
+	// ∂E/∂W[l] = A[l−1]*δ[l]
+	// ∂E/∂B[l] = δ[l]
 
 	// But δ[l] is calculated using different formula:
 	// δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
@@ -346,19 +362,18 @@ func (nn *BackProp) backward(aIn, aOut mat.Matrix) {
 
 		// (Wt[l+1])*δ[l+1]
-		// err bellow is delta from previous step(l+1)
-		delta := &mat.Dense{}
+		// delta below is δ from the previous step (l+1)
 		wdelta := &mat.Dense{}
-		wdelta.Mul(nn.Weights[l+1].T(), err)
+		wdelta.Mul(nn.Weights[l+1].T(), delta)
 
-		// Calculate new delta and store it to temporary variable err
+		// Calculate new delta
 		// δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
+		delta = &mat.Dense{}
 		delta.MulElem(wdelta, sigmoidsPrime)
-		err = delta
 
-		// B[l] = δ[l]
+		// ∂E/∂B[l] = δ[l]
 		biases := mat.DenseCopyOf(delta)
 
-		// W[l] = A[l−1]*δ[l]
+		// ∂E/∂W[l] = A[l−1]*δ[l]
 		// At this point it's required to give explanation for inaccuracy
 		// in the formula
 
@@ -391,9 +406,12 @@ func (nn *BackProp) backward(aIn, aOut mat.Matrix) {
 		weights := &mat.Dense{}
 		weights.Mul(delta, nn.A[l-1].T())
 
+		// fmt.Printf("Weights gradient[%v]:\n%v\n\n", l, mat.Formatted(weights, mat.Prefix(""), mat.Excerpt(0)))
+		// fmt.Printf("Biases gradient[%v]:\n%v\n\n", l, mat.Formatted(biases, mat.Prefix(""), mat.Excerpt(0)))
+
 		// !Prepend! new Biases and Weights
-		newBiases = append([]*mat.Dense{makeBackGradient(biases, nn.Biases[l], nn.alpha)}, newBiases...)
-		newWeights = append([]*mat.Dense{makeBackGradient(weights, nn.Weights[l], nn.alpha)}, newWeights...)
+		newBiases = append([]*mat.Dense{nn.BGradient[l].ApplyDelta(nn.Biases[l], biases)}, newBiases...)
+		newWeights = append([]*mat.Dense{nn.WGradient[l].ApplyDelta(nn.Weights[l], weights)}, newWeights...)
 	}
 
 	newBiases = append([]*mat.Dense{&mat.Dense{}}, newBiases...)
@@ -403,6 +421,6 @@ func (nn *BackProp) backward(aIn, aOut mat.Matrix) {
 	nn.Weights = newWeights
 }
 
-func (nn *BackProp) result() *mat.Dense {
+func (nn *NeuralNetwork) result() *mat.Dense {
 	return nn.A[nn.Count-1]
}
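
// The hunks above call nn.WGradient[i].ApplyDelta(...) and construct
// BackPropGradient values, but the Gradient interface itself lies outside
// this diff. Below is a minimal sketch of what the call sites imply it looks
// like: the interface name, method signature, field name, and package name
// are inferred from the diff, not copied from the repository.

package neuralnetwork // placeholder package name

import "gonum.org/v1/gonum/mat"

// Gradient turns a raw gradient matrix into an updated weights or biases
// matrix. Implementations decide how the gradient is applied (plain
// backpropagation, RProp, ...).
type Gradient interface {
	ApplyDelta(m mat.Matrix, gradient mat.Matrix) *mat.Dense
}

// BackPropGradient is plain gradient descent with a fixed learning rate:
// new = old − alpha*gradient. This matches the old makeBackGradient
// behavior the diff removes, assuming that helper scaled by nn.alpha.
type BackPropGradient struct {
	alpha float64
}

func (g *BackPropGradient) ApplyDelta(m mat.Matrix, gradient mat.Matrix) *mat.Dense {
	scaled := &mat.Dense{}
	result := &mat.Dense{}
	scaled.Scale(g.alpha, gradient) // scaled = alpha * gradient
	result.Sub(m, scaled)           // result = m - scaled
	return result
}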
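
// The commented-out NewRPropGradient(...) lines and the rprop.pdf link point
// at a resilient-propagation alternative. The following is only an
// illustrative sketch of classic RProp (Riedmiller & Braun) behind the same
// assumed interface; the repository's real implementation and constants may
// differ.

package neuralnetwork // placeholder package name

import (
	"math"

	"gonum.org/v1/gonum/mat"
)

// RPropGradient keeps a per-element step size and the previous gradient,
// and adapts each step from the sign of gradient(t)*gradient(t-1).
type RPropGradient struct {
	steps     *mat.Dense // per-element step sizes Δ
	prevGrads *mat.Dense // previous gradient, for sign comparison
}

func NewRPropGradient(r, c int) *RPropGradient {
	steps := mat.NewDense(r, c, nil)
	steps.Apply(func(_, _ int, _ float64) float64 { return 0.1 }, steps) // Δ0 = 0.1
	return &RPropGradient{steps: steps, prevGrads: mat.NewDense(r, c, nil)}
}

func (g *RPropGradient) ApplyDelta(m mat.Matrix, gradient mat.Matrix) *mat.Dense {
	const (
		etaPlus  = 1.2 // grow the step while the gradient keeps its sign
		etaMinus = 0.5 // shrink it after a sign change
		stepMax  = 50.0
		stepMin  = 1e-6
	)
	result := mat.DenseCopyOf(m)
	r, c := result.Dims()
	for i := 0; i < r; i++ {
		for j := 0; j < c; j++ {
			grad := gradient.At(i, j)
			step := g.steps.At(i, j)
			switch product := grad * g.prevGrads.At(i, j); {
			case product > 0:
				step = math.Min(step*etaPlus, stepMax)
			case product < 0:
				step = math.Max(step*etaMinus, stepMin)
				grad = 0 // classic RProp skips the update after a sign change
			}
			g.steps.Set(i, j, step)
			g.prevGrads.Set(i, j, grad)
			switch {
			case grad > 0:
				result.Set(i, j, result.At(i, j)-step)
			case grad < 0:
				result.Set(i, j, result.At(i, j)+step)
			}
		}
	}
	return result
}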
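
// A usage sketch for the renamed API. The layer sizes, learning rate, cycle
// count, and the teacher value are hypothetical; only the signatures come
// from the diff (teacher must implement teach.Teacher):
//
//	nn, err := NewNeuralNetwork([]int{784, 30, 10}, 0.1, 100)
//	if err != nil {
//		// handle invalid configuration
//	}
//	nn.Teach(teacher)               // runs trainingCycles passes over the data
//	index, max := nn.Predict(input) // input is a Sizes[0]x1 column vector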
|