@@ -30,6 +30,7 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"sync"

 	teach "../teach"
 	mat "gonum.org/v1/gonum/mat"
@@ -96,15 +97,14 @@ import (
 // L = len(Sizes) - Number of neural network layers

 type NeuralNetwork struct {
-	LayerCount int
-	Sizes      []int
-	Biases     []*mat.Dense
-	Weights    []*mat.Dense
-	A          []*mat.Dense
-	Z          []*mat.Dense
-	BGradient  []interface{}
-	WGradient  []interface{}
-	epocs      int
+	LayerCount                 int
+	Sizes                      []int
+	Biases                     []*mat.Dense
+	Weights                    []*mat.Dense
+	BGradient                  []interface{}
+	WGradient                  []interface{}
+	epocs                      int
+	gradientDescentInitializer GradientDescentInitializer
 }

 func NewNeuralNetwork(sizes []int, epocs int, gradientDescentInitializer GradientDescentInitializer) (nn *NeuralNetwork, err error) {
@@ -138,32 +138,14 @@ func NewNeuralNetwork(sizes []int, epocs int, gradientDescentInitializer Gradien
 	nn.BGradient = make([]interface{}, nn.LayerCount)
 	nn.WGradient = make([]interface{}, nn.LayerCount)

-	nn.A = make([]*mat.Dense, nn.LayerCount)
-	nn.Z = make([]*mat.Dense, nn.LayerCount)
 	nn.epocs = epocs
+	nn.gradientDescentInitializer = gradientDescentInitializer

 	for l := 1; l < nn.LayerCount; l++ {
 		nn.Biases[l] = generateRandomDense(nn.Sizes[l], 1)
 		nn.Weights[l] = generateRandomDense(nn.Sizes[l], nn.Sizes[l-1])
-		nn.BGradient[l] = gradientDescentInitializer(nn, l, BiasGradient)
-		nn.WGradient[l] = gradientDescentInitializer(nn, l, WeightGradient)
-	}
-	return
-}
-
-func (nn *NeuralNetwork) Copy() (out *NeuralNetwork) {
-	out = &NeuralNetwork{}
-	out.Sizes = nn.Sizes
-	out.LayerCount = nn.LayerCount
-	out.Weights = make([]*mat.Dense, nn.LayerCount)
-	out.Biases = make([]*mat.Dense, nn.LayerCount)
-	out.A = make([]*mat.Dense, nn.LayerCount)
-	out.Z = make([]*mat.Dense, nn.LayerCount)
-	out.epocs = nn.epocs
-
-	for l := 1; l < out.LayerCount; l++ {
-		out.Weights[l] = mat.DenseCopyOf(nn.Weights[l])
-		out.Biases[l] = mat.DenseCopyOf(nn.Biases[l])
+		nn.BGradient[l] = nn.gradientDescentInitializer(nn, l, BiasGradient)
+		nn.WGradient[l] = nn.gradientDescentInitializer(nn, l, WeightGradient)
 	}
 	return
 }
@@ -175,8 +157,8 @@ func (nn *NeuralNetwork) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
 		return -1, 0.0
 	}

-	nn.forward(aIn)
-	result := nn.result()
+	A, _ := nn.forward(aIn)
+	result := A[nn.LayerCount-1]
 	r, _ = result.Dims()
 	max = 0.0
 	maxIndex = 0
@@ -220,11 +202,25 @@ func (nn *NeuralNetwork) TeachOnline(teacher teach.Teacher) {
 	}
 }

-func (nn *NeuralNetwork) TeachBatch(teacher teach.Teacher) {
+func (nn *NeuralNetwork) TeachBatch(_ teach.Teacher) {
+	wg := sync.WaitGroup{}
 	for t := 0; t < nn.epocs; t++ {
-		for teacher.NextData() {
-			dB, dW := nn.backward(teacher.GetData())
+		batchWorkers := []*batchWorker{newBatchWorker(nn), newBatchWorker(nn), newBatchWorker(nn)} //, newBatchWorker(nn), newBatchWorker(nn), newBatchWorker(nn)}
+		for i := range batchWorkers {
+			wg.Add(1)
+			go func(i int) {
+				teacher := teach.NewTextDataReader("./wine.data", 5)
+				batchWorkers[i].Run(teacher)
+				wg.Done()
+			}(i)
+		}
+		wg.Wait()
+
+		// teacher.Reset()
+
+		for _, bw := range batchWorkers {
 			for l := 1; l < nn.LayerCount; l++ {
+				dB, dW := bw.Result(l)
 				bGradient, ok := nn.BGradient[l].(BatchGradientDescent)
 				if !ok {
 					panic("bGradient is not a BatchGradientDescent")
@@ -233,11 +229,11 @@ func (nn *NeuralNetwork) TeachBatch(teacher teach.Teacher) {
 				if !ok {
 					panic("wGradient is not a BatchGradientDescent")
 				}
-				bGradient.AccumGradients(dB[l])
-				wGradient.AccumGradients(dW[l])
+				bGradient.AccumGradients(dB)
+				wGradient.AccumGradients(dW)
 			}
 		}
-		teacher.Reset()
+
 		for l := 1; l < nn.LayerCount; l++ {
 			bGradient := nn.BGradient[l].(BatchGradientDescent)
 			wGradient := nn.WGradient[l].(BatchGradientDescent)
@@ -309,19 +305,19 @@ func (nn *NeuralNetwork) LoadState(reader io.Reader) {
 		nn.Weights[i] = readDense(reader, nn.Weights[i])
 	}

-	nn.A = make([]*mat.Dense, nn.LayerCount)
-	nn.Z = make([]*mat.Dense, nn.LayerCount)
-
 	// fmt.Printf("\nLoadState end\n")
 }

-func (nn *NeuralNetwork) forward(aIn mat.Matrix) {
-	nn.A[0] = mat.DenseCopyOf(aIn)
+func (nn NeuralNetwork) forward(aIn mat.Matrix) (A, Z []*mat.Dense) {
+	A = make([]*mat.Dense, nn.LayerCount)
+	Z = make([]*mat.Dense, nn.LayerCount)
+
+	A[0] = mat.DenseCopyOf(aIn)

 	for l := 1; l < nn.LayerCount; l++ {
-		nn.A[l] = mat.NewDense(nn.Sizes[l], 1, nil)
-		aSrc := nn.A[l-1]
-		aDst := nn.A[l]
+		A[l] = mat.NewDense(nn.Sizes[l], 1, nil)
+		aSrc := A[l-1]
+		aDst := A[l]

 		// Each iteration implements the formula below for neuron activation values
 		// A[l]=σ(W[l]*A[l−1]+B[l])
@@ -333,17 +329,18 @@ func (nn *NeuralNetwork) forward(aIn mat.Matrix) {
 		aDst.Add(aDst, nn.Biases[l])

 		// Save raw activation value for back propagation
-		nn.Z[l] = mat.DenseCopyOf(aDst)
+		Z[l] = mat.DenseCopyOf(aDst)

 		// σ(W[l]*A[l−1]+B[l])
 		aDst.Apply(applySigmoid, aDst)
 	}
+	return
 }

 // Function returns calculated bias and weights derivatives for each
 // layer around aIn/aOut datasets
-func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) (dB, dW []*mat.Dense) {
-	nn.forward(aIn)
+func (nn NeuralNetwork) backward(aIn, aOut mat.Matrix) (dB, dW []*mat.Dense) {
+	A, Z := nn.forward(aIn)

 	lastLayerNum := nn.LayerCount - 1
 	dB = make([]*mat.Dense, nn.LayerCount)
@@ -361,11 +358,11 @@ func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) (dB, dW []*mat.Dense) {
 	// error = A[L]-y
 	// Where y is expected activations set
 	err := &mat.Dense{}
-	err.Sub(nn.result(), aOut)
+	err.Sub(A[nn.LayerCount-1], aOut)

 	// Calculate sigmoids prime σ'(Z[L]) for last layer L
 	sigmoidsPrime := &mat.Dense{}
-	sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[lastLayerNum])
+	sigmoidsPrime.Apply(applySigmoidPrime, Z[lastLayerNum])

 	// (A[L]−y)⊙σ'(Z[L])
 	delta := &mat.Dense{}
@@ -376,7 +373,7 @@ func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) (dB, dW []*mat.Dense) {

 	// ∂E/∂W[L] = A[L−1]*δ[L]
 	weights := &mat.Dense{}
-	weights.Mul(delta, nn.A[lastLayerNum-1].T())
+	weights.Mul(delta, A[lastLayerNum-1].T())

 	// Initialize new weights and biases values with last layer values
 	dB[lastLayerNum] = biases
@@ -393,7 +390,7 @@ func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) (dB, dW []*mat.Dense) {
 	for l := nn.LayerCount - 2; l > 0; l-- {
 		// Calculate sigmoids prime σ'(Z[l]) for layer l
 		sigmoidsPrime := &mat.Dense{}
-		sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[l])
+		sigmoidsPrime.Apply(applySigmoidPrime, Z[l])

 		// (Wt[l+1])*δ[l+1]
 		// err below is delta from previous step (l+1)
@@ -439,14 +436,10 @@ func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) (dB, dW []*mat.Dense) {
 		// ⎢ ... ⎥
 		// ⎣δ[s] ⎦
 		weights := &mat.Dense{}
-		weights.Mul(delta, nn.A[l-1].T())
+		weights.Mul(delta, A[l-1].T())

 		dB[l] = biases
 		dW[l] = weights
 	}
 	return
 }
-
-func (nn *NeuralNetwork) result() *mat.Dense {
-	return nn.A[nn.LayerCount-1]
-}
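
Note: the batchWorker type used by TeachBatch above is defined elsewhere in the package and does not appear in this diff. A minimal sketch of a worker that matches the calls made here (newBatchWorker, Run, Result) might look like the following; the struct fields and the accumulation details are assumptions, not the project's actual implementation, and it relies on the same teach and mat imports as the file above.

type batchWorker struct {
	network *NeuralNetwork
	dB      []*mat.Dense // per-layer accumulated bias gradients
	dW      []*mat.Dense // per-layer accumulated weight gradients
}

func newBatchWorker(nn *NeuralNetwork) *batchWorker {
	bw := &batchWorker{
		network: nn,
		dB:      make([]*mat.Dense, nn.LayerCount),
		dW:      make([]*mat.Dense, nn.LayerCount),
	}
	for l := 1; l < nn.LayerCount; l++ {
		bw.dB[l] = mat.NewDense(nn.Sizes[l], 1, nil)
		bw.dW[l] = mat.NewDense(nn.Sizes[l], nn.Sizes[l-1], nil)
	}
	return bw
}

// Run accumulates gradients over every sample provided by teacher.
func (bw *batchWorker) Run(teacher teach.Teacher) {
	for teacher.NextData() {
		dB, dW := bw.network.backward(teacher.GetData())
		for l := 1; l < bw.network.LayerCount; l++ {
			bw.dB[l].Add(bw.dB[l], dB[l])
			bw.dW[l].Add(bw.dW[l], dW[l])
		}
	}
}

// Result returns the gradients accumulated for layer l.
func (bw *batchWorker) Result(l int) (dB, dW *mat.Dense) {
	return bw.dB[l], bw.dW[l]
}

Because each worker accumulates into its own slices and forward/backward no longer touch shared A/Z fields on NeuralNetwork, the goroutines share no mutable state and the only synchronization TeachBatch needs is the WaitGroup.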