@@ -30,6 +30,7 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"runtime"
 	"sync"
 
 	teach "../teach"
@@ -103,11 +104,10 @@ type NeuralNetwork struct {
 	Weights                    []*mat.Dense
 	BGradient                  []interface{}
 	WGradient                  []interface{}
-	epocs                      int
 	gradientDescentInitializer GradientDescentInitializer
 }
 
-func NewNeuralNetwork(sizes []int, epocs int, gradientDescentInitializer GradientDescentInitializer) (nn *NeuralNetwork, err error) {
+func NewNeuralNetwork(sizes []int, gradientDescentInitializer GradientDescentInitializer) (nn *NeuralNetwork, err error) {
 	err = nil
 	if len(sizes) < 3 {
 		fmt.Printf("Invalid network configuration: %v\n", sizes)
@@ -121,15 +121,6 @@ func NewNeuralNetwork(sizes []int, epocs int, gradientDescentInitializer Gradien
 		}
 	}
 
-	if epocs <= 0 {
-		fmt.Printf("Invalid training cycles number: %v\n", epocs)
-		return nil, errors.New("Invalid training cycles number: %v\n")
-	}
-
-	if epocs < 100 {
-		fmt.Println("Training cycles number probably is too small")
-	}
-
 	nn = &NeuralNetwork{}
 	nn.Sizes = sizes
 	nn.LayerCount = len(sizes)
@@ -138,7 +129,6 @@ func NewNeuralNetwork(sizes []int, epocs int, gradientDescentInitializer Gradien
 	nn.BGradient = make([]interface{}, nn.LayerCount)
 	nn.WGradient = make([]interface{}, nn.LayerCount)
 
-	nn.epocs = epocs
 	nn.gradientDescentInitializer = gradientDescentInitializer
 
 	for l := 1; l < nn.LayerCount; l++ {
@@ -171,18 +161,18 @@ func (nn *NeuralNetwork) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
 	return
 }
 
-func (nn *NeuralNetwork) Teach(teacher teach.Teacher) {
+func (nn *NeuralNetwork) Teach(teacher teach.Teacher, epocs int) {
 	if _, ok := nn.WGradient[nn.LayerCount-1].(OnlineGradientDescent); ok {
-		nn.TeachOnline(teacher)
+		nn.TeachOnline(teacher, epocs)
 	} else if _, ok := nn.WGradient[nn.LayerCount-1].(BatchGradientDescent); ok {
-		nn.TeachBatch(teacher)
+		nn.TeachBatch(teacher, epocs)
 	} else {
 		panic("Invalid gradient descent type")
 	}
 }
 
-func (nn *NeuralNetwork) TeachOnline(teacher teach.Teacher) {
-	for t := 0; t < nn.epocs; t++ {
+func (nn *NeuralNetwork) TeachOnline(teacher teach.Teacher, epocs int) {
+	for t := 0; t < epocs; t++ {
 		for teacher.NextData() {
 			dB, dW := nn.backward(teacher.GetData())
 			for l := 1; l < nn.LayerCount; l++ {
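
Note on usage: with this change the epoch count is no longer fixed at construction time but supplied per training run. Below is a minimal call-site sketch under the new signatures, assuming it lives in the same package; the layer sizes, the initializer and teacher parameters, and the epoch count are illustrative placeholders, not values taken from this patch.

    // Hypothetical call site (not part of this patch): initializer is some
    // GradientDescentInitializer and teacher is a teach.Teacher implementation
    // obtained elsewhere.
    func trainExample(initializer GradientDescentInitializer, teacher teach.Teacher) {
        nn, err := NewNeuralNetwork([]int{784, 30, 10}, initializer)
        if err != nil {
            return // invalid network configuration
        }
        nn.Teach(teacher, 100) // dispatches to TeachOnline or TeachBatch
    }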
@@ -202,47 +192,47 @@ func (nn *NeuralNetwork) TeachOnline(teacher teach.Teacher) {
 	}
 }
 
-func (nn *NeuralNetwork) TeachBatch(teacher teach.Teacher) {
-	wg := sync.WaitGroup{}
-	for t := 0; t < nn.epocs; t++ {
-		batchWorkers := []*batchWorker{newBatchWorker(nn), newBatchWorker(nn), newBatchWorker(nn), newBatchWorker(nn), newBatchWorker(nn), newBatchWorker(nn), newBatchWorker(nn), newBatchWorker(nn)}
-		for i, _ := range batchWorkers {
-			wg.Add(1)
-			s := i
-			go func() {
-				batchWorkers[s].Run(teacher, s*teacher.GetDataCount()/len(batchWorkers), (s+1)*teacher.GetDataCount()/len(batchWorkers))
-				wg.Done()
-			}()
-		}
-		wg.Wait()
-
-		// teacher.Reset()
+func (nn *NeuralNetwork) TeachBatch(teacher teach.Teacher, epocs int) {
+	for t := 0; t < epocs; t++ {
+		batchWorkers := nn.runBatchWorkers(runtime.NumCPU(), teacher)
 
-		for _, bw := range batchWorkers {
-			for l := 1; l < nn.LayerCount; l++ {
+		for l := 1; l < nn.LayerCount; l++ {
+			bGradient, ok := nn.BGradient[l].(BatchGradientDescent)
+			if !ok {
+				panic("bGradient is not a BatchGradientDescent")
+			}
+			wGradient, ok := nn.WGradient[l].(BatchGradientDescent)
+			if !ok {
+				panic("wGradient is not a BatchGradientDescent")
+			}
+			for _, bw := range batchWorkers {
 				dB, dW := bw.Result(l)
-				bGradient, ok := nn.BGradient[l].(BatchGradientDescent)
-				if !ok {
-					panic("bGradient is not a BatchGradientDescent")
-				}
-				wGradient, ok := nn.WGradient[l].(BatchGradientDescent)
-				if !ok {
-					panic("wGradient is not a BatchGradientDescent")
-				}
 				bGradient.AccumGradients(dB)
 				wGradient.AccumGradients(dW)
 			}
-		}
-
-		for l := 1; l < nn.LayerCount; l++ {
-			bGradient := nn.BGradient[l].(BatchGradientDescent)
-			wGradient := nn.WGradient[l].(BatchGradientDescent)
 			nn.Biases[l] = bGradient.ApplyDelta(nn.Biases[l])
 			nn.Weights[l] = wGradient.ApplyDelta(nn.Weights[l])
 		}
 	}
 }
 
+func (nn *NeuralNetwork) runBatchWorkers(threadCount int, teacher teach.Teacher) (workers []*batchWorker) {
+	wg := sync.WaitGroup{}
+	chunkSize := teacher.GetDataCount() / threadCount
+	workers = make([]*batchWorker, threadCount)
+	for i, _ := range workers {
+		workers[i] = newBatchWorker(nn)
+		wg.Add(1)
+		s := i
+		go func() {
+			workers[s].Run(teacher, s*chunkSize, (s+1)*chunkSize)
+			wg.Done()
+		}()
+	}
+	wg.Wait()
+	return
+}
+
 func (nn *NeuralNetwork) SaveState(writer io.Writer) {
 	//save input array count
 	bufferSize := make([]byte, 4)
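
For reference, a small self-contained sketch of the chunking arithmetic used by runBatchWorkers; the data count and thread count below are made-up example values, not taken from this patch. Worker s is handed the half-open sample range [s*chunkSize, (s+1)*chunkSize), and because chunkSize is the integer quotient GetDataCount()/threadCount, any remainder samples fall outside every chunk.

    package main

    import "fmt"

    func main() {
        dataCount := 10001 // stand-in for teacher.GetDataCount()
        threadCount := 8   // stand-in for runtime.NumCPU()
        chunkSize := dataCount / threadCount
        for s := 0; s < threadCount; s++ {
            fmt.Printf("worker %d: samples [%d, %d)\n", s, s*chunkSize, (s+1)*chunkSize)
        }
        // Integer division leaves dataCount%threadCount samples unassigned (here, 1).
        fmt.Println("unassigned samples:", dataCount-threadCount*chunkSize)
    }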