@@ -96,18 +96,18 @@ import (
 // L = len(Sizes) - Number of neural network layers
 type NeuralNetwork struct {
-	Count          int
-	Sizes          []int
-	Biases         []*mat.Dense
-	Weights        []*mat.Dense
-	A              []*mat.Dense
-	Z              []*mat.Dense
-	WGradient      []Gradient
-	BGradient      []Gradient
-	trainingCycles int
+	LayerCount int
+	Sizes      []int
+	Biases     []*mat.Dense
+	Weights    []*mat.Dense
+	A          []*mat.Dense
+	Z          []*mat.Dense
+	BGradient  []interface{}
+	WGradient  []interface{}
+	epocs      int
 }
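The two `[]interface{}` gradient slices are filled in by a `GradientDescentInitializer` (see the new constructor below) and are later type-asserted to `OnlineGradientDescent` or `BatchGradientDescent` in `Teach`. The diff itself does not show those declarations; here is a minimal sketch of what they might look like, inferred purely from how they are used in this patch:

```go
// Sketch inferred from usage in this diff; the real declarations may differ.

// GradientType tells an initializer which kind of gradient object to build
// for a layer. BiasGradient/WeightGradient are the names used in
// NewNeuralNetwork below.
type GradientType int

const (
	BiasGradient GradientType = iota
	WeightGradient
)

// GradientDescentInitializer builds the per-layer gradient object stored in
// BGradient/WGradient.
type GradientDescentInitializer func(nn *NeuralNetwork, layer int, gradientType GradientType) interface{}

// OnlineGradientDescent applies one sample's derivative immediately.
type OnlineGradientDescent interface {
	ApplyDelta(m *mat.Dense, derivative *mat.Dense) *mat.Dense
}

// BatchGradientDescent accumulates derivatives over an epoch and applies
// them once per epoch.
type BatchGradientDescent interface {
	AccumGradients(derivative *mat.Dense)
	ApplyDelta(m *mat.Dense) *mat.Dense
}
```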
 
-func NewNeuralNetwork(sizes []int, nu float64, trainingCycles int) (nn *NeuralNetwork, err error) {
+func NewNeuralNetwork(sizes []int, epocs int, gradientDescentInitializer GradientDescentInitializer) (nn *NeuralNetwork, err error) {
 	err = nil
 	if len(sizes) < 3 {
 		fmt.Printf("Invalid network configuration: %v\n", sizes)
@@ -121,40 +121,32 @@ func NewNeuralNetwork(sizes []int, nu float64, trainingCycles int) (nn *NeuralNe
 		}
 	}
 
-	if nu <= 0.0 || nu > 1.0 {
-		fmt.Printf("Invalid η value: %v\n", nu)
-		return nil, errors.New("Invalid η value: %v\n")
-	}
-
-	if trainingCycles <= 0 {
-		fmt.Printf("Invalid training cycles number: %v\n", trainingCycles)
+	if epocs <= 0 {
+		fmt.Printf("Invalid training cycles number: %v\n", epocs)
 		return nil, errors.New("Invalid training cycles number: %v\n")
 	}
 
-	if trainingCycles < 100 {
+	if epocs < 100 {
 		fmt.Println("Training cycles number probably is too small")
 	}
 
 	nn = &NeuralNetwork{}
 	nn.Sizes = sizes
-	nn.Count = len(sizes)
-	nn.Weights = make([]*mat.Dense, nn.Count)
-	nn.Biases = make([]*mat.Dense, nn.Count)
-	nn.WGradient = make([]Gradient, nn.Count)
-	nn.BGradient = make([]Gradient, nn.Count)
-
-	nn.A = make([]*mat.Dense, nn.Count)
-	nn.Z = make([]*mat.Dense, nn.Count)
-	nn.trainingCycles = trainingCycles
-
-	// alpha := nu / float64(nn.Sizes[0])
-	for i := 1; i < nn.Count; i++ {
-		nn.Weights[i] = generateRandomDense(nn.Sizes[i], nn.Sizes[i-1])
-		nn.Biases[i] = generateRandomDense(nn.Sizes[i], 1)
-		// nn.WGradient[i] = &BackPropGradient{alpha}
-		// nn.BGradient[i] = &BackPropGradient{alpha}
-		nn.WGradient[i] = NewRPropGradient(nn.Sizes[i], nn.Sizes[i-1])
-		nn.BGradient[i] = NewRPropGradient(nn.Sizes[i], 1)
+	nn.LayerCount = len(sizes)
+	nn.Biases = make([]*mat.Dense, nn.LayerCount)
+	nn.Weights = make([]*mat.Dense, nn.LayerCount)
+	nn.BGradient = make([]interface{}, nn.LayerCount)
+	nn.WGradient = make([]interface{}, nn.LayerCount)
+
+	nn.A = make([]*mat.Dense, nn.LayerCount)
+	nn.Z = make([]*mat.Dense, nn.LayerCount)
+	nn.epocs = epocs
+
+	for l := 1; l < nn.LayerCount; l++ {
+		nn.Biases[l] = generateRandomDense(nn.Sizes[l], 1)
+		nn.Weights[l] = generateRandomDense(nn.Sizes[l], nn.Sizes[l-1])
+		nn.BGradient[l] = gradientDescentInitializer(nn, l, BiasGradient)
+		nn.WGradient[l] = gradientDescentInitializer(nn, l, WeightGradient)
 	}
 	return
 }
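For orientation, a caller migrating from the old constructor might now write something like the following. The `rpropInit` closure is hypothetical glue around the pre-existing `NewRPropGradient` (removed from the constructor above), assuming that function still exists and its result satisfies `BatchGradientDescent`:

```go
// Hypothetical usage sketch, not part of the diff.
rpropInit := func(nn *NeuralNetwork, layer int, gradientType GradientType) interface{} {
	if gradientType == BiasGradient {
		return NewRPropGradient(nn.Sizes[layer], 1)
	}
	return NewRPropGradient(nn.Sizes[layer], nn.Sizes[layer-1])
}

nn, err := NewNeuralNetwork([]int{784, 30, 10}, 1000, rpropInit)
if err != nil {
	// handle the configuration error
}
nn.Teach(teacher) // dispatches to TeachOnline or TeachBatch (see below)
```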
@@ -162,16 +154,16 @@ func NewNeuralNetwork(sizes []int, nu float64, trainingCycles int) (nn *NeuralNe
 func (nn *NeuralNetwork) Copy() (out *NeuralNetwork) {
 	out = &NeuralNetwork{}
 	out.Sizes = nn.Sizes
-	out.Count = nn.Count
-	out.Weights = make([]*mat.Dense, nn.Count)
-	out.Biases = make([]*mat.Dense, nn.Count)
-	out.A = make([]*mat.Dense, nn.Count)
-	out.Z = make([]*mat.Dense, nn.Count)
-	out.trainingCycles = nn.trainingCycles
-
-	for i := 1; i < out.Count; i++ {
-		out.Weights[i] = mat.DenseCopyOf(nn.Weights[i])
-		out.Biases[i] = mat.DenseCopyOf(nn.Biases[i])
+	out.LayerCount = nn.LayerCount
+	out.Weights = make([]*mat.Dense, nn.LayerCount)
+	out.Biases = make([]*mat.Dense, nn.LayerCount)
+	out.A = make([]*mat.Dense, nn.LayerCount)
+	out.Z = make([]*mat.Dense, nn.LayerCount)
+	out.epocs = nn.epocs
+
+	for l := 1; l < out.LayerCount; l++ {
+		out.Weights[l] = mat.DenseCopyOf(nn.Weights[l])
+		out.Biases[l] = mat.DenseCopyOf(nn.Biases[l])
 	}
 	return
 }
@@ -198,13 +190,59 @@ func (nn *NeuralNetwork) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
 }
 
 func (nn *NeuralNetwork) Teach(teacher teach.Teacher) {
-	for i := 0; i < nn.trainingCycles; i++ {
+	if _, ok := nn.WGradient[nn.LayerCount-1].(OnlineGradientDescent); ok {
+		nn.TeachOnline(teacher)
+	} else if _, ok := nn.WGradient[nn.LayerCount-1].(BatchGradientDescent); ok {
+		nn.TeachBatch(teacher)
+	} else {
+		panic("Invalid gradient descent type")
+	}
+}
+
+func (nn *NeuralNetwork) TeachOnline(teacher teach.Teacher) {
+	for t := 0; t < nn.epocs; t++ {
+		for teacher.NextData() {
+			dB, dW := nn.backward(teacher.GetData())
+			for l := 1; l < nn.LayerCount; l++ {
+				bGradient, ok := nn.BGradient[l].(OnlineGradientDescent)
+				if !ok {
+					panic("bGradient is not an OnlineGradientDescent")
+				}
+				wGradient, ok := nn.WGradient[l].(OnlineGradientDescent)
+				if !ok {
+					panic("wGradient is not an OnlineGradientDescent")
+				}
+				nn.Biases[l] = bGradient.ApplyDelta(nn.Biases[l], dB[l])
+				nn.Weights[l] = wGradient.ApplyDelta(nn.Weights[l], dW[l])
+			}
+		}
+		teacher.Reset()
+	}
+}
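`TeachOnline` updates the parameters after every sample. A minimal illustrative `OnlineGradientDescent` implementation (plain SGD, under the interface shape sketched earlier; not part of this diff):

```go
// SGD is an illustrative online gradient-descent type, not part of the diff.
type SGD struct {
	nu float64 // learning rate
}

// ApplyDelta returns m - nu*derivative without modifying m.
func (g *SGD) ApplyDelta(m *mat.Dense, derivative *mat.Dense) *mat.Dense {
	step := &mat.Dense{}
	step.Scale(g.nu, derivative)
	out := &mat.Dense{}
	out.Sub(m, step)
	return out
}
```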
+
+func (nn *NeuralNetwork) TeachBatch(teacher teach.Teacher) {
+	for t := 0; t < nn.epocs; t++ {
 		for teacher.NextData() {
-			nn.backward(teacher.GetData())
+			dB, dW := nn.backward(teacher.GetData())
+			for l := 1; l < nn.LayerCount; l++ {
+				bGradient, ok := nn.BGradient[l].(BatchGradientDescent)
+				if !ok {
+					panic("bGradient is not a BatchGradientDescent")
+				}
+				wGradient, ok := nn.WGradient[l].(BatchGradientDescent)
+				if !ok {
+					panic("wGradient is not a BatchGradientDescent")
+				}
+				bGradient.AccumGradients(dB[l])
+				wGradient.AccumGradients(dW[l])
+			}
 		}
-		for l := 1; l < nn.Count; l++ {
-			nn.Biases[l] = nn.BGradient[l].ApplyDelta(nn.Biases[l], &mat.Dense{})
-			nn.Weights[l] = nn.WGradient[l].ApplyDelta(nn.Weights[l], &mat.Dense{})
+		teacher.Reset()
+		for l := 1; l < nn.LayerCount; l++ {
+			bGradient := nn.BGradient[l].(BatchGradientDescent)
+			wGradient := nn.WGradient[l].(BatchGradientDescent)
+			nn.Biases[l] = bGradient.ApplyDelta(nn.Biases[l])
+			nn.Weights[l] = wGradient.ApplyDelta(nn.Weights[l])
 		}
 	}
 }
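`TeachBatch`, by contrast, only accumulates derivatives inside the data loop and applies them once per epoch. An illustrative `BatchGradientDescent` counterpart (epoch-averaged SGD; again an assumption, not the package's real RProp implementation):

```go
// BatchSGD is an illustrative batch gradient-descent type, not part of the diff.
type BatchSGD struct {
	nu    float64 // learning rate
	sum   *mat.Dense
	count int
}

// AccumGradients adds one sample's derivative to the running sum.
func (g *BatchSGD) AccumGradients(derivative *mat.Dense) {
	if g.sum == nil {
		g.sum = mat.DenseCopyOf(derivative)
	} else {
		g.sum.Add(g.sum, derivative)
	}
	g.count++
}

// ApplyDelta applies the averaged accumulated derivative and resets the sum.
func (g *BatchSGD) ApplyDelta(m *mat.Dense) *mat.Dense {
	step := &mat.Dense{}
	step.Scale(g.nu/float64(g.count), g.sum)
	out := &mat.Dense{}
	out.Sub(m, step)
	g.sum, g.count = nil, 0
	return out
}
```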
@@ -212,15 +250,15 @@ func (nn *NeuralNetwork) Teach(teacher teach.Teacher) {
 func (nn *NeuralNetwork) SaveState(writer io.Writer) {
 	// save the layer count
 	bufferSize := make([]byte, 4)
-	binary.LittleEndian.PutUint32(bufferSize[0:], uint32(nn.Count))
+	binary.LittleEndian.PutUint32(bufferSize[0:], uint32(nn.LayerCount))
 	_, err := writer.Write(bufferSize)
 
 	check(err)
-	fmt.Printf("wrote value %d\n", uint32(nn.Count))
+	fmt.Printf("wrote value %d\n", uint32(nn.LayerCount))
 
 	// save the sizes array
-	buffer := make([]byte, nn.Count*4)
-	for i := 0; i < nn.Count; i++ {
+	buffer := make([]byte, nn.LayerCount*4)
+	for i := 0; i < nn.LayerCount; i++ {
 		binary.LittleEndian.PutUint32(buffer[i*4:], uint32(nn.Sizes[i]))
 	}
@@ -230,26 +268,26 @@ func (nn *NeuralNetwork) SaveState(writer io.Writer) {
 
 	// save biases
 	////////////////////////
-	for i := 1; i < nn.Count; i++ {
+	for i := 1; i < nn.LayerCount; i++ {
 		saveDense(writer, nn.Biases[i])
 	}
 
 	// save weights
 	////////////////////////
-	for i := 1; i < nn.Count; i++ {
+	for i := 1; i < nn.LayerCount; i++ {
 		saveDense(writer, nn.Weights[i])
 	}
 }
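The on-disk layout is: layer count (uint32, little-endian), the sizes array, then per-layer biases and weights via `saveDense`. A hypothetical round trip, assuming `LoadState` can rebuild the slices it indexes (the elided lines between the hunks below suggest some reinitialization happens there):

```go
// Hypothetical round trip, not part of the diff; check and the dense
// helpers are defined elsewhere in the package.
f, err := os.Create("network.state")
check(err)
nn.SaveState(f)
f.Close()

f, err = os.Open("network.state")
check(err)
restored := &NeuralNetwork{}
restored.LoadState(f)
f.Close()
```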
 
 func (nn *NeuralNetwork) LoadState(reader io.Reader) {
 	// Read the layer count
-	nn.Count = readInt(reader)
+	nn.LayerCount = readInt(reader)
 
 	// Read the sizes array
-	sizeBuffer := readByteArray(reader, nn.Count*4)
-	nn.Sizes = make([]int, nn.Count)
+	sizeBuffer := readByteArray(reader, nn.LayerCount*4)
+	nn.Sizes = make([]int, nn.LayerCount)
 
-	for i := 0; i < nn.Count; i++ {
+	for i := 0; i < nn.LayerCount; i++ {
 		nn.Sizes[i] = int(binary.LittleEndian.Uint32(sizeBuffer[i*4:]))
 		// fmt.Printf("LoadState: nn.Sizes[%d] %d \n", i, nn.Sizes[i])
 	}
@@ -259,20 +297,20 @@ func (nn *NeuralNetwork) LoadState(reader io.Reader) {
 
 	// read Biases
 	nn.Biases[0] = &mat.Dense{}
-	for i := 1; i < nn.Count; i++ {
+	for i := 1; i < nn.LayerCount; i++ {
 		nn.Biases = append(nn.Biases, &mat.Dense{})
 		nn.Biases[i] = readDense(reader, nn.Biases[i])
 	}
 
 	// read Weights
 	nn.Weights[0] = &mat.Dense{}
-	for i := 1; i < nn.Count; i++ {
+	for i := 1; i < nn.LayerCount; i++ {
 		nn.Weights = append(nn.Weights, &mat.Dense{})
 		nn.Weights[i] = readDense(reader, nn.Weights[i])
 	}
 
-	nn.A = make([]*mat.Dense, nn.Count)
-	nn.Z = make([]*mat.Dense, nn.Count)
+	nn.A = make([]*mat.Dense, nn.LayerCount)
+	nn.Z = make([]*mat.Dense, nn.LayerCount)
 
 	// fmt.Printf("\nLoadState end\n")
 }
@@ -280,32 +318,36 @@ func (nn *NeuralNetwork) LoadState(reader io.Reader) {
 func (nn *NeuralNetwork) forward(aIn mat.Matrix) {
 	nn.A[0] = mat.DenseCopyOf(aIn)
 
-	for i := 1; i < nn.Count; i++ {
-		nn.A[i] = mat.NewDense(nn.Sizes[i], 1, nil)
-		aSrc := nn.A[i-1]
-		aDst := nn.A[i]
+	for l := 1; l < nn.LayerCount; l++ {
+		nn.A[l] = mat.NewDense(nn.Sizes[l], 1, nil)
+		aSrc := nn.A[l-1]
+		aDst := nn.A[l]
 
 		// Each iteration implements the formula below for neuron activation values
 		// A[l] = σ(W[l]*A[l−1]+B[l])
 
 		// W[l]*A[l−1]
-		aDst.Mul(nn.Weights[i], aSrc)
+		aDst.Mul(nn.Weights[l], aSrc)
 
 		// W[l]*A[l−1]+B[l]
-		aDst.Add(aDst, nn.Biases[i])
+		aDst.Add(aDst, nn.Biases[l])
 
 		// Save the raw activation value for back propagation
-		nn.Z[i] = mat.DenseCopyOf(aDst)
+		nn.Z[l] = mat.DenseCopyOf(aDst)
 
 		// σ(W[l]*A[l−1]+B[l])
 		aDst.Apply(applySigmoid, aDst)
 	}
 }
 
-func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) {
+// backward returns the calculated derivatives of biases and weights for
+// each layer for a single aIn/aOut training pair
+func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) (dB, dW []*mat.Dense) {
 	nn.forward(aIn)
 
-	lastLayerNum := nn.Count - 1
+	lastLayerNum := nn.LayerCount - 1
+	dB = make([]*mat.Dense, nn.LayerCount)
+	dW = make([]*mat.Dense, nn.LayerCount)
 
 	// To calculate new values of weights and biases
 	// the following formulas are used:
@@ -336,24 +378,11 @@ func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) {
 	weights := &mat.Dense{}
 	weights.Mul(delta, nn.A[lastLayerNum-1].T())
 
-	// fmt.Printf("Prev biases[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.Biases[lastLayerNum], mat.Prefix(""), mat.Excerpt(0)))
-	// fmt.Printf("Prev weights[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.Weights[lastLayerNum], mat.Prefix(""), mat.Excerpt(0)))
-
-	// fmt.Printf("Expect[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(aOut, mat.Prefix(""), mat.Excerpt(0)))
-	// fmt.Printf("Result[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.result(), mat.Prefix(""), mat.Excerpt(0)))
-	// fmt.Printf("nn.Z[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.Z[lastLayerNum], mat.Prefix(""), mat.Excerpt(0)))
-	// fmt.Printf("sigmoidsPrime[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(sigmoidsPrime, mat.Prefix(""), mat.Excerpt(0)))
-	// fmt.Printf("Err[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(err, mat.Prefix(""), mat.Excerpt(0)))
-	// fmt.Printf("Biases gradient[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(biases, mat.Prefix(""), mat.Excerpt(0)))
-	// fmt.Printf("Weights gradient[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(weights, mat.Prefix(""), mat.Excerpt(0)))
-
 	// Initialize new weights and biases values with last layer values
-	// newBiases := []*mat.Dense{nn.BGradient[lastLayerNum].ApplyDelta(nn.Biases[lastLayerNum], biases)}
-	// newWeights := []*mat.Dense{nn.WGradient[lastLayerNum].ApplyDelta(nn.Weights[lastLayerNum], weights)}
+	dB[lastLayerNum] = biases
+	dW[lastLayerNum] = weights
 
-	nn.BGradient[lastLayerNum].AccumGradients(biases)
-	nn.WGradient[lastLayerNum].AccumGradients(weights)
-	// Next layer Weights and Biases are calculated using same formulas:
+	// The next layer's derivatives of Weights and Biases are calculated using the same formulas:
 	// ∂E/∂W[l] = A[l−1]*δ[l]
 	// ∂E/∂B[l] = δ[l]
@@ -361,7 +390,7 @@ func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) {
 	// δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
 	// Where Wt[l+1] is the transposed matrix of the actual Weights from
 	// the forward step
-	for l := nn.Count - 2; l > 0; l-- {
+	for l := nn.LayerCount - 2; l > 0; l-- {
 		// Calculate sigmoids prime σ'(Z[l]) for layer l
 		sigmoidsPrime := &mat.Dense{}
 		sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[l])
@@ -412,23 +441,12 @@ func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) {
 		weights := &mat.Dense{}
 		weights.Mul(delta, nn.A[l-1].T())
 
-		// fmt.Printf("Weights gradient[%v]:\n%v\n\n", l, mat.Formatted(weights, mat.Prefix(""), mat.Excerpt(0)))
-		// fmt.Printf("Biases gradient[%v]:\n%v\n\n", l, mat.Formatted(biases, mat.Prefix(""), mat.Excerpt(0)))
-
-		// !Prepend! new Biases and Weights
-		// newBiases = append([]*mat.Dense{nn.BGradient[l].ApplyDelta(nn.Biases[l], biases)}, newBiases...)
-		// newWeights = append([]*mat.Dense{nn.WGradient[l].ApplyDelta(nn.Weights[l], weights)}, newWeights...)
-		nn.BGradient[l].AccumGradients(biases)
-		nn.WGradient[l].AccumGradients(weights)
+		dB[l] = biases
+		dW[l] = weights
 	}
-
-	// newBiases = append([]*mat.Dense{&mat.Dense{}}, newBiases...)
-	// newWeights = append([]*mat.Dense{&mat.Dense{}}, newWeights...)
-
-	// nn.Biases = newBiases
-	// nn.Weights = newWeights
+	return
 }
 
 func (nn *NeuralNetwork) result() *mat.Dense {
-	return nn.A[nn.Count-1]
+	return nn.A[nn.LayerCount-1]
 }
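Collected from the comments above, the per-layer quantities `backward` now returns are the standard backpropagation derivatives. In the document's notation (the output-layer form assumes a quadratic cost, which matches the result-minus-expected error term this code uses):

```latex
\delta[L] = (A[L] - y) \odot \sigma'(Z[L])
\delta[l] = \left( W[l+1]^{T}\,\delta[l+1] \right) \odot \sigma'(Z[l])
\partial E / \partial B[l] = \delta[l]
\partial E / \partial W[l] = \delta[l]\,A[l-1]^{T}
```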