neuralnetwork.go

/*
 * MIT License
 *
 * Copyright (c) 2019 Alexey Edelev <semlanik@gmail.com>
 *
 * This file is part of NeuralNetwork project https://git.semlanik.org/semlanik/NeuralNetwork
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this
 * software and associated documentation files (the "Software"), to deal in the Software
 * without restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software, and
 * to permit persons to whom the Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies
 * or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
 * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
package neuralnetworkbase

import (
	teach "../teach"
	mat "gonum.org/v1/gonum/mat"
)
// NeuralNetwork is a simple neural network implementation
//
// Resources:
// http://neuralnetworksanddeeplearning.com
// https://www.youtube.com/watch?v=fNk_zzaMoSs
//
// Matrix: A
// Description: A is the set of calculated neuron activations after sigmoid correction
// Format:
//      0           n           N
// ⎡A[0] ⎤ ... ⎡A[0] ⎤ ... ⎡A[0] ⎤
// ⎢A[1] ⎥ ... ⎢A[1] ⎥ ... ⎢A[1] ⎥
// ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
// ⎢A[i] ⎥ ... ⎢A[i] ⎥ ... ⎢A[i] ⎥
// ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
// ⎣A[s] ⎦ ... ⎣A[s] ⎦ ... ⎣A[s] ⎦
// Where s = Sizes[n], N = len(Sizes)
//
// Matrix: Z
// Description: Z is the set of calculated raw neuron activations
// Format:
//      0           n           N
// ⎡Z[0] ⎤ ... ⎡Z[0] ⎤ ... ⎡Z[0] ⎤
// ⎢Z[1] ⎥ ... ⎢Z[1] ⎥ ... ⎢Z[1] ⎥
// ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
// ⎢Z[i] ⎥ ... ⎢Z[i] ⎥ ... ⎢Z[i] ⎥
// ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
// ⎣Z[s] ⎦ ... ⎣Z[s] ⎦ ... ⎣Z[s] ⎦
// Where s = Sizes[n], N = len(Sizes)
//
// Matrix: Biases
// Description: Biases is the set of biases per layer except L0
// Format:
// ⎡b[0] ⎤
// ⎢b[1] ⎥
// ⎢ ... ⎥
// ⎢b[i] ⎥
// ⎢ ... ⎥
// ⎣b[s] ⎦
// Where s = Sizes[n]
//
// Matrix: Weights
// Description: Weights is the set of weights per layer except L0
// Format:
// ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
// ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
// ⎢ ...                     ... ⎥
// ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
// ⎢ ...                     ... ⎥
// ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
// Where s = Sizes[n], s' = Sizes[n-1]
type NeuralNetwork struct {
	Count          int
	Sizes          []int
	Biases         []*mat.Dense
	Weights        []*mat.Dense
	A              []*mat.Dense
	Z              []*mat.Dense
	alpha          float64
	trainingCycles int
}
// NewNeuralNetwork creates a network with the given layer sizes, learning
// rate nu and number of training cycles
func NewNeuralNetwork(Sizes []int, nu float64, trainingCycles int) (nn *NeuralNetwork) {
	nn = &NeuralNetwork{}
	nn.Sizes = Sizes
	nn.Count = len(Sizes)
	nn.Weights = make([]*mat.Dense, nn.Count)
	nn.Biases = make([]*mat.Dense, nn.Count)
	nn.A = make([]*mat.Dense, nn.Count)
	nn.Z = make([]*mat.Dense, nn.Count)
	nn.alpha = nu / float64(nn.Sizes[0])
	nn.trainingCycles = trainingCycles
	// Layer 0 is the input layer, so it has no weights and biases
	for i := 1; i < nn.Count; i++ {
		nn.Weights[i] = generateRandomDense(nn.Sizes[i], nn.Sizes[i-1])
		nn.Biases[i] = generateRandomDense(nn.Sizes[i], 1)
	}
	return
}
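
// generateRandomDense is not defined in this file; it presumably lives
// elsewhere in the package. A minimal sketch of a plausible implementation,
// assuming normally distributed initial values (an assumption, not
// necessarily the project's actual code; needs "math/rand"):
//
//	func generateRandomDense(rows, columns int) *mat.Dense {
//		data := make([]float64, rows*columns)
//		for i := range data {
//			data[i] = rand.NormFloat64()
//		}
//		return mat.NewDense(rows, columns, data)
//	}
//
// Usage sketch for NewNeuralNetwork (the sizes, nu and trainingCycles values
// below are arbitrary illustration values):
//
//	nn := NewNeuralNetwork([]int{784, 30, 10}, 0.1, 30)
//	// nn.Weights[1] is 30x784, nn.Biases[1] is 30x1,
//	// nn.Weights[2] is 10x30, nn.Biases[2] is 10x1.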
// Predict runs forward propagation for aIn and returns the index and value
// of the strongest output activation
func (nn *NeuralNetwork) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
	nn.forward(aIn)
	result := nn.result()
	r, _ := result.Dims()
	max = 0.0
	maxIndex = 0
	for i := 0; i < r; i++ {
		if result.At(i, 0) > max {
			max = result.At(i, 0)
			maxIndex = i
		}
	}
	return
}
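
// Usage sketch for Predict; aIn must be a column vector with Sizes[0] rows
// (the values below are made up for illustration, assuming Sizes[0] == 3):
//
//	input := mat.NewDense(3, 1, []float64{0.5, 0.1, 0.9})
//	index, activation := nn.Predict(input)
//	// index is the output neuron with the highest activation,
//	// activation is its sigmoid output in (0, 1).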
// Teach trains the network by running backpropagation over the teacher's
// samples for the configured number of training cycles
func (nn *NeuralNetwork) Teach(teacher teach.Teacher) {
	for i := 0; i < nn.trainingCycles; i++ {
		for teacher.Next() {
			nn.backward(teacher.GetData(), teacher.GetExpect())
		}
	}
}
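
// The teach.Teacher type is declared in the teach package. Judging only from
// its usage above, it exposes an iterator over training samples; a sketch of
// the inferred interface (an assumption, the real declaration may differ):
//
//	type Teacher interface {
//		Next() bool            // advance to the next training sample
//		GetData() mat.Matrix   // input activations, Sizes[0] x 1
//		GetExpect() mat.Matrix // expected output activations, Sizes[N-1] x 1
//	}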
// SaveState is a placeholder; persistence is not implemented yet
func (nn *NeuralNetwork) SaveState(filename string) {
}

// LoadState is a placeholder; persistence is not implemented yet
func (nn *NeuralNetwork) LoadState(filename string) {
}
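
// One possible way to fill the stubs above, using gonum's binary
// (un)marshaling of mat.Dense. This is a sketch only, with error handling
// elided; it assumes "os" is imported and that matrices are written and read
// back in the same layer order:
//
//	func (nn *NeuralNetwork) SaveState(filename string) {
//		f, _ := os.Create(filename)
//		defer f.Close()
//		for i := 1; i < nn.Count; i++ {
//			nn.Weights[i].MarshalBinaryTo(f)
//			nn.Biases[i].MarshalBinaryTo(f)
//		}
//	}
//
//	func (nn *NeuralNetwork) LoadState(filename string) {
//		f, _ := os.Open(filename)
//		defer f.Close()
//		for i := 1; i < nn.Count; i++ {
//			w, b := &mat.Dense{}, &mat.Dense{}
//			w.UnmarshalBinaryFrom(f)
//			b.UnmarshalBinaryFrom(f)
//			nn.Weights[i], nn.Biases[i] = w, b
//		}
//	}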
func (nn *NeuralNetwork) forward(aIn mat.Matrix) {
	nn.A[0] = mat.DenseCopyOf(aIn)
	for i := 1; i < nn.Count; i++ {
		nn.A[i] = mat.NewDense(nn.Sizes[i], 1, nil)
		aSrc := nn.A[i-1]
		aDst := nn.A[i]

		// Each iteration implements the formula below for neuron activation values
		// A[l]=σ(W[l]*A[l−1]+B[l])

		// W[l]*A[l−1]
		aDst.Mul(nn.Weights[i], aSrc)

		// W[l]*A[l−1]+B[l]
		aDst.Add(aDst, nn.Biases[i])

		// Save raw activation value for back propagation
		nn.Z[i] = mat.DenseCopyOf(aDst)

		// σ(W[l]*A[l−1]+B[l])
		aDst.Apply(applySigmoid, aDst)
	}
}
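
// applySigmoid and applySigmoidPrime are not defined in this file; they match
// the callback signature required by mat.Dense.Apply. A sketch of the standard
// sigmoid and its derivative σ'(z) = σ(z)(1−σ(z)) (an assumption about helpers
// defined elsewhere in the package; needs "math"):
//
//	func applySigmoid(_, _ int, z float64) float64 {
//		return 1.0 / (1.0 + math.Exp(-z))
//	}
//
//	func applySigmoidPrime(i, j int, z float64) float64 {
//		s := applySigmoid(i, j, z)
//		return s * (1.0 - s)
//	}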
func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) {
	nn.forward(aIn)

	lastLayerNum := nn.Count - 1

	// To calculate new values of weights and biases
	// the following formulas are used:
	// W[l] = A[l−1]*δ[l]
	// B[l] = δ[l]

	// For the last layer the δ value is calculated by the following:
	// δ = (A[L]−y)⊙σ'(Z[L])

	// Calculate initial error for last layer L
	// error = A[L]-y
	// Where y is the expected activations set
	err := &mat.Dense{}
	err.Sub(nn.result(), aOut)

	// Calculate sigmoids prime σ'(Z[L]) for last layer L
	sigmoidsPrime := &mat.Dense{}
	sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[lastLayerNum])

	// (A[L]−y)⊙σ'(Z[L])
	delta := &mat.Dense{}
	delta.MulElem(err, sigmoidsPrime)

	// B[L] = δ[L]
	biases := mat.DenseCopyOf(delta)

	// W[L] = A[L−1]*δ[L]
	weights := &mat.Dense{}
	weights.Mul(delta, nn.A[lastLayerNum-1].T())

	// Initialize new weights and biases values with last layer values
	newBiases := []*mat.Dense{makeBackGradient(biases, nn.Biases[lastLayerNum], nn.alpha)}
	newWeights := []*mat.Dense{makeBackGradient(weights, nn.Weights[lastLayerNum], nn.alpha)}

	// Save calculated delta value to the temporary error variable
	err = delta

	// Next layer Weights and Biases are calculated using the same formulas:
	// W[l] = A[l−1]*δ[l]
	// B[l] = δ[l]

	// But δ[l] is calculated using a different formula:
	// δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
	// Where Wt[l+1] is the transposed matrix of actual Weights from
	// the forward step
	for l := nn.Count - 2; l > 0; l-- {
		// Calculate sigmoids prime σ'(Z[l]) for layer l
		sigmoidsPrime := &mat.Dense{}
		sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[l])

		// (Wt[l+1])*δ[l+1]
		// err below is the delta from the previous step (l+1)
		delta := &mat.Dense{}
		wdelta := &mat.Dense{}
		wdelta.Mul(nn.Weights[l+1].T(), err)

		// Calculate the new delta and store it in the temporary variable err
		// δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
		delta.MulElem(wdelta, sigmoidsPrime)
		err = delta

		// B[l] = δ[l]
		biases := mat.DenseCopyOf(delta)

		// W[l] = A[l−1]*δ[l]
		// At this point it's required to explain an inaccuracy
		// in the formula.
		// Multiplying the activations matrix for layer l-1 by δ[l] is impossible,
		// because the matrices have the following shapes:
		// A[l-1]       δ[l]
		// ⎡A[0] ⎤     ⎡δ[0] ⎤
		// ⎢A[1] ⎥     ⎢δ[1] ⎥
		// ⎢ ... ⎥     ⎢ ... ⎥
		// ⎢A[i] ⎥  X  ⎢δ[i] ⎥
		// ⎢ ... ⎥     ⎢ ... ⎥
		// ⎣A[s']⎦     ⎣δ[s] ⎦
		// So we need to modify these matrices to apply the multiplication and get
		// a Weights matrix of the following shape:
		// ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
		// ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
		// ⎢ ...                     ... ⎥
		// ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
		// ⎢ ...                     ... ⎥
		// ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
		// So we swap the matrices and transpose A[l-1] to get a valid multiplication
		// of the following shape:
		// δ[l]      A[l-1]
		// ⎡δ[0] ⎤ x [A[0] A[1] ... A[i] ... A[s']]
		// ⎢δ[1] ⎥
		// ⎢ ... ⎥
		// ⎢δ[i] ⎥
		// ⎢ ... ⎥
		// ⎣δ[s] ⎦
		weights := &mat.Dense{}
		weights.Mul(delta, nn.A[l-1].T())

		// !Prepend! new Biases and Weights
		newBiases = append([]*mat.Dense{makeBackGradient(biases, nn.Biases[l], nn.alpha)}, newBiases...)
		newWeights = append([]*mat.Dense{makeBackGradient(weights, nn.Weights[l], nn.alpha)}, newWeights...)
	}

	// Layer 0 has no biases and weights; prepend empty placeholders to keep
	// layer indexing aligned with Sizes
	newBiases = append([]*mat.Dense{&mat.Dense{}}, newBiases...)
	newWeights = append([]*mat.Dense{&mat.Dense{}}, newWeights...)

	nn.Biases = newBiases
	nn.Weights = newWeights
}
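
// makeBackGradient is not defined in this file either. Judging by its call
// sites, it applies a gradient-descent step to the current weights or biases;
// a plausible sketch implementing W = W − α∇W (an assumption, not necessarily
// the project's actual code):
//
//	func makeBackGradient(gradient, current *mat.Dense, alpha float64) *mat.Dense {
//		result := &mat.Dense{}
//		result.Scale(alpha, gradient)
//		result.Sub(current, result)
//		return result
//	}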
// result returns the activations of the output layer
func (nn *NeuralNetwork) result() *mat.Dense {
	return nn.A[nn.Count-1]
}