rprop.go

/*
 * MIT License
 *
 * Copyright (c) 2019 Alexey Edelev <semlanik@gmail.com>
 *
 * This file is part of NeuralNetwork project https://git.semlanik.org/semlanik/NeuralNetwork
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this
 * software and associated documentation files (the "Software"), to deal in the Software
 * without restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software, and
 * to permit persons to whom the Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies
 * or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
 * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
package neuralnetworkbase

import (
	"fmt"
	"io"

	teach "../teach"

	mat "gonum.org/v1/gonum/mat"
)
// RProp is a simple neural network implementation
//
// Resources:
// http://neuralnetworksanddeeplearning.com
// https://www.youtube.com/watch?v=fNk_zzaMoSs
//
// Matrix: A
// Description: A is a set of calculated neuron activations after sigmoid correction
// Format:    0          l          L
//         ⎡A[0] ⎤ ... ⎡A[0] ⎤ ... ⎡A[0] ⎤
//         ⎢A[1] ⎥ ... ⎢A[1] ⎥ ... ⎢A[1] ⎥
//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
//         ⎢A[i] ⎥ ... ⎢A[i] ⎥ ... ⎢A[i] ⎥
//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
//         ⎣A[s] ⎦ ... ⎣A[s] ⎦ ... ⎣A[s] ⎦
// Where s = Sizes[l] - Neural network layer size
//       L = len(Sizes) - Number of neural network layers
//
// Matrix: Z
// Description: Z is a set of calculated raw neuron activations
// Format:    0          l          L
//         ⎡Z[0] ⎤ ... ⎡Z[0] ⎤ ... ⎡Z[0] ⎤
//         ⎢Z[1] ⎥ ... ⎢Z[1] ⎥ ... ⎢Z[1] ⎥
//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
//         ⎢Z[i] ⎥ ... ⎢Z[i] ⎥ ... ⎢Z[i] ⎥
//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
//         ⎣Z[s] ⎦ ... ⎣Z[s] ⎦ ... ⎣Z[s] ⎦
// Where s = Sizes[l] - Neural network layer size
//       L = len(Sizes) - Number of neural network layers
//
// Matrix: Biases
// Description: Biases is a set of biases per layer except l0
// NOTE: l0 is always an empty Dense because the first layer
// doesn't have connections to a previous layer
// Format:    1          l          L
//         ⎡b[0] ⎤ ... ⎡b[0] ⎤ ... ⎡b[0] ⎤
//         ⎢b[1] ⎥ ... ⎢b[1] ⎥ ... ⎢b[1] ⎥
//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
//         ⎢b[i] ⎥ ... ⎢b[i] ⎥ ... ⎢b[i] ⎥
//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
//         ⎣b[s] ⎦ ... ⎣b[s] ⎦ ... ⎣b[s] ⎦
// Where s = Sizes[l] - Neural network layer size
//       L = len(Sizes) - Number of neural network layers
//
// Matrix: Weights
// Description: Weights is a set of weights per layer except l0
// NOTE: l0 is always an empty Dense because the first layer
// doesn't have connections to a previous layer
// Format:                  1                                   l                                   L
//         ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
//         ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
//         ⎢             ...             ⎥ ... ⎢             ...             ⎥ ... ⎢             ...             ⎥
//         ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
//         ⎢             ...             ⎥ ... ⎢             ...             ⎥ ... ⎢             ...             ⎥
//         ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
// Where s  = Sizes[l] - Neural network layer size
//       s' = Sizes[l-1] - Previous neural network layer size
//       L  = len(Sizes) - Number of neural network layers
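// For example (illustrative sizes only): a network with Sizes = []int{4, 5, 3}
// has 4 input neurons, a hidden layer of 5 neurons and 3 output neurons, so
// Weights[1] is 5×4, Weights[2] is 3×5, and Biases[1] and Biases[2] are 5×1
// and 3×1 column vectors, while Weights[0] and Biases[0] stay empty.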
type RProp struct {
	Count          int
	Sizes          []int
	Biases         []*mat.Dense
	Weights        []*mat.Dense
	A              []*mat.Dense
	Z              []*mat.Dense
	alpha          float64
	trainingCycles int
}
func NewRProp(sizes []int, nu float64, trainingCycles int) (nn *RProp, err error) {
	if len(sizes) < 3 {
		return nil, fmt.Errorf("invalid network configuration: %v", sizes)
	}

	for i := 0; i < len(sizes); i++ {
		if sizes[i] < 2 {
			return nil, fmt.Errorf("invalid network configuration: %v", sizes)
		}
	}

	if nu <= 0.0 || nu > 1.0 {
		return nil, fmt.Errorf("invalid η value: %v", nu)
	}

	if trainingCycles <= 0 {
		return nil, fmt.Errorf("invalid training cycles number: %v", trainingCycles)
	}

	if trainingCycles < 100 {
		fmt.Println("Training cycles number is probably too small")
	}

	nn = &RProp{}
	nn.Sizes = sizes
	nn.Count = len(sizes)
	nn.Weights = make([]*mat.Dense, nn.Count)
	nn.Biases = make([]*mat.Dense, nn.Count)
	nn.A = make([]*mat.Dense, nn.Count)
	nn.Z = make([]*mat.Dense, nn.Count)
	nn.alpha = nu / float64(nn.Sizes[0])
	nn.trainingCycles = trainingCycles

	for i := 1; i < nn.Count; i++ {
		nn.Weights[i] = generateRandomDense(nn.Sizes[i], nn.Sizes[i-1])
		nn.Biases[i] = generateRandomDense(nn.Sizes[i], 1)
	}
	return
}
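// generateRandomDense is defined elsewhere in this package. A minimal sketch
// of the assumed behavior (fill a rows×cols matrix with normally distributed
// values via math/rand; the project's actual initialization may differ):
//
//	func generateRandomDense(rows, cols int) *mat.Dense {
//		data := make([]float64, rows*cols)
//		for i := range data {
//			data[i] = rand.NormFloat64()
//		}
//		return mat.NewDense(rows, cols, data)
//	}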
func (nn *RProp) Copy() (out *RProp) {
	out = &RProp{}
	out.Sizes = nn.Sizes
	out.Count = nn.Count
	out.Weights = make([]*mat.Dense, nn.Count)
	out.Biases = make([]*mat.Dense, nn.Count)
	out.A = make([]*mat.Dense, nn.Count)
	out.Z = make([]*mat.Dense, nn.Count)
	out.alpha = nn.alpha
	out.trainingCycles = nn.trainingCycles
	for i := 1; i < out.Count; i++ {
		// Deep-copy weights and biases from the receiver into the new network
		out.Weights[i] = mat.DenseCopyOf(nn.Weights[i])
		out.Biases[i] = mat.DenseCopyOf(nn.Biases[i])
	}
	return
}
func (nn *RProp) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
	r, _ := aIn.Dims()
	if r != nn.Sizes[0] {
		fmt.Printf("Invalid number of rows in input matrix: %v\n", r)
		return -1, 0.0
	}

	nn.forward(aIn)
	result := nn.result()
	r, _ = result.Dims()
	max = 0.0
	maxIndex = 0
	for i := 0; i < r; i++ {
		if result.At(i, 0) > max {
			max = result.At(i, 0)
			maxIndex = i
		}
	}
	return
}
func (nn *RProp) Teach(teacher teach.Teacher) {
	for i := 0; i < nn.trainingCycles; i++ {
		for teacher.NextData() {
			nn.backward(teacher.GetData())
		}
	}
}
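// Example usage (a sketch; the layer sizes, η value and cycle count below are
// arbitrary, and the teacher and input values come from the caller; teacher
// implements the teach.Teacher interface from the teach package):
//
//	nn, err := NewRProp([]int{784, 30, 10}, 0.1, 100)
//	if err != nil {
//		log.Fatal(err)
//	}
//	nn.Teach(teacher)
//	index, activation := nn.Predict(input) // input is a Sizes[0]×1 mat.Matrix
//	fmt.Printf("predicted class %d (activation %f)\n", index, activation)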
// SaveState is a placeholder; state serialization is not implemented yet.
func (nn *RProp) SaveState(writer io.Writer) {
}

// LoadState is a placeholder; state deserialization is not implemented yet.
func (nn *RProp) LoadState(reader io.Reader) {
}
func (nn *RProp) forward(aIn mat.Matrix) {
	nn.A[0] = mat.DenseCopyOf(aIn)
	for i := 1; i < nn.Count; i++ {
		nn.A[i] = mat.NewDense(nn.Sizes[i], 1, nil)
		aSrc := nn.A[i-1]
		aDst := nn.A[i]

		// Each iteration implements the formula below for neuron activation values
		// A[l] = σ(W[l]*A[l−1] + B[l])

		// W[l]*A[l−1]
		aDst.Mul(nn.Weights[i], aSrc)

		// W[l]*A[l−1] + B[l]
		aDst.Add(aDst, nn.Biases[i])

		// Save the raw activation value for back propagation
		nn.Z[i] = mat.DenseCopyOf(aDst)

		// σ(W[l]*A[l−1] + B[l])
		aDst.Apply(applySigmoid, aDst)
	}
}
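// applySigmoid and applySigmoidPrime are defined elsewhere in this package
// with the callback signature expected by mat.Dense.Apply. A minimal sketch
// of the assumed implementations (using math.Exp):
//
//	func applySigmoid(i, j int, v float64) float64 {
//		return 1.0 / (1.0 + math.Exp(-v))
//	}
//
//	func applySigmoidPrime(i, j int, v float64) float64 {
//		s := applySigmoid(0, 0, v)
//		return s * (1.0 - s) // σ'(z) = σ(z)·(1−σ(z))
//	}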
func (nn *RProp) backward(aIn, aOut mat.Matrix) {
	nn.forward(aIn)

	lastLayerNum := nn.Count - 1

	// To calculate new values of weights and biases
	// the following formulas are used:
	// W[l] = A[l−1]*δ[l]
	// B[l] = δ[l]

	// For the last layer the δ value is calculated as follows:
	// δ = (A[L]−y)⊙σ'(Z[L])

	// Calculate the initial error for the last layer L
	// error = A[L]-y
	// Where y is the expected activations set
	err := &mat.Dense{}
	err.Sub(nn.result(), aOut)

	// Calculate the sigmoid prime σ'(Z[L]) for the last layer L
	sigmoidsPrime := &mat.Dense{}
	sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[lastLayerNum])

	// (A[L]−y)⊙σ'(Z[L])
	delta := &mat.Dense{}
	delta.MulElem(err, sigmoidsPrime)

	// B[L] = δ[L]
	biases := mat.DenseCopyOf(delta)

	// W[L] = A[L−1]*δ[L]
	weights := &mat.Dense{}
	weights.Mul(delta, nn.A[lastLayerNum-1].T())

	// Initialize the new weights and biases values with the last layer values
	newBiases := []*mat.Dense{makeBackGradient(biases, nn.Biases[lastLayerNum], nn.alpha)}
	newWeights := []*mat.Dense{makeBackGradient(weights, nn.Weights[lastLayerNum], nn.alpha)}

	// Save the calculated delta value to the temporary error variable
	err = delta

	// Weights and biases of the remaining layers are calculated using the same formulas:
	// W[l] = A[l−1]*δ[l]
	// B[l] = δ[l]
	// But δ[l] is calculated using a different formula:
	// δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
	// Where Wt[l+1] is the transposed matrix of the actual Weights from
	// the forward step
	for l := nn.Count - 2; l > 0; l-- {
		// Calculate the sigmoid prime σ'(Z[l]) for layer l
		sigmoidsPrime := &mat.Dense{}
		sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[l])

		// (Wt[l+1])*δ[l+1]
		// err below is the delta from the previous step (l+1)
		delta := &mat.Dense{}
		wdelta := &mat.Dense{}
		wdelta.Mul(nn.Weights[l+1].T(), err)

		// Calculate the new delta and store it in the temporary variable err
		// δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
		delta.MulElem(wdelta, sigmoidsPrime)
		err = delta

		// B[l] = δ[l]
		biases := mat.DenseCopyOf(delta)

		// W[l] = A[l−1]*δ[l]
		// At this point an explanation is required for an inaccuracy
		// in the formula.
		// Multiplying the activations matrix of layer l-1 by δ[l] is impossible,
		// because the matrices have the following shapes:
		//  A[l-1]      δ[l]
		// ⎡A[0] ⎤    ⎡δ[0] ⎤
		// ⎢A[1] ⎥    ⎢δ[1] ⎥
		// ⎢ ... ⎥    ⎢ ... ⎥
		// ⎢A[i] ⎥  X ⎢δ[i] ⎥
		// ⎢ ... ⎥    ⎢ ... ⎥
		// ⎣A[s']⎦    ⎣δ[s] ⎦
		// So we need to modify these matrices to make the multiplication valid
		// and get a weights matrix of the following shape:
		// ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
		// ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
		// ⎢             ...             ⎥
		// ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
		// ⎢             ...             ⎥
		// ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
		// So we swap the operands and transpose A[l-1] to get a valid
		// multiplication of the following form:
		//  δ[l]        A[l-1]
		// ⎡δ[0] ⎤ x [A[0] A[1] ... A[i] ... A[s']]
		// ⎢δ[1] ⎥
		// ⎢ ... ⎥
		// ⎢δ[i] ⎥
		// ⎢ ... ⎥
		// ⎣δ[s] ⎦
		weights := &mat.Dense{}
		weights.Mul(delta, nn.A[l-1].T())

		// !Prepend! the new biases and weights
		newBiases = append([]*mat.Dense{makeBackGradient(biases, nn.Biases[l], nn.alpha)}, newBiases...)
		newWeights = append([]*mat.Dense{makeBackGradient(weights, nn.Weights[l], nn.alpha)}, newWeights...)
	}

	// Layer 0 has no weights or biases, so prepend empty matrices for it
	newBiases = append([]*mat.Dense{&mat.Dense{}}, newBiases...)
	newWeights = append([]*mat.Dense{&mat.Dense{}}, newWeights...)

	nn.Biases = newBiases
	nn.Weights = newWeights
}
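// makeBackGradient is defined elsewhere in this package and combines a
// computed gradient with the current parameter matrix. A minimal sketch under
// the assumption of a plain gradient-descent step W' = W − α*∇W (the actual
// implementation may apply RProp-style step-size adaptation instead):
//
//	func makeBackGradient(gradient, current *mat.Dense, alpha float64) *mat.Dense {
//		step := &mat.Dense{}
//		step.Scale(alpha, gradient) // α*∇W
//		result := &mat.Dense{}
//		result.Sub(current, step) // W − α*∇W
//		return result
//	}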
func (nn *RProp) result() *mat.Dense {
	return nn.A[nn.Count-1]
}