neuralnetwork.go

/*
 * MIT License
 *
 * Copyright (c) 2019 Alexey Edelev <semlanik@gmail.com>, Tatyana Borisova <tanusshhka@mail.ru>
 *
 * This file is part of NeuralNetwork project https://git.semlanik.org/semlanik/NeuralNetwork
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this
 * software and associated documentation files (the "Software"), to deal in the Software
 * without restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software, and
 * to permit persons to whom the Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies
 * or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
 * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
package neuralnetworkbase

import (
    "encoding/binary"
    "fmt"
    "io"

    teach "../teach"

    mat "gonum.org/v1/gonum/mat"
)

// NeuralNetwork is a simple neural network implementation
//
// Resources:
// http://neuralnetworksanddeeplearning.com
// https://www.youtube.com/watch?v=fNk_zzaMoSs
// http://www.inf.fu-berlin.de/lehre/WS06/Musterererkennung/Paper/rprop.pdf
//
// Matrix: A
// Description: A is the set of calculated neuron activations after the
// sigmoid function is applied
// Format:
//    0            l            L
// ⎡A[0] ⎤ ... ⎡A[0] ⎤ ... ⎡A[0] ⎤
// ⎢A[1] ⎥ ... ⎢A[1] ⎥ ... ⎢A[1] ⎥
// ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
// ⎢A[i] ⎥ ... ⎢A[i] ⎥ ... ⎢A[i] ⎥
// ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
// ⎣A[s] ⎦ ... ⎣A[s] ⎦ ... ⎣A[s] ⎦
// Where s = Sizes[l] - neural network layer size
//       L = len(Sizes) - number of neural network layers
//
// Matrix: Z
// Description: Z is the set of raw neuron activations (before the sigmoid
// function is applied)
// Format:
//    0            l            L
// ⎡Z[0] ⎤ ... ⎡Z[0] ⎤ ... ⎡Z[0] ⎤
// ⎢Z[1] ⎥ ... ⎢Z[1] ⎥ ... ⎢Z[1] ⎥
// ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
// ⎢Z[i] ⎥ ... ⎢Z[i] ⎥ ... ⎢Z[i] ⎥
// ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
// ⎣Z[s] ⎦ ... ⎣Z[s] ⎦ ... ⎣Z[s] ⎦
// Where s = Sizes[l] - neural network layer size
//       L = len(Sizes) - number of neural network layers
//
// Matrix: Biases
// Description: Biases is the set of biases per layer, except l0
// NOTE: Biases[0] is always an empty Dense because the first layer
// has no connections to a previous layer
// Format:
//    1            l            L
// ⎡b[0] ⎤ ... ⎡b[0] ⎤ ... ⎡b[0] ⎤
// ⎢b[1] ⎥ ... ⎢b[1] ⎥ ... ⎢b[1] ⎥
// ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
// ⎢b[i] ⎥ ... ⎢b[i] ⎥ ... ⎢b[i] ⎥
// ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
// ⎣b[s] ⎦ ... ⎣b[s] ⎦ ... ⎣b[s] ⎦
// Where s = Sizes[l] - neural network layer size
//       L = len(Sizes) - number of neural network layers
//
// Matrix: Weights
// Description: Weights is the set of weights per layer, except l0
// NOTE: Weights[0] is always an empty Dense because the first layer
// has no connections to a previous layer
// Format:
//                1                                l                                L
// ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
// ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
// ⎢             ...             ⎥ ... ⎢             ...             ⎥ ... ⎢             ...             ⎥
// ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
// ⎢             ...             ⎥ ... ⎢             ...             ⎥ ... ⎢             ...             ⎥
// ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
// Where s  = Sizes[l]   - neural network layer size
//       s' = Sizes[l-1] - previous neural network layer size
//       L  = len(Sizes) - number of neural network layers
type NeuralNetwork struct {
    Count          int
    Sizes          []int
    Biases         []*mat.Dense
    Weights        []*mat.Dense
    A              []*mat.Dense
    Z              []*mat.Dense
    WGradient      []Gradient
    BGradient      []Gradient
    trainingCycles int
}

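// As an illustration, a network created with sizes {4, 3, 2} (hypothetical
// values, not ones prescribed by this project) would hold the following
// shapes under the layout described above:
//
//    Sizes:   [4 3 2], Count: 3
//    A, Z:    A[0] 4x1, A[1] 3x1, A[2] 2x1 (Z has the same shapes, Z[0] unused)
//    Biases:  Biases[0] empty, Biases[1] 3x1, Biases[2] 2x1
//    Weights: Weights[0] empty, Weights[1] 3x4, Weights[2] 2x3
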
func NewNeuralNetwork(sizes []int, nu float64, trainingCycles int) (nn *NeuralNetwork, err error) {
    if len(sizes) < 3 {
        return nil, fmt.Errorf("invalid network configuration: %v", sizes)
    }

    for i := 0; i < len(sizes); i++ {
        if sizes[i] < 2 {
            return nil, fmt.Errorf("invalid network configuration: %v", sizes)
        }
    }

    if nu <= 0.0 || nu > 1.0 {
        return nil, fmt.Errorf("invalid η value: %v", nu)
    }

    if trainingCycles <= 0 {
        return nil, fmt.Errorf("invalid training cycles number: %v", trainingCycles)
    }

    if trainingCycles < 100 {
        fmt.Println("Training cycles number is probably too small")
    }

    nn = &NeuralNetwork{}
    nn.Sizes = sizes
    nn.Count = len(sizes)
    nn.Weights = make([]*mat.Dense, nn.Count)
    nn.Biases = make([]*mat.Dense, nn.Count)
    nn.WGradient = make([]Gradient, nn.Count)
    nn.BGradient = make([]Gradient, nn.Count)
    nn.A = make([]*mat.Dense, nn.Count)
    nn.Z = make([]*mat.Dense, nn.Count)
    nn.trainingCycles = trainingCycles

    // alpha := nu / float64(nn.Sizes[0])
    for i := 1; i < nn.Count; i++ {
        nn.Weights[i] = generateRandomDense(nn.Sizes[i], nn.Sizes[i-1])
        nn.Biases[i] = generateRandomDense(nn.Sizes[i], 1)
        // Plain back propagation gradients are kept here for reference:
        // nn.WGradient[i] = &BackPropGradient{alpha}
        // nn.BGradient[i] = &BackPropGradient{alpha}
        nn.WGradient[i] = NewRPropGradient(nn.Sizes[i], nn.Sizes[i-1])
        nn.BGradient[i] = NewRPropGradient(nn.Sizes[i], 1)
    }
    return
}

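// A minimal construction sketch (the layer sizes, η and cycle count below are
// hypothetical example values):
//
//    nn, err := NewNeuralNetwork([]int{784, 30, 10}, 0.1, 1000)
//    if err != nil {
//        // handle invalid configuration
//    }
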
// Copy returns a copy of the network layout, weights and biases.
// NOTE: gradient state (WGradient/BGradient) is not copied, so the copy is
// suitable for prediction but not for further teaching.
func (nn *NeuralNetwork) Copy() (out *NeuralNetwork) {
    out = &NeuralNetwork{}
    out.Sizes = nn.Sizes
    out.Count = nn.Count
    out.Weights = make([]*mat.Dense, nn.Count)
    out.Biases = make([]*mat.Dense, nn.Count)
    out.A = make([]*mat.Dense, nn.Count)
    out.Z = make([]*mat.Dense, nn.Count)
    out.trainingCycles = nn.trainingCycles
    for i := 1; i < out.Count; i++ {
        out.Weights[i] = mat.DenseCopyOf(nn.Weights[i])
        out.Biases[i] = mat.DenseCopyOf(nn.Biases[i])
    }
    return
}

// Predict runs a forward pass for the given input column vector and returns
// the index of the most activated output neuron together with its activation.
func (nn *NeuralNetwork) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
    r, _ := aIn.Dims()
    if r != nn.Sizes[0] {
        fmt.Printf("Invalid input matrix row count: %v, expected: %v\n", r, nn.Sizes[0])
        return -1, 0.0
    }

    nn.forward(aIn)
    result := nn.result()
    r, _ = result.Dims()
    max = 0.0
    maxIndex = 0
    for i := 0; i < r; i++ {
        if result.At(i, 0) > max {
            max = result.At(i, 0)
            maxIndex = i
        }
    }
    return
}

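// A prediction sketch for the hypothetical 784-input network above; pixels is
// an assumed []float64 of length 784, and the input must be a column vector
// with one row per first-layer neuron:
//
//    input := mat.NewDense(784, 1, pixels)
//    index, activation := nn.Predict(input)
//    fmt.Printf("best neuron: %v (activation %v)\n", index, activation)
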
// Teach runs the configured number of training cycles. Each cycle accumulates
// gradients over all samples provided by the teacher, then applies the
// accumulated deltas to weights and biases once per cycle.
func (nn *NeuralNetwork) Teach(teacher teach.Teacher) {
    for i := 0; i < nn.trainingCycles; i++ {
        for teacher.NextData() {
            nn.backward(teacher.GetData())
        }
        for l := 1; l < nn.Count; l++ {
            nn.Biases[l] = nn.BGradient[l].ApplyDelta(nn.Biases[l], &mat.Dense{})
            nn.Weights[l] = nn.WGradient[l].ApplyDelta(nn.Weights[l], &mat.Dense{})
        }
    }
}

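// Teach relies only on the following subset of teach.Teacher, inferred from
// its usage here; the real interface is defined in the teach package and may
// differ in detail:
//
//    type Teacher interface {
//        NextData() bool                    // advances to the next sample, false when exhausted
//        GetData() (mat.Matrix, mat.Matrix) // input and expected output column vectors
//    }
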
// SaveState serializes the network layout, biases and weights to writer in
// little-endian binary form.
func (nn *NeuralNetwork) SaveState(writer io.Writer) {
    // Save layer count
    bufferSize := make([]byte, 4)
    binary.LittleEndian.PutUint32(bufferSize[0:], uint32(nn.Count))
    _, err := writer.Write(bufferSize)
    check(err)
    fmt.Printf("wrote value %d\n", uint32(nn.Count))

    // Save layer sizes array
    buffer := make([]byte, nn.Count*4)
    for i := 0; i < nn.Count; i++ {
        binary.LittleEndian.PutUint32(buffer[i*4:], uint32(nn.Sizes[i]))
    }
    _, err = writer.Write(buffer)
    check(err)
    // fmt.Printf("wrote buffer %d bytes\n", n2)

    // Save biases
    for i := 1; i < nn.Count; i++ {
        saveDense(writer, nn.Biases[i])
    }

    // Save weights
    for i := 1; i < nn.Count; i++ {
        saveDense(writer, nn.Weights[i])
    }
}

// LoadState restores the network layout, biases and weights previously
// serialized by SaveState. Gradient state is not restored.
func (nn *NeuralNetwork) LoadState(reader io.Reader) {
    // Read layer count
    nn.Count = readInt(reader)

    // Read layer sizes array
    sizeBuffer := readByteArray(reader, nn.Count*4)
    nn.Sizes = make([]int, nn.Count)
    for i := 0; i < nn.Count; i++ {
        nn.Sizes[i] = int(binary.LittleEndian.Uint32(sizeBuffer[i*4:]))
        // fmt.Printf("LoadState: nn.Sizes[%d] %d \n", i, nn.Sizes[i])
    }

    // Read biases; index 0 stays an empty Dense, see the layout notes above
    nn.Biases = make([]*mat.Dense, nn.Count)
    nn.Biases[0] = &mat.Dense{}
    for i := 1; i < nn.Count; i++ {
        nn.Biases[i] = readDense(reader, &mat.Dense{})
    }

    // Read weights; index 0 stays an empty Dense as well
    nn.Weights = make([]*mat.Dense, nn.Count)
    nn.Weights[0] = &mat.Dense{}
    for i := 1; i < nn.Count; i++ {
        nn.Weights[i] = readDense(reader, &mat.Dense{})
    }

    nn.A = make([]*mat.Dense, nn.Count)
    nn.Z = make([]*mat.Dense, nn.Count)
    // fmt.Printf("\nLoadState end\n")
}

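// A save/load round-trip sketch (error handling elided, "os" import assumed
// on the caller's side). Note that LoadState does not restore WGradient and
// BGradient, so a freshly loaded network is usable for Predict but not Teach:
//
//    file, _ := os.Create("network.nn")
//    nn.SaveState(file)
//    file.Close()
//
//    restored := &NeuralNetwork{}
//    file, _ = os.Open("network.nn")
//    restored.LoadState(file)
//    file.Close()
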
func (nn *NeuralNetwork) forward(aIn mat.Matrix) {
    nn.A[0] = mat.DenseCopyOf(aIn)
    for i := 1; i < nn.Count; i++ {
        nn.A[i] = mat.NewDense(nn.Sizes[i], 1, nil)
        aSrc := nn.A[i-1]
        aDst := nn.A[i]

        // Each iteration implements the formula below for neuron activation values
        // A[l] = σ(W[l]*A[l−1] + B[l])

        // W[l]*A[l−1]
        aDst.Mul(nn.Weights[i], aSrc)

        // W[l]*A[l−1] + B[l]
        aDst.Add(aDst, nn.Biases[i])

        // Save the raw activation value for back propagation
        nn.Z[i] = mat.DenseCopyOf(aDst)

        // σ(W[l]*A[l−1] + B[l])
        aDst.Apply(applySigmoid, aDst)
    }
}

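// applySigmoid and applySigmoidPrime are defined elsewhere in this package;
// their use with (*mat.Dense).Apply implies the gonum element-wise callback
// signature func(i, j int, v float64) float64. A sketch assuming the standard
// logistic sigmoid:
//
//    func applySigmoid(i, j int, z float64) float64 {
//        return 1.0 / (1.0 + math.Exp(-z)) // σ(z)
//    }
//
//    func applySigmoidPrime(i, j int, z float64) float64 {
//        sig := 1.0 / (1.0 + math.Exp(-z))
//        return sig * (1.0 - sig) // σ'(z) = σ(z)(1−σ(z))
//    }
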
func (nn *NeuralNetwork) backward(aIn, aOut mat.Matrix) {
    nn.forward(aIn)

    lastLayerNum := nn.Count - 1

    // To calculate new values of weights and biases
    // the following formulas are used:
    // ∂E/∂W[l] = A[l−1]*δ[l]
    // ∂E/∂B[l] = δ[l]

    // For the last layer the δ value is calculated as follows:
    // δ = (A[L]−y)⊙σ'(Z[L])

    // Calculate the initial error for the last layer L
    // error = A[L]−y
    // Where y is the expected activations set
    err := &mat.Dense{}
    err.Sub(nn.result(), aOut)

    // Calculate sigmoid prime σ'(Z[L]) for the last layer L
    sigmoidsPrime := &mat.Dense{}
    sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[lastLayerNum])

    // (A[L]−y)⊙σ'(Z[L])
    delta := &mat.Dense{}
    delta.MulElem(err, sigmoidsPrime)

    // ∂E/∂B[L] = δ[L]
    biases := mat.DenseCopyOf(delta)

    // ∂E/∂W[L] = A[L−1]*δ[L]
    weights := &mat.Dense{}
    weights.Mul(delta, nn.A[lastLayerNum-1].T())

    // fmt.Printf("Prev biases[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.Biases[lastLayerNum], mat.Prefix(""), mat.Excerpt(0)))
    // fmt.Printf("Prev weights[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.Weights[lastLayerNum], mat.Prefix(""), mat.Excerpt(0)))
    // fmt.Printf("Expect[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(aOut, mat.Prefix(""), mat.Excerpt(0)))
    // fmt.Printf("Result[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.result(), mat.Prefix(""), mat.Excerpt(0)))
    // fmt.Printf("nn.Z[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(nn.Z[lastLayerNum], mat.Prefix(""), mat.Excerpt(0)))
    // fmt.Printf("sigmoidsPrime[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(sigmoidsPrime, mat.Prefix(""), mat.Excerpt(0)))
    // fmt.Printf("Err[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(err, mat.Prefix(""), mat.Excerpt(0)))
    // fmt.Printf("Biases gradient[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(biases, mat.Prefix(""), mat.Excerpt(0)))
    // fmt.Printf("Weights gradient[%v]:\n%v\n\n", lastLayerNum, mat.Formatted(weights, mat.Prefix(""), mat.Excerpt(0)))

    // Accumulate last layer gradients; accumulated deltas are applied once
    // per training cycle in Teach
    // newBiases := []*mat.Dense{nn.BGradient[lastLayerNum].ApplyDelta(nn.Biases[lastLayerNum], biases)}
    // newWeights := []*mat.Dense{nn.WGradient[lastLayerNum].ApplyDelta(nn.Weights[lastLayerNum], weights)}
    nn.BGradient[lastLayerNum].AccumGradients(biases)
    nn.WGradient[lastLayerNum].AccumGradients(weights)
    // Weights and Biases of the remaining layers are calculated using the
    // same formulas:
    // ∂E/∂W[l] = A[l−1]*δ[l]
    // ∂E/∂B[l] = δ[l]
    // But δ[l] is calculated using a different formula:
    // δ[l] = (Wt[l+1]*δ[l+1])⊙σ'(Z[l])
    // Where Wt[l+1] is the transposed Weights matrix from the forward step
    for l := nn.Count - 2; l > 0; l-- {
        // Calculate sigmoid prime σ'(Z[l]) for layer l
        sigmoidsPrime := &mat.Dense{}
        sigmoidsPrime.Apply(applySigmoidPrime, nn.Z[l])

        // Wt[l+1]*δ[l+1]
        // delta still holds δ[l+1] from the previous iteration (or from the
        // last layer on the first pass through this loop)
        wdelta := &mat.Dense{}
        wdelta.Mul(nn.Weights[l+1].T(), delta)

        // Calculate the new delta for this layer
        // δ[l] = (Wt[l+1]*δ[l+1])⊙σ'(Z[l])
        delta = &mat.Dense{}
        delta.MulElem(wdelta, sigmoidsPrime)

        // ∂E/∂B[l] = δ[l]
        biases := mat.DenseCopyOf(delta)

        // ∂E/∂W[l] = A[l−1]*δ[l]
        // At this point an explanation of the inaccuracy in the formula is
        // required. Multiplying the activations matrix of layer l-1 by δ[l]
        // directly is impossible, because the matrices have the following shapes:
        //  A[l-1]        δ[l]
        // ⎡A[0]  ⎤     ⎡δ[0] ⎤
        // ⎢A[1]  ⎥     ⎢δ[1] ⎥
        // ⎢ ...  ⎥     ⎢ ... ⎥
        // ⎢A[i]  ⎥  X  ⎢δ[i] ⎥
        // ⎢ ...  ⎥     ⎢ ... ⎥
        // ⎣A[s'] ⎦     ⎣δ[s] ⎦
        // So we need to rearrange these matrices to make the multiplication
        // valid and get a Weights matrix of the following shape:
        // ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
        // ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
        // ⎢             ...             ⎥
        // ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
        // ⎢             ...             ⎥
        // ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
        // So we swap the operands and transpose A[l-1] to get a valid
        // multiplication of the following shape:
        //  δ[l]
        // ⎡δ[0] ⎤
        // ⎢δ[1] ⎥
        // ⎢ ... ⎥  x  [A[0] A[1] ... A[i] ... A[s']]
        // ⎢δ[i] ⎥
        // ⎢ ... ⎥
        // ⎣δ[s] ⎦
        weights := &mat.Dense{}
        weights.Mul(delta, nn.A[l-1].T())

        // fmt.Printf("Weights gradient[%v]:\n%v\n\n", l, mat.Formatted(weights, mat.Prefix(""), mat.Excerpt(0)))
        // fmt.Printf("Biases gradient[%v]:\n%v\n\n", l, mat.Formatted(biases, mat.Prefix(""), mat.Excerpt(0)))

        // Accumulate gradients for this layer; deltas are applied later in Teach
        // !Prepend! new Biases and Weights
        // newBiases = append([]*mat.Dense{nn.BGradient[l].ApplyDelta(nn.Biases[l], biases)}, newBiases...)
        // newWeights = append([]*mat.Dense{nn.WGradient[l].ApplyDelta(nn.Weights[l], weights)}, newWeights...)
        nn.BGradient[l].AccumGradients(biases)
        nn.WGradient[l].AccumGradients(weights)
    }
    // newBiases = append([]*mat.Dense{&mat.Dense{}}, newBiases...)
    // newWeights = append([]*mat.Dense{&mat.Dense{}}, newWeights...)
    // nn.Biases = newBiases
    // nn.Weights = newWeights
}

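// The Gradient implementations (NewRPropGradient here, BackPropGradient in the
// commented-out variant) live in another file of this package. From their
// usage in Teach and backward, the interface is expected to cover at least:
//
//    type Gradient interface {
//        AccumGradients(gradient mat.Matrix)               // accumulate per-sample gradients (backward)
//        ApplyDelta(m mat.Matrix, d mat.Matrix) *mat.Dense // apply accumulated deltas once per cycle (Teach)
//    }
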
func (nn *NeuralNetwork) result() *mat.Dense {
    return nn.A[nn.Count-1]
}