neuralnetwork.go

/*
 * MIT License
 *
 * Copyright (c) 2019 Alexey Edelev <semlanik@gmail.com>, Tatyana Borisova <tanusshhka@mail.ru>
 *
 * This file is part of NeuralNetwork project https://git.semlanik.org/semlanik/NeuralNetwork
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this
 * software and associated documentation files (the "Software"), to deal in the Software
 * without restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software, and
 * to permit persons to whom the Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies
 * or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
 * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

package neuralnetwork

import (
    "encoding/binary"
    "fmt"
    "io"
    "runtime"
    "sync"
    "time"

    training "../training"

    mat "gonum.org/v1/gonum/mat"
)

// NeuralNetwork is a simple neural network implementation
//
// Resources:
// http://neuralnetworksanddeeplearning.com
// https://www.youtube.com/watch?v=fNk_zzaMoSs
// http://www.inf.fu-berlin.de/lehre/WS06/Musterererkennung/Paper/rprop.pdf
//
// Matrix: A
// Description: A is the set of calculated neuron activations after sigmoid correction
// Format:    0            l            L
//         ⎡A[0] ⎤ ... ⎡A[0] ⎤ ... ⎡A[0] ⎤
//         ⎢A[1] ⎥ ... ⎢A[1] ⎥ ... ⎢A[1] ⎥
//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
//         ⎢A[i] ⎥ ... ⎢A[i] ⎥ ... ⎢A[i] ⎥
//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
//         ⎣A[s] ⎦ ... ⎣A[s] ⎦ ... ⎣A[s] ⎦
// Where s = Sizes[l] - Neural network layer size
//       L = len(Sizes) - Number of neural network layers
//
// Matrix: Z
// Description: Z is the set of calculated raw neuron activations
// Format:    0            l            L
//         ⎡Z[0] ⎤ ... ⎡Z[0] ⎤ ... ⎡Z[0] ⎤
//         ⎢Z[1] ⎥ ... ⎢Z[1] ⎥ ... ⎢Z[1] ⎥
//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
//         ⎢Z[i] ⎥ ... ⎢Z[i] ⎥ ... ⎢Z[i] ⎥
//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
//         ⎣Z[s] ⎦ ... ⎣Z[s] ⎦ ... ⎣Z[s] ⎦
// Where s = Sizes[l] - Neural network layer size
//       L = len(Sizes) - Number of neural network layers
//
// Matrix: Biases
// Description: Biases is the set of biases per layer, except l0
// NOTE: l0 is always an empty Dense because the first layer
// has no connections to a previous layer
// Format:    1            l            L
//         ⎡b[0] ⎤ ... ⎡b[0] ⎤ ... ⎡b[0] ⎤
//         ⎢b[1] ⎥ ... ⎢b[1] ⎥ ... ⎢b[1] ⎥
//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
//         ⎢b[i] ⎥ ... ⎢b[i] ⎥ ... ⎢b[i] ⎥
//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
//         ⎣b[s] ⎦ ... ⎣b[s] ⎦ ... ⎣b[s] ⎦
// Where s = Sizes[l] - Neural network layer size
//       L = len(Sizes) - Number of neural network layers
//
// Matrix: Weights
// Description: Weights is the set of weights per layer, except l0
// NOTE: l0 is always an empty Dense because the first layer
// has no connections to a previous layer
// Format:               1                                   l                                   L
//         ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
//         ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
//         ⎢             ...             ⎥ ... ⎢             ...             ⎥ ... ⎢             ...             ⎥
//         ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
//         ⎢             ...             ⎥ ... ⎢             ...             ⎥ ... ⎢             ...             ⎥
//         ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
// Where s  = Sizes[l]   - Neural network layer size
//       s' = Sizes[l-1] - Previous neural network layer size
//       L  = len(Sizes) - Number of neural network layers
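//
// As a worked example (an illustrative configuration, not anything the
// code prescribes): for Sizes = [3, 4, 2] the stored matrices have shapes
//         Weights[1]: 4x3   Biases[1]: 4x1
//         Weights[2]: 2x4   Biases[2]: 2x1
// while Weights[0] and Biases[0] stay empty.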
type NeuralNetwork struct {
    LayerCount                 int
    Sizes                      []int
    Biases                     []*mat.Dense
    Weights                    []*mat.Dense
    BGradient                  []interface{}
    WGradient                  []interface{}
    gradientDescentInitializer GradientDescentInitializer
    watcher                    StateWatcher
}

// NewNeuralNetwork creates a network for the given layer sizes. A valid
// configuration has at least 3 layers (input, hidden, output) with at
// least 2 neurons each.
func NewNeuralNetwork(sizes []int, gradientDescentInitializer GradientDescentInitializer) (nn *NeuralNetwork, err error) {
    if len(sizes) < 3 {
        return nil, fmt.Errorf("invalid network configuration: %v", sizes)
    }
    for i := 0; i < len(sizes); i++ {
        if sizes[i] < 2 {
            return nil, fmt.Errorf("invalid network configuration: %v", sizes)
        }
    }

    lenSizes := len(sizes)
    nn = &NeuralNetwork{
        Sizes:                      sizes,
        LayerCount:                 lenSizes,
        Biases:                     make([]*mat.Dense, lenSizes),
        Weights:                    make([]*mat.Dense, lenSizes),
        BGradient:                  make([]interface{}, lenSizes),
        WGradient:                  make([]interface{}, lenSizes),
        gradientDescentInitializer: gradientDescentInitializer,
    }
    for l := 1; l < nn.LayerCount; l++ {
        nn.Biases[l] = generateRandomDense(nn.Sizes[l], 1)
        nn.Weights[l] = generateRandomDense(nn.Sizes[l], nn.Sizes[l-1])
        if nn.gradientDescentInitializer != nil {
            nn.BGradient[l] = nn.gradientDescentInitializer(nn, l, BiasGradient)
            nn.WGradient[l] = nn.gradientDescentInitializer(nn, l, WeightGradient)
        }
    }
    return
}
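
// A minimal construction sketch (the layer sizes are illustrative; a nil
// initializer is accepted when the network will not be trained):
//
//    nn, err := NewNeuralNetwork([]int{784, 30, 10}, nil)
//    if err != nil {
//        panic(err)
//    }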

// Copy returns a deep copy of the network. Biases and Weights are
// duplicated; gradient descent state is re-created from the initializer.
func (nn *NeuralNetwork) Copy() (outNN *NeuralNetwork) {
    outNN = &NeuralNetwork{
        Sizes:                      nn.Sizes,
        LayerCount:                 len(nn.Sizes),
        Biases:                     make([]*mat.Dense, nn.LayerCount),
        Weights:                    make([]*mat.Dense, nn.LayerCount),
        BGradient:                  make([]interface{}, nn.LayerCount),
        WGradient:                  make([]interface{}, nn.LayerCount),
        gradientDescentInitializer: nn.gradientDescentInitializer,
        watcher:                    nn.watcher,
    }
    for l := 1; l < outNN.LayerCount; l++ {
        outNN.Biases[l] = mat.DenseCopyOf(nn.Biases[l])
        outNN.Weights[l] = mat.DenseCopyOf(nn.Weights[l])
        if outNN.gradientDescentInitializer != nil {
            outNN.BGradient[l] = outNN.gradientDescentInitializer(outNN, l, BiasGradient)
            outNN.WGradient[l] = outNN.gradientDescentInitializer(outNN, l, WeightGradient)
        }
    }
    return
}

// SetStateWatcher attaches a StateWatcher that is notified about state,
// activation, bias and weight changes.
func (nn *NeuralNetwork) SetStateWatcher(watcher StateWatcher) {
    nn.watcher = watcher
    if watcher != nil {
        watcher.Init(nn)
        watcher.UpdateState(StateIdle)
    }
}

// Predict feeds the input column vector aIn forward through the network
// and returns the index and value of the strongest output activation.
func (nn *NeuralNetwork) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
    if nn.watcher != nil {
        nn.watcher.UpdateState(StatePredict)
        defer nn.watcher.UpdateState(StateIdle)
    }
    r, _ := aIn.Dims()
    if r != nn.Sizes[0] {
        fmt.Printf("Invalid input matrix row count: %v, expected: %v\n", r, nn.Sizes[0])
        return -1, 0.0
    }
    A, _ := nn.forward(aIn)
    result := A[nn.LayerCount-1]
    r, _ = result.Dims()
    max = 0.0
    maxIndex = 0
    for i := 0; i < r; i++ {
        if result.At(i, 0) > max {
            max = result.At(i, 0)
            maxIndex = i
        }
    }
    return
}
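
// A prediction sketch (assuming a network with a 3-neuron input layer;
// inputs are column vectors):
//
//    aIn := mat.NewDense(3, 1, []float64{0.5, 0.1, 0.9})
//    index, confidence := nn.Predict(aIn)
//    fmt.Printf("class %d (%f)\n", index, confidence)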

// Train runs the given number of epochs, dispatching to TrainOnline or
// TrainBatch depending on which gradient descent type the initializer
// produced.
func (nn *NeuralNetwork) Train(trainer training.Trainer, epochs int) {
    if nn.watcher != nil {
        nn.watcher.UpdateState(StateLearning)
        defer nn.watcher.UpdateState(StateIdle)
    }
    if _, ok := nn.WGradient[nn.LayerCount-1].(OnlineGradientDescent); ok {
        nn.TrainOnline(trainer, epochs)
    } else if _, ok := nn.WGradient[nn.LayerCount-1].(BatchGradientDescent); ok {
        nn.TrainBatch(trainer, epochs)
    } else {
        panic("Invalid gradient descent type")
    }
}
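
// A training sketch (the trainer value and the epoch count are
// assumptions; any training.Trainer implementation works):
//
//    var trainer training.Trainer = newMNISTTrainer() // hypothetical constructor
//    nn.Train(trainer, 30)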

// TrainOnline updates biases and weights after every single training
// sample using the configured OnlineGradientDescent.
func (nn *NeuralNetwork) TrainOnline(trainer training.Trainer, epochs int) {
    for t := 0; t < epochs; t++ {
        for trainer.NextData() {
            dB, dW := nn.backward(trainer.GetData())
            for l := 1; l < nn.LayerCount; l++ {
                bGradient, ok := nn.BGradient[l].(OnlineGradientDescent)
                if !ok {
                    panic("bGradient is not an OnlineGradientDescent")
                }
                wGradient, ok := nn.WGradient[l].(OnlineGradientDescent)
                if !ok {
                    panic("wGradient is not an OnlineGradientDescent")
                }
                nn.Biases[l] = bGradient.ApplyDelta(nn.Biases[l], dB[l])
                nn.Weights[l] = wGradient.ApplyDelta(nn.Weights[l], dW[l])
                if nn.watcher != nil {
                    nn.watcher.UpdateBiases(l, nn.Biases[l])
                    nn.watcher.UpdateWeights(l, nn.Weights[l])
                }
            }
        }
        trainer.Reset()
    }
}

// TrainBatch processes the whole dataset per epoch: gradients are
// accumulated by parallel batch workers and applied once per layer using
// the configured BatchGradientDescent.
func (nn *NeuralNetwork) TrainBatch(trainer training.Trainer, epochs int) {
    fmt.Printf("Start training in %v threads\n", 2*runtime.NumCPU())
    for t := 0; t < epochs; t++ {
        batchWorkers := nn.runBatchWorkers(2*runtime.NumCPU(), trainer)
        for l := 1; l < nn.LayerCount; l++ {
            bGradient, ok := nn.BGradient[l].(BatchGradientDescent)
            if !ok {
                panic("bGradient is not a BatchGradientDescent")
            }
            wGradient, ok := nn.WGradient[l].(BatchGradientDescent)
            if !ok {
                panic("wGradient is not a BatchGradientDescent")
            }
            for _, bw := range batchWorkers {
                dB, dW := bw.Result(l)
                bGradient.AccumGradients(dB)
                wGradient.AccumGradients(dW)
            }
            nn.Biases[l] = bGradient.ApplyDelta(nn.Biases[l])
            nn.Weights[l] = wGradient.ApplyDelta(nn.Weights[l])
            if nn.watcher != nil {
                nn.watcher.UpdateBiases(l, nn.Biases[l])
                nn.watcher.UpdateWeights(l, nn.Weights[l])
            }
        }
        // TODO: remove; this delay only slows training down for visualization
        time.Sleep(100 * time.Millisecond)
    }
}

// runBatchWorkers splits the dataset into threadCount equal chunks and
// runs one batch worker per chunk. Note that the integer division means
// up to threadCount-1 trailing samples are skipped each epoch.
func (nn *NeuralNetwork) runBatchWorkers(threadCount int, trainer training.Trainer) (workers []*batchWorker) {
    wg := sync.WaitGroup{}
    chunkSize := trainer.GetDataCount() / threadCount
    workers = make([]*batchWorker, threadCount)
    for i := range workers {
        workers[i] = newBatchWorker(nn)
        wg.Add(1)
        s := i
        go func() {
            workers[s].Run(trainer, s*chunkSize, (s+1)*chunkSize)
            wg.Done()
        }()
    }
    wg.Wait()
    return
}

// SaveState serializes the network to writer: the layer count and the
// layer sizes as little-endian uint32 values, followed by all biases and
// then all weights (layers 1..L-1).
func (nn *NeuralNetwork) SaveState(writer io.Writer) {
    // Save the layer count
    bufferSize := make([]byte, 4)
    binary.LittleEndian.PutUint32(bufferSize[0:], uint32(nn.LayerCount))
    _, err := writer.Write(bufferSize)
    check(err)

    // Save the layer sizes array
    buffer := make([]byte, nn.LayerCount*4)
    for i := 0; i < nn.LayerCount; i++ {
        binary.LittleEndian.PutUint32(buffer[i*4:], uint32(nn.Sizes[i]))
    }
    _, err = writer.Write(buffer)
    check(err)

    // Save biases
    for i := 1; i < nn.LayerCount; i++ {
        saveDense(writer, nn.Biases[i])
    }
    // Save weights
    for i := 1; i < nn.LayerCount; i++ {
        saveDense(writer, nn.Weights[i])
    }
}

// LoadState restores a network previously written by SaveState. Gradient
// descent state is not restored, so the loaded network is ready for
// prediction only.
func (nn *NeuralNetwork) LoadState(reader io.Reader) {
    // Read the layer count
    nn.LayerCount = readInt(reader)

    // Read the layer sizes array
    sizeBuffer := readByteArray(reader, nn.LayerCount*4)
    nn.Sizes = make([]int, nn.LayerCount)
    for i := 0; i < nn.LayerCount; i++ {
        nn.Sizes[i] = int(binary.LittleEndian.Uint32(sizeBuffer[i*4:]))
    }

    // Layer 0 stays an empty Dense, see the NeuralNetwork description above
    nn.Biases = make([]*mat.Dense, nn.LayerCount)
    nn.Weights = make([]*mat.Dense, nn.LayerCount)
    nn.Biases[0] = &mat.Dense{}
    nn.Weights[0] = &mat.Dense{}

    // Read biases
    for i := 1; i < nn.LayerCount; i++ {
        nn.Biases[i] = readDense(reader, &mat.Dense{})
    }
    // Read weights
    for i := 1; i < nn.LayerCount; i++ {
        nn.Weights[i] = readDense(reader, &mat.Dense{})
    }
}
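
// A save/load round-trip sketch (in-memory here; any io.Writer/io.Reader
// pair such as an os.File works the same way):
//
//    var buf bytes.Buffer
//    nn.SaveState(&buf)
//
//    restored := &NeuralNetwork{}
//    restored.LoadState(&buf)
//    index, confidence := restored.Predict(aIn) // prediction-only, see above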

// forward propagates aIn through the network and returns the sigmoid
// activations A and the raw activations Z for every layer.
func (nn NeuralNetwork) forward(aIn mat.Matrix) (A, Z []*mat.Dense) {
    A = make([]*mat.Dense, nn.LayerCount)
    Z = make([]*mat.Dense, nn.LayerCount)
    A[0] = mat.DenseCopyOf(aIn)
    if nn.watcher != nil {
        nn.watcher.UpdateActivations(0, A[0])
    }
    for l := 1; l < nn.LayerCount; l++ {
        A[l] = mat.NewDense(nn.Sizes[l], 1, nil)
        aSrc := A[l-1]
        aDst := A[l]

        // Each iteration implements the formula below for neuron activation values
        // A[l] = σ(W[l]*A[l−1] + B[l])

        // W[l]*A[l−1]
        aDst.Mul(nn.Weights[l], aSrc)
        // W[l]*A[l−1] + B[l]
        aDst.Add(aDst, nn.Biases[l])
        // Save the raw activation value for back propagation
        Z[l] = mat.DenseCopyOf(aDst)
        // σ(W[l]*A[l−1] + B[l])
        aDst.Apply(applySigmoid, aDst)
        if nn.watcher != nil {
            nn.watcher.UpdateActivations(l, aDst)
        }
    }
    return
}
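
// A worked single-layer step (illustrative numbers):
//
//    W[1] = ⎡0 1⎤   A[0] = ⎡1⎤   B[1] = ⎡ 0⎤
//           ⎣1 0⎦          ⎣0⎦          ⎣−1⎦
//
// gives Z[1] = W[1]*A[0] + B[1] = (0, 0)ᵀ and
// A[1] = σ(Z[1]) = (0.5, 0.5)ᵀ, since σ(0) = 1/(1+e⁰) = 0.5.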

// backward returns the calculated bias and weight derivatives for each
// layer around the aIn/aOut dataset.
func (nn NeuralNetwork) backward(aIn, aOut mat.Matrix) (dB, dW []*mat.Dense) {
    A, Z := nn.forward(aIn)
    lastLayerNum := nn.LayerCount - 1
    dB = make([]*mat.Dense, nn.LayerCount)
    dW = make([]*mat.Dense, nn.LayerCount)

    // To calculate new values of weights and biases the
    // following formulas are used:
    // ∂E/∂W[l] = A[l−1]*δ[l]
    // ∂E/∂B[l] = δ[l]
    // For the last layer the δ value is calculated by:
    // δ = (A[L]−y)⊙σ'(Z[L])

    // Calculate the initial error for the last layer L
    // error = A[L]−y
    // Where y is the expected activations set
    err := &mat.Dense{}
    err.Sub(A[nn.LayerCount-1], aOut)

    // Calculate the sigmoid prime σ'(Z[L]) for the last layer L
    sigmoidsPrime := &mat.Dense{}
    sigmoidsPrime.Apply(applySigmoidPrime, Z[lastLayerNum])

    // (A[L]−y)⊙σ'(Z[L])
    delta := &mat.Dense{}
    delta.MulElem(err, sigmoidsPrime)

    // ∂E/∂B[L] = δ[L]
    biases := mat.DenseCopyOf(delta)

    // ∂E/∂W[L] = A[L−1]*δ[L]
    weights := &mat.Dense{}
    weights.Mul(delta, A[lastLayerNum-1].T())

    // Initialize the derivatives with the last layer values
    dB[lastLayerNum] = biases
    dW[lastLayerNum] = weights

    // Derivatives of weights and biases for the remaining layers are
    // calculated using the same formulas:
    // ∂E/∂W[l] = A[l−1]*δ[l]
    // ∂E/∂B[l] = δ[l]
    // But δ[l] is calculated using a different formula:
    // δ[l] = (Wᵀ[l+1]*δ[l+1])⊙σ'(Z[l])
    // Where Wᵀ[l+1] is the transposed matrix of actual weights from
    // the forward step
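
    // A dimension check on the worked example Sizes = [3, 4, 2] from the
    // type description above: δ[2] is 2x1 and A[1] is 4x1, so
    // dW[2] = δ[2]*A[1]ᵀ is 2x4, exactly the shape of Weights[2].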
    for l := nn.LayerCount - 2; l > 0; l-- {
        // Calculate the sigmoid prime σ'(Z[l]) for layer l
        sigmoidsPrime := &mat.Dense{}
        sigmoidsPrime.Apply(applySigmoidPrime, Z[l])

        // Wᵀ[l+1]*δ[l+1]
        // delta here still holds δ[l+1] from the previous step (l+1)
        wdelta := &mat.Dense{}
        wdelta.Mul(nn.Weights[l+1].T(), delta)

        // Calculate the new delta
        // δ[l] = (Wᵀ[l+1]*δ[l+1])⊙σ'(Z[l])
        delta = &mat.Dense{}
        delta.MulElem(wdelta, sigmoidsPrime)

        // ∂E/∂B[l] = δ[l]
        biases := mat.DenseCopyOf(delta)

        // ∂E/∂W[l] = A[l−1]*δ[l]
        // At this point it's worth explaining an inaccuracy in the
        // formula. Multiplying the activations matrix of layer l-1 by
        // δ[l] directly is impossible because the matrices look like:
        //        A[l-1]      δ[l]
        //        ⎡A[0] ⎤   ⎡δ[0] ⎤
        //        ⎢A[1] ⎥   ⎢δ[1] ⎥
        //        ⎢ ... ⎥   ⎢ ... ⎥
        //        ⎢A[i] ⎥ X ⎢δ[i] ⎥
        //        ⎢ ... ⎥   ⎢ ... ⎥
        //        ⎣A[s']⎦   ⎣δ[s] ⎦
        // We need to modify these matrices to make the multiplication
        // valid and get a weights matrix of the following view:
        //        ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
        //        ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
        //        ⎢             ...             ⎥
        //        ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
        //        ⎢             ...             ⎥
        //        ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
        // So we swap the operands and transpose A[l-1] to get a valid
        // multiplication of the following view:
        //        δ[l]
        //        ⎡δ[0] ⎤ x [A[0] A[1] ... A[i] ... A[s']]
        //        ⎢δ[1] ⎥
        //        ⎢ ... ⎥
        //        ⎢δ[i] ⎥
        //        ⎢ ... ⎥
        //        ⎣δ[s] ⎦
        weights := &mat.Dense{}
        weights.Mul(delta, A[l-1].T())
        dB[l] = biases
        dW[l] = weights
    }
    return
}