neuralnetwork.go 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714
  1. /*
  2. * MIT License
  3. *
  4. * Copyright (c) 2019 Alexey Edelev <semlanik@gmail.com>, Tatyana Borisova <tanusshhka@mail.ru>
  5. *
  6. * This file is part of NeuralNetwork project https://git.semlanik.org/semlanik/NeuralNetwork
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining a copy of this
  9. * software and associated documentation files (the "Software"), to deal in the Software
  10. * without restriction, including without limitation the rights to use, copy, modify,
  11. * merge, publish, distribute, sublicense, and/or sell copies of the Software, and
  12. * to permit persons to whom the Software is furnished to do so, subject to the following
  13. * conditions:
  14. *
  15. * The above copyright notice and this permission notice shall be included in all copies
  16. * or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
  19. * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  20. * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
  21. * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  22. * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  23. * DEALINGS IN THE SOFTWARE.
  24. */
  25. package neuralnetwork
  26. import (
  27. "encoding/binary"
  28. "errors"
  29. "fmt"
  30. "io"
  31. "log"
  32. "math"
  33. "os"
  34. "sync"
  35. "time"
  36. training "git.semlanik.org/semlanik/NeuralNetwork/training"
  37. mat "gonum.org/v1/gonum/mat"
  38. )
  39. // NeuralNetwork is artificial neural network implementation
  40. //
  41. // Resources:
  42. // http://neuralnetworksanddeeplearning.com
  43. // https://www.youtube.com/watch?v=fNk_zzaMoSs
  44. // http://www.inf.fu-berlin.de/lehre/WS06/Musterererkennung/Paper/rprop.pdf
  45. //
  46. // Matrix: A (local matrices used in forward and backward methods)
  47. // Description: A is set of calculated neuron activations after sigmoid correction
  48. // Format: 0 l L
  49. // ⎡A[0] ⎤ ... ⎡A[0] ⎤ ... ⎡A[0] ⎤
  50. // ⎢A[1] ⎥ ... ⎢A[1] ⎥ ... ⎢A[1] ⎥
  51. // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
  52. // ⎢A[i] ⎥ ... ⎢A[i] ⎥ ... ⎢A[i] ⎥
  53. // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
  54. // ⎣A[s] ⎦ ... ⎣A[s] ⎦ ... ⎣A[s] ⎦
  55. // Where s = Sizes[l] - Neural network layer size
  56. // L = len(Sizes) - Number of neural network layers
  57. //
  58. // Matrix: Z (local matrices used in forward and backward methods)
  59. // Description: Z is set of calculated raw neuron activations
  60. // Format: 0 l L
  61. // ⎡Z[0] ⎤ ... ⎡Z[0] ⎤ ... ⎡Z[0] ⎤
  62. // ⎢Z[1] ⎥ ... ⎢Z[1] ⎥ ... ⎢Z[1] ⎥
  63. // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
  64. // ⎢Z[i] ⎥ ... ⎢Z[i] ⎥ ... ⎢Z[i] ⎥
  65. // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
  66. // ⎣Z[s] ⎦ ... ⎣Z[s] ⎦ ... ⎣Z[s] ⎦
  67. // Where s = Sizes[l] - Neural network layer size
  68. // L = len(Sizes) - Number of neural network layers
  69. //
  70. // Matrix: Biases
  71. // Description: Biases is set of biases per layer except l0
  72. // NOTE: l0 is always empty Dense because first layer
  73. // doesn't have connections to previous layer
  74. // Format: 1 l L
  75. // ⎡b[0] ⎤ ... ⎡b[0] ⎤ ... ⎡b[0] ⎤
  76. // ⎢b[1] ⎥ ... ⎢b[1] ⎥ ... ⎢b[1] ⎥
  77. // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
  78. // ⎢b[i] ⎥ ... ⎢b[i] ⎥ ... ⎢b[i] ⎥
  79. // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
  80. // ⎣b[s] ⎦ ... ⎣b[s] ⎦ ... ⎣b[s] ⎦
  81. // Where s = Sizes[l] - Neural network layer size
  82. // L = len(Sizes) - Number of neural network layers
  83. //
  84. // Matrix: Weights
  85. // Description: Weights is set of weights per layer except l0
  86. // NOTE: l0 is always empty Dense because first layer
  87. // doesn't have connections to previous layer
  88. // Format: 1 l L
  89. // ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
  90. // ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
  91. // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
  92. // ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
  93. // ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
  94. // ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
  95. // Where s = Sizes[l] - Neural network layer size
  96. // s' = Sizes[l-1] - Previous neural network layer size
  97. // L = len(Sizes) - Number of neural network layers
type NeuralNetwork struct {
	LayerCount int           // Number of layers; always equals len(Sizes).
	Sizes      []int         // Neuron count per layer; Sizes[0] is the input layer size.
	Biases     []*mat.Dense  // Per-layer bias column vectors; index 0 unused (input layer has no biases).
	Weights    []*mat.Dense  // Per-layer weight matrices; index 0 unused (input layer has no weights).
	BGradient  []interface{} // Per-layer bias gradient descent state (OnlineGradientDescent or BatchGradientDescent).
	WGradient  []interface{} // Per-layer weight gradient descent state (OnlineGradientDescent or BatchGradientDescent).
	// Factory producing per-layer gradient descent objects; nil disables backpropagation.
	gradientDescentInitializer GradientDescentInitializer
	// Optional observer of network state/weights/biases/activations; may be nil.
	watcher StateWatcher
	// Guards concurrent access to weights and biases during predict/train/save/load.
	syncMutex *sync.Mutex
	// Factory for batch-training workers; a local factory is created lazily when nil.
	batchWorkerFactory BatchWorkerFactory
	// Early stop policy consulted after each training epoch; defaults to noEarlyStop.
	earlyStop EarlyStop
}
  111. // NewNeuralNetwork construction method that initializes new NeuralNetwork based
  112. // on provided list of layer sizes and GradientDescentInitializer that used for
  113. // backpropagation mechanism.
  114. // If gradientDescentInitializer is not provided (is nil) backpropagation won't
  115. // be possible. Common usecase when it's used is natural selection and genetic
  116. // training.
  117. func NewNeuralNetwork(sizes []int, gradientDescentInitializer GradientDescentInitializer) (nn *NeuralNetwork, err error) {
  118. err = nil
  119. if len(sizes) < 3 {
  120. fmt.Printf("Invalid network configuration: %v\n", sizes)
  121. return nil, errors.New("Invalid network configuration: %v\n")
  122. }
  123. for i := 0; i < len(sizes); i++ {
  124. if sizes[i] < 2 {
  125. fmt.Printf("Invalid network configuration: %v\n", sizes)
  126. return nil, errors.New("Invalid network configuration: %v\n")
  127. }
  128. }
  129. lenSizes := len(sizes)
  130. nn = &NeuralNetwork{
  131. Sizes: sizes,
  132. LayerCount: len(sizes),
  133. Biases: make([]*mat.Dense, lenSizes),
  134. Weights: make([]*mat.Dense, lenSizes),
  135. BGradient: make([]interface{}, lenSizes),
  136. WGradient: make([]interface{}, lenSizes),
  137. gradientDescentInitializer: gradientDescentInitializer,
  138. syncMutex: &sync.Mutex{},
  139. earlyStop: &noEarlyStop{},
  140. }
  141. for l := 1; l < nn.LayerCount; l++ {
  142. nn.Biases[l] = generateRandomDense(nn.Sizes[l], 1)
  143. nn.Weights[l] = generateRandomDense(nn.Sizes[l], nn.Sizes[l-1])
  144. if nn.gradientDescentInitializer != nil {
  145. nn.BGradient[l] = nn.gradientDescentInitializer(nn, l, BiasGradient)
  146. nn.WGradient[l] = nn.gradientDescentInitializer(nn, l, WeightGradient)
  147. }
  148. }
  149. return
  150. }
  151. // Copy makes complete copy of NeuralNetwork data. Output network has the same
  152. // weights and biases values and but might be used independend of original one,
  153. // e.g. in separate goroutine
  154. func (nn *NeuralNetwork) Copy() (outNN *NeuralNetwork) {
  155. nn.syncMutex.Lock()
  156. defer nn.syncMutex.Unlock()
  157. outNN = &NeuralNetwork{
  158. Sizes: nn.Sizes,
  159. LayerCount: len(nn.Sizes),
  160. Biases: make([]*mat.Dense, nn.LayerCount),
  161. Weights: make([]*mat.Dense, nn.LayerCount),
  162. BGradient: make([]interface{}, nn.LayerCount),
  163. WGradient: make([]interface{}, nn.LayerCount),
  164. gradientDescentInitializer: nn.gradientDescentInitializer,
  165. watcher: nn.watcher,
  166. syncMutex: &sync.Mutex{},
  167. earlyStop: &noEarlyStop{},
  168. }
  169. for l := 1; l < outNN.LayerCount; l++ {
  170. outNN.Biases[l] = mat.DenseCopyOf(nn.Biases[l])
  171. outNN.Weights[l] = mat.DenseCopyOf(nn.Weights[l])
  172. if outNN.gradientDescentInitializer != nil {
  173. outNN.BGradient[l] = outNN.gradientDescentInitializer(outNN, l, BiasGradient)
  174. outNN.WGradient[l] = outNN.gradientDescentInitializer(outNN, l, WeightGradient)
  175. }
  176. }
  177. return
  178. }
  179. // SetBatchWorkerFactory setup batch worker factory for batch training. In case if
  180. // factory is not setup localBatchWorkerFactory will be used.
// SetBatchWorkerFactory setup batch worker factory for batch training. In case if
// factory is not setup localBatchWorkerFactory will be used.
// NOTE(review): no locking here — presumably callers set this before training starts; confirm.
func (nn *NeuralNetwork) SetBatchWorkerFactory(factory BatchWorkerFactory) {
	nn.batchWorkerFactory = factory
}
  184. // SetEarlyStop setup early stop analyser to stop training before all training epocs finished.
  185. // Usually early stop required to avoid overfitting in neural network.
// SetEarlyStop setup early stop analyser to stop training before all training epocs finished.
// Usually early stop required to avoid overfitting in neural network.
// NOTE(review): no locking here — presumably callers set this before training starts; confirm.
func (nn *NeuralNetwork) SetEarlyStop(earlyStop EarlyStop) {
	nn.earlyStop = earlyStop
}
  189. // Reset resets network state to intial/random one with specified in argument
  190. // layers configuration.
  191. func (nn *NeuralNetwork) Reset(sizes []int) (err error) {
  192. nn.syncMutex.Lock()
  193. defer nn.syncMutex.Unlock()
  194. err = nil
  195. if len(sizes) < 3 {
  196. fmt.Printf("Invalid network configuration: %v\n", sizes)
  197. return errors.New("Invalid network configuration: %v\n")
  198. }
  199. for i := 0; i < len(sizes); i++ {
  200. if sizes[i] < 2 {
  201. fmt.Printf("Invalid network configuration: %v\n", sizes)
  202. return errors.New("Invalid network configuration: %v\n")
  203. }
  204. }
  205. lenSizes := len(sizes)
  206. nn.Sizes = sizes
  207. nn.LayerCount = len(sizes)
  208. nn.Biases = make([]*mat.Dense, lenSizes)
  209. nn.Weights = make([]*mat.Dense, lenSizes)
  210. nn.BGradient = make([]interface{}, lenSizes)
  211. nn.WGradient = make([]interface{}, lenSizes)
  212. for l := 1; l < nn.LayerCount; l++ {
  213. nn.Biases[l] = generateRandomDense(nn.Sizes[l], 1)
  214. nn.Weights[l] = generateRandomDense(nn.Sizes[l], nn.Sizes[l-1])
  215. if nn.gradientDescentInitializer != nil {
  216. nn.BGradient[l] = nn.gradientDescentInitializer(nn, l, BiasGradient)
  217. nn.WGradient[l] = nn.gradientDescentInitializer(nn, l, WeightGradient)
  218. }
  219. }
  220. return
  221. }
  222. // SetStateWatcher setups state watcher for NeuralNetwork. StateWatcher is common
  223. // interface that collects data about NeuralNetwork behavior. If not specified (is
  224. // set to nil) NeuralNetwork will ignore StateWatcher interations.
  225. func (nn *NeuralNetwork) SetStateWatcher(watcher StateWatcher) {
  226. nn.watcher = watcher
  227. if watcher != nil {
  228. watcher.Init(nn)
  229. if nn.watcher.GetSubscriptionFeatures().Has(StateSubscription) {
  230. watcher.UpdateState(StateIdle)
  231. }
  232. }
  233. }
  234. // Predict method invokes prediction based on input activations provided in argument.
  235. // Returns index of best element in output activation matrix and its value.
  236. func (nn *NeuralNetwork) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
  237. nn.syncMutex.Lock()
  238. defer nn.syncMutex.Unlock()
  239. if nn.watcher != nil {
  240. if nn.watcher.GetSubscriptionFeatures().Has(StateSubscription) {
  241. nn.watcher.UpdateState(StatePredict)
  242. defer nn.watcher.UpdateState(StateIdle)
  243. }
  244. }
  245. r, _ := aIn.Dims()
  246. if r != nn.Sizes[0] {
  247. fmt.Printf("Invalid rows number of input matrix size: %v\n", r)
  248. return -1, 0.0
  249. }
  250. A, _ := nn.forward(aIn)
  251. result := A[nn.LayerCount-1]
  252. r, _ = result.Dims()
  253. max = 0.0
  254. maxIndex = 0
  255. for i := 0; i < r; i++ {
  256. if result.At(i, 0) > max {
  257. max = result.At(i, 0)
  258. maxIndex = i
  259. }
  260. }
  261. return
  262. }
  263. // Validate runs basic network validation/verification based on validation data that
  264. // provided by training.Trainer passed as argument.
  265. // Returns count of failure predictions and total amount of verified samples and mean square sum of errors for all samples
  266. func (nn *NeuralNetwork) Validate(trainer training.Trainer) (squareError float64, failCount, total int) {
  267. failCount = 0
  268. squareError = 0.0
  269. total = trainer.ValidatorCount()
  270. nn.syncMutex.Lock()
  271. defer nn.syncMutex.Unlock()
  272. if nn.watcher != nil {
  273. if nn.watcher.GetSubscriptionFeatures().Has(StateSubscription) {
  274. nn.watcher.UpdateState(StateValidation)
  275. defer nn.watcher.UpdateState(StateIdle)
  276. }
  277. }
  278. for i := 0; i < trainer.ValidatorCount(); i++ {
  279. aIn, aOut := trainer.GetValidator(i)
  280. r, _ := aIn.Dims()
  281. if r != nn.Sizes[0] {
  282. fmt.Printf("Invalid rows number of input matrix size: %v\n", r)
  283. return math.MaxFloat64, total, total
  284. }
  285. A, _ := nn.forward(aIn)
  286. result := A[nn.LayerCount-1]
  287. r, _ = result.Dims()
  288. err := &mat.Dense{}
  289. err.Sub(result, aOut)
  290. var squareErrorLocal float64 = 0.0
  291. max := 0.0
  292. maxIndex := 0
  293. for i := 0; i < r; i++ {
  294. if result.At(i, 0) > max {
  295. max = result.At(i, 0)
  296. maxIndex = i
  297. }
  298. squareErrorLocal += err.At(i, 0) * err.At(i, 0)
  299. }
  300. if aOut.At(maxIndex, 0) != 1.0 {
  301. failCount++
  302. }
  303. squareError += squareErrorLocal / float64(r)
  304. }
  305. if nn.watcher != nil {
  306. if nn.watcher.GetSubscriptionFeatures().Has(ValidationSubscription) {
  307. nn.watcher.UpdateValidation(total, failCount)
  308. }
  309. }
  310. return
  311. }
  312. // Train is common training function that invokes one of training methods depends on
  313. // gradient descent used buy NeuralNetwork. training.Trainer passed as argument used
  314. // to get training data. Training loops are limited buy number of epocs.
  315. func (nn *NeuralNetwork) Train(trainer training.Trainer, epocs int) {
  316. if nn.watcher != nil {
  317. if nn.watcher.GetSubscriptionFeatures().Has(StateSubscription) {
  318. nn.watcher.UpdateState(StateLearning)
  319. defer nn.watcher.UpdateState(StateIdle)
  320. }
  321. }
  322. if nn.earlyStop != nil {
  323. nn.earlyStop.Reset()
  324. nn.earlyStop.Test()
  325. }
  326. if _, ok := nn.WGradient[nn.LayerCount-1].(OnlineGradientDescent); ok {
  327. nn.trainOnline(trainer, epocs)
  328. } else if _, ok := nn.WGradient[nn.LayerCount-1].(BatchGradientDescent); ok {
  329. nn.trainBatch(trainer, epocs)
  330. } else {
  331. panic("Invalid gradient descent type")
  332. }
  333. }
// trainOnline trains the network sample-by-sample: for every training sample the
// gradients from one backpropagation pass are applied immediately to all layers.
// Training stops early when the earlyStop policy reports degradation.
func (nn *NeuralNetwork) trainOnline(trainer training.Trainer, epocs int) {
	for t := 0; t < epocs; t++ {
		for i := 0; i < trainer.DataCount(); i++ {
			if nn.watcher != nil {
				if nn.watcher.GetSubscriptionFeatures().Has(TrainingSubscription) {
					nn.watcher.UpdateTraining(t, epocs, i, trainer.DataCount())
				}
			}
			// Lock while gradients are computed and applied so Predict/Save
			// never observe half-updated weights.
			nn.syncMutex.Lock()
			dB, dW := nn.backward(trainer.GetData(i))
			for l := 1; l < nn.LayerCount; l++ {
				bGradient, ok := nn.BGradient[l].(OnlineGradientDescent)
				if !ok {
					panic("bGradient is not a OnlineGradientDescent")
				}
				wGradient, ok := nn.WGradient[l].(OnlineGradientDescent)
				if !ok {
					panic("wGradient is not a OnlineGradientDescent")
				}
				nn.Biases[l] = bGradient.ApplyDelta(nn.Biases[l], dB[l])
				nn.Weights[l] = wGradient.ApplyDelta(nn.Weights[l], dW[l])
				// Copies are published to the watcher so it never aliases live matrices.
				if nn.watcher != nil {
					if nn.watcher.GetSubscriptionFeatures().Has(BiasesSubscription) {
						nn.watcher.UpdateBiases(l, mat.DenseCopyOf(nn.Biases[l]))
					}
					if nn.watcher.GetSubscriptionFeatures().Has(WeightsSubscription) {
						nn.watcher.UpdateWeights(l, mat.DenseCopyOf(nn.Weights[l]))
					}
				}
			}
			nn.syncMutex.Unlock()
		}
		// Early stop check runs once per epoch, after all samples are applied.
		if nn.earlyStop != nil && nn.earlyStop.Test() {
			log.Printf("Training stopped due to fail rate grow\n")
			break
		}
	}
}
  372. func (nn *NeuralNetwork) trainBatch(trainer training.Trainer, epocs int) {
  373. for t := 0; t < epocs; t++ {
  374. if nn.watcher != nil {
  375. if nn.watcher.GetSubscriptionFeatures().Has(TrainingSubscription) {
  376. nn.watcher.UpdateTraining(t, epocs, 0, trainer.DataCount())
  377. }
  378. }
  379. batchWorkers := nn.runBatchWorkers(trainer)
  380. nn.syncMutex.Lock()
  381. for l := 1; l < nn.LayerCount; l++ {
  382. bGradient, ok := nn.BGradient[l].(BatchGradientDescent)
  383. if !ok {
  384. panic("bGradient is not a BatchGradientDescent")
  385. }
  386. wGradient, ok := nn.WGradient[l].(BatchGradientDescent)
  387. if !ok {
  388. panic("wGradient is not a BatchGradientDescent")
  389. }
  390. for _, bw := range batchWorkers {
  391. dB, dW := bw.Result(l)
  392. bGradient.AccumGradients(dB)
  393. wGradient.AccumGradients(dW)
  394. }
  395. nn.Biases[l] = bGradient.ApplyDelta(nn.Biases[l])
  396. nn.Weights[l] = wGradient.ApplyDelta(nn.Weights[l])
  397. if nn.watcher != nil {
  398. if nn.watcher.GetSubscriptionFeatures().Has(BiasesSubscription) {
  399. nn.watcher.UpdateBiases(l, mat.DenseCopyOf(nn.Biases[l]))
  400. }
  401. if nn.watcher.GetSubscriptionFeatures().Has(WeightsSubscription) {
  402. nn.watcher.UpdateWeights(l, mat.DenseCopyOf(nn.Weights[l]))
  403. }
  404. }
  405. }
  406. nn.syncMutex.Unlock()
  407. if nn.earlyStop != nil && nn.earlyStop.Test() {
  408. log.Printf("Training stopped due to fail rate grow\n")
  409. break
  410. }
  411. if nn.watcher.GetSubscriptionFeatures().Has(BiasesSubscription) || nn.watcher.GetSubscriptionFeatures().Has(WeightsSubscription) {
  412. time.Sleep(100 * time.Millisecond) //TODO: it's better to add 'Latency() int' method to watcher, for check above
  413. }
  414. }
  415. }
  416. func (nn *NeuralNetwork) runBatchWorkers(trainer training.Trainer) (workers []BatchWorker) {
  417. if nn.batchWorkerFactory == nil {
  418. nn.batchWorkerFactory = NewLocalBatchWorkerFactory(nn)
  419. log.Printf("Batch Worker factory is not set, using local one\n")
  420. }
  421. wg := sync.WaitGroup{}
  422. threadCount := nn.batchWorkerFactory.GetAvailableThreads()
  423. chunkSize := trainer.DataCount() / threadCount
  424. workers = make([]BatchWorker, threadCount)
  425. for i, _ := range workers {
  426. workers[i] = nn.batchWorkerFactory.GetBatchWorker()
  427. wg.Add(1)
  428. s := i
  429. go func() {
  430. workers[s].Run(trainer, s*chunkSize, (s+1)*chunkSize)
  431. wg.Done()
  432. }()
  433. }
  434. wg.Wait()
  435. return
  436. }
  437. // SaveState saves state of NeuralNetwork to io.Writer. It's usefull to keep training results
  438. // between NeuralNetwork "power cycles" or to share traing results between clustered neural
  439. // network hosts.
// SaveState saves state of NeuralNetwork to io.Writer. It's usefull to keep training results
// between NeuralNetwork "power cycles" or to share traing results between clustered neural
// network hosts.
//
// Serialized format (all integers little-endian uint32):
//   layer count | layer sizes | biases for layers 1..L-1 | weights for layers 1..L-1
func (nn *NeuralNetwork) SaveState(writer io.Writer) {
	nn.syncMutex.Lock()
	defer nn.syncMutex.Unlock()
	// Write the layer count as a 4-byte little-endian value.
	bufferSize := make([]byte, 4)
	binary.LittleEndian.PutUint32(bufferSize[0:], uint32(nn.LayerCount))
	_, err := writer.Write(bufferSize)
	check(err)
	// Write all layer sizes as consecutive 4-byte little-endian values.
	buffer := make([]byte, nn.LayerCount*4)
	for i := 0; i < nn.LayerCount; i++ {
		binary.LittleEndian.PutUint32(buffer[i*4:], uint32(nn.Sizes[i]))
	}
	_, err = writer.Write(buffer)
	check(err)
	// Write biases; layer 0 has none, so serialization starts at layer 1.
	for i := 1; i < nn.LayerCount; i++ {
		saveDense(writer, nn.Biases[i])
	}
	// Write weights, same layer range as biases.
	for i := 1; i < nn.LayerCount; i++ {
		saveDense(writer, nn.Weights[i])
	}
}
  466. // SaveStateToFile saves NeuralNetwork state to file by specific filePath.
// SaveStateToFile saves NeuralNetwork state to file by specific filePath.
// The file is created if missing and truncated otherwise; see SaveState for
// the serialized format.
func (nn *NeuralNetwork) SaveStateToFile(filePath string) {
	outFile, err := os.OpenFile(filePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0666)
	check(err)
	defer outFile.Close()
	nn.SaveState(outFile)
}
  473. // LoadState loads NeuralNetwork state from io.Reader. All existing data in NeuralNetwork
  474. // will be rewritten buy this method, including layers configuration and weights and biases.
  475. func (nn *NeuralNetwork) LoadState(reader io.Reader) {
  476. nn.syncMutex.Lock()
  477. defer nn.syncMutex.Unlock()
  478. // Read count
  479. nn.LayerCount = readInt(reader)
  480. // Read an input array
  481. sizeBuffer := readByteArray(reader, nn.LayerCount*4)
  482. nn.Sizes = make([]int, nn.LayerCount)
  483. for l := 0; l < nn.LayerCount; l++ {
  484. nn.Sizes[l] = int(binary.LittleEndian.Uint32(sizeBuffer[l*4:]))
  485. fmt.Printf("LoadState: nn.Sizes[%d] %d \n", l, nn.Sizes[l])
  486. }
  487. nn.Weights = []*mat.Dense{&mat.Dense{}}
  488. nn.Biases = []*mat.Dense{&mat.Dense{}}
  489. // read Biases
  490. nn.Biases[0] = &mat.Dense{}
  491. for l := 1; l < nn.LayerCount; l++ {
  492. nn.Biases = append(nn.Biases, &mat.Dense{})
  493. nn.Biases[l] = readDense(reader, nn.Biases[l])
  494. }
  495. // read Weights and initialize gradient descents
  496. nn.BGradient = make([]interface{}, nn.LayerCount)
  497. nn.WGradient = make([]interface{}, nn.LayerCount)
  498. nn.Weights[0] = &mat.Dense{}
  499. for l := 1; l < nn.LayerCount; l++ {
  500. nn.Weights = append(nn.Weights, &mat.Dense{})
  501. nn.Weights[l] = readDense(reader, nn.Weights[l])
  502. if nn.gradientDescentInitializer != nil {
  503. nn.BGradient[l] = nn.gradientDescentInitializer(nn, l, BiasGradient)
  504. nn.WGradient[l] = nn.gradientDescentInitializer(nn, l, WeightGradient)
  505. }
  506. }
  507. // fmt.Printf("\nLoadState end\n")
  508. }
  509. // LoadStateFromFile loads NeuralNetwork state from file by specific filePath.
// LoadStateFromFile loads NeuralNetwork state from file by specific filePath.
// See LoadState for the expected serialized format.
func (nn *NeuralNetwork) LoadStateFromFile(filePath string) {
	inFile, err := os.Open(filePath)
	check(err)
	defer inFile.Close()
	nn.LoadState(inFile)
}
// forward runs a forward propagation pass for input activations aIn and returns
// per-layer sigmoid activations A and raw (pre-sigmoid) activations Z; Z[0] is
// nil since the input layer has no weighted sum. Value receiver is safe here:
// syncMutex is a pointer, so the copy shares the same lock.
func (nn NeuralNetwork) forward(aIn mat.Matrix) (A, Z []*mat.Dense) {
	A = make([]*mat.Dense, nn.LayerCount)
	Z = make([]*mat.Dense, nn.LayerCount)
	A[0] = mat.DenseCopyOf(aIn)
	if nn.watcher != nil {
		if nn.watcher.GetSubscriptionFeatures().Has(ActivationsSubscription) {
			nn.watcher.UpdateActivations(0, mat.DenseCopyOf(A[0]))
		}
	}
	for l := 1; l < nn.LayerCount; l++ {
		A[l] = mat.NewDense(nn.Sizes[l], 1, nil)
		aSrc := A[l-1]
		aDst := A[l]
		// Each iteration implements formula below for neuron activation values
		// A[l]=σ(W[l]*A[l−1]+B[l])
		// W[l]*A[l−1]
		aDst.Mul(nn.Weights[l], aSrc)
		// W[l]*A[l−1]+B[l]
		aDst.Add(aDst, nn.Biases[l])
		// Save raw activation value for back propagation
		Z[l] = mat.DenseCopyOf(aDst)
		// σ(W[l]*A[l−1]+B[l])
		aDst.Apply(applySigmoid, aDst)
		if nn.watcher != nil {
			if nn.watcher.GetSubscriptionFeatures().Has(ActivationsSubscription) {
				nn.watcher.UpdateActivations(l, mat.DenseCopyOf(aDst))
			}
		}
	}
	return
}
// backward returns calculated bias and weights derivatives for each
// layer around the aIn/aOut dataset (one backpropagation pass).
// dB[0] and dW[0] stay nil: the input layer has no biases or weights.
func (nn NeuralNetwork) backward(aIn, aOut mat.Matrix) (dB, dW []*mat.Dense) {
	A, Z := nn.forward(aIn)
	lastLayerNum := nn.LayerCount - 1
	dB = make([]*mat.Dense, nn.LayerCount)
	dW = make([]*mat.Dense, nn.LayerCount)
	// To calculate new values of weights and biases
	// following formulas are used:
	// ∂E/∂W[l] = A[l−1]*δ[l]
	// ∂E/∂B[l] = δ[l]
	// For last layer δ value is calculated by following:
	// δ = (A[L]−y)⊙σ'(Z[L])
	// Calculate initial error for last layer L
	// error = A[L]-y
	// Where y is expected activations set
	err := &mat.Dense{}
	err.Sub(A[nn.LayerCount-1], aOut)
	// Calculate sigmoids prime σ'(Z[L]) for last layer L
	sigmoidsPrime := &mat.Dense{}
	sigmoidsPrime.Apply(applySigmoidPrime, Z[lastLayerNum])
	// (A[L]−y)⊙σ'(Z[L])
	delta := &mat.Dense{}
	delta.MulElem(err, sigmoidsPrime)
	// ∂E/∂B[L] = δ[L]
	biases := mat.DenseCopyOf(delta)
	// ∂E/∂W[L] = A[L−1]*δ[L]
	weights := &mat.Dense{}
	weights.Mul(delta, A[lastLayerNum-1].T())
	// Initialize new weights and biases values with last layer values
	dB[lastLayerNum] = biases
	dW[lastLayerNum] = weights
	// Next layer derivatives of Weights and Biases are calculated using same formulas:
	// ∂E/∂W[l] = A[l−1]*δ[l]
	// ∂E/∂B[l] = δ[l]
	// But δ[l] is calculated using different formula:
	// δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
	// Where Wt[l+1] is transposed matrix of actual Weights from
	// forward step
	for l := nn.LayerCount - 2; l > 0; l-- {
		// Calculate sigmoids prime σ'(Z[l]) for current layer l
		sigmoidsPrime := &mat.Dense{}
		sigmoidsPrime.Apply(applySigmoidPrime, Z[l])
		// (Wt[l+1])*δ[l+1]
		// delta below is the δ from the previous step (layer l+1)
		wdelta := &mat.Dense{}
		wdelta.Mul(nn.Weights[l+1].T(), delta)
		// Calculate new delta for the current layer
		// δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
		delta = &mat.Dense{}
		delta.MulElem(wdelta, sigmoidsPrime)
		// ∂E/∂B[l] = δ[l]
		biases := mat.DenseCopyOf(delta)
		// ∂E/∂W[l] = A[l−1]*δ[l]
		// At this point it's required to give explanation for inaccuracy
		// in the formula
		// Multiplying of activations matrix for layer l-1 and δ[l] is impossible
		// because view of matrices are following:
		//          A[l-1]       δ[l]
		//         ⎡A[0] ⎤     ⎡δ[0] ⎤
		//         ⎢A[1] ⎥     ⎢δ[1] ⎥
		//         ⎢ ... ⎥     ⎢ ... ⎥
		//         ⎢A[i] ⎥  X  ⎢δ[i] ⎥
		//         ⎢ ... ⎥     ⎢ ... ⎥
		//         ⎣A[s']⎦     ⎣δ[s] ⎦
		// So we need to modify these matrices to apply multiplications and get
		// Weights matrix of following view:
		//         ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
		//         ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
		//         ⎢                    ...      ⎥
		//         ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
		//         ⎢                    ...      ⎥
		//         ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
		// So we swap matrices and transpose A[l-1] to get valid multiplication
		// of following view:
		//           δ[l]       A[l-1]
		//         ⎡δ[0] ⎤  x  [A[0] A[1] ... A[i] ... A[s']]
		//         ⎢δ[1] ⎥
		//         ⎢ ... ⎥
		//         ⎢δ[i] ⎥
		//         ⎢ ... ⎥
		//         ⎣δ[s] ⎦
		weights := &mat.Dense{}
		weights.Mul(delta, A[l-1].T())
		dB[l] = biases
		dW[l] = weights
	}
	return
}