// neuralnetwork.go
/*
 * MIT License
 *
 * Copyright (c) 2019 Alexey Edelev <semlanik@gmail.com>, Tatyana Borisova <tanusshhka@mail.ru>
 *
 * This file is part of NeuralNetwork project https://git.semlanik.org/semlanik/NeuralNetwork
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this
 * software and associated documentation files (the "Software"), to deal in the Software
 * without restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software, and
 * to permit persons to whom the Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies
 * or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE
 * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
package neuralnetwork

import (
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"os"
	"runtime"
	"sync"
	"time"

	training "git.semlanik.org/semlanik/NeuralNetwork/training"
	mat "gonum.org/v1/gonum/mat"
)
// NeuralNetwork is an artificial neural network implementation.
//
// Resources:
// http://neuralnetworksanddeeplearning.com
// https://www.youtube.com/watch?v=fNk_zzaMoSs
// http://www.inf.fu-berlin.de/lehre/WS06/Musterererkennung/Paper/rprop.pdf
//
// Matrix: A (local matrices used in forward and backward methods)
// Description: A is the set of calculated neuron activations after sigmoid correction
// Format:    0           l           L
//         ⎡A[0] ⎤ ... ⎡A[0] ⎤ ... ⎡A[0] ⎤
//         ⎢A[1] ⎥ ... ⎢A[1] ⎥ ... ⎢A[1] ⎥
//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
//         ⎢A[i] ⎥ ... ⎢A[i] ⎥ ... ⎢A[i] ⎥
//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
//         ⎣A[s] ⎦ ... ⎣A[s] ⎦ ... ⎣A[s] ⎦
// Where s = Sizes[l]   - neural network layer size
//       L = len(Sizes) - number of neural network layers
//
// Matrix: Z (local matrices used in forward and backward methods)
// Description: Z is the set of calculated raw neuron activations (before sigmoid)
// Format:    0           l           L
//         ⎡Z[0] ⎤ ... ⎡Z[0] ⎤ ... ⎡Z[0] ⎤
//         ⎢Z[1] ⎥ ... ⎢Z[1] ⎥ ... ⎢Z[1] ⎥
//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
//         ⎢Z[i] ⎥ ... ⎢Z[i] ⎥ ... ⎢Z[i] ⎥
//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
//         ⎣Z[s] ⎦ ... ⎣Z[s] ⎦ ... ⎣Z[s] ⎦
// Where s = Sizes[l]   - neural network layer size
//       L = len(Sizes) - number of neural network layers
//
// Matrix: Biases
// Description: Biases is the set of bias vectors per layer, except layer 0.
// NOTE: element 0 is always an empty Dense because the first layer
//       doesn't have connections to a previous layer.
// Format:    1           l           L
//         ⎡b[0] ⎤ ... ⎡b[0] ⎤ ... ⎡b[0] ⎤
//         ⎢b[1] ⎥ ... ⎢b[1] ⎥ ... ⎢b[1] ⎥
//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
//         ⎢b[i] ⎥ ... ⎢b[i] ⎥ ... ⎢b[i] ⎥
//         ⎢ ... ⎥ ... ⎢ ... ⎥ ... ⎢ ... ⎥
//         ⎣b[s] ⎦ ... ⎣b[s] ⎦ ... ⎣b[s] ⎦
// Where s = Sizes[l]   - neural network layer size
//       L = len(Sizes) - number of neural network layers
//
// Matrix: Weights
// Description: Weights is the set of weight matrices per layer, except layer 0.
// NOTE: element 0 is always an empty Dense because the first layer
//       doesn't have connections to a previous layer.
// Format:                1                                  l                                  L
//         ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤ ... ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
//         ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥ ... ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
//         ⎢ ...                         ⎥ ... ⎢ ...                         ⎥ ... ⎢ ...                         ⎥
//         ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥ ... ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
//         ⎢ ...                         ⎥ ... ⎢ ...                         ⎥ ... ⎢ ...                         ⎥
//         ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦ ... ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
// Where s  = Sizes[l]   - neural network layer size
//       s' = Sizes[l-1] - previous neural network layer size
//       L  = len(Sizes) - number of neural network layers
type NeuralNetwork struct {
	LayerCount int           // Number of layers, kept equal to len(Sizes)
	Sizes      []int         // Neuron count for each layer
	Biases     []*mat.Dense  // Per-layer bias column vectors; index 0 unused (see format above)
	Weights    []*mat.Dense  // Per-layer weight matrices; index 0 unused (see format above)
	BGradient  []interface{} // Per-layer bias gradient-descent state (Online or Batch variant)
	WGradient  []interface{} // Per-layer weight gradient-descent state (Online or Batch variant)
	// gradientDescentInitializer creates the per-layer gradient descent state;
	// nil means backpropagation-based training is unavailable.
	gradientDescentInitializer GradientDescentInitializer
	// watcher optionally observes state/activation/weight changes; may be nil.
	watcher StateWatcher
	// syncMutex guards all mutable network state across goroutines.
	syncMutex *sync.Mutex
}
  108. // NewNeuralNetwork construction method that initializes new NeuralNetwork based
  109. // on provided list of layer sizes and GradientDescentInitializer that used for
  110. // backpropagation mechanism.
  111. // If gradientDescentInitializer is not provided (is nil) backpropagation won't
  112. // be possible. Common usecase when it's used is natural selection and genetic
  113. // training.
  114. func NewNeuralNetwork(sizes []int, gradientDescentInitializer GradientDescentInitializer) (nn *NeuralNetwork, err error) {
  115. err = nil
  116. if len(sizes) < 3 {
  117. fmt.Printf("Invalid network configuration: %v\n", sizes)
  118. return nil, errors.New("Invalid network configuration: %v\n")
  119. }
  120. for i := 0; i < len(sizes); i++ {
  121. if sizes[i] < 2 {
  122. fmt.Printf("Invalid network configuration: %v\n", sizes)
  123. return nil, errors.New("Invalid network configuration: %v\n")
  124. }
  125. }
  126. lenSizes := len(sizes)
  127. nn = &NeuralNetwork{
  128. Sizes: sizes,
  129. LayerCount: len(sizes),
  130. Biases: make([]*mat.Dense, lenSizes),
  131. Weights: make([]*mat.Dense, lenSizes),
  132. BGradient: make([]interface{}, lenSizes),
  133. WGradient: make([]interface{}, lenSizes),
  134. gradientDescentInitializer: gradientDescentInitializer,
  135. syncMutex: &sync.Mutex{},
  136. }
  137. for l := 1; l < nn.LayerCount; l++ {
  138. nn.Biases[l] = generateRandomDense(nn.Sizes[l], 1)
  139. nn.Weights[l] = generateRandomDense(nn.Sizes[l], nn.Sizes[l-1])
  140. if nn.gradientDescentInitializer != nil {
  141. nn.BGradient[l] = nn.gradientDescentInitializer(nn, l, BiasGradient)
  142. nn.WGradient[l] = nn.gradientDescentInitializer(nn, l, WeightGradient)
  143. }
  144. }
  145. return
  146. }
  147. // Copy makes complete copy of NeuralNetwork data. Output network has the same
  148. // weights and biases values and but might be used independend of original one,
  149. // e.g. in separate goroutine
  150. func (nn *NeuralNetwork) Copy() (outNN *NeuralNetwork) {
  151. nn.syncMutex.Lock()
  152. defer nn.syncMutex.Unlock()
  153. outNN = &NeuralNetwork{
  154. Sizes: nn.Sizes,
  155. LayerCount: len(nn.Sizes),
  156. Biases: make([]*mat.Dense, nn.LayerCount),
  157. Weights: make([]*mat.Dense, nn.LayerCount),
  158. BGradient: make([]interface{}, nn.LayerCount),
  159. WGradient: make([]interface{}, nn.LayerCount),
  160. gradientDescentInitializer: nn.gradientDescentInitializer,
  161. watcher: nn.watcher,
  162. syncMutex: &sync.Mutex{},
  163. }
  164. for l := 1; l < outNN.LayerCount; l++ {
  165. outNN.Biases[l] = mat.DenseCopyOf(nn.Biases[l])
  166. outNN.Weights[l] = mat.DenseCopyOf(nn.Weights[l])
  167. if outNN.gradientDescentInitializer != nil {
  168. outNN.BGradient[l] = outNN.gradientDescentInitializer(outNN, l, BiasGradient)
  169. outNN.WGradient[l] = outNN.gradientDescentInitializer(outNN, l, WeightGradient)
  170. }
  171. }
  172. return
  173. }
  174. // Reset resets network state to intial/random one with specified in argument
  175. // layers configuration
  176. func (nn *NeuralNetwork) Reset(sizes []int) (err error) {
  177. nn.syncMutex.Lock()
  178. defer nn.syncMutex.Unlock()
  179. err = nil
  180. if len(sizes) < 3 {
  181. fmt.Printf("Invalid network configuration: %v\n", sizes)
  182. return errors.New("Invalid network configuration: %v\n")
  183. }
  184. for i := 0; i < len(sizes); i++ {
  185. if sizes[i] < 2 {
  186. fmt.Printf("Invalid network configuration: %v\n", sizes)
  187. return errors.New("Invalid network configuration: %v\n")
  188. }
  189. }
  190. lenSizes := len(sizes)
  191. nn.Sizes = sizes
  192. nn.LayerCount = len(sizes)
  193. nn.Biases = make([]*mat.Dense, lenSizes)
  194. nn.Weights = make([]*mat.Dense, lenSizes)
  195. nn.BGradient = make([]interface{}, lenSizes)
  196. nn.WGradient = make([]interface{}, lenSizes)
  197. for l := 1; l < nn.LayerCount; l++ {
  198. nn.Biases[l] = generateRandomDense(nn.Sizes[l], 1)
  199. nn.Weights[l] = generateRandomDense(nn.Sizes[l], nn.Sizes[l-1])
  200. if nn.gradientDescentInitializer != nil {
  201. nn.BGradient[l] = nn.gradientDescentInitializer(nn, l, BiasGradient)
  202. nn.WGradient[l] = nn.gradientDescentInitializer(nn, l, WeightGradient)
  203. }
  204. }
  205. return
  206. }
  207. // SetStateWatcher setups state watcher for NeuralNetwork. StateWatcher is common
  208. // interface that collects data about NeuralNetwork behaivor. If not specified (is
  209. // set to nil) NeuralNetwork will ignore StateWatcher interations
  210. func (nn *NeuralNetwork) SetStateWatcher(watcher StateWatcher) {
  211. nn.watcher = watcher
  212. if watcher != nil {
  213. watcher.Init(nn)
  214. watcher.UpdateState(StateIdle)
  215. }
  216. }
  217. // Predict method invokes prediction based on input activations provided in argument.
  218. // Returns index of best element in output activation matrix and its value
  219. func (nn *NeuralNetwork) Predict(aIn mat.Matrix) (maxIndex int, max float64) {
  220. nn.syncMutex.Lock()
  221. defer nn.syncMutex.Unlock()
  222. if nn.watcher != nil {
  223. nn.watcher.UpdateState(StatePredict)
  224. defer nn.watcher.UpdateState(StateIdle)
  225. }
  226. r, _ := aIn.Dims()
  227. if r != nn.Sizes[0] {
  228. fmt.Printf("Invalid rows number of input matrix size: %v\n", r)
  229. return -1, 0.0
  230. }
  231. A, _ := nn.forward(aIn)
  232. result := A[nn.LayerCount-1]
  233. r, _ = result.Dims()
  234. max = 0.0
  235. maxIndex = 0
  236. for i := 0; i < r; i++ {
  237. if result.At(i, 0) > max {
  238. max = result.At(i, 0)
  239. maxIndex = i
  240. }
  241. }
  242. return
  243. }
  244. // Validate runs basic network validation/verification based on validation data that
  245. // provided by training.Trainer passed as argument.
  246. // Returns count of failure predictions and total amount of verified samples.
  247. func (nn *NeuralNetwork) Validate(trainer training.Trainer) (failCount, total int) {
  248. nn.syncMutex.Lock()
  249. defer nn.syncMutex.Unlock()
  250. failCount = 0
  251. total = 0
  252. for i := 0; i < trainer.ValidatorCount(); i++ {
  253. dataSet, expect := trainer.GetValidator(i)
  254. index, _ := nn.Predict(dataSet)
  255. if expect.At(index, 0) != 1.0 {
  256. failCount++
  257. }
  258. total++
  259. }
  260. return
  261. }
  262. // Train is common training function that invokes one of training methods depends on
  263. // gradient descent used buy NeuralNetwork. training.Trainer passed as argument used
  264. // to get training data. Training loops are limited buy number of epocs
  265. func (nn *NeuralNetwork) Train(trainer training.Trainer, epocs int) {
  266. if nn.watcher != nil {
  267. nn.watcher.UpdateState(StateLearning)
  268. defer nn.watcher.UpdateState(StateIdle)
  269. }
  270. if _, ok := nn.WGradient[nn.LayerCount-1].(OnlineGradientDescent); ok {
  271. nn.trainOnline(trainer, epocs)
  272. } else if _, ok := nn.WGradient[nn.LayerCount-1].(BatchGradientDescent); ok {
  273. nn.trainBatch(trainer, epocs)
  274. } else {
  275. panic("Invalid gradient descent type")
  276. }
  277. }
// trainOnline runs online (per-sample) training: for every sample of every
// epoch the bias/weight deltas produced by backpropagation are applied
// immediately via the OnlineGradientDescent state of each layer.
func (nn *NeuralNetwork) trainOnline(trainer training.Trainer, epocs int) {
	for t := 0; t < epocs; t++ {
		for i := 0; i < trainer.DataCount(); i++ {
			// Lock per sample so concurrent Predict/Copy calls can
			// interleave between samples.
			nn.syncMutex.Lock()
			dB, dW := nn.backward(trainer.GetData(i))
			for l := 1; l < nn.LayerCount; l++ {
				// The gradient state must be the online variant here;
				// Train dispatched us based on the last layer only, so
				// verify each layer explicitly.
				bGradient, ok := nn.BGradient[l].(OnlineGradientDescent)
				if !ok {
					panic("bGradient is not a OnlineGradientDescent")
				}
				wGradient, ok := nn.WGradient[l].(OnlineGradientDescent)
				if !ok {
					panic("wGradient is not a OnlineGradientDescent")
				}
				// Apply this sample's deltas straight away (online descent).
				nn.Biases[l] = bGradient.ApplyDelta(nn.Biases[l], dB[l])
				nn.Weights[l] = wGradient.ApplyDelta(nn.Weights[l], dW[l])
				if nn.watcher != nil {
					nn.watcher.UpdateBiases(l, nn.Biases[l])
					nn.watcher.UpdateWeights(l, nn.Weights[l])
				}
			}
			nn.syncMutex.Unlock()
		}
	}
}
  303. func (nn *NeuralNetwork) trainBatch(trainer training.Trainer, epocs int) {
  304. fmt.Printf("Start training in %v threads\n", runtime.NumCPU())
  305. for t := 0; t < epocs; t++ {
  306. batchWorkers := nn.runBatchWorkers(runtime.NumCPU(), trainer)
  307. nn.syncMutex.Lock()
  308. for l := 1; l < nn.LayerCount; l++ {
  309. bGradient, ok := nn.BGradient[l].(BatchGradientDescent)
  310. if !ok {
  311. panic("bGradient is not a BatchGradientDescent")
  312. }
  313. wGradient, ok := nn.WGradient[l].(BatchGradientDescent)
  314. if !ok {
  315. panic("wGradient is not a BatchGradientDescent")
  316. }
  317. for _, bw := range batchWorkers {
  318. dB, dW := bw.result(l)
  319. bGradient.AccumGradients(dB)
  320. wGradient.AccumGradients(dW)
  321. }
  322. nn.Biases[l] = bGradient.ApplyDelta(nn.Biases[l])
  323. nn.Weights[l] = wGradient.ApplyDelta(nn.Weights[l])
  324. if nn.watcher != nil {
  325. nn.watcher.UpdateBiases(l, nn.Biases[l])
  326. nn.watcher.UpdateWeights(l, nn.Weights[l])
  327. }
  328. }
  329. nn.syncMutex.Unlock()
  330. //TODO: remove this is not used for visualization
  331. time.Sleep(100 * time.Millisecond)
  332. }
  333. }
  334. func (nn *NeuralNetwork) runBatchWorkers(threadCount int, trainer training.Trainer) (workers []*batchWorker) {
  335. wg := sync.WaitGroup{}
  336. chunkSize := trainer.DataCount() / threadCount
  337. workers = make([]*batchWorker, threadCount)
  338. for i, _ := range workers {
  339. workers[i] = newBatchWorker(nn)
  340. wg.Add(1)
  341. s := i
  342. go func() {
  343. workers[s].run(trainer, s*chunkSize, (s+1)*chunkSize)
  344. wg.Done()
  345. }()
  346. }
  347. wg.Wait()
  348. return
  349. }
// SaveState saves the state of the NeuralNetwork to io.Writer. It's useful to
// keep training results between NeuralNetwork "power cycles" or to share
// training results between clustered neural network hosts.
//
// Binary layout (all integers little-endian uint32):
//   [layer count][layer sizes...][biases for layers 1..L-1][weights for layers 1..L-1]
func (nn *NeuralNetwork) SaveState(writer io.Writer) {
	nn.syncMutex.Lock()
	defer nn.syncMutex.Unlock()
	// Write the layer count first.
	bufferSize := make([]byte, 4)
	binary.LittleEndian.PutUint32(bufferSize[0:], uint32(nn.LayerCount))
	_, err := writer.Write(bufferSize)
	check(err)
	// Write the layer sizes array, one uint32 per layer.
	buffer := make([]byte, nn.LayerCount*4)
	for i := 0; i < nn.LayerCount; i++ {
		binary.LittleEndian.PutUint32(buffer[i*4:], uint32(nn.Sizes[i]))
	}
	_, err = writer.Write(buffer)
	check(err)
	// Write biases; layer 0 has none, so start from 1.
	for i := 1; i < nn.LayerCount; i++ {
		saveDense(writer, nn.Biases[i])
	}
	// Write weights; layer 0 has none, so start from 1.
	for i := 1; i < nn.LayerCount; i++ {
		saveDense(writer, nn.Weights[i])
	}
}
  379. // SaveStateToFile saves NeuralNetwork state to file by specific filePath
  380. func (nn *NeuralNetwork) SaveStateToFile(filePath string) {
  381. outFile, err := os.OpenFile(filePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0666)
  382. check(err)
  383. defer outFile.Close()
  384. nn.SaveState(outFile)
  385. }
  386. // LoadState loads NeuralNetwork state from io.Reader. All existing data in NeuralNetwork
  387. // will be rewritten buy this method, including layers configuration and weights and biases
  388. func (nn *NeuralNetwork) LoadState(reader io.Reader) {
  389. nn.syncMutex.Lock()
  390. defer nn.syncMutex.Unlock()
  391. // Reade count
  392. nn.LayerCount = readInt(reader)
  393. // Read an input array
  394. sizeBuffer := readByteArray(reader, nn.LayerCount*4)
  395. nn.Sizes = make([]int, nn.LayerCount)
  396. for l := 0; l < nn.LayerCount; l++ {
  397. nn.Sizes[l] = int(binary.LittleEndian.Uint32(sizeBuffer[l*4:]))
  398. fmt.Printf("LoadState: nn.Sizes[%d] %d \n", l, nn.Sizes[l])
  399. }
  400. nn.Weights = []*mat.Dense{&mat.Dense{}}
  401. nn.Biases = []*mat.Dense{&mat.Dense{}}
  402. // read Biases
  403. nn.Biases[0] = &mat.Dense{}
  404. for l := 1; l < nn.LayerCount; l++ {
  405. nn.Biases = append(nn.Biases, &mat.Dense{})
  406. nn.Biases[l] = readDense(reader, nn.Biases[l])
  407. }
  408. // read Weights and initialize gradient descents
  409. nn.BGradient = make([]interface{}, nn.LayerCount)
  410. nn.WGradient = make([]interface{}, nn.LayerCount)
  411. nn.Weights[0] = &mat.Dense{}
  412. for l := 1; l < nn.LayerCount; l++ {
  413. nn.Weights = append(nn.Weights, &mat.Dense{})
  414. nn.Weights[l] = readDense(reader, nn.Weights[l])
  415. if nn.gradientDescentInitializer != nil {
  416. nn.BGradient[l] = nn.gradientDescentInitializer(nn, l, BiasGradient)
  417. nn.WGradient[l] = nn.gradientDescentInitializer(nn, l, WeightGradient)
  418. }
  419. }
  420. // fmt.Printf("\nLoadState end\n")
  421. }
  422. // LoadStateFromFile loads NeuralNetwork state from file by specific filePath
  423. func (nn *NeuralNetwork) LoadStateFromFile(filePath string) {
  424. inFile, err := os.Open(filePath)
  425. check(err)
  426. defer inFile.Close()
  427. nn.LoadState(inFile)
  428. }
// forward runs forward propagation for the input activations aIn and returns
// both the sigmoid-corrected activations A and the raw activations Z for every
// layer (see the matrix-format documentation on NeuralNetwork). Z[0] and the
// index-0 entries are left nil/untouched because layer 0 is the input itself.
func (nn NeuralNetwork) forward(aIn mat.Matrix) (A, Z []*mat.Dense) {
	A = make([]*mat.Dense, nn.LayerCount)
	Z = make([]*mat.Dense, nn.LayerCount)
	A[0] = mat.DenseCopyOf(aIn)
	if nn.watcher != nil {
		nn.watcher.UpdateActivations(0, A[0])
	}
	for l := 1; l < nn.LayerCount; l++ {
		A[l] = mat.NewDense(nn.Sizes[l], 1, nil)
		aSrc := A[l-1]
		aDst := A[l]
		// Each iteration implements the formula below for neuron activation values
		// A[l]=σ(W[l]*A[l−1]+B[l])
		// W[l]*A[l−1]
		aDst.Mul(nn.Weights[l], aSrc)
		// W[l]*A[l−1]+B[l]
		aDst.Add(aDst, nn.Biases[l])
		// Save raw activation value for back propagation
		Z[l] = mat.DenseCopyOf(aDst)
		// σ(W[l]*A[l−1]+B[l])
		aDst.Apply(applySigmoid, aDst)
		if nn.watcher != nil {
			nn.watcher.UpdateActivations(l, aDst)
		}
	}
	return
}
// backward returns the calculated bias and weight derivatives for each layer
// around the aIn/aOut dataset pair (one forward pass followed by
// backpropagation). dB[0]/dW[0] stay nil because layer 0 has no parameters.
func (nn NeuralNetwork) backward(aIn, aOut mat.Matrix) (dB, dW []*mat.Dense) {
	A, Z := nn.forward(aIn)
	lastLayerNum := nn.LayerCount - 1
	dB = make([]*mat.Dense, nn.LayerCount)
	dW = make([]*mat.Dense, nn.LayerCount)
	// To calculate new values of weights and biases
	// the following formulas are used:
	// ∂E/∂W[l] = A[l−1]*δ[l]
	// ∂E/∂B[l] = δ[l]
	// For the last layer the δ value is calculated as follows:
	// δ = (A[L]−y)⊙σ'(Z[L])
	// Calculate initial error for last layer L
	// error = A[L]-y
	// Where y is the expected activations set
	err := &mat.Dense{}
	err.Sub(A[nn.LayerCount-1], aOut)
	// Calculate sigmoids prime σ'(Z[L]) for last layer L
	sigmoidsPrime := &mat.Dense{}
	sigmoidsPrime.Apply(applySigmoidPrime, Z[lastLayerNum])
	// (A[L]−y)⊙σ'(Z[L])
	delta := &mat.Dense{}
	delta.MulElem(err, sigmoidsPrime)
	// ∂E/∂B[L] = δ[L]
	biases := mat.DenseCopyOf(delta)
	// ∂E/∂W[L] = A[L−1]*δ[L]
	weights := &mat.Dense{}
	weights.Mul(delta, A[lastLayerNum-1].T())
	// Initialize new weights and biases values with last layer values
	dB[lastLayerNum] = biases
	dW[lastLayerNum] = weights
	// Derivatives of Weights and Biases for the remaining layers are
	// calculated using the same formulas:
	// ∂E/∂W[l] = A[l−1]*δ[l]
	// ∂E/∂B[l] = δ[l]
	// But δ[l] is calculated using a different formula:
	// δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
	// Where Wt[l+1] is the transposed matrix of actual Weights from
	// the forward step
	for l := nn.LayerCount - 2; l > 0; l-- {
		// Calculate sigmoids prime σ'(Z[l]) for layer l
		sigmoidsPrime := &mat.Dense{}
		sigmoidsPrime.Apply(applySigmoidPrime, Z[l])
		// (Wt[l+1])*δ[l+1]
		// delta below still holds δ from the previous step (l+1)
		wdelta := &mat.Dense{}
		wdelta.Mul(nn.Weights[l+1].T(), delta)
		// Calculate the new delta for this layer
		// δ[l] = ((Wt[l+1])*δ[l+1])⊙σ'(Z[l])
		delta = &mat.Dense{}
		delta.MulElem(wdelta, sigmoidsPrime)
		// ∂E/∂B[l] = δ[l]
		biases := mat.DenseCopyOf(delta)
		// ∂E/∂W[l] = A[l−1]*δ[l]
		// At this point it's required to give an explanation for an inaccuracy
		// in the formula.
		// Multiplying the activations matrix for layer l-1 and δ[l] is impossible
		// because the shapes of the matrices are the following:
		//    A[l-1]       δ[l]
		//   ⎡A[0]  ⎤    ⎡δ[0] ⎤
		//   ⎢A[1]  ⎥    ⎢δ[1] ⎥
		//   ⎢ ...  ⎥    ⎢ ... ⎥
		//   ⎢A[i]  ⎥  X ⎢δ[i] ⎥
		//   ⎢ ...  ⎥    ⎢ ... ⎥
		//   ⎣A[s'] ⎦    ⎣δ[s] ⎦
		// So we need to modify these matrices to apply the multiplication and get
		// a Weights matrix of the following shape:
		//   ⎡w[0,0] ... w[0,j] ... w[0,s']⎤
		//   ⎢w[1,0] ... w[1,j] ... w[1,s']⎥
		//   ⎢ ...                         ⎥
		//   ⎢w[i,0] ... w[i,j] ... w[i,s']⎥
		//   ⎢ ...                         ⎥
		//   ⎣w[s,0] ... w[s,j] ... w[s,s']⎦
		// So we swap the matrices and transpose A[l-1] to get a valid multiplication
		// of the following shape:
		//    δ[l]       A[l-1]
		//   ⎡δ[0] ⎤  x [A[0] A[1] ... A[i] ... A[s']]
		//   ⎢δ[1] ⎥
		//   ⎢ ... ⎥
		//   ⎢δ[i] ⎥
		//   ⎢ ... ⎥
		//   ⎣δ[s] ⎦
		weights := &mat.Dense{}
		weights.Mul(delta, A[l-1].T())
		dB[l] = biases
		dW[l] = weights
	}
	return
}