/* * MIT License * * Copyright (c) 2019 Alexey Edelev * * This file is part of NeuralNetwork project https://git.semlanik.org/semlanik/NeuralNetwork * * Permission is hereby granted, free of charge, to any person obtaining a copy of this * software and associated documentation files (the "Software"), to deal in the Software * without restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, and * to permit persons to whom the Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be included in all copies * or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ package training import ( "bufio" "fmt" "log" "math/rand" "os" "strconv" "strings" "sync" "time" mat "gonum.org/v1/gonum/mat" ) type TextDataReader struct { dataSet []*mat.Dense result []*mat.Dense index int validationIndex int validationCount int mutex *sync.Mutex } func NewTextDataReader(filename string, validationPart int) *TextDataReader { r := &TextDataReader{ index: 0, validationIndex: 0, mutex: &sync.Mutex{}, } r.readData(filename) r.validationCount = len(r.dataSet) / validationPart r.validationIndex = len(r.dataSet) - r.validationCount return r } func (r *TextDataReader) readData(filename string) { inputFile, err := os.Open(filename) if err != nil { log.Fatal(err) } defer inputFile.Close() scanner := bufio.NewScanner(inputFile) scanner.Split(bufio.ScanLines) var results []string var uniqueResults []string var max []float64 for scanner.Scan() { dataLine := scanner.Text() data := strings.Split(dataLine, ",") dataSetSize := len(data) - 1 if len(max) <= 0 { max = make([]float64, dataSetSize) } if dataSetSize != len(max) { fmt.Printf("Garbage record: %s\n", dataLine) continue } var dataRaw []float64 for i := 0; i < dataSetSize; i++ { val, err := strconv.ParseFloat(data[i], 64) if err != nil { break } dataRaw = append(dataRaw, val) if max[i] < val { max[i] = val } } if len(dataRaw) < dataSetSize { fmt.Printf("Garbage record: %s\n", dataLine) continue } r.dataSet = append(r.dataSet, mat.NewDense(dataSetSize, 1, dataRaw)) found := false for _, uniqueResult := range uniqueResults { if uniqueResult == data[dataSetSize] { found = true break } } if !found { uniqueResults = append(uniqueResults, data[dataSetSize]) } results = append(results, data[dataSetSize]) } for i, result := range results { k := 0 for k, _ = range uniqueResults { if uniqueResults[k] == result { break } } r.result = append(r.result, mat.NewDense(len(uniqueResults), 1, nil)) r.result[i].Set(k, 0, 1.0) } //normalize for i := 0; i < len(r.dataSet); i++ { r.dataSet[i].Apply(func(r, _ int, val float64) float64 { return val / max[r] }, r.dataSet[i]) } rand.Seed(time.Now().UnixNano()) for k := 0; k < 25; k++ { rand.Shuffle(len(r.dataSet), func(i, j int) { r.result[i], r.result[j] = r.result[j], r.result[i] r.dataSet[i], r.dataSet[j] = r.dataSet[j], r.dataSet[i] }) } } func (r *TextDataReader) GetData() (*mat.Dense, *mat.Dense) { // r.mutex.Lock() // defer r.mutex.Unlock() return r.dataSet[r.index], r.result[r.index] } func (r *TextDataReader) NextData() bool { // r.mutex.Lock() // defer r.mutex.Unlock() if (r.index + 1) >= len(r.result)-r.validationCount { r.index = 0 return false } r.index++ return true } func (r *TextDataReader) GetValidator() (*mat.Dense, *mat.Dense) { return r.dataSet[r.validationIndex], r.result[r.validationIndex] } func (r *TextDataReader) NextValidator() bool { if (r.validationIndex + 1) >= len(r.dataSet) { r.validationIndex = len(r.dataSet) - r.validationCount return false } r.validationIndex++ return true } func (r *TextDataReader) Reset() { r.index = 0 r.validationIndex = len(r.dataSet) - r.validationCount } func (r *TextDataReader) Index() int { return r.index } func (r *TextDataReader) ValidationIndex() int { return r.validationIndex } func (r *TextDataReader) GetDataByIndex(i int) (*mat.Dense, *mat.Dense) { if i >= len(r.result)-r.validationCount { return nil, nil } return r.dataSet[i], r.result[i] } func (r *TextDataReader) GetDataCount() int { return len(r.dataSet) - r.validationCount }