/* * MIT License * * Copyright (c) 2019 Alexey Edelev * * This file is part of NeuralNetwork project https://git.semlanik.org/semlanik/NeuralNetwork * * Permission is hereby granted, free of charge, to any person obtaining a copy of this * software and associated documentation files (the "Software"), to deal in the Software * without restriction, including without limitation the rights to use, copy, modify, * merge, publish, distribute, sublicense, and/or sell copies of the Software, and * to permit persons to whom the Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be included in all copies * or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR * PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ package training import ( "bufio" "fmt" "log" "math/rand" "os" "strconv" "strings" "sync" "time" mat "gonum.org/v1/gonum/mat" ) type TextDataReader struct { dataSet []*mat.Dense result []*mat.Dense dataCount int mutex *sync.Mutex } func NewTextDataReader(filename string, validationPart int) *TextDataReader { r := &TextDataReader{ mutex: &sync.Mutex{}, } r.readData(filename) r.dataCount = int((float64(len(r.dataSet)) * float64(100.0 - validationPart)) / 100.0) return r } func (r *TextDataReader) readData(filename string) { inputFile, err := os.Open(filename) if err != nil { log.Fatal(err) } defer inputFile.Close() scanner := bufio.NewScanner(inputFile) scanner.Split(bufio.ScanLines) var results []string var uniqueResults []string var max []float64 for scanner.Scan() { dataLine := scanner.Text() data := strings.Split(dataLine, ",") dataSetSize := len(data) - 1 if len(max) <= 0 { max = make([]float64, dataSetSize) } if dataSetSize != len(max) { fmt.Printf("Garbage record: %s\n", dataLine) continue } var dataRaw []float64 for i := 0; i < dataSetSize; i++ { val, err := strconv.ParseFloat(data[i], 64) if err != nil { break } dataRaw = append(dataRaw, val) if max[i] < val { max[i] = val } } if len(dataRaw) < dataSetSize { fmt.Printf("Garbage record: %s\n", dataLine) continue } r.dataSet = append(r.dataSet, mat.NewDense(dataSetSize, 1, dataRaw)) found := false for _, uniqueResult := range uniqueResults { if uniqueResult == data[dataSetSize] { found = true break } } if !found { uniqueResults = append(uniqueResults, data[dataSetSize]) } results = append(results, data[dataSetSize]) } for i, result := range results { k := 0 for k, _ = range uniqueResults { if uniqueResults[k] == result { break } } r.result = append(r.result, mat.NewDense(len(uniqueResults), 1, nil)) r.result[i].Set(k, 0, 1.0) } //normalize for i := 0; i < len(r.dataSet); i++ { r.dataSet[i].Apply(func(r, _ int, val float64) float64 { return val / max[r] }, r.dataSet[i]) } rand.Seed(time.Now().UnixNano()) for k := 0; k < 25; k++ { rand.Shuffle(len(r.dataSet), func(i, j int) { r.result[i], r.result[j] = r.result[j], r.result[i] r.dataSet[i], r.dataSet[j] = r.dataSet[j], r.dataSet[i] }) } } func (r *TextDataReader) GetData(i int) (*mat.Dense, *mat.Dense) { if i >= r.dataCount { return nil, nil } return r.dataSet[i], r.result[i] } func (r *TextDataReader) DataCount() int { return r.dataCount; } func (r *TextDataReader) GetValidator(i int) (*mat.Dense, *mat.Dense) { if i >= len(r.result) - r.dataCount { return nil, nil } return r.dataSet[r.dataCount + i], r.result[r.dataCount + i] } func (r *TextDataReader) ValidatorCount() int { return len(r.result) - r.dataCount; }