繁体   English   中英

使用 strconv 将 csv 数据([]string)转换为 float64,然后对数据求和

[英]Converting csv data ([]string) to float64 with strconv then summing the data

我正在尝试对一个 CSV 文件中的数据求和,该文件由之前的一个函数创建。 这是该文件的一个片段:

datetime,open,high,low,close,volume
2020-11-09 00:00,69.58,137.45,69.00,100.00,273517274.00
2020-11-10 00:00,104.65,128.80,101.75,107.00,141284399.00
2020-11-11 00:00,109.00,114.45,96.76,98.42,96648953.00
2020-11-12 00:00,95.98,106.60,89.15,90.00,149794913.00

[关于上下文:这是来自雅虎财经的劳斯莱斯控股股票价格的历史价格数据。 我计划使用多达 200 行]。

我面临的问题是将 []string 数据从 CSV 转换为 float64。 ParseFloat() 函数试图转换我的标题行,显然无法成功,因此报告“invalid syntax”(无效语法)错误。 这是错误输出:

Error converting string: strconv.ParseFloat: parsing "open": invalid syntaxError converting string: strconv.ParseFloat: parsing "high": invalid syntaxError converting string: strconv.ParseFloat: parsing "low": invalid syntaxError converting string: strconv.ParseFloat: parsing "close": invalid syntaxError converting string: strconv.ParseFloat: parsing "volume": invalid syntax&{ 0 0 0 0 0}

我的代码如下供参考:

package main

import (
    "encoding/csv"
    "fmt"
    "log"
    "os"
    "strconv"
)

const file = "./rr.csv"

// Data is one row of the CSV file: the datetime column kept as a string and
// the open, high, low, close and volume columns as float64 values.
type Data struct {
    datetime                       string
    open, high, low, close, volume float64
}

// readAmounts parses fields 1 through 5 of the record r as float64 values and
// stores them, in that order, in the open, high, low, close and volume fields
// of a newly allocated Data.
//
// A field that cannot be parsed is reported on standard output and parsing
// moves on to the next field. The returned error is always nil.
func readAmounts(r []string) (d *Data, err error) {
    d = &Data{}

    // Destination fields listed in the same order as columns 1..5 of r.
    targets := []*float64{&d.open, &d.high, &d.low, &d.close, &d.volume}
    for i, dst := range targets {
        *dst, err = strconv.ParseFloat(r[i+1], 64)
        if err != nil {
            fmt.Printf("Error converting string: %v", err)
        }
    }
    return d, nil
}

// main opens the CSV file named by file, reads a single record from it,
// converts that record with readAmounts and prints the result.
func main() {
    f, openErr := os.Open(file)
    if openErr != nil {
        log.Fatal(openErr)
    }

    // Read returns exactly one record; on a freshly created reader that is
    // the first line of the file.
    record, readErr := csv.NewReader(f).Read()
    if readErr != nil {
        log.Fatal(readErr)
    }

    amounts, convErr := readAmounts(record)
    if convErr != nil {
        fmt.Printf("Error reading file: %v", convErr)
    }
    fmt.Println(amounts)
}

我只是打印数据以查看 ParseFloat() 是否有效,之后我会编写一个函数来对各列求和。 所以,我的问题是:如何忽略标题(header)行,而只读取数字行?

仅供参考:我已经阅读了其他答案(例如:FieldsPerRecord),但它们并没有解决我的具体问题,因为我随后试图对整个列求和。

您可以先把整个 CSV 文件读入内存,然后逐行处理:

package main

import (
    "bytes"
    "encoding/csv"
    "fmt"
    "io/ioutil"
    "strconv"
)

const file = "./data.csv"

// Data is one parsed row of the CSV file. datetime keeps the first column as
// text; the price columns and the volume column are stored as float64.
type Data struct {
    datetime               string
    open, high, low, close float64
    volume                 float64
}

// main reads the CSV file named by file, drops its header row, converts the
// remaining rows with readAmounts and prints them. Any failure panics.
func main() {
    // NOTE(review): ioutil.ReadFile is deprecated since Go 1.16 in favour of
    // os.ReadFile; it is kept here because this program imports io/ioutil.
    f, err := ioutil.ReadFile(file)
    if err != nil {
        panic(err)
    }
    rawData, err := readCsv(f, ',')
    if err != nil {
        panic(err)
    }

    // The first record is the header. Guard the slice expression: an empty
    // file yields no records, and rawData[1:] would then panic with a
    // slice-bounds error instead of simply producing no rows.
    rows := rawData
    if len(rows) > 0 {
        rows = rows[1:]
    }

    amounts, err := readAmounts(rows)
    if err != nil {
        panic(err)
    }
    fmt.Printf("%+v\n", amounts)
}

// readAmounts converts CSV records into Data values.
//
// Every record in r must have at least six fields: the datetime text followed
// by open, high, low, close and volume, which are parsed as float64. The
// header row must not be part of r.
//
// It returns one Data per record. As soon as a record is too short or a field
// does not parse, it returns a nil slice and an error naming the row (its
// zero-based index in r) and the column; the parse error is wrapped so callers
// can inspect it with errors.Is / errors.As.
func readAmounts(r [][]string) ([]Data, error) {
    // Column names in file order; index 0 is kept as text, 1..5 are numeric.
    columns := [...]string{"datetime", "open", "high", "low", "close", "volume"}

    d := make([]Data, len(r))
    for i, row := range r {
        if len(row) < len(columns) {
            return nil, fmt.Errorf("row %d: got %d fields, want at least %d", i, len(row), len(columns))
        }

        d[i].datetime = row[0]

        // Destination fields in the same order as columns 1..5 of the row.
        targets := [...]*float64{&d[i].open, &d[i].high, &d[i].low, &d[i].close, &d[i].volume}
        for j, dst := range targets {
            v, err := strconv.ParseFloat(row[j+1], 64)
            if err != nil {
                return nil, fmt.Errorf("row %d, column %q: %w", i, columns[j+1], err)
            }
            *dst = v
        }
    }
    return d, nil
}

// readCsv parses data as CSV, using separator as the field delimiter, and
// returns every record it contains. When parsing fails the records are nil
// and the error from the csv reader is returned unchanged.
func readCsv(data []byte, separator rune) ([][]string, error) {
    rd := csv.NewReader(bytes.NewReader(data))
    rd.Comma = separator
    return rd.ReadAll()
}

output 示例:

[{datetime:2020-11-09 00:00 open:69.58 high:137.45 low:69 close:100 volume:2.73517274e+08} {datetime:2020-11-10 00:00 open:104.65 high:128.8 low:101.75 close:107 volume:1.41284399e+08} {datetime:2020-11-11 00:00 open:109 high:114.45 low:96.76 close:98.42 volume:9.6648953e+07} {datetime:2020-11-12 00:00 open:95.98 high:106.6 low:89.15 close:90 volume:1.49794913e+08}]

注:

如果想查看更多使用 CSV 库的代码示例,可以参考以下存储库: https://github.com/alessiosavi/GoSFTPtoS3

我已经为这个程序添加了注释,这样很容易理解。 基本思想是忽略标题(header)行。 此外,在按索引读取记录的字段时,最好先检查记录中的字段数( FieldsPerRecord )。

package main

import (
    "encoding/csv"
    "errors"
    "fmt"
    "io"
    "log"
    "os"
    "strconv"
)

// file stores the filepath
const file = "./rr.csv"

// Data stores one CSV record: the datetime field as a string and the open,
// high, low, close and volume fields as float64 values.
type Data struct {
    datetime                       string
    open, high, low, close, volume float64
}

// s2f converts str to a float64 using strconv.ParseFloat with 64-bit
// precision.
//
// On failure it returns 0 and an error that quotes the offending input and
// wraps the strconv error, so callers can still match the cause with
// errors.Is / errors.As.
func s2f(str string) (float64, error) {
    f, err := strconv.ParseFloat(str, 64)
    if err != nil {
        // Quote the input string itself; the underlying error is appended as
        // the wrapped cause rather than placed inside the quotes.
        return 0, fmt.Errorf("converting string %q to float: %w", str, err)
    }
    return f, nil
}

// ReadAmounts converts one CSV record into a Data value.
//
// r must hold at least six fields in the order datetime, open, high, low,
// close, volume. The datetime field is stored as-is; the remaining five are
// converted with s2f.
//
// It returns a nil *Data and an error when the record has fewer than six
// fields or when any numeric field fails to convert.
func ReadAmounts(r []string) (*Data, error) {
    // Check the field count up front so a short record yields an error
    // instead of an index-out-of-range panic.
    const wantFields = 6
    if len(r) < wantFields {
        return nil, fmt.Errorf("record has %d fields, want at least %d", len(r), wantFields)
    }

    d := &Data{datetime: r[0]}

    // Destination fields in the same order as columns 1..5 of the record.
    targets := [...]*float64{&d.open, &d.high, &d.low, &d.close, &d.volume}
    for i, dst := range targets {
        v, err := s2f(r[i+1])
        if err != nil {
            return nil, err
        }
        *dst = v
    }
    return d, nil
}

// main streams the CSV file named by file record by record, skips the header,
// converts each remaining record with ReadAmounts and prints it. Malformed
// records are logged and skipped; any other read failure stops processing.
func main() {
    // Open the file. The handle is named f so that it does not shadow the
    // file constant holding the path.
    f, err := os.Open(file)
    if err != nil {
        log.Fatalln(err)
    }
    defer f.Close()

    // CSV reader and its options.
    r := csv.NewReader(f)
    r.Comma = ','             // Delimiter
    r.TrimLeadingSpace = true // Trim the leading spaces
    r.FieldsPerRecord = 0     // Rows should have same number of columns as header
    r.ReuseRecord = true      // Reuse the same backing array (Efficient)

    // isParseError reports whether err is a *csv.ParseError, i.e. a problem
    // with one particular record that the reader can move past.
    isParseError := func(err error) bool {
        var perr *csv.ParseError
        return errors.As(err, &perr)
    }

    // Alternatively, r.ReadAll() could be also used and slicing it using [1:]
    // ignores the header as well.

    // Skip the header, but do not silently discard a failure to read it.
    if _, err := r.Read(); err != nil {
        if errors.Is(err, io.EOF) {
            return // Empty file: nothing to process.
        }
        log.Printf("Error reading header: %v\n", err)
        if !isParseError(err) {
            return
        }
    }

    for {
        // Read record (one by one)
        record, err := r.Read()
        if errors.Is(err, io.EOF) {
            // Exit out. Done!
            break
        }
        if err != nil {
            log.Printf("Error reading record: %v\n", err)
            if !isParseError(err) {
                // Not tied to a single record (for example a failing
                // underlying reader); the same error may come back on every
                // following Read, so retrying could loop forever.
                break
            }
            continue
        }

        // Process
        data, err := ReadAmounts(record)
        if err != nil {
            // Log and continue
            fmt.Printf("Error reading record: %v\n", err)
            continue
        }

        // Print the filled Data struct
        fmt.Printf("Record: %+v\n", *data)
    }
}

Output:

Record: {datetime:2020-11-09 00:00 open:69.58 high:137.45 low:69 close:100 volume:2.73517274e+08}
Record: {datetime:2020-11-10 00:00 open:104.65 high:128.8 low:101.75 close:107 volume:1.41284399e+08}
Record: {datetime:2020-11-11 00:00 open:109 high:114.45 low:96.76 close:98.42 volume:9.6648953e+07}
Record: {datetime:2020-11-12 00:00 open:95.98 high:106.6 low:89.15 close:90 volume:1.49794913e+08}

一些不同的选择:

  1. 跳过解析第一行。 这假设每个文件都以 header 开头。

  2. 跳过有解析错误的行。 这是最简单的方法,但出现问题时很难调试。

  3. 如果第一行有解析错误,请跳过它,因为它可能是 header 行。

附带说明一下,您应该正确处理代码中的错误,而您目前没有这样做。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM