[英]Data race fail to understand
動機:我有一個巨大的 JSON 文件,我打算對其進行解析和處理。
現在,我確信會有一些庫會這樣做,但我想自己這樣做是為了更好地理解 Go 的並發結構。
所以我的目標是使用scanner
讀取文件並將數據泵入[]byte chan
,如下所示:
// Not the actual code.
for scanner.Scan() {
input <- []byte(scanner.Text())
}
我要求超過 1 個 go-routine 從input
chan 接收數據並解組 JSON 並返回結果(編組是否成功)並顯示進度條
// not the actual code.
for {
bytes := <- input
if err := json.Unmarshal(bytes); err != nil {
errorchan <- true
} else {
successchan <- true
}
progress <- size_of_byte(bytes)
}
// now have other go-routine to handle errorchan, successchan and progress thing.
在紙上看起來一切都是合乎邏輯的,但是當我設法組裝代碼(如下所示)時,我看到了數據競爭,我盡力了解數據競爭是如何發生的,但不能(因為我刪除了我們在之前的代碼)
workers 0xc0000c2000
Completed 0.000000==================
WARNING: DATA RACE
Read at 0x00c0000c2048 by goroutine 8:
mongo_import/race-d.readFile()
/Users/admin/Documents/goProject/src/mongo_import/race-d/main.go:197 +0x6ff
mongo_import/race-d.TestReadJson()
/Users/admin/Documents/goProject/src/mongo_import/race-d/main_test.go:8 +0x47
testing.tRunner()
/usr/local/Cellar/go/1.13.7/libexec/src/testing/testing.go:909 +0x199
Previous write at 0x00c0000c2048 by goroutine 12:
mongo_import/race-d.(*Worker).trackSuccess()
/Users/admin/Documents/goProject/src/mongo_import/race-d/main.go:103 +0xc0
Goroutine 8 (running) created at:
testing.(*T).Run()
/usr/local/Cellar/go/1.13.7/libexec/src/testing/testing.go:960 +0x651
testing.runTests.func1()
/usr/local/Cellar/go/1.13.7/libexec/src/testing/testing.go:1202 +0xa6
testing.tRunner()
/usr/local/Cellar/go/1.13.7/libexec/src/testing/testing.go:909 +0x199
testing.runTests()
/usr/local/Cellar/go/1.13.7/libexec/src/testing/testing.go:1200 +0x521
testing.(*M).Run()
/usr/local/Cellar/go/1.13.7/libexec/src/testing/testing.go:1117 +0x2ff
main.main()
_testmain.go:44 +0x223
Goroutine 12 (running) created at:
mongo_import/race-d.(*Worker).Start()
/Users/admin/Documents/goProject/src/mongo_import/race-d/main.go:72 +0x15f
==================
--- FAIL: TestReadJson (1.18s)
testing.go:853: race detected during execution of test
FAIL
FAIL mongo_import/race-d 1.192s
FAIL
測試 package 中的數據競賽對我來說是新事物。
但我無法理解為什么這會導致數據競爭(這對我來說毫無意義)
Previous write at 0x00c0000c2048 by goroutine 12: mongo_import/race-d.(*Worker).trackSuccess() /Users/admin/Documents/goProject/src/mongo_import/race-d/main.go:103 +0xc0 Goroutine 12 (running) created at: mongo_import/race-d.(*Worker).Start() /Users/admin/Documents/goProject/src/mongo_import/race-d/main.go:72 +0x15f
代碼:
這是代碼的樣子
package main
import (
"bufio"
"encoding/binary"
"encoding/json"
"fmt"
"log"
"os"
"sync"
"time"
)
// thread that does that job of unmarshal
type Thread struct {
w *Worker
}
// Run the individual thread and process the bytes
// read for worter.input chan
func (thread Thread) Run() {
for {
bytes, ok := <-thread.w.input
if !ok {
return
}
var data map[string]interface{}
if err := json.Unmarshal(bytes, &data); err != nil {
thread.w.errorChan <- true
} else {
thread.w.successChan <- true
}
thread.w.progress <- int64(binary.Size(bytes))
// do other thing
// like insert in db etc.
}
}
// worker that
type Worker struct {
errmutex sync.Mutex
succmutex sync.Mutex
progmutex sync.Mutex
wg sync.WaitGroup
done bool
workers int
fileSize int64
completedByte int64
errorCount int
successCount int
input chan []byte
progress chan int64
errorChan chan bool
successChan chan bool
}
// NewWorker
func NewWorker(count int) *Worker {
return &Worker{workers: count}
}
// start the worker
func (w *Worker) Start() {
fmt.Printf("workers %p\n", w)
w.wg.Add(1)
go w.display()
w.wg.Add(1)
go w.trackProgress()
w.wg.Add(1)
go w.trackSuccess()
w.wg.Add(1)
go w.trackError()
w.wg.Add(1)
go w.Spawn()
w.wg.Wait()
}
// add the error count
func (w *Worker) trackError() {
w.wg.Done()
for {
_, ok := <-w.errorChan
if !ok {
return
}
w.errmutex.Lock()
w.errorCount = w.errorCount + 1
w.errmutex.Unlock()
}
}
// add the success count
func (w *Worker) trackSuccess() {
defer w.wg.Done()
for {
_, ok := <-w.successChan
if !ok {
return
}
w.succmutex.Lock()
w.successCount += 1
w.succmutex.Unlock()
}
}
// spawn individual thread to process the bytes
func (w *Worker) Spawn() {
defer w.wg.Done()
defer w.clean()
var wg sync.WaitGroup
for i := 0; i < w.workers; i++ {
wg.Add(1)
go func() {
defer wg.Done()
Thread{w: w}.Run()
}()
}
wg.Wait()
}
// close the other open chan
func (w *Worker) clean() {
close(w.errorChan)
close(w.successChan)
close(w.progress)
}
// close the input chan
func (w *Worker) Done() {
close(w.input)
}
// sum the total byte we have processed
func (w *Worker) trackProgress() {
defer w.wg.Done()
for {
read, ok := <-w.progress
if !ok {
w.done = true
return
}
w.progmutex.Lock()
w.completedByte += read
w.progmutex.Unlock()
}
}
// display the progress bar
func (w *Worker) display() {
defer w.wg.Done()
for !w.done {
w.progmutex.Lock()
percentage := (float64(w.completedByte) / float64(w.fileSize)) * 100
w.progmutex.Unlock()
fmt.Printf("\r Completed %f", percentage)
time.Sleep(5 * time.Second)
}
}
func readFile(path string) map[string]int {
handler, err := os.Open(path)
if err != nil {
log.Fatal(err)
}
defer handler.Close()
worker := &Worker{workers: 2}
worker.input = make(chan []byte, 2)
worker.progress = make(chan int64, 1)
worker.errorChan = make(chan bool, 1)
worker.successChan = make(chan bool, 1)
if fi, err := handler.Stat(); err != nil {
log.Fatal(err)
} else {
worker.fileSize = fi.Size()
}
scanner := bufio.NewScanner(handler)
go worker.Start()
for scanner.Scan() {
worker.input <- []byte(scanner.Text())
}
worker.Done()
if err := scanner.Err(); err != nil {
log.Fatal(err)
return nil
}
return map[string]int{
"error": worker.errorCount,
"success": worker.successCount,
}
}
func main() {
readFile("dump.json")
}
和測試代碼
package main // main_test.go
import (
"testing"
)
func TestReadJson(t *testing.T) {
data := readFile("dump2.json")
if data == nil {
t.Error("we got a nil data")
}
}
這里是示例dump2.json
數據
{"name": "tutorialspoint10"}
{"name":"tutorialspoint2", "age": 15}
{"name":"tutorialspoint3", "age": 25}
{"name":"tutorialspoint4", "age": 28}
{"name":"tutorialspoint5", "age": 40}
{"name": "tutorialspoint6"}
{"name":"tutorialspoint8", "age": 7}
{"name":"tutorialspoint4", "age": 55}
{"name":"tutorialspoint1","age":4}
{"name":"tutorialspoint2"}
最后,我知道這里發布的代碼必須是極簡的,但我盡力保持代碼極簡(從原始項目中提取)。 我不確定如何(或目前能夠)進一步減少它。
您需要在 main.go:197 行有一個讀鎖
"success": worker.successCount,
正如日志所說。 您嘗試閱讀,而另一個 go-routine 嘗試寫入。 /Users/admin/Documents/goProject/src/mongo_import/race-d/main.go:197
一個簡短的解釋:
https://dev.to/wagslane/golang-mutexes-what-is-rwmutex-for-57a0
在這種情況下,使用 Atomic 可能會更好。 https://gobyexample.com/atomic-counters
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.