[英]goroutine consuming the same line more than once
目前,我有一个场景,其中我有巨大的文件(例如,我要说50万行文本),其想法是使用工作线程(线程)按每个线程100个的速度处理它们。 运行代码后,我仍然想知道为什么goroutine多次消耗同一行? 我猜想它正在努力完成工作。
这是我的代码
package main
import (
"log"
"bufio"
"fmt"
"encoding/csv"
"encoding/json"
"io"
"os"
"sync"
)
type IMDBDataModel struct {
Color string `json:"color"`
DirectorName string `json:"director_name"`
NumCriticForReviews string `json:"num_critic_for_reviews"`
Duration string `json:"duration"`
DirectorFacebookLikes string `json:"director_facebook_likes"`
Actor3FacebookLikes string `json:"actor_3_facebook_likes"`
Actor2Name string `json:"actor_2_name"`
Actor1FacebookLikes string `json:"actor_1_facebook_likes"`
Gross string `json:"gross"`
Genre string `json:"genres"`
Actor1Name string `json:"actor_1_name"`
MovieTitle string `json:"movie_title"`
NumVotedUser string `json:"num_voted_users"`
CastTotalFacebookLikes string `json:"cast_total_facebook_likes"`
Actor3Name string `json:"actor_3_name"`
FaceNumberInPoster string `json:"facenumber_in_poster"`
PlotKeywords string `json:"plot_keywords"`
MovieIMDBLink string `json:"movie_imdb_link"`
NumUserForReviews string `json:"num_user_for_reviews"`
Language string `json:"language"`
Country string `json:"country"`
ContentRating string `json:"content_rating"`
Budget string `json:"budget"`
TitleYear string `json:"title_year"`
Actor2FacebookLikes string `json:"actor_2_facebook_likes"`
IMDBScore string `json:"imdb_score"`
AspectRatio string `json:"aspect_ratio"`
MovieFacebookLikes string `json:"movie_facebook_likes"`
}
var iterated int64
var out []*IMDBDataModel
func populateString(input []IMDBDataModel, out []*IMDBDataModel, wg *sync.WaitGroup) {
for _ , data := range input {
out = append(out, &data)
}
wg.Done()
}
func consumeData(input <-chan *IMDBDataModel, wg *sync.WaitGroup){
defer wg.Done()
for data := range input {
iterated++
fmt.Printf("%d : %s\n", iterated, data.MovieTitle)
out = append(out, data)
}
fmt.Println("output size : ", len(out))
}
func processCSV(path string) (imdbList []IMDBDataModel){
csvFile, _ := os.Open(path)
reader := csv.NewReader(bufio.NewReader(csvFile))
for {
line, error := reader.Read()
if error == io.EOF {
break
} else if error != nil {
log.Fatal(error)
}
imdbList = append(imdbList,
IMDBDataModel{
Color: line[0],
DirectorName: line[1],
NumCriticForReviews : line[2],
Duration: line[3],
DirectorFacebookLikes: line[4],
Actor3FacebookLikes: line[5],
Actor2Name: line[6],
Actor1FacebookLikes: line[7],
Gross: line[8],
Genre: line[9],
Actor1Name: line[10],
MovieTitle: line[11],
NumVotedUser: line[12],
CastTotalFacebookLikes: line[13],
Actor3Name: line[14],
FaceNumberInPoster: line[15],
PlotKeywords: line[16],
MovieIMDBLink: line[17],
NumUserForReviews: line[18],
Language: line[19],
Country: line[20],
ContentRating: line[21],
Budget: line[22],
TitleYear: line[23],
Actor2FacebookLikes: line[24],
IMDBScore: line[25],
AspectRatio: line[26],
MovieFacebookLikes: line[27],
},
)
}
imdbJson, err := json.Marshal(imdbList)
if err != nil {
log.Println(imdbJson)
}
return
}
func main() {
imdbList := processCSV("movie_metadata.csv")
imdbChannel := make(chan *IMDBDataModel, 100) // buffer
var wg sync.WaitGroup
for i := 0; i < 5;i++ {
wg.Add(1)
go consumeData(imdbChannel,&wg)
}
for _ ,task := range imdbList {
imdbChannel <- &task
}
close(imdbChannel)
wg.Wait()
// for _, item := range out {
// fmt.Println(item.MovieTitle)
// }
fmt.Println("Total Channel :", len(imdbChannel))
fmt.Println("Total IMDB :", len(imdbList))
fmt.Println("Total Data: ", len(out))
fmt.Println("Iterated : ", iterated)
fmt.Println("Goroutines finished..")
}
编辑:在添加互斥锁和另一个通道的一些建议后,这是修改后的消耗函数
func consumeData(input <-chan *IMDBDataModel, output chan *IMDBDataModel, wg *sync.WaitGroup) {
defer wg.Done()
for data := range input {
iterated++
// outLock.Lock()
// out = append(out, data)
// outLock.Unlock()
output <- data
}
}
但是仍然消耗同一条线(发生种族)不止一次。
....
My Date with Drew
My Date with Drew
My Date with Drew
My Date with Drew
My Date with Drew
Total Channel : 0
Total IMDB : 5044
Total Data: 4944
Iterated : 5000
Goroutines finished..
您的问题是:
var out []*IMDBDataModel
func consumeData(input <-chan *IMDBDataModel, wg *sync.WaitGroup){
defer wg.Done()
for data := range input {
iterated++
fmt.Printf("%d : %s\n", iterated, data.MovieTitle)
out = append(out, data)
}
fmt.Println("output size : ", len(out))
}
您将从多个线程附加到“输出”:
尝试在写入“ out”的地方周围添加一个锁,如下所示:
var out []*IMDBDataModel
var outLock sync.Mutex
func consumeData(input <-chan *IMDBDataModel, wg *sync.WaitGroup){
defer wg.Done()
for data := range input {
iterated++
fmt.Printf("%d : %s\n", iterated, data.MovieTitle)
outLock.Lock()
out = append(out, &data)
outLock.Unlock()
}
outLock.Lock()
fmt.Println("output size : ", len(out))
outLock.Unlock()
}
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.