并发：限制goroutines无法按预期工作

Question

I'm working on a search engine project currently. 我目前正在研究搜索引擎项目。 For faster crawl speed I use one goroutine per a link visit. 为了提高搜寻速度，我每次访问链接时都使用一个goroutine。 But I encountered two problems that made me wonder! 但是我遇到了两个问题，这使我感到奇怪！

First one is a code sample: 第一个是代码示例：

package main

import "fmt"
import "sync"
import "time"

type test struct {
    running int
    max     int
    mu      sync.Mutex
}

func main() {
    t := &test{max: 1000}
    t.start()
}

func (t *test) start() {
    for {
        if t.running >= t.max {
            time.Sleep(200 * time.Millisecond)
            continue
        }
        go t.visit()
    }
}

func (t *test) visit() {
    t.inc()
    defer t.dec()
    fmt.Println("visit called")
    fmt.Printf("running: %d, max: %d\n", t.running, t.max)
    fmt.Println()
    time.Sleep(time.Second)
}

func (t *test) inc() {
    t.mu.Lock()
    t.running++
    t.mu.Unlock()
}
func (t *test) dec() {
    t.mu.Lock()
    t.running--
    t.mu.Unlock()
}

Output (cropped): 输出（裁剪）：

running: 2485, max: 1000

running: 2485, max: 1000

running: 2485, max: 1000

visit called
running: 2485, max: 1000

running: 2485, max: 1000

running: 2485, max: 1000

running: 2485, max: 1000


visit called
running: 2485, max: 1000


running: 2485, max: 1000

While I'm explicitly checking for maximum allowed goroutines in the loop, Why running goroutines exceeds the maximum? 在显式检查循环中允许的最大goroutine时，为什么运行goroutine超过了最大值？

Second one is a part of real project code: 第二个是真实项目代码的一部分：

UPDATE: This is actually fixed, the problem was in LinkProvider.Get() implementation that took too long to return. 更新：这实际上是固定的，问题出在LinkProvider.Get()实现中，返回时间太长。 parser.visit() returns in the mean time, but the loop in Parser.Start() is waiting for a new link... and the output seems sequential! parser.visit()在此期间返回，但是Parser.Start()的循环正在等待新链接...并且输出似乎是顺序的！

package worker

import (
    "errors"
    "fmt"
    "sync"
    "time"

    "bitbucket.org/codictive/ise/components/crawler/models"
    "bitbucket.org/codictive/ise/components/log/logger"
    "bitbucket.org/codictive/ise/core/component"
    "bitbucket.org/codictive/ise/core/database"
)

// Worker is a service that processes crawlable links.
type Worker interface {
    Start() error
    Stop() error
    Restart() error
    Status() Status
}

// Status contains runtime status of a worker.
type Status struct {
    Running             bool
    RunningParsersCount int
}

// New return a new defaultWorker with given config.
func New() Worker {
    return &defaultWorker{
        flow: make(chan bool),
        stop: make(chan bool),
    }
}

// defaultWorker is a Worker implementation.
type defaultWorker struct {
    linkProvider         LinkProvider
    handlersLimit        int
    runningHandlersCount int
    running              bool
    mu                   sync.Mutex
    flow                 chan bool
    stop                 chan bool
}

func (w *defaultWorker) init() {
    prate, _ := component.IntConfig("crawler.crawlInterval")
    arate, _ := component.IntConfig("crawler.ad_crawlInterval")
    concLimit, _ := component.IntConfig("crawler.concurrent_workers_limit")
    w.linkProvider = NewLinkProvider(time.Duration(prate)*time.Hour, time.Duration(arate)*time.Hour)
    w.handlersLimit = concLimit
}

// Start runs worker.
func (w *defaultWorker) Start() error {
    logger.Info("Starting crawler worker...")
    w.running = true
    w.init()

    defer func() {
        w.running = false
        logger.Info("Worker stopped.")
    }()

    for {
        select {
        case <-w.stop:
            w.flow <- true
            return nil
        default:
            fmt.Printf("running: %d limit: %d\n", w.runningHandlersCount, w.handlersLimit)
            if w.runningHandlersCount >= w.handlersLimit {
                time.Sleep(200 * time.Millisecond)
                continue
            }

            link := w.linkProvider.Get()
            if link.ID == 0 {
                logger.Debug("no link to crawl")
                time.Sleep(time.Minute)
                continue
            }

            go func(l *models.CrawlLink) {
                go w.visit(l)
            }(link)
        }
    }
}

// Stop stops worker.
func (w *defaultWorker) Stop() error {
    logger.Info("Stopping crawler worker...")
    w.stop <- true
    select {
    case <-w.flow:
        return nil
    case <-time.After(2 * time.Minute):
        return errors.New("worker did not stopped properly")
    }
}

// Restart re-starts worker.
func (w *defaultWorker) Restart() error {
    logger.Info("Re-starting crawler worker...")
    w.stop <- true
    select {
    case <-w.flow:
        return w.Start()
    case <-time.After(2 * time.Minute):
        return errors.New("can not restart worker")
    }
}

// Status reports current worker status.
func (w *defaultWorker) Status() Status {
    return Status{
        Running:             w.running,
        RunningParsersCount: w.runningHandlersCount,
    }
}

func (w *defaultWorker) visit(cl *models.CrawlLink) {
    w.incrementRunningWorkers()
    defer w.decrementRunningWorkers()

    if cl == nil {
        logger.Warning("[crawler.worker.visit] Can not visit a nil link.")
        return
    }
    if err := cl.LoadFull(); err != nil {
        logger.Error("[crawler.worker.visit] Can not load link relations. (%v)", err)
        return
    }

    parser := NewParser(cl)
    if parser == nil {
        logger.Error("[crawler.worker.visit] Parser instantiation failed.")
        return
    }
    before := time.Now()
    if err := parser.Parse(); err != nil {
        cl.Error = err.Error()
        logger.Error("[crawler.worker.visit] Parser finished with error: %v.", err)
        db := database.Open()
        if err := db.Save(&cl).Error; err != nil {
            logger.Error("[crawler.worker.visit] can not update crawl link. (%v)", err)
        }
    }
    logger.Debug("[crawler.worker.visit] Parsing %q took %s.", cl.URL, time.Since(before))
    fmt.Printf("[crawler.worker.visit] Parsing %q took %s.\n", cl.URL, time.Since(before))
}

func (w *defaultWorker) incrementRunningWorkers() {
    w.mu.Lock()
    w.runningHandlersCount++
    w.mu.Unlock()
    fmt.Printf("increment called. current: %d\n", w.runningHandlersCount)
}

func (w *defaultWorker) decrementRunningWorkers() {
    w.mu.Lock()
    w.runningHandlersCount--
    w.mu.Unlock()
    fmt.Printf("decrement called. current: %d\n", w.runningHandlersCount)
}

Output: 输出：

2017/12/03 11:24:36 profile: cpu profiling enabled, /var/folders/1x/01d32mrs2plcj9pnb3mnnrhw0000gn/T/profile924798503/cpu.pprof
running: 0 limit: 1000
Running server on :8080
running: 0 limit: 1000
increment called. current: 1
[crawler.worker.visit] Parsing "https://www.sheypoor.com/%D9%81%D8%B1%D8%A7%D8%B4%D8%A8%D9%86%D8%AF/%D8%A7%D9%85%D9%84%D8%A7%DA%A9/%D9%81%D8%B1%D9%88%D8%B4-%D8%A7%D8%AF%D8%A7%D8%B1%DB%8C-%D9%88-%D8%AA%D8%AC%D8%A7%D8%B1%DB%8C" took 370.140513ms.
decrement called. current: 0
running: 0 limit: 1000
increment called. current: 1
[crawler.worker.visit] Parsing "https://www.sheypoor.com/%D8%B3%D8%A7%D9%85%D8%B3%D9%88%D9%86%DA%AF-s3-neo-24252682.html" took 193.193357ms.
decrement called. current: 0
running: 0 limit: 1000
increment called. current: 1
[crawler.worker.visit] Parsing "https://www.sheypoor.com/%D9%85%DB%8C%D8%B2%D9%88%D8%B5%D9%86%D8%AF%D9%84%DB%8C-%D8%AA%D8%A7%D9%84%D8%A7%D8%B1-22399505.html" took 201.636741ms.
decrement called. current: 0
running: 0 limit: 1000
increment called. current: 1
[crawler.worker.visit] Parsing "https://www.sheypoor.com/50000%D9%85%D8%AA%D8%B1-%D8%B2%D9%85%DB%8C%D9%86-%D9%85%D8%B1%D8%BA%D8%AF%D8%A7%D8%B1%DB%8C-%D9%88%D8%A7%D9%82%D8%B9-%D8%AF%D8%B1-%D8%AE%D8%B1%D9%85%D8%AF%D8%B1%D9%87-23075331.html" took 210.360596ms.
decrement called. current: 0
^C2017/12/03 11:24:43 profile: caught interrupt, stopping profiles
2017/12/03 11:24:43 profile: cpu profiling disabled, /var/folders/1x/01d32mrs2plcj9pnb3mnnrhw0000gn/T/profile924798503/cpu.pprof

As you can see the visit method runs completely sequential! 如您所见， visit方法完全按顺序运行！ Whether I call it with just go visit(link) or the one used in above code. 无论我用go visit(link)还是上面的代码中的那个来调用它。 Why this happens? 为什么会这样？ What is stopping the loop from iterating? 是什么阻止循环迭代？

Answer 1

I would solve this problem using channels and blocking feature - https://play.golang.org/p/KbYOI1oGNs 我会使用渠道和屏蔽功能解决此问题-https: //play.golang.org/p/KbYOI1oGNs

The main change is that we have a channel guard , we put new item there when goroutine is started (and it will block if size reaches limit), release when finished. 主要的变化是我们有一个通道guard ，我们在goroutine启动时将新项目放在那里（如果大小达到限制，它将阻塞），完成后释放。

func (t *test) start() {
    maxGoroutines := t.max
    guard := make(chan struct{}, maxGoroutines)

    for {
        guard <- struct{}{}
        go func() {
            t.visit()
            <-guard
        }()
    }
}

并发：限制goroutines无法按预期工作

问题描述

1 个解决方案

解决方案1
5 已采纳 2017-12-03 08:30:21

并发：限制goroutines无法按预期工作

问题描述

1 个解决方案

解决方案1 5 已采纳 2017-12-03 08:30:21

解决方案1
5 已采纳 2017-12-03 08:30:21