简体   繁体   中英

Delay each loop iteration in node js, async

I have the code below:

var request = require('request');
var cheerio = require ("cheerio");
var async= require("async");

var MyLink="www.mylink.com";

    // Two steps handed to async.series; each step reports completion (or an
    // error) through the `callback` it receives.
    async.series([

        // Step 1: fetch the start page and derive the list stored in TheUrl.
        function(callback){
            // NOTE(review): `Mylink` differs in case from the `MyLink` variable
            // declared above — confirm which spelling is intended.
            request(Mylink, function (error, response, body) {
                if (error) return callback(error); 
                var $ = cheerio.load(body);
                //Some calculations where I get NewUrl variable...
                // TheUrl is assigned without var/let/const in the visible code.
                TheUrl=NewUrl;
                callback();
            });
        },
        // Step 2: issue one request per entry of TheUrl.
        function(callback){
            // The loop starts every request immediately, without waiting for the
            // previous response, and each response handler invokes `callback`.
            for (var i = 0; i <=TheUrl.length-1; i++) {
                var url = 'www.myurl.com='+TheUrl[i];
                request(url, function(error, resp, body) { 
                    if (error) return callback(error); 
                    var $ = cheerio.load(body);
                    //Some calculations again...
                    callback();
                });
            };
        }
      ], function(error){
        // NOTE(review): `next` is not defined in the visible code — presumably
        // supplied by an enclosing handler; confirm.
        if (error) return next(error);
    });

Does anyone have a suggestion for how I can delay each iteration of the for loop? Say, the code waits 10 seconds after each iteration is complete. I tried setTimeout but couldn't get it to work.

You can set a timeout for the execution of the code at increasing intervals like this:

var interval = 10 * 1000; // 10 seconds between request starts

// `callback` is the completion callback of the enclosing step and must be
// invoked exactly once, so count the requests that are still outstanding.
var pending = TheUrl.length;
var failed = false;

// Nothing to fetch: finish the step right away instead of never calling back.
if (pending === 0) callback();

for (var i = 0; i < TheUrl.length; i++) {
    // The trailing `i` argument is handed to the timer function, so every
    // timer sees its own index rather than the final value of the shared `var i`.
    setTimeout( function (i) {
        var url = 'www.myurl.com='+TheUrl[i];
        request(url, function(error, resp, body) {
            // After the first failure has been reported, ignore later responses.
            if (failed) return;
            if (error) {
                failed = true;
                return callback(error);
            }
            var $ = cheerio.load(body);
            //Some calculations again...

            // Signal completion only once the last response has been handled.
            pending--;
            if (pending === 0) callback();
        });
    }, interval * i, i);
}

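So the first one runs right away (interval * 0 is 0), the second one runs after ten seconds, etc. Note that this spaces out when the requests start; it does not wait for the previous request to finish.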

You need to send i as the final parameter in the setTimeout() so that its value is bound to the function argument. Otherwise every timer would read the shared loop variable after the loop has already finished, so the attempt to access the array value will be out of bounds and you will get undefined .

Another alternative would be to use async.eachSeries . For example:

// Fetch each entry of TheUrl in order, pausing 10 seconds before every request.
// `callback` is the completion callback of the enclosing step.
async.eachSeries(TheUrl, function (eachUrl, done) {
    setTimeout(function () {
        var url = 'www.myurl.com='+eachUrl;
        request(url, function(error, resp, body) {
            // Report failures through `done` so eachSeries stops iterating and
            // hands the error to the final handler below.
            if (error) return done(error);
            var $ = cheerio.load(body);
            //Some calculations again...
            done();
        });
    }, 10000);
}, function (err) {
    // Runs once: with the first error, or without one after every URL is done.
    callback(err);
});

Delaying multiple page fetches with async/await

I am a big fan of the async library and I've used it for a long time. However, now there's async/await . Your code becomes easier to read. For instance, this would be your main function:

// Resolve the list of URLs to visit, starting from the initial page.
const urls = await fetchUrls(INITIAL_URL);

// Handle the pages one at a time: each `await` finishes before the next
// statement runs, so the pause and the fetch never overlap.
for (const url of urls) {
    // Pause before every fetch (the argument is passed straight to sleep()).
    await sleep(10000);
    const $ = await fetchPage(url);
    // do stuff with cheerio-processed page
}

Much better, isn't it? Before I get into the details of how fetchPage() and fetchUrls() work, let's first answer your question of how to wait before fetching the next page. The sleep function is pretty straightforward:

// Resolves (with no value) once `millis` milliseconds have elapsed.
async function sleep(millis) {
    await new Promise((wake) => {
        setTimeout(wake, millis);
    });
}

You can get a full explanation of how it works in my other answer here .

Ok, back to the other functions. The request library has a promise-enabled version of it that you can use with async/await . Let's check how fetchPage() is implemented:

// Requests `url` and resolves with the response body after it has been
// passed through cheerio.load().
async function fetchPage(url) {
    const toCheerio = (body) => cheerio.load(body);
    const page = await request({ url: url, transform: toCheerio });
    return page;
}

Since request is returning a promise, we can await on it. I also took the chance to use the transform property which allows us to transform the response body before resolving the promise. I'm passing it through Cheerio, just like you did in your code.

Finally, fetchUrls() can just call fetchPage() and process it to fetch your array of URLs before resolving its promise. Here's the full code:

const
    request = require("request-promise-native"),
    cheerio = require("cheerio");

const
    INITIAL_URL = "http://your-initial-url.com";

/**
 * Requests the page at `url` and hands its body to cheerio before resolving.
 *
 * @param {String} url - address of the page to request
 * @return {Promise} resolves with whatever `cheerio.load(body)` returns
 */
async function fetchPage(url) {
    const toCheerio = (body) => cheerio.load(body);
    const page = await request({ url: url, transform: toCheerio });
    return page;
}

/**
 * Your initial fetch, which will bring the list of URLs you're looking for.
 *
 * As written this is a placeholder: the fetched page is not inspected yet and
 * a fixed two-element list is returned.
 *
 * @param {String} initialUrl - the initial URL
 * @return {Promise<string[]>} promise resolving to an array of URL strings
 */
async function fetchUrls(initialUrl) {
    // `$` is the cheerio-processed initial page; it is unused until the
    // extraction logic below is filled in.
    const $ = await fetchPage(initialUrl);
    // process $ here and get urls
    return ["http://foo.com", "http://bar.com"];
}

/**
 * Promise-based pause built on setTimeout.
 * Background: https://stackoverflow.com/a/46720712/778272
 *
 * @param {Number} millis - delay in milliseconds before the promise resolves
 * @return {Promise<void>}
 */
async function sleep(millis) {
    await new Promise((wake) => {
        setTimeout(wake, millis);
    });
}

/**
 * Entry point: resolves the URL list from INITIAL_URL, then fetches the pages
 * one at a time, pausing before each fetch.
 *
 * @return {Promise<void>} rejects if any fetch fails
 */
async function run() {
    const urls = await fetchUrls(INITIAL_URL);
    for (const url of urls) {
        await sleep(10000);
        const $ = await fetchPage(url);
        // do stuff with cheerio-processed page
    }
}

// run() returns a promise; attach a handler so a failed fetch is reported
// here instead of surfacing as an unhandled rejection.
run().catch((error) => {
    console.error(error);
    process.exitCode = 1;
});

To use request with promises, install it like this:

npm install request
npm install request-promise-native

And then require("request-promise-native") in your code, like in the example above.

Since you're already using async , async.whilst would do nicely as a replacement for the for loop.

whilst is an asynchronous while -like function. Each iteration is only run after the previous iteration has called its completion callback. In this case, we can simply postpone execution of the completion callback by 10 seconds with setTimeout .

var i = 0;

// Loop test: keep going while there are entries of TheUrl left.
function hasMoreUrls() {
    return i < TheUrl.length;
}

// One iteration: request the current URL, then hold back the completion
// callback for 10 seconds so the next iteration starts late.
function fetchThenPause(innerCallback) {
    var url = 'www.myurl.com='+TheUrl[i];
    request(url, function(error, resp, body) {
        if (error) {
            return innerCallback(error);
        }
        var $ = cheerio.load(body);
        //Some calculations again...

        // advance the index and release the next iteration after the pause
        setTimeout(function() {
            i++;
            innerCallback();
        }, 10000);
    });
}

// `callback` runs once the test fails or an iteration reports an error
async.whilst(hasMoreUrls, fetchThenPause, callback);

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM