简体   繁体   中英

Node.js - Looping through array of URLS one at a time

I am a beginner at node js and I'm trying to write a web scraping script. I got permission from the site admin to scrape their products if I make less then 15 requests a minute. When I started out it used to request all the URLs at once but after some tooling around, I was able to go through each item in the array, but the script doesn't stop when there is no more items in the array? I'm not really happy with my result and feel like there is a better way to do this.

    var express = require('express');
    var fs = require('fs');
    var request = require('request');
    var cheerio = require('cheerio');
    var app     = express();
    var async = require('async');

app.get('/scrape', function(req, res){
productListing = ['ohio-precious-metals-1-ounce-silver-bar','morgan-1-ounce-silver-bar']
var i = 0;
async.eachLimit(productListing, 1, function (product, callback) {
    var getProducts = function () {
        var url = 'http://cbmint.com/' + productListing[i];
        request(url, function(error, response, html) {
            if(!error){
                var $ = cheerio.load(html);

                var title;
                var json = { title : ""};

                $('.product-name').filter(function(){
                    var data = $(this);
                    title = data.children().children().first().text();

                    json.title = title;
                })
            }
            var theTime = new Date().getTime();
            console.log(i);
            console.log(json.title);
            console.log(theTime);
            i++;
        });
    }
    setInterval(getProducts,10000); 
})
res.send('Check your console!')
})

app.listen('8081')
console.log('Magic happens on port 8081');
exports = module.exports = app; 

You aren't calling callback inside the iterator function. Take a look at the docs for eachLimit .

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM