繁体   English   中英

在NodeJS中使用嵌套回调时遇到问题

[英]Trouble using nested callbacks in NodeJS

我正在编写一个程序,先从站点抓取链接,然后再抓取这些链接以获取信息。 为了抓取该站点,必须先登录。 顺序如下:登录 -> 抓取链接索引 -> 抓取各个链接以获取信息

登录函数的回调输出的结果数组是空的:{ results: [], hasMore: true } ,所以我的代码肯定有问题(抓取部分本身是有效的):

var request = require('request');
var request = request.defaults({jar: true}); // necessary for persistent login
var cheerio = require('cheerio');

// Form fields posted to the login endpoint.
var credentials = { username: 'user1', password: 'passpass' };

// Endpoints used by the scraper: the login form target and the index page.
var loginUrl = "https://example.org/user/account/login/";
var url1 = "https://example.org/torrents/browse/index/";

// Entry point: kick off the login -> scrape chain and print what it hands back.
login(function printResult(result) {
    console.log(result);
});

/**
 * Posts the credentials to the login URL and, once a response arrives,
 * scrapes the index page and forwards that result to `callback`.
 *
 * The login response itself is not inspected. If the request fails,
 * "Login error" is logged and `callback` is never invoked.
 *
 * @param {function(Object)} callback - receives the value produced by scrapeTorrents.
 */
function login(callback) {
    var formBody = require('querystring').stringify(credentials);
    var postOptions = {
        uri: loginUrl,
        headers: { 'content-type': 'application/x-www-form-urlencoded' },
        body: formBody
    };

    // Hands only the first argument on to the caller.
    function forwardResult(result) {
        callback(result);
    }

    function onLoginResponse(err, res, body) {
        if (!err) {
            scrapeTorrents(url1, forwardResult);
            return;
        }
        console.log("Login error");
    }

    request.post(postOptions, onLoginResponse);
}

/**
 * Downloads `url`, collects the href of the first child of every
 * `span.title` element, and passes those links to scrapeTorrentDetails.
 *
 * If the request fails, "Main scrape error" is logged and `callback`
 * is never invoked.
 *
 * @param {string} url - page to download.
 * @param {function(Object)} callback - receives the value produced by scrapeTorrentDetails.
 */
function scrapeTorrents(url, callback) {
    request(url, function onIndexPage(err, res, body) {
        if (err) {
            console.log("Main scrape error");
            return;
        }

        var $ = cheerio.load(body);
        var links = [];

        $('span.title').each(function collectHref() {
            // The first child of the span carries the details link.
            var anchor = $(this).children().eq(0);
            links.push(anchor.attr('href'));
        });

        scrapeTorrentDetails(links, function forwardResult(result) {
            callback(result);
        });
    });
}

/**
 * Issues one request per entry in `links` (each prefixed with
 * "https://example.org") and builds a movie record from every page inside
 * the request callback.
 *
 * NOTE: `callback` is invoked right after the requests have been issued,
 * not after they have completed, so it receives `results` as it is at that
 * moment.
 *
 * @param {string[]} links - site-relative detail page URLs.
 * @param {function(Object)} callback - called with `{ results, hasMore: true }`.
 */
function scrapeTorrentDetails(links, callback) {
    var results = [];

    function getDetails(pageUrl) {
        request(pageUrl, function onDetailPage(err, res, body) {
            if (err) {
                console.log("Detail scrape error");
                return;
            }
            console.log("Scraping: " + pageUrl);

            var $ = cheerio.load(body);
            var cells = $('td');

            // Text of the first child node of the index-th <td>.
            function cellText(index) {
                return $(cells).get(index).firstChild.data;
            }

            var title = cellText(1);
            var hash = cellText(3).trim();
            var size = cellText(9);

            var info = {};
            if (!(cells.length > 23)) {
                // Short table: none of the extended fields are read.
                info.rlsDate = "notfound";
                info.genres = "notfound";
                info.runtime = "notfound";
                info.plot = "notfound";
                info.rating = "notfound"; // of 10
                info.imdb_id = "notfound";
                info.cover = "notfound";
                info.thumb = "notfound";
            } else {
                info.rlsDate = cellText(23) || '';
                info.genres = cellText(27) || '';
                info.runtime = cellText(31) || '';
                // plot stays unset when the cell has no child node
                if ($(cells).get(33).firstChild != null) {
                    info.plot = cellText(33) || '';
                }
                info.rating = $('#imdb_rating').parent().next().text() || ''; // of 10
                info.imdb_id = $('[name=imdbID]').get(0).attribs.value || '';
                info.cover = $('#cover').children().eq(0).get(0).attribs.href || '';
                info.thumb = $('[alt=Cover]').get(0).attribs.src || '';
                if (typeof info.cover == 'undefined') {
                    info.cover = info.thumb;
                }
            }

            results.push({
                type: 'movie',
                imdb_id: info.imdb_id,
                title: title,
                year: info.rlsDate,
                genre: info.genres,
                rating: info.rating,
                runtime: info.runtime,
                image: info.thumb,
                cover: info.cover,
                synopsis: info.plot,
                torrents: {
                    magnet: 'magnet:?xt=urn:btih:' + hash + '&tr=http://tracker.example.org:2710/a/announce',
                    filesize: size
                }
            });
        });
    }

    for (var i = 0; i < links.length; i++) {
        getDetails("https://example.org" + links[i]);
    }

    callback({ results: results, hasMore: true });
}

也许使用 Q 的 Promise 会更好。 如何在上面的代码中实现呢?

如果您想知道代码的用途,我打算修改Popcorn-time以使用另一个torrent-tracker(不带API)。

谢谢

一个主要问题出在这段代码上:

// Each getDetails() call only starts a request; its result is pushed into
// `results` later, inside the request callback.
for (var i=0; i<links.length; i++){
        getDetails("https://example.org" + links[i]);
}

// Runs immediately after the loop, before any of those request callbacks
// has had a chance to add to `results`.
callback( {
    results: results,
    hasMore: true
});

getDetails() 是异步的,但是您只是把它调用了 links.length 次就继续往下执行——就好像这些调用都已经完成了一样。 因此,在您调用回调并尝试传递结果的时候, getDetails() 中的请求一个都还没有完成。 此时还没有任何结果被填入,所以结果数组是空的。

您代码中的其他地方都(按需要)正确地使用了嵌套回调,但是在这一处您疏忽了。 您需要知道所有 getDetails() 调用何时完成,然后再带着结果调用最终的回调。

此外,您还必须确定是否可以并行发起所有 getDetails() 调用(让它们同时进行),还是您真正想要的是调用一个、等待它完成、然后再调用下一个,依此类推。 现在的写法是一次性把所有请求全部发出;只要目标服务器不拒绝一次收到这么多请求,这样做是可行的。


有几种解决此问题的潜在策略。

  1. 给 getDetails() 添加一个回调,然后统计 getDetails() 回调被调用的次数,只有当计数达到 links.length 时,才调用最终的回调。

  2. 更改 getDetails() 使其返回一个 Promise。 然后,您可以用类似 links.map(getDetails) 的方式创建一个 Promise 数组,再使用 Promise.all() 来得知它们何时全部完成。

就个人而言,我会把您的所有代码都改为使用 Promise,并使用 Bluebird Promise 库,因为它具有 Promise.map() 这样的额外功能,可以使此操作更加简单。

这是一个向getDetails()添加回调然后计算完成次数的修复程序:

/**
 * Requests every detail page in `links` (all in flight at once) and reports
 * the collected movie records once every request has settled.
 *
 * @param {string[]} links - site-relative detail page URLs; each is prefixed
 *     with "https://example.org".
 * @param {function(Object)} callback - called exactly once with
 *     `{ results: movie[], hasMore: true }`. `results` follows the order of
 *     `links`; pages that fail to download or parse are logged and left out.
 *     An empty `links` array yields an empty `results` array.
 */
function scrapeTorrentDetails(links, callback) {
    var slots = [];               // slots[i] holds the movie built from links[i]
    var pending = links.length;   // requests that have not reported back yet

    function finish() {
        callback({
            // filter() skips the holes left behind by failed pages
            results: slots.filter(function (movie) { return movie !== undefined; }),
            hasMore: true
        });
    }

    // Builds one movie record from the HTML of a detail page.
    // Throws if the page does not have the expected table cells.
    function parseMovie(body) {
        var $ = cheerio.load(body);
        var tds = $('td');

        // Text of the first child node of the index-th <td>.
        function cellText(index) {
            return tds.get(index).firstChild.data;
        }

        var title = cellText(1);
        var hash = cellText(3).trim();
        var size = cellText(9);

        // Defaults used when the page has no extended info table.
        var rlsDate = "notfound";
        var genres = "notfound";
        var runtime = "notfound";
        var plot = "notfound";
        var rating = "notfound"; // of 10
        var imdb_id = "notfound";
        var cover = "notfound";
        var thumb = "notfound";

        if (tds.length > 23) {
            rlsDate = cellText(23) || '';
            genres = cellText(27) || '';
            runtime = cellText(31) || '';
            // The plot cell may be empty; fall back to '' like the other fields.
            var plotNode = tds.get(33).firstChild;
            plot = (plotNode && plotNode.data) || '';
            rating = $('#imdb_rating').parent().next().text() || ''; // of 10
            imdb_id = $('[name=imdbID]').get(0).attribs.value || '';
            cover = $('#cover').children().eq(0).get(0).attribs.href || '';
            thumb = $('[alt=Cover]').get(0).attribs.src || '';
            // cover is '' (never undefined) when the href is missing, so test
            // for emptiness to make the thumbnail fallback actually apply.
            if (!cover) {
                cover = thumb;
            }
        }

        return {
            type: 'movie',
            imdb_id: imdb_id,
            title: title,
            year: rlsDate,
            genre: genres,
            rating: rating,
            runtime: runtime,
            image: thumb,
            cover: cover,
            synopsis: plot,
            torrents: {
                magnet: 'magnet:?xt=urn:btih:' + hash + '&tr=http://tracker.example.org:2710/a/announce',
                filesize: size
            }
        };
    }

    // Fetches one page; calls done(err) on failure or done(null, movie) on success.
    function getDetails(url, done) {
        request(url, function(err, res, body) {
            if (err) {
                console.log("Detail scrape error");
                done(err);
                return;
            }
            console.log("Scraping: " + url);

            var movie;
            try {
                movie = parseMovie(body);
            } catch (parseErr) {
                // An unexpected page layout must not stop the count from completing.
                console.log("Detail parse error: " + url);
                done(parseErr);
                return;
            }
            done(null, movie);
        });
    }

    if (pending === 0) {
        // Nothing to fetch: still report back, and do it asynchronously so the
        // callback timing matches the non-empty case.
        process.nextTick(finish);
        return;
    }

    links.forEach(function (link, index) {
        getDetails("https://example.org" + link, function (err, movie) {
            if (!err) {
                slots[index] = movie;
            }
            pending -= 1;
            if (pending === 0) {
                finish();
            }
        });
    });
}

以下是对给定示例代码的重写,使用了 bind、自定义的 this 对象以及尚未完成的请求计数(我认为 Promise 会掩盖执行路径)。

回调返回空数组的原因似乎是文档中没有带有 title 属性的 span 元素,因此没有触发更多请求。

 // Scraper rewritten around bind(): one context object ({ callback, results,
 // resultCount }) is bound as `this` to every step, and resultCount tracks the
 // detail requests that are still outstanding.
 var request = require('request').defaults({ jar: true }); // necessary for persistent login
 var cheerio = require('cheerio');
 var process = require('process');
 var url1 = "https://example.org/torrents/browse/index/";
 var loginUrl = "https://example.org/user/account/login/";

 // Step 1: post the credentials. The object bound here is the shared context.
 var login = function(callback) {
     request.post({
         uri: loginUrl,
         headers: { 'content-type': 'application/x-www-form-urlencoded' },
         body: require('querystring').stringify({ username: 'user1', password: 'passpass' })
     }, fna.bind({ callback: callback }));
 };

 // Step 2: login response received -> request the index page.
 var fna = function(err, res, body) {
     if (err) {
         console.log("Login error");
         return;
     }
     request(url1, fnb.bind(this));
 };

 // Step 3: index page received -> collect the detail links and request each one.
 var fnb = function(err, res, body) {
     if (err) {
         console.log("Main scrape error");
         return;
     }
     var $ = cheerio.load(body);
     var links = [];
     $('span.title').each(function() {
         links.push($(this).children().first().attr('href'));
     });
     this.results = [];
     this.resultCount = links.length;
     if (this.resultCount === 0) {
         // No links: report the empty result on the next tick.
         process.nextTick(fne.bind(this));
         return;
     }
     for (var i = 0; i < links.length; i++) {
         var detailUrl = "https://example.org" + links[i];
         // Bind the URL as the first argument so fnc can log which page it handles.
         request(detailUrl, fnc.bind(this, detailUrl));
     }
 };

 // Step 4: one detail page received -> build its movie record.
 // `url` is supplied through bind(); err/res/body come from request.
 var fnc = function(url, err, res, body) {
     if (err) {
         console.log("Detail scrape error");
     } else {
         console.log("Scraping: " + url);
         var $ = cheerio.load(body);
         var tds = $('td');
         var title = $(tds).get(1).firstChild.data;
         var hash = $(tds).get(3).firstChild.data.trim();
         var size = $(tds).get(9).firstChild.data;
         var rlsDate = "notfound";
         var genres = "notfound";
         var runtime = "notfound";
         var plot = "notfound";
         var rating = "notfound"; // of 10
         var imdb_id = "notfound";
         var cover = "notfound";
         var thumb = "notfound";
         if (tds.length > 23) {
             rlsDate = $(tds).get(23).firstChild.data || '';
             genres = $(tds).get(27).firstChild.data || '';
             runtime = $(tds).get(31).firstChild.data || '';
             if ($(tds).get(33).firstChild != null) {
                 plot = $(tds).get(33).firstChild.data || '';
             }
             rating = $('#imdb_rating').parent().next().text() || ''; // of 10
             imdb_id = $('[name=imdbID]').get(0).attribs.value || '';
             cover = $('#cover').children().eq(0).get(0).attribs.href || '';
             thumb = $('[alt=Cover]').get(0).attribs.src || '';
             // cover is '' (never undefined) when the href is missing.
             if (!cover) {
                 cover = thumb;
             }
         }
         this.results.push({
             type: 'movie',
             imdb_id: imdb_id,
             title: title,
             year: rlsDate,
             genre: genres,
             rating: rating,
             runtime: runtime,
             image: thumb,
             cover: cover,
             synopsis: plot,
             torrents: {
                 magnet: 'magnet:?xt=urn:btih:' + hash + '&tr=http://tracker.example.org:2710/a/announce',
                 filesize: size
             }
         });
     }
     // Count failed requests as finished too, otherwise a single error would
     // keep the final callback from ever firing.
     this.resultCount--;
     if (this.resultCount === 0) {
         fne.call(this);
     }
 };

 // Final step: hand the collected results to the caller.
 var fne = function() {
     this.callback({ results: this.results, hasMore: true });
 };

 login(function(result) {
     console.log(result);
 });

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM