簡體   English   中英

Node.js 中的 Webscraper 在使用 async 和 Promise 時返回空數組

[英]Webscraper in Node.js returns empty array with async and promise

我的 webscraper 使用 for 循環逐一訪問網站,但我無法讓 Node.js 的 async 和 Promise 與它正常配合。 看了幾篇文章並在 Stack Overflow 上測試了不同的解決方案后,我仍然無法使我的異步函數正常工作。 謝謝!

代碼:

// Module-level accumulator shared by the functions below: scrape() adds one
// entry per parsed row title, and run() serialises the whole object.
var data = {};

/**
 * Scrapes every configured link concurrently and returns the accumulated
 * module-level `data` object serialised as JSON.
 *
 * @returns {Promise<string>} JSON text of `data` once every scrape() call has settled.
 */
async function run() {
    console.log("Setup links..");
    const targets = ['https://example1.com', 'https://example2.com'];

    // One task per link: wait for its scrape, then report what has been collected so far.
    const scrapeAndReport = async (url) => {
        await scrape(url);
        console.log("After call in Promise: " + JSON.stringify(data));
    };

    await Promise.all(targets.map((url) => scrapeAndReport(url)));

    const serialized = JSON.stringify(data);
    console.log("------------");
    console.log(serialized);
    return serialized;
}

/**
 * Requests `element` and, inside the request callback, walks every `tr td`
 * cell of the returned HTML, storing each data row's values in the
 * module-level `data` object under the row's title (with spaces removed).
 *
 * NOTE(review): this function is declared `async` but neither returns nor
 * awaits anything connected to the `request` callback, so the promise it
 * returns resolves before the callback has run. A caller that awaits it
 * therefore continues while `data` is still empty.
 *
 * `request` and `cheerio` are not defined in the visible source; presumably
 * they are the npm packages of the same names — confirm against the imports.
 *
 * @param {string} element - URL of the page to fetch.
 */
async function scrape(element) {
    // Callback-style call: the work below happens later, after scrape() has already returned.
    request(element, function (error, response, html) {
        console.log("Scrape website...");
        // Errors and non-200 responses are skipped without any report to the caller.
        if (!error && response.statusCode == 200) {
            var $ = cheerio.load(html);
            var rowCounter = 0;
            var columnCounter = 0;
            // Filled from the first row but never read afterwards in this function.
            var dates = [];
            var item = [];
            var mainTitle = false;
            var title;

            $('tr td').each(function(i, elem) {
                var txt = $(elem).text().trim();
                if (rowCounter == 0) {
                    // First row: keep the last four characters of every cell except the first.
                    if (columnCounter != 0) {
                        dates.push(txt.substring(txt.length - 4, txt.length));
                    }
                } else {
                    // These labels are treated as headings: nothing is stored for them.
                    if (txt == "Current Assets" || txt == "Current Liabilities" || txt == "Stockholders' Equity" || txt == "Revenue" || txt == "Operating Expenses" || txt == "Income from Continuing Operations" || txt == "Non-recurring Events" || txt == "Net Income") {
                        mainTitle = true;
                    } else {
                        if (columnCounter == 0) {
                            // First cell of a data row is its title; spaces are stripped to form the key.
                            title = txt.split(' ').join('');
                            data[title] = {};
                        } else {
                            item.push(txt);
                        }
                    }
                }

                columnCounter++;

                // A heading cell does not advance the column position: restart at column 0.
                if (mainTitle) {
                    columnCounter = 0;
                    mainTitle = false;
                }

                // Five cells make one complete row (title cell plus four values).
                if (columnCounter == 5) {
                    columnCounter = 0;
                    if (rowCounter != 0) {
                        data[title][0] = item[0];
                        data[title][1] = item[1];
                        data[title][2] = item[2];
                        data[title][3] = item[3];
                        item = [];
                    } 
                    rowCounter++;
                }
            });
        }
    });   
}

// Expose run() to callers through the CommonJS exports object.
module.exports.run = run;

上面代碼在控制台中的輸出:

Server started!
Route called
Setup links..
After call in Promise: {}
After call in Promise: {}
------------
{}
Scrape website...
Scrape website...

因此,在循環中使用 Promise 時存在問題。

我相信這就是您想要的(未經測試,只是隨手改寫的):

// Cell labels that only name a section; no values are stored for them.
const SECTION_HEADINGS = new Set([
    "Current Assets",
    "Current Liabilities",
    "Stockholders' Equity",
    "Revenue",
    "Operating Expenses",
    "Income from Continuing Operations",
    "Non-recurring Events",
    "Net Income",
]);

// A complete row is one title cell followed by four value cells.
const CELLS_PER_ROW = 5;
const VALUES_PER_ROW = 4;

/**
 * Walks every `tr td` cell in document order and stores each data row in the
 * module-level `data` object as `data[title] = { 0: v0, 1: v1, 2: v2, 3: v3 }`,
 * where `title` is the row's first cell with spaces removed.
 *
 * @param {Function} $ - Query function returned by `cheerio.load(html)`.
 */
function collectTableRows($) {
    let rowCounter = 0;
    let columnCounter = 0;
    let item = [];
    let title;

    $('tr td').each((index, cell) => {
        const text = $(cell).text().trim();
        const inHeaderRow = rowCounter === 0;

        if (!inHeaderRow && SECTION_HEADINGS.has(text)) {
            // A heading cell does not count as a column: restart at column 0.
            // (Plain `return`, never `return false`, so iteration continues.)
            columnCounter = 0;
            return;
        }

        // Nothing is stored from the header row; it is only counted.
        if (!inHeaderRow) {
            if (columnCounter === 0) {
                title = text.split(' ').join('');
                data[title] = {};
            } else {
                item.push(text);
            }
        }

        columnCounter++;
        if (columnCounter !== CELLS_PER_ROW) {
            return;
        }

        // Row complete: flush the collected values and move to the next row.
        columnCounter = 0;
        if (!inHeaderRow) {
            for (let i = 0; i < VALUES_PER_ROW; i++) {
                data[title][i] = item[i];
            }
            item = [];
        }
        rowCounter++;
    });
}

/**
 * Fetches `element` and folds the table it contains into the module-level
 * `data` object. Adapts the callback-style `request` API to a promise so that
 * callers can `await` the completed scrape.
 *
 * @param {string} element - URL of the page to fetch.
 * @returns {Promise<void>} Resolves once the page has been parsed into `data`.
 *   Rejects with the transport error, with an `Error` carrying the HTTP status
 *   when the response is not 200, or with whatever parsing threw.
 */
function scrape(element) {
    return new Promise((resolve, reject) => {
        request(element, (error, response, html) => {
            if (error) {
                reject(error);
                return;
            }
            if (response.statusCode !== 200) {
                reject(new Error(`Got HTTP code: ${response.statusCode}`));
                return;
            }

            // An exception thrown in this callback would otherwise escape as an
            // uncaught error and leave the promise pending forever.
            try {
                console.log("Scrape website...");
                collectTableRows(cheerio.load(html));
                resolve();
            } catch (parseError) {
                reject(parseError);
            }
        });
    });
}

將代碼包裝在 Promise 中,調用 resolve,並使用 reject 處理錯誤——不過如何處理錯誤,您自己最清楚。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM