[英]Node.js with Express: Push to an empty Array returns an empty Array
[英]Webscraper in Node.js returns empty array with async and promise
我在 Node.js 中把 for 循环和网页爬虫结合起来访问网站时,无法让 async 和 Promise 正常工作。看了几篇文章并在 Stack Overflow 上测试了不同的解决方案后,我仍然无法使我的异步函数正常工作。谢谢!
代码:
// Shared result object: scrape() adds one entry per table row title, and run()
// returns it serialized as JSON.
var data = {};
/**
 * Scrapes every configured link concurrently and returns the collected data.
 *
 * Each link is handed to `scrape`; after a link's scrape promise settles, the
 * current content of the shared `data` object is logged. Once all links are
 * done, `data` is logged once more and returned as a JSON string.
 *
 * @returns {Promise<string>} JSON serialization of the shared `data` object.
 */
async function run() {
  console.log("Setup links..");
  const targets = ['https://example1.com', 'https://example2.com'];

  // Per-link worker: wait for the scrape, then report what has been gathered so far.
  const scrapeAndReport = async (target) => {
    await scrape(target);
    console.log(`After call in Promise: ${JSON.stringify(data)}`);
  };

  await Promise.all(targets.map(scrapeAndReport));

  console.log("------------");
  const serialized = JSON.stringify(data);
  console.log(serialized);
  return serialized;
}
// Requests `element` and, when the response has no error and status 200, walks
// every `tr td` cell of the returned HTML and fills the shared `data` object,
// keyed by each row's title with the spaces removed.
//
// NOTE(review): although declared `async`, this function neither awaits nor
// returns anything tied to the `request` callback, so the promise it returns
// resolves before the callback has run. A caller that does `await scrape(...)`
// therefore continues while `data` is still unpopulated. Request errors and
// non-200 responses are silently ignored (there is no else branch).
async function scrape(element) {
request(element, function (error, response, html) {
console.log("Scrape website...");
if (!error && response.statusCode == 200) {
var $ = cheerio.load(html);
// Counters that track the position inside the table while iterating a flat
// list of cells.
var rowCounter = 0;
var columnCounter = 0;
// NOTE(review): `dates` is filled from the first row but never read in this block.
var dates = [];
// Values collected for the row currently being read.
var item = [];
var mainTitle = false;
var title;
$('tr td').each(function(i, elem) {
var txt = $(elem).text().trim();
if (rowCounter == 0) {
// First row: keep the last four characters of every cell except the first one.
if (columnCounter != 0) {
dates.push(txt.substring(txt.length - 4, txt.length));
}
} else {
// These labels are treated as section headings, not as data rows.
if (txt == "Current Assets" || txt == "Current Liabilities" || txt == "Stockholders' Equity" || txt == "Revenue" || txt == "Operating Expenses" || txt == "Income from Continuing Operations" || txt == "Non-recurring Events" || txt == "Net Income") {
mainTitle = true;
} else {
if (columnCounter == 0) {
// First cell of a data row: its text without spaces becomes the key in `data`.
title = txt.split(' ').join('');
data[title] = {};
} else {
item.push(txt);
}
}
}
columnCounter++;
// A section heading restarts column counting, so the next cell is handled as
// the first cell of a row.
if (mainTitle) {
columnCounter = 0;
mainTitle = false;
}
// A row counts as complete after five cells; for every row except the first,
// the four collected values are stored under the row's title.
if (columnCounter == 5) {
columnCounter = 0;
if (rowCounter != 0) {
data[title][0] = item[0];
data[title][1] = item[1];
data[title][2] = item[2];
data[title][3] = item[3];
item = [];
}
rowCounter++;
}
});
}
});
}
// Export run() so that other modules can call it.
module.exports.run = run;
上面代码的控制台输出:
Server started!
Route called
Setup links..
After call in Promise: {}
After call in Promise: {}
------------
{}
Scrape website...
Scrape website...
从输出可以看出,“Scrape website...”在结果打印之后才出现,因此在循环中使用 Promise 的方式存在问题。
我相信这就是您想要的(未经测试,只是随手改写的):
// Row labels that are treated as section headings instead of data rows.
const SECTION_HEADINGS = new Set([
  "Current Assets",
  "Current Liabilities",
  "Stockholders' Equity",
  "Revenue",
  "Operating Expenses",
  "Income from Continuing Operations",
  "Non-recurring Events",
  "Net Income",
]);

// Number of cells counted before a table row is considered complete.
const CELLS_PER_ROW = 5;

// Number of values stored per data row (every cell after the title cell).
const VALUES_PER_ROW = CELLS_PER_ROW - 1;

/**
 * Walks every `tr td` cell of a loaded document and records each data row in
 * the shared `data` object as `data[title] = {0: v0, 1: v1, 2: v2, 3: v3}`,
 * where `title` is the text of the row's first cell with the spaces removed.
 *
 * The first row is skipped: nothing from it is recorded.
 *
 * @param {Function} $ - Document handle returned by `cheerio.load`.
 */
function collectTableRows($) {
  let rowIndex = 0;
  let columnIndex = 0;
  let values = [];
  let title;

  $('tr td').each((i, elem) => {
    const txt = $(elem).text().trim();

    if (rowIndex !== 0 && SECTION_HEADINGS.has(txt)) {
      // A section heading restarts column counting, so the next cell is
      // handled as the first cell of a row.
      columnIndex = 0;
      return;
    }

    if (rowIndex !== 0) {
      if (columnIndex === 0) {
        title = txt.split(' ').join('');
        data[title] = {};
      } else {
        values.push(txt);
      }
    }

    columnIndex += 1;
    if (columnIndex !== CELLS_PER_ROW) {
      return;
    }

    // Row complete: store its values (except for the first row) and move on.
    columnIndex = 0;
    if (rowIndex !== 0) {
      for (let k = 0; k < VALUES_PER_ROW; k += 1) {
        data[title][k] = values[k];
      }
      values = [];
    }
    rowIndex += 1;
  });
}

/**
 * Fetches `element` with `request` and copies the rows of the returned HTML
 * table into the shared `data` object.
 *
 * The callback-based `request` call is adapted to a promise so that callers
 * can `await` the moment at which `data` has actually been filled.
 *
 * @param {string} element - URL of the page to scrape.
 * @returns {Promise<void>} Resolves after the page's rows were written to
 *   `data`. Rejects with the error reported by `request`, with an `Error`
 *   whose message is "Got HTTP code: <status>" for a non-200 response, or with
 *   whatever error was thrown while parsing the page.
 */
function scrape(element) {
  return new Promise((resolve, reject) => {
    request(element, (error, response, html) => {
      if (error) {
        reject(error);
        return;
      }
      if (response.statusCode !== 200) {
        reject(new Error(`Got HTTP code: ${response.statusCode}`));
        return;
      }
      console.log("Scrape website...");
      try {
        collectTableRows(cheerio.load(html));
        resolve();
      } catch (parseError) {
        // Without this, an exception thrown inside the callback would bypass
        // the promise and surface as an uncaught exception.
        reject(parseError);
      }
    });
  });
}
将代码包装在 `Promise` 中,解析完成后调用 `resolve`,并使用 `reject` 处理错误——不过具体如何处理错误,您自己最清楚。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.