[英]Upload Node Request-Response Response To MongoDB
我正在 Node.js 中試用 Cheerio。我的程式會先抓取文章列表頁,取得所有文章的 URL,然後逐一進入每篇文章,抓取標題和 URL。一切運作正常,只是當我嘗試將結果 upsert 到 MongoDB 時,得到的值是 undefined。
我猜想它是在值被定義之前就嘗試 upsert……但即使使用 request-promise,我也無法讓它正常運作。任何幫助都將不勝感激!由於代碼不太長,我把全部內容貼上來,方便看出我想做什麼。再次說明,主要問題是讓 upsertArticle 實際更新這些變量。
const request = require('request');
const cheerio = require('cheerio');
const rp = require('request-promise');
const mongoose = require('mongoose');
const Article = require('./models/article');
// Question sample: collect article links from an index page, then visit each
// article and try to upsert its title and URL.
// Article URLs found on the index page are accumulated here.
var urls = [];
//get the list of articles to scrape
// NOTE(review): this call both passes a callback to rp() and chains .then()
// on its return value, so callback style and promise style are mixed.
rp('https://www.somesite.com/', function(error, response, html) {
if (!error && response.statusCode == 200) {
var $ = cheerio.load(html);
$('.c-entry-box--compact__title').each(function(i, element) {
var a = $(this);
// Pushes the href of the matched element's children; nothing here guards
// against a missing href, so undefined can end up in urls.
urls.push(a.children().attr('href'));
}); } })
//scrape over each article individually
// NOTE(review): the getStuff parameter is never read in this handler.
.then(function(getStuff) {
var arrayLength = urls.length;
//get the list of articles to scrape and upsert each one
for (var i = 0; i < arrayLength; i++) {
// NOTE(review): `result` is bound to the value of the whole
// rp(...).then(...).catch(...) chain, not to the { title, url } object built
// inside the .each callback below. Reading result.title / result.url further
// down therefore reads properties off that chain value.
const result = rp(urls[i], function(error, response, html) {
if (!error && response.statusCode == 200) {
var $ = cheerio.load(html);
// parsedResults is declared but never used in this block.
var parsedResults = [];
$('.l-main-content').each(function(n, element) {
var a = $(this);
var title = a.find('.c-page-title').text();
var url = response.request.uri.href;
//I also tried upserting the variables right here, that didn't work
// This return hands the object back to .each only; nothing in this block
// reads it, so title and url never leave the callback.
return { title, url };
});
} else {console.log(error);}
// NOTE(review): the upsertStuff parameter is never read in this handler.
}).then(function(upsertStuff) {
//also tried returning and upserting stuff here... but nothing gets upserted
upsertArticle({
title: result.title,
source: result.url,
dateCrawled: new Date()
});
console.log('Upserted ' + result.title);
}).catch(function(err) {console.log(err); }); }
}) .catch(function(err) {console.log(err); });
/**
 * Insert or update a single article document, matched on its title.
 *
 * Opens the mongoose connection first when its readyState is 0, then issues
 * a findOneAndUpdate with upsert enabled. Nothing is returned; an error
 * reported to the query callback is re-thrown from inside that callback.
 *
 * @param {Object} userObj - Fields to store; `userObj.title` is the match key.
 */
function upsertArticle(userObj) {
  const DB_URL = 'mongodb://localhost/articles';

  const isDisconnected = mongoose.connection.readyState == 0;
  if (isDisconnected) {
    mongoose.connect(DB_URL, { useMongoClient: true });
  }

  // Match on title only; the full object is what gets written.
  const matchByTitle = { title: userObj.title };
  const upsertOptions = {
    upsert: true,
    new: true,
    setDefaultsOnInsert: true
  };

  Article.findOneAndUpdate(matchByTitle, userObj, upsertOptions, (failure, saved) => {
    if (failure) {
      throw failure;
    }
  });
}
我對你提供的代碼做了一些更改。具體來說,我改用 Promise 而不是回調,讓邏輯保持一致,並確保每一步都在該執行的時候執行。
在 for 循環中,我將 upsertArticle({...}) 移回 each 函數內部,這樣在它執行時 title 和 url 已經有定義。
最後,我使用 Bluebird 的 Promise.all(request-promise 本身已經依賴 Bluebird),在所有鏈接都已 upsert 完成時發出信號。此更改是可選的,但我認為在一切完成後得到反饋會很有用。
試試這個:
const request = require('request');
const cheerio = require('cheerio');
const rp = require('request-promise');
const mongoose = require('mongoose');
const Article = require('./models/article');
const Promise = require("bluebird");
// Article URLs collected from the index page; filled by the first request below.
const urls = [];

/**
 * Fetch a page and resolve with the full response object.
 *
 * @param {string} uri - Address to request.
 * @returns {Promise<Object>} Resolves with the response; rejects with an
 *   Error when the status code is anything other than 200.
 */
function fetchPage(uri) {
  return rp({ uri: uri, resolveWithFullResponse: true }).then((response) => {
    if (response.statusCode !== 200) {
      // Throw a real Error (not a bare string) so the rejection carries a stack.
      throw new Error('Response: ' + response.statusCode);
    }
    return response;
  });
}

/**
 * Scrape one article page and upsert every `.l-main-content` entry on it.
 *
 * @param {string} articleUrl - Address of the article page.
 * @returns {Promise<Array>} Settles after every upsert started for this page
 *   has completed (or rejects with the first failure).
 */
function scrapeArticle(articleUrl) {
  return fetchPage(articleUrl).then((response) => {
    const $ = cheerio.load(response.body);
    const pageUrl = response.request.uri.href;
    const pendingUpserts = [];

    $('.l-main-content').each(function () {
      const title = $(this).find('.c-page-title').text();
      // Promise.resolve accepts either a promise or a plain return value from
      // upsertArticle, so the log line runs after the upsert whenever the
      // function exposes its completion, and still runs if it does not.
      const pending = Promise.resolve(
        upsertArticle({
          title: title,
          source: pageUrl,
          dateCrawled: new Date()
        })
      ).then(() => {
        console.log('Upserted ' + title);
      });
      pendingUpserts.push(pending);
    });

    return Promise.all(pendingUpserts);
  });
}

// Step 1: read the index page and collect article links.
// Step 2: scrape all articles in parallel and wait for every upsert.
fetchPage('https://www.somesite.com')
  .then((response) => {
    const $ = cheerio.load(response.body);
    $('.c-entry-box--compact__title').each(function () {
      const href = $(this).children().attr('href');
      // Skip entries without a link instead of requesting `undefined` later.
      if (href) {
        urls.push(href);
      }
    });
    return Promise.all(urls.map((articleUrl) => scrapeArticle(articleUrl)));
  })
  .then(() => {
    console.log("Done upserting!");
  })
  .catch((err) => {
    console.log(err);
  });
/**
 * Insert or update a single article document, matched on its title.
 *
 * Opens the mongoose connection first when it is fully disconnected
 * (readyState 0), then runs a findOneAndUpdate with upsert enabled.
 *
 * @param {Object} userObj - Fields to store; `userObj.title` is the match key.
 * @returns {Promise<Object>} Resolves with the value the query callback
 *   receives on success; rejects with the error it receives on failure.
 */
function upsertArticle(userObj) {
  const DB_URL = 'mongodb://localhost/articles';

  if (mongoose.connection.readyState === 0) {
    mongoose.connect(DB_URL, {
      useMongoClient: true
    });
  }

  const conditions = {
    title: userObj.title
  };
  const options = {
    upsert: true,
    new: true,
    setDefaultsOnInsert: true
  };

  // Adapt the callback API to a promise. Throwing from inside the callback
  // (as before) could not be caught by any caller; rejecting lets callers
  // wait for the write and handle failures in their own .catch().
  return new Promise((resolve, reject) => {
    Article.findOneAndUpdate(conditions, userObj, options, (err, result) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(result);
    });
  });
}
由於不知道 https://www.somesite.com 的真實網址,我無法測試這段代碼;如果代碼出現任何新的錯誤,請告訴我。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.