[英]Unable to find elements using Cheerio (Node.js/jQuery)
我正在尝试编写一个脚本,从用户公开的 Codecademy 个人资料页中抓取该用户的成就。目前,我是这样解析数据的(后端为 Node.js,使用 Cheerio):
var express = require('express');
var cheerio = require('cheerio');
var request = require('request');
var app = express();
// GET /scrape/:username
// Downloads the achievements page for the given user, collects the title and
// date text of every `.achievement-card` element, and replies with a JSON
// document containing the list plus request/server metadata.
app.get('/scrape/:username', function(req, res){
    var user = req.params.username;
    console.log('Processing request for ' + user);
    // NOTE: the answer further down points out that the profile path needs a
    // `/users/` segment before the username. The path is left as posted here
    // because it is the subject of the question.
    // `var` keeps `url` local to this request (it was an implicit global), and
    // the username is escaped so it cannot alter the structure of the URL.
    var url = 'http://www.codecademy.com/' + encodeURIComponent(user) + '/achievements';
    request(url, function(error, response, body){
        // Always answer the client; previously a failed fetch sent nothing and
        // left the HTTP request hanging.
        if (error){
            console.log('Request for ' + url + ' failed: ' + error.message);
            res.status(502).json({
                error: 'Unable to fetch achievements for ' + user
            });
            return;
        }
        var $ = cheerio.load(body);
        var json = {
            achievements: [],
            meta: {
                request: {
                    user: user,
                    time: Date.now()
                },
                server: {
                    version: 1,
                    contact: 'benedict@ovalbit.com'
                }
            }
        };
        console.log('Running parser and scraping achievements.');
        $('.achievement-card').each(function() {
            var data = $(this);
            // Scoped to the callback: each card gets its own title/date.
            var title = data.children('h5').text();
            var date = data.find('small.text--ellipsis').text();
            console.log('Title: ' + title);
            console.log('Date: ' + date);
            json.achievements.push({
                title: title,
                date: date
            });
        });
        res.type('application/json');
        res.json(json);
    });
});
app.listen('3006');
console.log('Running on port 3006.');
作为参考,个人资料页大致是这个样子:
但是,我的 each 回调函数始终没有执行。有什么想法吗?
您似乎漏掉了用户名之前的 /users/ 部分,因此生成的 URL 是错误的。我刚在示例成就页面上试了这些选择器,对我来说是有效的(node v0.10.30、cheerio v0.17.0、request v2.46.0):
var request = require('request'),
cheerio = require('cheerio');
var url = 'http://www.codecademy.com/users/BenedictLewis/achievements';
// Fetch the achievements page and print the title and date text of every
// `.achievement-card` element.
request(url, function(err, res, body) {
    // Without this guard a failed request would hand an undefined body to
    // cheerio.load.
    if (err) {
        console.error('Request for ' + url + ' failed: ' + err.message);
        return;
    }
    var $ = cheerio.load(body);
    $('.achievement-card').each(function() {
        var data = $(this);
        // Declared with `var`: these were implicit globals before, which
        // throws a ReferenceError under strict mode.
        var title = data.children('h5').text();
        var date = data.find('small.text--ellipsis').text();
        console.log('Title: ' + title);
        console.log('Date: ' + date + '\n');
    });
});
输出:
Title: Introduction to 'For' Loops in JS
Date: Feb 1, 2014

Title: 50 Exercises
Date: Feb 1, 2014

Title: Build "Rock, Paper, Scissors"
Date: Jan 18, 2014

Title: Introduction to Functions in JS
Date: Jan 18, 2014

Title: 25 points earned in one day
Date: Jan 11, 2014

Title: Choose Your Own Adventure!
Date: Jan 11, 2014

Title: 25 Exercises
Date: Jan 11, 2014

Title: Getting Started with Programming
Date: Jan 11, 2014

Title: 10 Exercises
Date: Jan 11, 2014

Title: First Lesson
Date: Jan 11, 2014

Title: Max Streak Count of 1
Date: Jan 11, 2014
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.