This is my first time using Node.js and Express. I would like to create a web scraper. This is my project structure:
WebScrape:
|_ bin
|_ node_modules
|_ public
|_ routes
|_ view
|_ app.js
|_ package.json
|_ package-lock.json
I've created a scrape.js file inside the routes directory:
const express = require('express');
const fs = require('fs');
const request = require('request');
const cheerio = require('cheerio');

const app = express();

/**
 * GET /scrape
 *
 * Fetches the IMDb page for the example title and replies with a JSON
 * object holding the fields to capture (title, release, rating).
 * The fields are still empty placeholders; populate them with cheerio
 * selectors on `$`.
 */
app.get('/scrape', function (req, res) {
  // the URL we will scrape from - in our example Anchorman 2
  // (declared with const so it does not leak as an implicit global)
  const url = 'http://www.imdb.com/title/tt1229340/';

  /**
   * The structure of our request call.
   * The first parameter is our URL.
   * The callback receives an error (if any), the response and the html.
   */
  request(url, function (error, response, html) {
    // report a failed fetch instead of leaving the client waiting forever
    if (error) {
      res.status(502).send(`Failed to fetch ${url}`);
      return;
    }

    // utilize the cheerio library on the returned html which will essentially give us jQuery functionality
    const $ = cheerio.load(html);

    // the values we are going to capture; fill these in from `$`
    const json = { title: '', release: '', rating: '' };

    // always answer the request so the HTTP call completes
    res.json(json);
  }); // end request
}); // end get

app.listen('8081');
console.log('Magic happens on port 8081');
exports = module.exports = app;
How can I test it? Is this the right place to put it?
const express = require('express');
const fs = require('fs');
const request = require('request');
const cheerio = require('cheerio');

const router = express.Router();

/**
 * GET /scrape
 *
 * Fetches the IMDb page for the example title and replies with a JSON
 * object holding the fields to capture (title, release, rating).
 * The fields are still empty placeholders; populate them with cheerio
 * selectors on `$`.
 */
router.get('/scrape', function (req, res) {
  // the URL we will scrape from - in our example Anchorman 2
  // (declared with const so it does not leak as an implicit global)
  const url = 'http://www.imdb.com/title/tt1229340/';

  /**
   * The structure of our request call.
   * The first parameter is our URL.
   * The callback receives an error (if any), the response and the html.
   */
  request(url, function (error, response, html) {
    // report a failed fetch instead of leaving the client waiting forever
    if (error) {
      res.status(502).send(`Failed to fetch ${url}`);
      return;
    }

    // utilize the cheerio library on the returned html which will essentially give us jQuery functionality
    const $ = cheerio.load(html);

    // the values we are going to capture; fill these in from `$`
    const json = { title: '', release: '', rating: '' };

    // always answer the request so the HTTP call completes
    res.json(json);
  }); // end request
}); // end get

// export the router so the main app can mount it
exports = module.exports = router;
Generally, app.js listens on ports for requests. You use express.Router to further extend and add routes in separate router files.
In app.js you have to do this to actually add the routes:
// Load the router module; the path must match the file created in the
// routes directory (scrape.js), otherwise require() throws MODULE_NOT_FOUND.
const routes = require('./routes/scrape.js');
// app is the express() app
app.use(routes);
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.