简体   繁体   中英

Parsing multiple pages of website and count total items

My script simply gathers the number of reports on a page, then goes to the next page and does the same. The goal is to get the total number of reports across multiple pages.

UPDATED

// Create the Casper instance. clientScripts lists the local jQuery file so
// that `$` can be used inside the evaluate() callbacks below.
var casper = require('casper').create({
    clientScripts: ["./lib/jquery-2.1.3.min.js"],
    // verbose: true,
    logLevel: "debug"
});

// Echo messages received via the 'remote.message' event, prefixed with 'LOG: '.
casper.on('remote.message', function(msg) {
    this.echo('LOG: ' + msg);
});

// Echo messages received via the 'page.error' event using the 'ERROR' style.
// The `trace` argument is accepted but not used.
casper.on('page.error', function (msg, trace) {
    this.echo( 'Error: ' + msg, 'ERROR' );
});

// Shared between steps: rows on the first page, the value returned by the
// second-page thenOpen() call, and the pager link count.
var reportCount, newReportCount, reportPages;

// NOTE(review): the callback opened on the next line is never closed -- there
// is no matching `});` before casper.run() -- so the script as posted is
// syntactically incomplete.
casper.start("reports.html", function() {

    // Number of <a> children inside the cells of #table2, minus one.
    // presumably #table2 is the pager and one link is not a page -- TODO confirm
    reportPages = this.evaluate(function() {
        return $('#table2 tbody tr td').children('a').length -1;
  });

  //first page of reports
  // Counts the <tr> children of the first <tbody> in #table1.
  reportCount = this.evaluate(function() {
      return $('#table1 tbody').first().children('tr').length;
  });

  this.echo('initial count: ' + reportCount);
  this.echo('pages: ' + reportPages);

  //check if more than 1 page and add report count
  if (reportPages > 1) {
    // thenOpen() only schedules a step; per the log on the line after this
    // call, its return value is the Casper object, so newReportCount never
    // holds a number. The sum is only available inside the step callback.
    newReportCount = this.thenOpen('reports2.html', function(){
        var newCount = this.evaluate(function(count) {
            // `add` is assigned without var, so it becomes an implicit global
            // in the page context.
            add = count + $('#table1 tbody').first().children('tr').length;
            // console.log('new count inside: ' + add);
            return add;
        }, reportCount);
        console.log(newCount); //this shows correct new value 32
    });
    console.log(newReportCount); //this shows [object Casper]

    // NOTE(review): `neoReportCount` is not declared anywhere in this script
    // (only reportCount, newReportCount and reportPages are), so this creates
    // an implicit global -- possibly a typo for newReportCount.
    // NOTE(review): confirm against thenOpen()'s signature whether the
    // trailing newReportCount argument is forwarded to the callback as
    // `count`; the callback never reads `count`.
    neoReportCount = this.thenOpen('reports3.html', function(count){
        console.log(newReportCount); //this shows [object Casper]
        //do the same count
    }, newReportCount);
  }

casper.run();

Here is the output in console

Pages: 3
First count: 15
[object Casper], currently at file:///**/reports.html
32
[object Casper], currently at file:///**/reports3.html

Yes, it is possible, but you use casper.thenOpenAndEvaluate(), which has the word "then" in its name. That means the function is asynchronous: it only schedules a step and returns the casper object to enable a builder/promise pattern, so you cannot return a value from a function like this. Because it is asynchronous, the step is executed after the current step ends, which is after console.log(newCount); has already run.

You would need to split the function, for example like this:

//check if more than 1 page and add report count
if (reportPages > 1) {
  // Running total of report rows. It is only assigned inside the step
  // callbacks, because thenOpen() schedules a step and returns the casper
  // object rather than the callback's result.
  var newCount;
  this.thenOpen('reports2.html', function(){
    // Start from the first page's count and add this page's rows.
    newCount = this.evaluate(function(count){
      // Declared with var so no implicit global is created in the page context.
      var add = count + $('#table1 tbody').first().children('tr').length;
      console.log('new count inside: ' + add);
      return add;
    }, reportCount);
    console.log(newCount);
  }).thenOpen('reports3.html', function(){
    // Pass the running total, not reportCount. newCount already contains the
    // first page's rows, so adding reportCount again would count them twice.
    // newCount is read when this step runs, i.e. after the previous step set it.
    newCount = this.evaluate(function(count){
      var add = count + $('#table1 tbody').first().children('tr').length;
      console.log('new count inside: ' + add);
      return add;
    }, newCount);
    console.log(newCount);
  }).then(function(){
    // Final total across all three pages.
    console.log(newCount);
  });
}

It seems like you want to loop over multiple pages. This is usually done recursively, because CasperJS is asynchronous and you don't know beforehand how many pages you need to open. I suggest you look at this question for some examples: CasperJS loop or iterate through multiple web pages?

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM