简体   繁体   English

CasperJS,尝试抓取表格数据

[英]CasperJS, trying to scrape a table

This function should extract all table rows but it doesn't work. 此函数应提取所有表行,但不起作用。 It makes no output. 它没有输出。

// Create the CasperJS instance with a desktop Chrome user agent and verbose
// debug logging so every navigation step is printed.
var casper = require("casper").create({
    pageSettings: {
        userAgent: "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36"
    },
    verbose: true,
    logLevel: 'debug'
});


var url = 'http://cnt.rm.ingv.it/';
casper.start(url);

// processPage and stopScript are assigned with `var` further down the file, so
// at this point both names still hold `undefined`. Passing them directly would
// register `undefined` as the success/timeout callbacks. The wrappers below
// look the names up only when the wait resolves, after the whole script has
// been evaluated and the assignments have run.
casper.waitForSelector('#dataTablesEvents', function() {
    return processPage.apply(this, arguments);
}, function() {
    return stopScript.apply(this, arguments);
});
casper.run();


/**
 * Prints a stop notice and then terminates the CasperJS run.
 */
var stopScript = function() {
     casper.echo("STOPPING SCRIPT");
     casper.exit();
};

// Rows scraped from the most recently processed page. Declared explicitly so
// that processPage no longer creates an implicit global on first assignment.
var pageData;

/**
 * Scrapes the current page, then either follows the "next" link and waits for
 * the table to reappear, or stops the script when there is no "next" link.
 *
 * Must be invoked with `this` bound to the casper instance (as CasperJS does
 * for step and wait callbacks).
 */
var processPage = function() {

    pageData = this.evaluate(getPageData);

    // No further page: stop here and do NOT fall through, otherwise a click
    // on a selector that does not exist would still be queued.
    if (!this.exists('a[rel="next"]')) {
        stopScript();
        return;
    }

    this.thenClick('a[rel="next"]').then(function() {
        this.waitForSelector("#dataTablesEvents", processPage, stopScript);
    });
 };

/**
 * Collects the text of every table body row on the current page.
 *
 * This function is handed to `evaluate`, so it executes inside the page, where
 * only the DOM is available: the `casper` object does not exist there, and the
 * return value must be plain serializable data rather than DOM nodes.
 *
 * @returns {string[][]} One array per `table tbody tr` row, holding the
 *     trimmed text of each cell in that row.
 */
function getPageData(){

   var rows = document.querySelectorAll("table tbody tr");

   // NodeList is array-like but not an Array, hence Array.prototype.map.call.
   return Array.prototype.map.call(rows, function(row) {
       return Array.prototype.map.call(row.cells, function(cell) {
           return cell.textContent.trim();
       });
   });
}

I tried debugging it, and this is the result: 我尝试调试,这是结果:

[debug] [phantom] opening url: http://cnt.rm.ingv.it/, HTTP GET
[debug] [phantom] Navigation requested: url=http://cnt.rm.ingv.it/, 
type=Other, willNavigate=true, isMainFrame=true
[debug] [phantom] url changed to "http://cnt.rm.ingv.it/"
[debug] [phantom] Successfully injected Casper client-side utilities
[debug] [phantom] start page is loaded
[info] [phantom] Step _step 3/3 http://cnt.rm.ingv.it/ (HTTP 200)
[info] [phantom] Step _step 3/3: done in 945ms.
[info] [phantom] waitFor() finished in 40ms.
[info] [phantom] Done 3 steps in 1003ms
[debug] [phantom] Navigation requested: url=about:blank, type=Other, 
willNavigate=true, isMainFrame=true
[debug] [phantom] url changed to "about:blank"

I can't make sense of this. It looks as if the waitForSelector callback never runs. Any help? 我看不懂这个结果。看起来 waitForSelector 的回调根本没有执行。有人能帮忙吗?

Here is an approach that should work for you: 下面这种方法应该适合您:

// Create a CasperJS instance with default options.
var casper = require('casper').create();
// Page whose table is scraped.
var url = 'http://cnt.rm.ingv.it/';
// Number of table rows; assigned by a later step once the page has loaded.
var length;

// Queue the initial navigation.
casper.start(url);

// Wait until the events table is present in the DOM before later steps run.
casper.then(function() {
    this.waitForSelector('table#dataTablesEvents');
});

/**
 * Reads the trimmed text of one child node of one table body row.
 *
 * @param {number} row  Zero-based index into the `table tbody tr` rows.
 * @param {number} cell Zero-based index into that row's `childNodes`
 *     (child nodes, not table cells, so text nodes between cells count too).
 * @returns {?string} The trimmed text, or null when the row or node is
 *     missing or the node carries no text (for example a text node).
 */
function getCellContent(row, cell) {
    // Returned directly: the original assigned to an undeclared `cellText`,
    // which leaked an implicit global.
    return casper.evaluate(function(row, cell) {
        var rowNode = document.querySelectorAll('table tbody tr')[row];
        var cellNode = rowNode ? rowNode.childNodes[cell] : null;
        if (!cellNode || typeof cellNode.innerText !== 'string') {
            return null;
        }
        return cellNode.innerText.trim();
    }, row, cell);
}

// Count the table body rows. Only the number is returned from the page:
// values crossing the evaluate boundary must be serializable, and a NodeList
// of DOM elements is not.
casper.then(function() {
    length = this.evaluate(function() {
        return document.querySelectorAll('table tbody tr').length;
    });
    this.echo("table length: " + length);
});

// Print six labelled fields for every row counted by the previous step.
// The odd child-node indices presumably skip the whitespace text nodes that
// sit between the cells; verify against the page markup.
casper.then(function() {
    var fields = [
        { prefix: "Data: ", node: 1 },
        { prefix: "Magnitudo: ", node: 3 },
        { prefix: "Zona: ", node: 5 },
        { prefix: "Profondità: ", node: 7 },
        { prefix: "Latitudine: ", node: 9 },
        { prefix: "Longitudine: ", node: 11 }
    ];
    var self = this;

    for (var rowIndex = 0; rowIndex < length; rowIndex++) {
        // forEach runs synchronously, so rowIndex is stable inside the callback.
        fields.forEach(function(field) {
            self.echo(field.prefix + getCellContent(rowIndex, field.node));
        });
    }
});

// Start executing the queued steps.
casper.run();

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM