繁体   English   中英

PhantomJS:网页加载失败

[英]PhantomJS: Web-page loading failure

我尝试使用 PhantomJS 获取 http://gibdd.ru 的屏幕截图,但到目前为止所有尝试都没有成功。

我有:

  • Ubuntu 14.04
  • PhantomJS 2.0.0版
  • PhantomJS v2.1.1(同样无效)

gibdd.js:

var page = require('webpage').create();
var system = require('system');

/**
 * onResourceRequested handler: traces each outgoing request on stderr.
 *
 * Writes a marker line, then the request object pretty-printed as JSON
 * with a 4-space indent.
 *
 * @param {Object} requestData - request metadata passed in by PhantomJS.
 */
page.onResourceRequested = function (requestData) {
  var log = system.stderr;
  log.writeLine('= onResourceRequested()');
  // A null replacer is equivalent to undefined: no filtering, indent only.
  var prettyRequest = JSON.stringify(requestData, null, 4);
  log.writeLine('  request: ' + prettyRequest);
};

/**
 * onResourceReceived handler: traces each response notification on stderr.
 *
 * Writes a marker line, then one summary line with the response id, its
 * stage, and the whole response object serialized as compact JSON.
 *
 * @param {Object} res - response metadata passed in by PhantomJS.
 */
page.onResourceReceived = function (res) {
  var log = system.stderr;
  log.writeLine('= onResourceReceived()');

  var idPart = '  id: ' + res.id;
  var stagePart = ', stage: "' + res.stage + '"';
  var payloadPart = ', response: ' + JSON.stringify(res);
  log.writeLine(idPart + stagePart + payloadPart);
};

/**
 * onLoadStarted handler: logs the URL the page is navigating away from.
 *
 * The URL is read by evaluating `window.location.href` through
 * page.evaluate(), so it is whatever the page reports at that moment.
 */
page.onLoadStarted = function () {
  system.stderr.writeLine('= onLoadStarted()');

  // Passed to page.evaluate(); it refers to `window`, which is not a
  // global of this script.
  var readHref = function () {
    return window.location.href;
  };
  var previousUrl = page.evaluate(readHref);

  system.stderr.writeLine('  leaving url: ' + previousUrl);
};

/**
 * onLoadFinished handler: logs the final load status on stderr.
 *
 * @param {string} status - load result reported by PhantomJS
 *     (the captured run below shows "fail").
 */
page.onLoadFinished = function (status) {
  var report = ['= onLoadFinished()', '  status: ' + status];
  for (var i = 0; i < report.length; i += 1) {
    system.stderr.writeLine(report[i]);
  }
};

/**
 * onNavigationRequested handler: logs the details of a navigation request.
 *
 * @param {string} url - destination URL of the navigation.
 * @param {string} type - cause of the navigation (e.g. "Other" in the log below).
 * @param {boolean} willNavigate - whether the navigation will happen.
 * @param {boolean} main - whether the request comes from the page's main frame.
 */
page.onNavigationRequested = function (url, type, willNavigate, main) {
  var report = [
    '= onNavigationRequested',
    '  destination_url: ' + url,
    '  type (cause): ' + type,
    '  will navigate: ' + willNavigate,
    "  from page's main frame: " + main
  ];
  report.forEach(function (line) {
    system.stderr.writeLine(line);
  });
};

/**
 * onResourceError handler: logs which URL failed to load and why.
 *
 * @param {Object} resourceError - error details; this handler reads its
 *     `url`, `errorCode` and `errorString` fields.
 */
page.onResourceError = function (resourceError) {
  var log = system.stderr;
  log.writeLine('= onResourceError()');

  var failedUrl = '  - unable to load url: "' + resourceError.url + '"';
  log.writeLine(failedUrl);

  var reason = '  - error code: ' + resourceError.errorCode;
  reason += ', description: ' + resourceError.errorString;
  log.writeLine(reason);
};

/**
 * onError handler: logs an error message and, when available, its stack
 * trace on stderr.
 *
 * The message and all trace frames are collected first and written with a
 * single writeLine() call, joined by newlines.
 *
 * @param {string} msg - the error message.
 * @param {Array<Object>} [trace] - stack frames; each frame's `file`,
 *     `line` and optional `function` fields are printed.
 */
page.onError = function(msg, trace) {
  system.stderr.writeLine('= onError()');
  var msgStack = ['  ERROR: ' + msg];
  // An empty array is truthy, so also check the length; otherwise a
  // dangling "TRACE:" header is printed with no frames under it.
  if (trace && trace.length) {
    msgStack.push('  TRACE:');
    trace.forEach(function(t) {
      msgStack.push('    -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : ''));
    });
  }
  system.stderr.writeLine(msgStack.join('\n'));
};

// Load the target page, take a screenshot on success, then quit PhantomJS.
//
// The callback receives the load status. The screenshot is only written
// when the load succeeded, so a failed load no longer produces a
// misleading blank g.png. The process exit code reflects the outcome
// (0 on success, 1 on failure) so calling scripts can detect the failure.
page.open('http://gibdd.ru/', function(status) {
    var loaded = status === 'success';
    if (loaded) {
        page.render('g.png');
    }
    console.log("DONE WITH ", status, page);
    phantom.exit(loaded ? 0 : 1);
    });

上面代码的控制台输出:

= onNavigationRequested
  destination_url: http://gibdd.ru/
  type (cause): Other
  will navigate: true
  from page's main frame: true
= onResourceRequested()
  request: {
    "headers": [
        {
            "name": "Accept",
            "value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
        },
        {
            "name": "User-Agent",
            "value": "Mozilla/5.0 (Unknown; Linux x86_64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.0.0 Safari/538.1"
        }
    ],
    "id": 1,
    "method": "GET",
    "time": "2016-05-18T16:35:37.982Z",
    "url": "http://gibdd.ru/"
}
= onLoadStarted()
  leaving url: about:blank
= onResourceReceived()
  id: 1, stage: "start", response: {"bodySize":302,"contentType":"text/html; charset=iso-8859-1","headers":[{"name":"Date","value":"Wed, 18 May 2016 16:33:07 GMT"},{"name":"Server","value":"Apache/2.2.22 (Ubuntu)"},{"name":"Location","value":"http://www.gibdd.ru/"},{"name":"Vary","value":"Accept-Encoding"},{"name":"Content-Encoding","value":"gzip"},{"name":"Keep-Alive","value":"timeout=5, max=100"},{"name":"Connection","value":"Keep-Alive"},{"name":"Content-Type","value":"text/html; charset=iso-8859-1"}],"id":1,"redirectURL":"http://www.gibdd.ru/","stage":"start","status":301,"statusText":"Moved Permanently","time":"2016-05-18T16:35:38.176Z","url":"http://gibdd.ru/"}
= onNavigationRequested
  destination_url: http://www.gibdd.ru/
  type (cause): Other
  will navigate: true
  from page's main frame: true
= onResourceRequested()
  request: {
    "headers": [
        {
            "name": "Accept",
            "value": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
        },
        {
            "name": "User-Agent",
            "value": "Mozilla/5.0 (Unknown; Linux x86_64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.0.0 Safari/538.1"
        }
    ],
    "id": 2,
    "method": "GET",
    "time": "2016-05-18T16:35:38.182Z",
    "url": "http://www.gibdd.ru/"
}
= onResourceReceived()
  id: 1, stage: "end", response: {"contentType":"text/html; charset=iso-8859-1","headers":[{"name":"Date","value":"Wed, 18 May 2016 16:33:07 GMT"},{"name":"Server","value":"Apache/2.2.22 (Ubuntu)"},{"name":"Location","value":"http://www.gibdd.ru/"},{"name":"Vary","value":"Accept-Encoding"},{"name":"Content-Encoding","value":"gzip"},{"name":"Keep-Alive","value":"timeout=5, max=100"},{"name":"Connection","value":"Keep-Alive"},{"name":"Content-Type","value":"text/html; charset=iso-8859-1"}],"id":1,"redirectURL":"http://www.gibdd.ru/","stage":"end","status":301,"statusText":"Moved Permanently","time":"2016-05-18T16:35:38.185Z","url":"http://gibdd.ru/"}
= onResourceError()
  - unable to load url: "http://www.gibdd.ru/"
  - error code: 4, description: Socket operation timed out
= onResourceReceived()
  id: 2, stage: "end", response: {"contentType":null,"headers":[],"id":2,"redirectURL":null,"stage":"end","status":null,"statusText":null,"time":"2016-05-18T16:36:38.586Z","url":"http://www.gibdd.ru/"}
= onLoadFinished()
  status: fail
DONE WITH  fail WebPage(name = "WebPage")
= onNavigationRequested
  destination_url: about:blank
  type (cause): Other
  will navigate: true
  from page's main frame: true

该网站似乎会阻止来自 PhantomJS 相关项目的连接,因为 CasperJS 同样无法正常工作。

我尝试将标头更改为:

// Replace the default User-Agent (the request log above shows it contains
// "PhantomJS/2.0.0") with a desktop Chrome string.
// NOTE(review): presumably this must run before page.open() to take effect — confirm.
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36';

但这并没有起作用。

有什么办法可以绕过这个限制吗?该脚本对其他任何网页都能正常工作。

我用 PhantomJS 2.1.1 试了这个脚本,即使不伪造用户代理也能正常工作: screenshot

尝试的事情:

  • 升级到v.2.1.1
  • 从其他IP连接

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM