繁体   English   中英

用PhantomJS登录后如何获取下一页?

[英]How do I get the next page after logging in with PhantomJS?

我在这里发现了很多这方面的问题,但不确定为什么他们没有回答。

我在使用以下代码登录后尝试抓取网页: source

// Shared state for the step runner.
// `steps` starts empty and is reassigned with the real step functions further down.
var steps=[];
// Index into `steps` of the next step to execute.
var testindex = 0;
var loadInProgress = false;//This is set to true when a page is still loading

/*********SETTINGS*********************/
// Create the single page object that every step operates on.
var webPage = require('webpage');
var page = webPage.create();
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36';
page.settings.javascriptEnabled = true;
page.settings.loadImages = false;//Script is much faster with this field set to false
phantom.cookiesEnabled = true;
// NOTE(review): javascriptEnabled is set on page.settings above; confirm that
// setting it on the `phantom` object has any effect.
phantom.javascriptEnabled = true;
/*********SETTINGS END*****************/

console.log('All settings loaded, start with execution');
// Forward every console message received from the page to this script's console.
// The same handler is assigned to page.onConsoleMessage again after the step runner.
page.onConsoleMessage = function(msg) {
    console.log(msg);
};
/**********DEFINE STEPS THAT PHANTOM SHOULD DO***********************/
/*
 * Ordered list of step functions executed one at a time by the step runner.
 *
 * Flow: open the login page -> fill in and submit the login form -> open the
 * page that is only fully available to a logged-in user -> save that page's
 * HTML to disk.
 *
 * Submitting the login form only yields the form endpoint's own response, so
 * the protected page has to be requested explicitly once the session exists;
 * otherwise the saved file contains the login response instead of the page.
 */
steps = [

    //Open the login page
    function(){
        console.log('Step 1 - Abrindo página de login');
        page.open("http://parceriascury.housecrm.com.br", function(status){
            // Nothing useful can follow if the login page did not load.
            if (status !== "success") {
                console.log('Step 1 - could not load the login page (status: ' + status + ')');
                phantom.exit(1);
            }
        });
    },
    //Populate and submit the login form (runs inside the page context)
    function(){
        console.log('Step 3 - Preenchendo o form');
        page.evaluate(function(){
            document.getElementById("login").value="xxxxx";
            document.getElementById("senha").value="xxxxx";
            document.getElementById("frmlandingpage").submit();
        });
    },
    //Open the page that needs the logged-in session
    function(){
        console.log('Step 4 - Opening the page that requires login');
        page.open("http://parceriascury.housecrm.com.br/parceiro_busca", function(status){
            // Do not save a page that failed to load.
            if (status !== "success") {
                console.log('Step 4 - could not load the target page (status: ' + status + ')');
                phantom.exit(1);
            }
        });
    },
    //Save the HTML of the currently loaded page so it can be inspected in a browser
    function(){
        console.log("Step 5 - Saving the content of the logged-in page to C:\\phantomjs\\logado_cury_10.html. You can open this file using Chrome to ensure that you are logged in.");
        var fs = require('fs');
        var result = page.evaluate(function() {
            return document.documentElement.outerHTML;
        });
        fs.write('C:\\phantomjs\\logado_cury_10.html',result,'w');
    },
];
/**********END STEPS THAT PHANTOM SHOULD DO***********************/

//Execute steps one by one: the runner is polled every 5 seconds.
//Declared with `var` so the timer handle is not an implicit global.
var interval = setInterval(executeRequestsStepByStep,5000);

/**
 * Timer callback that drives the `steps` list.
 *
 * On each tick it does exactly one of the following:
 *  - nothing, while a page load is still in progress;
 *  - runs the next pending step and advances `testindex`;
 *  - finishes: stops the timer and exits PhantomJS once no step is left.
 *
 * Finishing always happens on a later tick than the last step, and never while
 * a load is in progress, so a final step that starts a navigation is not cut
 * off by an immediate exit.
 */
function executeRequestsStepByStep(){
    // Wait for the current navigation to finish before doing anything else.
    if (loadInProgress === true) {
        return;
    }
    if (typeof steps[testindex] === "function") {
        //console.log("step " + (testindex + 1));
        steps[testindex]();
        testindex++;
        return;
    }
    console.log("test complete!");
    // Stop polling before exiting; the guard keeps a direct call (no timer) safe.
    if (typeof interval !== "undefined") {
        clearInterval(interval);
    }
    phantom.exit();
}

/**
 * Page listeners that keep the loadInProgress flag in sync with the page.
 * The step runner checks that flag before running a step, so without these
 * handlers a step could read the page before it has finished loading.
 */

// Builds a handler that records the loading state and logs the given message.
function trackLoading(isLoading, message) {
    return function() {
        loadInProgress = isLoading;
        console.log(message);
    };
}

page.onLoadStarted = trackLoading(true, 'Loading started');
page.onLoadFinished = trackLoading(false, 'Loading finished');

// Relay console messages received from the page to this script's console.
page.onConsoleMessage = function(text) {
    console.log(text);
};

但得到的响应只有这样:

<html><head></head><body>ok</body></html>

我需要使用URL获取下一页的内容:

http://parceriascury.housecrm.com.br/parceiro_busca

我可以直接访问此页面,但看不到完整的内容,因为它需要登录。

没有错误,我不知道我在哪里犯了错误。

编辑:也欢迎其他解决方案,我想也许可以用 curl……但需要在 JS 加载完成之后……

对不起,我的英语不好。

这段代码可能更好:

var loadInProgress = false;//This is set to true when a page is still loading

/*********SETTINGS*********************/
var page = require('webpage').create({viewportSize:{width: 1600,height: 900},
settings:{userAgent:'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36',
javascriptEnabled:'true',
loadImages:'false'
}});
var fs = require('fs');
/*********SETTINGS END*****************/
console.log('All settings loaded, start with execution');

/**
 * Page listeners: they maintain the loadInProgress flag (true between
 * "load started" and "load finished") and relay the page's console output.
 */

// Marks the page as loading.
function handleLoadStarted() {
    loadInProgress = true;
    console.log('Loading started');
}

// Marks the page as no longer loading.
function handleLoadFinished() {
    loadInProgress = false;
    console.log('Loading finished');
}

// Prints a console message received from the page.
function handleConsoleMessage(text) {
    console.log(text);
}

page.onLoadStarted = handleLoadStarted;
page.onLoadFinished = handleLoadFinished;
page.onConsoleMessage = handleConsoleMessage;

// Cookie-based login: sign in once in a regular browser, copy the cookies the
// site gave you, and preload them here so the site recognizes the session.
// The values below are the freebitco.in example; both cookies share the same
// domain, path, flags and lifetime.
// NOTE(review): credentials are hard-coded here; consider loading them from outside the script.

// Expiry timestamp in milliseconds: now + 43800 hours (5 years).
function cookieExpiry() {
    return (new Date()).getTime() + (1000 * 60 * 60 * 43800);
}

// Builds one freebitco.in cookie object with the shared attributes.
function buildAuthCookie(cookieName, cookieValue) {
    return {
        'name'     : cookieName,
        'value'    : cookieValue,
        'domain'   : 'freebitco.in',
        'path'     : '/',
        'httponly' : false,
        'secure'   : true,
        'expires'  : cookieExpiry()
    };
}

phantom.cookies = [
    buildAuthCookie('btc_address', '1AuMxR6sPtB2Z6TkahSnpmm1H4KpYPBKqe'),
    buildAuthCookie('password', 'f574ca68a8650d1264d38da4b7687ca3bf631e6dfc59a98c89dd2564c7601f84')
];

//Execute steps one by one:
// 1. open the target page (the cookies set above are expected to authenticate the request);
// 2. after a short wait, save the page's HTML to disk and exit.
page.open("http://parceriascury.housecrm.com.br/parceiro_busca", function(status){
    // Stop here when the page did not load, instead of reporting success and
    // saving whatever happens to be in the page.
    if (status !== "success") {
        console.log('Step 1 failed - could not load the target page (status: ' + status + ')');
        phantom.exit(1);
        return;
    }
    console.log('Step 1 has been completed - we are on the target page!');
    setTimeout(step2,5000);// Maybe we don't need to wait here, we can execute step2 immediately.

    // Dumps the current document's HTML to the output file, then ends the script.
    function step2(){
        console.log("Step 2 - Content of the page is saved to C:\\phantomjs\\logado_cury_10.html. You can open this file using Chrome to ensure that you are logged in.");
        var result = page.evaluate(function(){ return document.documentElement.outerHTML; });
        fs.write('C:\\phantomjs\\logado_cury_10.html',result,'w');
        phantom.exit();
    }
});

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM