簡體   English   中英

如何讓 puppeteer 探索頁面上的鏈接,直到找到登錄表單?

[英]How can I get puppeteer to explore the links on a page until it finds a login form?

我正在編寫一個腳本,它將動態查找並填寫給定 URL 的登錄表單。 只要登錄表單可以在給定 URL 的第一頁上找到,它就可以工作(在一些站點上)。

在找不到登錄表單的情況下,我希望腳本探索頁面上的每個鏈接,直到找到一個。 檢查每個鏈接,然后返回到最初的 URL。 我只需要深入一層,我認為不太可能在更深的層級找到登錄表單,我也不需要它對 100% 的 URL 都有效。

如果您現在想查看代碼:

const puppeteer = require('puppeteer');
const C = require('./constants');
// Selectors chosen at runtime by playTest; they stay undefined until a
// matching element has been detected on the page.
let USERNAME_SELECTOR;
let PASSWORD_SELECTOR;
let CTA_SELECTOR;

// Target URL, taken from the first command-line argument.
const URLL = process.argv[2];

// Flipped to true by playTest once any username field candidate matches.
let usernameFieldWasFound = false;



/**
 * Launches a visible (non-headless) browser through puppeteer and opens one
 * new page in it.
 *
 * @returns {Promise<{browser: object, page: object}>} The launched browser and
 *   the page that was opened in it.
 */
async function startBrowser() {
  const launchOptions = {headless: false};
  const browser = await puppeteer.launch(launchOptions);
  const page = await browser.newPage();
  return {browser, page};
}



/**
 * Closes the given browser.
 *
 * @param {object} browser - Object exposing a `close()` method.
 * @returns {Promise<*>} Settles with whatever `browser.close()` settles with.
 */
async function closeBrowser(browser) {
  const outcome = await browser.close();
  return outcome;
}



// Script entry point: runs the login flow against the URL given on the
// command line and reports the outcome through the process exit code
// (0 = success, 1 = failure). The original always exited with 1, which made
// every run look like a failure to the calling shell.
(async () => {
  let exitCode = 0;
  try {
    if (!URLL) {
      throw new Error('Missing URL argument. Usage: node <script> <url>');
    }
    await playTest(URLL);
  } catch (err) {
    // Surface the failure instead of leaving an unhandled promise rejection.
    console.error(err);
    exitCode = 1;
  }
  // Exit explicitly so a browser that is still open cannot keep the process alive.
  process.exit(exitCode);
})();


/**
 * Probes `page` for every selector in `selectors`, in order.
 *
 * The last selector that matches wins, which mirrors the original chain of
 * independent `if` statements where a later match overwrote an earlier one.
 *
 * @param {object} page - Puppeteer page to query.
 * @param {string[]} selectors - CSS selectors to try.
 * @returns {Promise<string|null>} The last matching selector, or null if none match.
 */
async function findLastMatchingSelector(page, selectors) {
  let match = null;
  for (const selector of selectors) {
    if (await page.$(selector) !== null) {
      match = selector;
    }
  }
  return match;
}

/**
 * Visits each distinct http(s) link on the current page (one level deep)
 * until a page containing one of `selectors` is found.
 *
 * On success the page is left on the linked page so the caller can continue
 * there. If no linked page matches, the page is navigated back to where the
 * exploration started.
 *
 * @param {object} page - Puppeteer page currently showing the start URL.
 * @param {string[]} selectors - CSS selectors to look for on each linked page.
 * @returns {Promise<string|null>} The matching selector, or null if no linked page has one.
 */
async function findSelectorViaLinks(page, selectors) {
  const withoutFragment = (href) => href.split('#')[0];
  const startUrl = page.url();

  // `a.href` is read inside the browser, so relative links arrive already resolved.
  const hrefs = await page.$$eval('a[href]', (anchors) => anchors.map((a) => a.href));

  // Seeding with the start page skips same-page "#fragment" links.
  const visited = new Set([withoutFragment(startUrl)]);
  for (const href of hrefs) {
    const target = withoutFragment(href);
    // Skip mailto:, javascript:, tel: etc. as well as duplicates.
    if (!/^https?:\/\//i.test(target) || visited.has(target)) {
      continue;
    }
    visited.add(target);

    try {
      await page.goto(target, {waitUntil: 'networkidle2'});
    } catch (err) {
      // One broken link should not abort the whole search.
      console.log(`Skipping ${target}: ${err.message}`);
      continue;
    }

    const match = await findLastMatchingSelector(page, selectors);
    if (match !== null) {
      console.log(`Found a login form candidate on ${target}`);
      return match;
    }
  }

  await page.goto(startUrl);
  return null;
}

/**
 * Opens `url` in a new browser, locates a login form, fills in the
 * credentials from `C.username` / `C.password`, submits the form and saves
 * `screenshot.png`.
 *
 * If the first page has no recognizable username field, every link on that
 * page is explored one level deep until one is found. The chosen selectors
 * are also stored in the module-level USERNAME_SELECTOR, PASSWORD_SELECTOR
 * and CTA_SELECTOR variables, as before.
 *
 * @param {string} url - Page on which to start looking for the login form.
 * @returns {Promise<void>}
 * @throws {Error} If no username field, password field or submit button can be located.
 */
async function playTest(url) {
  // Candidates are listed in the original probing order; the last match wins.
  const usernameCandidates = ['#si_username', '[name="username"]', '#username', '#email'];
  const passwordCandidates = ['#si_password', '#password', '#pass', '[type="password"]'];
  const submitCandidates = ['[type="submit"]', '#loginbutton', '#submit-btn'];

  const {browser, page} = await startBrowser();
  try {
    await page.setViewport({width: 1366, height: 768});
    await page.goto(url);

    // Give script-rendered login forms time to appear.
    await delay(5000);

    // Reset so state from an earlier call cannot leak into this one.
    usernameFieldWasFound = false;
    let usernameSelector = await findLastMatchingSelector(page, usernameCandidates);
    if (usernameSelector === null) {
      console.log('No username field was found, exploring links...');
      usernameSelector = await findSelectorViaLinks(page, usernameCandidates);
    }
    if (usernameSelector === null) {
      throw new Error(`No username field was found on ${url} or on any page it links to`);
    }
    USERNAME_SELECTOR = usernameSelector;
    usernameFieldWasFound = true;
    console.log(`Changed username field selector to ${USERNAME_SELECTOR}`);

    await page.waitForSelector(USERNAME_SELECTOR);
    await page.click(USERNAME_SELECTOR);
    await page.keyboard.type(C.username);

    // Two-step logins show a "next" button before the password field.
    let buttonSelector = null;
    if (await page.$('#next') !== null) {
      buttonSelector = '#next';
      CTA_SELECTOR = buttonSelector;
      console.log('Changed button selector to #next');
      await page.click(buttonSelector);
    } else {
      console.log('not found');
    }

    const passwordSelector = await findLastMatchingSelector(page, passwordCandidates);
    if (passwordSelector === null) {
      throw new Error('No password field was found');
    }
    PASSWORD_SELECTOR = passwordSelector;
    console.log(`Changed password field selector to ${PASSWORD_SELECTOR}`);

    await page.click(PASSWORD_SELECTOR);
    await page.keyboard.type(C.password);

    await delay(2000);

    // Fall back to the "#next" button (if one was seen) when no dedicated
    // submit control exists, as the original did.
    const submitSelector = await findLastMatchingSelector(page, submitCandidates);
    if (submitSelector !== null) {
      buttonSelector = submitSelector;
    }
    if (buttonSelector === null) {
      throw new Error('No submit button was found');
    }
    CTA_SELECTOR = buttonSelector;
    console.log(`Changed button selector to ${CTA_SELECTOR}`);

    // Start waiting before clicking so a fast navigation cannot be missed.
    await Promise.all([
      page.waitForNavigation(),
      page.click(CTA_SELECTOR),
    ]);
    await page.screenshot({path: 'screenshot.png'});
  } finally {
    // The original only referenced closeBrowser without calling it.
    await closeBrowser(browser);
  }
}

/**
 * Creates a promise that settles after a timer fires.
 *
 * @param {number} time - Delay passed to setTimeout, in milliseconds.
 * @returns {Promise<undefined>} Resolves with no value once the timer fires.
 */
function delay(time) {
  const armTimer = (wake) => {
    setTimeout(wake, time);
  };
  return new Promise(armTimer);
}

您可以直接使用 page.$$('a') 來獲取所有鏈接,或者在 page.evaluate 中使用基本的 JavaScript

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM