[英]How can I get puppeteer to explore the links on a page until it finds a login form?
我正在編寫一個腳本,它將動態查找並填寫給定 URL 的登錄表單。 只要登錄表單可以在給定 URL 的第一頁上找到,它就可以工作(在一些站點上)。
在找不到登錄表單的情況下,我希望腳本探索頁面上的每個鏈接,直到找到一個。 檢查每個鏈接,然后返回到最初的 URL。 我只需要 go 一層深,我認為不太可能找到比這更深的登錄表單,我不需要它來處理 100% 的 URL。
如果您現在想查看代碼:
const puppeteer = require('puppeteer');
const C = require('./constants');
var USERNAME_SELECTOR;
var PASSWORD_SELECTOR;
var CTA_SELECTOR;
const URLL = process.argv[2];
var usernameFieldWasFound = false;
async function startBrowser() {
const browser = await puppeteer.launch({
headless: false})
const page = await browser.newPage();
return {browser, page};
}
async function closeBrowser(browser) {
return browser.close();
}
(async () => {
await playTest(URLL);
process.exit(1);
})();
async function playTest(url) {
const {browser, page} = await startBrowser();
page.setViewport({width: 1366, height: 768});
await page.goto(url);
await delay(5000);
if (await page.$('#si_username') !== null) {
USERNAME_SELECTOR = '#si_username';
console.log ('Changed username field selector to #si_username')
usernameFieldWasFound = true; }
if (await page.$('[name="username"]') !== null) {
USERNAME_SELECTOR = '[name="username"]'
console.log ('Changed username field selector to name=username with quotes')
usernameFieldWasFound = true; }
if (await page.$('#username') !== null) {
USERNAME_SELECTOR = '#username';
console.log ('Changed username field selector to #username')
usernameFieldWasFound = true; }
if (await page.$('#email') !== null) {
USERNAME_SELECTOR = '#email';
console.log ('Changed username field selector to #email')
usernameFieldWasFound = true; }
if (usernameFieldWasFound == false) {
console.log('No username field was found, exploring links...')
// this is where I need to add code to explore each link and then go back to original URL
}
await page.waitForSelector(USERNAME_SELECTOR);
await page.click(USERNAME_SELECTOR);
await page.keyboard.type(C.username);
if (await page.$('#next') !== null) {
CTA_SELECTOR = '#next';
console.log ('Changed button selector to #next')
await page.click(CTA_SELECTOR);}
else console.log('not found');
if (await page.$('#si_password') !== null) {
PASSWORD_SELECTOR = '#si_password';
console.log ('Changed password field selector to #si_password')}
else console.log('not found');
if (await page.$('#password') !== null) {
PASSWORD_SELECTOR = '#password';
console.log ('Changed password field selector to #password')}
else console.log('not found');
if (await page.$('#pass') !== null) {
PASSWORD_SELECTOR = '#pass';
console.log ('Changed password field selector to #pass')}
else console.log('not found');
if (await page.$('[type="password"]') !== null) {
PASSWORD_SELECTOR = '[type="password"]'
console.log ('Changed button selector to type=password with quotes')}
else console.log('not found');
await page.click(PASSWORD_SELECTOR);
await page.keyboard.type(C.password);
await delay(2000);
if (await page.$([type="submit"]) !== null) {
CTA_SELECTOR = [type="submit"];
console.log ('Changed button selector type=submit without quotes')}
else console.log('not found');
if (await page.$('[type="submit"]') !== null) {
CTA_SELECTOR = '[type="submit"]'
console.log ('Changed button selector to type=submit with quotes')}
else console.log('not found');
if (await page.$('#loginbutton') !== null) {
CTA_SELECTOR = '#loginbutton'
console.log ('Changed button selector to #loginbutton')}
else console.log('not found');
if (await page.$('#submit-btn') !== null) {
CTA_SELECTOR = '#submit-btn'
console.log ('Changed button selector to #submit-btn')}
else console.log('not found');
await page.click(CTA_SELECTOR);
await page.waitForNavigation();
await page.screenshot({path: 'screenshot.png'});
closeBrowser
}
function delay(time) {
return new Promise(function(resolve) {
setTimeout(resolve, time)
});
}
您可以只使用頁面 $('a') 來獲取所有鏈接,或者只是在評估中使用基本的 javascript
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.