I'm trying to have Puppeteer visit each of several URLs stored in an array, but it does not work: the script hangs forever after the first navigation.
// Excerpt of the failing part of the script. `page` and `pageNumberToNumber`
// are not defined in this excerpt; they come from the full script shown below it.
// Create the array of result-page URLs.
let urlList = [];
// Add one URL per result page; only the CurrentPage value in the fragment changes.
// NOTE(review): i starts at 0, so the first URL carries CurrentPage=0 — confirm
// whether the site's paging is 1-based.
for (let i = 0; i < pageNumberToNumber; i++) {
urlList.push(
`https://www.realtor.ca/map#ZoomLevel=14&Center=43.771556%2C-79.427630&LatitudeMax=43.78395&LongitudeMax=-79.38710&LatitudeMin=43.75916&LongitudeMin=-79.46816&view=list&CurrentPage=${i}&Sort=6-D&PGeoIds=g10_dpz90869&GeoName=Willowdale%2C%20Toronto%2C%20ON&PropertyTypeGroupID=1&PropertySearchTypeId=1&TransactionTypeId=2&PriceMax=1500000&BuildingTypeId=1&Currency=CAD`
);
}
console.log(urlList);
console.log(urlList.length);
// Visit every URL in order, reusing the same page.
for (let i = 0; i < urlList.length; i++) {
const url = urlList[i];
await page.goto(`${url}`);
// This is the reported hang: goto() has already completed the navigation, so
// this call waits for another navigation event that never arrives.
await page.waitForNavigation({ waitUntil: "networkidle2" });
}
/**
 * Full script from the question.
 *
 * Launches a visible browser, loads the first realtor.ca result page, scrapes
 * every `.cardCon` listing card (price, details, address, url), reads the total
 * page count from the bottom pagination, builds one URL per page and then tries
 * to visit each of them with the same page object.
 */
const puppeteer = require("puppeteer-extra");
// Enable stealth plugin with all evasions
puppeteer.use(require("puppeteer-extra-plugin-stealth")());
(async () => {
const browser = await puppeteer.launch({
headless: false,
});
const page = await browser.newPage();
let url =
"https://www.realtor.ca/map#ZoomLevel=14&Center=43.771556%2C-79.427630&LatitudeMax=43.78395&LongitudeMax=-79.38710&LatitudeMin=43.75916&LongitudeMin=-79.46816&view=list&CurrentPage=1&Sort=6-D&PGeoIds=g10_dpz90869&GeoName=Willowdale%2C%20Toronto%2C%20ON&PropertyTypeGroupID=1&PropertySearchTypeId=1&TransactionTypeId=2&PriceMax=1500000&BuildingTypeId=1&Currency=CAD";
await page.goto(url);
// Fixed 10 s pause before scraping (presumably to let the listing cards render).
// NOTE(review): page.waitFor is deprecated in newer Puppeteer releases — confirm
// against the installed version.
await page.waitFor(10000);
// page.evaluate runs the callback inside the page, as if it were typed into the
// browser's developer console; only the returned data comes back to Node.
const result = await page.evaluate(() => {
let data = []; // Create an empty array that will store our data
let elements = document.querySelectorAll(".cardCon"); // Select all listing cards
for (var element of elements) {
// Loop through each card. The values are located purely by position in the
// card's DOM tree, so any change to the site's markup breaks these lookups.
let price =
element.children[0].children[1].childNodes[1].children[1].childNodes[1]
.childNodes[3].innerText; // Select the price
let address =
element.children[0].children[1].childNodes[1].children[1].childNodes[1]
.childNodes[5].innerText; // Select the address
let details = element.children[0].children[1].childNodes[1].children[1].childNodes[3].innerText.replace(
/\n/g,
" "
); // Select the details, flattening line breaks into spaces
let url = element.children[0].childNodes[3].href; // Select the listing link (href)
data.push({ price, details, address, url }); // Push an object with the data onto our array
}
return data; // Return our data array
});
console.log(result);
// Get the total page number from the bottom pagination control.
let totalPageNumber = await page.$eval(
"#ListViewPagination_Bottom > div > div > div > span.paginationTotalPagesNum",
(element) => element.innerText
);
console.log(totalPageNumber);
// Convert the total page number from string to number.
// NOTE(review): parseInt is called without a radix, and a non-numeric label
// would yield NaN, leaving urlList empty.
let pageNumberToNumber = parseInt(totalPageNumber);
// Create the array of result-page URLs.
let urlList = [];
// Add one URL per result page; only the CurrentPage value in the fragment changes.
// NOTE(review): i starts at 0, so the first URL carries CurrentPage=0 although
// the initial load above used CurrentPage=1 — confirm the intended range.
for (let i = 0; i < pageNumberToNumber; i++) {
urlList.push(
`https://www.realtor.ca/map#ZoomLevel=14&Center=43.771556%2C-79.427630&LatitudeMax=43.78395&LongitudeMax=-79.38710&LatitudeMin=43.75916&LongitudeMin=-79.46816&view=list&CurrentPage=${i}&Sort=6-D&PGeoIds=g10_dpz90869&GeoName=Willowdale%2C%20Toronto%2C%20ON&PropertyTypeGroupID=1&PropertySearchTypeId=1&TransactionTypeId=2&PriceMax=1500000&BuildingTypeId=1&Currency=CAD`
);
}
console.log(urlList);
console.log(urlList.length);
// Visit every URL in order, reusing the same page.
for (let i = 0; i < urlList.length; i++) {
const url = urlList[i];
await page.goto(`${url}`);
// This is the reported hang: goto() has already completed the navigation, so
// this call waits for another navigation event that never arrives.
await page.waitForNavigation({ waitUntil: "networkidle2" });
}
// NOTE(review): the browser is never closed in this script.
})();
The navigation has already finished by the time `page.goto()` resolves, so the following `await page.waitForNavigation({ waitUntil: "networkidle2" });` never detects a navigation event and keeps waiting.
In your case you can pass an options argument to the `goto` call itself to choose the `waitUntil` strategy.
Replace this:
// Problematic pair: goto() resolves only after the navigation is done, so the
// waitForNavigation() that follows has no navigation event left to observe.
await page.goto(`${url}`);
await page.waitForNavigation({ waitUntil: "networkidle2" });
With this:
// Single call: goto() itself waits until the "networkidle2" condition is met.
await page.goto(`${url}`, { waitUntil: "networkidle2" });
Minimal example. Note that I open a new page for each URL to avoid the issues you are having with `goto` on this particular website.
const puppeteer = require("puppeteer-extra");
// Enable stealth plugin with all evasions
puppeteer.use(require("puppeteer-extra-plugin-stealth")());

/**
 * Resolve after the given number of milliseconds.
 *
 * Used instead of page.waitFor(), which is deprecated and missing from later
 * Puppeteer releases; a plain timer works with every version.
 *
 * @param {number} ms - Time to wait, in milliseconds.
 * @returns {Promise<void>} Promise that resolves once the time has elapsed.
 */
const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Minimal example: build one URL per result page and visit each of them in a
 * fresh tab, letting goto() itself wait for the "networkidle2" condition.
 */
(async function () {
  const browser = await puppeteer.launch({ headless: false });
  try {
    // Hard-coded for the example; the real script reads this from the page.
    const pageNumberToNumber = 3;

    // Create the array of URLs; pages are numbered from 1 to pageNumberToNumber.
    const urlList = [];
    for (let i = 1; i <= pageNumberToNumber; i++) {
      urlList.push(
        `https://www.realtor.ca/map#ZoomLevel=14&Center=43.771556%2C-79.427630&LatitudeMax=43.78395&LongitudeMax=-79.38710&LatitudeMin=43.75916&LongitudeMin=-79.46816&view=list&CurrentPage=${i}&Sort=6-D&PGeoIds=g10_dpz90869&GeoName=Willowdale%2C%20Toronto%2C%20ON&PropertyTypeGroupID=1&PropertySearchTypeId=1&TransactionTypeId=2&PriceMax=1500000&BuildingTypeId=1&Currency=CAD`
      );
    }
    console.log(urlList);
    console.log(urlList.length);

    for (const url of urlList) {
      // Creating a new page to load each url.
      const tempPage = await browser.newPage();
      try {
        await tempPage.goto(url, { waitUntil: "networkidle2" });
        // 5s wait to manually confirm page changed
        await delay(5000);
      } finally {
        // Close the tab even if the navigation failed, so tabs do not pile up.
        await tempPage.close();
      }
    }
  } finally {
    // Always shut the browser down, otherwise a failure leaves it running.
    await browser.close();
  }
})().catch((err) => {
  // Surface failures instead of leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});
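Alternatively, you can wrap both calls in a `Promise.all`, invoking `waitForNavigation` first so that it is already listening when the navigation starts:
await Promise.all([page.waitForNavigation({ waitUntil: "networkidle2" }), page.goto(url)]);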
The technical posts on this site are published under the CC BY-SA 4.0 license. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.