[英]Playwright with python - Download file from CloudFlare
我正在尝试在 Python 中使用 Playwright 从受 Cloudflare 保护的站点下载此文件: https://www.historique-meteo.net/site/export.php?ville_id=1067
但一直无法成功……你能帮忙吗?
注意:下载不需要任何点击操作,只需等待约 5 秒的 JS 自动检测即可。
这是我的代码:
from playwright.sync_api import sync_playwright
def run(playwright):
    """Open the export URL in Chromium and try to save the file it downloads.

    NOTE(review): the "download" listener is attached only after ``goto`` has
    returned, and the context and browser are closed immediately after the
    listener is attached, so nothing in this function waits for a download
    event to arrive.
    """
    # Launch a visible (non-headless) Chromium window.
    browser = playwright.chromium.launch(headless=False)
    # Context created with downloads explicitly accepted.
    context = browser.new_context(accept_downloads=True)
    # Open new page
    page = context.new_page()
    # Go to site
    page.goto("https://www.historique-meteo.net/site/export.php?ville_id=1067")
    # Download: save any download under the filename the browser suggests.
    page.on("download", lambda download: download.save_as(download.suggested_filename))
    context.close()
    browser.close()
# Start Playwright's synchronous API and run the download attempt once.
with sync_playwright() as playwright:
    run(playwright)
非常感谢 :)
不保证成功。如果经常失败,请修改函数 async_cf_retry 中的参数。
这段代码是我从自己的一个较大的模块修改而来的,所以可能不够优雅。
import re
import asyncio
from playwright.async_api import async_playwright, Error, Page
from cf_clearance import stealth_async
import httpx
# local_client = httpx.AsyncClient(verify=False)
async def async_cf_retry(page: Page, tries=10, *, challenge_wait=5, error_wait=1) -> bool:
    """Poll the page title until it no longer looks like a pending challenge.

    Args:
        page: Page whose title is inspected; it is reloaded or closed in
            some branches (see below).
        tries: Number of attempts before giving up. Use ``tries=-1`` for
            infinite retries (the counter never reaches zero).
        challenge_wait: Seconds to sleep after seeing the 'Just a moment...'
            title or after reloading the page. Defaults to the original
            hard-coded 5 seconds.
        error_wait: Seconds to sleep after ``page.title()`` raised a
            Playwright ``Error``. Defaults to the original 1 second.

    Returns:
        True as soon as the title matches none of the recognised titles,
        False once ``tries`` attempts have been used up.

    Raises:
        NotImplementedError: if the title is 'Please Wait... | Cloudflare';
            the page is closed before raising.
    """
    # excerpted from `from cf_clearance import async_retry`
    while tries != 0:
        try:
            title = await page.title()
        except Error:
            # Reading the title failed; count the attempt and retry shortly.
            tries -= 1
            await asyncio.sleep(error_wait)
            continue

        if title == 'Please Wait... | Cloudflare':
            await page.close()
            raise NotImplementedError('Encountered recaptcha. Check whether your proxy is an elite proxy.')

        if title == 'Just a moment...':
            # Presumably the challenge page is still showing: just wait.
            pass
        elif "www." in title:
            # NOTE(review): presumably the title is still the bare URL
            # (page not rendered yet) -- reload before waiting.
            await page.reload()
        else:
            return True

        tries -= 1
        await asyncio.sleep(challenge_wait)
    return False
async def get_client_with_clearance(
        proxy: str = None,
        *,
        url: str = "https://www.historique-meteo.net/site/export.php?ville_id=1067",
        keep_open: float = 10000,
):
    """Open ``url`` in Chromium, wait for the challenge check, and return the clearance.

    A "download" listener on the page prints each download and saves it under
    the filename the browser suggests.

    Args:
        proxy: Optional proxy server URL; a string is wrapped as
            ``{'server': proxy}`` before being passed to the browser launch.
        url: Page to navigate to. Defaults to the originally hard-coded
            export URL.
        keep_open: Seconds to sleep after the clearance was read, before the
            Playwright session is left. Defaults to the original 10000.

    Returns:
        ``(ua, cookies_for_httpx)``: the ``navigator.userAgent`` string and a
        ``{name: value}`` dict built from the page context's cookies.

    Raises:
        InterruptedError: if ``async_cf_retry`` reports failure; the page is
            closed first.
    """
    async def get_one_clearance(proxy=proxy, logs=False):
        # proxy = {"server": "socks5://localhost:7890"}
        if isinstance(proxy, str):
            proxy = {'server': proxy}
        async with async_playwright() as p:
            browser = await p.chromium.launch(
                headless=False,
                proxy=proxy,
                args=["--window-position=1000,800", "--disable-web-security",
                      "--disable-webgl"],
            )
            page = await browser.new_page()  # viewport={"width": 0, "height": 0})
            await stealth_async(page)

            # Download
            async def on_download(download):
                print('download', download)
                await download.save_as(download.suggested_filename)
            page.on("download", on_download)

            if logs:
                def log_response(intercepted_response):
                    print("a response was received:", intercepted_response.url)
                page.on("response", log_response)

            await page.goto(url)
            if not await async_cf_retry(page):
                await page.close()
                raise InterruptedError("cf challenge fail")

            cookies = await page.context.cookies()
            cookies_for_httpx = {cookie['name']: cookie['value'] for cookie in cookies}
            ua = await page.evaluate('() => {return navigator.userAgent}')
            # NOTE(review): presumably this pause keeps the browser alive so a
            # download started by the page can finish -- confirm.
            await asyncio.sleep(keep_open)
            return ua, cookies_for_httpx

    ua, cookies_for_httpx = await get_one_clearance(logs=True)
    print(cookies_for_httpx)
    # Hand the clearance back instead of discarding it, so callers can reuse it.
    return ua, cookies_for_httpx
# Run the whole flow once and print whatever it returns. asyncio.run() creates
# and closes its own event loop; asyncio.get_event_loop() is deprecated when
# no loop is running.
print(asyncio.run(get_client_with_clearance(
    # proxy='http://localhost:8888'
    # use proxifier on windows as an elite proxy
)))
如果控制台中显示 download <Download url='https://www.historique-meteo.net/site/export.php?ville_id=1067' suggested_filename='export-aix-les-bains.csv'>
,则说明浏览器应该正在下载您的文件。
即使文件已经下载完成,此代码也可能会继续尝试获取 Cloudflare 的放行许可(clearance)。如果发生这种情况,请在 async_cf_retry
中调整您的重试策略。
使用 Playwright 提供的 persistent context(持久化上下文)
来保存并重复使用您的 Cloudflare 许可。也可以考虑使用 httpx
进行下载。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.