[英]How can I scroll a web page using selenium webdriver in python?
[英]How can I infinite-scroll a web page using selenium webdriver in python
我是 selenium 的新手,我试图弄清楚如何无限滚动我几乎尝试了其他 stackoverflow 所说的所有内容
1.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko")
driver = webdriver.Chrome('chromedriver', options=chrome_options)
driver.set_window_size(1320, 550)
exchange_link = "https://icodrops.com/ico-stats/"
driver.get(exchange_link)
wait = WebDriverWait(driver, 10)
SCROLL_PAUSE_TIME = 0.5
# Get scroll height
last_height = driver.execute_script("return document.body.scrollHeight")
while True:
# Scroll down to bottom
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
# Wait to load page
time.sleep(SCROLL_PAUSE_TIME)
# Calculate new scroll height and compare with last scroll height
new_height = driver.execute_script("return document.body.scrollHeight")
if new_height == last_height:
break
last_height = new_height
from selenium.webdriver.common.keys import Keys
Number = wait.until(EC.presence_of_element_located((By.XPATH,'html[1]/body[1]/div[1]/div[1]/div[1]/main[1]/div[1]/div[4]/div[2]/div[1]/div[1]/div[1]')))
lastElement = Number.find_elements(By.XPATH,'div')[-1]
lastElement.send_keys(Keys.NULL)
Number = wait.until(EC.presence_of_element_located((By.XPATH,'html[1]/body[1]/div[1]/div[1]/div[1]/main[1]/div[1]/div[4]/div[2]/div[1]/div[1]/div[1]')))
lastElement = Number.find_elements(By.XPATH,'div')[-1]
lastElement.location_once_scrolled_into_view
ETC
driver.execute_script("var scrollingElement = (document.scrollingElement || document.body);scrollingElement.scrollTop = scrollingElement.scrollHeight;")
driver.execute_script("document.getElementById('mydiv').scrollIntoView();")
idk 其他我能做的事情 我花了很多时间来修复它
您应该在execute_script
的帮助下逐个滚动每个 web 元素
代码:
driver = webdriver.Chrome(driver_path)
driver.maximize_window()
wait = WebDriverWait(driver, 30)
driver.get("https://icodrops.com/ico-stats/")
j = 1
while True:
ele = wait.until(EC.visibility_of_element_located((By.XPATH, f"(//div[@id='market-ico-stat-container']/div)[{j}]")))
driver.execute_script("arguments[0].scrollIntoView(true);", ele)
#scrape it here
进口:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
上面的代码永远不会中断并且会无限执行,为了克服这种行为,你应该引入这样的最大限制:
if j == 500:
break
但是,web 应用程序似乎检测到 selenium 脚本。
我能够通过下一个代码更改滚动它:
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko")
#extra options
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
driver = webdriver.Chrome('chromedriver', options=chrome_options)
driver.set_window_size(1320, 550)
exchange_link = "https://icodrops.com/ico-stats/"
driver.get(exchange_link)
SCROLL_PAUSE_TIME = 5 #5 seconds
time.sleep(SCROLL_PAUSE_TIME)
# Get scroll height
last_height = driver.execute_script("return document.body.scrollHeight")
for x in range(0, 10):
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
driver.find_element(BY.XPATH, "//body").send_keys(Keys.ARROW_UP)
time.sleep(SCROLL_PAUSE_TIME)
new_height = driver.execute_script("return document.body.scrollHeight")
print(x)
if new_height == last_height:
break
last_height = new_height
我已经用 Selenium 4(java 客户端)、Chrome 97、Windows 对此进行了测试。
我已将我的工作 java 代码转换为 python。
这段代码可能会得到改进和优化,但至少我希望它可以工作。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.