![](/img/trans.png)
[英]Retrieving url from google image search for first entry, using python and selenium
[英]I want to get first 10 images url from google search using Selenium Python
我想從谷歌圖片搜索中獲取前 10 個圖像的 url(真實的 url,而不是 base64)。 我有以下代碼:
import os
import base64
import time
from selenium.webdriver.common.keys import Keys
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
# Script: open a Google Images result page, click the first thumbnails one by
# one and print the `src` attribute of the enlarged preview image.
searchterm = 'bananas'  # will also be the name of the folder
url = "https://www.google.com/search?q=banan&source=lnms&tbm=isch&sa=X&ved=2ahUKEwj-75rDlJLoAhWLHHcKHStFC6EQ_AUoAXoECA4QAw&biw=1867&bih=951"

options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
browser = webdriver.Chrome(executable_path=ChromeDriverManager().install(), options=options)
browser.get(url)
actions = webdriver.common.action_chains.ActionChains(browser)
header = {
    'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"}
counter = 0
succounter = 0

# Create the output folder named after the search term if it is missing.
if not os.path.exists(searchterm):
    os.mkdir(searchterm)

for i in range(0, 11):
    # Give the page a moment to render before looking up the thumbnails.
    time.sleep(1)
    x = browser.find_elements_by_xpath('//*[@id="islrg"]/descendant::img')[i]
    x.click()
    i += 1
    # On the 11th pass the thumbnail is clicked but nothing is printed,
    # so exactly ten `src` values are emitted.
    if i > 10:
        break
    # The XPath must be a single string literal; it targets the <img> inside
    # the preview panel that opens after the click.
    ba = browser.find_element_by_xpath(
        '//*[@id="Sva75c"]/div/div/div[3]/div[2]/div/div[1]/div[1]/div/div[2]/a/img')
    # `src` may hold either a regular URL or an inline base64 data URI.
    print(ba.get_attribute('src'))
它返回圖像 url,但有時返回 base64。 如何使腳本始終返回圖像 url? 謝謝你。
更改 xpath 以獲取鏈接而不是圖像,然後獲取 href。
# Locate the result's anchor element (instead of the <img>) and print its href.
ba = browser.find_element_by_xpath("//div[@class='islrc']//a[@href][@rel='noopener']")
print(ba.get_attribute("href"))
如果您使用以下代碼抓取另一個搜索引擎DuckDuckGo,您始終只能獲得圖像 URL:
# Script: search DuckDuckGo, switch to the Images tab and print the `data-id`
# attribute of the first `num_images` image results.
from urllib.parse import quote_plus

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

search_query = 'what you want to find'
num_images = 10
driver_location = '/put/location/of/your/driver/here'

# setting up the driver
ser = Service(driver_location)
op = webdriver.ChromeOptions()
driver = webdriver.Chrome(service=ser, options=op)

try:
    # searching the query (URL-encoded so spaces and symbols survive intact)
    driver.get(f'https://duckduckgo.com/?q={quote_plus(search_query)}&kl=us-en&ia=web')

    # going to Images Section
    ba = driver.find_element(By.XPATH, "//a[@class='zcm__link js-zci-link js-zci-link--images']")
    ba.click()

    # getting the images URLs
    for result in driver.find_elements(By.CSS_SELECTOR, '.js-images-link')[:num_images]:
        imageURL = result.get_attribute('data-id')
        print(f'{imageURL}\n')
finally:
    # Always shut the browser down, even if a lookup above raised.
    driver.quit()
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.