繁体   English   中英

如何在云函数(Cloud Function)环境中将 webdriver_manager 的缓存路径更改为自定义路径

[英]How to change path to webdriver_manager to custom path in the cloud function environment

我正在尝试在云函数(Cloud Function)上创建一个无头(headless)网页爬虫。我使用 Selenium,并通过 webdriver_manager 自动下载和管理浏览器驱动程序。

你能告诉我如何根据虚拟环境更改 wdm.cachePath 吗? 下面是我的代码和我得到的错误。

import os
import logging
# selenium 4
# Environment configuration for webdriver_manager, set before the package
# is imported further down.
# NOTE(review): the token is hard-coded in source; it looks like a
# placeholder, but confirm it is not a real credential. Per the
# webdriver_manager README, GH_TOKEN is only used to authenticate GitHub
# downloads.
os.environ['GH_TOKEN'] = "gkjkjhjkhjhkjhuihjhgjhg"
# logging.NOTSET is the integer 0; the value is stored as the string "0".
os.environ['WDM_LOG'] = str(logging.NOTSET)
# NOTE(review): per the webdriver_manager README, WDM_LOCAL='1' moves the
# driver cache from the user's home directory to the project root --
# confirm that directory is writable in the deployment environment.
os.environ['WDM_LOCAL'] = '1'
# NOTE(review): per the webdriver_manager README, '0' disables SSL
# certificate verification for driver downloads -- confirm this is intended.
os.environ['WDM_SSL_VERIFY'] = '0'

from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options

def hello_world(request):
    """Start a headless Chrome session and load a page on request.

    The Chrome driver is created for every call, before the request body
    is inspected.

    Args:
        request (flask.Request): HTTP request object. Its JSON body is
            checked for a ``url`` key.
    Returns:
        ``'Success!'`` when the JSON body contains a ``url`` key,
        ``'Not run'`` otherwise. See
        `make_response <http://flask.pocoo.org/docs/1.0/api/#flask.Flask.make_response>`
        for how the returned text becomes a Response object.
    """
    # instance of Options class allows
    # us to configure Headless Chrome
    options = Options()
    print("options")
    options.headless = True
    # Driver version "2.26" is requested and the download/cache directory is
    # overridden through `path`.
    # NOTE(review): the raw string keeps every backslash literally, so the
    # value is `.\\temp\\Drivers` with doubled backslashes, and it is relative
    # to the current working directory. Backslashes are not path separators on
    # POSIX systems -- confirm the intended directory and that it is writable
    # where this function is deployed.
    driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager("2.26", cache_valid_range=1, path = r".\\temp\\Drivers").install()
), options=options)

    print("driver was initiated")
    
      
    # this parameter tells Chrome that
    # it should be run without UI (Headless)
   
      
    # initializing webdriver for Chrome with our options
    # driver = webdriver.Chrome(options=options)
    
    # driver = webdriver.Chrome(ChromeDriverManager(path = r"/temp/data").install())      
    
    request_json = request.get_json()
    
    if request_json and 'url' in request_json:
        # NOTE(review): `url` is read from the request but never used; the
        # page loaded on the next line is hard-coded.
        url = request_json['url']
        driver.get('https://www.geeksforgeeks.org')
        print(driver.title)
        driver.close()
        return f'Success!'
    else:
        # NOTE(review): the driver created above is not closed on this path.
        return f'Not run'
    

错误日志 -

Traceback (most recent call last): File "/layers/google.python.pip/pip/lib/python3.10/site-packages/flask/app.py", line 2525, in wsgi_app response = self.full_dispatch_request() File "/layers/google.python.pip/pip/lib/python3.10/site-packages/flask/app.py", line 1822, in full_dispatch_request rv = self.handle_user_exception(e) File "/layers/google.python.pip/pip/lib/python3.10/site-packages/flask/app.py", line 1820, in full_dispatch_request rv = self.dispatch_request() File "/layers/google.python.pip/pip/lib/python3.10/site-packages/flask/app.py", line 1796, in dispatch_request return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args) File "/layers/google.python.pip/pip/lib/python3.10/site-packages/functions_framework/__init__.py", line 98, in view_func return function(request._get_current_object()) File "/workspace/main.py", line 28, in hello_world driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager("2.26", cache_valid_range=1, path = r".\\temp\\Drivers").install() File "/layers/google.python.pip/pip/lib/python3.10/site-packages/webdriver_manager/chrome.py", line 39, in install driver_path = self._get_driver_path(self.driver) File "/layers/google.python.pip/pip/lib/python3.10/site-packages/webdriver_manager/core/manager.py", line 31, in _get_driver_path binary_path = self.driver_cache.save_file_to_cache(driver, file) File "/layers/google.python.pip/pip/lib/python3.10/site-packages/webdriver_manager/core/driver_cache.py", line 45, in save_file_to_cache archive = save_file(file, path) File "/layers/google.python.pip/pip/lib/python3.10/site-packages/webdriver_manager/core/utils.py", line 38, in save_file os.makedirs(directory, exist_ok=True) File "/layers/google.python.runtime/python/lib/python3.10/os.py", line 215, in makedirs makedirs(head, exist_ok=exist_ok) File "/layers/google.python.runtime/python/lib/python3.10/os.py", line 215, in makedirs makedirs(head, exist_ok=exist_ok) File 
"/layers/google.python.runtime/python/lib/python3.10/os.py", line 215, in makedirs makedirs(head, exist_ok=exist_ok)

我认为该错误是由于 webdriver_manager 试图把驱动程序保存到某个固定(静态)的缓存路径而引起的,因此我已经通过以下参数更改了路径设置:

path = r".\\temp\\Drivers"

如何正确地做到这一点?

所以我想通了...

import os
import logging
# selenium 4

# Set before webdriver_manager is imported below. logging.NOTSET is the
# integer 0, so the variable holds the string "0".
# NOTE(review): per the webdriver_manager README this turns its log output
# off -- confirm against the installed version.
os.environ['WDM_LOG'] = str(logging.NOTSET)

from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from selenium.webdriver import Chrome

def hello_world(request):
    """Scrape review texts from the page named in the request's ``url`` field.

    The request is validated before any browser is started, so a request
    without a usable ``url`` costs nothing. When a ``url`` is present, a
    headless Chrome session loads the page, scrolls the review pane to
    trigger lazy loading, and prints the collected review texts.

    Args:
        request (flask.Request): HTTP request object. Its JSON body is
            expected to be an object with a ``url`` key.
    Returns:
        ``'Success!'`` after the page has been scraped, or ``'Not run'``
        when the body is missing, is not JSON, is not a JSON object, or
        has no ``url`` key.
    Raises:
        Exception: anything raised by Selenium or by parsing the review
            count propagates to the caller; the browser is shut down first.
    """
    # XPaths are tied to the current markup of the target page.
    pane_xpath = '//*[@id="QA0Szd"]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]'
    count_xpath = '//*[@id="QA0Szd"]/div/div/div[1]/div[2]/div/div[1]/div/div/div[2]/div[2]/div/div[2]/div[2]'
    max_scrolls = 500          # hard cap on scroll attempts
    scroll_pause = 1.5         # seconds to let lazily loaded reviews arrive
    max_stalled_scrolls = 3    # give up after this many scrolls with no growth

    # silent=True yields None for a missing or malformed JSON body instead
    # of raising, so such requests take the 'Not run' path.
    request_json = request.get_json(silent=True)
    if not isinstance(request_json, dict) or 'url' not in request_json:
        return 'Not run'
    url = request_json['url']

    opts = Options()
    opts.add_experimental_option("detach", True)
    # Pass the flag directly: the `headless` attribute is not available in
    # every Selenium 4 release, while the argument works in all of them.
    opts.add_argument("--headless")

    driver = webdriver.Chrome(
        service=ChromeService(ChromeDriverManager(cache_valid_range=1).install()),
        options=opts,
    )
    print("driver was initiated")

    try:
        driver.get(url)
        # Give the page time to finish rendering before locating elements.
        time.sleep(3)

        scrollable_div = driver.find_element('xpath', pane_xpath)
        # The element text starts with the count, e.g. "1,234 reviews".
        count_text = driver.find_element('xpath', count_xpath).text
        total_reviews = int(count_text.split(' ')[0].replace(',', ''))
        time.sleep(3)
        print(total_reviews)

        # Scroll the pane to the bottom repeatedly. Stop early once the pane
        # has stopped growing for several consecutive attempts, so the
        # function does not burn its execution time limit on no-op scrolls.
        last_height = driver.execute_script(
            'return arguments[0].scrollHeight', scrollable_div)
        stalled = 0
        for _ in range(min(total_reviews, max_scrolls)):
            driver.execute_script(
                'arguments[0].scrollTop = arguments[0].scrollHeight',
                scrollable_div)
            time.sleep(scroll_pause)
            height = driver.execute_script(
                'return arguments[0].scrollHeight', scrollable_div)
            if height == last_height:
                stalled += 1
                if stalled >= max_stalled_scrolls:
                    break
            else:
                stalled = 0
                last_height = height

        response = BeautifulSoup(driver.page_source, 'html.parser')
        restaurant__reviews = [
            review.text for review in response.find_all('span', class_='wiI7pd')
        ]
        print(restaurant__reviews)
    finally:
        # quit() ends the whole browser session and the chromedriver
        # process; close() would only close the current window.
        driver.quit()

    return 'Success!'

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM