简体   繁体   中英

Python, Selenium and Chromedriver

I'm trying to test out chromedriver and headless Chrome on this site: https://car.gocompare.com/vehicle

When I try with normal (non-headless) Chrome, it works fine: I get a response for the car registration I've entered.

When I use headless Chrome, it says the car cannot be found.

Does anyone know what could be wrong? Is it the driver, or is the website not returning the results? It seems to work with Firefox, so it's a little strange.

        from selenium import webdriver
        from selenium.webdriver.common.by import By
        from selenium.webdriver.common.keys import Keys
        from selenium.webdriver.common.action_chains import ActionChains
        from selenium.webdriver.support.ui import Select
        from selenium.common.exceptions import NoSuchElementException

        # Ability to run headless
        from selenium.webdriver.firefox.options import Options as f_Options
        from selenium.webdriver.chrome.options import Options as c_Options
        from selenium.webdriver.firefox.firefox_binary import FirefoxBinary

        # This allows you to download the page
        from parsel import Selector


        import time
        import datetime
        import os



        class headlessbypass:
            """Open the GoCompare new-customer page in a browser and look up a car.

            ``my_set_up`` creates ``self.driver`` (Firefox or Chrome, optionally
            headless), ``my_tear_down`` quits it, and ``my_decorator`` wraps a
            method so that both are run around it.
            """

            # Timestamp computed once, when the class body is executed.
            my_date_time = datetime.datetime.now().strftime('%Y%m%d%H%M%S')

            # Switches handed to headless Chrome, in the order they are added.
            # NOTE: the values are passed to the browser as-is (there is no shell
            # to strip quoting), so the user agent must NOT be wrapped in quotes,
            # and the window size is written as WIDTH,HEIGHT without a space.
            CHROME_HEADLESS_ARGS = (
                "--window-size=1920,1080",
                "--headless",
                "--disable-gpu",
                "--enable-javascript",
                "--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:72.0) Gecko/20100101 Firefox/72.0",
            )

            def my_set_up(self):
                """Executed before running, i.e. opening browser.

                Picks a browser, stores the WebDriver in ``self.driver`` (and the
                options object in ``self.options`` for the headless variants),
                sets a 30 second implicit wait and maximizes the window.
                """

                # This is required for running on the pipeline: any non-empty
                # HEADLESS_MODE value selects headless Firefox.
                headless = os.getenv('HEADLESS_MODE')

                def firefox_headless_func():
                    """Start Firefox in headless mode using the bundled geckodriver."""
                    self.options = f_Options()
                    self.options.headless = True
                    binary = FirefoxBinary('c:/Users/Anish/AppData/Local/Mozilla Firefox/firefox.exe')
                    self.driver = webdriver.Firefox(
                        firefox_binary=binary,
                        executable_path='bin/geckodriver.exe',
                        options=self.options,
                    )

                def chrome_headless_func():
                    """Start Chrome in headless mode with CHROME_HEADLESS_ARGS."""
                    self.options = c_Options()
                    for argument in self.CHROME_HEADLESS_ARGS:
                        self.options.add_argument(argument)
                    self.driver = webdriver.Chrome(options=self.options, executable_path='bin/chromedriver')

                # This is for running locally; select/toggle what you want to run.
                # With every toggle at 0, regular (non-headless) Firefox is used.
                headless_firefox = 0
                headless_chrome = 0
                chrome = 1

                if headless or headless_firefox:
                    firefox_headless_func()
                elif headless_chrome:
                    chrome_headless_func()
                elif chrome:
                    self.driver = webdriver.Chrome(executable_path='bin/chromedriver.exe')
                else:
                    self.driver = webdriver.Firefox(executable_path='bin/geckodriver.exe')

                self.driver.implicitly_wait(30)
                self.driver.maximize_window()

                main_window = self.driver.current_window_handle
                self.driver.switch_to.window(main_window)

            def my_tear_down(self):
                """Executed after running, i.e. closing browser"""
                self.driver.quit()

            def my_decorator(func):
                """Wrap ``func`` so my_set_up runs before it and my_tear_down after it.

                my_tear_down runs even when ``func`` raises, so the browser is not
                left open after a failure. The wrapper returns whatever ``func``
                returns.
                """
                from functools import wraps

                @wraps(func)
                def wrapper(self, *args, **kwargs):
                    self.my_set_up()
                    try:
                        return func(self, *args, **kwargs)
                    finally:
                        self.my_tear_down()
                return wrapper

            def _scroll_top_and_screenshot(self, path):
                """Send PAGE_UP to the <html> element, then save a screenshot to ``path``."""
                print("Take screenshot")
                html = self.driver.find_element(By.TAG_NAME, 'html')
                html.send_keys(Keys.PAGE_UP)
                self.driver.save_screenshot(path)

            @my_decorator
            def visit_site(self):
                """Enter a registration number, click 'Find car' and screenshot both steps.

                The page source is printed after loading, one screenshot is saved
                before the click and one after it.
                """
                self.driver.get("https://mygocompare.gocompare.com/newcustomer/")

                time.sleep(2)
                print(self.driver.page_source)

                # Enter registration number
                reg_field = self.driver.find_element(By.XPATH, "//fieldset[1]/div[2]/div[2]/div/input")
                reg_field.send_keys("AK47")
                time.sleep(5)
                self._scroll_top_and_screenshot("csv_json_files/firstpagescreenshot.png")

                self.driver.find_element(By.XPATH, "//span[contains(text(), 'Find car')]").click()
                time.sleep(2)
                self._scroll_top_and_screenshot("csv_json_files/firstpagescreenshot2.png")


        if __name__ == '__main__':
            # Run the lookup once; visit_site opens and closes the browser itself
            # through its set-up/tear-down decorator.
            scrape = headlessbypass()
            scrape.visit_site()

The technical posts on this site are published under the CC BY-SA 4.0 license. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM