I have this Python code for scraping the TripAdvisor website. Until late last year (about 18 days ago) the code worked very well, but now it no longer works properly and I get the result shown below.
I have been trying changes like this: container = driver.find_elements_by_xpath("//div[@class='data-test-target']")
But it doesn't work. I also noticed that the website no longer has the element taLnk ulBlueLinks
or the element review-container.
Please, if you can help me with the code, that would be fantastic.
PS: I also tried Beautiful Soup, but the code doesn't work with it either.
import csv
import time
from selenium import webdriver
import datetime
from selenium.common.exceptions import NoSuchElementException
# Common setup shared by the whole script: run timestamp, browser session,
# target page and the constant columns written to every CSV row.
# Timestamp taken once at start-up; the same value is stored in every row.
now = datetime.datetime.now()
# Start a Chrome session; 'chromedriver.exe' is presumably the path to the
# ChromeDriver binary next to the script -- confirm for your Selenium version.
driver = webdriver.Chrome('chromedriver.exe')
# Review page to scrape.
italia = "https://www.tripadvisor.it/Attraction_Review-g657290-d2213040-Reviews-Ex_Stabilimento_Florio_delle_Tonnare_di_Favignana_e_Formica-Isola_di_Favig.html"
# Load the page in the browser before any element lookup happens.
driver.get(italia)
# Constant values written alongside each scraped review.
place = 'Ex_Stabilimento_Florio_delle_Tonnare_di_Favignana'
lang = 'it'
def check_exists_by_xpath(xpath):
    """Report whether the current page has an element matching *xpath*.

    The lookup goes through the module-level ``driver``. Returns ``True``
    when the element is found and ``False`` when Selenium raises
    ``NoSuchElementException``; any other exception propagates.
    """
    try:
        driver.find_element_by_xpath(xpath)
    except NoSuchElementException:
        found = False
    else:
        found = True
    return found
# Scrape up to two result pages and append one CSV row per review.
#
# NOTE(review): the traceback for this script shows that the "next" button
# selector below no longer matches the live page, and the question text
# reports the same for 'taLnk ulBlueLinks' and 'review-container'. These
# XPaths must be refreshed against the site's current markup; the loop
# below only makes the script fail gracefully instead of crashing.
next_page_xpath = '//a[@class="nav next taLnk ui_button primary"]'
# Presumably the "more" link that expands truncated review text -- confirm.
more_link_xpath = "//span[@class='taLnk ulBlueLinks']"

# Open the output once and let the context manager close it; the csv module
# requires newline='' so that no blank rows appear on Windows, and an explicit
# UTF-8 encoding keeps accented characters and emoji from raising on write.
with open(r'Italia_en.csv', 'a', newline='', encoding='utf-8') as csvFile:
    csvWriter = csv.writer(csvFile)
    for i in range(0, 2):
        try:
            if check_exists_by_xpath(more_link_xpath):
                driver.find_element_by_xpath(more_link_xpath).click()
                time.sleep(5)
            container = driver.find_elements_by_xpath("//div[@class='review-container']")
            for review_box in container:
                time.sleep(10)
                # The rating is encoded in the class name; splitting on "_"
                # leaves the numeric part at index 3 (e.g. "..._50" -> "50").
                rating_a = review_box.find_element_by_xpath(
                    ".//span[contains(@class, 'ui_bubble_rating bubble_')]").get_attribute("class")
                rating = rating_a.split("_")[3]
                review = review_box.find_element_by_xpath(
                    ".//p[@class='partial_entry']").text.replace("\n", "")
                title = review_box.find_element_by_class_name('quote').find_element_by_tag_name(
                    'a').find_element_by_class_name('noQuotes').text
                print(review)
                rating_date = review_box.find_element_by_class_name('ratingDate').get_attribute('title')
                print(rating, review, title, "--", sep='\n')
                # The second anchor inside the review box is stored as the
                # review's link.
                link_list = [link.get_attribute('href')
                             for link in review_box.find_elements_by_tag_name('a')]
                print(link_list[1], "--", sep='\n')
                csvWriter.writerow([place, rating, title, review, rating_date, link_list[1], now, lang])
        except Exception as exc:
            # Best-effort per page, as in the original, but report the problem
            # instead of swallowing it and let Ctrl-C / SystemExit through.
            print("Skipping the rest of page", i, "because of:", repr(exc))
        # Advance to the next page. The original retried the very same click
        # inside its except handler, so a missing button raised a second,
        # uncaught NoSuchElementException; stop cleanly instead.
        if not check_exists_by_xpath(next_page_xpath):
            print("No 'next page' button found; stopping.")
            break
        driver.find_element_by_xpath(next_page_xpath).click()
        time.sleep(5)
And the result is
Traceback (most recent call last):
File "gh_code2.py", line 63, in <module>
driver.find_element_by_xpath('//a[@class="nav next taLnk ui_button primary"]').click()
File ".\Programs\Python\Python37\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 394, in find_element_by_xpath
return self.find_element(by=By.XPATH, value=xpath)
File ".\Programs\Python\Python37\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 978, in find_element
'value': value})['value']
File ".\Programs\Python\Python37\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File ".\Programs\Python\Python37\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//a[@class="nav next taLnk ui_button primary"]"}
(Session info: chrome=79.0.3945.117)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "gh_code2.py", line 69, in <module>
driver.find_element_by_xpath('//a[@class="nav next taLnk ui_button primary"]').click()
File ".\Programs\Python\Python37\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 394, in find_element_by_xpath
return self.find_element(by=By.XPATH, value=xpath)
File ".\Programs\Python\Python37\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 978, in find_element
'value': value})['value']
File ".\Programs\Python\Python37\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 321, in execute
self.error_handler.check_response(response)
File ".\Programs\Python\Python37\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 242, in check_response
raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":"//a[@class="nav next taLnk ui_button primary"]"}
(Session info: chrome=79.0.3945.117)
Unfortunately, in my experience with scraping sites, if the site is updated and those XPath references are gone, you must update your script. In this case, it appears that
class="nav next taLnk ui_button primary"
is no longer a viable selector. If this is a common reference, with similar structure over time, I would try to locate the element by position rather than by an exact class name, i.e. nav button[0] or something of the like (I don't have the exact syntax in front of me).
Otherwise, check out the Selenium IDE extension for Firefox. It can help you find other ways to reference items while clicking through the site.
Hope that is some help!
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.