from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from bs4 import BeautifulSoup
import time
url = "https://www.bungol.ca/"
driver = webdriver.Firefox(executable_path ='/usr/local/bin/geckodriver')
driver.get(url)
#Select toronto by default
driver.find_element_by_xpath("""/html/body/section/div[2]/div/div[1]/form/div/select/optgroup[1]/option[1]""").click()
time.sleep(1)
driver.find_element_by_xpath("""/html/body/section/div[2]/div/div[1]/form/div/button""").click()
driver.find_element_by_xpath("""/html/body/nav/div[1]/ul[1]/li[3]/select/option[8]""").click()
#select last 2 years
driver.find_element_by_xpath("""//*[@id="activeListings"]""").click()
#opening sold listing in that area
driver.find_element_by_xpath("""/html/body/div[5]/i""").click() #closes property type slide
driver.find_element_by_xpath("""//*[@id="navbarDropdown"]""").click()
driver.find_element_by_xpath("""//*[@id="listViewToggle"]""").click()
def data_collector():
hidden_next = driver.find_element_by_class_name("nextPaginate")
#inputs in textbox
inputElement = driver.find_element_by_id('navbarSearchAddressInput')
inputElement.send_keys('M3B2B6')
time.sleep(1)
#inputElement.send_keys(Keys.ENTER)
row_count = 3
table = driver.find_elements_by_css_selector("""#listViewTableBody""")
while hidden_next.is_displayed(): #while there is a next page button to be pressed
time.sleep(3) #delay for table refresh
#row_count = len(driver.find_elements_by_css_selector("""html body#body div#listView.table-responsive table#listViewTable.table.table-hover.mb-0 tbody#listViewTableBody tr.mb-2"""))
for row in range(row_count): #loop through the rows found
#alternate row by changing the tr index
driver.find_element_by_xpath("""/html/body/div[8]/table/tbody/tr[""" + str(row + 1) + """]/td[1]""").click()
time.sleep(2)
print(driver.find_element_by_css_selector("""#listingStatus""").text) #sold price
#closes the pop up after getting the data
driver.find_element_by_css_selector('.modal-xl > div:nth-child(1) > div:nth-child(1) > button:nth-child(1)').click()
time.sleep(1)
#clicks next page button for the table
driver.find_element_by_xpath("""//*[@id="listViewNextPaginate"]""").click()
if __name__ == "__main__":
data_collector()
The code loops through all the rows in the first table (currently set to 3 for testing), clicks on each row - pop-up shows up, grabs the information and close the pop-up. But when it clicks to the next page, it doesn't click on any of the rows of the second page. It doesn't show an error for not finding the row xpath either. But instead shows error for the pop-window close button because the popup did not open due to not pressing on the row to display pop-up window.
How do I make it click the rows when the table flips to the next page?
for table reference:
https://www.bungol.ca/map/location/toronto/ ?
close the property slider on the left
click tool -> open list
In my browser I also can't open the pop up, when I click on the row in the second page. So I think this can be the fault of the website.
If You want check if the element exists, You can use this code:
def check_exists_by_xpath(xpath, driver):
try:
driver.find_element_by_xpath(xpath)
except NoSuchElementException:
return False
return True
Try this. My understanding is your script goes through the listings, opens a listing, grabs the listings status, close the listing and does the same for all the listings.
If my understanding is correct, the below code may help you. Its better to change implicit and time.sleep() to explicit wait and clean up the functions.
Having said that, I did not fully test the code, but the code did navigate to more than one page of listings and collected data
from selenium.webdriver import Firefox
from selenium.webdriver.support.select import Select
import time
driver = Firefox(executable_path=r'path to geckodriver.exe')
driver.get('https://www.bungol.ca/')
driver.maximize_window()
driver.implicitly_wait(10)
# Select toronto by default
driver.find_element_by_css_selector('#locationChoice button[type="submit"]').click()
sold_in_the_last = Select(driver.find_element_by_id('soldInTheLast'))
sold_in_the_last.select_by_visible_text('2 Years')
driver.find_element_by_id('activeListings').click()
# opening sold listing in that area
driver.find_element_by_css_selector('#leftSidebarClose>i').click()
driver.find_element_by_id('navbarDropdown').click()
driver.find_element_by_id('listViewToggle').click()
def get_listings():
listings_table = driver.find_element_by_id('listViewTableBody')
listings_table_rows = listings_table.find_elements_by_tag_name('tr')
return listings_table_rows
def get_sold_price(listing):
listing.find_element_by_css_selector('td:nth-child(1)').click()
time.sleep(2)
sold_price = driver.find_element_by_id('listingStatus').text
time.sleep(2)
close = driver.find_elements_by_css_selector('.modal-content>.modal-body>button[class="close"]')
close[2].click()
time.sleep(2)
return sold_price
def data_collector():
data = []
time.sleep(2)
next = driver.find_element_by_id('listViewNextPaginate')
# get all the listing prior to the last page
while next.is_displayed():
listings = get_listings()
for listing in listings:
data.append(get_sold_price(listing))
next.click()
# get listings from last page
listings = get_listings()
for listing in listings:
data.append(get_sold_price(listing))
return data
if __name__ == '__main__':
from pprint import pprint
data = data_collector()
pprint(data)
print(len(data))
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.