from bs4 import BeautifulSoup
import numpy as np
import requests
from selenium import webdriver
from nltk.tokenize import sent_tokenize,word_tokenize
# Start-up marker printed before the browser is launched.
print('ansife')
# Earlier attempts, kept for reference: a plain HTTP fetch with requests and a
# generic geckodriver invocation.
# html = requests.get('https://www.opentable.com/new-york-restaurant-listings')
# driver = webdriver.Firefox(,executable_path=r'[Your path]\geckodriver.exe')
# NOTE: despite its name, `html` is a Selenium Firefox WebDriver, not markup;
# the page markup is read later through `html.page_source`.
html = webdriver.Firefox(executable_path=r'D:\geckodriver.exe')
# Load the restaurant listings page in the browser session.
html.get("https://www.opentable.com/new-york-restaurant-listings")
# Module-level state referenced by parser_NYU: `lists` accumulates the booking
# texts that pass the filter; `counter` is declared global there but is never
# read or written in the visible code.
counter = 0
lists = []
def parser_NYU(html):
    """Collect the booking blurbs of listings with more than 100 bookings.

    Parses ``html.page_source`` with BeautifulSoup and walks every
    ``div.rest-row-info`` element. For each row whose ``div.booking`` text
    has an integer second token greater than 100, the text is printed and
    appended to the module-level ``lists``.

    Rows are skipped (instead of aborting the whole run) when they have no
    ``div.booking`` child or when the second token of the booking text is
    missing or not an integer. Before this guard, the first such row raised
    ``AttributeError`` on ``None.text`` and the summary prints after the
    loop were never reached.

    Args:
        html: Object exposing ``page_source`` (the Selenium driver created
            at module level in this script).
    """
    soup = BeautifulSoup(html.page_source, 'lxml')
    for restroom in soup.find_all('div', class_='rest-row-info'):
        booking = restroom.find('div', class_='booking')
        if booking is None:
            # find() returns None when the row carries no booking blurb.
            continue
        words = word_tokenize(str(booking.text))
        # same day -- presumably the blurb reads like "Booked N times today",
        # so the count is the second token; TODO confirm against the page.
        try:
            bookings_count = int(words[1])
        except (IndexError, ValueError):
            # Too few tokens, or the second token is not a plain integer.
            continue
        if bookings_count > 100:
            print(booking.text)
            lists.append(booking.text)
            print('listers', len(lists))
            print('this works fine')
            print('this works fine')
    # Summary once every row has been examined.
    print('listers', len(lists))
    print('unfortunately this not works,why?')
    print('unfortunately this not works,why?')
# Parse the listings page already loaded in the Firefox session above.
parser_NYU(html)
As you can see, my print statements stop working after the if block. That is, these lines:
# The statements reported as never producing any output:
print('listers',len(lists))
print('this is not printing') # not printing
print('this is not printing')
What did I mess up here? What could be the main reason the entire function breaks after that if block? Please help me, and thanks in advance!
If you mean to print the statements inside the if-condition (it is a condition, not a loop), add them with the proper indentation, i.e. under the scope of the if-condition:
# Variant 1: the prints are indented under the `if`, so they run only for rows
# whose booking count exceeds 100.
for i, restroom in enumerate(soup.find_all('div',class_='rest-row-info')):
    rest_name = restroom.find('span',class_='rest-row-name-text').text
    booking = restroom.find('div',class_='booking') #.text
    words = list(word_tokenize(str(booking.text)))
    #same day
    if int(words[1]) > 100:
        #-----------Scope of if-block starts here--------#
        print(booking.text)
        lists.extend([booking.text])
        print('listers',len(lists))
        print('this is not printing') # not printing
        print('this is not printing')
        #-----------Scope of if-block ends here--------#
If you mean to print them inside the for-loop but outside the if-condition, place the print statements under the scope of the for-loop:
# Variant 2: the prints are indented under the `for` but outside the `if`, so
# they run once for every row regardless of the booking count.
for i, restroom in enumerate(soup.find_all('div',class_='rest-row-info')):
    #-------Scope of for-block starts here--------#
    rest_name = restroom.find('span',class_='rest-row-name-text').text
    booking = restroom.find('div',class_='booking') #.text
    words = list(word_tokenize(str(booking.text)))
    #same day
    if int(words[1]) > 100:
        print(booking.text)
        lists.extend([booking.text])
    print('listers',len(lists))
    print('this is not printing') # not printing
    print('this is not printing')
    #-------Scope of for-block ends here--------#
The problem here is twofold: `booking.text` fails because bs4 does not find a `div` with `class=booking`, so `find` returns `None`, which does not have the attribute `.text`, so an exception is thrown. Just check whether `find` returned `None`
# Inside the loop over the listing rows: skip any row that has no booking div
# instead of dereferencing None.
for i, restroom in enumerate(soup.find_all('div',class_='rest-row-info')):
    rest_name = restroom.find('span',class_='rest-row-name-text').text
    booking = restroom.find('div',class_='booking')
    if booking is None:
        continue
    words = list(word_tokenize(str(booking.text)))
(It is best to apply the same `None` check to `rest_name` as well.)
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.