This is the HTML structure:
<a href="/main/list.nhn?mode=LS2D&mid=shm&sid1=100&sid2=264" class="snb_s11 nclicks(lmn_pol.mnl,,1)">BlueHouse <span class="blind">selected</span></a>
Below is the code I use to try to get only the text "BlueHouse":
# Look up the first <a> element whose class attribute matches this value.
middle_category = soup.find('a', attrs={'class': 'snb_s11 nclicks(lmn_pol.mnl,,1)'})
When I run that code to get only "BlueHouse", the result also includes the text "selected" from the nested <span>.
Below is my full code:
def crwaling_data_bluehouse(self):
    """Scrape the news.naver.com listing page and store what was found.

    Loads the page in Chrome, parses the rendered HTML with BeautifulSoup,
    and passes three values to the instance setters:

    * ``set_main_category`` -- text of the ``span.tx`` inside the
      ``a.nclicks(LNB.pol)`` link.
    * ``set_middle_category`` -- the first text node of the
      ``snb_s11 nclicks(lmn_pol.mnl,,1)`` link, whitespace-stripped.
    * ``set_title`` -- the ``href`` of the first link inside the
      ``ul.type06_headline`` list.

    Raises:
        AttributeError / TypeError: if one of the expected elements is not
            present in the page (``soup.find`` returns ``None``).
    """
    # setting web driver to get object
    chrome_driver = webdriver.Chrome('D:/바탕 화면/인턴/python/crawling_software/crwaler/news_crwaling/chromedriver.exe')
    url = 'https://news.naver.com/main/list.nhn?mode=LS2D&mid=shm&sid1=100&sid2=264'
    try:
        chrome_driver.get(url)
        html = chrome_driver.page_source
    finally:
        # Only the page source is needed from here on; always close the
        # browser so a failed load does not leave a Chrome process behind.
        chrome_driver.quit()
    soup = BeautifulSoup(html, 'html.parser')

    # get main category
    main_category = soup.find('a', {'class': 'nclicks(LNB.pol)'}).find('span', {'class': 'tx'}).get_text()
    self.set_main_category(main_category)

    # get middle category
    middle_anchor = soup.find('a', {'class': 'snb_s11 nclicks(lmn_pol.mnl,,1)'})
    # get_text() would also return the nested <span class="blind"> text
    # ("selected") and yields a plain str, which has no find_next().
    # Take the anchor's first text node instead and drop the trailing space.
    middle_category = middle_anchor.find_next(text=True).strip()
    self.set_middle_category(middle_category)

    # get title (stores the link target of the first headline)
    title = soup.find('ul', {'class': 'type06_headline'}).find('a')['href']
    self.set_title(title)
You can use find_next(text=True), which returns only the first matching text node:
from bs4 import BeautifulSoup

# Minimal reproduction: an anchor whose visible label is followed by a
# nested <span> that should not be part of the extracted text.
html_snippet = """<a href="/main/list.nhn?mode=LS2D&mid=shm&sid1=100&sid2=264" class="snb_s11 nclicks(lmn_pol.mnl,,1)">BlueHouse <span class="blind">selected</span></a>"""

soup = BeautifulSoup(html_snippet, 'html.parser')
middle_category = soup.find('a', attrs={'class': 'snb_s11 nclicks(lmn_pol.mnl,,1)'})

# find_next(text=True) stops at the first text node after the opening tag,
# so the text inside the nested <span> is not included.
first_text = middle_category.find_next(text=True)
print(first_text)
Output:
BlueHouse
Edit: don't call get_text() first, because it returns a plain string that has no find_next() method. Instead of
middle_category = soup.find('a',{'class':'snb_s11 nclicks(lmn_pol.mnl,,1)'}).get_text()
use:
middle_category = soup.find('a', {'class': 'snb_s11 nclicks(lmn_pol.mnl,,1)'}).find_next(text=True)
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.