According to my code, I am able to get the First heading of Project and I want the subheading to be printed (FSI Details). Not able to get the second heading using beautifulsoup.I tried the reference for the nth-child
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import urllib.request
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.keys import Keys
import time
import pandas as pd
import os
url = 'https://maharerait.mahaonline.gov.in'
chrome_path = r'C:/Users/User/AppData/Local/Programs/Python/Python36/Scripts/chromedriver.exe'
driver = webdriver.Chrome(executable_path=chrome_path)
driver.get(url)
WebDriverWait(driver,
20).until(EC.element_to_be_clickable((By.XPATH,"//div[@class='search-
pro-details']//a[contains(.,'Search Project Details')]"))).click()
Registered_Project_radio= WebDriverWait(driver,
10).until(EC.element_to_be_clickable((By.ID,"Promoter")))
driver.execute_script("arguments[0].click();",Registered_Project_radio)
Application = driver.find_element_by_id("CertiNo")
Application.send_keys("P50500000005")
Search = WebDriverWait(driver,
10).until(EC.element_to_be_clickable((By.ID,"btnSearch")))
driver.execute_script("arguments[0].click();",Search)
View = [item.get_attribute('href') for item in
driver.find_elements_by_tag_name("a") if
item.get_attribute('href') is not None]
View = View[0]
driver.get(View)
request = urllib.request.Request(View)
html = urllib.request.urlopen(request).read()
soup = BeautifulSoup(html, 'html.parser')
divPInfo2 = soup.find("div", {"id": "DivProject"})
Project_title = divPInfo2.find("div", {'class': 'x_panel'},
recursive=False).find("div", {'class': 'x_title'}).find(
"h2").text.strip()
print(Project_title)
Project_title1 = divPInfo2.find("div", {'class': 'x_panel'},
recursive=False).find("div", {'class': 'x_title'}).find_all(
"h2")[1].text.strip()
print(Project_title1 ) # (FSI Detail) heading should be printed here
You can try CSS selector :contains("FSI Details")
, which selects element containing string "FSI Details". This code prints labels and values of the "FSI Details" section:
import requests
from bs4 import BeautifulSoup
url = 'https://maharerait.mahaonline.gov.in/PrintPreview/PrintPreview?q=BPUvrrjIzYs%2f2hwYj1YIOfflh9NisZW6zTns2KLjHBZn6cbQ008s91nzlFrDxVvLwR1vAeLID0%2bo%2bD0H0Z6o2t%2b5P%2b%2fbBOcHCbMQHU8gkwdNZJnbbfu6N7mWSpgKXt4AiQyzuEpoDE7FX6HZypqsGXz4ObYD4KpyRzCsFJaWTgA%3d'
soup = BeautifulSoup(requests.get(url).text, 'lxml')
fsi_content = soup.select_one('.x_title:contains("FSI Details") + .x_content')
print('{: <160}{: <8}'.format('Label', 'Value'))
print('-' * 168)
for label, text in zip(fsi_content.select('label'), fsi_content.select('div:has(> label) + div')):
print('{: <160}{: <8}'.format(label.get_text(strip=True), text.get_text(strip=True)))
Prints:
Label Value
------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Built-up-Area as per Proposed FSI (In sqmts) ( Proposed but not sanctioned) ( As soon as approved, should be immediately updated in Approved FSI) 0
Built-up-Area as per Approved FSI (In sqmts) 11566.50
TotalFSI 11566.50
Further reading:
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.