简体   繁体   中英

How to find the nth child heading and print the text using beautifulsoup in python

With my code, I am able to get the first heading of the project, and I want the subheading (FSI Details) to be printed. I am not able to get the second heading using BeautifulSoup. I tried the reference for nth-child.

 from selenium import webdriver
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.common.keys import Keys
 import urllib.request
 from bs4 import BeautifulSoup
 from selenium import webdriver
 from selenium.webdriver.support.select import Select
 from selenium.webdriver.common.keys import Keys
 import time
 import pandas as pd
 import os
 url = 'https://maharerait.mahaonline.gov.in'
 chrome_path = r'C:/Users/User/AppData/Local/Programs/Python/Python36/Scripts/chromedriver.exe'

 driver = webdriver.Chrome(executable_path=chrome_path)
    pro-details']//a[contains(.,'Search Project Details')]"))).click()
 Registered_Project_radio= WebDriverWait(driver, 
 Application = driver.find_element_by_id("CertiNo")
 Search = WebDriverWait(driver, 
 View = [item.get_attribute('href') for item in 
      driver.find_elements_by_tag_name("a") if
      item.get_attribute('href') is not None]
 View = View[0]
 request = urllib.request.Request(View)
 html = urllib.request.urlopen(request).read()
 soup = BeautifulSoup(html, 'html.parser')
 divPInfo2 = soup.find("div", {"id": "DivProject"})
 Project_title = divPInfo2.find("div", {'class': 'x_panel'}, 
    recursive=False).find("div", {'class': 'x_title'}).find(
 Project_title1 = divPInfo2.find("div", {'class': 'x_panel'}, 
     recursive=False).find("div", {'class': 'x_title'}).find_all(
 print(Project_title1 )  # (FSI Detail) heading should be printed here

You can try the CSS selector :contains("FSI Details"), which selects an element containing the string "FSI Details". This code prints the labels and values of the "FSI Details" section:

import requests
from bs4 import BeautifulSoup

url = 'https://maharerait.mahaonline.gov.in/PrintPreview/PrintPreview?q=BPUvrrjIzYs%2f2hwYj1YIOfflh9NisZW6zTns2KLjHBZn6cbQ008s91nzlFrDxVvLwR1vAeLID0%2bo%2bD0H0Z6o2t%2b5P%2b%2fbBOcHCbMQHU8gkwdNZJnbbfu6N7mWSpgKXt4AiQyzuEpoDE7FX6HZypqsGXz4ObYD4KpyRzCsFJaWTgA%3d'

# Fetch the print-preview page. A timeout keeps the script from hanging on a
# stalled connection, and raise_for_status() stops us from parsing an HTTP
# error page as if it were the project page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'lxml')

# Select the .x_content element that immediately follows the .x_title element
# whose text contains "FSI Details".
# NOTE: soupsieve >= 2.1 deprecates ':contains' in favour of
# ':-soup-contains'; switch the selector if a FutureWarning is emitted.
fsi_content = soup.select_one('.x_title:contains("FSI Details") + .x_content')
if fsi_content is None:
    # select_one() returns None when nothing matches; exit with a clear
    # message instead of an AttributeError on the next line.
    raise SystemExit('Could not find the "FSI Details" section in the page.')

print('{: <160}{: <8}'.format('Label', 'Value'))
print('-' * 168)

# Pair each <label> with the <div> that directly follows the label's parent
# <div>; the two selections are walked in lockstep.
labels = fsi_content.select('label')
values = fsi_content.select('div:has(> label) + div')
for label, text in zip(labels, values):
    print('{: <160}{: <8}'.format(label.get_text(strip=True), text.get_text(strip=True)))


Label                                                                                                                                                           Value   
Built-up-Area as per Proposed FSI (In sqmts) ( Proposed but not sanctioned) ( As soon as approved, should be immediately updated in Approved FSI)               0       
Built-up-Area as per Approved FSI (In sqmts)                                                                                                                    11566.50
TotalFSI                                                                                                                                                        11566.50

Further reading:

CSS Selectors Reference

The technical post webpages of this site follow the CC BY-SA 4.0 license. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

粤ICP备18138465号  © 2020-2024 STACKOOM.COM