如何在Python中使用BeautifulSoup查找第n個子元素（nth-child）的標題並打印文本

Question

根據我的代碼，我能夠取得 Project 區塊的第一個標題，並且我希望再打印其子標題（FSI Details）。但我無法使用 BeautifulSoup 取得第二個標題；我已嘗試參考 nth-child 的用法。

 from selenium import webdriver
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.common.keys import Keys
 import urllib.request
 from bs4 import BeautifulSoup
 from selenium import webdriver
 from selenium.webdriver.support.select import Select
 from selenium.webdriver.common.keys import Keys
 import time
 import pandas as pd
 import os
 # Drive the MahaRERA site with Selenium to reach one registered project's
 # detail page, then parse that page with BeautifulSoup and print the panel
 # headings found under the "DivProject" container.
 url = 'https://maharerait.mahaonline.gov.in'
 chrome_path = r'C:/Users/User/AppData/Local/Programs/Python/Python36/Scripts/chromedriver.exe'

 driver = webdriver.Chrome(executable_path=chrome_path)
 driver.get(url)

 # Open the "Search Project Details" page once its link becomes clickable.
 # The XPath is written as adjacent string literals: a single literal broken
 # across physical lines is a SyntaxError and would also inject whitespace
 # into the class name being matched.
 WebDriverWait(driver, 20).until(EC.element_to_be_clickable((
     By.XPATH,
     "//div[@class='search-pro-details']"
     "//a[contains(.,'Search Project Details')]"))).click()

 # Pick the element with id "Promoter"; the click is issued through
 # JavaScript instead of WebElement.click().
 Registered_Project_radio = WebDriverWait(driver, 10).until(
     EC.element_to_be_clickable((By.ID, "Promoter")))
 driver.execute_script("arguments[0].click();", Registered_Project_radio)

 # Enter the certificate number and submit the search.
 Application = driver.find_element_by_id("CertiNo")
 Application.send_keys("P50500000005")
 Search = WebDriverWait(driver, 10).until(
     EC.element_to_be_clickable((By.ID, "btnSearch")))
 driver.execute_script("arguments[0].click();", Search)

 # Collect every non-empty link target on the current page and follow the
 # first one.
 View = [item.get_attribute('href') for item in
         driver.find_elements_by_tag_name("a")
         if item.get_attribute('href') is not None]
 View = View[0]
 driver.get(View)

 # Download the same URL again with urllib and parse the raw HTML.
 request = urllib.request.Request(View)
 html = urllib.request.urlopen(request).read()
 soup = BeautifulSoup(html, 'html.parser')
 divPInfo2 = soup.find("div", {"id": "DivProject"})

 # First heading: the <h2> inside the first direct-child x_panel's x_title.
 Project_title = divPInfo2.find(
     "div", {'class': 'x_panel'}, recursive=False
 ).find("div", {'class': 'x_title'}).find("h2").text.strip()
 print(Project_title)

 # NOTE(review): this is the lookup the question reports as not working.
 # Presumably the first x_title holds a single <h2>, so index 1 is out of
 # range and the "FSI Details" heading lives in a different x_title --
 # confirm against the page markup.
 Project_title1 = divPInfo2.find(
     "div", {'class': 'x_panel'}, recursive=False
 ).find("div", {'class': 'x_title'}).find_all("h2")[1].text.strip()
 print(Project_title1 )  # (FSI Detail) heading should be printed here

Answer 1

您可以嘗試CSS選擇器 :contains("FSI Details")，它會選取包含字符串“FSI Details”的元素。以下代碼打印“FSI Details”部分的標籤和值：

import requests
from bs4 import BeautifulSoup

url = 'https://maharerait.mahaonline.gov.in/PrintPreview/PrintPreview?q=BPUvrrjIzYs%2f2hwYj1YIOfflh9NisZW6zTns2KLjHBZn6cbQ008s91nzlFrDxVvLwR1vAeLID0%2bo%2bD0H0Z6o2t%2b5P%2b%2fbBOcHCbMQHU8gkwdNZJnbbfu6N7mWSpgKXt4AiQyzuEpoDE7FX6HZypqsGXz4ObYD4KpyRzCsFJaWTgA%3d'

# Fetch the print-preview page and parse it with the lxml parser.
page = requests.get(url)
soup = BeautifulSoup(page.text, 'lxml')

# Select the .x_content element that immediately follows the .x_title
# containing the text "FSI Details".
# NOTE(review): newer Soup Sieve releases spell this pseudo-class
# ":-soup-contains" -- confirm against the installed version.
fsi_content = soup.select_one('.x_title:contains("FSI Details") + .x_content')

# Two left-aligned columns: 160 characters for the label, 8 for the value.
row_format = '{: <160}{: <8}'
print(row_format.format('Label', 'Value'))
print('-' * 168)

# Pair each <label> with the <div> that follows the <div> wrapping a label.
label_tags = fsi_content.select('label')
value_tags = fsi_content.select('div:has(> label) + div')
for label_tag, value_tag in zip(label_tags, value_tags):
    print(row_format.format(label_tag.get_text(strip=True),
                            value_tag.get_text(strip=True)))

輸出：

Label                                                                                                                                                           Value   
------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Built-up-Area as per Proposed FSI (In sqmts) ( Proposed but not sanctioned) ( As soon as approved, should be immediately updated in Approved FSI)               0       
Built-up-Area as per Approved FSI (In sqmts)                                                                                                                    11566.50
TotalFSI                                                                                                                                                        11566.50

進一步閱讀：

CSS選擇器參考

如何在Python中使用beautifulsoup查找第n個孩子的標題並打印文本

問題描述

1 個解決方案

解決方案1
0 已采納 2019-07-16 07:51:19

如何在Python中使用beautifulsoup查找第n個孩子的標題並打印文本

問題描述

1 個解決方案

解決方案1 0 已采納 2019-07-16 07:51:19

解決方案1
0 已采納 2019-07-16 07:51:19