I have scraped the results from duckduckgo.com and stored results in title, links, description links and description gets printed but title doesn't get printed
I have already printed title with print(title) it gives output
class DuckduckgoScraper(web_scraping):
def scrape(self,search_Term):
self.filename = search_Term
self.url = 'https://duckduckgo.com/html?q='+search_Term
r = requests.get(self.url,headers=USER_AGENT)
soup = BeautifulSoup(r.content,'html5lib')
result_block = soup.find_all(class_ = 'result__body')
for result in result_block:
link = result.find('a', attrs={'class':'result__a'}, href=True)
title = result.find('h2')
description = result.find(attrs={'class':'result__snippet'})
if link and title:
link = link['href']
title = title.get_text()
if description:
description = description.get_text()
with open(self.filename+'.csv', 'a', encoding='utf-8',newline='') as csv_file:
file_is_empty = os.stat(self.filename+'.csv').st_size==0
fieldname = ['title','link','description']
writer = csv.DictWriter(csv_file,fieldnames=fieldname)
if file_is_empty:
writer.writeheader()
writer.writerow({'title':title,'link':link,'description':description})
It doesn't gives any errors
You are opening and writing ng to csv file each row iteration. Instead of that, store the rows in list and write them at once at the end with .writerows()
function.
Note: It's useful to do .strip()
on each item of the row, otherwise Excel/LibreOffice/... might get confused when opening the file.
import os
import csv
import requests
from bs4 import BeautifulSoup
USER_AGENT = {'User-Agent':'Mozilla/5.0'}
def scrape(search_Term):
filename = search_Term
url = 'https://duckduckgo.com/html?q='+search_Term
r = requests.get(url,headers=USER_AGENT)
soup = BeautifulSoup(r.content,'html5lib')
result_block = soup.find_all(class_ = 'result__body')
for result in result_block:
link = result.find('a', attrs={'class':'result__a'}, href=True)
title = result.find('h2')
description = result.find(attrs={'class':'result__snippet'})
rows = []
if link and title:
link = link['href']
title = title.get_text()
if description:
description = description.get_text()
rows.append({'title':title.strip(), 'link':link.strip(), 'description':description.strip()})
# print(title.strip(), link.strip())
# print(description.strip())
# print('*'* 80)
with open(filename+'.csv', 'a', encoding='utf-8',newline='') as csv_file:
file_is_empty = os.stat(filename+'.csv').st_size==0
fieldname = ['title','link','description']
writer = csv.DictWriter(csv_file,fieldnames=fieldname)
if file_is_empty:
writer.writeheader()
writer.writerows(rows)
scrape('tree')
This creates tree.csv
. In LibreOffice it looks like this:
You can make a post http requests with appropriate payload to get the required content and writem them in a csv file. I used python as a search keyword and this is what it produces:
import csv
import requests
from bs4 import BeautifulSoup
URL = "https://duckduckgo.com/html/"
payload = {
'q': 'python',
'b': '',
'kl': 'us-en'
}
r = requests.post(URL,data=payload,headers={"User-Agent":"Mozilla/5.0"})
soup = BeautifulSoup(r.text,"lxml")
with open("output.csv","w",newline="",encoding="UTF-8") as infile:
writer = csv.writer(infile)
for item in soup.select(".result__body"):
title = item.select_one(".result__a").text
link = item.select_one(".result__a").get("href")
desc = item.select_one(".result__snippet").text
desc_link = item.select_one(".result__snippet").get("href")
print(f'{title}\n{link}\n{desc}\n{desc_link}\n')
writer.writerow([title,link,desc,desc_link])
Results are like:
Welcome to Python.org
https://www.python.org/
The official home of the Python Programming Language. Compound Data Types. Lists (known as arrays in other languages) are one of the compound data types that Python understands.
https://www.python.org/
Python (programming language) - Wikipedia
https://en.wikipedia.org/wiki/Python_%28programming_language%29
Python is an interpreted, high-level, general-purpose programming language.Created by Guido van Rossum and first released in 1991, Python's design philosophy emphasizes code readability with its notable use of significant whitespace.Its language constructs and object-oriented approach aim to help programmers write clear, logical code for small and large-scale projects.
https://en.wikipedia.org/wiki/Python_%28programming_language%29
Python Tutorial - w3schools.com
https://www.w3schools.com/python/
Python is a programming language. Python can be used on a server to create web applications. Start learning Python now »
https://www.w3schools.com/python/
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.