I have a feeling I am making a stupid mistake. I am trying to scrape some data from a website; I can parse the data, but it doesn't get written to the CSV file. I am a complete beginner to Python and I am scratching my head over this.
Here is my code so far:
import requests
import csv
from bs4 import BeautifulSoup
# Root URL of the site to scrape; the page number is appended to it per request.
base_url = "http://www.example.com/"
# Page count used as the bound of the page loop.
total_pages = 100
def parse_content(r):
    """Parse one search-results page from the response ``r``.

    The name, address and phone number found for each entry are bound to the
    local names ``cname``, ``caddress`` and ``ccontact``. The function has no
    ``return`` statement, so those values are discarded when it finishes and
    the caller receives ``None``.
    """
    soup = BeautifulSoup(r.content,'lxml')
    g_data = soup.find_all('ul', {"class": "searchResults"})
    for item in g_data:
        for li in item.findAll('li'):
            for resultnameh2 in li.findAll('h2'):
                for resultname in resultnameh2.findAll('a'):
                    cname = resultname.text
            for resultAddress in li.findAll('p', {"class": "resultAddress"}):
                caddress = resultAddress.text.replace('Get directions','').strip()
            for resultContact in li.findAll('ul', {"class": "resultContact"}):
                for resultContact in li.findAll('a', {"class": "resultMainNumber"}):
                    ccontact = resultContact.text
# Request each page in turn and append whatever parse_content() returns
# to index.csv as a single-cell row.
for page in range(1, total_pages):
    response = requests.get(base_url + '/' + str(page))
    if response.status_code != 200:
        break
    parse_content(response)
    csvdata = parse_content(response)
    with open('index.csv', 'a') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow([csvdata])
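The values for `cname`, `caddress`, and `ccontact` are set inside the function `parse_content`, so they are local to that function and are not available in the `for` loop that drives the CSV writer. Because the function has no `return` statement, `csvdata = parse_content(response)` is simply `None`.
You will either need to return these values from the function or move the CSV writer into the `parse_content` function.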
You need to return the values from the function:
import csv
import itertools

import requests
from bs4 import BeautifulSoup
# Root URL of the site to scrape; the page number is appended to it per request.
base_url = "http://www.example.com/"
# Page count used as the bound of the page loop.
total_pages = 100
def parse_content(r):
    """Extract names, addresses and phone numbers from one results page.

    Args:
        r: Response object whose ``content`` attribute holds the page HTML.

    Returns:
        A ``(cname, caddress, ccontact)`` tuple of three lists of strings in
        document order. Each list is filled independently, so the lists can
        differ in length when an entry is missing one of the fields.
    """
    cname = []
    caddress = []
    ccontact = []
    soup = BeautifulSoup(r.content, 'lxml')
    for results in soup.find_all('ul', {"class": "searchResults"}):
        for li in results.find_all('li'):
            for heading in li.find_all('h2'):
                cname.extend(link.text for link in heading.find_all('a'))
            for address in li.find_all('p', {"class": "resultAddress"}):
                # Drop the "Get directions" link text that sits inside the
                # address paragraph.
                caddress.append(address.text.replace('Get directions', '').strip())
            # Collect the numbers once per entry that has a contact list.
            # Looping over every ul.resultContact while searching the whole
            # <li> each time would append the same numbers repeatedly.
            if li.find('ul', {"class": "resultContact"}) is not None:
                ccontact.extend(
                    number.text
                    for number in li.find_all('a', {"class": "resultMainNumber"})
                )
    return cname, caddress, ccontact
# Fetch every results page and append one CSV row per scraped entry.
# The file is opened once for the whole run; newline='' lets the csv module
# control line endings itself (otherwise blank lines appear on Windows).
with open('index.csv', 'a', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    # range() excludes its stop value, so + 1 is needed to reach total_pages.
    for page in range(1, total_pages + 1):
        # base_url may already end with '/', so strip it to avoid '//' in the URL.
        response = requests.get(base_url.rstrip('/') + '/' + str(page))
        if response.status_code != 200:
            # Stop at the first page that cannot be fetched.
            break
        cname, caddress, ccontact = parse_content(response)
        print(cname)
        print(caddress)
        print(ccontact)
        # The three lists can differ in length; zip_longest pads the short
        # ones with '' so that no scraped value is silently dropped.
        writer.writerows(
            itertools.zip_longest(cname, caddress, ccontact, fillvalue='')
        )
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.