[英]Not writing to CSV after parsing with Python/BeautifulSoup
我感到自己犯了一个愚蠢的错误。 我正在尝试从网站上获取一些数据,我可以解析数据,但不会将其写入csv。 我是python的一个完整的初学者,对此我挠头。
到目前为止,这是我的代码:
import requests
import csv
from bs4 import BeautifulSoup
base_url = "http://www.example.com/"
total_pages = 100
def parse_content(r):
soup = BeautifulSoup(r.content,'lxml')
g_data = soup.find_all('ul', {"class": "searchResults"})
for item in g_data:
for li in item.findAll('li'):
for resultnameh2 in li.findAll('h2'):
for resultname in resultnameh2.findAll('a'):
cname = resultname.text
for resultAddress in li.findAll('p', {"class": "resultAddress"}):
caddress = resultAddress.text.replace('Get directions','').strip()
for resultContact in li.findAll('ul', {"class": "resultContact"}):
for resultContact in li.findAll('a', {"class": "resultMainNumber"}):
ccontact = resultContact.text
for page in range(1, total_pages):
response = requests.get(base_url + '/' + str(page))
if response.status_code != 200:
break
parse_content(response)
csvdata = parse_content(response)
with open('index.csv', 'a') as csv_file:
writer = csv.writer(csv_file)
writer.writerow([csvdata])
cname
, caddress
, ccontact
的值是在函数parse_content
上下文中设置的,因此在csv writer的for
循环中不可用。
您将需要返回这些值或将parse_content
方法中的csv parse_content
移动。
您需要返回值
import requests
import csv
from bs4 import BeautifulSoup
base_url = "http://www.example.com/"
total_pages = 100
def parse_content(r): # you are not returning anything from this function, I will change this function so it will return required values.
cname=[]
caddress=[]
ccontact=[]
soup = BeautifulSoup(r.content,'lxml')
g_data = soup.find_all('ul', {"class": "searchResults"})
for item in g_data:
for li in item.findAll('li'):
for resultnameh2 in li.findAll('h2'):
for resultname in resultnameh2.findAll('a'):
cname.append(resultname.text) # As it is list you need to append all these values
for resultAddress in li.findAll('p', {"class": "resultAddress"}):
caddress.append(resultAddress.text.replace('Get directions','').strip())
for resultContact in li.findAll('ul', {"class": "resultContact"}):
for resultContact in li.findAll('a', {"class": "resultMainNumber"}):
ccontact.append(resultContact.text)
return cname,caddress,ccontact
for page in range(1, total_pages):
response = requests.get(base_url + '/' + str(page))
if response.status_code != 200:
break
parse_content(response)
cname,caddress,ccontact = parse_content(response)
print(cname)
print(caddress)
print(ccontact)
#print whatever you like from above lists
with open('index.csv', 'a') as csv_file:
writer = csv.writer(csv_file)
writer.writerow([csvdata])
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.