[英]Python - Why is this data being written to file incorrectly?
只有第一个结果被写入了 csv,而且这个 url 的每个字母各占一行。我期望的是写入所有 url,每行一个 url。
在这段代码的最后一部分,我做错了什么,导致 csv 中只写入了一个结果,而不是所有结果?
import requests
from bs4 import BeautifulSoup
import csv
def grab_listings():
    """Return the href of the first listing link found on the directory pages.

    The category index and its numbered pages 2 through 9 are requested in
    order. Because the ``return`` sits inside the link loop, the function
    exits as soon as one ``a.frame`` link is seen, so the result is a single
    URL string rather than a collection of URLs. A later page is only
    requested when every earlier page had no matching link, and ``None`` is
    returned implicitly if no page has one.
    """
    base = "http://www.gym-directory.com/listing-category/gyms-fitness-centres/"
    # The first request uses the bare category URL; the rest append "page/N/".
    page_urls = [base] + [base + "page/{}/".format(n) for n in range(2, 10)]
    for page_url in page_urls:
        response = requests.get(page_url)
        document = BeautifulSoup(response.text, 'html.parser')
        results = document.find("div", {"class": "wlt_search_results"})
        for link in results.findAll("a", {"class": "frame"}):
            # Returning here ends the whole function at the first link.
            return link["href"]
# Fetch the listing data and dump it to gyms.csv.
# NOTE: grab_listings() above returns a single URL string, so iterating over
# it walks the string character by character and every character ends up as
# its own CSV row.
listing = grab_listings()
with open("gyms.csv", "wb") as out_file:
    csv.writer(out_file).writerows(listing)
所以我重构了你的代码,我认为它应该像你现在期望的那样工作:
import requests
from bs4 import BeautifulSoup
import csv
def grab_listings(page_idx):
    """Collect every listing URL from one page of the gym directory.

    Args:
        page_idx: Page number substituted into the ``page/{}/`` URL segment.

    Returns:
        A list with the ``href`` of each ``a.frame`` link inside the
        ``wlt_search_results`` container, in document order. The list is
        empty when the page has no such container.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    url = ("http://www.gym-directory.com/listing-category/gyms-fitness-centres/"
           "page/{}/").format(page_idx)  # the page index is inserted here
    r = requests.get(url)
    # Surface HTTP failures here instead of parsing an error page.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    l_area = soup.find("div", {"class": "wlt_search_results"})
    if l_area is None:
        # find() gives None when the container is missing; without this guard
        # the link lookup below would fail with an AttributeError.
        return []
    # Gather all links before returning: returning from inside a loop over
    # the links would hand back only the first one.
    return [elem["href"] for elem in l_area.find_all("a", {"class": "frame"})]
def main():
    """Scrape directory pages 1 through 9 and save the links to gyms.csv.

    One CSV row is written per page; the columns of a row are the URLs
    returned by ``grab_listings`` for that page.
    """
    # One inner list of URLs per page, so this is a list of lists.
    pages = [grab_listings(page_no) for page_no in range(1, 10)]
    print(pages)

    # NOTE(review): "wb" is the Python 2 csv convention this script was
    # written for; Python 3's csv.writer needs a text-mode file instead
    # (open with "w" and newline="").
    with open("gyms.csv", "wb") as out:
        # Every row has to be a list here: a bare string would be split into
        # one column per character.
        csv.writer(out).writerows(pages)

    # Alternative layout, one URL per line with a trailing comma separator:
    # with open("gyms.csv", "wb") as out:
    #     for page in pages:
    #         out.write(',\n'.join(page))


if __name__ == '__main__':
    main()
我在代码中添加了一些注释,希望这些解释足够清楚。如果还有不明白的地方,尽管问 :)
简化:
import requests
from bs4 import BeautifulSoup
import csv
def grab_listings(last_page=9):
    """Yield every listing URL from directory pages 1 through ``last_page``.

    Args:
        last_page: Number of the final page to fetch (inclusive). Defaults
            to 9, matching the nine pages scraped by the question's code.

    Yields:
        The ``href`` of each ``a.frame`` link inside the
        ``wlt_search_results`` container, page by page. A page without that
        container contributes nothing.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    url = "http://www.gym-directory.com/listing-category/gyms-fitness-centres/page/{}/"
    for page_no in range(1, last_page + 1):
        r = requests.get(url.format(page_no))
        # Surface HTTP failures here instead of parsing an error page.
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')
        l_area = soup.find("div", {"class": "wlt_search_results"})
        if l_area is None:
            # find() gives None when the container is missing; skip the page
            # rather than failing with an AttributeError.
            continue
        for elem in l_area.find_all("a", {"class": "frame"}):
            yield elem["href"]
# Write the scraped links to gyms.csv, one URL per row.
listings = grab_listings()
# newline="" leaves line-ending handling to the csv module (Python 3 text mode).
with open("gyms.csv", "w", newline="") as csv_file:
    writer = csv.writer(csv_file)
    for url in listings:
        # writerow() takes a sequence of fields. A bare string is itself a
        # sequence of characters and would be written one character per
        # column, so wrap the URL in a one-element list.
        writer.writerow([url])
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.