繁体   English   中英

Python - 为什么这些数据被错误地写入文件?

[英]Python - Why is this data being written to file incorrectly?

只有第一个结果被写入 csv，而且是该 url 的每个字母各占一行。我希望写入的是所有 url，每行一个完整的 url。

在这段代码的最后一部分中，我做错了什么导致 csv 只写入一个结果而不是所有结果？

import requests
from bs4 import BeautifulSoup
import csv

def grab_listings():
    """Collect listing hrefs from all 9 result pages of the gym directory.

    Returns:
        list[str]: every ``href`` found in the ``wlt_search_results`` div
        of each page, in page order.

    The original version used ``return elem["href"]`` inside the first
    loop, which exited the function on the very first anchor of page 1
    and made the eight copy-pasted page blocks below it dead code.
    """
    base = "http://www.gym-directory.com/listing-category/gyms-fitness-centres/"
    hrefs = []
    for page in range(1, 10):
        # Page 1 of this category has no /page/N/ suffix.
        url = base if page == 1 else "{}page/{}/".format(base, page)
        r = requests.get(url)
        soup = BeautifulSoup(r.text, 'html.parser')
        l_area = soup.find("div", {"class": "wlt_search_results"})
        # Append every link -- returning here would stop after one result.
        for elem in l_area.findAll("a", {"class": "frame"}):
            hrefs.append(elem["href"])
    return hrefs

l = grab_listings()

# csv.writer on Python 3 needs a text-mode file opened with newline="";
# the original "wb" mode raises TypeError there.  Each row handed to
# writerow() must be a *sequence of fields*, so wrap the url string in a
# one-element list -- otherwise the writer iterates the string and emits
# one character per column.
rows = l if isinstance(l, list) else [l]
with open("gyms.csv", "w", newline="") as f:
    writer = csv.writer(f)
    for url in rows:
        writer.writerow([url])

所以我重构了你的代码，我认为它现在应该按你期望的方式工作：

import requests
from bs4 import BeautifulSoup
import csv


def grab_listings(page_idx):
    """Return the href of every result link on one listing page.

    Args:
        page_idx: 1-based index of the directory page to fetch.

    Returns:
        list[str]: all ``href`` values of the ``a.frame`` anchors inside
        the ``wlt_search_results`` container.
    """
    url = ("http://www.gym-directory.com/listing-category/gyms-fitness-centres/"
           "page/{}/").format(page_idx) # the index of the page will be inserted here
    response = requests.get(url)
    markup = BeautifulSoup(response.text, 'html.parser')
    results = markup.find("div", {"class": "wlt_search_results"})
    # Collect every match rather than returning inside the loop -- an
    # early return would hand back only the first result.
    return [anchor["href"] for anchor in results.findAll("a", {"class": "frame"})]


def main():
    """Scrape listing pages 1-9 and write the collected urls to gyms.csv."""
    l = []  # a list of lists: one inner list of urls per page
    # call the function 9 times here with idx from 1 till 9
    for page_idx in range(1, 10):
        l.append(grab_listings(page_idx))
    # ``print l`` is Python 2 statement syntax and a SyntaxError on
    # Python 3; use the function form.
    print(l)

    # The csv module on Python 3 expects a text-mode file opened with
    # newline=""; opening "wb" makes csv.writer raise TypeError there.
    with open("gyms.csv", "w", newline="") as f:
        writer = csv.writer(f)
        for row in l:
            # row is already a list of urls, so each page becomes one
            # comma-separated CSV line (a bare string would be split
            # into single characters).
            writer.writerow(row)

# for writing each URL in one line separated by commas at the end 
#    with open("gyms.csv", "wb") as f:
#        for row in l:
#            string_to_write = ',\n'.join(row)
#            f.write(string_to_write)

# Standard script guard: run the scrape only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    main()

我在代码中添加了一些注释,希望它足够解释。 如果不只是问:)

简化:

import requests
from bs4 import BeautifulSoup
import csv


def grab_listings(num_pages=5):
    """Lazily yield listing hrefs from the first *num_pages* result pages.

    Args:
        num_pages: how many directory pages to scrape.  Defaults to 5 to
            preserve the original behaviour; pass 9 to cover every page
            mentioned in the question.

    Yields:
        str: each ``href`` found in the ``wlt_search_results`` div.
    """
    url = "http://www.gym-directory.com/listing-category/gyms-fitness-centres/page/{}/"
    # range(1, n + 1) gives the 1-based page numbers the site uses.
    for page in range(1, num_pages + 1):
        r = requests.get(url.format(page))
        soup = BeautifulSoup(r.text, 'html.parser')
        l_area = soup.find("div", {"class": "wlt_search_results"})

        for elem in l_area.findAll("a", {"class": "frame"}):
            yield elem["href"]

l = grab_listings()

# writerow() expects a sequence of fields; passing the bare url string
# iterates it and writes one character per column -- the exact symptom
# described in the question.  Wrap each url in a one-element list so
# every CSV row holds one complete url.  newline="" prevents the csv
# module from doubling line endings on Windows.
with open("gyms.csv", "w", newline="") as fh:
    writer = csv.writer(fh)
    for row in l:
        writer.writerow([row])

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM