[英]scrapy crawl only saves one result to the csv file
我有下面这个 Scrapy 爬虫:
# -*- coding: utf-8 -*-
import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from items import QuestionItem
class FirstSpider(scrapy.Spider):
    """Scrape the Stack Overflow questions listing page.

    Yields one ``QuestionItem`` per ``.question-summary`` element so every
    question reaches the feed exporter. The original code used ``return item``
    inside the loop, which ends ``parse`` after the first summary — that is
    why only one row was written to the CSV.
    """

    name = 'first'
    allowed_domains = ['stackoverflow.com']
    start_urls = ['https://stackoverflow.com/questions']

    def parse(self, response):
        for selector in response.css('.question-summary'):
            item = QuestionItem()
            item['question'] = selector.css('h3 a::text').extract()
            item['votes'] = selector.css('.vote-count-post strong::text').extract()
            item['answers'] = selector.css('.status strong::text').extract()
            # First text node of .views, with newlines stripped and left-trimmed.
            item['views'] = selector.css('.views ::text').extract()[0].replace('\n', '').replace('\r', '').lstrip()
            item['username'] = selector.css('.user-details a::text').extract()
            item['userlink'] = selector.css('.user-details a::attr(href)').extract()
            # yield (not return) so parse produces every item, not just the first.
            yield item
此代码也位于items.py中
import scrapy
class QuestionItem(scrapy.Item):
    """Container for the fields scraped from one question summary."""

    question = scrapy.Field()  # question title text
    votes = scrapy.Field()  # vote count of the question
    answers = scrapy.Field()  # number of answers
    views = scrapy.Field(serializer=str)  # view count, serialized as a string
    username = scrapy.Field()  # asker's display name
    userlink = scrapy.Field()  # relative link to the asker's profile
它应该从stackoverflow的默认问题页面开始,并使用css标记获取所有问题。 但是,使用此命令时,它仅将一行保存到csv。 scrapy crawl first --output file.csv
您的 parse 方法中有一个 return 语句,函数一执行到它就会终止。
您应该在 for 循环内部使用 yield 而不是 return。
class FirstSpider(scrapy.Spider):
    """Crawl the Stack Overflow questions listing and yield one item per question."""

    name = 'first'
    allowed_domains = ['stackoverflow.com']
    start_urls = ['https://stackoverflow.com/questions']

    def parse(self, response):
        # Field name -> CSS query for the straightforward list-valued fields.
        field_queries = {
            'question': 'h3 a::text',
            'votes': '.vote-count-post strong::text',
            'answers': '.status strong::text',
            'username': '.user-details a::text',
            'userlink': '.user-details a::attr(href)',
        }
        for summary in response.css('.question-summary'):
            item = QuestionItem()
            for field, query in field_queries.items():
                item[field] = summary.css(query).extract()
            # 'views' needs cleanup: take the first text node, drop newlines,
            # and trim leading whitespace.
            raw_views = summary.css('.views ::text').extract()[0]
            item['views'] = raw_views.replace('\n', '').replace('\r', '').lstrip()
            yield item
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.