简体   繁体   中英

How to export data automatically to CSV with Scrapy Feed

I am trying to automatically generate a CSV every time I execute Scrapy crawl (myspider). I have tried to use Scrapy feeds, but I am getting the following error:

error.ReactorNotRestartable() twisted.internet.error.ReactorNotRestartable

This is my code:

import scrapy
import json
from ..items import inmobiliarias
from scrapy import Spider
from scrapy.selector import Selector


class IdealistaSpider(scrapy.Spider):
    """Spider that collects property listings from idealista.com result pages.

    Each listing found on a result page is emitted as an ``inmobiliarias``
    item; the items are exported to CSV through the ``FEEDS`` setting below.
    """

    name = 'idealista'
    # The attribute must be spelled ``custom_settings`` (plural). A misspelled
    # ``custom_setting`` is just an unused class attribute, so Scrapy would
    # silently ignore the FEEDS configuration and no CSV would be written.
    custom_settings = {
        'FEEDS': {
            '/Users/aleja/Documentos OneDrive/items.csv': {'format': 'csv'},
        },
    }

    def start_requests(self):
        """Yield one request per result page (pages 1 to 4)."""
        url = 'https://www.idealista.com/areas/venta-viviendas/pagina-{page}?shape=%28%28osyuFrkkUqPo%60%40rEaGxm%40UA%7ERRnInApEgUlBkG%7DCiGtE%29%29'
        # Browser-like headers sent with every request.
        headers = {
            "authority": "www.idealista.com",
            "cache-control": "max-age=0",
            "sec-ch-ua": "\"Chromium\";v=\"94\", \"Google Chrome\";v=\"94\", \";Not A Brand\";v=\"99\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\"",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36",
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "sec-fetch-site": "none",
            "sec-fetch-mode": "navigate",
            "sec-fetch-user": "?1",
            "sec-fetch-dest": "document",
            "accept-language": "es-ES,es;q=0.9",
        }
        for page in range(1, 5):
            yield scrapy.Request(
                url=url.format(page=page),
                headers=headers,
                callback=self.parse_json,
            )

    def parse_json(self, response):
        """Extract one item per ``item-info-container`` block in the response.

        Yields:
            An ``inmobiliarias`` item with the fields ``title``, ``price``,
            ``phone``, ``url``, ``meters``, ``rooms``, ``real_state``,
            ``garage`` and ``floor``.
        """
        page = Selector(text=response.text)
        containers = page.xpath('//div[@class="item-info-container"]').extract()
        for element in containers:
            # Re-parse each listing as its own document so the absolute
            # ``//`` XPaths below only see this listing's markup.
            sel = Selector(text=element)

            # Build a fresh item per listing; re-yielding one shared instance
            # would make every yielded item alias the same mutable object.
            item = inmobiliarias()
            item['title'] = sel.xpath('//a[@role="heading"]/@title').extract()
            item['price'] = sel.xpath('//div/span[@class="item-price h2-simulated"]/text()').extract()
            item['phone'] = sel.xpath('//span[@class="icon-phone item-not-clickable-phone"]/text()').extract()

            # A listing without a heading link must not abort the whole page.
            href = sel.xpath('//a[@role="heading"]/@href').extract_first()
            item['url'] = 'https://www.idealista.com' + href if href else ''

            meters = sel.xpath('//span[small/text()="m²"]/text()').extract()
            rooms = sel.xpath('//span[small/text()="hab."]/text()').extract()
            item['meters'] = meters
            item['rooms'] = rooms
            item['real_state'] = sel.xpath('//picture[@class="logo-branding"]/a//@alt').extract()
            item['garage'] = sel.xpath('//div/span[@class="item-parking"]/text()').extract()

            # The last "item-detail" span is treated as the floor unless it is
            # the meters or rooms value. ``meters`` and ``rooms`` are lists, so
            # membership is tested; ``!=`` against a list is always true.
            details = sel.xpath('//span[@class="item-detail"]/text()').extract()
            last_item_detail = details[-1] if details else ''
            if last_item_detail not in meters and last_item_detail not in rooms:
                item['floor'] = last_item_detail
            else:
                item['floor'] = ''
            yield item

You have misspelled the `custom_settings` attribute as `custom_setting` (i.e. without the trailing `s`), so Scrapy ignores it. A sample run with the corrected name shows no errors, and the items.csv file is created.

在此处输入图片说明

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. If you have any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM