[英]How to pass scrapy data without any URL Request?
我正在使用 Python 3 和最新版的 scrapy。當我在沒有任何 URL 的情況下,把一些 CSV 數據傳遞給另一個回調函數時,出現了錯誤:TypeError: Request URL must be str or unicode, got NoneType。
我的問題是如何在沒有任何 URL 的情況下傳遞數據?
# -*- coding: utf-8 -*-
import scrapy
import csv
class AppsSpider(scrapy.Spider):
    """Re-emit rows from data.csv, keeping only those with rating > 4.

    Bug fix: ``scrapy.Request(None, ...)`` raises
    ``TypeError: Request URL must be str or unicode, got NoneType`` —
    a Request always needs a real URL string. Each CSV row already
    carries its own URL in column 0, so that value is used as the
    request URL while the full row travels through ``meta``.
    """

    name = 'apps'  # a spider needs a unique name to be runnable via `scrapy crawl`

    def start_requests(self):
        """Read data.csv and yield one Request per row.

        CSV column order: url, title, developer, price, desc, rating.
        Rows shorter than 6 columns raise IndexError (malformed input).
        """
        with open('data.csv', mode='r', encoding='utf-8') as file:
            for row in csv.reader(file):
                items = {
                    'url': row[0],
                    'title': row[1],
                    'developer': row[2],
                    'price': row[3],
                    'desc': row[4],
                    'rating': row[5],
                }
                # Use the row's own URL instead of None (the original bug);
                # dont_filter allows duplicate URLs across rows.
                yield scrapy.Request(
                    items['url'],
                    meta=items,
                    dont_filter=True,
                    callback=self.parse_it,
                )

    def parse_it(self, response):
        """Yield the row carried in ``response.meta`` when its rating exceeds 4.

        Raises ValueError if the rating column is not an integer string.
        """
        meta = response.meta
        if int(meta['rating']) > 4:
            yield {
                'url': meta['url'],
                'title': meta['title'],
                'developer': meta['developer'],
                'price': meta['price'],
                'desc': meta['desc'],
                'rating': meta['rating'],
            }
您已經在這一行中把 None 作為第一個參數傳遞了。
yield scrapy.Request(None, meta=items, dont_filter=True, callback=self.parse_it)
在那里傳遞你的“URL”!
我有一個類似的問題,並通過混合不同的解決方案來解決它。 您將需要一個自定義下載中間件,以防止請求進入下載器以及您擁有的內容。
# -*- coding: utf-8 -*-
import scrapy
import csv
import json
class AppsSpider(scrapy.Spider):
    """Spider that pipes CSV rows through a network-less middleware.

    The configured downloader middleware short-circuits every request
    and answers it with a JSON copy of ``request.meta``, so no real
    HTTP traffic happens; ``parse_it`` then filters the rows by rating.
    """

    custom_settings = {
        "DOWNLOADER_MIDDLEWARES": {"yourproject.middlewares.AppsSpiderDownloaderMiddleware": 1},
    }

    # Column order of data.csv — also the key order of the emitted items.
    _FIELDS = ('url', 'title', 'developer', 'price', 'desc', 'rating')

    def start_requests(self):
        """Yield one placeholder Request per CSV row, data riding in meta."""
        with open('data.csv', mode='r', encoding='utf-8') as file:
            for row in csv.reader(file):
                payload = {key: row[index] for index, key in enumerate(self._FIELDS)}
                # Scrapy rejects URL-less requests, so hand it a bare scheme;
                # the middleware intercepts it before any download is attempted.
                yield scrapy.Request(url="http://", meta=payload, dont_filter=True, callback=self.parse_it)

    def parse_it(self, response):
        """Decode the JSON body the middleware built from ``request.meta``
        and yield the record when its rating exceeds 4."""
        decoded = json.loads(response.text)
        record = {key: decoded[key] for key in self._FIELDS}
        if int(record['rating']) > 4:
            yield record
AppsSpiderDownloaderMiddleware.py
import json
from scrapy import signals
from scrapy.http import TextResponse
class AppsSpiderDownloaderMiddleware:
    """Downloader middleware that never touches the network.

    Every request is answered immediately from ``process_request`` with
    a synthetic ``TextResponse`` whose body is the JSON-encoded
    ``request.meta`` — this is what lets the spider run without real URLs.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Standard Scrapy factory hook; wires up the spider_opened signal."""
        middleware = cls()
        crawler.signals.connect(middleware.spider_opened, signal=signals.spider_opened)
        return middleware

    def process_request(self, request, spider):
        # Returning a Response here short-circuits the downloader: Scrapy
        # skips the actual download and feeds this straight to the callback.
        # NOTE(review): assumes everything in request.meta is JSON-serializable.
        body = json.dumps(request.meta).encode('UTF-8')
        return TextResponse(url=request.url, status=200, body=body, request=request)

    def process_response(self, request, response, spider):
        # Nothing to post-process; pass the (synthetic) response through.
        return response

    def process_exception(self, request, exception, spider):
        # Returning None defers to other middlewares / default error handling.
        pass

    def spider_opened(self, spider):
        spider.logger.info('Spider opened: %s' % spider.name)
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.