[英]Scrapy request callback not working
這是我的代碼:
# Scrapy spider: for every `li` matched under the #contentmiddle element of the
# start page it stores name/date/link on a shared item, queues a download URL,
# and yields the item.
# NOTE(review): all indentation was lost in this paste, so the nesting described
# in the comments below is the most plausible reading — confirm against the
# original file.
class AAA(scrapy.Spider):
name = 'aaa'
start_urls = [
'https://forum.lowyat.net/topic/377400/all'
]
# NOTE(review): this is a plain class attribute; Scrapy presumably only picks up
# per-spider settings from `custom_settings` — confirm this has any effect.
COOKIES_ENABLED = False
# `count` and `check` are not read or written anywhere in the visible code.
count = 0
check = 0
# Class-level, so one AAAItem instance is shared: parse() overwrites its fields
# and yields the same object on every loop iteration.
item = AAAItem()
# Class-level list of download URLs, filled by arrangeDownloadUrl().
toDownload = []
# Callback for the start URL: fills the shared item per matched `li` and yields it.
def parse(self, response):
for sel in response.xpath('//*[@id="contentmiddle"]/div[3]/ol/li'):
self.item['name'] = sel.xpath('div/div/div[1]/p[1]/a/text()').extract()
self.item['date'] = sel.xpath('div/div/div[2]/p[4]/text()').extract()
lastUpdateDate = self.getLastUpdateDate()
# `[0]` raises IndexError when the date xpath matched nothing; `+""` is a no-op.
date_object1 = self.convertToDate(self.item['date'][0]+"")
date_object2 = self.convertToDate(lastUpdateDate)
# Entry is not newer than the last update date: trigger the downloads and stop.
if date_object1 <= date_object2:
# NOTE(review): the return value of haha2() is discarded here, so nothing it
# builds (or would yield) ever reaches the Scrapy engine.
self.haha2(response)
# Raises CloseSpider, which ends this callback immediately.
self.stopSpider()
self.item['link'] = sel.xpath('div/div/div[4]/p[3]/a/@href').extract()
self.arrangeDownloadUrl()
yield self.item
# Builds an absolute URL from the first extracted link and appends it to
# toDownload; an empty link list raises IndexError, which is reported and skipped.
def arrangeDownloadUrl(self):
try:
downloadUrl = "http://AAA.com"+self.item['link'][0]+""
self.toDownload.append(downloadUrl)
except IndexError:
print 'file not downloaded, link dead'
# Creates one Request per queued URL with haha3 as callback.
# NOTE(review): each Request object is constructed and then dropped — it is
# neither yielded nor returned. Scrapy only schedules requests that a callback
# hands back to it, so haha3 is never invoked. `response` is unused.
def haha2(self, response):
for i in range (len(self.toDownload)):
Request(self.toDownload[i], self.haha3)
# Intended callback for the download requests; only prints a marker line.
def haha3(self, response):
print 'haha3.................................................................'
# Stops the crawl by raising CloseSpider with reason 'done'.
def stopSpider(self):
raise scrapy.exceptions.CloseSpider('done')
# Returns a hard-coded "last update" timestamp string.
def getLastUpdateDate(self):
date = "Nov 5, 2001 - 1:06 PM"
return date
# Parses a string such as "Nov 5, 2001 - 1:06 PM" into a datetime.
# NOTE(review): `%S` reads the digits after the colon as seconds; the sample
# looks like hour:minute, so `%M` was presumably intended — confirm.
def convertToDate(self, value):
result = datetime.strptime(value, '%b %d, %Y - %I:%S %p')
return result
# Formats a datetime back into a string.
# NOTE(review): `%w` is the weekday number (0-6), whereas convertToDate() uses
# `%d` (day of month), so this is not the inverse of convertToDate() —
# presumably `%d` was intended; confirm.
def convertToString(self, value):
result = value.strftime("%b %w, %Y - %I:%S %p")
return result
為了保護隱私,我更改了頁面的網址。無論如何,問題在於 haha2 函數中建立的 Request 沒有觸發回調函數 haha3。
除非我直接以 self.haha3(response) 的方式調用,否則程式不會進入 haha3 函數。
但這樣做就失去了意義,因為我想要的是打開這些鏈接,並處理各個鏈接返回的響應……知道我哪裡出錯了嗎?
請嘗試以下寫法(Request 必須透過 yield 交還給 Scrapy,才會被調度並觸發回調):
def haha2(self, response):
    """Yield one Request per URL queued in ``self.toDownload``.

    Each Request uses ``self.haha3`` as its callback. Because this method
    is a generator, calling ``self.haha2(response)`` on its own only
    creates the generator object and runs nothing. The calling callback
    must iterate it and hand each Request back to Scrapy, e.g.::

        for request in self.haha2(response):
            yield request

    ``response`` is not used; it is kept so the signature stays the same.
    """
    # Iterate the URLs directly instead of indexing through range(len(...)).
    for url in self.toDownload:
        yield Request(url, callback=self.haha3)
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.