简体   繁体   中英

if statement in scrapy yield

How can I add an if statement inside the `yield {}` dict? See the `Fight_Url` entry at the bottom of the code below.

Basically, if the result field says "next", I need it to skip looking up the URL.

 import pandas as pd
 import scrapy

 # Read fighters.csv and take its `url` column as a plain list of start URLs.
 urls_csv = pd.read_csv('fighters.csv')
 urls_list = urls_csv.url.to_list()


 class FightersDetailsSpider(scrapy.Spider):
     # Spider that requests every URL from fighters.csv and yields one dict
     # per table row found on each page.
     name = "fight_urls"

     def start_requests(self):
         # Copy the module-level URL list, then issue one request per URL;
         # every response is handled by `parse`.
         links = []
         for link in urls_list:
             links.append(link)
         for url in links:
             yield scrapy.Request(url=url, callback=self.parse)

     def parse(self, response, **kwargs):
         # Iterate over the <tr> elements inside the element with class
         # "b-fight-details__table-body"; [1:] skips the first row
         # (presumably a header/placeholder row -- confirm against the page).
         for fight_url in response.xpath('//*[@class="b-fight-details__table-body"]//tr')[1:]:
             yield {
                 'Url': response.request.url,
                 'Result': fight_url.xpath('.//td[1]/p/a/i/i/text()').extract_first().strip(),
                 # The name fields take the first and second whitespace-separated
                 # words of the link text.
                 'First_Name': fight_url.xpath('.//td[2]/p[1]/a/text()').extract_first().strip().split()[0],
                 'Last_Name': fight_url.xpath('.//td[2]/p[1]/a/text()').extract_first().strip().split()[1],
                 'Opponent_First': fight_url.xpath('.//td[2]/p[2]/a/text()').extract_first().strip().split()[0],
                 'Opponent_Last': fight_url.xpath('.//td[2]/p[2]/a/text()').extract_first().strip().split()[1],
                 'Opponent_URL': fight_url.xpath('.//td[2]/p[2]/a/@href').extract_first(),
                 # NOTE: the lines below are the part being asked about and are
                 # NOT valid Python -- an `if` statement cannot appear as a value
                 # inside a dict literal. The intent is: None when the result
                 # text is "next", otherwise the first href found in the row.
                 'Fight_Url':
                     if fight_url.xpath('.//td[1]/p/a/i/i/text()').extract_first().strip() == "next":
                         None
                     else:
                         fight_url.xpath('.//@href').extract_first(),
             }

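You just need to change the if-else syntax slightly: inside a dict literal, use a conditional expression (`value_if_true if condition else value_if_false`) instead of an `if` statement: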

        for fight_url in response.xpath('//*[@class="b-fight-details__table-body"]//tr')[1:]:
            yield {
                'Url': response.request.url,
                'Result': fight_url.xpath('.//td[1]/p/a/i/i/text()').extract_first().strip(),
                'First_Name': fight_url.xpath('.//td[2]/p[1]/a/text()').extract_first().strip().split()[0],
                'Last_Name': fight_url.xpath('.//td[2]/p[1]/a/text()').extract_first().strip().split()[1],
                'Opponent_First': fight_url.xpath('.//td[2]/p[2]/a/text()').extract_first().strip().split()[0],
                'Opponent_Last': fight_url.xpath('.//td[2]/p[2]/a/text()').extract_first().strip().split()[1],
                'Opponent_URL': fight_url.xpath('.//td[2]/p[2]/a/@href').extract_first(),
                'Fight_Url':
                        None if fight_url.xpath('.//td[1]/p/a/i/i/text()').extract_first().strip() == "next"
                        else fight_url.xpath('.//@href').extract_first(),
            }

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM