I'm learning how to use Scrapy
import scrapy
class TestSetSpider(scrapy.Spider):
    """Spider that scrapes product names from the start page.

    Every ``div`` with class ``product-name`` produces one item whose
    ``name`` field is the text of the ``h1`` directly inside that div.
    """

    name = "test_spider"
    start_urls = ['https://example.html']

    def parse(self, response):
        """Yield one ``{'name': ...}`` item per ``product-name`` div.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            dict: ``{'name': <h1 text>}``; the value is whatever
            ``extract_first()`` returns for the ``h1/text()`` query.
        """
        for brickset in response.xpath('//div[@class="product-name"]'):
            yield {
                # Relative XPath: evaluated against the current div only.
                'name': brickset.xpath('h1/text()').extract_first(),
            }
I run this spider with the command: `scrapy crawl test_spider -o test.csv`
This works fine for `//div[@class="product-name"]`, but I don't know how to add another CSS/XPath class in the same spider file.
I'm trying this, but it doesn't work:
import scrapy
# Spider from the question: an attempt to scrape two different div classes
# by writing two separate `parse` definitions.
class TestSetSpider(scrapy.Spider):
name = "test_spider"
start_urls = ['https://example.html']
# First `parse`: yields a 'name' item for each div with class "product-name".
def parse(self, response):
for test in response.xpath('//div[@class="product-name"]'):
yield {
'name': test.xpath('h1/text()').extract_first(),
}
# Second `parse`: yields a 'color' item for each div with class "another-class".
# NOTE(review): the indentation of this snippet was lost; assuming both
# definitions belong to the class body, this later `def parse` rebinds the
# name, so the "product-name" loop above would never run. That would explain
# why only one kind of item is produced.
def parse(self, response):
for attempt in response.xpath('//div[@class="another-class"]'):
yield {
'color': attempt.xpath('h1/a/text()').extract_first(),
}
Please help me to do this.
def parse(self, response):
    """Collect names from both div classes and append them to ``out.csv``.

    Rows for ``product-name`` divs are written first, followed by rows for
    ``another-product-name`` divs. Each row holds the text of the ``h1``
    directly inside the matching div.

    Args:
        response: Page object exposing ``xpath()``, whose results in turn
            expose ``xpath(...).extract_first()``.

    Returns:
        None. The only effect is the rows appended to ``out.csv`` in the
        current working directory.
    """
    # Function-scope import keeps this snippet self-contained.
    import csv

    # One {'name': ...} record per matching div, in document order.
    product_name_lst = [
        {'name': test.xpath('h1/text()').extract_first()}
        for test in response.xpath('//div[@class="product-name"]')
    ]
    another_product_name_lst = [
        {'name': test.xpath('h1/text()').extract_first()}
        for test in response.xpath('//div[@class="another-product-name"]')
    ]

    # 'a' means append: rows already in the file are kept, not overwritten.
    # newline='' lets the csv module control line endings itself, and the
    # `with` block closes the file even if writing fails.
    with open('out.csv', 'a', newline='', encoding='utf-8') as out_file:
        writer = csv.DictWriter(out_file, fieldnames=['name'])
        writer.writerows(product_name_lst + another_product_name_lst)
Just use two `for` loops:
import scrapy
class TestSetSpider(scrapy.Spider):
    """Spider that scrapes names from two different div classes on one page.

    A single ``parse`` callback handles both classes by running one loop
    per XPath query, so items from both queries reach the same output.
    """

    name = "test_spider"
    start_urls = ['https://example.html']

    def parse(self, response):
        """Yield ``{'name': ...}`` items for both kinds of div.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            dict: First one item per ``product-name`` div, then one item
            per ``another-class`` div; ``name`` is the text of the ``h1``
            directly inside the div.
        """
        # First pass: divs with class "product-name".
        for brickset in response.xpath('//div[@class="product-name"]'):
            yield {
                'name': brickset.xpath('h1/text()').extract_first(),
            }
        # Second pass: divs with class "another-class", same item shape.
        for brickset in response.xpath('//div[@class="another-class"]'):
            yield {
                'name': brickset.xpath('h1/text()').extract_first(),
            }
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.