简体   繁体   中英

Error HTTP status code is not handled or not allowed

I am trying to get data from a JSON API, but Scrapy gives me the error "HTTP status code is not handled or not allowed". Is there any solution for handling this error in Scrapy? What is the reason this error occurs: is it shown because too many requests are being made? This is the page link: https://www.nationalhardwareshow.com/en-us/attend/exhibitor-list.html

import scrapy
from scrapy import FormRequest
from scrapy.crawler import CrawlerProcess
from scrapy.http import Request


class TestSpider(scrapy.Spider):
    """Spider from the question: POST one query to an Algolia index and print the JSON.

    This is the version reported to fail with
    "HTTP status code is not handled or not allowed".
    """
    name = 'test'
    # Algolia index query endpoint (presumably the one backing the exhibitor
    # list page); note that no query string is attached to it here.
    url="https://xd0u5m6y4r-dsn.algolia.net/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query"
    # Browser-like headers sent with the POST request.
    headers = {
    'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8,pt;q=0.7',
    'Connection': 'keep-alive',
    'Origin': 'https://www.nationalhardwareshow.com',
    'Referer': 'https://www.nationalhardwareshow.com/',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'cross-site',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
    'accept': 'application/json',
    'content-type': 'application/x-www-form-urlencoded',
    'sec-ch-ua': '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    }

    # Passed as `formdata` in start_requests: the three x-algolia-* values plus
    # the search-parameter string.
    # NOTE(review): FormRequest form-encodes this dict into the request body, so
    # the x-algolia-* credentials travel in the body rather than in the URL query
    # string -- possibly why the server rejects the request; confirm.
    params = {
        'x-algolia-agent': 'Algolia for vanilla JavaScript 3.27.1',
        'x-algolia-application-id': 'XD0U5M6Y4R',
        'x-algolia-api-key': 'd5cd7d4ec26134ff4a34d736a7f9ad47',
        'params':'query=&page=0&facetFilters=&optionalFilters=%5B%5D',
    }
    
    def start_requests(self):
        """Yield a single POST FormRequest to `url` using `headers` and `params`."""
        yield scrapy.FormRequest(
            url=self.url,
            method='POST',
            headers=self.headers,
            formdata=self.params,
            callback=self.parse,
        )
        
    def parse(self,response):
        """Print the response body decoded as JSON."""
        print(response.json())  
    import scrapy
    from scrapy import FormRequest
    from scrapy.crawler import CrawlerProcess
    from scrapy.http import Request
    
    
    class TestSpider(scrapy.Spider):
        """Spider from the question: POST one query to an Algolia index and print the JSON.

        NOTE(review): this listing repeats the spider shown just above it with an
        extra level of indentation -- it looks like a page-extraction duplicate.
        """
        name = 'test'
        # Algolia index query endpoint; no query string is attached to it here.
        url="https://xd0u5m6y4r-dsn.algolia.net/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query"
        # Browser-like headers sent with the POST request.
        headers = {
        'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8,pt;q=0.7',
        'Connection': 'keep-alive',
        'Origin': 'https://www.nationalhardwareshow.com',
        'Referer': 'https://www.nationalhardwareshow.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'cross-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
        'accept': 'application/json',
        'content-type': 'application/x-www-form-urlencoded',
        'sec-ch-ua': '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        }
    
        # Passed as `formdata` in start_requests: the three x-algolia-* values
        # plus the search-parameter string.
        params = {
            'x-algolia-agent': 'Algolia for vanilla JavaScript 3.27.1',
            'x-algolia-application-id': 'XD0U5M6Y4R',
            'x-algolia-api-key': 'd5cd7d4ec26134ff4a34d736a7f9ad47',
            'params':'query=&page=0&facetFilters=&optionalFilters=%5B%5D',
        }
        
        def start_requests(self):
            """Yield a single POST FormRequest to `url` using `headers` and `params`."""
            yield scrapy.FormRequest(
                url=self.url,
                method='POST',
                headers=self.headers,
                formdata=self.params,
                callback=self.parse,
            )
            
        def parse(self,response):
            """Print the response body decoded as JSON."""
            print(response.json())

You are getting "HTTP status code is not handled or not allowed" because the request carries superfluous headers and parameters. The following pared-down version works:

import scrapy
import json
from scrapy.crawler import CrawlerProcess
class TestSpider(scrapy.Spider):
    """Fetch page 0 of the Algolia index and emit one item per hit.

    The x-algolia-* identification values are carried in the URL query string;
    the search parameters are sent as a JSON-encoded request body.
    """

    name = 'test'

    # Throttling: one concurrent request per domain, with a download delay of 1.
    custom_settings = {'CONCURRENT_REQUESTS_PER_DOMAIN': 1, 'DOWNLOAD_DELAY': 1}

    def start_requests(self):
        """Yield the single POST request that queries the index."""
        endpoint = 'https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47'
        request_headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
        }
        # The search parameters go out as a JSON document in the body.
        payload = json.dumps(
            {"params": "query=&page=0&facetFilters=&optionalFilters=%5B%5D"}
        )
        yield scrapy.Request(
            url=endpoint,
            method='POST',
            headers=request_headers,
            body=payload,
            callback=self.parse,
        )

    def parse(self, response):
        """Decode the JSON body and yield ``{'Title': companyName}`` for each hit."""
        hits = json.loads(response.body)['hits']
        for hit in hits:
            yield {'Title': hit['companyName']}
       
if __name__ == "__main__":
    # CrawlerProcess takes (optional) settings, not a spider; the spider class
    # is handed to crawl(), which schedules it. start() then runs the crawl
    # and blocks until it has finished.
    process = CrawlerProcess()
    process.crawl(TestSpider)
    process.start()

Output:

{'Title': 'Bug Bite Thing'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'BULA'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'Bunnik Creations'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'McCordick Glove & Safety Inc'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'Burro Creative Solutions'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'Bytech/Case Logic USA'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'Cable Lasso'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'Caframo Ltd'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'California Air Tools'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'Calloway Mills/Home and More'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'Camp Chef'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'Canadian Spa Company'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'CAPS-LOCK'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'Carson LLC'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'Cascade Holdings'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'Catania Oils'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'CCH Products'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'CedarCraft'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': "Central Garden & Pet/Pennington/Howard Johnson's Enterprises"}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'Centrex Plastic LLC./American Plastics'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'Chaby International'}
2022-07-10 04:59:07 [scrapy.core.scraper] DEBUG: Scraped from <200 https://xd0u5m6y4r-2.algolianet.com/1/indexes/event-edition-eve-e6b1ae25-5b9f-457b-83b3-335667332366_en-us/query?x-algolia-agent=Algolia%20for%20vanilla%20JavaScript%203.27.1&x-algolia-application-id=XD0U5M6Y4R&x-algolia-api-key=d5cd7d4ec26134ff4a34d736a7f9ad47>
{'Title': 'Changzhou Feiwang Tool Co.,Ltd.'}
2022-07-10 04:59:07 [scrapy.core.engine] INFO: Closing spider (finished)
2022-07-10 04:59:07 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 643,
 'downloader/request_count': 1,
 'downloader/request_method_count/POST': 1,
 'downloader/response_bytes': 117197,
 'downloader/response_count': 1,
 'downloader/response_status_count/200': 1,
 'elapsed_time_seconds': 3.180524,
 'finish_reason': 'finished',
 'finish_time': datetime.datetime(2022, 7, 9, 22, 59, 7, 202813),
 'httpcompression/response_bytes': 765918,
 'httpcompression/response_count': 1,
 'item_scraped_count': 100,

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM