I'm not able to login with the script. Go to https://www.amscan.com , look for "Shop Now" and then "Log In". I'm using both FormRequest and Request but the error coming is HTTP status code is not handled or not allowed. Ignoring Response [400] Below is my code and let me know if anyone helps me.
import scrapy
from scrapy.shell import inspect_response
import json
import re
class Amscan(scrapy.Spider):
name = 'amscan'
start_urls = ['https://www.amscan.com/']
def parse(self, response):
url = 'https://b2b.amscan.com/auth/login'
payload="{'login':{'mode':'email','username':'info@soaringfinancial.com','password':'PassSoaring$123'},'redirect':'#!/'}"
headers = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
'content-type': 'application/json;charset=UTF-8',
'authority': 'b2b.amscan.com',
'sec-ch-ua': '"Google Chrome";v="87", " Not;A Brand";v="99", "Chromium";v="87"',
'accept': 'application/json, text/plain, */*',
'sec-ch-ua-mobile': '?0',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'accept-language': 'en-US,en;q=0.9',
}
formdata ={
'login': {
'mode': "email",
'password': my_password,
'username': my_username,
},
'redirect': "#!/",
}
yield scrapy.FormRequest(url, formdata=formdata, headers=headers, method='POST', callback=self.login)
def login(self, response):
inspect_response(response, self)
error is...
2021-01-09 16:33:26 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.amscan.com/> (referer: None)
2021-01-09 16:33:27 [scrapy.core.engine] DEBUG: Crawled (400) <POST https://b2b.amscan.com/auth/login> (referer: https://www.amscan.com/)
2021-01-09 16:33:27 [scrapy.spidermiddlewares.httperror] INFO: Ignoring response <400 https://b2b.amscan.com/auth/login>: HTTP status code is not handled or not allowed
2021-01-09 16:33:27 [scrapy.core.engine] INFO: Closing spider (finished)
2021-01-09 16:33:27 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 920,
'downloader/request_count': 2,
'downloader/request_method_count/GET': 1,
'downloader/request_method_count/POST': 1,
'downloader/response_bytes': 12975,
'downloader/response_count': 2,
'downloader/response_status_count/200': 1,
'downloader/response_status_count/400': 1,
'elapsed_time_seconds': 4.394109,
'finish_reason': 'finished',
'finish_time': datetime.datetime(2021, 1, 9, 11, 3, 27, 909495),
'httperror/response_ignored_count': 1,
'httperror/response_ignored_status_count/400': 1,
'log_count/DEBUG': 2,
'log_count/INFO': 11,
'request_depth_max': 1,
'response_received_count': 2,
'scheduler/dequeued': 2,
'scheduler/dequeued/memory': 2,
'scheduler/enqueued': 2,
'scheduler/enqueued/memory': 2,
'start_time': datetime.datetime(2021, 1, 9, 11, 3, 23, 515386)}
here's some curl2scrapy results for your request Try it in scrapy shell. Looks like you've missed csrf stuff and got 404'd for it.
from scrapy import Request
url = 'https://b2b.amscan.com/auth/login'
headers = {
"User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0",
"Accept": "application/json, text/plain, */*",
"Accept-Language": "en-US,en;q=0.5",
"Content-Type": "application/json;charset=utf-8",
"X-XSRF-TOKEN": "qLEN9AAM-XFCR1rAsQQ0L5lHglyxw_pNpWYg",
"Origin": "https://b2b.amscan.com",
"Connection": "keep-alive",
"Referer": "https://b2b.amscan.com/",
"TE": "Trailers"
}
cookies = {
"_csrf": "NApyTe0Bsv6WcTyxKs_qcF8F",
"XSRF-TOKEN": "qLEN9AAM-XFCR1rAsQQ0L5lHglyxw_pNpWYg",
"client_sid": "s%3AIkCcqv7GeKTLHwc9NaV2eWFV8EqEJXg0.jbE9Hfl0uobLScUfJk8muPteqxSEK0smQsZx2wkFn6Y"
}
body = '{"login":{"mode":"email","username":"ololo@trololo.com","password":"ololo"},"redirect":"#!/"}'
request = Request(
url=url,
method='POST',
dont_filter=True,
cookies=cookies,
headers=headers,
body=body,
)
fetch(request)
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.