简体   繁体   中英

Plus instead of a space in the url

I'm getting a string url request, I want to handle it and send the parameters, because it is very long. I get plus in req.url instead of a space like query .

query = "https://www.superjob.ru/resume/search_resume.html?sbmit=1&detail_search=1&keywords%5B0%5D%5Bkeys%5D=%D0%9A%D0%BB%D0%B0%D0%B4%D0%BE%D0%B2%D1%89%D0%B8%D0%BA+%D0%9A%D0%BE%D0%BC%D0%BF%D0%BB%D0%B5%D0%BA%D1%82%D0%BE%D0%B2%D1%89%D0%B8%D0%BA+%D0%93%D1%80%D1%83%D0%B7%D1%87%D0%B8%D0%BA+%D0%A0%D0%B0%D0%B1%D0%BE%D1%82%D0%BD%D0%B8%D0%BA+%D1%81%D0%BA%D0%BB%D0%B0%D0%B4%D0%B0&keywords%5B0%5D%5Bskwc%5D=or&keywords%5B0%5D%5Bsrws%5D=60&keywords%5B1%5D%5Bkeys%5D=%D0%9A%D0%BB%D0%B0%D0%B4%D0%BE%D0%B2%D1%89%D0%B8%D0%BA+%D0%9A%D0%BE%D0%BC%D0%BF%D0%BB%D0%B5%D0%BA%D1%82%D0%BE%D0%B2%D1%89%D0%B8%D0%BA+%D0%93%D1%80%D1%83%D0%B7%D1%87%D0%B8%D0%BA+%D0%A0%D0%B0%D0%B1%D0%BE%D1%82%D0%BD%D0%B8%D0%BA+%D1%81%D0%BA%D0%BB%D0%B0%D0%B4%D0%B0&keywords%5B1%5D%5Bskwc%5D=or&keywords%5B1%5D%5Bsrws%5D=7&keywords%5B2%5D%5Bkeys%5D=%D0%AE%D0%BB%D0%BC%D0%B0%D1%80%D1%82&keywords%5B2%5D%5Bskwc%5D=nein&keywords%5B2%5D%5Bsrws%5D=50&keywords%5B3%5D%5Bkeys%5D=%D0%A0%D1%83%D0%BA%D0%BE%D0%B2%D0%BE%D0%B4%D0%B8%D1%82%D0%B5%D0%BB%D1%8C+%D0%B4%D0%B8%D1%80%D0%B5%D0%BA%D1%82%D0%BE%D1%80+%D0%BD%D0%B0%D1%87%D0%B0%D0%BB%D1%8C%D0%BD%D0%B8%D0%BA+%D1%8D%D0%BA%D0%BE%D0%BD%D0%BE%D0%BC%D0%B8%D1%81%D1%82+%D0%B0%D0%BD%D0%B0%D0%BB%D0%B8%D1%82%D0%B8%D0%BA+%D0%BB%D0%BE%D0%B3%D0%B8%D1%81%D1%82+%D0%9E%D1%84%D0%B8%D1%81-%D0%BC%D0%B5%D0%BD%D0%B5%D0%B4%D0%B6%D0%B5%D1%80+%D0%93%D0%BE%D1%81%D1%83%D0%B4%D0%B0%D1%80%D1%81%D0%B2*&keywords%5B3%5D%5Bskwc%5D=nein&keywords%5B3%5D%5Bsrws%5D=60&exclude_words=%D0%A0%D1%83%D0%BA%D0%BE%D0%B2%D0%BE%D0%B4%D0%B8%D1%82%D0%B5%D0%BB%D1%8C+%D0%B4%D0%B8%D1%80%D0%B5%D0%BA%D1%82%D0%BE%D1%80+%D0%BD%D0%B0%D1%87%D0%B0%D0%BB%D1%8C%D0%BD%D0%B8%D0%BA+%D1%8D%D0%BA%D0%BE%D0%BD%D0%BE%D0%BC%D0%B8%D1%81%D1%82+%D0%B0%D0%BD%D0%B0%D0%BB%D0%B8%D1%82%D0%B8%D0%BA+%D0%BB%D0%BE%D0%B3%D0%B8%D1%81%D1%82+%D0%9E%D1%84%D0%B8%D1%81-%D0%BC%D0%B5%D0%BD%D0%B5%D0%B4%D0%B6%D0%B5%D1%80+%D0%93%D0%BE%D1%81%D1%83%D0%B4%D0%B0%D1%80%D1%81%D0%B2*&period=7&place_of_work=0&t%5B%5D=4&m%5B%5D=194&m%5B%5D=195&m%5B%5D=193&m%5B%5D=192&m%5B%5D=191&m%5B%5D=164&m%5B%5D=162&m%5B%5D=163&m%5B%5D=150&m%5B%5D=534&m%5B%5D=540&m%5B%5D=149&m%5B%5D=148&m%5B%5D=147&m%5B%5D=146&m%5B%5D=535&m%5B%5D=536&m%5B%5D=159&m%5B%5D=157&m%5B%5D=158&m%5B%5D=156&m%5B%5D=155&m%5B%5D=154&m%5B%5D=161&m%5B%5D=20&m%5B%5D=21&m%5B%5D=22&m%5B%5D=24&m%5B%5D=23&m%5B%5D=25&m%5B%5D=28&m%5B%5D=573&m%5B%5D=29&m%5B%5D=27&m%5B%5D=26&m%5B%5D=145&m%5B%5D=144&m%5B%5D=143&m%5B%5D=142&m%5B%5D=141&m%5B%5D=30&m%5B%5D=31&m%5B%5D=115&m%5B%5D=153&m%5B%5D=72&m%5B%5D=84&m%5B%5D=152&m%5B%5D=83&m%5B%5D=82&m%5B%5D=81&m%5B%5D=79&m%5B%5D=80&m%5B%5D=542&m%5B%5D=151&m%5B%5D=46&m%5B%5D=85&paymentfrom=20000&paymentto=35000&type_of_work=0&citizenship%5B0%5D=1&old1=18&old2=40&maritalstatus=0&pol=0&children=0&education=0&eduform=0&id_institute=0&institution=&languages%5B0%5D%5Blanguage_id%5D=0&languages%5B0%5D%5Blanguage_level%5D=0&business_trip=0"
def getparams(url):
    params = {}
    urlarr = url.split("&")
    i = 0
    while i < len(urlarr):
        params[urllib2.unquote(urlarr[i].split("=")[0].encode("utf-8"))] = urllib2.unquote(urlarr[i].split("=")[1].encode('utf-8'))
        i += 1
    return params
s = Session()
req = s.get(query.split("?")[0], params=getparams(query.split("?")[1]))
print(req.url)

I tried decode() instead encode() and without encode() and without urllib2.unquote()

Have you looked at the url you are posting? you set

query = "https://www.superjob.ru/resume/search_resume.html?sbmit=1&detail_search=1&keywords%5B0%5D%5Bkeys%5D=%D0%9A%D0%BB%D0%B0%D0%B4%D0%BE%D0%B2%D1%89%D0%B8%D0%BA+%D0%9A%D0%BE%D0%BC%D0%BF%D0%BB%D0%B5%D0%BA%D1%82%D0%BE%D0%B2%D1%89%D0%B8%D0%BA+%D0%93%D1%80%D1%83%D0%B7%D1%87%D0%B8%D0%BA+%D0%A0%D0%B0%D0%B1%D0%BE%D1%82%D0%BD%D0%B8%D0%BA+%D1%81%D0%BA%D0%BB%D0%B0%D0%B4%D0%B0&keywords%5B0%5D%5Bskwc%5D=or&keywords%5B0%5D%5Bsrws%5D=60&keywords%5B1%5D%5Bkeys%5D=%D0%9A%D0%BB%D0%B0%D0%B4%D0%BE%D0%B2%D1%89%D0%B8%D0%BA+%D0%9A%D0%BE%D0%BC%D0%BF%D0%BB%D0%B5%D0%BA%D1%82%D0%BE%D0%B2%D1%89%D0%B8%D0%BA+%D0%93%D1%80%D1%83%D0%B7%D1%87%D0%B8%D0%BA+%D0%A0%D0%B0%D0%B1%D0%BE%D1%82%D0%BD%D0%B8%D0%BA+%D1%81%D0%BA%D0%BB%D0%B0%D0%B4%D0%B0&keywords%5B1%5D%5Bskwc%5D=or&keywords%5B1%5D%5Bsrws%5D=7&keywords%5B2%5D%5Bkeys%5D=%D0%AE%D0%BB%D0%BC%D0%B0%D1%80%D1%82&keywords%5B2%5D%5Bskwc%5D=nein&keywords%5B2%5D%5Bsrws%5D=50&keywords%5B3%5D%5Bkeys%5D=%D0%A0%D1%83%D0%BA%D0%BE%D0%B2%D0%BE%D0%B4%D0%B8%D1%82%D0%B5%D0%BB%D1%8C+%D0%B4%D0%B8%D1%80%D0%B5%D0%BA%D1%82%D0%BE%D1%80+%D0%BD%D0%B0%D1%87%D0%B0%D0%BB%D1%8C%D0%BD%D0%B8%D0%BA+%D1%8D%D0%BA%D0%BE%D0%BD%D0%BE%D0%BC%D0%B8%D1%81%D1%82+%D0%B0%D0%BD%D0%B0%D0%BB%D0%B8%D1%82%D0%B8%D0%BA+%D0%BB%D0%BE%D0%B3%D0%B8%D1%81%D1%82+%D0%9E%D1%84%D0%B8%D1%81-%D0%BC%D0%B5%D0%BD%D0%B5%D0%B4%D0%B6%D0%B5%D1%80+%D0%93%D0%BE%D1%81%D1%83%D0%B4%D0%B0%D1%80%D1%81%D0%B2*&keywords%5B3%5D%5Bskwc%5D=nein&keywords%5B3%5D%5Bsrws%5D=60&exclude_words=%D0%A0%D1%83%D0%BA%D0%BE%D0%B2%D0%BE%D0%B4%D0%B8%D1%82%D0%B5%D0%BB%D1%8C+%D0%B4%D0%B8%D1%80%D0%B5%D0%BA%D1%82%D0%BE%D1%80+%D0%BD%D0%B0%D1%87%D0%B0%D0%BB%D1%8C%D0%BD%D0%B8%D0%BA+%D1%8D%D0%BA%D0%BE%D0%BD%D0%BE%D0%BC%D0%B8%D1%81%D1%82+%D0%B0%D0%BD%D0%B0%D0%BB%D0%B8%D1%82%D0%B8%D0%BA+%D0%BB%D0%BE%D0%B3%D0%B8%D1%81%D1%82+%D0%9E%D1%84%D0%B8%D1%81-%D0%BC%D0%B5%D0%BD%D0%B5%D0%B4%D0%B6%D0%B5%D1%80+%D0%93%D0%BE%D1%81%D1%83%D0%B4%D0%B0%D1%80%D1%81%D0%B2*&period=7&place_of_work=0&t%5B%5D=4&m%5B%5D=194&m%5B%5D=195&m%5B%5D=193&m%5B%5D=192&m%5B%5D=191&m%5B%5D=164&m%5B%5D=162&m%5B%5D=163&m%5B%5D=150&m%5B%5D=534&m%5B%5D=540&m%5B%5D=149&m%5B%5D=148&m%5B%5D=147&m%5B%5D=146&m%5B%5D=535&m%5B%5D=536&m%5B%5D=159&m%5B%5D=157&m%5B%5D=158&m%5B%5D=156&m%5B%5D=155&m%5B%5D=154&m%5B%5D=161&m%5B%5D=20&m%5B%5D=21&m%5B%5D=22&m%5B%5D=24&m%5B%5D=23&m%5B%5D=25&m%5B%5D=28&m%5B%5D=573&m%5B%5D=29&m%5B%5D=27&m%5B%5D=26&m%5B%5D=145&m%5B%5D=144&m%5B%5D=143&m%5B%5D=142&m%5B%5D=141&m%5B%5D=30&m%5B%5D=31&m%5B%5D=115&m%5B%5D=153&m%5B%5D=72&m%5B%5D=84&m%5B%5D=152&m%5B%5D=83&m%5B%5D=82&m%5B%5D=81&m%5B%5D=79&m%5B%5D=80&m%5B%5D=542&m%5B%5D=151&m%5B%5D=46&m%5B%5D=85&paymentfrom=20000&paymentto=35000&type_of_work=0&citizenship%5B0%5D=1&old1=18&old2=40&maritalstatus=0&pol=0&children=0&education=0&eduform=0&id_institute=0&institution=&languages%5B0%5D%5Blanguage_id%5D=0&languages%5B0%5D%5Blanguage_level%5D=0&business_trip=0"

which contains a lot of escape quotes. None of them are a space ( %20 )

The reason you get a plus instead of a space, is because your url contains a plus in place of every space. This is expected behaviour, as proper URLs should not contain any spaces, and thus, are often converted into "+" beforehand.

If I put your escaped URL into my browser hotbar and look at it, all I see is "+", no spaces there.

This is from somewhere at the start of your URL:

detail_search=1&keywords%5B0%5D%5Bkeys%5D=%D0%9A%D0%BB%D0%B0%D0%B4%D0%BE%D0%B2%D1%89%D0%B8%D0%BA+%D0%9A%D0%BE%D0%BC%D0%BF%D0%BB%D0%B5%D0%BA%D1%82%D0%BE%D0%B2%D1%89%D0%B8%D0%BA

which is essentially detail_search=1&keywords[0][keys]=Кладовщик+Комплектовщик if unescaped. As you can clearly see, both versions, quoted and unquoted, contain the plus. This is done by the website, and can't (shouldn't) be changed, especially if you want to reconstruct the URL somewhere else.

If you need the parameters without the plus, just do "myUnquotedString".replace("+"," ") (after unquoting, because i don't know what urllib2 does if it gets a malformed string as parameter for unquote() .

I solved this problem like this:

query = "https://www.superjob.ru/resume/search_resume.html?sbmit=1&detail_search=1&keywords%5B0%5D%5Bkeys%5D=%D0%9A%D0%BB%D0%B0%D0%B4%D0%BE%D0%B2%D1%89%D0%B8%D0%BA+%D0%9A%D0%BE%D0%BC%D0%BF%D0%BB%D0%B5%D0%BA%D1%82%D0%BE%D0%B2%D1%89%D0%B8%D0%BA+%D0%93%D1%80%D1%83%D0%B7%D1%87%D0%B8%D0%BA+%D0%A0%D0%B0%D0%B1%D0%BE%D1%82%D0%BD%D0%B8%D0%BA+%D1%81%D0%BA%D0%BB%D0%B0%D0%B4%D0%B0&keywords%5B0%5D%5Bskwc%5D=or&keywords%5B0%5D%5Bsrws%5D=60&keywords%5B1%5D%5Bkeys%5D=%D0%9A%D0%BB%D0%B0%D0%B4%D0%BE%D0%B2%D1%89%D0%B8%D0%BA+%D0%9A%D0%BE%D0%BC%D0%BF%D0%BB%D0%B5%D0%BA%D1%82%D0%BE%D0%B2%D1%89%D0%B8%D0%BA+%D0%93%D1%80%D1%83%D0%B7%D1%87%D0%B8%D0%BA+%D0%A0%D0%B0%D0%B1%D0%BE%D1%82%D0%BD%D0%B8%D0%BA+%D1%81%D0%BA%D0%BB%D0%B0%D0%B4%D0%B0&keywords%5B1%5D%5Bskwc%5D=or&keywords%5B1%5D%5Bsrws%5D=7&keywords%5B2%5D%5Bkeys%5D=%D0%AE%D0%BB%D0%BC%D0%B0%D1%80%D1%82&keywords%5B2%5D%5Bskwc%5D=nein&keywords%5B2%5D%5Bsrws%5D=50&keywords%5B3%5D%5Bkeys%5D=%D0%A0%D1%83%D0%BA%D0%BE%D0%B2%D0%BE%D0%B4%D0%B8%D1%82%D0%B5%D0%BB%D1%8C+%D0%B4%D0%B8%D1%80%D0%B5%D0%BA%D1%82%D0%BE%D1%80+%D0%BD%D0%B0%D1%87%D0%B0%D0%BB%D1%8C%D0%BD%D0%B8%D0%BA+%D1%8D%D0%BA%D0%BE%D0%BD%D0%BE%D0%BC%D0%B8%D1%81%D1%82+%D0%B0%D0%BD%D0%B0%D0%BB%D0%B8%D1%82%D0%B8%D0%BA+%D0%BB%D0%BE%D0%B3%D0%B8%D1%81%D1%82+%D0%9E%D1%84%D0%B8%D1%81-%D0%BC%D0%B5%D0%BD%D0%B5%D0%B4%D0%B6%D0%B5%D1%80+%D0%93%D0%BE%D1%81%D1%83%D0%B4%D0%B0%D1%80%D1%81%D0%B2*&keywords%5B3%5D%5Bskwc%5D=nein&keywords%5B3%5D%5Bsrws%5D=60&exclude_words=%D0%A0%D1%83%D0%BA%D0%BE%D0%B2%D0%BE%D0%B4%D0%B8%D1%82%D0%B5%D0%BB%D1%8C+%D0%B4%D0%B8%D1%80%D0%B5%D0%BA%D1%82%D0%BE%D1%80+%D0%BD%D0%B0%D1%87%D0%B0%D0%BB%D1%8C%D0%BD%D0%B8%D0%BA+%D1%8D%D0%BA%D0%BE%D0%BD%D0%BE%D0%BC%D0%B8%D1%81%D1%82+%D0%B0%D0%BD%D0%B0%D0%BB%D0%B8%D1%82%D0%B8%D0%BA+%D0%BB%D0%BE%D0%B3%D0%B8%D1%81%D1%82+%D0%9E%D1%84%D0%B8%D1%81-%D0%BC%D0%B5%D0%BD%D0%B5%D0%B4%D0%B6%D0%B5%D1%80+%D0%93%D0%BE%D1%81%D1%83%D0%B4%D0%B0%D1%80%D1%81%D0%B2*&period=7&place_of_work=0&t%5B%5D=4&m%5B%5D=194&m%5B%5D=195&m%5B%5D=193&m%5B%5D=192&m%5B%5D=191&m%5B%5D=164&m%5B%5D=162&m%5B%5D=163&m%5B%5D=150&m%5B%5D=534&m%5B%5D=540&m%5B%5D=149&m%5B%5D=148&m%5B%5D=147&m%5B%5D=146&m%5B%5D=535&m%5B%5D=536&m%5B%5D=159&m%5B%5D=157&m%5B%5D=158&m%5B%5D=156&m%5B%5D=155&m%5B%5D=154&m%5B%5D=161&m%5B%5D=20&m%5B%5D=21&m%5B%5D=22&m%5B%5D=24&m%5B%5D=23&m%5B%5D=25&m%5B%5D=28&m%5B%5D=573&m%5B%5D=29&m%5B%5D=27&m%5B%5D=26&m%5B%5D=145&m%5B%5D=144&m%5B%5D=143&m%5B%5D=142&m%5B%5D=141&m%5B%5D=30&m%5B%5D=31&m%5B%5D=115&m%5B%5D=153&m%5B%5D=72&m%5B%5D=84&m%5B%5D=152&m%5B%5D=83&m%5B%5D=82&m%5B%5D=81&m%5B%5D=79&m%5B%5D=80&m%5B%5D=542&m%5B%5D=151&m%5B%5D=46&m%5B%5D=85&paymentfrom=20000&paymentto=35000&type_of_work=0&citizenship%5B0%5D=1&old1=18&old2=40&maritalstatus=0&pol=0&children=0&education=0&eduform=0&id_institute=0&institution=&languages%5B0%5D%5Blanguage_id%5D=0&languages%5B0%5D%5Blanguage_level%5D=0&business_trip=0"
query = query.encode('utf-8')
s = Session()
req = s.get(query.split("?")[0], params=parse_qs(urlparse(query).query))

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM