简体   繁体   中英

Removing certain characters from an url string (Python3)

I finally figured out how to convert my dictionary of data into a string that gives me the desired output. I was wondering how to make this code more compact. Any suggestions?

import urllib.parse
import urllib.request
import re

# User input.
start = '19851123'
end = '19851124'
stns = [('235', '240')]
var = ['TEMP']  # ('TEMP') is only a parenthesised string, not a tuple

# Collect the input into the dict of query parameters.
req = {
    'start': start,
    'end': end,
    'vars': var,
    'stns': stns,
}

# Encode the dict as a query string.  The punctuation of the tuple repr
# ("(", ")", "'" and ",") is declared safe so it stays unescaped and can be
# stripped by the regex below.
q = urllib.parse.urlencode(req, doseq=True, safe="()',")

# Strip the tuple punctuation, then turn the '+' (the encoded space between
# the two station numbers) into ':'.  Raw strings keep the regex escapes from
# being read as invalid string escape sequences.
q = re.sub(r"[\(',\)]", "", q)
q = re.sub(r"\+", ":", q)

# Combine the URL and the query.
url = 'http://projects.knmi.nl/klimatologie/daggegeven/getdata_dag.cgi?%s' % q

# Open the URL; the with-block closes the response once it has been read.
with urllib.request.urlopen(url) as fhand:
    # Print the requested data without the '#'-prefixed header lines.
    for line in fhand:
        text = line.decode()  # decode each line once instead of twice
        if not text.startswith('#'):
            print(text.strip())

By preprocessing your req input, you could avoid the need for re:

# Store the values in the shape the query string needs: integer dates and
# the 'stns' entry already joined with ':'.
req = dict(
    start=19851123,
    end=19851124,
    vars=['TEMP'],
    stns=['235:240'],
)

# ':' is declared safe so that it is not percent-encoded.
urllib.parse.urlencode(req, safe=':', doseq=True)
# 'start=19851123&end=19851124&vars=TEMP&stns=235:240'

So you could limit your code to:

from urllib.parse import urlencode
from urllib.request import urlopen


def main(parameters, url='http://projects.knmi.nl/klimatologie/daggegeven/getdata_dag.cgi'):
    """Request *url* with *parameters* as query string and yield the data lines.

    Args:
        parameters: Mapping of query parameter names to values. Sequence
            values are expanded into repeated parameters (``doseq=True``)
            and ``':'`` is left unescaped.
        url: Address the encoded query is appended to after a ``'?'``.

    Yields:
        Every response line that does not start with ``'#'``, decoded to
        ``str`` and stripped of surrounding whitespace.
    """
    query_string = urlencode(parameters, doseq=True, safe=':')
    target = '{0}?{1}'.format(url, query_string)
    with urlopen(target) as response:
        for raw_line in response:
            # Lines starting with '#' are skipped, not yielded.
            if raw_line.startswith(b'#'):
                continue
            yield raw_line.decode().strip()


if __name__ == '__main__':
    # Example query: integer dates plus the 'vars' and 'stns' lists, with the
    # 'stns' entry already joined by ':'.
    request_parameters = dict(
        start=19851123,
        end=19851124,
        vars=['TEMP'],
        stns=['235:240'],
    )
    # Print each data line as soon as main() yields it.
    for data_line in main(request_parameters):
        print(data_line)

But modifying the user input this way feels cumbersome, so using argparse might be beneficial. By the look of things, vars and stns can be of varying length, so you might benefit from the 'append' action combined with specific handling of the default case:

import argparse
from urllib.parse import urlencode
from urllib.request import urlopen


def command_line_parser():
    """Build the argument parser for the script.

    Returns:
        An ``argparse.ArgumentParser`` with two integer positionals
        (``start`` and ``end``) and two repeatable options: ``-v/--vars``
        (one value per use) and ``-s/--stns`` (two integers per use).
    """
    cli = argparse.ArgumentParser()

    # Both positionals are declared the same way.
    for positional in ('start', 'end'):
        cli.add_argument(positional, type=int)

    # 'append' collects every occurrence of the option into a list; the
    # attribute stays None when the option is never given.
    cli.add_argument('-v', '--vars', action='append')
    cli.add_argument('-s', '--stns', action='append', nargs=2, type=int)
    return cli


def parse_command_line(parser, argv=None):
    """Parse arguments with *parser* and normalise them into query parameters.

    Args:
        parser: Parser whose resulting namespace has ``vars`` and ``stns``
            attributes, each either ``None`` or a list.
        argv: Optional list of argument strings to parse. When ``None``
            (the default) ``parse_args`` reads ``sys.argv[1:]``, which was
            the only behaviour before this parameter existed.

    Returns:
        The parsed namespace, where ``vars`` falls back to ``['TEMP']``,
        ``stns`` falls back to the single pair ``(235, 240)``, and every
        ``stns`` pair is formatted as the string ``'first:second'``.
    """
    args = parser.parse_args(argv)
    # The defaults are applied here rather than through argparse's
    # ``default=``: the 'append' action would add user-supplied values to
    # such a default list instead of replacing it.
    if args.vars is None:
        args.vars = ['TEMP']
    if args.stns is None:
        args.stns = [(235, 240)]
    args.stns = ['{}:{}'.format(*stn) for stn in args.stns]
    return args


def main(parameters, url='http://projects.knmi.nl/klimatologie/daggegeven/getdata_dag.cgi'):
    """Fetch *url* with the encoded *parameters* and yield its data lines.

    Args:
        parameters: Mapping of query parameter names to values. List values
            become repeated parameters (``doseq=True``); ``':'`` is kept
            unescaped.
        url: Address that receives the query string after a ``'?'``.

    Yields:
        Each line of the response that does not begin with ``'#'``, decoded
        to ``str`` with surrounding whitespace removed.
    """
    encoded = urlencode(parameters, doseq=True, safe=':')
    with urlopen('{base}?{query}'.format(base=url, query=encoded)) as reply:
        # Filter lazily on the raw bytes so '#' lines are never decoded.
        data_chunks = (chunk for chunk in reply if not chunk.startswith(b'#'))
        for chunk in data_chunks:
            yield chunk.decode().strip()


if __name__ == '__main__':
    # Parse the command line, expose the resulting namespace as a dict of
    # query parameters, and print each data line that main() yields.
    cli_arguments = parse_command_line(command_line_parser())
    for data_line in main(vars(cli_arguments)):
        print(data_line)

Usage being something like

$ python script.py -v TEMP -v TEST -s 235 240 19851123 19851124

which would query the following URL:

http://projects.knmi.nl/klimatologie/daggegeven/getdata_dag.cgi?start=19851123&end=19851124&vars=TEMP&vars=TEST&stns=235:240

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM