
Python: multiple simultaneous requests

This Python script makes GET requests to URLs loaded from a websites.txt file. It then checks each response for a "KEYWORD"; if the keyword is found, the URL is saved to "WorkingSites.txt".

Everything works perfectly, but it is too slow because it only checks one URL at a time. What is the best and easiest way to check, for example, 10 URLs at the same time?

Can you please provide an example based on my script below?

Thanks

import requests
import sys

if len(sys.argv) != 2:
    print "\n\033[34;1m[*]\033[0m python " + sys.argv[0] \
        + ' websites.txt '
    exit(0)

targetfile = open(sys.argv[1], 'r')
success = open('WorkingSites.txt', 'a')
while True:
    host = targetfile.readline().replace('\n', '')
    if not host:
        break
    if not host.startswith('http'):
        host = 'http://' + host
    print '\033[34;1m[*]\033[0m Check        : ' + host
    try:
        r = requests.get(host, timeout=5, headers={
            'Content-Type': 'application/x-www-form-urlencoded',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3163.100 Safari/537.36',
        })
        text = 'KEYWORD'
    except requests.exceptions.RequestException:
        print '\033[31;1m[-]\033[0m Failed        : No Response\n'
        continue
    if text in r.text:
        print '\033[32;1m[+]\033[0m success        : ' + host + '\n'
        success.write(host + '\n')
    else:
        print '\033[31;1m[-]\033[0m Failed        : ' + host + '\n'

print "\033[34;1m[*]\033[0m Output Saved On : WorkingSites.txt"
import asyncio
import concurrent.futures
import requests
import sys
from timeit import default_timer
import psutil

INPUT = 'websites.txt'
OUTPUT = 'WorkingSites.txt'
SUCCESS = open(OUTPUT, 'a')
START_TIME = default_timer()

def fetch(host):
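  # GET one host, print CPU/RAM/elapsed-time stats, and write the host to
  # WorkingSites.txt if the response contains KEYWORD.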
  KEYWORD = 'KEYWORD'

  try:
    with requests.get(host, timeout=5, headers={
    'Content-Type':'application/x-www-form-urlencoded',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3163.100 Safari/537.36'
    }) as response: 

      print('{0} {1} {2} {3}'.format(host, psutil.cpu_percent(), psutil.virtual_memory()[2], "{:5.2f}s".format(default_timer() - START_TIME)))

      if (response.status_code == 200 and KEYWORD in response.text):
        SUCCESS.write(host+'\n')

      return response
  except requests.exceptions.RequestException as e:
    pass

async def get_data_asynchronous():
  with open(INPUT) as fi:
      hosts = fi.read().splitlines()

  # Prepend a scheme to bare hostnames
  hosts = [host if host.startswith('http') else 'http://' + host
           for host in hosts]

  with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
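      # run_in_executor runs the blocking fetch() calls in the 10-thread pool
      # and wraps each call in an awaitable future for asyncio.gather.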
      loop = asyncio.get_event_loop()

      START_TIME = default_timer()

      futures = [
          loop.run_in_executor(
              executor, 
              fetch, 
              host,
          )
          for host in hosts
      ]

      for response in await asyncio.gather(*futures):
        pass

def main():
  loop = asyncio.get_event_loop()
  loop.run_until_complete(asyncio.ensure_future(get_data_asynchronous()))
  print("\033[34;1m[*]\033[0m Output Saved On : "+OUTPUT)

main()

This is my script at the moment. It works very well for the first URLs, but then for whatever reason it slows down.

Is this a problem with my script or my PC? Can somebody please test it? I've uploaded a few thousand URLs to pastebin: https://pastebin.com/raw/5wtrpcDQ

Thank you!!!
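
As an aside, one common reason threaded requests slow down over time is that every requests.get call opens a fresh TCP connection. Below is a minimal sketch, assuming the slowdown is connection-related, that gives each worker thread its own requests.Session so connections are pooled and reused (the fetch name simply mirrors the function in the script above):

import threading
import requests

# One Session per worker thread: a Session keeps a connection pool, so
# repeated requests to the same hosts reuse TCP connections.
thread_local = threading.local()

def get_session():
    if not hasattr(thread_local, 'session'):
        thread_local.session = requests.Session()
    return thread_local.session

def fetch(host):
    try:
        return get_session().get(host, timeout=5)
    except requests.exceptions.RequestException:
        return None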
