I'm working on my tool. So I have this script:
import ctypes
import os
import platform
import random
import subprocess
import threading
from urllib.parse import unquote

import requests
from bs4 import BeautifulSoup as bs
from requests.utils import requote_uri
temptotal = 0
totalurl = 0
retry = 0
load = 0
load2 = 0
loaded = 0
dorksdone = 0
tempourl = 0
#Import Proxy List
selecting = 1
while selecting == 1:
try:
option = int(input("Choose Type Proxy(1 = http, 2=socks4, 3 = socks5) :")
except:
option = 404
if option == 1:
selecting = 0
prox = 'http'
proxyyyy = 'http'
elif option == 2:
selecting = 0
prox = 'socks4'
proxyyyy = 'socks4'
elif option == 3:
selecting = 0
prox = 'socks5'
proxyyyy = 'socks5'
else:
print("Choose valid numbre such as 1, 2 or 3!")
proxy_list = input("Give me Proxylist :" )
with open(proxy_list, mode="r", encoding="utf-8") as mf:
for line in mf:
load2 += 1
print(" ")
print("Total Proxy loaded :" + str(load2))
print(" ")
#import keywordfile
dorkslist = input("Give me KeywordList/Dorklist :" + bcolors.ENDC + " ")
with open(dorkslist, mode="r", encoding="utf-8") as mf:
for line in mf:
load += 1
mf.close()
print(" ")
print("Total Dorks loaded:" + str(load))
print(" ")
#define url to check
yahoourl = {"https://fr.search.yahoo.com/search?p=&fr=yfp-search-sb",
"https://fr.search.yahoo.com/search?p=&fr=yfp-search-sb&b=11&pz=10"}
#function I want to speed up
def checker():
global temptotal
global loaded
global dorksdone
global tempourl
proxy = set()
with open(proxy_list, "r") as f:
file_lines1 = f.readlines()
for line1 in file_lines1:
proxy.add(line1.strip())
with open(dorkslist, mode="r",encoding="utf-8") as my_file:
for line in my_file:
loaded += 1
threading.Thread(target=titre).start()
indorks = line
encode = requote_uri(indorks)
for yahoo in yahoourl:
yahooo = yahoo.replace("&fr",encode + "&fr")
try:
proxies = {
'http': prox+'://'+random.choice(list(proxy))
}
r = requests.get(yahooo, proxies=proxies)
print("Dorks used :" + indorks )
dorksdone += 1
soup = bs(r.text, 'html.parser')
links = soup.find_all('a')
for link in soup.find_all('a'):
a = link.get('href')
unquote(a)
temptotal += 1
with open("Bing.txt", mode="a",encoding="utf-8") as fullz:
fullz.write(a + "\n")
fullz.close()
lines_seen = set() # holds lines already seen
outfile = open("Bingnodup.txt", "w", encoding="utf-8")
for line in open("Bing.txt", "r", encoding="utf-8"):
if line not in lines_seen: # not a duplicate
outfile.write(line)
lines_seen.add(line)
outfile.close()
with open("Bingnodup.txt", mode="r", encoding="utf-8") as cool:
for url in cool:
try:
proxies = {
'http': prox+'://'+random.choice(list(proxy))
}
response = requests.get(url, proxies=proxies)
save = response.url
with open("Bingtemp.txt", mode="a", encoding="utf-8") as cool1:
cool1.write(save + "\n")
tempourl += 1
cool1.close()
except:
pass
except:
raise
fin()
#start bot
bot1 = threading.Thread(target=checker)
bot1.start()
bot1.join()
Example keyword file:
python
wordpress
Example proxy file (http, so choose option 1):
46.4.96.137:8080
223.71.167.169:80
219.248.205.117:3128
198.24.171.34:8001
51.158.123.35:9999
But this function runs very, very slowly. Could someone let me know how I can speed it up? I have tried to apply this topic: How can I use threading in Python?
But I didn't understand how to build it into my function the right way.
Your script is what's called I/O bound. What this means is that it is not slow because the CPU needs to perform long computations, but because it needs to wait a lot every time it requests a URL (the bottleneck are the requests to the internet).
For concurrency you have 3 options: asyncio, threading, and multiprocessing.
The first two are the ones which can help you in I/O bound problems like yours. The first one is the recommended approach in a problem like this, since there is a library available with support for async/await.
This is an adapted example from the above link, which does exactly what you need:
import asyncio
import time
import aiohttp
def get_proxies():
if platform.system() == "Linux":
clear = lambda: os.system('clear')
clear()
if platform.system() == "Windows":
clear = lambda: os.system('cls')
clear()
proxy = set()
with open("proxy.txt", "r") as f:
file_lines1 = f.readlines()
for line1 in file_lines1:
proxy.add(line1.strip())
return proxy
async def download_site(session, url, proxies):
async with session.get(url, proxies=proxies) as response:
save = response.url
with open("Yahootemp.txt", mode="a", encoding="utf-8") as cool1:
cool1.write(save + "\n")
async def download_all_sites(sites, proxies):
async with aiohttp.ClientSession() as session:
tasks = []
for url in sites:
task = asyncio.ensure_future(download_site(session, url, proxies))
tasks.append(task)
await asyncio.gather(*tasks, return_exceptions=True)
if __name__ == "__main__":
proxies = get_proxies()
proxies = {
'http': prox + '://' + random.choice(list(proxies))
}
sites = []
with open("Yahoonodup.txt", mode="r", encoding="utf-8") as cool:
for url in cool:
sites.append(url)
asyncio.get_event_loop().run_until_complete(download_all_sites(sites, proxies))
You could make it even faster if saving the files still seems too slow; read this.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.