[英]How do I stop the thread function if it is ended in Python?
我写了一个如下的小爬虫。我希望它在处理完 channel_links.txt
文件中的所有链接之后自动停止，以避免一遍又一遍地重复同样的任务。我该怎么做？我的代码片段：
import time
import os
import threading
import random
import requests
from bs4 import BeautifulSoup
def rpsm_loop():
    """Background monitor: refresh requests-per-second / requests-per-minute every second.

    Reads the module-global request counter ``reqs`` (incremented by the
    scraper threads) and publishes the rates through the globals ``rps``
    and ``rpm``.  Runs forever; meant to be started in a daemon thread.
    """
    global rps, rpm
    while True:
        initial = reqs  # snapshot of the counter before the 1 s window
        time.sleep(1)
        # The delta over a 1-second window IS the per-second rate.  The
        # original divided this delta by 60, under-reporting rps by 60x
        # (its rpm = rps*60 then just recovered the raw delta).
        rps = round(reqs - initial, 1)
        rpm = round(rps * 60, 1)
def scrapeEmails():
    """Visit each channel's /about page listed in channel_links.txt and harvest emails.

    Bug fixed (the one the question is about): the original counted the file's
    lines, then re-opened and re-scraped the WHOLE file once per line — an
    accidental O(n^2) repeat that made the job look like it never finished.
    A single pass over the file does the work and stops naturally at EOF.

    Relies on module globals: the ``reqs``/``success``/``fails`` counters,
    ``_lock``, ``rps``/``rpm``, and a ``proxies`` mapping that is presumably
    defined elsewhere in the full script — TODO confirm.
    """
    global reqs, _lock, success, fails, rps, rpm
    with open(os.path.join("channel_links.txt"), "r") as f:
        for line in f:
            url = line.strip()
            if not url:
                # Skip blank/trailing lines instead of requesting "/about".
                continue
            html = (url + "/about")
            soup = BeautifulSoup(requests.get(html, cookies={
                'CONSENT': 'YES+cb.20221228-17-p0.en-GB+FX+{}'.format(random.randint(100, 999))},
                proxies=proxies).text, "html.parser")
            # here it does the job and writes found emails to a text file;
            # the loop now ends right after the last link in channel_links.txt
if __name__ == "__main__":
    # Shared state read/written by the worker and monitor threads.
    _lock = threading.Lock()
    reqs = 0
    success = 0
    fails = 0
    rpm = 0
    rps = 0
    # The rate monitor loops forever; daemon=True so it cannot keep the
    # process alive once the main thread exits.
    threading.Thread(target=rpsm_loop, daemon=True).start()
    # Keep roughly 10 threads alive (active_count includes the main thread
    # and the monitor thread).
    while True:
        if threading.active_count() < 10:
            time.sleep(1.5)
            # NOTE(review): the original started `thread_starter`, a name not
            # defined anywhere in this file; its bare `except: pass` silently
            # swallowed the resulting NameError, so no scraper thread ever
            # actually ran.  Start the real worker instead.
            threading.Thread(target=scrapeEmails).start()
很高兴你自己想出来了。作为补充，你可以用 concurrent.futures.ThreadPoolExecutor
来完成这个任务：一次性提交所有任务，并通过 max_workers
参数控制同时处理的任务数量。例子：
import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor
def fetch_data(url):
    """Download *url*, parse the HTML with BeautifulSoup, and report completion."""
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    # do some processing
    print(f"Thread finished for url: {url}")
# just some random stackoverflow questions
urls_to_process = [
    "https://stackoverflow.com/questions/75133271",
    "https://stackoverflow.com/questions/75058874",
    "https://stackoverflow.com/questions/75057665",
    "https://stackoverflow.com/questions/75057365",
    "https://stackoverflow.com/questions/54096083",
    "https://stackoverflow.com/questions/54096084",
    "https://stackoverflow.com/questions/5409423",
    "https://stackoverflow.com/questions/5408767",
    "https://stackoverflow.com/questions/5408733",
]

# max_workers caps how many fetches run concurrently.  Leaving the `with`
# block already calls shutdown(wait=True) for us, so the original's explicit
# executor.shutdown(wait=True) inside the block was redundant and is removed.
with ThreadPoolExecutor(max_workers=3) as executor:
    for url in urls_to_process:
        print("Submitting a new thread.. ")
        executor.submit(fetch_data, url)
print("All threads done!")
我不敢相信我在回答我自己的问题哈哈，我确实还在学习 Python :)
来源: https://www.geeksforgeeks.org/read-a-file-line-by-line-in-python/
所以解决方法是：在阅读了其他关于如何停止函数的帖子之后，
在读到空行时用 break 跳出循环即可。再次感谢所有发帖并试图帮助我的人：
if not line:
break
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.