I am attempting to read URLs from a text file ("millard.txt"), append them to a list, and then iterate over the list to check whether each URL is an active website.
I am having problems with tqdm: I don't know how to make the current list item replace the previously displayed item while iterating over my group of URLs.
Can someone guide me on how to properly replace the previously iterated item on the same line (without printing a newline) while using tqdm?
import urllib
from tqdm.auto import tqdm, trange
import time
from urllib.request import urlopen
import sys
# Count the URLs once and reuse the count for both the banner and the tqdm total
# (the original opened the file twice just to count lines).
with open("millard.txt", "r") as fp:
    total_lines = sum(1 for _ in fp)
print('Total Number of lines:', total_lines)

working = []      # URLs that answered the request successfully
not_working = []  # URLs that raised an HTTP or URL error

with open("millard.txt", "r") as f:
    progress = tqdm(f, total=total_lines, position=0, leave=False, desc="Progress")
    for line in progress:
        url = line.strip()  # drop the trailing '\n' -- otherwise every request URL is malformed
        new_link = "http://www.{}".format(url)
        # Update the bar's description instead of calling print():
        # print() inside a tqdm loop emits extra lines and breaks the
        # single-line, in-place display the asker wants.
        progress.set_description("Getting %s" % new_link)
        try:
            # Context manager closes the response socket even on early exit
            # (the original leaked one open connection per working URL).
            with urllib.request.urlopen(new_link):
                working.append(new_link)
        except (urllib.error.HTTPError, urllib.error.URLError):
            # HTTPError is a subclass of URLError; one handler covers both.
            not_working.append(new_link)
        time.sleep(0.01)

with open("final.txt", "w") as fp:
    for item in working:
        fp.write("%s\n" % item)  # newline per item -- the original ran all URLs together
print('Done')
I have tried a lot; I just need the script not to print a growing list of URLs, but instead to print each URL on the same line as the group/list is being iterated.
from urllib.request import urlopen
from socket import error as SocketError
import errno
import urllib.request  # hoisted: the original re-imported this inside the loop

# Read every URL exactly once, stripping the trailing newline from each line
# (the original opened the file three times and never closed the 'r+' handle).
with open("millard.txt", "r") as fp:
    urls = [line.rstrip() for line in fp]
print('Total urls loaded:', len(urls))

working = []      # URLs that answered the request
not_working = []  # URLs that failed with an HTTP, URL, or socket error

# Spoof a browser user agent: some sites reject Python's default one.
hdr = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'}

for url in (pbar := tqdm(urls, total=len(urls), position=0, leave=True, desc=".")):
    new_link = "http://www.{}".format(url)
    try:
        req = urllib.request.Request(new_link, headers=hdr)
        # Context manager closes the connection even when read() raises
        # (the original never closed the response object).
        with urllib.request.urlopen(req) as response:
            response.read()
        code = " - success"
        working.append(new_link)
    except (urllib.error.HTTPError, urllib.error.URLError, SocketError):
        # HTTPError < URLError; SocketError catches e.g. connection resets.
        code = " - failed"
        not_working.append(new_link)  # the original declared this list but never filled it
    # Rewriting the bar's description in place is what keeps each URL on the
    # same line instead of printing a new line per iteration.
    pbar.set_description(f"Checking: {url}{code}")

with open("final.txt", "w") as fp:
    for item in working:
        fp.write("%s\n" % item)  # one URL per line, no stray trailing space

# Re-read the output file to confirm how many working URLs were saved.
with open("final.txt", "r") as fp:
    print('Total urls loaded:', sum(1 for _ in fp))
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.