In this script, I have to dissect some URLs and write the output into a CSV file.
# Resolve each domain listed in domains.txt to its root URL and record the
# status code and Content-Length of every site in output.csv.
import csv  # NOTE: module name is lowercase — "import CSV" raises ModuleNotFoundError
from urllib.parse import urlparse

import requests

OUTPUT_PATH = r'C:\Users\acer\Desktop\Project\WordPress\output.csv'
DOMAINS_PATH = r'C:\Users\acer\Desktop\Project\WordPress\domains.txt'


def content_length(domain, writer):
    """Resolve *domain*, probe its root URL and write one CSV row.

    Writes [source url, final url, status code, content-length/message]
    through *writer*. On any failure nothing is written; the error is
    printed instead (best-effort, matching the original behaviour).
    """
    try:
        r = requests.get(domain.strip())
        surl = r.url
        urlp = urlparse(surl)
        furl = urlp.scheme + '://' + urlp.hostname + '/'
        rd = requests.get(furl.strip())
        rdstat = rd.status_code
        if rdstat == 403:
            rdmsg = '403 - Forbidden'
        elif rdstat == 200:
            # .get() avoids a KeyError when the server omits the header.
            rdmsg = rd.headers.get('content-length', 'Not Available')
        else:
            rdmsg = 'Not Available'
        print(str(rdstat) + ',' + rdmsg)
        writer.writerow([surl, furl, str(rdstat), rdmsg])
    except Exception as e:
        print(domain.strip() + ',' + 'Exception')
        print(e)


print('Response Status Code,Content Length')
# Keep the output file open for the whole run so every iteration can append
# its own row; the header row is written exactly once, before the loop.
with open(OUTPUT_PATH, 'w', newline='') as out_file:
    writer = csv.writer(out_file)
    writer.writerow(["Source Url", "Final Url", "Status Code", "Content-Length"])
    with open(DOMAINS_PATH, 'r') as domlist:
        for dom in domlist:
            content_length(dom, writer)
Here, my problem is that I have to add the output to the CSV file on every iteration (i.e. for every URL in the text file), but I don't know where to put the CSV-writing code...
You can restructure the code so that, instead of making one request and then writing one row, you make all the requests first, collect the data into lists, and then write those lists to the CSV file.
You can do this by returning the data from your content_length function rather than writing it there:
def content_length(domain):
    """Resolve *domain* to its root URL and return one CSV row.

    Returns a list [source url, root url, status code, content-length or
    message]. On any error a fallback row is returned instead of None so
    the caller can always hand the result straight to csv.writer.
    """
    try:
        r = requests.get(domain.strip())
        surl = r.url
        urlp = urlparse(surl)
        furl = urlp.scheme + '://' + urlp.hostname + '/'
        rd = requests.get(furl.strip())
        rdstat = rd.status_code
        if rdstat == 403:
            rdmsg = '403 - Forbidden'
        elif rdstat == 200:
            # The Content-Length header is already on the response we hold,
            # so no extra request is needed; .get() avoids a KeyError when
            # the server omits the header.
            rdmsg = rd.headers.get('content-length', 'Not Available')
        else:
            rdmsg = 'Not Available'
        print(str(rdstat) + ',' + rdmsg)
        return [surl, furl, str(rdstat), rdmsg]
    except Exception as e:
        print(domain.strip() + ',' + 'Exception')
        print(e)
        # Return a well-formed row (not None) so csv.writerows never
        # receives None for a failed domain.
        return [domain.strip(), '', 'Exception', str(e)]
That will return a list, which will be the row you want to write. Now you'll need another function to collect all the rows from the domain file:
def all_domains(path=r'C:\Users\acer\Desktop\Project\WordPress\domains.txt'):
    """Read the domain list at *path* and return one CSV row per domain.

    *path* defaults to the original hard-coded location, so existing
    callers keep working unchanged. Entries for which content_length
    returned None (a failed lookup) are skipped so the row list is
    always safe to pass to csv.writerows.
    """
    dom_data = []
    with open(path, 'r') as file:
        for dom in file:
            cl = content_length(dom)
            if cl is not None:
                dom_data.append(cl)
    return dom_data
You can now write the result of all_domains to the CSV file using writer.writerows:
# Collect one row per domain first, then dump everything to the CSV in one go.
rows = all_domains()
with open(r'C:\Users\acer\Desktop\Project\WordPress\output.csv', 'w', newline='') as out_file:
    csv_out = csv.writer(out_file)
    # Header row first, then all of the collected data rows.
    csv_out.writerow(["Source Url", "Final Url", "Status Code", "Content-Length"])
    csv_out.writerows(rows)
Also, when you import the csv package at the top of the file, make sure the name is lowercase csv and not uppercase CSV, as the uppercase form will throw an exception (ModuleNotFoundError).
Hope this helps!
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.