#Stuff needed to run
import requests
import urllib.request
import io
from bs4 import BeautifulSoup as soup
#Pick url, request it, save response, read response, soup it into variable
my_url = 'https://old.reddit.com/r/all/'
request = urllib.request.Request(my_url,headers={'User-Agent': 'your bot 0.1'})
response = urllib.request.urlopen(request)
page_html = response.read()
page_soup = soup(page_html, "html.parser")
#get all the posts, get one post, get all the authors, get one author
posts = page_soup.findAll("div", {"class": "top-matter"})
post = posts[0]
authors = page_soup.findAll("p", {"class":"tagline"})
author = authors[0]
#make filename, open to write, set the headers, write the headers,
filename = "redditAll.csv"
f = open(filename, "w")
headers = "Title of the post, Author of the post\n"
f.write(headers)
#for the post and author in posts and authors, get one of each, open the file & write it, repeat
for post, author in zip(posts, authors):
post_text = post.p.a.text.replace(",", " -")
username = author.a.text
with open(filename, "w", encoding="utf-8") as f:
f.write(post_text + "," + username + "\n")
#close the file
f.close()
After running this code and opening the csv file, there's only two cells that have text in them.
Changed this
for post, author in zip(posts, authors):
post_text = post.p.a.text.replace(",", " -")
username = author.a.text
with open(filename, "w", encoding="utf-8") as f:
f.write(post_text + "," + username + "\n")
To this
with open(filename, "w", encoding="utf-8") as f:
for post, author in zip(posts, authors):
post_text = post.p.a.text.replace(",", " -")
username = author.a.text
f.write(post_text + "," + username + "\n")
Try this:
# for the post and author in posts and authors, get one of each, open the file & write it, repeat
def writer():
with open(filename, "w", encoding="utf-8") as f:
for post_, author_ in zip(posts, authors):
post_text = post_.p.a.text.replace(",", " -")
username = author_.a.text
# with open(filename, "w", encoding="utf-8") as f:
f.write(post_text + "," + username + "\n")
writer()
You could open the file in append mode using the a
parameter, the second time you open the file writing, check this SO thread on how to do this. Or move the with open(filename, "w", encoding="utf-8") as f:
outside the loop
The w
param will overwrite the previous data in the file so each time the loop runs, the record will be overwritten with the new record leaving just the final record in the file
Also I would go with the builtin csv
library to read/write csv files as one of the comments mentions. Here is the documentation for it
Changed this
for post, author in zip(posts, authors):
post_text = post.p.a.text.replace(",", " -")
username = author.a.text
with open(filename, "w", encoding="utf-8") as f:
f.write(post_text + "," + username + "\n")
To this
with open(filename, "w", encoding="utf-8") as f:
for post, author in zip(posts, authors):
post_text = post.p.a.text.replace(",", " -")
username = author.a.text
f.write(post_text + "," + username + "\n")
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.