from bs4 import BeautifulSoup
import requests
import os
# Script from the question: downloads one nos.nl article and tries to collect
# pieces of each <article class="article"> element into data1.txt.
# NOTE(review): the original indentation is not preserved in this listing, so
# how the `with` / `for` bodies nest cannot be determined from here.
url = "http://nos.nl/artikel/2093082-steeds-meer-nekklachten-bij-kinderen-door-gebruik-tablets.html"
r = requests.get(url)
# The response bytes are decoded as UTF-8 (undecodable bytes are dropped) before
# parsing; no parser name is passed to BeautifulSoup.
soup = BeautifulSoup(r.content.decode('utf-8', 'ignore'))
# Every <article> element whose class is "article".
data = soup.find_all("article", {"class": "article"})
# Opens data1.txt in binary write mode. The name `file` is bound again by the
# second `with` further down before any write call appears.
with open("data1.txt", "wb") as file:
# NOTE(review): the quote characters on the next line are typographic quotes,
# not ASCII quotes, so Python will not accept this as a string literal. The
# accumulator is also seeded with the text utf-8 rather than an empty string;
# presumably an empty string was intended - confirm.
content=‘utf-8’
for item in data:
# Appends four fields per article: the text of the <time> element with this
# exact datetime attribute, the text of the first "link-grey" anchor, the
# "media-full" <img> element itself (no .text is taken), and the text of the
# "article_textwrap" <div>. Each [0] assumes at least one match exists.
content+='''{}\n{}\n\n{}\n{}'''.format( item.contents[0].find_all("time", {"datetime": "2016-03-16T09:50:30+0100"})[0].text,
item.contents[0].find_all("a", {"class": "link-grey"})[0].text,
item.contents[0].find_all("img", {"class": "media-full"})[0],
item.contents[1].find_all("div", {"class": "article_textwrap"})[0].text,
)
# NOTE(review): "data1.txt" contains no {} placeholder, so .format() cannot
# change the file name, and file_name is not defined anywhere in the visible
# code, so evaluating this argument would raise NameError.
with open("data1.txt".format(file_name), "wb") as file:
# NOTE(review): content is text but the file is opened with "wb"; on Python 3 a
# binary-mode file accepts only bytes - confirm the target Python version.
file.write(content)
I recently solved a UTF-8/Unicode problem, but now the script neither saves the output as a .txt file nor saves anything at all. What do I need to do?
If you want to write the data to the file as UTF-8, try codecs.open,
like this:
from bs4 import BeautifulSoup
import requests
import os
import codecs
# Download one nos.nl article and write, for every <article class="article">
# element, its timestamp text, its "link-grey" anchor text, the markup of its
# "media-full" image and its body text to data1.txt, encoded as UTF-8.
url = "http://nos.nl/artikel/2093082-steeds-meer-nekklachten-bij-kinderen-door-gebruik-tablets.html"
r = requests.get(url)
# Stop on an HTTP error status instead of scraping an error page.
r.raise_for_status()
# Hand over the raw bytes so BeautifulSoup works out the encoding itself, and
# name the parser so the result does not depend on which parsers are installed.
soup = BeautifulSoup(r.content, "html.parser")
data = soup.find_all("article", {"class": "article"})

# codecs.open encodes every text string written through it as UTF-8.
with codecs.open("data1.txt", "wb", "utf-8") as filen:
    for item in data:
        # Search the whole article instead of item.contents[0] / [1], which
        # depend on the exact order of the article's direct children. find()
        # returns None when nothing matches, where find_all(...)[0] would
        # raise IndexError.
        time_tag = item.find("time", {"datetime": "2016-03-16T09:50:30+0100"})
        link_tag = item.find("a", {"class": "link-grey"})
        img_tag = item.find("img", {"class": "media-full"})
        body_tag = item.find("div", {"class": "article_textwrap"})

        filen.write(time_tag.get_text() if time_tag is not None else '')
        filen.write('\n')
        filen.write(link_tag.get_text() if link_tag is not None else '')
        filen.write('\n\n')
        # An <img> element has no text content, so get_text() would write an
        # empty string. decode() renders the tag's own markup as text, which
        # keeps the image reference in the output.
        filen.write(img_tag.decode() if img_tag is not None else '')
        filen.write('\n')
        filen.write(body_tag.get_text() if body_tag is not None else '')
I'm unsure about filen.write(item.contents[0].find_all("img", {"class": "media-full"})[0]),
because for me that find_all(...)[0] expression returned a Tag instance
rather than a string.
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.