I wrote this code in Python 2.7.13 to scrape a data table from a website.
import urllib2
from bs4 import BeautifulSoup
import csv
import os
SOURCE_URL = "https://www.mnb.hu/arfolyamok"
OUTPUT_PATH = "proba.csv"
HEADER = ["Penznem", "Devizanev", "Egyseg", "Penznemforintban"]


def make_soup(url):
    """Download *url* and return its HTML parsed as a BeautifulSoup tree."""
    thepage = urllib2.urlopen(url)
    try:
        return BeautifulSoup(thepage, "html.parser")
    finally:
        # Release the connection even if parsing raises.
        thepage.close()


def quote_field(text, delimiter=","):
    """Return *text* as a single CSV field, quoted only when necessary.

    A field that contains the delimiter, a double quote or a line break is
    wrapped in double quotes (with embedded quotes doubled). This keeps a
    value with a decimal comma, such as ``283,45``, in one column.

    Quoting is done by hand because Python 2's ``csv`` module cannot write
    unicode text directly.
    """
    needs_quoting = any(ch in text for ch in (delimiter, '"', "\n", "\r"))
    if not needs_quoting:
        return text
    return '"' + text.replace('"', '""') + '"'


def format_row(cells, delimiter=","):
    """Join *cells* into one CSV line, quoting each field as needed."""
    return delimiter.join(quote_field(cell, delimiter) for cell in cells)


def extract_rows(soup):
    """Return the text of the ``<td>`` cells of every ``<tr>`` in *soup*.

    Rows without any ``<td>`` cell are skipped so they do not end up as
    blank lines in the output.
    """
    rows = []
    for record in soup.findAll('tr'):
        cells = [cell.text for cell in record.findAll('td')]
        if cells:
            rows.append(cells)
    return rows


def main():
    """Scrape the table rows from SOURCE_URL and save them to OUTPUT_PATH."""
    soup = make_soup(SOURCE_URL)
    lines = [format_row(row) for row in extract_rows(soup)]
    print("\n".join(lines))

    csv_text = "\n".join([format_row(HEADER)] + lines)
    with open(os.path.expanduser(OUTPUT_PATH), "wb") as csv_file:
        # Encode once: every separate .encode('utf16') call emits its own
        # byte-order mark, which would leave a stray BOM inside the file.
        csv_file.write(csv_text.encode("utf16"))


if __name__ == "__main__":
    main()
I want to export this into a CSV file with the following 4 columns:
"Penznem Devizanev Egyseg Penznemforintban"
The fields are separated by ",", but the last value contains a decimal comma (e.g. 283,45), so it is split into two fields although it should be ONE.
How can I fix it?
You cannot avoid the comma inside the last value directly, but
what you can simply do is use another separator, i.e. ";" (semicolon). When you open the file in Excel or Calc, select the semicolon as the separator and you will get the expected result:
import urllib2
from bs4 import BeautifulSoup
import csv
import os


def make_soup(url):
    """Download *url* and return its HTML parsed as a BeautifulSoup tree."""
    thepage = urllib2.urlopen(url)
    soupdata = BeautifulSoup(thepage, "html.parser")
    return soupdata


def main():
    """Scrape the table rows and save them as semicolon-separated proba.csv."""
    soup = make_soup("https://www.mnb.hu/arfolyamok")

    # ";" is the separator because the scraped values may contain a decimal
    # comma (e.g. 283,45), which a "," separator would split in two.
    lines = [
        ";".join(cell.text for cell in record.findAll('td'))
        for record in soup.findAll('tr')
    ]
    # Each row is preceded by a real line break ("\n", not a literal "\\n").
    maindatatable = "".join("\n" + line for line in lines)
    header = "Penznem;Devizanev;Egyseg;Penznemforintban"
    print(maindatatable)

    with open(os.path.expanduser("proba.csv"), "wb") as csv_file:
        # Encode header and body together so the file gets a single
        # byte-order mark instead of one per .encode('utf16') call.
        csv_file.write((header + maindatatable).encode('utf16'))


if __name__ == "__main__":
    main()
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.