My code extracts information from an XHTML file and prints it as CSV. I cannot seem to delete the topmost row, which reads "NYSE,, ,Price,Chg,Chg". Using "del data[0]", for example, gets rid of the whole column instead. Any help is greatly appreciated.
import sys
import re
import xml.dom.minidom
_SOURCE_FILE = "2017-11-27-16-20-15.xhtml"
_EXCHANGE = "NYSE"
# Ticker symbol in parentheses, e.g. "(ABC)".
_TICKER_RE = re.compile(r"\(([A-Za-z0-9_]+)\)")
# Parenthesised part plus the newline that follows it, removed from the name.
_TICKER_SUFFIX_RE = re.compile(r"\(.*\)\n")


def _text_of(tr, tag):
    """Return the direct text-node values of every ``tag`` element under ``tr``."""
    return [
        node.nodeValue
        for element in tr.getElementsByTagName(tag)
        for node in element.childNodes
        if node.nodeType == node.TEXT_NODE
    ]


def row_to_csv(tr):
    """Turn one ``<tr>`` element into a comma-joined output line.

    The row's anchor text comes first, followed by the text of its ``<td>``
    cells. The resulting fields are: exchange, ticker symbol, company name,
    volume without thousands separators, price without a leading ``$``, and
    change.

    Raises:
        IndexError: if the row holds fewer text entries than the layout
            this script expects.
    """
    data = _text_of(tr, 'a') + _text_of(tr, 'td')
    # Drop the trailing entry and the three entries right after the name;
    # none of them is used in the output.
    # NOTE(review): what these cells contain depends on the page layout.
    del data[-1]
    del data[1:4]

    name_cell, volume, price = data[0], data[1], data[2]
    change = data[-1]

    symbol = ''.join(_TICKER_RE.findall(name_cell))
    company = _TICKER_SUFFIX_RE.sub("", name_cell)

    # NOTE: fields are joined without CSV quoting, so a comma inside a
    # company name would show up as an extra column.
    return ','.join([
        _EXCHANGE,
        symbol,
        company,
        volume.replace(',', ''),
        price.lstrip('$'),
        change,
    ])


def table_to_csv_lines(table, header_rows=1):
    """Return one output line per data row of ``table``.

    Args:
        table: a DOM element whose ``<tr>`` descendants are the rows.
        header_rows: number of leading rows to skip. The first row of the
            table holds column captions ("Price", "Chg", ...), not data,
            so it is skipped by default.
    """
    rows = table.getElementsByTagName('tr')
    return [row_to_csv(tr) for tr in rows[header_rows:]]


def main():
    """Parse the XHTML file and print its third table as CSV lines."""
    document = xml.dom.minidom.parse(_SOURCE_FILE)
    table = document.getElementsByTagName('table')[2]
    for line in table_to_csv_lines(table):
        print(line)


if __name__ == "__main__":
    main()
If data[0] returns the whole column, then loop through your columns and delete the first entry of each.
# Remove the first entry of every column in place.
for entries in data:
    del entries[0]
The technical posts on this site are published under the CC BY-SA 4.0 license. If you reprint this page, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.