[英]python lxml - loop/iterate through excel rows and save each row as one xml
The problem is that the 2nd XML file also contains the data from the first Excel row, and the third XML file contains the data from the first and second rows. 问题是第二个xml文件还包含第一行excel的数据,而第三个xml文件则包含第一行和第二行的数据。
I have been working on this for hours and can't figure it out. 我已经为此折腾了好几个小时,还是弄不明白。
from lxml import etree
import openpyxl
# Create root element with namespace information
# The values below become the namespace, schema location and attributes of
# the <LedgerImport> root element.
xmlns = "http://xml.datev.de/bedi/tps/ledger/v040"
xsi = "http://www.w3.org/2001/XMLSchema-instance"
schemaLocation = "http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd"
version = "4.0"
generator_info = "DATEV Musterdaten"
generating_system = "DATEV manuell"
# xmlRoot is built exactly once here, before the row loop further down starts.
xmlRoot = etree.Element(
# Tag in Clark notation: "{namespace-uri}LedgerImport"
"{" + xmlns + "}LedgerImport",
version=version,
# xsi:schemaLocation has to be passed through attrib because its name is namespaced
attrib={"{" + xsi + "}schemaLocation": schemaLocation},
generator_info=generator_info,
generating_system=generating_system,
# The None key declares the default namespace; 'xsi' declares the schema-instance prefix
nsmap={'xsi': xsi, None: xmlns}
)
# Open the Excel workbook and select its 'Import' worksheet
wb = openpyxl.load_workbook('import_spendesk_datev.xlsx')
sheet = wb['Import']
# build the xml tree
# range(2,6) visits sheet rows 2, 3, 4 and 5.
# NOTE(review): every SubElement created below is attached to the single xmlRoot
# built above, so the elements added for earlier rows are still part of the tree
# when a later row is serialised.
for i in range(2,6):
# <consolidate> attributes are read from columns 16, 2, 13 and 12 of the current row
consolidate = etree.SubElement(xmlRoot, 'consolidate', attrib={'consolidatedAmount': str(sheet.cell(row=i,column=16).value),'consolidatedDate': str(sheet.cell(row=i,column=2).value), 'consolidatedInvoiceId': str(sheet.cell(row=i,column=13).value), 'consolidatedCurrencyCode': str(sheet.cell(row=i,column=12).value) })
accountsPayableLedger = etree.SubElement(consolidate, 'accountsPayableLedger')
# Each child element below takes its text from one column of the current row.
# Unlike the attributes above, the cell values are assigned without str().
account = etree.SubElement(accountsPayableLedger, 'bookingText')
account.text = sheet.cell(row=i,column=21).value
invoice = etree.SubElement(accountsPayableLedger, 'invoiceId')
invoice.text = sheet.cell(row=i,column=13).value
date = etree.SubElement(accountsPayableLedger, 'date')
date.text = sheet.cell(row=i,column=2).value
amount = etree.SubElement(accountsPayableLedger, 'amount')
amount.text = sheet.cell(row=i,column=16).value
account_no = etree.SubElement(accountsPayableLedger, 'accountNo')
account_no.text = sheet.cell(row=i,column=19).value
cost1 = etree.SubElement(accountsPayableLedger, 'costCategoryId')
cost1.text = sheet.cell(row=i,column=15).value
currency_code = etree.SubElement(accountsPayableLedger, 'currencyCode')
currency_code.text = sheet.cell(row=i,column=12).value
# partyId and bpAccountNo are both filled from column 20
party_id = etree.SubElement(accountsPayableLedger, 'partyId')
party_id.text = sheet.cell(row=i,column=20).value
bpaccount = etree.SubElement(accountsPayableLedger, 'bpAccountNo')
bpaccount.text = sheet.cell(row=i,column=20).value
# Serialise the whole tree under xmlRoot; the file is named after the value in column 13.
# NOTE(review): indentation was lost in this listing - presumably these two lines
# sit inside the loop (one file per row); confirm against the original post.
doc = etree.ElementTree(xmlRoot)
doc.write( str(sheet.cell(row=i,column=13).value)+".xml", xml_declaration=True, encoding='utf-8', pretty_print=True)
As described, the goal is to do this for every single Excel row and to produce one .xml file per row, looking like this: 如上所述,目标是对每一个excel行都这样处理,并为每一行生成一个如下所示的.xml文件:
<?xml version='1.0' encoding='UTF-8'?>
<LedgerImport xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://xml.datev.de/bedi/tps/ledger/v040" generating_system="DATEV manuell" generator_info="DATEV Musterdaten" version="4.0" xsi:schemaLocation="http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd">
<consolidate consolidatedAmount="1337.01">
<accountsPayableLedger>
<bookingText>amazon</bookingText>
<invoiceId>1</invoiceId>
</accountsPayableLedger>
</consolidate>
</LedgerImport>
The same xmlRoot object is reused several times. 相同的xmlRoot对象被重复使用了多次。
You need to create a new root element for each iteration of the for loop. 您需要在for循环的每次迭代中创建一个新的根元素。
The code that creates the root element can be put in a function. 创建根元素的代码可以放在函数中。
Here is a simplified example: 这是一个简化的例子:
from lxml import etree
def makeroot():
    """Build and return a fresh, childless LedgerImport root element."""
    root = etree.Element("LedgerImport")
    return root


# A new root is requested on every pass, so nothing that was appended during
# an earlier iteration can end up in a later file.
for i in range(2, 6):
    xmlRoot = makeroot()
    consolidate = etree.SubElement(
        xmlRoot,
        'consolidate',
        attrib=dict(consolidatedAmount=str(i)),
    )
    doc = etree.ElementTree(xmlRoot)
    doc.write(
        "{}.xml".format(i),
        xml_declaration=True,
        encoding='utf-8',
        pretty_print=True,
    )
After @mzjn pointed out your basic mistake, here is a thing I made for fun - you can create nested XML with a declarative mapping, instead of laboriously calling etree.SubElement yourself. 在@mzjn指出你的基本错误之后,这是我为了好玩而做的东西 - 你可以用声明式映射创建嵌套XML,而不必自己费力地调用etree.SubElement。
Here is how. 做法如下。 Assume this as the basic situation: 假设这是基本情况:
from lxml import etree
import openpyxl
# Namespace map handed to lxml: the None key is the default namespace.
ns = {
    None: 'http://xml.datev.de/bedi/tps/ledger/v040',
    'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
}

# Declarative description of the document. Keys starting with an underscore
# ('_tag', '_children', '_text') describe the tree itself; 'attrib' and 'nsmap'
# are the arguments of the same name of etree.Element()/etree.SubElement().
# Every dynamic value is a zero-argument callable that reads the module-level
# names `sheet` and `i` at the moment it is called, not when the dict is built.
mapping = {
    '_tag': '{%s}LedgerImport' % ns[None],
    'attrib': {
        'version': '4.0',
        '{%s}schemaLocation' % ns['xsi']: 'http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd',
        'generator_info': 'DATEV Musterdaten',
        'generating_system': 'DATEV manuell',
    },
    'nsmap': ns,
    '_children': [{
        '_tag': 'consolidate',
        # attribute name -> spreadsheet column of the current row
        'attrib': {
            attr_name: (lambda col=col: sheet.cell(i, col).value)
            for attr_name, col in (
                ('consolidatedAmount', 16),
                ('consolidatedDate', 2),
                ('consolidatedInvoiceId', 13),
                ('consolidatedCurrencyCode', 12),
            )
        },
        '_children': [{
            '_tag': 'accountsPayableLedger',
            # child tag -> spreadsheet column of the current row, in document order
            '_children': [
                {'_tag': child_tag, '_text': (lambda col=col: sheet.cell(i, col).value)}
                for child_tag, col in (
                    ('bookingText', 21),
                    ('invoiceId', 13),
                    ('date', 2),
                    ('amount', 16),
                    ('accountNo', 19),
                    ('costCategoryId', 15),
                    ('currencyCode', 12),
                    ('partyId', 20),
                    ('bpAccountNo', 20),
                )
            ],
        }],
    }],
}
The nested dict resembles your final XML document. 嵌套的dict类似于最终的XML文档。
Its keys also resemble the parameters that etree.Element() and etree.SubElement() take, with the addition of _text and _children. 它的键也类似于etree.Element()和etree.SubElement()所接受的参数,另外增加了_text和_children。
Now we can define a single recursive helper function that takes this input tree and transforms it into a nested XML tree of the same configuration. 现在我们可以定义一个递归辅助函数,它接受这个输入树并将其转换为相同配置的嵌套XML树。
As a bonus we can execute the lambda functions, which allows us to dynamically calculate attribute values and text: 作为奖励,我们可以执行lambda函数,这允许我们动态计算属性值和文本:
def build_tree(template, parent=None):
    """Recursively turn a nested template dict into an lxml element tree.

    The template is never modified, so the same template can be passed in
    again (e.g. once per spreadsheet row) and its callables are re-evaluated
    on every call.

    Args:
        template: dict describing one element. '_tag' is the element name;
            '_text' (optional) is the element text; '_children' (optional)
            is a list of templates of the same shape. Every other key
            (e.g. 'attrib', 'nsmap') is forwarded as a keyword argument to
            etree.Element() / etree.SubElement(). Attribute values and
            '_text' may be zero-argument callables; they are called to
            obtain the actual value.
        parent: element the new node is appended to, or None to create a
            root element.

    Returns:
        The element created for `template`, with all descendants attached.
    """
    # Everything except our private keys is handed straight to lxml.
    params = {k: v for k, v in template.items() if k not in ('_children', '_text')}

    # Resolve dynamic attribute values into a NEW dict. `params` is only a
    # shallow copy of the template, so writing the resolved strings back into
    # params['attrib'] would overwrite the callables stored in the template,
    # and every later call would silently reuse the values of the first call.
    if 'attrib' in params:
        params['attrib'] = {
            name: str(value() if callable(value) else value)
            for name, value in params['attrib'].items()
        }

    if parent is None:
        node = etree.Element(**params)
    else:
        params['_parent'] = parent
        node = etree.SubElement(**params)

    # Calculate (if necessary) and set the node text.
    if '_text' in template:
        text = template['_text']
        if callable(text):
            text = text()
        # None (e.g. an empty spreadsheet cell) leaves the element without
        # text instead of writing the literal string "None"; any other
        # value, including 0, is stringified because lxml only accepts strings.
        node.text = None if text is None else str(text)

    # Recurse into children, if any.
    for child in template.get('_children', []):
        build_tree(child, node)

    return node
We can call this in a loop: 我们可以循环调用它:
# Open the workbook and pick the worksheet the mapping's lambdas read from.
wb = openpyxl.load_workbook('import_spendesk_datev.xlsx')
sheet = wb['Import']

# `sheet` and `i` have to stay module-level names with exactly these spellings:
# the lambdas inside `mapping` look them up when build_tree() calls them.
for i in range(2, 6):
    # The output file is named after the value in column 13 of the current row.
    name = "{}.xml".format(sheet.cell(i, 13).value)
    root = build_tree(mapping)
    doc = etree.ElementTree(root)
    doc.write(
        name,
        xml_declaration=True,
        encoding='utf-8',
        pretty_print=True,
    )
This should generate a couple of nicely nested XML documents, and it should be a lot easier to manage if your XML structure changes or gets more complicated. 这应该会生成几个嵌套良好的XML文档,并且当你的XML结构发生变化或变得更复杂时,它应该更容易维护。
Alternatively, consider XSLT, the special-purpose declarative language designed to transform XML files, which lxml does support. 或者,考虑XSLT,这是专门用于转换XML文件的声明式语言,lxml支持它。
Specifically, pass parameters from Python to the stylesheet to transform a template XML (not unlike passing parameters to a prepared SQL statement): 具体来说,将参数从Python传递到样式表以转换模板XML(类似于将参数传递给预编译的SQL语句):
XML template (includes all top-level namespaces) XML模板(包括所有顶级命名空间)
<?xml version='1.0' encoding='UTF-8'?>
<LedgerImport xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://xml.datev.de/bedi/tps/ledger/v040"
generating_system="DATEV manuell"
generator_info="DATEV Musterdaten" version="4.0"
xsi:schemaLocation="http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd">
<consolidate consolidatedAmount="???">
<accountsPayableLedger>
<bookingText>???</bookingText>
<invoiceId>???</invoiceId>
<date>???</date>
<amount>???</amount>
<accountNo>???</accountNo>
<costCategoryId>???</costCategoryId>
<currencyCode>???</currencyCode>
<partyId>???</partyId>
<bpAccountNo>???</bpAccountNo>
</accountsPayableLedger>
</consolidate>
</LedgerImport>
XSLT (save as .xsl file, a little longer due to default namespace in XML) XSLT (另存为.xsl文件,由于XML中的默认命名空间而稍长)
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:doc="http://xml.datev.de/bedi/tps/ledger/v040">
  <xsl:output indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- INITIALIZE PARAMETERS (one per placeholder in the template XML;
       the values are supplied by the calling program) -->
  <xsl:param name="prm_consolidate" />
  <xsl:param name="prm_bookingText" />
  <xsl:param name="prm_invoiceId" />
  <xsl:param name="prm_date" />
  <xsl:param name="prm_amount" />
  <xsl:param name="prm_accountNo" />
  <xsl:param name="prm_costCategoryId" />
  <xsl:param name="prm_currencyCode" />
  <xsl:param name="prm_partyId" />
  <xsl:param name="prm_bpAccountNo" />

  <!-- IDENTITY TRANSFORM: copy everything that no other template handles -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- REWRITE THE consolidatedAmount ATTRIBUTE OF THE EXISTING consolidate
       ELEMENT. It is replaced in place, so no second consolidate element is
       created inside accountsPayableLedger. -->
  <xsl:template match="doc:consolidate/@consolidatedAmount">
    <xsl:attribute name="consolidatedAmount"><xsl:value-of select="$prm_consolidate"/></xsl:attribute>
  </xsl:template>

  <!-- REWRITE THE CHILDREN OF accountsPayableLedger WITH THE PARAMETER VALUES.
       The namespace is given explicitly so the new elements stay in the
       document's default namespace. -->
  <xsl:template match="doc:accountsPayableLedger">
    <xsl:copy>
      <xsl:element name="bookingText" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_bookingText"/></xsl:element>
      <xsl:element name="invoiceId" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_invoiceId"/></xsl:element>
      <xsl:element name="date" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_date"/></xsl:element>
      <xsl:element name="amount" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_amount"/></xsl:element>
      <xsl:element name="accountNo" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_accountNo"/></xsl:element>
      <xsl:element name="costCategoryId" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_costCategoryId"/></xsl:element>
      <xsl:element name="currencyCode" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_currencyCode"/></xsl:element>
      <xsl:element name="partyId" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_partyId"/></xsl:element>
      <xsl:element name="bpAccountNo" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_bpAccountNo"/></xsl:element>
    </xsl:copy>
  </xsl:template>
</xsl:stylesheet>
Python (no DOM element building) Python (没有DOM元素构建)
import lxml.etree as et
import openpyxl  # was used below without being imported

# LOAD XML AND XSL
xml = et.parse('/path/to/Template.xml')
xsl = et.parse('/path/to/XSLTScript.xsl')

# Compile the stylesheet once; the compiled transformer does not depend on
# the row, so it is reused for every iteration.
transform = et.XSLT(xsl)

### OPEN EXCEL SPREADSHEET
wb = openpyxl.load_workbook('import_spendesk_datev.xlsx')
sheet = wb['Import']


def _cell_param(row, column):
    """Return one cell of `sheet` as a quoted XSLT string parameter.

    XSLT.strparam() only accepts strings, while a cell value can be a number,
    a date or None, so the value is converted first. An empty cell (None)
    becomes the empty string instead of the text "None".
    """
    value = sheet.cell(row=row, column=column).value
    return et.XSLT.strparam('' if value is None else str(value))


# LOOP THROUGH ROWS
for i in range(2, 6):
    consolidate = _cell_param(i, 16)
    account = _cell_param(i, 21)
    invoice = _cell_param(i, 13)
    date = _cell_param(i, 2)
    amount = _cell_param(i, 16)
    account_no = _cell_param(i, 19)
    cost1 = _cell_param(i, 15)
    currency_code = _cell_param(i, 12)
    party_id = _cell_param(i, 20)
    bpaccount = _cell_param(i, 20)

    # PASS PARAMETER TO XSLT
    # Keyword names must match the <xsl:param> names of the stylesheet exactly.
    # The stylesheet declares prm_accountNo; the former keyword prm_account_no
    # matched no parameter, which left <accountNo> empty.
    result = transform(
        xml,
        prm_consolidate=consolidate,
        prm_bookingText=account,
        prm_invoiceId=invoice,
        prm_date=date,
        prm_amount=amount,
        prm_accountNo=account_no,
        prm_costCategoryId=cost1,
        prm_currencyCode=currency_code,
        prm_partyId=party_id,
        prm_bpAccountNo=bpaccount,
    )

    # SAVE XML TO FILE
    with open('/path/to/Output_Row{}.xml'.format(i), 'wb') as f:
        f.write(result)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.