python lxml - 循环/遍历excel行并将每行保存为一个xml

Question

The problem is that the 2nd XML file also contains the data from the first Excel row, and the third XML file contains all the data from the first and 2nd rows. 问题是第二个xml文件还包含第一行excel的数据，而第三个xml文件包含第一行和第二行的所有数据。

I have been working on this for hours and can't figure it out. 我已经为此折腾了好几个小时，还是弄不明白。

from lxml import etree
import openpyxl


# Create root element with namespace information
xmlns = "http://xml.datev.de/bedi/tps/ledger/v040"
xsi = "http://www.w3.org/2001/XMLSchema-instance"
schemaLocation = "http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd"
version = "4.0"
generator_info = "DATEV Musterdaten"
generating_system = "DATEV manuell"

# NOTE(review): xmlRoot is created exactly once, before the loop below.
# Every SubElement(xmlRoot, ...) call in the loop appends to this same
# element, so each file written contains all rows processed so far.
xmlRoot = etree.Element(
    "{" + xmlns + "}LedgerImport",
    version=version,
    attrib={"{" + xsi + "}schemaLocation": schemaLocation},
    generator_info=generator_info,
    generating_system=generating_system,
    nsmap={'xsi': xsi, None: xmlns}
)

#### open Excel spreadsheet
wb = openpyxl.load_workbook('import_spendesk_datev.xlsx')
sheet = wb['Import']

# build the xml tree
# Rows 2..5 of the sheet are processed; one <consolidate> subtree is appended
# to xmlRoot per row and the tree is written out after each row.
for i in range(2,6):
        # <consolidate> attributes are taken from columns 16, 2, 13 and 12 and
        # converted with str().
        consolidate = etree.SubElement(xmlRoot, 'consolidate', attrib={'consolidatedAmount': str(sheet.cell(row=i,column=16).value),'consolidatedDate': str(sheet.cell(row=i,column=2).value), 'consolidatedInvoiceId': str(sheet.cell(row=i,column=13).value), 'consolidatedCurrencyCode': str(sheet.cell(row=i,column=12).value) })
        accountsPayableLedger = etree.SubElement(consolidate, 'accountsPayableLedger')
        # The element texts below are assigned the raw cell values, without
        # the str() conversion used for the attributes above.
        account = etree.SubElement(accountsPayableLedger, 'bookingText')
        account.text = sheet.cell(row=i,column=21).value
        invoice = etree.SubElement(accountsPayableLedger, 'invoiceId')
        invoice.text = sheet.cell(row=i,column=13).value
        date = etree.SubElement(accountsPayableLedger, 'date')
        date.text = sheet.cell(row=i,column=2).value
        amount = etree.SubElement(accountsPayableLedger, 'amount')
        amount.text = sheet.cell(row=i,column=16).value
        account_no = etree.SubElement(accountsPayableLedger, 'accountNo')
        account_no.text = sheet.cell(row=i,column=19).value
        cost1 = etree.SubElement(accountsPayableLedger, 'costCategoryId')
        cost1.text = sheet.cell(row=i,column=15).value
        currency_code = etree.SubElement(accountsPayableLedger, 'currencyCode')
        currency_code.text = sheet.cell(row=i,column=12).value
        # partyId and bpAccountNo are both read from column 20.
        party_id = etree.SubElement(accountsPayableLedger, 'partyId')
        party_id.text = sheet.cell(row=i,column=20).value
        bpaccount = etree.SubElement(accountsPayableLedger, 'bpAccountNo')
        bpaccount.text = sheet.cell(row=i,column=20).value
        # Serialize the whole tree rooted at xmlRoot; the file name is the
        # value of column 13 followed by ".xml".
        doc = etree.ElementTree(xmlRoot)
        doc.write( str(sheet.cell(row=i,column=13).value)+".xml", xml_declaration=True, encoding='utf-8', pretty_print=True)

The desired result, as described: for every single Excel row, one separate .xml file like the following. 期望的结果如上所述：每个excel行对应一个单独的.xml文件，如下所示。

<?xml version='1.0' encoding='UTF-8'?>
<LedgerImport xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://xml.datev.de/bedi/tps/ledger/v040" generating_system="DATEV manuell" generator_info="DATEV Musterdaten" version="4.0" xsi:schemaLocation="http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd">
  <consolidate consolidatedAmount="1337.01">
    <accountsPayableLedger>
      <bookingText>amazon</bookingText>
      <invoiceId>1</invoiceId>
    </accountsPayableLedger>
  </consolidate>
</LedgerImport>

Answer 1

The same xmlRoot object is reused several times. 相同的xmlRoot对象被重复使用了多次。 You need to create a new root element for each iteration in the for loop. 您需要在for循环的每次迭代中创建一个新的根元素。

The code that creates the root element can be put in a function. 创建根元素的代码可以放在函数中。 Here is a simplified example: 这是一个简化的例子：

from lxml import etree

def makeroot():
    """Create and return a fresh, empty ``LedgerImport`` root element."""
    root = etree.Element("LedgerImport")
    return root

# Write one document per index. A new root is requested on every pass, so
# nothing from a previous document is carried over into the next one.
for i in range(2, 6):
    xmlRoot = makeroot()
    amount_attrs = {'consolidatedAmount': str(i)}
    consolidate = etree.SubElement(xmlRoot, 'consolidate', attrib=amount_attrs)
    doc = etree.ElementTree(xmlRoot)
    out_name = "{}.xml".format(i)
    doc.write(out_name, xml_declaration=True, encoding='utf-8', pretty_print=True)

Answer 2

After @mzjn pointed out your basic mistake, here is a thing I made for fun - you can create nested XML with a declarative mapping, instead of laboriously calling etree.SubElement yourself. 在@mzjn指出你的基本错误之后，这是我为了好玩而做的一个东西 - 你可以用声明性映射创建嵌套XML，而不必费力地自己调用etree.SubElement 。

Here is how. 方法如下。 Assume this as the basic situation: 假设基本情况如下：

from lxml import etree
import openpyxl

# Namespace map for the document: the None key is the default namespace,
# 'xsi' is the XML Schema instance namespace.
ns = {
    None:  'http://xml.datev.de/bedi/tps/ledger/v040',
    'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
}

# Declarative description of one output document.
#   '_tag', 'attrib', 'nsmap' -- named after the etree.Element()/SubElement()
#                                parameters they correspond to
#   '_children'               -- list of nested templates of the same shape
#   '_text'                   -- text content of the element
# Values given as lambdas are evaluated when the tree is built, not when this
# dict is defined: they read the module-level names `sheet` and `i` at call
# time, so both must be bound before the mapping is turned into a tree.
mapping = {
    '_tag': '{' + ns[None] + '}LedgerImport',
    'attrib': {
        'version': '4.0',
        '{' + ns['xsi'] + '}schemaLocation': 'http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd',
        'generator_info': 'DATEV Musterdaten',
        'generating_system': 'DATEV manuell',
    },
    'nsmap': ns,
    '_children': [{
        '_tag': 'consolidate',
        'attrib': {
            # per-row attribute values, read from columns 16, 2, 13 and 12
            'consolidatedAmount': lambda: sheet.cell(i, 16).value,
            'consolidatedDate': lambda: sheet.cell(i, 2).value,
            'consolidatedInvoiceId': lambda: sheet.cell(i, 13).value,
            'consolidatedCurrencyCode': lambda: sheet.cell(i, 12).value,
        },
        '_children': [{
            '_tag': 'accountsPayableLedger',
            '_children': [
                # one child element per value; partyId and bpAccountNo both
                # read column 20
                {'_tag': 'bookingText', '_text': lambda: sheet.cell(i, 21).value},
                {'_tag': 'invoiceId', '_text': lambda: sheet.cell(i, 13).value},
                {'_tag': 'date', '_text': lambda: sheet.cell(i, 2).value},
                {'_tag': 'amount', '_text': lambda: sheet.cell(i, 16).value},
                {'_tag': 'accountNo', '_text': lambda: sheet.cell(i, 19).value},
                {'_tag': 'costCategoryId', '_text': lambda: sheet.cell(i, 15).value},
                {'_tag': 'currencyCode', '_text': lambda: sheet.cell(i, 12).value},
                {'_tag': 'partyId', '_text': lambda: sheet.cell(i, 20).value},
                {'_tag': 'bpAccountNo', '_text': lambda: sheet.cell(i, 20).value},
            ]
        }]
    }],
}

The nested dict resembles your final XML document. 嵌套的dict类似于最终的XML文档。 Its keys also resemble the parameters that etree.Element() and etree.SubElement() take, with the addition of _text and _children . 它的键也类似于etree.Element()和etree.SubElement()采用的参数，并添加了_text和_children 。

Now we can define a single recursive helper function that takes this input tree and transforms it into a nested XML tree of the same configuration. 现在我们可以定义一个递归辅助函数，它接受这个输入树并将其转换为相同配置的嵌套XML树。 As a bonus we can execute the lambda functions, which allows us to dynamically calculate attribute values and text: 作为奖励，我们可以执行lambda函数，这允许我们动态计算属性值和文本：

def build_tree(template, parent=None):
    """Recursively build an lxml element tree from a nested dict template.

    The template is never modified, so the same template can be passed in
    again and its callables will be re-evaluated on every call.

    Args:
        template: dict describing one element. The keys ``_children`` and
            ``_text`` are handled here; every other key (``_tag``,
            ``attrib``, ``nsmap``, ...) is passed on as a keyword argument
            to ``etree.Element()`` / ``etree.SubElement()``. Attribute
            values and ``_text`` may be callables taking no arguments; they
            are called and the result is converted with ``str()``.
        parent: element to attach the new node to, or ``None`` to create a
            root element.

    Returns:
        The newly created element, with all of its descendants attached.
    """
    # prepare a dict for calling etree.Element()/etree.SubElement()
    params = {k: v for k, v in template.items() if k not in ('_children', '_text')}

    # Calculate any dynamic attribute values into a NEW dict. `params` is only
    # a shallow copy, so writing the results back into params['attrib'] would
    # replace the template's callables with the strings from the first call
    # and every later call would silently reuse those stale values.
    attrib = params.get('attrib')
    if attrib is not None:
        params['attrib'] = {
            name: str(value() if callable(value) else value)
            for name, value in attrib.items()
        }

    if parent is None:
        node = etree.Element(**params)
    else:
        node = etree.SubElement(parent, **params)

    # calculate (if necessary) and set the node text
    if '_text' in template:
        text = template['_text']
        if callable(text):
            node.text = str(text())
        elif text is None:
            node.text = None
        else:
            # str() also covers falsy non-string values such as 0
            node.text = str(text)

    # recurse into children, if any
    for child in template.get('_children', []):
        build_tree(child, node)

    return node

We can call this in a loop: 我们可以循环调用它：

# Open the workbook and pick the worksheet the mapping's lambdas read from.
wb = openpyxl.load_workbook('import_spendesk_datev.xlsx')
sheet = wb['Import']

# `sheet` and `i` must keep these exact names: the lambdas inside `mapping`
# look them up by name each time a tree is built.
for i in range(2, 6):
    root = build_tree(mapping)
    doc = etree.ElementTree(root)
    invoice_id = sheet.cell(i, 13).value
    name = "{}.xml".format(invoice_id)
    doc.write(name, xml_declaration=True, encoding='utf-8', pretty_print=True)

This should generate a couple of nicely nested XML documents, and it should be a lot easier to manage if your XML structure changes or gets more complicated. 这应该会生成几个嵌套良好的XML文档，而且当你的XML结构发生变化或变得更复杂时，它应该更容易管理。

Answer 3

Alternatively, consider XSLT , the special-purpose declarative language designed to transform XML files, which lxml does support. 或者，考虑XSLT ，这是专门用于转换XML文件的声明式语言， lxml支持它。 Specifically, pass parameters from Python to the stylesheet to transform a template XML (not unlike passing parameters to a prepared SQL statement): 具体来说，将参数从Python传递到样式表以转换模板XML（与将参数传递到预编译的SQL语句类似）：

XML template (includes all top-level namespaces) XML模板（包括所有顶级命名空间）

<?xml version='1.0' encoding='UTF-8'?>
<!-- Template document: the root element carries the namespace declarations
     and fixed attributes; every "???" below is a placeholder value. -->
<LedgerImport xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
              xmlns="http://xml.datev.de/bedi/tps/ledger/v040" 
              generating_system="DATEV manuell" 
              generator_info="DATEV Musterdaten" version="4.0" 
              xsi:schemaLocation="http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd">
  <consolidate consolidatedAmount="???">
    <accountsPayableLedger>
      <bookingText>???</bookingText>
      <invoiceId>???</invoiceId>
      <date>???</date>
      <amount>???</amount>
      <accountNo>???</accountNo>
      <costCategoryId>???</costCategoryId>
      <currencyCode>???</currencyCode>
      <partyId>???</partyId>
      <bpAccountNo>???</bpAccountNo>
    </accountsPayableLedger>
  </consolidate>
</LedgerImport>

XSLT (save as .xsl file, a little longer due to default namespace in XML) XSLT （另存为.xsl文件，由于XML中的默认命名空间而稍长）

<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                              xmlns:doc="http://xml.datev.de/bedi/tps/ledger/v040">
  <xsl:output indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- INITIALIZE PARAMETERS (values are supplied by the caller; the names
       must match the keyword arguments passed to the transform) -->
  <xsl:param name="prm_consolidate" />
  <xsl:param name="prm_bookingText" />
  <xsl:param name="prm_invoiceId" />
  <xsl:param name="prm_date" />
  <xsl:param name="prm_amount" />
  <xsl:param name="prm_accountNo" />
  <xsl:param name="prm_costCategoryId" />
  <xsl:param name="prm_currencyCode" />
  <xsl:param name="prm_partyId" />
  <xsl:param name="prm_bpAccountNo" />

  <!-- IDENTITY TRANSFORM: copy everything that no other template overrides -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- REWRITE THE consolidatedAmount ATTRIBUTE IN PLACE on the existing
       <consolidate> element, instead of emitting a second, empty
       <consolidate> inside <accountsPayableLedger> and leaving the
       placeholder value on the real one -->
  <xsl:template match="doc:consolidate/@consolidatedAmount">
    <xsl:attribute name="consolidatedAmount"><xsl:value-of select="$prm_consolidate"/></xsl:attribute>
  </xsl:template>

  <!-- REBUILD THE LEDGER ENTRY: keep the element itself and replace its
       children with elements filled from the parameters -->
  <xsl:template match="doc:accountsPayableLedger">
    <xsl:copy>
      <xsl:element name="bookingText" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_bookingText"/></xsl:element>
      <xsl:element name="invoiceId" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_invoiceId"/></xsl:element>
      <xsl:element name="date" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_date"/></xsl:element>
      <xsl:element name="amount" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_amount"/></xsl:element>
      <xsl:element name="accountNo" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_accountNo"/></xsl:element>
      <xsl:element name="costCategoryId" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_costCategoryId"/></xsl:element>
      <xsl:element name="currencyCode" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_currencyCode"/></xsl:element>
      <xsl:element name="partyId" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_partyId"/></xsl:element>
      <xsl:element name="bpAccountNo" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_bpAccountNo"/></xsl:element>
    </xsl:copy>
  </xsl:template>

</xsl:stylesheet>

Python (no DOM element building) Python （没有DOM元素构建）

import lxml.etree as et
import openpyxl

# LOAD XML AND XSL
xml = et.parse('/path/to/Template.xml')
xsl = et.parse('/path/to/XSLTScript.xsl')

# COMPILE THE STYLESHEET ONCE (it is the same for every row)
transform = et.XSLT(xsl)

### OPEN EXCEL SPREADSHEET
wb = openpyxl.load_workbook('import_spendesk_datev.xlsx')
sheet = wb['Import']


def cell_param(row, column):
    """Return the value of one sheet cell as a quoted XSLT string parameter.

    The cell value is converted to text first because it is not necessarily
    a string (e.g. a numeric amount); an empty cell (None) becomes ''.
    """
    value = sheet.cell(row=row, column=column).value
    return et.XSLT.strparam('' if value is None else str(value))


# LOOP THROUGH ROWS
for i in range(2, 6):
    consolidate = cell_param(i, 16)
    account = cell_param(i, 21)
    invoice = cell_param(i, 13)
    date = cell_param(i, 2)
    amount = cell_param(i, 16)
    account_no = cell_param(i, 19)
    cost1 = cell_param(i, 15)
    currency_code = cell_param(i, 12)
    party_id = cell_param(i, 20)
    bpaccount = cell_param(i, 20)

    # PASS PARAMETERS TO XSLT
    # Each keyword must match an <xsl:param name="..."> in the stylesheet;
    # the stylesheet declares prm_accountNo (not prm_account_no).
    result = transform(xml, prm_consolidate=consolidate,
                            prm_bookingText=account,
                            prm_invoiceId=invoice,
                            prm_date=date,
                            prm_amount=amount,
                            prm_accountNo=account_no,
                            prm_costCategoryId=cost1,
                            prm_currencyCode=currency_code,
                            prm_partyId=party_id,
                            prm_bpAccountNo=bpaccount)

    # SAVE XML TO FILE
    with open('/path/to/Output_Row{}.xml'.format(i), 'wb') as f:
        f.write(result)

python lxml - 循环/遍历excel行并将每行保存为一个xml

问题描述

3 个解决方案

解决方案1
4 已采纳 2019-08-28 17:10:06

解决方案2
2 2019-08-28 18:49:26

解决方案3
1 2019-08-28 18:07:09

python lxml - 循环/遍历excel行并将每行保存为一个xml

问题描述

3 个解决方案

解决方案1 4 已采纳 2019-08-28 17:10:06

解决方案2 2 2019-08-28 18:49:26

解决方案3 1 2019-08-28 18:07:09

解决方案1
4 已采纳 2019-08-28 17:10:06

解决方案2
2 2019-08-28 18:49:26

解决方案3
1 2019-08-28 18:07:09