简体   繁体   English

在python中将XML响应写入多个csv文件

[英]Write xml response to multiple csv files in Python

I would like to create three CSV files and write the XML response (REPORT_ITEM) to those files. 我想创建三个csv文件,并在这些文件上写入xml响应(REPORT_ITEM)。
I have the following XML data: 我有以下xml数据:

<?xml version="1.0" encoding="UTF-8"?>
<OASISReport>
   <MessageHeader>
      <TimeDate>2015-11-05T07:50:48-00:00</TimeDate>
      <Source>OASIS</Source>
      <Version>v20131201</Version>
   </MessageHeader>
   <MessagePayload>
      <RTO>
         <name>CAISO</name>
         <REPORT_ITEM>
            <REPORT_HEADER>
               <SYSTEM>OASIS</SYSTEM>
               <TZ>PPT</TZ>
               <REPORT>PRC_LMP</REPORT>
               <MKT_TYPE>DAM</MKT_TYPE>
               <UOM>US$/MWh</UOM>
               <INTERVAL>ENDING</INTERVAL>
               <SEC_PER_INTERVAL>3600</SEC_PER_INTERVAL>
            </REPORT_HEADER>
            <REPORT_DATA>
               <DATA_ITEM>LMP_PRC</DATA_ITEM>
               <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
               <OPR_DATE>2015-10-12</OPR_DATE>
               <INTERVAL_NUM>2</INTERVAL_NUM>
               <INTERVAL_START_GMT>2015-10-12T08:00:00-00:00</INTERVAL_START_GMT>
               <INTERVAL_END_GMT>2015-10-12T09:00:00-00:00</INTERVAL_END_GMT>
               <VALUE>29</VALUE>
            </REPORT_DATA>
            <REPORT_DATA>
               <DATA_ITEM>LMP_PRC</DATA_ITEM>
               <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
               <OPR_DATE>2015-10-12</OPR_DATE>
               <INTERVAL_NUM>12</INTERVAL_NUM>
               <INTERVAL_START_GMT>2015-10-12T18:00:00-00:00</INTERVAL_START_GMT>
               <INTERVAL_END_GMT>2015-10-12T19:00:00-00:00</INTERVAL_END_GMT>
               <VALUE>35.67227</VALUE>
            </REPORT_DATA>
</REPORT_ITEM>
<REPORT_ITEM>
            <REPORT_HEADER>
               <SYSTEM>OASIS</SYSTEM>
               <TZ>PPT</TZ>
               <REPORT>PRC_LMP</REPORT>
               <MKT_TYPE>DAM</MKT_TYPE>
               <UOM>US$/MWh</UOM>
               <INTERVAL>ENDING</INTERVAL>
               <SEC_PER_INTERVAL>3600</SEC_PER_INTERVAL>
            </REPORT_HEADER>
            <REPORT_DATA>
               <DATA_ITEM>LMP_PRC</DATA_ITEM>
               <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
               <OPR_DATE>2015-10-12</OPR_DATE>
               <INTERVAL_NUM>2</INTERVAL_NUM>
               <INTERVAL_START_GMT>2015-10-12T08:00:00-00:00</INTERVAL_START_GMT>
               <INTERVAL_END_GMT>2015-10-12T09:00:00-00:00</INTERVAL_END_GMT>
               <VALUE>29</VALUE>
            </REPORT_DATA>
            <REPORT_DATA>
               <DATA_ITEM>LMP_PRC</DATA_ITEM>
               <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
               <OPR_DATE>2015-10-12</OPR_DATE>
               <INTERVAL_NUM>12</INTERVAL_NUM>
               <INTERVAL_START_GMT>2015-10-12T18:00:00-00:00</INTERVAL_START_GMT>
               <INTERVAL_END_GMT>2015-10-12T19:00:00-00:00</INTERVAL_END_GMT>
               <VALUE>35.67227</VALUE>
            </REPORT_DATA>
</REPORT_ITEM>
<REPORT_ITEM>
            <REPORT_HEADER>
               <SYSTEM>OASIS</SYSTEM>
               <TZ>PPT</TZ>
               <REPORT>PRC_LMP</REPORT>
               <MKT_TYPE>DAM</MKT_TYPE>
               <UOM>US$/MWh</UOM>
               <INTERVAL>ENDING</INTERVAL>
               <SEC_PER_INTERVAL>3600</SEC_PER_INTERVAL>
            </REPORT_HEADER>
            <REPORT_DATA>
               <DATA_ITEM>LMP_PRC</DATA_ITEM>
               <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
               <OPR_DATE>2015-10-12</OPR_DATE>
               <INTERVAL_NUM>2</INTERVAL_NUM>
               <INTERVAL_START_GMT>2015-10-12T08:00:00-00:00</INTERVAL_START_GMT>
               <INTERVAL_END_GMT>2015-10-12T09:00:00-00:00</INTERVAL_END_GMT>
               <VALUE>29</VALUE>
            </REPORT_DATA>
            <REPORT_DATA>
               <DATA_ITEM>LMP_PRC</DATA_ITEM>
               <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
               <OPR_DATE>2015-10-12</OPR_DATE>
               <INTERVAL_NUM>12</INTERVAL_NUM>
               <INTERVAL_START_GMT>2015-10-12T18:00:00-00:00</INTERVAL_START_GMT>
               <INTERVAL_END_GMT>2015-10-12T19:00:00-00:00</INTERVAL_END_GMT>
               <VALUE>35.67227</VALUE>
            </REPORT_DATA>
</REPORT_ITEM>
<DISCLAIMER_ITEM>
            <DISCLAIMER>The contents of these pages are subject to change without notice.  Decisions based on information contained within the California ISO's web site are the visitor's sole responsibility.</DISCLAIMER>
         </DISCLAIMER_ITEM>
      </RTO>
   </MessagePayload>
</OASISReport>

This data has three "REPORT_ITEM" tags with data in them. 该数据具有三个带有数据的“ REPORT_ITEM”标签。 I want to write that data separately to three CSV files. 我想将该数据分别写入三个csv文件中。 So far I am able to parse that data using the 'etree' module. 到目前为止,我能够使用“ etree”模块解析该数据。

My code:

import lxml.etree as et
import csv

# Child tags read, in this order, from every <REPORT_HEADER> element.
HEADER_FIELDS = (
    'SYSTEM', 'TZ', 'REPORT', 'MKT_TYPE', 'UOM', 'INTERVAL',
    'SEC_PER_INTERVAL',
)
# Child tags read, in this order, from every <REPORT_DATA> element.
DATA_FIELDS = (
    'DATA_ITEM', 'RESOURCE_NAME', 'OPR_DATE', 'INTERVAL_NUM',
    'INTERVAL_START_GMT', 'INTERVAL_END_GMT', 'VALUE',
)

tree = et.parse('data.xml')
root = tree.getroot()

list_of_rows = []

# Two passes over the whole document: every header row first, then every
# data row.  Rows from all REPORT_ITEM elements end up in the same list.
for parent_tag, field_names in (
    ('REPORT_HEADER', HEADER_FIELDS),
    ('REPORT_DATA', DATA_FIELDS),
):
    for element in root.iter(parent_tag):
        # findtext() yields the default for a missing child instead of
        # failing on `None.text`, so an absent tag becomes an empty cell.
        list_of_rows.append(
            [element.findtext(name, default='') for name in field_names]
        )

# newline="" lets the csv module control line endings itself; without it
# Python 3 writes an extra blank line after every row on Windows.
with open("file.csv", "w", newline="") as f:
    csv.writer(f).writerows(list_of_rows)

Thanks. 谢谢。 I would like to create three CSV files and write the XML response "REPORT_ITEM" to those files. 我想创建三个csv文件,并在这些文件上写入xml响应“ REPORT_ITEM”

Use XPath instead, though you may need to tweak this to remove blank rows. Try it as shown below: 使用xpath代替,尽管您可能需要调整它以删除空白行!-尝试如下

import lxml.etree as et
import csv


xml="""<?xml version="1.0" encoding="UTF-8"?>
<OASISReport>
    <MessageHeader>
        <TimeDate>2015-11-05T07:50:48-00:00</TimeDate>
        <Source>OASIS</Source>
        <Version>v20131201</Version>
    </MessageHeader>
    <MessagePayload>
        <RTO>
            <name>CAISO</name>
            <REPORT_ITEM>
                <REPORT_HEADER>
                    <SYSTEM>OASIS</SYSTEM>
                    <TZ>PPT</TZ>
                    <REPORT>PRC_LMP</REPORT>
                    <MKT_TYPE>DAM</MKT_TYPE>
                    <UOM>US$/MWh</UOM>
                    <INTERVAL>ENDING</INTERVAL>
                    <SEC_PER_INTERVAL>3600</SEC_PER_INTERVAL>
                </REPORT_HEADER>
                <REPORT_DATA>
                    <DATA_ITEM>LMP_PRC</DATA_ITEM>
                    <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
                    <OPR_DATE>2015-10-12</OPR_DATE>
                    <INTERVAL_NUM>2</INTERVAL_NUM>
                    <INTERVAL_START_GMT>2015-10-12T08:00:00-00:00</INTERVAL_START_GMT>
                    <INTERVAL_END_GMT>2015-10-12T09:00:00-00:00</INTERVAL_END_GMT>
                    <VALUE>29</VALUE>
                </REPORT_DATA>
                <REPORT_DATA>
                    <DATA_ITEM>LMP_PRC</DATA_ITEM>
                    <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
                    <OPR_DATE>2015-10-12</OPR_DATE>
                    <INTERVAL_NUM>12</INTERVAL_NUM>
                    <INTERVAL_START_GMT>2015-10-12T18:00:00-00:00</INTERVAL_START_GMT>
                    <INTERVAL_END_GMT>2015-10-12T19:00:00-00:00</INTERVAL_END_GMT>
                    <VALUE>35.67227</VALUE>
                </REPORT_DATA>
</REPORT_ITEM>
<REPORT_ITEM>
    <REPORT_HEADER>
        <SYSTEM>OASIS</SYSTEM>
        <TZ>PPT</TZ>
        <REPORT>PRC_LMP</REPORT>
        <MKT_TYPE>DAM</MKT_TYPE>
        <UOM>US$/MWh</UOM>
        <INTERVAL>ENDING</INTERVAL>
        <SEC_PER_INTERVAL>3600</SEC_PER_INTERVAL>
    </REPORT_HEADER>
    <REPORT_DATA>
        <DATA_ITEM>LMP_PRC</DATA_ITEM>
        <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
        <OPR_DATE>2015-10-12</OPR_DATE>
        <INTERVAL_NUM>2</INTERVAL_NUM>
        <INTERVAL_START_GMT>2015-10-12T08:00:00-00:00</INTERVAL_START_GMT>
        <INTERVAL_END_GMT>2015-10-12T09:00:00-00:00</INTERVAL_END_GMT>
        <VALUE>29</VALUE>
    </REPORT_DATA>
    <REPORT_DATA>
        <DATA_ITEM>LMP_PRC</DATA_ITEM>
        <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
        <OPR_DATE>2015-10-12</OPR_DATE>
        <INTERVAL_NUM>12</INTERVAL_NUM>
        <INTERVAL_START_GMT>2015-10-12T18:00:00-00:00</INTERVAL_START_GMT>
        <INTERVAL_END_GMT>2015-10-12T19:00:00-00:00</INTERVAL_END_GMT>
        <VALUE>35.67227</VALUE>
    </REPORT_DATA>
</REPORT_ITEM>
<REPORT_ITEM>
    <REPORT_HEADER>
        <SYSTEM>OASIS</SYSTEM>
        <TZ>PPT</TZ>
        <REPORT>PRC_LMP</REPORT>
        <MKT_TYPE>DAM</MKT_TYPE>
        <UOM>US$/MWh</UOM>
        <INTERVAL>ENDING</INTERVAL>
        <SEC_PER_INTERVAL>3600</SEC_PER_INTERVAL>
    </REPORT_HEADER>
    <REPORT_DATA>
        <DATA_ITEM>LMP_PRC</DATA_ITEM>
        <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
        <OPR_DATE>2015-10-12</OPR_DATE>
        <INTERVAL_NUM>2</INTERVAL_NUM>
        <INTERVAL_START_GMT>2015-10-12T08:00:00-00:00</INTERVAL_START_GMT>
        <INTERVAL_END_GMT>2015-10-12T09:00:00-00:00</INTERVAL_END_GMT>
        <VALUE>29</VALUE>
    </REPORT_DATA>
    <REPORT_DATA>
        <DATA_ITEM>LMP_PRC</DATA_ITEM>
        <RESOURCE_NAME>TH_SP15_GEN-APND</RESOURCE_NAME>
        <OPR_DATE>2015-10-12</OPR_DATE>
        <INTERVAL_NUM>12</INTERVAL_NUM>
        <INTERVAL_START_GMT>2015-10-12T18:00:00-00:00</INTERVAL_START_GMT>
        <INTERVAL_END_GMT>2015-10-12T19:00:00-00:00</INTERVAL_END_GMT>
        <VALUE>35.67227</VALUE>
    </REPORT_DATA>
</REPORT_ITEM>
<DISCLAIMER_ITEM>
    <DISCLAIMER>The contents of these pages are subject to change without notice.  Decisions based on information contained within the California ISO's web site are the visitor's sole responsibility.</DISCLAIMER>
    </DISCLAIMER_ITEM>
    </RTO>
    </MessagePayload>
</OASISReport>"""

# lxml rejects a text (str) document that carries an encoding declaration,
# so pass the UTF-8 encoded bytes instead.
tree = et.fromstring(xml.encode("utf-8"))

# Every <REPORT_ITEM> element anywhere in the document.
itms = tree.xpath("//REPORT_ITEM")

# One entry per REPORT_ITEM.  Each entry is a list of rows: one row per
# child element (REPORT_HEADER / REPORT_DATA) holding the text of that
# child's own children.
data = []
for report_item in itms:
    rows = [[field.text for field in section] for section in report_item]
    print(rows)
    data.append(rows)

# One output file per REPORT_ITEM: D:\_0.csv, D:\_1.csv, ...
# (Looping over the items and over the indices in two nested loops would
# rewrite every file for every item, leaving only the last item's rows.)
for index, rows in enumerate(data):
    # newline="" lets the csv module control line endings itself; without
    # it Python 3 writes an extra blank line after every row on Windows.
    with open("D:\\_" + str(index) + ".csv", "w", newline="") as f:
        csv.writer(f).writerows(rows)

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM