繁体   English   中英

循环遍历多个 xml 文件

[英]Loop through multiple xml files

我是 python 的新手,想遍历多个 xml 文件。 我目前正在使用现有代码提取 sample2 xml 文件:

import xml.etree.ElementTree as ET
import pandas as pd
import os


# Parse the sample document and take its root element.
tree = ET.parse("sample2.xml")
root = tree.getroot()

# Iterators over every matching descendant element of the root.
qty = root.iterfind(".//Qty")
pri = root.iterfind(".//PriceAmount")
cor = root.iterfind(".//AuctionIdentification")

# Pair the three streams positionally; zip() stops at the shortest one.
data = []
for qty_el, price_el, auction_el in zip(qty, pri, cor):
    data.append((qty_el.get("v"), price_el.get("v"), auction_el.get("v")))

# The "v" attributes come back as text, so cast the numeric columns.
df = pd.DataFrame(data, columns=["Qty", "Price", "Border"])
df = df.astype({"Qty": float, "Price": float})

# Reduce to a single summary row: total quantity, mean price and the
# first 12 characters of the first auction identifier.
total = df["Qty"].sum()
price = df["Price"].mean()
border = df.loc[0, "Border"]

df2 = pd.DataFrame(
    {
        "Qty": [total],
        "Price": [price],
        "Border": [str(border)[0:12]],
    }
)

我尝试将 soup xml 添加到下面的代码行,但这没有用

# NOTE: this attempt fails. The quotes are unbalanced, so the line is a
# syntax error, and ET.parse() reads a single source per call.
tree=ET.parse("sample2.xml, "soup xml")

root = tree.getroot()

考虑将您的代码转换为函数,并为您需要处理的每个文件调用它:

import xml.etree.ElementTree as ET
import pandas as pd
import os

def my_xml_processor(filename):
    """Summarise the quantity, price and auction data of one XML file.

    Reads the ``v`` attribute of every ``Qty``, ``PriceAmount`` and
    ``AuctionIdentification`` element below the root and pairs them
    positionally.

    Args:
        filename: Source passed straight to ``ET.parse``.

    Returns:
        A one-row DataFrame with the columns ``Qty`` (sum of all
        quantities), ``Price`` (mean of all prices) and ``Border`` (first
        12 characters of the first auction identifier).
    """
    tree = ET.parse(filename)
    root = tree.getroot()

    qty_nodes = root.iterfind(".//Qty")
    price_nodes = root.iterfind(".//PriceAmount")
    auction_nodes = root.iterfind(".//AuctionIdentification")

    # zip() stops at the shortest iterator, so only complete triples are kept.
    rows = []
    for qty_el, price_el, auction_el in zip(qty_nodes, price_nodes, auction_nodes):
        rows.append((qty_el.get("v"), price_el.get("v"), auction_el.get("v")))

    # Attribute values are text; cast the numeric columns before aggregating.
    frame = pd.DataFrame(rows, columns=["Qty", "Price", "Border"])
    frame = frame.astype({"Qty": float, "Price": float})

    qty_total = frame["Qty"].sum()
    price_mean = frame["Price"].mean()
    first_border = frame.loc[0, "Border"]

    return pd.DataFrame(
        {
            "Qty": [qty_total],
            "Price": [price_mean],
            "Border": [str(first_border)[0:12]],
        }
    )

然后你可以为你的文件调用它:

# Each call returns a one-row DataFrame; the results are not stored here.
my_xml_processor("sample2.xml")

my_xml_processor("soup.xml")

编辑:这些是我推荐的一些小代码更改:

import xml.etree.ElementTree as ET
import pandas as pd
import os

def my_xml_processor(filename: str) -> pd.DataFrame:  # <- Add type hints
    """Summarise the quantity, price and auction data of one XML file.

    Reads the ``v`` attribute of every ``Qty``, ``PriceAmount`` and
    ``AuctionIdentification`` element below the root and pairs them
    positionally.

    Args:
        filename: Source passed straight to ``ET.parse``.

    Returns:
        A one-row DataFrame with the columns ``Qty`` (sum of all
        quantities), ``Price`` (mean of all prices) and ``Border`` (first
        12 characters of the first auction identifier).

    Raises:
        ValueError: If the document yields no complete
            Qty/PriceAmount/AuctionIdentification triple, or a quantity or
            price cannot be converted to float.
    """
    root = ET.parse(filename).getroot()  # <- tree is not used

    qty = root.iterfind(".//Qty")
    pri = root.iterfind(".//PriceAmount")
    cor = root.iterfind(".//AuctionIdentification")

    data = [  # <- This could be a list comprehension
        (x.get("v"), y.get("v"), z.get("v"))
        for x, y, z in zip(qty, pri, cor)
    ]
    if not data:
        raise ValueError(
            f"no Qty/PriceAmount/AuctionIdentification triples found in {filename!r}"
        )

    df = pd.DataFrame(data, columns=["Qty", "Price", "Border"]).astype(
        {
            "Qty": float,
            "Price": float,
        }
    )

    # Border is read positionally rather than through df.agg with a lambda:
    # depending on the pandas version, Series.agg may call the lambda once
    # per element instead of once per column, which breaks the aggregation.
    # Building the frame directly also keeps Qty and Price as float columns.
    return pd.DataFrame(
        {
            "Qty": [df["Qty"].sum()],
            "Price": [df["Price"].mean()],
            "Border": [str(df["Border"].iloc[0])[:12]],
        }
    )

您可以使用您现有的代码,但为您拥有的每个文件名循环运行它,例如:


import xml.etree.ElementTree as ET
import pandas as pd
import os


files = ['sample2.xml', 'sample3.xml', 'sample4.xml']

# One summary row per file is collected here; without this, df2 is rebound on
# every iteration and only the last file's result would survive the loop.
results = []

for file in files:  # read each filename from above list
    tree = ET.parse(file)
    root = tree.getroot()

    qty = root.iterfind(".//Qty")
    pri = root.iterfind(".//PriceAmount")
    cor = root.iterfind(".//AuctionIdentification")

    # Pair the three streams positionally; zip() stops at the shortest one.
    data = []
    for x, y, z in zip(qty, pri, cor):
        data.append((x.get("v"), y.get("v"), z.get("v")))

    # The "v" attributes come back as text, so cast the numeric columns.
    df = pd.DataFrame(data, columns=["Qty", "Price", "Border"])
    df['Qty'] = df['Qty'].astype(float)
    df['Price'] = df['Price'].astype(float)

    total = df['Qty'].sum()
    price = df['Price'].mean()
    border = df.loc[0, 'Border']

    # Summary row for this file: total quantity, mean price and the first
    # 12 characters of the first auction identifier.
    df2 = pd.DataFrame(
        {
            "Qty": [total],
            "Price": [price],
            "Border": [str(border)[0:12]],
        }
    )
    results.append(df2)

# Stack the per-file rows into one table (one row per input file).
if results:
    combined = pd.concat(results, ignore_index=True)
else:
    combined = pd.DataFrame(columns=["Qty", "Price", "Border"])

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM