简体   繁体   中英

Converting a txt file to an xml file in Python

Good morning

I would like to know if there is a way to convert a text file directly to an XML file:

This is my text file:

5g8hj;Tournevis;cm;5,10

6r9tl;Marteau;cm;8,20

5d6ft;peinture;l;50,10

7ja3t;Vis 12*12,25;cm;500,001

And I'd like my XML file to look like this:

<?xml version='1.0' encoding='UTF-8'?>
<Bricolage>
  <Article>
    <Code_Article>5g8hj</Code_Article>
    <Nom_Article>Tournevis</Nom_Article>
    <Unite_Article>cm</Unite_Article>
    <Nombre>5,10</Nombre>
  </Article>
  <Article>
    <Code_Article>6r9tl</Code_Article>
    <Nom_Article>Marteau</Nom_Article>
    <Unite_Article>cm</Unite_Article>
    <Nombre>8,20</Nombre>
  </Article>
  <Article>
    <Code_Article>5d6ft</Code_Article>
    <Nom_Article>peinture</Nom_Article>
    <Unite_Article>l</Unite_Article>
    <Nombre>50,10</Nombre>
  </Article>
  <Article>
    <Code_Article>7ja3t</Code_Article>
    <Nom_Article>Vis 12*12,25</Nom_Article>
    <Unite_Article>cm</Unite_Article>
    <Nombre>500,001</Nombre>
  </Article>
</Bricolage>

Text file:

5g8hj;Tournevis;cm;5,10
6r9tl;Marteau;cm;8,20
5d6ft;peinture;l;50,10
7ja3t;Vis 12*12,25;cm;500,001
#!/usr/bin/python2.7
# -*-coding:Latin-1 -*

import csv
from lxml import etree as et

# Convert the semicolon-separated file "Codes_articles.txt" into an XML
# document rooted at <Bricolage>, with one <Article> element per input row.

# Tag of each child element, in the same order as the columns of the input.
column_tags = ("Code_Article", "Nom_Article", "Unite_Article", "Nombre")

root = et.Element("Bricolage")

with open("Codes_articles.txt") as source:
    for row in csv.reader(source, delimiter=";"):
        # csv.reader yields an empty list for a blank line; skip it instead
        # of failing on row[0].
        if not row:
            continue
        article = et.SubElement(root, "Article")
        for index, tag in enumerate(column_tags):
            # Index explicitly so that a row with too few columns still
            # raises IndexError rather than producing a partial article.
            et.SubElement(article, tag).text = row[index]

# Serialise once (as bytes) and reuse the result for both outputs below.
xml_datas = et.tostring(root, pretty_print=True, xml_declaration=True, encoding="utf-8")

# To print:
print(xml_datas.decode())

# To write the file (binary mode, because xml_datas is already encoded):
with open("direct_txt_to_xml.xml", "wb") as outfile:
    outfile.write(xml_datas)

This results in:

<?xml version='1.0' encoding='utf-8'?>
<Bricolage>
  <Article>
    <Code_Article>5g8hj</Code_Article>
    <Nom_Article>Tournevis</Nom_Article>
    <Unite_Article>cm</Unite_Article>
    <Nombre>5,10</Nombre>
  </Article>
  <Article>
    <Code_Article>6r9tl</Code_Article>
    <Nom_Article>Marteau</Nom_Article>
    <Unite_Article>cm</Unite_Article>
    <Nombre>8,20</Nombre>
  </Article>
  <Article>
    <Code_Article>5d6ft</Code_Article>
    <Nom_Article>peinture</Nom_Article>
    <Unite_Article>l</Unite_Article>
    <Nombre>50,10</Nombre>
  </Article>
  <Article>
    <Code_Article>7ja3t</Code_Article>
    <Nom_Article>Vis 12*12,25</Nom_Article>
    <Unite_Article>cm</Unite_Article>
    <Nombre>500,001</Nombre>
  </Article>
</Bricolage>

Assuming your text is in input.txt, try this:

from xml.etree.ElementTree import Element, ElementTree, SubElement, Comment, tostring


def indent(elem, level=0):
    """Add pretty-printing whitespace to ``elem`` and its descendants in place.

    ``elem`` is an ElementTree-style element and ``level`` is its nesting
    depth (two spaces of indentation per level).  Only ``text`` and ``tail``
    values that are missing or whitespace-only are replaced; anything else
    is left untouched.  Nothing is returned.
    """
    pad = "\n" + "  " * level

    def is_blank(value):
        # True for None, "" and whitespace-only strings.
        return not value or not value.strip()

    if not len(elem):
        # Leaf element: only its tail needs attention, and never for the
        # top-level call (level 0).
        if level and is_blank(elem.tail):
            elem.tail = pad
        return

    if is_blank(elem.text):
        # Whitespace before the first child, one level deeper.
        elem.text = pad + "  "
    if is_blank(elem.tail):
        elem.tail = pad

    last_child = None
    for child in elem:
        indent(child, level + 1)
        last_child = child

    # The last child is followed by this element's closing tag, so its tail
    # drops back to this element's own indentation.
    if is_blank(last_child.tail):
        last_child.tail = pad


# Convert the semicolon-separated file "input.txt" into "output.xml": a
# <Bricolage> root holding one <Article> element per non-blank input line.

# Tag of each child element, in the same order as the fields of a line.
field_tags = ("Code_Article", "Nom_Article", "Unite_Article", "Nombre")

top = Element("Bricolage")

with open("input.txt", "r") as f:
    for raw_line in f:
        # Drop the line terminator first; otherwise the last field (Nombre)
        # would carry a trailing newline into the XML text.
        line = raw_line.rstrip("\r\n")
        if not line.strip():
            # Blank lines carry no article.
            continue
        splits = line.split(";")
        child = SubElement(top, "Article")
        for index, tag in enumerate(field_tags):
            # Index explicitly so that a line with too few fields still
            # raises IndexError rather than producing a partial article.
            SubElement(child, tag).text = splits[index]

# Insert newlines/indentation so the written file is human-readable.
indent(top)

tree = ElementTree(top)

# Emit the XML declaration and encode as UTF-8 so the file starts with
# <?xml version='1.0' encoding='UTF-8'?> as in the requested output.
tree.write("output.xml", encoding="UTF-8", xml_declaration=True)

This uses the pretty-printing recipe from http://effbot.org/zone/element-lib.htm#prettyprint.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM