简体   繁体   中英

How to convert a .txt file to .xml file using python

My abc.txt file looks like this:

1
76 45 146 87

And this is my EXPECTED output showing in xyz.xml file:

-<root>
   -<object>
       - <label>1</label>
     </object>
   -<cordinates>
        <xmin>76</xmin>
        <ymin>45</ymin>
        <xmin>146</xmin>
        <xmax>87</xmax>
    </cordinates>
 </root>

This is my source code that I have tried:

# xml.etree.cElementTree was removed in Python 3.9; ElementTree picks up the
# C accelerator automatically, so this import works on every Python 3 version.
import xml.etree.ElementTree as ET

root = ET.Element("root")

# <object> carries only the class label.  The variable is not called
# ``object`` so the builtin of that name is not shadowed.
object_node = ET.SubElement(root, "object")
ET.SubElement(object_node, "label").text = "1"

# <cordinates> is a sibling of <object>.  The tag names (including the
# repeated "xmin") are kept exactly as in the original script so the
# generated file is unchanged.
cordinates = ET.SubElement(root, "cordinates")
for tag, value in (("xmin", "76"), ("ymin", "45"), ("xmin", "146"), ("xmax", "87")):
    ET.SubElement(cordinates, tag).text = value

tree = ET.ElementTree(root)
tree.write("xyz.xml")

But the problem is that I did the labeling and entered the coordinates manually. I want the program to read the abc.txt file and do the job automatically, for more than 350 .txt files. Can someone please help me rewrite the code? Any help will be appreciated. Thanks!

This code will read the object name and coordinates into a dictionary and then iterate over the dictionary to create the corresponding elements that you require.

# xml.etree.cElementTree was removed in Python 3.9; ElementTree uses the C
# accelerator automatically.
import xml.etree.ElementTree as ET

# Tag names for the four numbers on the second line, in file order.  They
# mirror the layout the original script produced (including the repeated
# "xmin") so the generated XML keeps the same shape.
COORD_TAGS = ("xmin", "ymin", "xmin", "xmax")


def read_annotation(path):
    """Parse one annotation file.

    The file is expected to hold the label on its first line and four
    whitespace-separated coordinates on its second line.

    Args:
        path: Path of the text file to read.

    Returns:
        A ``(label, coords)`` tuple where ``coords`` is a list of four
        strings, or ``None`` when the label is missing or fewer than four
        coordinates are present.
    """
    # ``with`` guarantees the handle is closed even for malformed files.
    with open(path, "r") as handle:
        # strip() removes the trailing newline that readline() keeps.
        label = handle.readline().strip()
        coords = handle.readline().split()
    if not label or len(coords) < len(COORD_TAGS):
        return None
    return label, coords[:len(COORD_TAGS)]


def build_tree(annotations):
    """Build the XML tree for a sequence of ``(label, coords)`` pairs.

    Each pair contributes one ``<object>`` element and one ``<cordinates>``
    element, both direct children of ``<root>``.

    Returns:
        An ``xml.etree.ElementTree.ElementTree`` rooted at ``<root>``.
    """
    root = ET.Element("root")
    for label, coords in annotations:
        object_node = ET.SubElement(root, "object")
        ET.SubElement(object_node, "label").text = label
        cordinates = ET.SubElement(root, "cordinates")
        for tag, value in zip(COORD_TAGS, coords):
            ET.SubElement(cordinates, tag).text = value
    return ET.ElementTree(root)


def main(file_list, output_path="xyz.xml"):
    """Read every file in *file_list* and write one combined XML file.

    Malformed files are reported and skipped instead of aborting the run.
    Annotations are kept in a list rather than a dict keyed by label, so two
    files that share a label are both written.
    """
    annotations = []
    for path in file_list:
        parsed = read_annotation(path)
        if parsed is None:
            print("Skipping malformed file: {}".format(path))
            continue
        annotations.append(parsed)
    build_tree(annotations).write(output_path)


if __name__ == "__main__":
    main(["abc.txt", "def.txt"])

You can begin by making a function that takes a txt_file name and an xml_file name as inputs and writes the contents of txt_file to xml_file, like so:

# xml.etree.cElementTree was removed in Python 3.9; ElementTree uses the C
# accelerator automatically.
import xml.etree.ElementTree as ET


def write_xml(txt_file, xml_file):
    """Convert one two-line annotation text file into an XML file.

    The text file must contain the label on one line and the
    whitespace-separated coordinates on the next; blank lines are ignored.

    Args:
        txt_file: Path of the text file to read.
        xml_file: Path of the XML file to write.

    Raises:
        ValueError: If the file does not contain exactly two non-blank lines.
    """
    # Tag names for the coordinate values, in the order they appear.
    coords_list = ['xmin', 'ymin', 'xmax', 'ymax']

    with open(txt_file) as fp:
        # Strip each line (so the label carries no trailing newline) and
        # drop blank ones (so a trailing empty line does not break parsing).
        lines = [line.strip() for line in fp if line.strip()]

    if len(lines) != 2:
        raise ValueError(
            "Expected a label line and a coordinate line in {}, got {} line(s)".format(
                txt_file, len(lines)))
    label, text_str = lines

    root = ET.Element("root")
    object_node = ET.SubElement(root, "object")
    ET.SubElement(object_node, "label").text = label

    cordinates = ET.SubElement(root, "cordinates")

    # zip() pairs each tag name with its value and stops at the shorter
    # sequence, so extra values on the line are ignored.
    for key, value in zip(coords_list, text_str.split()):
        ET.SubElement(cordinates, key).text = value

    ET.ElementTree(root).write(xml_file)

Then you can call the function as write_xml('file.txt', 'xyz.xml')

Now you can call this function for all your txt files in a loop

# Fill in both lists; they must have the same length and matching order,
# e.g. txt_file_paths = ['abc.txt', 'def.txt'] and
#      xml_file_paths = ['abc.xml', 'def.xml'].
txt_file_paths = []
xml_file_paths = []

# zip() pairs each input file with its output file without index
# bookkeeping.  Note that it stops at the shorter list.
for txt_path, xml_path in zip(txt_file_paths, xml_file_paths):
    write_xml(txt_path, xml_path)

You can fill up your txt_file_paths by listing all files in the txt files folder using os.listdir, then creating each file path with os.path.join and adding it to the list:

import os

# Placeholder: replace with the folder that holds the .txt files.
txt_files_folder = '<folder_with_txt_files>'

# Full paths of the .txt files in the folder.  The list is named
# ``txt_file_paths`` to match the conversion loop that consumes it.
# Entries are sorted for a stable order, and anything that is not a .txt
# file (sub-folders, previously generated .xml files, ...) is skipped.
txt_file_paths = [
    os.path.join(txt_files_folder, name)
    for name in sorted(os.listdir(txt_files_folder))
    if name.lower().endswith('.txt')
]
import os
# xml.etree.cElementTree was removed in Python 3.9; ElementTree uses the C
# accelerator automatically.
import xml.etree.ElementTree as ET


def toxml(lines, save_filepath):
    """Write the XML file for one annotation given as a list of text lines.

    Args:
        lines: Lines of the annotation file: one label line and one line
            with four whitespace-separated coordinates.  Blank lines are
            ignored.
        save_filepath: Path of the XML file to write.

    Raises:
        ValueError: If there are not exactly two non-blank lines, or the
            coordinate line does not hold exactly four values.  Nothing is
            written in that case.
    """
    def generate_xml(obj, cordinates_arr, save_filepath):
        """Build the tree for one label plus four coordinates and save it."""
        root_node = ET.Element("root")
        object_node = ET.SubElement(root_node, "object")
        ET.SubElement(object_node, "label").text = obj
        cordinates_node = ET.SubElement(root_node, "cordinates")
        # Tag names (including the repeated "xmin") are kept as in the
        # original output format.
        for tag, value in zip(("xmin", "ymin", "xmin", "xmax"), cordinates_arr):
            ET.SubElement(cordinates_node, tag).text = value
        ET.ElementTree(root_node).write(save_filepath)

    # Ignore blank lines so a trailing newline in the file is harmless.
    content = [line.strip() for line in lines if line.strip()]
    if len(content) != 2:
        raise ValueError("Invalid content: {}".format(lines))

    obj = content[0]
    # Split into individual values; indexing the raw string would yield
    # single characters instead of numbers.
    cordinates = content[1].split()
    if len(cordinates) != 4:
        raise ValueError("Invalid line format: {}".format(lines))

    generate_xml(obj, cordinates, save_filepath)

def entry(target_dir_path, save_dri_path):
    """Convert every file in one directory to an XML file in another.

    Each file ``<name>.<ext>`` in *target_dir_path* is read and passed to
    ``toxml``; the result is saved as ``<name>.xml`` in the save directory.
    A failure on one file is reported and does not stop the others.

    Args:
        target_dir_path: Directory containing the files to convert.
        save_dri_path: Directory that receives the generated XML files.
            (The misspelled parameter name is kept so existing keyword
            callers keep working.)

    Raises:
        FileNotFoundError: If either directory does not exist.
    """
    # Bind the argument to a correctly spelled local.  The body previously
    # read ``save_dir_path``, which only resolved when a global of that name
    # happened to exist.
    save_dir_path = save_dri_path

    # Explicit raises instead of ``assert``, which is stripped under -O.
    if not os.path.exists(target_dir_path):
        raise FileNotFoundError("Target directory is not exist: {}".format(target_dir_path))
    if not os.path.exists(save_dir_path):
        raise FileNotFoundError("Save directory is not exist: {}".format(save_dir_path))

    for filename in os.listdir(target_dir_path):
        file_full_path = os.path.join(target_dir_path, filename)
        filename_prefix, _ = os.path.splitext(filename)
        save_path = os.path.join(save_dir_path, "{}.xml".format(filename_prefix))
        # Best-effort per file: report the problem and continue with the rest.
        try:
            with open(file_full_path) as ff:
                toxml(ff.readlines(), save_path)
        except Exception as ex:
            print("Generate {0} failed, with error msg: {1}.".format(filename, ex))


if __name__ == '__main__':
    # Placeholder paths: replace both with real, existing directories
    # before running the script.
    target_dir_path = '/path/to/you/wanna/convert'
    save_dir_path = '/path/to/you/wanna/save'
    # Convert the files found in target_dir_path; the generated .xml files
    # are saved under save_dir_path.
    entry(target_dir_path, save_dir_path)

The code has already been tested; I hope it helps you.

The technical post webpages of this site follow the CC BY-SA 4.0 license. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM