简体   繁体   中英

How to convert XML to a Java list of strings

I have an XML string in Java that I need to break up into smaller strings. For example, given the following:

<?xml version="1.0" encoding="UTF-8" standalone="yes"?> 
    <employee id="111">    
        <firstName>Lokesh</firstName>    
        <lastName>Gupta</lastName>     
        <location>India</location>   
    </employee> 
    <employee id="222">    
        <firstName>Alex</firstName>    
        <lastName>Gussin</lastName>    
        <location>Russia</location>    
    </employee> 
    <employee id="333">    
        <firstName>David</firstName>    
        <lastName>Feezor</lastName>    
        <location>USA</location>    
    </employee>

How can I parse this, given that there are no obvious delimiters between the elements, to obtain:

string1 = "<employee id="111">    <firstName>Lokesh</firstName>    <lastName>Gupta</lastName>    <location>India</location>   </employee>"
string2 = "<employee id="222">    <firstName>Alex</firstName>    <lastName>Gussin</lastName>    <location>Russia</location>    </employee>"
string3 = "<employee id="333">    <firstName>David</firstName>    <lastName>Feezor</lastName>    <location>USA</location>    </employee>"

Any ideas are appreciated. Thanks!

You can parse the XML into DOM, iterate the child nodes of the root element (once you add one to the XML), and render each element as XML.

// Sample input: an XML declaration followed by three sibling <employee> elements
// and no enclosing root element.
String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?> \r\n"
        + "<employee id=\"111\">    \r\n"
        + "    <firstName>Lokesh</firstName>    \r\n"
        + "    <lastName>Gupta</lastName>     \r\n"
        + "    <location>India</location>   \r\n"
        + "</employee> \r\n"
        + "<employee id=\"222\">    \r\n"
        + "    <firstName>Alex</firstName>    \r\n"
        + "    <lastName>Gussin</lastName>    \r\n"
        + "    <location>Russia</location>    \r\n"
        + "</employee> \r\n"
        + "<employee id=\"333\">    \r\n"
        + "    <firstName>David</firstName>    \r\n"
        + "    <lastName>Feezor</lastName>    \r\n"
        + "    <location>USA</location>    \r\n"
        + "</employee>";

// The input has several top-level elements, so wrap them in a synthetic <X> root.
// The opening tag goes right after the XML declaration when one is present at the
// very start of the text, otherwise at the very start itself.
xml = xml.replaceAll("^(<\\?xml.*?\\?>)?", "$1<X>") + "</X>";

// Parse the wrapped text into a DOM tree.
Document document = DocumentBuilderFactory.newInstance()
        .newDocumentBuilder()
        .parse(new InputSource(new StringReader(xml)));

// Serializer for single elements; the XML declaration is left out of each result.
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");

// Walk the direct children of the synthetic root and serialize every element
// child to its own string; non-element children (e.g. whitespace text) are skipped.
List<String> list = new ArrayList<>();
Node child = document.getDocumentElement().getFirstChild();
while (child != null) {
    if (child.getNodeType() == Node.ELEMENT_NODE) {
        StringWriter rendered = new StringWriter();
        transformer.transform(new DOMSource(child), new StreamResult(rendered));
        // Replace each line break with one space so the element ends up on one line.
        list.add(rendered.toString().replaceAll("\\R", " ").trim());
    }
    child = child.getNextSibling();
}

// Print each serialized element on its own line, wrapped in single quotes.
for (String elementXml : list) {
    System.out.printf("'%s'%n", elementXml);
}

Output

'<employee id="111">         <firstName>Lokesh</firstName>         <lastName>Gupta</lastName>          <location>India</location>    </employee>'
'<employee id="222">         <firstName>Alex</firstName>         <lastName>Gussin</lastName>         <location>Russia</location>     </employee>'
'<employee id="333">         <firstName>David</firstName>         <lastName>Feezor</lastName>         <location>USA</location>     </employee>'

You can do it using StAX:

/**
 * Extracts every outermost {@code employee} element from an XML document and
 * returns each one re-serialized as its own string.
 *
 * <p>The document is read as a stream of StAX events. Capturing starts at an
 * {@code employee} start tag and stops at its matching end tag; every event in
 * between is copied to a per-element writer, except character events that
 * consist only of whitespace, which are dropped. An {@code employee} element
 * nested inside another one is kept as part of the enclosing element's string
 * rather than starting a new entry.
 *
 * @param input the XML text to read
 * @return the serialized {@code employee} elements in document order; empty if
 *         the document contains none
 * @throws XMLStreamException if the input cannot be parsed or an event cannot
 *         be written
 */
private ArrayList <String> getEmployees(String input) throws XMLStreamException {
    ArrayList <String> employees = new ArrayList <>();

    // NOTE(review): the input factory is used with its default settings. If input
    // can come from an untrusted source, consider disabling DTD / external-entity
    // support on the factory before creating the reader.
    XMLEventReader xmlEventReader = XMLInputFactory.newInstance().createXMLEventReader(new StringReader(input));
    XMLOutputFactory xmlOutputFactory = XMLOutputFactory.newInstance();

    XMLEventWriter xmlEventWriter = null;
    StringWriter sw = null;
    // Number of currently open employee elements. Capture begins only on the
    // 0 -> 1 transition and ends on 1 -> 0, so a nested employee neither throws
    // away the outer buffer nor ends the capture early.
    int employeeDepth = 0;
    try {
        while (xmlEventReader.hasNext()) {
            XMLEvent xmlEvent = xmlEventReader.nextEvent();

            if (xmlEvent.isStartElement()
                    && xmlEvent.asStartElement().getName().getLocalPart().equals("employee")) {
                if (employeeDepth == 0) {
                    sw = new StringWriter();
                    xmlEventWriter = xmlOutputFactory.createXMLEventWriter(sw);
                }
                employeeDepth++;
            }

            if (xmlEventWriter != null) {
                // Whitespace-only text between tags is not copied to the output.
                boolean whitespaceOnly = xmlEvent.isCharacters() && xmlEvent.asCharacters().isWhiteSpace();
                if (!whitespaceOnly) {
                    xmlEventWriter.add(xmlEvent);
                }
            }

            if (xmlEvent.isEndElement()
                    && xmlEvent.asEndElement().getName().getLocalPart().equals("employee")) {
                employeeDepth--;
                if (employeeDepth == 0 && xmlEventWriter != null) {
                    // Closing the writer flushes the buffered events into sw.
                    xmlEventWriter.close();
                    employees.add(sw.toString());
                    xmlEventWriter = null;
                    sw = null;
                }
            }
        }
    } finally {
        xmlEventReader.close();
    }

    return employees;
}

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please credit the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM