Traversing a DOM tree to get (name,value) pairs of attributes and leaf nodes

Question

I want to traverse through an XML file in DOM for the purpose of retrieving as (name,value) pairs all:

Attribute names and values;
All leaf node names and their text content;

So given the following XML file as an example:

<?xml version="1.0" encoding="UTF-8"?>
<title text="title1">
    <comment id="comment1">
        <data> abcd </data>
        <data> efgh </data>
    </comment>
    <comment id="comment2">
        <data> ijkl </data>
        <data> mnop </data>
        <data> qrst </data>
    </comment>
</title>

What I want as name value pairs are:

text=title1
id=comment1
data=abcd
data=efgh
id=commment2
data=ijkl
data=mnop
data=qrst

Answer 1

An easier solution might be to use XPath to extract all name value pairs as in the following example. You could also skip the DOM construction and call evaluate directly on the InputSource . The XPath expression

//@* | //*[not(*)]

matches the union of all attributes and all nodes that don't have any child nodes.

import java.io.StringReader;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

public class Test {

    private static final String xml = "<title text='title1'>\n"
            + "  <comment id='comment1'>\n"
            + "    <data> abcd </data>\n"
            + "    <data> efgh </data>\n"
            + "  </comment>\n"
            + "  <comment id='comment2'>\n"
            + "    <data> ijkl </data>\n"
            + "    <data> mnop </data>\n"
            + "    <data> qrst </data>\n"
            + "  </comment>\n"
            + "</title>\n";

    public static void main(String[] args) throws Exception {
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        Document doc = builder.parse(new InputSource(new StringReader(xml)));

        XPathFactory xpf = XPathFactory.newInstance();
        XPath xp = xpf.newXPath();
        NodeList nodes = (NodeList)xp.evaluate("//@* | //*[not(*)]", doc, XPathConstants.NODESET);

        System.out.println(nodes.getLength());

        for (int i=0, len=nodes.getLength(); i<len; i++) {
            Node item = nodes.item(i);
            System.out.println(item.getNodeName() + " : " + item.getTextContent());
        }
    }
}

Answer 2

How about something like:

    String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
        "<title text=\"title1\">\n" +
        "    <comment id=\"comment1\">\n" +
        "        <data> abcd </data>\n" +
        "        <data> efgh </data>\n" +
        "    </comment>\n" +
        "    <comment id=\"comment2\">\n" +
        "        <data> ijkl </data>\n" +
        "        <data> mnop </data>\n" +
        "        <data> qrst </data>\n" +
        "    </comment>\n" +
        "</title>\n";

    try {
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        Document doc = builder.parse(new InputSource(new StringReader(xml)));

        DocumentTraversal traversal = (DocumentTraversal) doc;

        NodeIterator iterator = traversal.createNodeIterator(
          doc.getDocumentElement(), NodeFilter.SHOW_ELEMENT, null, true);

        for (Node n = iterator.nextNode(); n != null; n = iterator.nextNode()) {
            //System.out.println("Element: " + ((Element) n).getTagName());
            String tagname = ((Element) n).getTagName();
            if(tagname.equals("title")) {
                System.out.println("text=" + ((Element)n).getAttribute("text"));
            }
            else if(tagname.equals("comment")) {
                System.out.println("id=" + ((Element)n).getAttribute("id"));
            }
            else if(tagname.equals("data")) {
                System.out.println("data=" + ((Element)n).getTextContent());
            }
            else {
                System.out.println("Unhandled element");
            }
        }

    } catch (Exception e) {
        e.printStackTrace();
    }

Okay, so you weren't happy with that, how about this:

 String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
        "<title text=\"title1\">\n" +
        "    <comment id=\"comment1\">\n" +
        "        <data> abcd </data>\n" +
        "        <data> efgh </data>\n" +
        "    </comment>\n" +
        "    <comment id=\"comment2\">\n" +
        "        <data> ijkl </data>\n" +
        "        <data> mnop </data>\n" +
        "        <data> qrst </data>\n" +
        "    </comment>\n" +
        "</title>\n";

    try {
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        Document doc = builder.parse(new InputSource(new StringReader(xml)));

        DocumentTraversal traversal = (DocumentTraversal) doc;

        NodeIterator iterator = traversal.createNodeIterator(
          doc.getDocumentElement(), NodeFilter.SHOW_ELEMENT, null, true);

        for (Node n = iterator.nextNode(); n != null; n = iterator.nextNode()) {
            //System.out.println("Element: " + ((Element) n).getTagName());
            String tagname = ((Element) n).getTagName();

            NamedNodeMap map = ((Element)n).getAttributes();
            if(map.getLength() > 0) {

                for(int i=0; i<map.getLength(); i++) {
                    Node node = map.item(i);
                    System.out.println(node.getNodeName() + "=" + node.getNodeValue());
                }
            }
            else {
                System.out.println(tagname + "=" + ((Element)n).getTextContent());
            }
        }

    } catch (Exception e) {
        e.printStackTrace();
    }

Traversing a DOM tree to get (name,value) pairs of attributes and leaf nodes

Question

2 answers

solution1
4 2011-08-26 11:15:19

solution2
2 2011-08-26 05:44:08

Traversing a DOM tree to get (name,value) pairs of attributes and leaf nodes

Question

2 answers

solution1 4 2011-08-26 11:15:19

solution2 2 2011-08-26 05:44:08

solution1
4 2011-08-26 11:15:19

solution2
2 2011-08-26 05:44:08