简体   繁体   中英

Parsing xml data from one xml to a new xml in Java

I have an XML file that contains paragraph elements, sentence elements, and annotation sub-elements under the sentences. I would like to read these annotation elements, extract their content, and write it to a new XML file like this:

    <sentence>
      <Date></Date>
      <Person></Person>
      <NumberDate></NumberDate>
      <Location></Location>
      <etc></etc>
    </sentence>

In my code, I parse the XML file and read the annotations, but I am only able to print them to the console. I can't figure out how to continue from there and export the result to a new XML file.

Here is my code:

     package domparserxml;
        import java.io.File;
        //package domparserxml;
        import java.io.IOException;
        import java.io.PrintStream;
        import javax.xml.parsers.DocumentBuilder;
        import javax.xml.parsers.DocumentBuilderFactory;
        import javax.xml.parsers.ParserConfigurationException;

        import org.w3c.dom.Document;
        import org.w3c.dom.Element;
        import org.w3c.dom.Node;
        import org.w3c.dom.NodeList;
        import org.xml.sax.SAXException;

        /**
         * Reads {@code Chrono.xml} with the DOM parser and prints, for every sentence of every
         * {@code paragraph} element, one console line listing the sentence's annotation elements.
         */
        public class DomParserXml {

            /**
             * Parses {@code Chrono.xml} and prints the annotations of each sentence to standard output.
             *
             * <p>Each sentence starts on a new line. The line begins with the paragraph id, the
             * paragraph date and the sentence id, followed by one {@code tagName:textContent;} entry
             * per annotation element. Parser and I/O failures are reported with a stack trace.
             *
             * @param args command-line arguments; not used
             */
            public static void main(String[] args) {
                DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

                try {
                    DocumentBuilder builder = factory.newDocumentBuilder();
                    Document doc = builder.parse("Chrono.xml"); // the input XML file
                    NodeList paragraphList = doc.getElementsByTagName("paragraph");
                    for (int i = 0; i < paragraphList.getLength(); i++) {
                        Node p = paragraphList.item(i);
                        if (p.getNodeType() == Node.ELEMENT_NODE) {
                            printParagraph((Element) p);
                        }
                    }
                } catch (ParserConfigurationException | SAXException | IOException e) {
                    e.printStackTrace();
                }
            }

            /** Prints every element child of {@code paragraph}, treating each one as a sentence. */
            private static void printParagraph(Element paragraph) {
                NodeList sentenceList = paragraph.getChildNodes();
                for (int j = 0; j < sentenceList.getLength(); j++) {
                    Node s = sentenceList.item(j);
                    // Child nodes also include text nodes (indentation/line breaks); skip those.
                    if (s.getNodeType() == Node.ELEMENT_NODE) {
                        printSentence(paragraph, (Element) s);
                    }
                }
            }

            /** Prints one sentence: a line break, then the header once, then every annotation. */
            private static void printSentence(Element paragraph, Element sentence) {
                System.out.println(""); // separates the output of consecutive sentences

                NodeList annotationList = sentence.getChildNodes();
                // Track the first annotation *element* explicitly. Relying on a fixed child index
                // only works when a whitespace text node happens to precede the first element; in
                // compact XML the first element is at index 0 and would otherwise be dropped.
                boolean headerPrinted = false;
                for (int a = 0; a < annotationList.getLength(); a++) {
                    Node anno = annotationList.item(a);
                    if (anno.getNodeType() != Node.ELEMENT_NODE) {
                        continue;
                    }
                    Element annotation = (Element) anno;
                    if (!headerPrinted) {
                        // Paragraph and sentence details are written only once per sentence.
                        System.out.print("paragraph-id:" + paragraph.getAttribute("id") + ";"
                                + "paragraph-date:" + paragraph.getAttribute("date") + ";"
                                + "senteid:" + sentence.getAttribute("id") + ";");
                        headerPrinted = true;
                    }
                    System.out.print(annotation.getTagName() + ":" + annotation.getTextContent() + ";");
                }
            }
        }

Can somebody please help me.

Thanks.

DOM provides many handy classes for creating an XML file easily. First, create a Document with the DocumentBuilder class and define all the XML content (nodes and attributes) with the Element class. Finally, use the Transformer class to write the entire XML content to an output stream, typically a File.

Have a look at the code below; you can use it right after you have obtained all the values in your paragraph variable.

package com.sujit;

import java.io.File;
import java.io.IOException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Builds a small XML document with a {@code sentence} root element from the attributes of a
 * {@code paragraph} element and writes it to a file using a {@link Transformer}.
 */
public class CreateXML {

    /** Output file used when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "E://file.xml";

    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_INPUT_PATH = "Chrono.xml";

    /**
     * Reads the first {@code paragraph} element of the input XML and writes a {@code sentence}
     * document with {@code date}, {@code person}, {@code numberdate} and {@code location}
     * children to the output file.
     *
     * <p>The original listing read from a {@code paragraph} variable that was never declared, so
     * it could not compile; the element is now obtained from the input document here.
     *
     * @param args optional arguments: {@code args[0]} is the output file (default
     *     {@code E://file.xml}); {@code args[1]} is the input file (default {@code Chrono.xml})
     */
    public static void main(String[] args) {
        String outputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_OUTPUT_PATH;
        String inputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_INPUT_PATH;

        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        try {
            DocumentBuilder docBuilder = docFactory.newDocumentBuilder();

            Document input = docBuilder.parse(new File(inputPath));
            NodeList paragraphs = input.getElementsByTagName("paragraph");
            if (paragraphs.getLength() == 0) {
                // Nothing to export; report it instead of failing on a missing element.
                System.err.println("No <paragraph> element found in " + inputPath);
                return;
            }
            Element paragraph = (Element) paragraphs.item(0);

            Document doc = buildSentenceDocument(docBuilder, paragraph);
            writeDocument(doc, new File(outputPath));

            System.out.println("File saved!");
        } catch (ParserConfigurationException | SAXException | IOException | TransformerException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates the output document: a {@code sentence} root with one child per exported value.
     *
     * <p>NOTE(review): the values are read as attributes of {@code paragraph}, exactly as in the
     * original listing; confirm the attribute names against the real input schema. A missing
     * attribute yields an empty element, because {@code getAttribute} returns an empty string.
     */
    private static Document buildSentenceDocument(DocumentBuilder docBuilder, Element paragraph) {
        Document doc = docBuilder.newDocument();
        Element rootElement = doc.createElement("sentence");
        doc.appendChild(rootElement);

        appendTextChild(doc, rootElement, "date", paragraph.getAttribute("date"));
        appendTextChild(doc, rootElement, "person", paragraph.getAttribute("person"));
        appendTextChild(doc, rootElement, "numberdate", paragraph.getAttribute("numberDate"));
        appendTextChild(doc, rootElement, "location", paragraph.getAttribute("location"));
        return doc;
    }

    /** Appends {@code <tagName>text</tagName>} to {@code parent}. */
    private static void appendTextChild(Document doc, Element parent, String tagName, String text) {
        Element child = doc.createElement(tagName);
        child.appendChild(doc.createTextNode(text));
        parent.appendChild(child);
    }

    /** Serializes {@code doc} to {@code target} with a default (identity) transformer. */
    private static void writeDocument(Document doc, File target) throws TransformerException {
        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        Transformer transformer = transformerFactory.newTransformer();
        transformer.transform(new DOMSource(doc), new StreamResult(target));
    }
}

Let me know if you still face any issue.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM