简体   繁体   中英

how to split an XML file into multiple XML files using java

I'm using XML files in Java for the first time and I need some help. I am trying to split an XML file into multiple XML files using Java. This is my input:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<products>
    <product>
        <description>Sony 54.6" (Diag) Xbr Hx929 Internet Tv</description>
        <gtin>00027242816657</gtin>
        <price>2999.99</price>
        <orderId>2343</orderId>
        <supplier>Sony</supplier>
    </product>
    <product>
        <description>Apple iPad 2 with Wi-Fi 16GB - iOS 5 - Black
        </description>
        <gtin>00885909464517</gtin>
        <price>399.0</price>
        <orderId>2343</orderId>
        <supplier>Apple</supplier>
    </product>
    <product>
        <description>Sony NWZ-E464 8GB E Series Walkman Video MP3 Player Blue
        </description>
        <gtin>00027242831438</gtin>
        <price>91.99</price>
        <orderId>2343</orderId>
        <supplier>Sony</supplier>
    </product>
    <product>
        <description>Apple MacBook Air A 11.6" Mac OS X v10.7 Lion MacBook
        </description>
        <gtin>00885909464043</gtin>
        <price>1149.0</price>
        <orderId>2344</orderId>
        <supplier>Apple</supplier>
    </product>
    <product>
        <description>Panasonic TC-L47E50 47" Smart TV Viera E50 Series LED
            HDTV</description>
        <gtin>00885170076471</gtin>
        <price>999.99</price>
        <orderId>2344</orderId>
        <supplier>Panasonic</supplier>
    </product>
</products>

and I'm trying to get three XML documents like:

 <?xml version="1.0" encoding="UTF-8"?>
<products>
        <product>
            <description>Sony 54.6" (Diag) Xbr Hx929 Internet Tv</description>
            <gtin>00027242816657</gtin>
            <price currency="USD">2999.99</price>
            <orderid>2343</orderid>
        </product>
        <product>
            <description>Sony NWZ-E464 8GB E Series Walkman Video MP3 Player Blue</description>
            <gtin>00027242831438</gtin>
            <price currency="USD">91.99</price>
            <orderid>2343</orderid>
        </product>
</products>

one for each supplier. How can I achieve this? Any help on this would be great.

Make sure you change the path in "inputFile" to your file and also the output part:

StreamResult result = new StreamResult(new File("C:\xmls\" + supplier.trim() + ".xml"));

Here is your code.

import java.io.File;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Splits {@code resources/products.xml} into one XML file per supplier.
 *
 * <p>Every {@code /products/product} element is copied into the document of each
 * supplier named by one of its {@code <supplier>} children. Each supplier document
 * is written exactly once to {@code resources/<supplier>.xml}.
 */
public class ExtractXml
{
    /**
     * Reads the input file, groups the products by supplier and writes one file per supplier.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the input cannot be parsed or an output file cannot be written
     */
    public static void main(String[] args) throws Exception
    {
        String inputFile = "resources/products.xml";

        File xmlFile = new File(inputFile);
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
        Document doc = dBuilder.parse(xmlFile);

        XPathFactory xfactory = XPathFactory.newInstance();
        XPath xpath = xfactory.newXPath();
        XPathExpression allSuppliersExpression = xpath.compile("/products/product/supplier");
        NodeList supplierNodes = (NodeList) allSuppliersExpression.evaluate(doc, XPathConstants.NODESET);

        // Group the products by trimmed supplier name in a single pass. Grouping on the
        // DOM nodes (instead of building one XPath string per supplier) means a name
        // containing a quote cannot break the query, and the insertion-ordered map keeps
        // the suppliers in document order while removing duplicates.
        Map<String, List<Node>> productsBySupplier = new LinkedHashMap<String, List<Node>>();
        for (int i = 0; i < supplierNodes.getLength(); ++i)
        {
            Node supplierNode = supplierNodes.item(i);
            String supplier = supplierNode.getTextContent().trim();

            System.out.println(supplier);
            if (supplier.isEmpty())
            {
                // An empty name would produce the file "resources/.xml"; skip it.
                continue;
            }

            List<Node> products = productsBySupplier.get(supplier);
            if (products == null)
            {
                products = new ArrayList<Node>();
                productsBySupplier.put(supplier, products);
            }

            // A product that repeats the same supplier must still be copied only once.
            Node productNode = supplierNode.getParentNode();
            if (!products.contains(productNode))
            {
                products.add(productNode);
            }
        }

        //Now we create the split XMLs
        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        for (Map.Entry<String, List<Node>> entry : productsBySupplier.entrySet())
        {
            String supplier = entry.getKey();
            List<Node> products = entry.getValue();

            System.out.println("Found " + products.size() +
                               " product(s) for supplier " + supplier);

            //We store the new XML file in supplierName.xml e.g. Sony.xml
            Document suppXml = dBuilder.newDocument();

            //we have to recreate the root node <products>
            Element root = suppXml.createElement("products");
            suppXml.appendChild(root);
            for (Node productNode : products)
            {
                // importNode deep-copies the product into the new document and leaves
                // the source document untouched.
                root.appendChild(suppXml.importNode(productNode, true));
            }

            //At the end, we save the file XML on disk
            Transformer transformer = transformerFactory.newTransformer();
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            DOMSource source = new DOMSource(suppXml);

            StreamResult result = new StreamResult(new File("resources/" + supplier + ".xml"));
            transformer.transform(source, result);

            System.out.println("Done for " + supplier);
        }
    }

}

You can have a look here to see how to parse an XML document using DOM in Java: DOM XML Parser Example

Here, how to write the new XML file(s): Create XML file using java

In addition you could study XPath to easily select your nodes: Java Xpath expression

If performance is not your main concern, then once you have loaded your DOM and set up XPath, you can first retrieve all the suppliers in your XML document using the following XPath query:

//supplier/text()

you will get something like that:

Text='Sony'
Text='Apple'
Text='Sony'
Text='Apple'
Text='Panasonic'

Then I would put those results in an ArrayList or a similar collection, removing duplicates, since each supplier appears once per product. The second step is to iterate over that collection and, for each item, query the XML input document to extract all the nodes with that particular supplier:

/products/product[supplier='Sony'] 

Of course, in Java you will have to build that last XPath query dynamically:

String xpathQuery = "/products/product/[supplier='" + currentValue + "']

After that, you will get the list of nodes which match the supplier you specified. The next step is to construct the new XML DOM and save it to a file.

A DOM parser will consume more memory, because it loads the whole document. I prefer to use a SAX parser to read the XML and write the output.

I like the approach of Xmappr ( https://code.google.com/p/xmappr/ ) where you can use simple annotations:

first the root-element Products which simply holds a list of Product-instances

/**
 * Root object of the mapped document; it only holds the list of {@link Product} instances.
 */
@RootElement
public class Products {

    // NOTE(review): presumably Xmappr binds this field to the repeated <product> child
    // elements by its name - confirm against the Xmappr documentation before renaming it.
    @Element
    public List<Product> product;
}

Then the Product-class

/**
 * One product entry. Every field is a public String annotated with {@code @Element}.
 *
 * NOTE(review): the field names match the child element names of {@code <product>} in the
 * sample input; presumably Xmappr maps them by name - confirm before renaming any field.
 */
@RootElement
public class Product {

   @Element
   public String description;

   @Element
   public String supplier;

   @Element
   public String gtin;

   // Kept as text; no numeric parsing happens in this class.
   @Element
   public String price;

   @Element
   public String orderId;
}

And then you simply fetch the Product-instances from the Products:

/**
 * Reads {@code test.xml}, maps it onto {@code Products} with Xmappr and prints the
 * description of every product.
 *
 * @param args command-line arguments (unused)
 * @throws FileNotFoundException if {@code test.xml} cannot be opened
 */
public static void main(String[] args) throws FileNotFoundException {
    Reader reader = new FileReader("test.xml");
    try {
        Xmappr xm = new Xmappr(Products.class);
        Products products = (Products) xm.fromXML(reader);

        // fetch list of products
        List<Product> listOfProducts = products.product;

        // do sth with the products in the list
        for (Product product : listOfProducts) {
            System.out.println(product.description);
        }
    } finally {
        // Always release the file handle, even when mapping fails. A failure to close
        // is reported but must not hide an exception thrown by the mapping itself.
        try {
            reader.close();
        } catch (java.io.IOException e) {
            System.err.println("Failed to close test.xml: " + e);
        }
    }
}

And then you can do whatever you want with the products (e.g. sorting them by supplier and writing them out to an XML file).

Consider this xml

<?xml version="1.0"?>
<SSNExportDocument xmlns="urn:com:ssn:schema:export:SSNExportFormat.xsd" Version="0.1" DocumentID="b482350d-62bb-41be-b792-8a9fe3884601-1" ExportID="b482350d-62bb-41be-b792-8a9fe3884601" JobID="464" RunID="3532468" CreationTime="2019-04-16T02:20:01.332-04:00" StartTime="2019-04-15T20:20:00.000-04:00" EndTime="2019-04-16T02:20:00.000-04:00">
    <MeterData MeterName="MUNI1-11459398" UtilDeviceID="11459398" MacID="00:12:01:fae:fe:00:d5:fc">
        <RegisterData StartTime="2019-04-15T20:00:00.000-04:00" EndTime="2019-04-15T20:00:00.000-04:00" NumberReads="1">
            <RegisterRead ReadTime="2019-04-15T20:00:00.000-04:00" GatewayCollectedTime="2019-04-16T01:40:06.214-04:00" RegisterReadSource="REG_SRC_TYPE_EO_CURR_READ" Season="-1">
                <Tier Number="0">
                    <Register Number="1" Summation="5949.1000" SummationUOM="GAL"/>
                </Tier>
            </RegisterRead>
        </RegisterData>
    </MeterData>
    <MeterData MeterName="MUNI4-11460365" UtilDeviceID="11460365" MacID="00:11:01:bc:fe:00:d3:f9">
        <RegisterData StartTime="2019-04-15T20:00:00.000-04:00" EndTime="2019-04-15T20:00:00.000-04:00" NumberReads="1">
            <RegisterRead ReadTime="2019-04-15T20:00:00.000-04:00" GatewayCollectedTime="2019-04-16T01:40:11.082-04:00" RegisterReadSource="REG_SRC_TYPE_EO_CURR_READ" Season="-1">
                <Tier Number="0">
                    <Register Number="1" Summation="136349.9000" SummationUOM="GAL"/>
                </Tier>
            </RegisterRead>
        </RegisterData>
    </MeterData>

We can use JAXB which converts your xml tags to objects. Then we can play around with them.

// Unmarshal input.xml into an SSNExportDocument.
File xmlFile = new File("input.xml");
// NOTE(review): jaxbContext is assigned but not declared in this fragment - presumably a
// field or a variable declared earlier; confirm against the enclosing class.
jaxbContext = JAXBContext.newInstance(SSNExportDocument.class);
Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller();
SSNExportDocument ssnExpDoc = (SSNExportDocument) jaxbUnmarshaller.unmarshal(xmlFile);

// Maps the part of the meter name before the first '-' to every MeterData sharing it.
Map<String, List<MeterData>> meterMapper = new HashMap<String, List<MeterData>>();

for (MeterData mData : ssnExpDoc.getMeterData()) {
    String meterFullName = mData.getMeterName();

    // Same prefix as split("-")[0], but without the ArrayIndexOutOfBoundsException that
    // split() causes for a name consisting only of dashes (it returns an empty array).
    int dashIndex = meterFullName.indexOf('-');
    String meterPrefix = dashIndex < 0 ? meterFullName : meterFullName.substring(0, dashIndex);

    List<MeterData> _meterDataList = meterMapper.get(meterPrefix);
    if (_meterDataList == null) {
        // First meter with this prefix: start a new group.
        _meterDataList = new ArrayList<>();
        meterMapper.put(meterPrefix, _meterDataList);
    }
    _meterDataList.add(mData);
}

meterMapper now maps each meter-name prefix (the part before the "-") to the list of MeterData objects that share it.

Then marshal the contents using:

       JAXBContext jaxbContext = JAXBContext.newInstance(SSNExportDocument.class);
        Marshaller marshaller = jaxbContext.createMarshaller();

        // Output options: fragment mode plus formatted (pretty-printed) output.
        // (Alternative left disabled by the author: the "com.sun.xml.bind.xmlDeclaration" property set to FALSE.)
        marshaller.setProperty(Marshaller.JAXB_FRAGMENT, Boolean.TRUE);
        marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);

        // Marshal into an in-memory buffer rather than directly to a file.
        // NOTE(review): `employee` is not defined anywhere in this fragment - presumably the
        // object to serialise (e.g. a document built from one meterMapper entry); confirm.
        StringWriter buffer = new StringWriter();
        marshaller.marshal(employee, buffer);

        // Echo the generated XML to the console so it can be checked.
        String xmlContent = buffer.toString();
        System.out.println(xmlContent);

Not a perfect solution, but it works in most cases. I had to play around with some string operations to make it work. Basically, this solution splits the given XML at a given element, forms sub-XMLs and writes those to a list.

/**
 * Reads input.xml as a stream of StAX events and builds one XML string per element whose
 * local name equals elementSplitString ("product"), collecting the strings in a local list.
 *
 * Events seen before the first split element are copied to headerWriter (the "parent" XML);
 * each split element is copied to its own writer and then spliced into the parent XML text.
 *
 * NOTE(review): the resulting list is never returned, printed or written within this method.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    java.io.File inputFile = new java.io.File("input.xml");
    String elementSplitString = "product";
    java.io.InputStream inputStream = null;

    try {
        

        inputStream = new java.io.BufferedInputStream(new java.io.FileInputStream(inputFile));

        javax.xml.stream.XMLInputFactory inputFactory = javax.xml.stream.XMLInputFactory.newInstance();
        javax.xml.stream.XMLOutputFactory outputFactory = javax.xml.stream.XMLOutputFactory.newInstance();
        javax.xml.stream.XMLEventReader reader = inputFactory.createXMLEventReader(inputStream);
        // Writer for the split element currently being copied; null while outside one.
        javax.xml.stream.XMLEventWriter writer = null;
        // Receives every event read before the first split element is seen.
        StringWriter parentXMLStringWriter = new StringWriter();
        javax.xml.stream.XMLEventWriter headerWriter = outputFactory.createXMLEventWriter(parentXMLStringWriter); 
        // Buffer behind `writer`; recreated for each split element.
        StringWriter stringWriter = null;
        // Local name of the last start/end element seen before the first split element.
        String lastReadEvent = "";
        // Set once the first split element starts; never reset afterwards.
        boolean splitElementFound = false;
        List<StringBuilder> list = new ArrayList<StringBuilder>();
        while (reader.hasNext()) {
            javax.xml.stream.events.XMLEvent event = reader.nextEvent();
            
            
            switch(event.getEventType()) {
                case javax.xml.stream.XMLStreamConstants.START_ELEMENT:
                    javax.xml.stream.events.StartElement startElement = (javax.xml.stream.events.StartElement)event;
                    if (startElement.getName().getLocalPart().equals(elementSplitString)) {
                        // Start of a split element: open a fresh buffer and writer for it.
                        splitElementFound = true;
                        stringWriter = new StringWriter();
                        writer = outputFactory.createXMLEventWriter(stringWriter);
                        if (writer != null) writer.add(event);
                    } else if(writer != null)
                         writer.add(event);
                    
                    break;

                case javax.xml.stream.XMLStreamConstants.END_ELEMENT:
                    javax.xml.stream.events.EndElement endElement = (javax.xml.stream.events.EndElement)event;
                    if (endElement.getName().getLocalPart().equals(elementSplitString)) {
                        if (writer != null) writer.add(event);
                        
                        // NOTE(review): unlike the add() above, this close() is not null-guarded;
                        // looks like a nested split element would leave writer null here - confirm.
                        writer.close();
                        StringBuilder builder = new StringBuilder();
                        String parentXML = parentXMLStringWriter.toString();
                        // Parent text up to and including the first '>' found after the first
                        // occurrence of lastReadEvent ...
                        builder.append(parentXML.subSequence(0, parentXML.indexOf(">", parentXML.indexOf(lastReadEvent)) + 1));
                        // ... then the copied split element ...
                        builder.append(stringWriter.toString());
                        // ... then the remaining parent text, starting two characters past that '>'.
                        // NOTE(review): both indexOf calls can return -1 and the +2 offset can
                        // exceed the string length; verify against the real header output.
                        builder.append(parentXML.substring(parentXML.indexOf(">", parentXML.indexOf(lastReadEvent)) + 2));
                        list.add(builder);
                        writer = null;
                    }else if(writer != null)
                        writer.add(event);
                    break;

                default:
                    // Any other event is copied only while inside a split element.
                    if (writer != null) 
                        writer.add(event);
                    break;
            }
            if(!splitElementFound) {
                // Still in the header: remember the element name and copy the event.
                if(event instanceof javax.xml.stream.events.StartElement)
                    lastReadEvent = ((javax.xml.stream.events.StartElement)event).getName().getLocalPart();
                else if(event instanceof javax.xml.stream.events.EndElement)
                    lastReadEvent = ((javax.xml.stream.events.EndElement)event).getName().getLocalPart();
                headerWriter.add(event);
            }else {
                // NOTE(review): reached for every event after the first split element, so
                // close() is called repeatedly on the same writer.
                headerWriter.close();
            }

        }
        
        headerWriter = null;
        reader.close();
        if (writer != null) writer.close();
    } catch(Throwable ex) {
        // Any failure, including errors, is only printed; the method then returns normally.
        ex.printStackTrace();
    } finally {
        if (inputStream != null) {
            try {
                inputStream.close();
            } catch (java.io.IOException ex) {
                // Failure to close the input stream is deliberately ignored.
            }
        }
    }
} 

如果您有该 XML 方言的架构 (XSD),则可以使用 JAXB 来替代 DOM。

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM