簡體   English   中英

JAVA 代碼片段將整個 XML 文件中的單引號(')替換為雙引號

[英]JAVA code snippet to replace single quote(') to double quote in whole XML file

我有一個包含嵌套標簽的 XML 文件。 我們可以使用DOM,JDOM解析器我想在整個XML文件中從單引號(')到雙引號的所有標簽的字符串中替換。 標簽也可以嵌套在標簽內。 我想要一些 for 循環來查找所有標簽並替換值,例如 HYPER SHIPPING'SDN BHD_First_Page --> HYPER SHIPPING''SDN BHD_First_Page

示例代碼

    public void iterateChildNodes(org.jdom.Element parentNode) {
        if(parentNode.getChildren().size() == 0) {
            if(parentNode.getText().contains("'")) {
                parentNode.setText(parentNode.getText().replaceAll("'", "\'"));
                LOGGER.info("*************  Below Value updated");
                LOGGER.info(parentNode.getText());
            }
        }else {
            List<Element> rec = parentNode.getChildren();
            for(Element i : rec) {
                iterateChildNodes(i);
            }
        }
    }

樣品 XML 文件

    <Document>
        <Identifier>DOC1</Identifier>
        <Type>HYPER SHIPPING SDN BHD</Type>
        <Description>HYPER SHIPPING SDN BHD</Description>
        <Confidence>33.12</Confidence>
        <ConfidenceThreshold>10.0</ConfidenceThreshold>
        <Valid>true</Valid>
        <Reviewed>true</Reviewed>
        <ReviewedBy>SYSTEM</ReviewedBy>
        <ValidatedBy>SYSTEM</ValidatedBy>
        <ErrorMessage/>
        <Value>HYPER SHIPPING'SDN BHD_First_Page</Value>  //Value to be replaced here
        <DocumentDisplayInfo/>
        <DocumentLevelFields/>
        <Pages>
            <Page>
                <Identifier>PG0</Identifier>
                <OldFileName>HYPER-KL FEB-0001-0001.tif</OldFileName>
                <NewFileName>BI2E7_0.tif</NewFileName>
                <SourceFileID>1</SourceFileID>
                <PageLevelFields>
                    <PageLevelField>
                        <Name>Search_Engine_Classification</Name>
                        <Value>Park Street '10 road</Value>     //Value to be replaced here
                        <Type/>
                        <Confidence>66.23</Confidence>
                        <LearnedFileName>HYPER KL-JUN-0001.tif</LearnedFileName>
                        <OcrConfidenceThreshold>0.0</OcrConfidenceThreshold>
                        <OcrConfidence>0.0</OcrConfidence>
                        <FieldOrderNumber>0</FieldOrderNumber>
                        <ForceReview>false</ForceReview>
                    </PageLevelField>
                </PageLevelFields>
            </Page>
        </Pages>
    </Document>

此代碼可以將 XML 文件中的所有'替換為"

此處不加描述,嘗試一步步編碼。 這很容易理解。

(更新)

Part 1: Using JDOM

import java.util.ArrayList;
import java.util.List;

import org.w3c.dom.NodeList;
import org.jdom2.input.SAXBuilder;
import org.jdom2.transform.JDOMSource;
import org.w3c.dom.*;

import java.io.*;

public class XmlModificationJDom {

    public static void main(String[] args) {
        XmlModificationJDom xmlModificationJDom = new XmlModificationJDom();
        xmlModificationJDom.updateXmlAndSaveJDom();

    }

    public void updateXmlAndSaveJDom() {
        try {
            File inputFile = new File("document.xml");
            SAXBuilder saxBuilder = new SAXBuilder();
            org.jdom2.Document xmlDocument = saxBuilder.build(inputFile);
            org.jdom2.Element rootElement = xmlDocument.getRootElement();

            iterateAndUpdateElementsUsingJDom(rootElement);

            saveUpdatedXmlUsingJDomSource(xmlDocument);

        } catch (Exception ex) {
            ex.printStackTrace();
        }

    }

    public void iterateAndUpdateElementsUsingJDom(org.jdom2.Element element) {

        if (element.getChildren().size() == 0) {
            // System.out.println(element.getName() + ","+ element.getText());
            if (element.getText().contains("'")) {
                element.setText(element.getText().replaceAll("\'", "\""));
            }
        } else {
            // System.out.println(element.getName());
            for (org.jdom2.Element childElement : element.getChildren()) {
                iterateAndUpdateElementsUsingJDom(childElement);
            }
        }
    }
}

Part 2: Using DOM

import javax.xml.parsers.*;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import java.util.ArrayList;
import java.util.List;

import java.io.*;

public class XmlModificationDom {

    public static void main(String[] args) {
        XmlModificationDom XmlModificationDom = new XmlModificationDom();
        XmlModificationDom.updateXmlAndSave();
    }
    
    public void updateXmlAndSave() {
        try {
            File inputFile = new File("document.xml");
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
            Document document = dBuilder.parse(inputFile);
            document.getDocumentElement().normalize();

            Node parentNode = document.getFirstChild();
            iterateChildNodesAndUpate(parentNode);

            writeAndSaveXML(document);

        } catch (Exception ex) {
            ex.printStackTrace();
        }

    }

    public void writeAndSaveXML(Document document) throws Exception {
        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        Transformer transformer = transformerFactory.newTransformer();
        DOMSource source = new DOMSource(document);
        StreamResult result = new StreamResult(new File("updated-document.xml"));
        transformer.transform(source, result);
    }

    public void iterateChildNodesAndUpate(Node parentNode) {

        NodeList nodeList = parentNode.getChildNodes();

        for (int index = 0; index < nodeList.getLength(); index++) {
            Node node = nodeList.item(index);
            if (node.getNodeType() == Node.ELEMENT_NODE) {
                Element element = (Element) node;
                //System.out.print(element.getNodeName());

                if (element.hasChildNodes() && element.getChildNodes().getLength() > 1) {
                    //System.out.println("Child > " + element.getNodeName());
                    iterateChildNodesAndUpate(element);
                } else {
                    //System.out.println(" - " + element.getTextContent());
                    if (element.getTextContent().contains("'")) {
                        String str = element.getTextContent().replaceAll("\'", "\"");
                        element.setTextContent(str);
                    }
                }
            }
        }
    }
}

輸入文件document.xml

<Document>
        <Identifier>DOC1</Identifier>
        <Type>HYPER SHIPPING SDN BHD</Type>
        <Description>HYPER SHIPPING SDN BHD</Description>
        <Confidence>33.12</Confidence>
        <ConfidenceThreshold>10.0</ConfidenceThreshold>
        <Valid>true</Valid>
        <Reviewed>true</Reviewed>
        <ReviewedBy>SYSTEM</ReviewedBy>
        <ValidatedBy>SYSTEM</ValidatedBy>
        <ErrorMessage/>
        <Value>HYPER SHIPPING'SDN BHD_First_Page</Value>  //Value to be replaced here
        <DocumentDisplayInfo/>
        <DocumentLevelFields/>
        <Pages>
            <Page>
                <Identifier>PG0</Identifier>
                <OldFileName>HYPER-KL FEB-0001-0001.tif</OldFileName>
                <NewFileName>BI2E7_0.tif</NewFileName>
                <SourceFileID>1</SourceFileID>
                <PageLevelFields>
                    <PageLevelField>
                        <Name>Search_Engine_Classification</Name>
                        <Value>Park Street '10 road</Value>     //Value to be replaced here
                        <Type/>
                        <Confidence>66.23</Confidence>
                        <LearnedFileName>HYPER KL-JUN-0001.tif</LearnedFileName>
                        <OcrConfidenceThreshold>0.0</OcrConfidenceThreshold>
                        <OcrConfidence>0.0</OcrConfidence>
                        <FieldOrderNumber>0</FieldOrderNumber>
                        <ForceReview>false</ForceReview>
                    </PageLevelField>
                </PageLevelFields>
            </Page>
        </Pages>
</Document>

Output updated-document.xml/updated-document-jdom.xml

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<Document>
        <Identifier>DOC1</Identifier>
        <Type>HYPER SHIPPING SDN BHD</Type>
        <Description>HYPER SHIPPING SDN BHD</Description>
        <Confidence>33.12</Confidence>
        <ConfidenceThreshold>10.0</ConfidenceThreshold>
        <Valid>true</Valid>
        <Reviewed>true</Reviewed>
        <ReviewedBy>SYSTEM</ReviewedBy>
        <ValidatedBy>SYSTEM</ValidatedBy>
        <ErrorMessage/>
        <Value>HYPER SHIPPING"SDN BHD_First_Page</Value><DocumentDisplayInfo/>
        <DocumentLevelFields/>
        <Pages>
            <Page>
                <Identifier>PG0</Identifier>
                <OldFileName>HYPER-KL FEB-0001-0001.tif</OldFileName>
                <NewFileName>BI2E7_0.tif</NewFileName>
                <SourceFileID>1</SourceFileID>
                <PageLevelFields>
                    <PageLevelField>
                        <Name>Search_Engine_Classification</Name>
                        <Value>Park Street "10 road</Value><Type/>
                        <Confidence>66.23</Confidence>
                        <LearnedFileName>HYPER KL-JUN-0001.tif</LearnedFileName>
                        <OcrConfidenceThreshold>0.0</OcrConfidenceThreshold>
                        <OcrConfidence>0.0</OcrConfidence>
                        <FieldOrderNumber>0</FieldOrderNumber>
                        <ForceReview>false</ForceReview>
                    </PageLevelField>
                </PageLevelFields>
            </Page>
        </Pages>
</Document>

更多詳細代碼,請訪問此 repo

您需要在單引號雙引號上添加反斜杠

value =value.replace("\'","\"");

只需將 removeQuote 方法替換為

private static void removeQuote(Document batchXml) throws JDOMException, Exception {
        Element root = batchXml.getRootElement();
        List<Element> docs = root.getChild("Documents").getChildren("Document");
        for (Element doc : docs) {
            String docType = doc.getChildText("Value");
             value =value.replaceAll("\'", "\"");
        }
    }

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM