Java DOM解析xml節點中的html

Question

我這裡有一個解析器：

package lt.prasom.functions;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.util.Properties;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.w3c.dom.CharacterData;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import android.annotation.TargetApi;
import android.media.MediaRecorder.OutputFormat;
import android.util.Log;

/**
 * Small helper around the DOM API: downloads an XML document over HTTP,
 * parses it into a {@link Document} and reads values out of its elements.
 *
 * <p>Element values can be read either as plain text (the first text or
 * CDATA child) or, when the {@code html} flag is set, as the complete markup
 * nested inside the element, e.g. {@code yes <b>no</b>} for
 * {@code <description>yes <b>no</b></description>}.
 */
public class XMLParser {

    /** Log tag used for every error reported by this class. */
    private static final String LOG_TAG = "Error: ";

    /** Charset assumed when the HTTP response does not declare one (XML's own default). */
    private static final String DEFAULT_CHARSET = "UTF-8";

    // constructor
    public XMLParser() {

    }

    /**
     * Getting XML from URL making HTTP request
     *
     * @param url address to send the GET request to
     * @return the response body as a string, or {@code null} when the request
     *         failed or the response carried no body
     */
    public String getXmlFromUrl(String url) {
        String xml = null;
        DefaultHttpClient httpClient = new DefaultHttpClient();

        try {
            HttpResponse httpResponse = httpClient.execute(new HttpGet(url));
            HttpEntity httpEntity = httpResponse.getEntity();
            // getEntity() is null for responses without a body; EntityUtils rejects null.
            if (httpEntity != null) {
                xml = EntityUtils.toString(httpEntity, DEFAULT_CHARSET);
            }
        } catch (IOException e) {
            // UnsupportedEncodingException and ClientProtocolException are both
            // IOExceptions, so one handler covers all three original cases.
            Log.e(LOG_TAG, "Could not download XML from " + url, e);
        } finally {
            // Release the connections held by this one-shot client.
            httpClient.getConnectionManager().shutdown();
        }
        // return XML
        return xml;
    }

    /**
     * Getting XML DOM element
     *
     * @param xml XML text to parse; may be {@code null} (for instance the
     *            result of a failed {@link #getXmlFromUrl(String)})
     * @return the parsed document, or {@code null} when {@code xml} is
     *         {@code null} or cannot be parsed
     */
    public Document getDomElement(String xml) {
        if (xml == null) {
            // new StringReader(null) would throw a NullPointerException.
            Log.e(LOG_TAG, "No XML to parse (input was null)");
            return null;
        }

        // NOTE(review): the XML comes from the network; consider hardening the
        // factory against external entities if the target platform supports it.
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setValidating(false);

        try {
            DocumentBuilder db = dbf.newDocumentBuilder();

            InputSource is = new InputSource();
            is.setCharacterStream(new StringReader(xml));
            return db.parse(is);

        } catch (ParserConfigurationException e) {
            // The exception itself is passed on: getMessage() may be null and
            // the stack trace is what makes the failure diagnosable.
            Log.e(LOG_TAG, "Could not create an XML parser", e);
            return null;
        } catch (SAXException e) {
            Log.e(LOG_TAG, "Malformed XML", e);
            return null;
        } catch (IOException e) {
            Log.e(LOG_TAG, "Could not read XML", e);
            return null;
        }
    }

    /**
     * Getting node value
     *
     * @param elem element to read; may be {@code null}
     * @param html when {@code true} the complete content of {@code elem} is
     *             returned as markup (nested tags included); when
     *             {@code false} only the first text or CDATA child is returned
     * @return the value, or an empty string when {@code elem} is {@code null}
     *         or has no matching content
     */
    @TargetApi(8)
    public final String getElementValue(Node elem, boolean html) {
        if (elem == null) {
            return "";
        }

        if (html) {
            try {
                return serializeChildren(elem);
            } catch (javax.xml.transform.TransformerException e) {
                // Fall back to the plain-text value below rather than losing the data.
                Log.e(LOG_TAG, "Could not serialize element content", e);
            }
        }

        for (Node child = elem.getFirstChild(); child != null; child = child.getNextSibling()) {
            short type = child.getNodeType();
            // CDATA sections are reported as their own node type, not as TEXT_NODE.
            if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) {
                return child.getNodeValue();
            }
        }
        return "";
    }

    /**
     * Getting node value
     *
     * @param item element whose descendants are searched
     * @param str  tag name to look for
     * @return plain-text value of the first descendant named {@code str}, or
     *         an empty string when there is none
     */
    public String getValue(Element item, String str) {
        return getValue(item, str, false);
    }

    /**
     * Getting node value, optionally as markup
     *
     * @param item element whose descendants are searched
     * @param str  tag name to look for
     * @param html passed through to {@link #getElementValue(Node, boolean)}
     * @return value of the first descendant named {@code str}, or an empty
     *         string when there is none
     */
    public String getValue(Element item, String str, boolean html) {
        NodeList n = item.getElementsByTagName(str);

        // item(0) is null when nothing matched; getElementValue maps that to "".
        return this.getElementValue(n.item(0), html);
    }

    /**
     * Writes every child of {@code parent} back out as markup and concatenates
     * the results. CDATA children are appended verbatim, since their content
     * is the raw markup the author wanted to protect from the XML parser.
     */
    @TargetApi(8)
    private static String serializeChildren(Node parent)
            throws javax.xml.transform.TransformerException {
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        // Without this every serialized child would start with <?xml ...?>.
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");

        StringWriter out = new StringWriter();
        for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child.getNodeType() == Node.CDATA_SECTION_NODE) {
                out.write(child.getNodeValue());
            } else {
                transformer.transform(new DOMSource(child), new StreamResult(out));
            }
        }
        return out.toString();
    }

}

還有我的示例xml：

<items>
<item>
<name>test</name>
<description>yes <b>no</b></description>
</item>
</items>

當我解析 description 時，只能取得第一個標籤之前的內容（"yes "）。 所以我想取得 description 標籤內的原始資料。 我試過 CDATA 區段，但沒有效果。 有沒有不需要對 XML 內容進行編碼（轉義）的辦法？

謝謝！

Answer 1

我同意評論所說，這個問題還不夠完整、不夠明確，無法直接回答（例如直接修改您的原始碼使其可以運作等），但我曾經做過類似的事情（我想），可以補充一些內容。 這可能會有所幫助。

因此，假如「description」元素的內容本身就是有效的 XML，比如該文件實際上是這樣的：

<items>
  <item>
   <name>test</name>
   <description><span>yes <b>no</b></span></description>
  </item>
</items>

那麼您可以把「description」元素的內容抽取出來，作為一份新的 XML 文件，然後取得如下所示的 XML 文字形式：

<span>yes <b>no</b></span>

因此，方法類似於：

/**
 * Get the Description as a new XML document
 *
 * <p>Looks up the first {@code /items/item/description} element and copies
 * the first element nested inside it (for example {@code <span>yes <b>no</b></span>})
 * into a freshly created document. The source document is left untouched.
 *
 * @param sourceDocument document to search
 * @return a new document whose root is a deep copy of the description's first
 *         child element, or {@code null} when there is no description, the
 *         description contains no element, or an error occurred
 */
public Document retrieveDescriptionAsDocument(Document sourceDocument) {

    final String descriptionPath = "/items/item/description";
    Document document2 = null;

    try {
        // get the description node, I am just using XPath here as it is easy
        // to read, you already have a reference to the node so just continue as you
        // were doing for that, bottom line is to get a reference to the node
        Node descriptionNode = (Node) javax.xml.xpath.XPathFactory.newInstance().newXPath()
                .evaluate(descriptionPath, sourceDocument, javax.xml.xpath.XPathConstants.NODE);

        if (descriptionNode != null) {

            // A document can only have an element as its root, so pick the first
            // element inside the description and skip surrounding text/whitespace.
            Node contentRoot = descriptionNode.getFirstChild();
            while (contentRoot != null && contentRoot.getNodeType() != Node.ELEMENT_NODE) {
                contentRoot = contentRoot.getNextSibling();
            }

            if (contentRoot != null) {
                // create a new empty document
                document2 = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
                // importNode makes a deep copy owned by the new document, so the
                // original node stays where it is in sourceDocument
                Node importNode = document2.importNode(contentRoot, true);
                // the copy becomes the root of the new document
                document2.appendChild(importNode);
            }
            else {
                // LOG WARNING
                yourLoggerOrWhatever.warn("retrieveContainedDocument: No element found inside XPath:" + descriptionPath);
            }
        }
        else {
            // LOG WARNING
            yourLoggerOrWhatever.warn("retrieveContainedDocument: No data found for XPath:" + descriptionPath);
        }

    } catch (Exception e) {
        // LOG ERROR
        yourLoggerOrWhatever.error("Exception caught getting contained document:",e);
    }

    // return the new doc, and the caller can then output that new document, that will now just contain "<span>yes <b>no</b></span>" as text, apply an XSL or whatever
    return document2;
}

Java DOM解析xml節點中的html

問題描述

1 個解決方案

解決方案1
1 已采納 2012-09-27 15:21:24

Java DOM解析xml節點中的html

問題描述

1 個解決方案

解決方案1 1 已采納 2012-09-27 15:21:24

解決方案1
1 已采納 2012-09-27 15:21:24