[英]Java DOM parse html in xml node
我在這里有一個解析器:
package lt.prasom.functions;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.util.Properties;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.w3c.dom.CharacterData;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import android.annotation.TargetApi;
import android.media.MediaRecorder.OutputFormat;
import android.util.Log;
/**
 * Small helper for downloading an XML document over HTTP, parsing it into a
 * DOM tree and reading element values out of that tree.
 */
public class XMLParser {

    /** Log tag used for every error reported by this class. */
    private static final String LOG_TAG = "Error: ";

    // constructor
    public XMLParser() {
    }

    /**
     * Fetches the body of {@code url} with an HTTP GET request.
     *
     * @param url address of the XML resource
     * @return the response body as a string, or {@code null} when the request
     *         fails with an I/O error or the response carries no entity
     */
    public String getXmlFromUrl(String url) {
        String xml = null;
        DefaultHttpClient httpClient = new DefaultHttpClient();
        try {
            HttpGet httpGet = new HttpGet(url);
            HttpResponse httpResponse = httpClient.execute(httpGet);
            HttpEntity httpEntity = httpResponse.getEntity();
            // A response is allowed to have no body; leave xml as null then.
            if (httpEntity != null) {
                xml = EntityUtils.toString(httpEntity);
            }
        } catch (IOException e) {
            // UnsupportedEncodingException and ClientProtocolException are
            // IOException subclasses, so one handler covers all three.
            Log.e(LOG_TAG, String.valueOf(e.getMessage()), e);
        } finally {
            // The client is created per call, so release its connections here.
            httpClient.getConnectionManager().shutdown();
        }
        return xml;
    }

    /**
     * Parses an XML string into a DOM document (non-validating).
     *
     * @param xml the XML text to parse
     * @return the parsed document, or {@code null} when {@code xml} is
     *         {@code null} or cannot be parsed
     */
    public Document getDomElement(String xml) {
        if (xml == null) {
            // getXmlFromUrl returns null on failure; do not hand that to StringReader.
            Log.e(LOG_TAG, "No XML content to parse");
            return null;
        }
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setValidating(false);
        try {
            DocumentBuilder db = dbf.newDocumentBuilder();
            InputSource is = new InputSource();
            is.setCharacterStream(new StringReader(xml));
            return db.parse(is);
        } catch (ParserConfigurationException e) {
            // getMessage() may be null, hence String.valueOf.
            Log.e(LOG_TAG, String.valueOf(e.getMessage()), e);
            return null;
        } catch (SAXException e) {
            Log.e(LOG_TAG, String.valueOf(e.getMessage()), e);
            return null;
        } catch (IOException e) {
            Log.e(LOG_TAG, String.valueOf(e.getMessage()), e);
            return null;
        }
    }

    /**
     * Reads the value of a node.
     *
     * @param elem node whose content is wanted; may be {@code null}
     * @param html when {@code true}, every child of {@code elem} is serialized
     *             back to markup, so {@code <description>yes <b>no</b></description>}
     *             yields {@code yes <b>no</b>}; when {@code false}, only the first
     *             text or CDATA child is returned
     * @return the requested content, or an empty string when {@code elem} is
     *         {@code null}, has no children, or has no matching child
     */
    @TargetApi(8)
    public final String getElementValue(Node elem, boolean html) {
        if (elem == null || !elem.hasChildNodes()) {
            return "";
        }
        if (html) {
            return serializeChildren(elem);
        }
        for (Node child = elem.getFirstChild(); child != null; child = child.getNextSibling()) {
            short type = child.getNodeType();
            // CDATA sections are character data too, but have their own node type.
            if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) {
                return child.getNodeValue();
            }
        }
        return "";
    }

    /**
     * Serializes all children of {@code parent} to a single markup string
     * without an XML declaration.
     *
     * @param parent node whose children are written out
     * @return the concatenated markup, or an empty string if serialization fails
     */
    @TargetApi(8)
    private String serializeChildren(Node parent) {
        StringWriter out = new StringWriter();
        try {
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
            for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) {
                transformer.transform(new DOMSource(child), new StreamResult(out));
            }
        } catch (javax.xml.transform.TransformerException e) {
            Log.e(LOG_TAG, String.valueOf(e.getMessage()), e);
            return "";
        }
        return out.toString();
    }

    /**
     * Returns the text value of the first descendant of {@code item} whose tag
     * name is {@code str}.
     *
     * @param item element to search in
     * @param str  tag name to look for
     * @return the first text or CDATA child of the matching element, or an
     *         empty string when there is no such element or it has no text
     */
    public String getValue(Element item, String str) {
        NodeList n = item.getElementsByTagName(str);
        // item(0) is null when nothing matched; getElementValue tolerates that.
        return this.getElementValue(n.item(0), false);
    }
}
還有我的示例xml:
<items>
<item>
<name>test</name>
<description>yes <b>no</b></description>
</item>
</items>
當我解析 description 時,我只能取得第一個標籤之前的內容(“yes ”)。 所以我想取得 description 標籤內的原始內容(包含其中的 HTML 標籤)。 我嘗試過使用 CDATA 標記,但無效。 有沒有不需要對 xml 進行編碼就能做到的辦法?
謝謝!
我同意評論中的看法:這個問題還不夠完整,或者說不夠明確,因此無法直接給出答案(例如直接修改您的原始碼使其能夠運作等)。不過我曾經做過類似的事情(我認為是類似的),可以補充一點。 這可能會有所幫助。
因此,如果(而且只有在這個前提下)“description”元素的內容本身就是有效的XML,比如說該文檔實際上看起來像:
<items>
<item>
<name>test</name>
<description><span>yes <b>no</b></span></description>
</item>
</items>
那么您可以將“description”元素的內容取出,作為一個新的XML文檔,然后獲取如下所示的XML文本形式:
<span>yes <b>no</b></span>
因此,方法類似於:
/**
 * Builds a new, standalone XML document from the markup nested inside the
 * first {@code /items/item/description} element of {@code sourceDocument}.
 *
 * The first element child of the description (e.g. {@code <span>}) becomes the
 * root of the new document. The source document is left unmodified because the
 * content is deep-copied with {@code importNode}.
 *
 * @param sourceDocument document that contains the description element
 * @return a new document whose root is a copy of the description's first
 *         element child, or {@code null} when no such content is found or an
 *         exception occurs (the problem is logged in both cases)
 */
public Document retrieveDescriptionAsDocument(Document sourceDocument) {
    final String descriptionXPath = "/items/item/description";
    Document document2 = null;
    try {
        // get the description node, I am just using XPath here as it is easy
        // to read; if you already hold a reference to the node, use that instead
        Node descriptionNode = org.apache.xpath.XPathAPI.selectSingleNode(sourceDocument, descriptionXPath);

        // A document can only have one root element, so pick the first element
        // child and skip surrounding text/whitespace nodes.
        Node contentRoot = null;
        if (descriptionNode != null) {
            for (Node child = descriptionNode.getFirstChild(); child != null; child = child.getNextSibling()) {
                if (child.getNodeType() == Node.ELEMENT_NODE) {
                    contentRoot = child;
                    break;
                }
            }
        }

        if (contentRoot != null) {
            // create a new empty document
            document2 = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
            // deep-copy the content into the new document and make it the root
            document2.appendChild(document2.importNode(contentRoot, true));
        }
        else {
            // LOG WARNING
            yourLoggerOrWhatever.warn("retrieveContainedDocument: No data found for XPath:" + descriptionXPath);
        }
    } catch (Exception e) {
        // LOG ERROR
        yourLoggerOrWhatever.error("Exception caught getting contained document:",e);
    }
    // return the new doc, and the caller can then output that new document, that will now just contain "<span>yes <b>no</b></span>" as text, apply an XSL or whatever
    return document2;
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.