[英]Generate an XML doc from the DOM while skipping over certain elements
我有一段HTML,想從中生成一個XML文檔。 我想跳過某些元素(基本上是除div以外的所有元素),為此,我編寫了一個簡單的DOM遍歷函數,但它似乎陷入了無限循環。 (下面有更多詳細信息。)
<div id="browserDiv">
<h3>Library</h3>
<ul>
<li>
<div id="t-0" class="section topic" data-content="2b-2t-38-w-2c-2w-2t-33-36-3d">
<p>Set Theory</p>
<img class="toggle"><img class="edit">
<img class="add-entry"><img class="delete">
<ul>
<li>
<div id="t-0-0" class="section topic" data-content="1t-3c-2x-33-31-37">
<p>Axioms</p>
<img class="toggle"><img class="edit">
<img class="add-entry"><img class="delete">
<ul>
<li>
<div id="t-0-0-0" class="section topic" data-content="1t-3c-2x-33-31-w-33-2u-w-2b-2t-34-2p-36-2p-38-2x-33-32">
<p>Axiom of Separation</p>
<img class="toggle"><img class="edit">
<img class="add-entry"><img class="delete">
<ul>
<li>
<img class="add-new">
</li>
</ul>
</div>
</li>
<li>
<img class="add-new">
</li>
</ul>
</div>
</li>
<li>
<img class="add-new">
</li>
</ul>
</div>
</li>
<li>
<div id="t-1" class="section topic" data-content="1t-32-2p-30-3d-37-2x-37">
<p>Analysis</p>
<img class="toggle"><img class="edit">
<img class="add-entry"><img class="delete">
<ul>
<li>
<img class="add-new">
</li>
</ul>
</div>
</li>
<li>
<img class="add-new">
</li>
</ul>
</div>
我正在嘗試將這段HTML轉換為XML文件。 但是XML只存儲div元素中包含的信息,因此在遍歷DOM樹時,我試圖跳過所有其他元素。 期望得到的XML如下:
<?xml version="1.0" encoding="UTF-8"?>
<library userid="095209376">
<title>UserID095209376's Library</title>
<topic children="yes" loadable="no">
<id>0</id>
<encoding>2b-2t-38-w-2c-2w-2t-33-36-3d</encoding>
<topic children="yes" loadable="no">
<id>0-0</id>
<encoding>1t-3c-2x-33-31-37</encoding>
<topic children="no" loadable="yes">
<id>0-0-0</id>
<encoding>1t-3c-2x-33-31-w-33-2u-w-2b-2t-34-2p-36-2p-38-2x-33-32</encoding>
</topic>
</topic>
</topic>
<topic children="yes" loadable="no">
<id>1</id>
<encoding>1t-32-2p-30-3d-37-2x-37</encoding>
</topic>
</library>
(請注意,腳本標記僅用於使SO進行語法突出顯示。)
<script>
// Builds an XML document with a <library> root from the element whose id is
// 'browserDiv', serializes it, and logs the result to the console.
function saveLibrary(){
// New XML document whose root element is 'library' in the given namespace.
var xmlDoc = document.implementation.createDocument('http://www.tuningcode.com', 'library');
var rootNode = document.getElementById('browserDiv');
console.log("rootNode here: " + rootNode);
// Convert the HTML subtree and attach the result under the root element.
// NOTE(review): walkLibraryTree2 returns null for a node without child nodes;
// confirm that appendChild is never reached with null here.
var libraryTree = walkLibraryTree2(rootNode, xmlDoc);
xmlDoc.documentElement.appendChild(libraryTree);
var oSerializer = new XMLSerializer();
var sXML = oSerializer.serializeToString(xmlDoc);
// Concatenation logs the document object's string form; the serialized
// markup itself is logged by the following statement.
console.log("xmlDoc: " + xmlDoc);
console.log(sXML);
}
// Recursively converts nodeToWalk into an element created by doc.
// Returns the resulting element, or null when nodeToWalk has no child nodes.
function walkLibraryTree2(nodeToWalk, doc){
// The new element reuses the source node's tag name.
var elem = doc.createElement(nodeToWalk.tagName);
console.log(elem);
if(nodeToWalk.hasChildNodes()){
// Only element children are visited (.children, not .childNodes).
var ch = nodeToWalk.children;
for(var i = 0; i < ch.length; i++){
var theWalk = walkLibraryTree2(ch[i], doc);
// A null result means the child had no child nodes; it is ignored.
if(theWalk != null){
if(ch[i].tagName == 'DIV'){
// DIV children are kept as children of the current element.
elem.appendChild(theWalk);
} else{
// Any other child's result replaces the current element outright,
// discarding whatever had been accumulated in elem so far.
elem = theWalk;
}
}
}
return elem;
} else {
return null;
}
}
// Run the export as soon as this script is evaluated.
saveLibrary();
</script>
問題是,當我運行它時,(編輯)它花費的時間比應該花費的時間長得多,並且會生成如下內容:
<library xmlns="http://www.tuningcode.com"><LI xmlns=""/></library>.
換句話說,它不打印任何div,而僅打印一個li元素。 我已經將它打印到控制台了很多,即使只有上面顯示的節點數量,它也可以將數千條語句打印到控制台。
如何在遍歷樹時跳過除div元素之外的所有元素? 或者說,為什么上面的代碼無法正常工作?
我認為運行時間非常長的原因是:在for循環的每次迭代中,walkLibraryTree2都會調用自身兩次,從而導致調用次數呈指數級增長(您的HTML深度為13個級別,因此walkLibraryTree2被調用了8,000次以上)。
處理復雜的問題時,最好將其分解為較小的部分。 以下似乎有效:
<script>
/**
 * Export the topic tree found under the 'browserDiv' element as XML.
 *
 * Creates an XML document whose root element is <library> (no namespace),
 * fills it with the nodes that processChildren produces for the 'browserDiv'
 * element, and logs the serialized markup to the console.
 */
function saveLibrary() {
    var libraryDoc = document.implementation.createDocument(null, 'library');
    var browserRoot = document.getElementById('browserDiv');
    console.log("rootNode here: " + browserRoot);

    // Resolve the destination first, then convert and attach the topics.
    var libraryRoot = libraryDoc.documentElement;
    var topics = processChildren(browserRoot, libraryDoc);
    appendNodes(libraryRoot, topics);

    var markup = new XMLSerializer().serializeToString(libraryDoc);
    console.log("xmlDoc: " + libraryDoc);
    console.log(markup);
}
/**
 * Convert every child node of `node` and gather the results in one flat list.
 *
 * @param {Node} node - Source node whose childNodes are converted in order.
 * @param {Document} doc - Document passed through to processNode.
 * @returns {Array} Concatenation of the arrays returned by processNode.
 */
function processChildren(node, doc) {
    // childNodes is array-like, so borrow Array's reduce to fold over it.
    return Array.prototype.reduce.call(node.childNodes, function (collected, child) {
        return collected.concat(processNode(child, doc));
    }, []);
}
/**
 * Append each node in `nodes` to `destNode`, preserving order.
 *
 * @param {Node} destNode - Node that receives the children.
 * @param {Array} nodes - Nodes to append.
 * @returns {void}
 */
function appendNodes(destNode, nodes) {
    nodes.forEach(function (child) {
        destNode.appendChild(child);
    });
}
/**
 * Convert one source node into zero or more output nodes.
 *
 * The node's children are always converted first. A DIV becomes a single
 * topic element wrapping those converted children; any other node is skipped
 * and its converted children are returned in its place.
 *
 * @param {Node} node - Source node to convert.
 * @param {Document} doc - Document used to create output elements.
 * @returns {Array} Output nodes that stand in for `node`.
 */
function processNode(node, doc) {
    var converted = processChildren(node, doc);
    if (node.tagName !== "DIV") {
        return converted;
    }
    return [createTopicElement(node, doc, converted)];
}
/**
 * Build the <topic> element for one source DIV.
 *
 * The element gets a children="yes"/"no" attribute, an <id> child holding the
 * DIV's id without its first two characters, an <encoding> child holding the
 * DIV's data-content attribute, and then the already-converted child topics.
 *
 * @param {Element} baseNode - Source DIV supplying the id and data-content.
 * @param {Document} doc - Document used to create the new elements.
 * @param {Array} children - Converted child nodes to nest inside the topic.
 * @returns {Element} The populated topic element.
 */
function createTopicElement(baseNode, doc, children) {
    // Read from the baseNode parameter; the previous body referenced an
    // undefined `node` variable and threw a ReferenceError on every call.
    var el = doc.createElement("topic"),
        hasChildren = children.length > 0,
        // Ids in the sample markup look like "t-0-1"; drop the "t-" prefix.
        id = baseNode.id.substring(2),
        encoding = baseNode.getAttribute("data-content");

    el.setAttribute("children", hasChildren ? "yes" : "no");
    el.appendChild(createElementWithValue(doc, "id", id));
    el.appendChild(createElementWithValue(doc, "encoding", encoding));
    appendNodes(el, children);
    return el;
}
/**
 * Create an element named `name` whose text content is `value`.
 *
 * @param {Document} doc - Document used to create the element.
 * @param {string} name - Tag name of the new element.
 * @param {string} value - Text assigned to the element's textContent.
 * @returns {Element} The newly created element.
 */
function createElementWithValue(doc, name, value) {
    var valueElement = doc.createElement(name);
    valueElement.textContent = value;
    return valueElement;
}
// Run the export as soon as this script is evaluated.
saveLibrary();
</script>
生成XML:
<library>
<topic children="yes">
<id>0</id>
<encoding>2b-2t-38-w-2c-2w-2t-33-36-3d</encoding>
<topic children="yes">
<id>0-0</id>
<encoding>1t-3c-2x-33-31-37</encoding>
<topic children="no">
<id>0-0-0</id>
<encoding>1t-3c-2x-33-31-w-33-2u-w-2b-2t-34-2p-36-2p-38-2x-33-32</encoding>
</topic>
</topic>
</topic>
<topic children="no">
<id>1</id>
<encoding>1t-32-2p-30-3d-37-2x-37</encoding>
</topic>
</library>
我不知道您的loadable屬性是如何確定的,也不知道標題是從哪里來的,但這應該可以幫助您達到目標。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.