從DOM生成XML文檔，同時跳過某些元素

Question

我有一段HTML，想從中生成XML文檔。 我想跳過某些元素（基本上是除div以外的所有元素），為此，我編寫了一個簡單的DOM遍歷函數，但它似乎陷入了無限循環。 （下面有更多詳細信息。）

所以我有看起來像這樣的HTML：

<div id="browserDiv">
    <h3>Library</h3>
    <ul>
        <li>
            <div id="t-0" class="section topic" data-content="2b-2t-38-w-2c-2w-2t-33-36-3d">
                <p>Set Theory</p>
                <img class="toggle"><img class="edit">
                <img class="add-entry"><img class="delete">
                <ul>
                    <li>
                        <div id="t-0-0" class="section topic" data-content="1t-3c-2x-33-31-37">
                            <p>Axioms</p>
                            <img class="toggle"><img class="edit">
                            <img class="add-entry"><img class="delete">
                            <ul>
                                <li>
                                    <div id="t-0-0-0" class="section topic" data-content="1t-3c-2x-33-31-w-33-2u-w-2b-2t-34-2p-36-2p-38-2x-33-32">
                                        <p>Axiom of Separation</p>
                                        <img class="toggle"><img class="edit">
                                        <img class="add-entry"><img class="delete">
                                        <ul>
                                            <li>
                                                <img class="add-new">
                                            </li>
                                        </ul>
                                    </div>
                                </li>
                                <li>
                                    <img class="add-new">
                                </li>
                            </ul>
                        </div>
                    </li>
                    <li>
                        <img class="add-new">
                    </li>
                </ul>
            </div>
        </li>
        <li>
            <div id="t-1" class="section topic" data-content="1t-32-2p-30-3d-37-2x-37">
                <p>Analysis</p>
                <img class="toggle"><img class="edit">
                <img class="add-entry"><img class="delete">
                <ul>
                    <li>
                        <img class="add-new">
                    </li>
                </ul>
            </div>
        </li>
        <li>
            <img class="add-new">
        </li>
    </ul>
</div>

這是屏幕截圖：

在此處輸入圖片說明

而且我正在嘗試將此html轉換為XML文件。 但是XML只存儲div元素中包含的信息，因此在遍歷DOM樹時，我試圖跳過所有其他元素。

我打算最終產生的那種XML：

<?xml version="1.0" encoding="UTF-8"?>
<library userid="095209376">
    <title>UserID095209376's Library</title>
    <topic children="yes" loadable="no">
        <id>0</id>
        <encoding>2b-2t-38-w-2c-2w-2t-33-36-3d</encoding>
        <topic children="yes" loadable="no">
            <id>0-0</id>
            <encoding>1t-3c-2x-33-31-37</encoding>
            <topic children="no" loadable="yes">
                <id>0-0-0</id>
                <encoding>1t-3c-2x-33-31-w-33-2u-w-2b-2t-34-2p-36-2p-38-2x-33-32</encoding>
            </topic>
        </topic>
    </topic>
    <topic children="yes" loadable="no">
        <id>1</id>
        <encoding>1t-32-2p-30-3d-37-2x-37</encoding>
    </topic>
</library>

這是我目前正在迭代的方式：

（請注意，腳本標記僅用於使SO進行語法突出顯示。）

<script>
// Builds an XML document from the #browserDiv subtree and logs its serialized form.
// Takes no arguments and returns nothing; the result is only written to the console.
function saveLibrary(){

    // New XML document whose root element is <library> in the given namespace.
    var xmlDoc = document.implementation.createDocument('http://www.tuningcode.com', 'library');
    var rootNode = document.getElementById('browserDiv');
    console.log("rootNode here: " + rootNode);
    // walkLibraryTree2 returns either an element or null (when the node has no child nodes).
    var libraryTree = walkLibraryTree2(rootNode, xmlDoc);
    // NOTE(review): no null check here, so a null result would be passed straight to appendChild.
    xmlDoc.documentElement.appendChild(libraryTree);
    var oSerializer = new XMLSerializer();
    var sXML = oSerializer.serializeToString(xmlDoc);
    // String concatenation logs the document's string form, not its markup; sXML holds the markup.
    console.log("xmlDoc: " + xmlDoc);
    console.log(sXML);

}

// Recursively walks nodeToWalk and tries to build a matching element tree in `doc`.
//   nodeToWalk - DOM node to walk.
//   doc        - document used to create the new elements.
// Returns an element, or null when nodeToWalk has no child nodes at all.
function walkLibraryTree2(nodeToWalk, doc){

    // The new element copies the source tag name as-is (e.g. 'LI'), not a <topic> element.
    var elem = doc.createElement(nodeToWalk.tagName);
    console.log(elem);
    // hasChildNodes() also counts text nodes, while `children` below holds element children only,
    // so an element containing just text skips the loop body and returns its own empty elem.
    if(nodeToWalk.hasChildNodes()){
        var ch = nodeToWalk.children;
        for(var i = 0; i < ch.length; i++){
            var theWalk = walkLibraryTree2(ch[i], doc);
            if(theWalk != null){
                if(ch[i].tagName == 'DIV'){
                    elem.appendChild(theWalk);
                } else{
                    // Replaces the accumulator with the child's result: everything appended to
                    // the previous elem so far is dropped, and the last non-null, non-DIV child wins.
                    elem = theWalk;
                }
            }
        }
        return elem;
    } else {
        // Nodes without any child nodes contribute nothing.
        return null;
    }
}

// Run the export immediately when this script is evaluated.
saveLibrary();
</script>

問題是，當我運行它時，（編輯）它花費的時間比應該花費的時間長得多，並且會生成如下內容：

<library xmlns="http://www.tuningcode.com"><LI xmlns=""/></library>.

換句話說，它不打印任何div，而僅打印一個li元素。 我在代碼中加了很多控制台輸出；即使只有上面顯示的這些節點，它也會向控制台打印數千條語句。

問題：

如何跳過除div元素之外的所有元素遍歷樹？ 還是為什么上面的代碼無法正常工作？

這是一個JSFiddle：

http://jsfiddle.net/4bGjH/

Answer 1

我認為運行時間之所以非常長，是因為在for循環的每次迭代中，walkLibraryTree2都會遞歸調用自身兩次，從而導致調用次數呈指數級增長（您的HTML深度為13層，因此walkLibraryTree2被調用了8,000次以上）。

處理復雜的問題時，最好將其分解為較小的部分。 以下似乎有效：

<script>
/**
 * Converts the #browserDiv subtree into a <library> XML document and logs
 * the serialized markup to the console.
 *
 * Takes no arguments and returns nothing. The conversion itself is delegated
 * to processChildren (builds the nodes) and appendNodes (attaches them).
 */
function saveLibrary() {
    var libraryDoc = document.implementation.createDocument(null, 'library');
    var browserRoot = document.getElementById('browserDiv');
    console.log("rootNode here: " + browserRoot);

    // Only the children of #browserDiv are converted; the container itself
    // is represented by the <library> root element.
    var libraryRoot = libraryDoc.documentElement;
    var convertedNodes = processChildren(browserRoot, libraryDoc);
    appendNodes(libraryRoot, convertedNodes);

    var markup = new XMLSerializer().serializeToString(libraryDoc);
    console.log("xmlDoc: " + libraryDoc);
    console.log(markup);
}

// DomNode, Document -> Array[DomNode]
/**
 * Runs processNode over every entry of node.childNodes and gathers the
 * results into one flat array.
 *
 * @param {Node} node - Node whose child nodes are processed, in order.
 * @param {Document} doc - Document passed through to processNode.
 * @returns {Array} Concatenation of everything processNode returned.
 */
// DomNode, Document -> Array[DomNode]
function processChildren(node, doc) {
    var collected = [];
    var index = 0;

    while (index < node.childNodes.length) {
        // concat flattens the array returned for each child by one level.
        collected = collected.concat(processNode(node.childNodes[index], doc));
        index += 1;
    }

    return collected;
}

// DomNode, Array[DomNode] -> void
/**
 * Appends each entry of `nodes` to `destNode`, preserving order.
 *
 * @param {Node} destNode - Parent that receives the nodes via appendChild.
 * @param {Array} nodes - Nodes to append.
 * @returns {void}
 */
// DomNode, Array[DomNode] -> void
function appendNodes(destNode, nodes) {
    var position = 0;

    while (position < nodes.length) {
        destNode.appendChild(nodes[position]);
        position += 1;
    }
}

// DomNode, Document -> Array[DomNode]
/**
 * Converts one source node. A DIV becomes a single topic element wrapping
 * the converted descendants; any other node is skipped and contributes only
 * the converted descendants themselves.
 *
 * @param {Node} node - Source node to convert.
 * @param {Document} doc - Document used to create new elements.
 * @returns {Array} Either a one-element array holding the topic element, or
 *     the array produced by processChildren for this node.
 */
// DomNode, Document -> Array[DomNode]
function processNode(node, doc) {
    var descendants = processChildren(node, doc);

    // Non-DIV nodes are transparent: hand their results up unchanged.
    if (node.tagName != "DIV") {
        return descendants;
    }

    return [createTopicElement(node, doc, descendants)];
}

// DomNode, Document, Array[DomNode] -> DomNode
/**
 * Builds a <topic> element for one source DIV.
 *
 * The element gets a `children` attribute ("yes"/"no"), an <id> child, an
 * <encoding> child, and then the already-converted child topics.
 *
 * @param {Element} baseNode - Source DIV; its `id` and `data-content`
 *     attribute are read.
 * @param {Document} doc - Document used to create the new elements.
 * @param {Array} children - Converted child nodes to append to the topic.
 * @returns {Element} The populated topic element.
 */
// DomNode, Document, Array[DomNode] -> DomNode
function createTopicElement(baseNode, doc, children) {
    // Read from the `baseNode` parameter; the previous body referenced an
    // undeclared `node`, which throws a ReferenceError on every call.
    var el = doc.createElement("topic"),
        hasChildren = children.length > 0,
        // Drops the first two characters of the id ("t-0-1" -> "0-1").
        id = baseNode.id.substring(2),
        encoding = baseNode.getAttribute("data-content");

    el.setAttribute("children", hasChildren ? "yes" : "no");
    el.appendChild(createElementWithValue(doc, "id", id));
    el.appendChild(createElementWithValue(doc, "encoding", encoding));
    appendNodes(el, children);

    return el;
}

// Document, String, String -> DomNode
/**
 * Creates an element named `name` and sets its text content to `value`.
 *
 * @param {Document} doc - Document used to create the element.
 * @param {string} name - Tag name of the new element.
 * @param {string} value - Value assigned to the element's textContent.
 * @returns {Element} The newly created element.
 */
// Document, String, String -> DomNode
function createElementWithValue(doc, name, value) {
    var created = doc.createElement(name);

    created.textContent = value;

    return created;
}

// Run the export immediately when this script is evaluated.
saveLibrary();    
</script>

生成XML：

<library>
    <topic children="yes">
        <id>0</id>
        <encoding>2b-2t-38-w-2c-2w-2t-33-36-3d</encoding>
        <topic children="yes">
            <id>0-0</id>
            <encoding>1t-3c-2x-33-31-37</encoding>
            <topic children="no">
                <id>0-0-0</id>
                <encoding>1t-3c-2x-33-31-w-33-2u-w-2b-2t-34-2p-36-2p-38-2x-33-32</encoding>
            </topic>
        </topic>
    </topic>
    <topic children="no">
        <id>1</id>
        <encoding>1t-32-2p-30-3d-37-2x-37</encoding>
    </topic>
</library>

我不知道您的loadable屬性是如何確定的，或者標題是從哪里來的，但這應該可以幫助您達到目標。

http://jsfiddle.net/Weu4A/4/

從DOM生成XML文檔，同時跳過某些元素

問題描述

所以我有看起來像這樣的HTML：

這是屏幕截圖：

我打算最終產生的那種XML：

這是我目前正在迭代的方式：

問題：

這是一個JSFiddle：

1 個解決方案

解決方案1
1 已采納 2014-04-25 20:38:29

從DOM生成XML文檔，同時跳過某些元素

問題描述

所以我有看起來像這樣的HTML：

這是屏幕截圖：

我打算最終產生的那種XML：

這是我目前正在迭代的方式：

問題：

這是一個JSFiddle：

1 個解決方案

解決方案1 1 已采納 2014-04-25 20:38:29

解決方案1
1 已采納 2014-04-25 20:38:29