简体   繁体   English

DOMParser-子代不是DOM对象

[英]DOMParser - children are not DOM objects

There is a strange behavior with DOMParser. DOMParser有一个奇怪的行为。 When I use "text/xml" as the parameter I get my object and each time I use a child (like parentNodes), the child is itself a DOM object. 当我使用“text/xml”作为参数时,可以得到对象,并且每次使用子对象(如parentNodes)时,该子对象本身就是DOM对象。 However, when I use "text/html" as the parameter, the children are not DOM objects. 但是,当我使用“text/html”作为参数时,子节点不是DOM对象。 Why is that and how can I have DOM objects for all the children? 为什么会这样?如何让所有子节点都是DOM对象?

Here is what I do: 这是我的做法:

// Parse the HTML source and collect every <p> element of the resulting document.
// Declared with `var` so the snippet does not create implicit globals.
var parser = new DOMParser();
var doc = parser.parseFromString(stringContainingHTMLSource, "text/html").getElementsByTagName('p');

// Log the first child node of the first <p> element.
console.log(doc[0].childNodes[0]);

My childNode returns the element but not as a DOM object... 我的childNode返回该元素,但不作为DOM对象返回...

Edit: Here are my recursive functions: 编辑:这是我的递归函数:

        /**
         * Walks from `node` up through its ancestors, stopping before the '#document'
         * node (or at a node that has no parent), and appends one or two tokens per
         * node to `parentNodes`:
         *   - 'span'            -> the value of its first attribute (nothing if it has none)
         *   - 'p' + attributes  -> the node name, then the value of its first attribute
         *   - anything else     -> the node name
         *
         * Node names are lower-cased before they are compared or recorded, because
         * element names are reported in upper case for documents parsed as "text/html".
         *
         * @param {Node} node - node to start from.
         * @param {Array} parentNodes - accumulator; it is mutated and returned.
         * @returns {Array} the same `parentNodes` array, ordered from `node` outwards.
         */
        var getParents = function(node, parentNodes){
            var name = String(node.nodeName).toLowerCase();
            // Text nodes have no `attributes` collection at all, hence the existence check.
            var attrs = node.attributes;
            var hasAttributes = Boolean(attrs) && attrs.length > 0;

            if(name === 'span'){
                // A bare <span> carries no formatting information, so nothing is recorded.
                if(hasAttributes){
                    parentNodes.push(attrs[0].nodeValue);
                }
            } else {
                parentNodes.push(name);
                if(name === 'p' && hasAttributes){
                    parentNodes.push(attrs[0].nodeValue);
                }
            }

            // A detached node has a null parent; stop there instead of throwing.
            var parent = node.parentNode;
            if(parent && parent.nodeName !== '#document'){
                getParents(parent, parentNodes);
            }
            return parentNodes;

        };
        /**
         * Walks `node` depth-first. For every leaf (a node without child nodes) it builds
         * a descriptor object from the tokens returned by getParents(node, []) and appends
         * it to pdfContent[vertical][horizontal], creating that slot if it is missing.
         *
         * Descriptor fields set here: `text` (always), and optionally `alignment`, `color`,
         * `italics`, `bold`, `bolditalics`, `decoration` ('underline' | 'lineThrough').
         *
         * @param {Node} node - node to convert.
         * @param {number} vertical - first index into pdfContent.
         * @param {number} horizontal - second index into pdfContent.
         * @param {number} paragraph - paragraph index; only forwarded on recursion.
         */
        var parse = function(node, vertical, horizontal, paragraph){
            var i;
            if(node.childNodes.length > 0){
                for(i = 0; i < node.childNodes.length; i++){
                    parse(node.childNodes[i], vertical, horizontal, paragraph);
                }
                return;
            }

            var object = {};
            var attributes = getParents(node, []);
            for(i = 0; i < attributes.length; i++) {
                var entry = String(attributes[i]);
                // Match whole declarations so that "text-align:right" (no space) works and
                // "background-color" is not mistaken for "color".
                var alignment = /(?:^|;)\s*text-align\s*:\s*([^;]+)/i.exec(entry);
                var color = /(?:^|;)\s*color\s*:\s*([^;]+)/i.exec(entry);
                // Tag tokens are compared case-insensitively.
                var tag = entry.toLowerCase();

                if (alignment || color) {
                    if (alignment) {
                        object.alignment = alignment[1].trim();
                    }
                    if (color) {
                        object.color = color[1].trim();
                    }
                } else if (tag === 'em') {
                    // italic: merge with an already recorded bold
                    if (object.bold) {
                        delete object.bold;
                        object.bolditalics = true;
                    } else if (!object.bolditalics) {
                        object.italics = true;
                    }
                } else if (tag === 'strong') {
                    // bold: merge with an already recorded italic
                    if (object.italics) {
                        delete object.italics;
                        object.bolditalics = true;
                    } else if (!object.bolditalics) {
                        object.bold = true;
                    }
                } else if (tag === 'u') {
                    // underline
                    object.decoration = 'underline';
                } else if (tag === 's') {
                    // strike
                    object.decoration = 'lineThrough';
                }
            }
            object.text = node.textContent;

            // Create the target slot on demand instead of failing on an undefined entry.
            if (!pdfContent[vertical]) {
                pdfContent[vertical] = [];
            }
            if (!pdfContent[vertical][horizontal]) {
                pdfContent[vertical][horizontal] = [];
            }
            pdfContent[vertical][horizontal].push(object);
        };
        // For every HTML fragment in payment.htmlContent (indexed [vertical][horizontal]):
        // parse it as XML, find its <p> elements and hand each child node of every
        // paragraph to parse() together with the fragment's coordinates.
        for(var vertical = 0; vertical < payment.htmlContent.length; vertical++) {
            var fragmentRow = payment.htmlContent[vertical];
            for(var horizontal = 0; horizontal < fragmentRow.length; horizontal++) {
                var parser = new DOMParser();
                var xmlDocument = parser.parseFromString(fragmentRow[horizontal], "text/xml");
                var paragraphs = xmlDocument.getElementsByTagName('p');
                for (var paragraph = 0; paragraph < paragraphs.length; paragraph++) {
                    var paragraphChildren = paragraphs[paragraph].childNodes;
                    for (var num = 0; num < paragraphChildren.length; num++) {
                        parse(paragraphChildren[num], vertical, horizontal, paragraph);
                    }
                }
            }
        }

I made a few assumptions about what the values are, and after I added a few checks such as if(node.attributes.length>0) to your code, it seems to work. 我对值是什么做了一些假设,并在代码中添加了一些类似if(node.attributes.length>0)的检查后,它似乎可以正常工作。

 // Runnable demo: converts a grid of small HTML fragments into text-run descriptors
 // and prints them into the element with id "log".

 // Sample input, indexed [vertical][horizontal].
 var payment = {
   htmlContent: [
     ['<p>some<em>text</em></p>', '<p>some<span>text<strong>here</strong></span></p>'],
     ['<p>some<s>text</s></p>', '<p>some<span style="color:#FF00FF">text</span></p>']
   ]
 };

 /**
  * Collects formatting tokens from `node` up to (not including) the '#document' node.
  * A 'span' contributes the value of its first attribute (nothing when it has none),
  * a 'p' with attributes contributes its name and the value of its first attribute,
  * every other node contributes its name.
  *
  * @param {Node} node - node to start from.
  * @param {Array} parentNodes - accumulator; mutated and returned.
  * @returns {Array} the same `parentNodes` array.
  */
 var getParents = function (node, parentNodes) {
   if (node.nodeName === 'span') {
     if (node.attributes.length > 0) {
       parentNodes.push(node.attributes[0].nodeValue);
     }
   } else if (node.nodeName === 'p' && node.attributes.length > 0) {
     parentNodes.push(node.nodeName);
     parentNodes.push(node.attributes[0].nodeValue);
   } else {
     parentNodes.push(node.nodeName);
   }
   if (node.parentNode.nodeName !== '#document') {
     getParents(node.parentNode, parentNodes);
   }
   return parentNodes;
 };

 /**
  * Walks `node` depth-first and appends one descriptor per leaf node to
  * pdfContent[vertical][horizontal], creating that slot when it is missing.
  *
  * @param {Node} node - node to convert.
  * @param {number} vertical - first index into pdfContent.
  * @param {number} horizontal - second index into pdfContent.
  * @param {number} paragraph - paragraph index; only forwarded on recursion.
  */
 var parse = function (node, vertical, horizontal, paragraph) {
   var i;
   if (node.childNodes.length > 0) {
     for (i = 0; i < node.childNodes.length; i++) {
       parse(node.childNodes[i], vertical, horizontal, paragraph);
     }
     return;
   }

   var object = {};
   var attributes = getParents(node, []);
   console.log(attributes);
   for (i = 0; i < attributes.length; i++) {
     var entry = String(attributes[i]);
     // Match whole declarations so "text-align:right" (no space) works and
     // "background-color" is not mistaken for "color".
     var alignment = /(?:^|;)\s*text-align\s*:\s*([^;]+)/i.exec(entry);
     var color = /(?:^|;)\s*color\s*:\s*([^;]+)/i.exec(entry);

     if (alignment || color) {
       if (alignment) {
         object.alignment = alignment[1].trim();
       }
       if (color) {
         object.color = color[1].trim();
       }
     } else if (entry === 'em') {
       // italic: merge with an already recorded bold
       if (object.bold) {
         delete object.bold;
         object.bolditalics = true;
       } else if (!object.bolditalics) {
         object.italics = true;
       }
     } else if (entry === 'strong') {
       // bold: merge with an already recorded italic
       if (object.italics) {
         delete object.italics;
         object.bolditalics = true;
       } else if (!object.bolditalics) {
         object.bold = true;
       }
     } else if (entry === 'u') {
       // underline
       object.decoration = 'underline';
     } else if (entry === 's') {
       // strike
       object.decoration = 'lineThrough';
     }
   }
   object.text = node.textContent;

   if (!pdfContent[vertical]) {
     pdfContent[vertical] = [];
   }
   if (!pdfContent[vertical][horizontal]) {
     pdfContent[vertical][horizontal] = [];
   }
   pdfContent[vertical][horizontal].push(object);
 };

 var pdfContent = [];

 // Parse every fragment as XML and convert the children of each <p> element.
 for (var vertical = 0; vertical < payment.htmlContent.length; vertical++) {
   for (var horizontal = 0; horizontal < payment.htmlContent[vertical].length; horizontal++) {
     var parser = new DOMParser();
     var paragraphs = parser.parseFromString(payment.htmlContent[vertical][horizontal], "text/xml").getElementsByTagName('p');
     for (var paragraph = 0; paragraph < paragraphs.length; paragraph++) {
       for (var num = 0; num < paragraphs[paragraph].childNodes.length; num++) {
         parse(paragraphs[paragraph].childNodes[num], vertical, horizontal, paragraph);
       }
     }
   }
 }

 // Print the result. JSON.stringify replaces the non-standard toSource(), which is
 // not available in current browsers.
 for (var i = 0; i < pdfContent.length; i++) {
   for (var j = 0; j < pdfContent[i].length; j++) {
     document.querySelector('#log').textContent += JSON.stringify(pdfContent[i][j]);
   }
 }
 <p id="log"></p> 

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM