[英]DOMParser - children are not DOM objects
There is a strange behavior with DOMParser. DOMParser有一个奇怪的行为。 When I use "text/xml" as the parameter, I get my object, and each time I use a child (like parentNodes), the child is itself a DOM object. 当我使用“text/xml”作为参数时,会得到对象,并且每次使用子对象(如parentNodes)时,该子对象本身就是DOM对象。 However, when I use "text/html" as the parameter, the children are not DOM objects. 但是,当我使用“text/html”作为参数时,子对象不是DOM对象。 Why is that, and how can I get DOM objects for all the children? 为什么会这样?如何让所有子对象都是DOM对象?
Here is what I do: 这是我的做法:
// Parse the HTML source string into a document. Both variables are declared
// explicitly: assigning to undeclared names creates implicit globals and throws
// a ReferenceError in strict mode / ES modules.
var parser = new DOMParser();
// NOTE: despite its name, `doc` holds the collection of <p> elements returned by
// getElementsByTagName, not the parsed document itself.
var doc = parser.parseFromString(stringContainingHTMLSource, "text/html").getElementsByTagName('p');
// Log the first child node of the first <p> element.
console.log(doc[0].childNodes[0]);
My childNode returns the element but not as a DOM object... 我的childNode返回该元素,但不作为DOM对象返回...
Edit: Here are my recursive functions: 编辑:这是我的递归函数:
/**
 * Collects formatting hints for `node` by walking up its ancestor chain.
 *
 * Starting at `node` and stopping before the `#document` node, each visited
 * node contributes to the accumulator as follows:
 *   - a `span` contributes the value of its first attribute, or nothing when it
 *     has no attributes;
 *   - a `p` that has attributes contributes its node name followed by the value
 *     of its first attribute;
 *   - every other node contributes its node name.
 *
 * Only the first attribute is inspected, so a hint stored in a later attribute
 * is not picked up.
 *
 * @param {Node} node - Node to start from.
 * @param {Array<string>} parentNodes - Accumulator; entries are appended in place.
 * @returns {Array<string>} The same `parentNodes` array that was passed in.
 */
var getParents = function(node, parentNodes){
    var current = node;
    while (current) {
        // Text nodes expose no `attributes` collection at all, hence the existence check.
        var attrs = current.attributes;
        var hasAttributes = Boolean(attrs) && attrs.length > 0;

        if (current.nodeName === 'span') {
            // A bare <span> carries no hint; reading attributes[0] would throw.
            if (hasAttributes) {
                parentNodes.push(attrs[0].nodeValue);
            }
        } else {
            parentNodes.push(current.nodeName);
            if (current.nodeName === 'p' && hasAttributes) {
                parentNodes.push(attrs[0].nodeValue);
            }
        }

        // Stop at the document root, or when the node is detached (no parent).
        var parent = current.parentNode;
        if (!parent || parent.nodeName === '#document') {
            break;
        }
        current = parent;
    }
    return parentNodes;
};
/**
 * Looks up one property in an inline CSS declaration list such as
 * "text-align: right; color: red".
 *
 * The property name must match exactly (case-insensitively), so asking for
 * "color" does not match "background-color". When the property is declared
 * more than once, the last declaration is returned.
 *
 * @param {string} styleText - Raw declaration list.
 * @param {string} propertyName - Lower-case CSS property name to look for.
 * @returns {string|null} Trimmed value, or null when the property is absent.
 */
var readStyleProperty = function(styleText, propertyName){
    var found = null;
    var declarations = styleText.split(';');
    for (var i = 0; i < declarations.length; i++) {
        var separator = declarations[i].indexOf(':');
        if (separator === -1) {
            continue;
        }
        var name = declarations[i].slice(0, separator).trim().toLowerCase();
        if (name === propertyName) {
            found = declarations[i].slice(separator + 1).trim();
        }
    }
    return found;
};

/**
 * Recursively walks `node` and, for every leaf (a node without children),
 * appends a formatting object to `pdfContent[vertical][horizontal]`.
 *
 * The formatting object is built from the entries returned by
 * `getParents(leaf, [])`:
 *   - "em" / "strong" set `italics` / `bold`, or `bolditalics` when both are
 *     present, regardless of nesting order;
 *   - "u" / "s" set `decoration` to 'underline' / 'lineThrough';
 *   - entries containing ':' are read as inline style declarations, from which
 *     `text-align` becomes `alignment` and `color` becomes `color`.
 * The leaf's `textContent` is stored as `text`.
 *
 * @param {Node} node - Node to walk.
 * @param {number} vertical - First index into `pdfContent`.
 * @param {number} horizontal - Second index into `pdfContent`.
 * @param {number} paragraph - Paragraph index; forwarded to recursive calls, otherwise unused.
 * @returns {undefined}
 */
var parse = function(node, vertical, horizontal, paragraph){
    if (node.childNodes.length > 0) {
        for (var child = 0; child < node.childNodes.length; child++) {
            parse(node.childNodes[child], vertical, horizontal, paragraph);
        }
        return;
    }

    var object = {};
    var isBold = false;
    var isItalic = false;
    var attributes = getParents(node, []);

    for (var index = 0; index < attributes.length; index++) {
        var entry = attributes[index];
        if (typeof entry !== 'string') {
            continue;
        }
        if (entry === 'em') {
            // italic
            isItalic = true;
        } else if (entry === 'strong') {
            // bold
            isBold = true;
        } else if (entry === 'u') {
            // underline
            object.decoration = 'underline';
        } else if (entry === 's') {
            // strike
            object.decoration = 'lineThrough';
        } else if (entry.indexOf(':') !== -1) {
            // Inline style text; one attribute may carry several declarations.
            var alignment = readStyleProperty(entry, 'text-align');
            if (alignment) {
                object.alignment = alignment;
            }
            var color = readStyleProperty(entry, 'color');
            if (color) {
                object.color = color;
            }
        }
    }

    // Resolve weight/slant once, so the result does not depend on whether
    // <em> wraps <strong> or the other way round.
    if (isBold && isItalic) {
        object.bolditalics = true;
    } else if (isBold) {
        object.bold = true;
    } else if (isItalic) {
        object.italics = true;
    }

    object.text = node.textContent;

    // Create the target cell on demand so callers need not pre-build the grid.
    if (!pdfContent[vertical]) {
        pdfContent[vertical] = [];
    }
    if (!pdfContent[vertical][horizontal]) {
        pdfContent[vertical][horizontal] = [];
    }
    pdfContent[vertical][horizontal].push(object);
};
// Walk the two-dimensional payment.htmlContent grid. Each cell is parsed as
// XML, and every child node of every <p> element found in it is handed to
// parse() together with the cell coordinates and the paragraph index.
for (var vertical = 0; vertical < payment.htmlContent.length; vertical += 1) {
    for (var horizontal = 0; horizontal < payment.htmlContent[vertical].length; horizontal += 1) {
        var parser = new DOMParser();
        var paragraphs = parser
            .parseFromString(payment.htmlContent[vertical][horizontal], "text/xml")
            .getElementsByTagName('p');
        var paragraph = 0;
        while (paragraph < paragraphs.length) {
            var num = 0;
            while (num < paragraphs[paragraph].childNodes.length) {
                parse(paragraphs[paragraph].childNodes[num], vertical, horizontal, paragraph);
                num += 1;
            }
            paragraph += 1;
        }
    }
}
I made a few assumptions about what the values are, and after I added a few checks such as if(node.attributes.length>0) to your code, it seems to work. 我对这些值做了一些假设,并在你的代码中添加了一些类似 if(node.attributes.length>0) 的检查之后,它似乎可以正常工作。
// Sample input: a 2x2 grid of HTML fragments, each holding one <p> element.
var payment = {
    htmlContent: [
        ['<p>some<em>text</em></p>', '<p>some<span>text<strong>here</strong></span></p>'],
        ['<p>some<s>text</s></p>', '<p>some<span style="color:#FF00FF">text</span></p>']
    ]
};

/**
 * Collects formatting hints for `node` by walking up its ancestor chain,
 * stopping before the `#document` node.
 *
 * A `span` contributes the value of its first attribute (nothing if it has
 * none); a `p` with attributes contributes its node name and the value of its
 * first attribute; every other node contributes its node name.
 *
 * @param {Node} node - Node to start from.
 * @param {Array<string>} parentNodes - Accumulator; entries are appended in place.
 * @returns {Array<string>} The same `parentNodes` array.
 */
var getParents = function(node, parentNodes){
    var current = node;
    while (current) {
        // Text nodes expose no `attributes` collection, hence the existence check.
        var attrs = current.attributes;
        var hasAttributes = Boolean(attrs) && attrs.length > 0;

        if (current.nodeName === 'span') {
            if (hasAttributes) {
                parentNodes.push(attrs[0].nodeValue);
            }
        } else {
            parentNodes.push(current.nodeName);
            if (current.nodeName === 'p' && hasAttributes) {
                parentNodes.push(attrs[0].nodeValue);
            }
        }

        // Stop at the document root, or when the node is detached.
        var parent = current.parentNode;
        if (!parent || parent.nodeName === '#document') {
            break;
        }
        current = parent;
    }
    return parentNodes;
};

/**
 * Looks up one property in an inline CSS declaration list such as
 * "text-align: right; color: red". The name must match exactly, so "color"
 * does not match "background-color"; the last declaration wins.
 *
 * @param {string} styleText - Raw declaration list.
 * @param {string} propertyName - Lower-case CSS property name.
 * @returns {string|null} Trimmed value, or null when the property is absent.
 */
var readStyleProperty = function(styleText, propertyName){
    var found = null;
    var declarations = styleText.split(';');
    for (var i = 0; i < declarations.length; i++) {
        var separator = declarations[i].indexOf(':');
        if (separator === -1) {
            continue;
        }
        var name = declarations[i].slice(0, separator).trim().toLowerCase();
        if (name === propertyName) {
            found = declarations[i].slice(separator + 1).trim();
        }
    }
    return found;
};

/**
 * Recursively walks `node` and, for every leaf node, appends a formatting
 * object (built from the leaf's ancestor hints plus its text content) to
 * `pdfContent[vertical][horizontal]`, creating that cell on demand.
 *
 * @param {Node} node - Node to walk.
 * @param {number} vertical - First index into `pdfContent`.
 * @param {number} horizontal - Second index into `pdfContent`.
 * @param {number} paragraph - Paragraph index; forwarded to recursive calls, otherwise unused.
 * @returns {undefined}
 */
var parse = function(node, vertical, horizontal, paragraph){
    if (node.childNodes.length > 0) {
        for (var child = 0; child < node.childNodes.length; child++) {
            parse(node.childNodes[child], vertical, horizontal, paragraph);
        }
        return;
    }

    var object = {};
    var isBold = false;
    var isItalic = false;
    var attributes = getParents(node, []);
    // Debug output kept from the original snippet.
    console.log(attributes);

    for (var index = 0; index < attributes.length; index++) {
        var entry = attributes[index];
        if (typeof entry !== 'string') {
            continue;
        }
        if (entry === 'em') {
            // italic
            isItalic = true;
        } else if (entry === 'strong') {
            // bold
            isBold = true;
        } else if (entry === 'u') {
            // underline
            object.decoration = 'underline';
        } else if (entry === 's') {
            // strike
            object.decoration = 'lineThrough';
        } else if (entry.indexOf(':') !== -1) {
            // Inline style text; one attribute may carry several declarations.
            var alignment = readStyleProperty(entry, 'text-align');
            if (alignment) {
                object.alignment = alignment;
            }
            var color = readStyleProperty(entry, 'color');
            if (color) {
                object.color = color;
            }
        }
    }

    // Resolve weight/slant once so that <em><strong> and <strong><em> agree.
    if (isBold && isItalic) {
        object.bolditalics = true;
    } else if (isBold) {
        object.bold = true;
    } else if (isItalic) {
        object.italics = true;
    }

    object.text = node.textContent;

    if (!pdfContent[vertical]) {
        pdfContent[vertical] = [];
    }
    if (!pdfContent[vertical][horizontal]) {
        pdfContent[vertical][horizontal] = [];
    }
    pdfContent[vertical][horizontal].push(object);
};

// Result grid, filled by parse(): pdfContent[vertical][horizontal] is an array
// of formatting objects.
var pdfContent = [];

// Parse every cell as XML and feed the child nodes of each <p> to parse().
for (var vertical = 0; vertical < payment.htmlContent.length; vertical++) {
    for (var horizontal = 0; horizontal < payment.htmlContent[vertical].length; horizontal++) {
        var parser = new DOMParser();
        var paragraphs = parser.parseFromString(payment.htmlContent[vertical][horizontal], "text/xml").getElementsByTagName('p');
        for (var paragraph = 0; paragraph < paragraphs.length; paragraph++) {
            for (var num = 0; num < paragraphs[paragraph].childNodes.length; num++) {
                parse(paragraphs[paragraph].childNodes[num], vertical, horizontal, paragraph);
            }
        }
    }
}

// Dump the result into the #log element. JSON.stringify replaces the
// non-standard, Firefox-only toSource(), which is unavailable in other engines.
for (var i = 0; i < pdfContent.length; i++) {
    // A row or cell stays undefined when its fragment produced no leaf nodes.
    var row = pdfContent[i] || [];
    for (var j = 0; j < row.length; j++) {
        if (!row[j]) {
            continue;
        }
        document.querySelector('#log').textContent += JSON.stringify(row[j]);
    }
}
<p id="log"></p>
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.