[英]How can I grab the content of a DOM element only once if selecting an entire tree which also contains those same nested elements?
例如:通過 id (getElementById) 定位容器元素 (div#container) 會返回一個 HTML 集合,其中包含每個元素及其所有屬性,包括在每個嵌套項中重復的子節點。 然后我將每個項目迭代到一個數組中,但在 DOM 樹的每個級別中都會重復相同的數據。
0: <div class="container"><div><div><main><footer><div class="container-fluid"><p> © 2018-2020 Copyright: </p></div></footer></main></div></div></div>
1: <div><div><main><footer><div class="container-fluid"><p> © 2018-2020 Copyright: </p></div></footer></main></div></div>
2: <div><main><footer><div class="container-fluid"><p> © 2018-2020 Copyright: </p></div></footer></main></div>
3: <main><footer><div class="container-fluid"><p> © 2018-2020 Copyright: </p></div></footer></main>
4: <footer><div class="container-fluid"><p> © 2018-2020 Copyright: </p></div></footer>
5: <div class="container-fluid"><p> © 2018-2020 Copyright: </p></div>
6: <p> © 2018-2020 Copyright: </p>
我想要做的是只獲取一次實際內容(例如 <p> © 2018-2020 Copyright: </p>),並將其與對應的 XPath 位置相關聯,以便稍后重新組裝 HTML:上層的包含結構只保留元素標簽和屬性,而實際內容僅插入到最后一層子節點中,如下所示:
/DIV/DIV/DIV/MAIN/FOOTER/ --> `<div class="container-fluid"><p></p></div>`
/DIV/DIV/DIV/MAIN/FOOTER/DIV --> `<p></p>`
/DIV/DIV/DIV/MAIN/FOOTER/DIV/P --> © 2018-2020 Copyright:
背景/上下文:這樣做的目的是減少數組對象中的冗余,以構建有效的有效負載(最終字符串化為 JSON)並發送到 Microsoft Translator API,從而避免不必要地重復翻譯相同的內容節點;之后再使用 XPath 和 jQuery 將翻譯后的文本響應注入回其原始 DOM 位置,以重建翻譯后的頁面。
到目前為止,我已經使用 jQuery 和 TreeWalker Web API ( https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker ) 來完成我迄今為止所做的......
JavaScript:
// Root of the subtree to scan: the element whose id is "container".
var content = document.getElementById('container');
// Shared accumulator: elementNodesUnder() pushes every accepted element into this array.
var b = [];
/**
 * Walks the element subtree rooted at `el` and appends every element that
 * passes the filter to the shared `b` array.
 *
 * An element is accepted when its innerHTML is non-empty and its tagName is
 * not one of SCRIPT, STYLE, svg, I or VIDEO. Everything else is answered with
 * NodeFilter.FILTER_SKIP.
 *
 * @param {Node} el - Root node handed to document.createTreeWalker().
 * @returns {Element[]} The shared `b` array (including entries added by
 *   earlier calls), with the newly accepted elements appended.
 */
function elementNodesUnder(el) {
  // Compared against tagName exactly as written (note the lowercase 'svg').
  const excludedTags = new Set(['SCRIPT', 'STYLE', 'svg', 'I', 'VIDEO']);

  // Keep the filter and the walker local: as undeclared assignments they
  // would leak into the global scope and throw a ReferenceError in strict
  // mode / ES modules.
  const nodeFilter = function (node) {
    if (node.innerHTML && !excludedTags.has(node.tagName)) {
      return NodeFilter.FILTER_ACCEPT;
    }
    return NodeFilter.FILTER_SKIP;
  };

  const walk = document.createTreeWalker(el, NodeFilter.SHOW_ELEMENT, nodeFilter);

  let n;
  while ((n = walk.nextNode())) {
    b.push(n);
  }
  return b;
}
// Fill `b` with every accepted element under the container, then log it.
elementNodesUnder(content);
console.log(b);

// Parallel collections: xPathArray[k] is the XPath of the element whose
// markup is stored at innerHTMLdinner[k].
var xPathArray = [];
var innerHTMLdinner = [];

// Record the XPath and markup of each collected element that has content.
for (const element of b) {
  if (!element.innerHTML) {
    continue;
  }
  xPathArray.push(getElementXPath(element));
  innerHTMLdinner.push(element.innerHTML);
}

// Zip the two collections into one record per element and serialise to JSON.
var xpathNodeMap = xPathArray.map((xPathers, index) => {
  return {
    xPathArray: xPathers,
    innerHTML: innerHTMLdinner[index],
  };
});
var xpathNodeMapJSON = JSON.stringify(xpathNodeMap);
console.log(xpathNodeMapJSON);
/**
 * Builds an XPath-style location string for a DOM element.
 *
 * Starting at `elt`, climbs through parentNode while the current node is an
 * element (nodeType 1), prepending one "/TAGNAME" step per level. A step gets
 * a "[n]" suffix only when getElementIdx() reports a position greater than 1.
 *
 * @param {Element} elt - Element to locate. A falsy value or a non-element
 *   node yields the empty string.
 * @returns {string} Path such as "/HTML/BODY/DIV[6]/DIV/P".
 */
function getElementXPath(elt) {
  let path = '';
  // Use a separate cursor so the parameter is not reassigned, and keep the
  // per-step values local: as undeclared assignments they would leak globals
  // and throw a ReferenceError in strict mode / ES modules.
  for (let node = elt; node && node.nodeType === 1; node = node.parentNode) {
    const idx = getElementIdx(node);
    const step = idx > 1 ? `${node.tagName}[${idx}]` : node.tagName;
    path = `/${step}${path}`;
  }
  return path;
}
/**
 * Returns the 1-based position of `elt` among its preceding siblings that are
 * elements with the same tagName.
 *
 * @param {Element} elt - Element whose position is wanted.
 * @returns {number} 1 when no earlier sibling shares the tag name, otherwise
 *   1 plus the number of earlier same-tag element siblings.
 */
function getElementIdx(elt) {
  let position = 1;
  let cursor = elt.previousSibling;
  while (cursor) {
    const isSameTagElement = cursor.nodeType === 1 && cursor.tagName === elt.tagName;
    if (isSameTagElement) {
      position += 1;
    }
    cursor = cursor.previousSibling;
  }
  return position;
}
HTML 示例:
<html>
<body>
<div></div>
<div></div>
<div></div>
<div></div>
<div></div>
<div id="container">
<div class="layout">
<div class="bodyContainer">
<main class="wrapper">
<footer class="full-standard">
<div class="container no-print">
<div class="row">
<img alt="" src="" />
</div> <!-- footer > div.row -->
</div> <!-- /div.container.no-print -->
<div class="footer-copyright">
<div class="container-fluid">
<p>© 2020 Copyright</p>
</div> <!-- /div.container-fluid -->
</div> <!-- /div.footer-copyright -->
</footer> <!-- /footer.full-standard -->
</main> <!-- /main.wrapper -->
</div> <!-- /div.bodyContainer-->
</div> <!--/div.layout -->
</div> <!-- / div#container -->
</body>
</html>
XPath 結果示例:
{
"xPathArray": "/HTML/BODY/DIV[6]/DIV/DIV/MAIN/FOOTER/DIV[2]",
"innerHTML": "<div class=\"container-fluid\"><p> © 2018-2020 Copyright: </p></div>"
},
{
"xPathArray": "/HTML/BODY/DIV[6]/DIV/DIV/MAIN/FOOTER/DIV[2]/DIV",
"innerHTML": "<p> © 2018-2020 Copyright: </p>"
},
{
"xPathArray": "/HTML/BODY/DIV[6]/DIV/DIV/MAIN/FOOTER/DIV[2]/DIV/P",
"innerHTML": " © 2018-2020 Copyright: "
}
令人驚訝的是,我還沒有找到任何與這個問題太接近的東西,所以如果我錯過了它,我深表歉意,但任何幫助我指明正確方向的幫助將不勝感激。 謝謝!
嘗試為您想要的元素分配一個唯一的 id - 然后通過 id 抓取該元素並將該元素的 innerText 傳遞給您的處理程序?
<p id='unique_id'> Some Text </p>
// getElementById returns a single element (or null), not a collection, so it is not indexed with [0].
document.getElementById('unique_id').innerHTML
您可能需要稍微調整一下,但總體思路應該可行
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.