[英]Stripping HTML Down To Allowed Tags
I'd like to strip all HTML tags (but not the content), apart from ones given in a list. 除了列表中给出的标签之外,我想删除所有HTML标签(但不包括内容)。
I'd like to do this using Node. 我想用Node做到这一点。
The following regex can match the tags: `<[a|br].+?>|<\\/[a]>` — but how can I then proceed to remove all tags except the ones matched?
以下正则表达式可以匹配标签:`<[a|br].+?>|<\\/[a]>`,但是如何继续删除除匹配的标签之外的所有标签?
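To replace only `a` and `br` tags: 仅替换 `a` 和 `br` 标签:
`|` is a literal `|` inside a character set `[]`. 在字符集 `[]` 中,`|` 只是字面字符 `|`。
The set `[a|br]` matches a single character: `a`, `|`, `b` or `r`. Use a non-capturing group instead. 字符集 `[a|br]` 匹配单个字符:`a`、`|`、`b` 或 `r`。请改用非捕获组。
`.+?` should be `.*?` so that the bare tags `<a>` and `<br>` are matched as well. `.+?` 应改为 `.*?`,这样才能同时匹配 `<a>` 和 `<br>`。
`[a]` could be just `a`. `[a]` 可以直接写成 `a`。
Try this: 尝试这个: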
/<(?:a|br).*?>|<\/a>/g
https://regex101.com/r/KWJi01/2 https://regex101.com/r/KWJi01/2
To replace every tag except `a` and `br`: 要替换除 `a` 和 `br` 之外的所有标签:
Use this regexp: 使用此正则表达式:
/<(?:(?!\/?a|br).*?)>/g
https://regex101.com/r/KWJi01/3 https://regex101.com/r/KWJi01/3
One alternative solution to the problem, without using regular expressions, is the following approach: 在不使用正则表达式的情况下,该问题的一种替代解决方案是以下方法:
/**
 * Strips every HTML element whose tag name is not listed in `permitted`,
 * while keeping the child nodes (text and nested markup) of each stripped
 * element in its place.
 *
 * Needs a DOM: a global `document` must exist (a browser, or a DOM
 * implementation supplied by the host environment).
 *
 * Attributes on permitted elements are left untouched, so the result is
 * tag-filtered markup, not fully sanitized markup.
 *
 * @param {string} htmlString - The HTML markup to filter.
 * @param {string[]} [permitted=[]] - Tag names to keep, without '<', '>' or
 *   attributes. Matching is case-insensitive. When omitted, all tags are
 *   stripped.
 * @returns {string} The serialized markup containing only permitted elements.
 */
function filterHTML(htmlString, permitted = []) {
  // Normalize once into a Set: constant-time lookups per element, and the
  // parameter itself is never reassigned.
  const permittedTags = new Set(permitted.map((tag) => tag.toLowerCase()));

  // Parse inside a <template>. Its content lives in an inert document, so
  // the markup is parsed without fetching images or running inline event
  // handlers, which assigning innerHTML on a plain <div> would not prevent.
  const template = document.createElement('template');
  template.innerHTML = htmlString;

  // querySelectorAll() returns a static list in document order, so removing
  // elements while iterating does not disturb the iteration.
  for (const element of template.content.querySelectorAll('*')) {
    if (permittedTags.has(element.tagName.toLowerCase())) {
      continue;
    }

    // Hoist every child in front of the unpermitted element (preserving
    // order), then drop the now-empty element itself.
    const parent = element.parentNode;
    while (element.firstChild) {
      parent.insertBefore(element.firstChild, element);
    }
    parent.removeChild(element);
  }

  // For a <template>, innerHTML serializes the template's content.
  return template.innerHTML;
}
// Tag names that filterHTML() should keep. These are bare names: no '<',
// no '>' and no attributes.
const allowedTags = ['br', 'div'];

// Demo wiring: show the raw markup of #test as text, show the filtered
// markup as text, then render that filtered markup as real HTML.
const sourceElement = document.querySelector('#test');
document.querySelector('#input').textContent = sourceElement.innerHTML;

const outputPane = document.querySelector('#output');
outputPane.textContent = filterHTML(sourceElement.innerHTML, allowedTags).trim();

document.querySelector('#result').innerHTML = outputPane.textContent;
/**
 * Strips every HTML element whose tag name is not listed in `permitted`,
 * keeping the child nodes of each stripped element in its place.
 *
 * Needs a global `document`. Attributes on permitted elements are kept
 * as they are.
 *
 * @param {string} htmlString - The HTML markup to filter.
 * @param {string[]} [permitted=[]] - Tag names to keep (no '<', '>' or
 *   attributes). Matching is case-insensitive.
 * @returns {string} The serialized markup containing only permitted elements.
 */
function filterHTML(htmlString, permitted = []) {
  // Lowercase the allow-list so entries such as 'DIV' match the lowercased
  // tagName below; a Set keeps each lookup constant-time.
  const permittedTags = new Set(permitted.map((tag) => tag.toLowerCase()));

  // A <template>'s content lives in an inert document, so the markup is
  // parsed without fetching images or running inline event handlers.
  const template = document.createElement('template');
  template.innerHTML = htmlString;

  // querySelectorAll() returns a static list, so elements can be removed
  // while iterating.
  for (const element of template.content.querySelectorAll('*')) {
    if (permittedTags.has(element.tagName.toLowerCase())) {
      continue;
    }

    // Move the children in front of the unpermitted element, then drop it.
    const parent = element.parentNode;
    while (element.firstChild) {
      parent.insertBefore(element.firstChild, element);
    }
    parent.removeChild(element);
  }

  return template.innerHTML;
}

// Tag names to keep: bare names, without '<', '>' or attributes.
const allowedTags = ['br', 'div'];

// Demo wiring: raw markup of #test as text, filtered markup as text, and
// finally the filtered markup rendered as HTML.
document.querySelector('#input').textContent = document.querySelector('#test').innerHTML;
document.querySelector('#output').textContent = filterHTML(document.querySelector('#test').innerHTML, allowedTags).trim();
document.querySelector('#result').innerHTML = document.querySelector('#output').textContent;
/* Every <div> is drawn as a rounded, bordered box. */
div {
  box-sizing: border-box;
  margin: 0 0 1em;
  padding: 0.5em;
  border: 2px solid #000;
  border-radius: 1em;
}

/* Divs whose id ends in "put" (#input, #output) keep their whitespace. */
div[id$="put"] {
  white-space: pre-wrap;
}

#input {
  border-color: #f00;
}

#output {
  border-color: limegreen;
}

/* Shared look for the generated captions; only rules that set `content`
   below actually produce a caption. */
::before {
  display: block;
  margin-bottom: 0.5em;
  border-bottom: 1px solid #666;
  color: #666;
}

#input::before {
  content: 'The original innerHTML of the "#test" element:';
}

#output::before {
  content: 'The filtered innerHTML, with only permitted elements remaining:';
}

#result::before {
  content: 'This element shows the filtered HTML of the "test" element:';
}
<!-- Demo markup: #test holds the sample HTML that gets filtered; #input, #output and #result start empty and are filled in by the demo script. -->
<div id="test"> <div><span> <a href="#">link text!</a> <br /> <hr /> <div>div text!</div> </span></div> </div> <div id="input"></div> <div id="output"></div> <div id="result"></div>
References: 参考文献:
- Array.from()
- Array.prototype.forEach()
- Array.prototype.indexOf()
- Array.prototype.map()
- document.createElement()
- document.querySelector()
- document.querySelectorAll()
- let statement (let 声明)
- Node.firstChild
- Node.insertBefore()
- Node.parentNode
- Node.removeChild()
- String.prototype.toLowerCase()
- while () {...}
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.