简体   繁体   中英

Advanced JavaScript RegExp Replacement Within HTML tags

I have JavaScript code that works pretty well, like this:

// Matches every occurrence of the literal text "MyName"
var rgx = /MyName/g;
// Serialized markup of the whole <body>: tags and attributes as well as text
var curInnerHTML = document.body.innerHTML;
// The replacement runs over that raw markup string, so it cannot tell an
// attribute value from text content. As shown, the result is only stored in
// the local variable.
curInnerHTML = curInnerHTML.replace(rgx, "<span><span class='myName'>MyNameReplace</span></span>");

The problem is that it's matching the regex even where the text is contained within HTML attributes and the like. How can I modify the regex so that it only matches within the text content of the HTML? For example, in this string

    <div class="someclass" title="MyName">
MyName
</div>

it currently results in the following (note the change in the title attribute):

        <div class="someclass" title="<span><span class='myName'>MyNameReplace</span></span>">
<span><span class='myName'>
    MyNameReplace</span></span>
    </div>

But I need it to be (leave the title attribute untouched):

    <div class="someclass" title="MyName">
<span><span class='myName'>MyNameReplace</span></span>
</div>

Your best bet, and it's a lot easier than it sounds, is not to try to use regex to parse HTML, but to take advantage of the fact that the browser has already parsed it into the DOM, and recursively process the text nodes.

Here's an off-the-cuff example: Live Example | Source

// Scratch element: assigning markup to this div's `innerHTML` parses the
// markup of each replacement into DOM nodes
var div = document.createElement('div');

// This is the recursive-descent function that processes all text nodes
// within the node you give it and its descendants.
//
//   node - DOM node to process. Elements are walked, text nodes are
//          rewritten, every other node type is ignored.
//   rex  - pattern passed to `String.prototype.replace`
//   text - replacement passed to `String.prototype.replace`; the resulting
//          string is parsed as HTML markup
//
// Returns nothing; the tree under `node` is modified in place. Text nodes
// in which the replacement changes nothing are left untouched.
//
// Relies on the file-level scratch `div` and on `insertChildrenBefore`.
//
// NOTE: `rex` is applied to the *escaped* text (`&` -> `&amp;`,
// `<` -> `&lt;`), so a pattern that can match `&`, `amp`, `lt` or `;`
// may also match inside one of those entities.
function doReplacement(node, rex, text) {
    var child, sibling, tagName, escaped, replaced;

    // What kind of node did we get?
    switch (node.nodeType) {
        case 1: // Element
            // Leave `script` and `style` elements alone: their text is code,
            // not content, and injecting markup into it would break it.
            // You'll probably find you want to add to this list
            // (`object`, `applet`, ...)
            tagName = node.nodeName.toUpperCase();
            if (tagName !== "SCRIPT" && tagName !== "STYLE") {
                // It's an element we want to process. Start with its
                // *last* child and work backward: replacements insert
                // new nodes in front of the text node being processed,
                // so walking backward never revisits what we inserted.
                for (child = node.lastChild; child; child = sibling) {
                    // Before we change this node, grab a reference to the
                    // one that precedes it
                    sibling = child.previousSibling;

                    // Recurse
                    doReplacement(child, rex, text);
                }
            }
            break;
        case 3: // Text
            // A text node -- let's do our replacements!
            // The first two deal with the fact that the text node
            // may have less-than symbols or ampersands in it.
            escaped = node.nodeValue
                          .replace(/&/g, "&amp;")
                          .replace(/</g, "&lt;");
            // The third, of course, does your replacement.
            replaced = escaped.replace(rex, text);

            // Nothing changed: keep the existing text node rather than
            // destroying and re-creating an identical one.
            if (replaced === escaped) {
                break;
            }

            // Parse the result so the `div` holds real live DOM nodes
            // for the replacement.
            div.innerHTML = replaced;

            // Insert them in front of this text node...
            insertChildrenBefore(div, node);
            // ...and remove the text node.
            node.parentNode.removeChild(node);
            break;
    }
}

// Places every child of `container`, in order, directly ahead of `refNode`
// inside `refNode`'s parent.
function insertChildrenBefore(container, refNode) {
    var target = refNode.parentNode;
    var current = container.firstChild;
    var upcoming;

    while (current) {
        // Remember the follower first: inserting `current` elsewhere
        // takes it out of `container`'s child list.
        upcoming = current.nextSibling;
        target.insertBefore(current, refNode);
        current = upcoming;
    }
}

Which you'd call like this:

// Process the text nodes under document.body, replacing each match of
// /MyName/g with the given markup
doReplacement(document.body,
              /MyName/g,
              "<span><span class='myName'>MyNameReplace</span></span>");

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM