简体   繁体   中英

Regex to transform hashtag in a link without breaking existing HTML code

I want to convert all the URLs in a JavaScript string to links. These strings also contain words that begin with a hashtag (#).

So far I have created two regex passes applied in cascade: one that creates HTML anchor tags from URLs, and another that creates anchor tags for the hashtags (like on Twitter).

I am having a lot of problems trying to parse www.sitename.com/index.php#someAnchor into the right markup.

// The URL pass runs first; the hashtag pass then operates on its output,
// i.e. on text that already contains the generated anchor markup.
content = urlifyLinks(content);
content = urlifyHashtags(content);

where the two functions are as follows:

/**
 * Wraps every "#word" hashtag in an anchor pointing at the keyword search page.
 *
 * A hashtag is a "#" followed by one or more ASCII letters or digits. It is
 * linked when it sits at the very start of the text or directly after any
 * character other than "&" (so HTML numeric entities such as "&#39;" are
 * left untouched).
 *
 * NOTE: the pattern does not know about URLs or existing markup, so a
 * "#fragment" inside a URL or inside an already generated anchor is still
 * matched.
 *
 * @param {string} text - Text to scan for hashtags.
 * @returns {string} The text with hashtags replaced by anchor markup.
 */
function urlifyHashtags(text) {
    // A single pass handles both the start-of-string case and the
    // "preceded by a non-& character" case. Running them as two consecutive
    // replacements made the second pass re-match the ">#tag" it found inside
    // the anchor produced by the first one, yielding nested <a> elements.
    var hashtagRegex = /(^|[^&])#([a-zA-Z0-9]+)/g;

    // $1 re-emits the character consumed in front of the "#" (empty at the
    // start of the string); $2 is the hashtag text without the "#".
    return text.replace(hashtagRegex, '$1<a href="index.php?keywords=$2">#$2</a>');
}

/**
 * Wraps URLs, bare "www." hosts and e-mail addresses found in a string in
 * HTML anchor tags.
 *
 * Three replacements run in sequence, each one on the output of the previous
 * one: scheme-prefixed URLs (http, https, ftp), then hosts starting with
 * "www.", then e-mail addresses.
 *
 * @param {string} inputText - Text to scan.
 * @returns {string} The text with every match replaced by anchor markup.
 */
function urlifyLinks(inputText) {
    // The working variable is declared under the name that is actually
    // assigned below; the previous declaration was misspelled, which leaked
    // an implicit global (and throws a ReferenceError in strict mode).
    var replacedText, replacePattern1, replacePattern2, replacePattern3;

    // URLs starting with http://, https:// or ftp://
    replacePattern1 = /(\b(https?|ftp):\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])/gim;
    replacedText = inputText.replace(replacePattern1, '<a href="$1" target="_blank">$1</a>');

    // Hosts starting with "www." that do not directly follow a slash, so a
    // "www." that is part of a scheme-prefixed URL linked above is skipped.
    replacePattern2 = /(^|[^\/])(www\.[\S]+(\b|$))/gim;
    replacedText = replacedText.replace(replacePattern2, '$1<a href="http://$2" target="_blank">$2</a>');

    // E-mail addresses become mailto: links.
    replacePattern3 = /(\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,6})/gim;
    replacedText = replacedText.replace(replacePattern3, '<a href="mailto:$1">$1</a>');
    return replacedText;
}

I am considering parsing the output of urlifyLinks and applying the hashtag regex only to the top-level text nodes of the resulting DOM. Is that an ugly thing to do?

You can avoid this problem by using a single regex with a callback function replacement.

For example:

/**
 * Turns URLs, "www." hosts, e-mail addresses and "#hashtags" in a string
 * into HTML anchors in a single regex pass.
 *
 * All four patterns are joined into one alternation, so each piece of text
 * is consumed by at most one of them and text that has already been turned
 * into a link is never scanned again.
 *
 * @param {string} str - Text to scan.
 * @returns {string} The text with every match replaced by anchor markup.
 */
function linkify(str){
    // Builds the anchor markup for a given target and visible label.
    function anchor(href, label) {
        return "<a href=\"" + href + "\">" + label + "</a>";
    }

    // Alternatives are tried left to right at each position, so the URL
    // forms are listed before the hashtag form: a "#fragment" belonging to
    // a URL is consumed by the URL branch rather than by the hashtag one.
    var sources = [
        "\\b((?:https?|ftp)://[^\\s\"'<>]+)\\b",
        "\\b(www\\.[^\\s\"'<>]+)\\b",
        "\\b(\\w[\\w.+-]*@[\\w.-]+\\.[a-z]{2,6})\\b",
        "#([a-z0-9]+)"
    ];
    var pattern = new RegExp(sources.join("|"), "gi");

    // Exactly one capture group is filled per match; it tells which
    // alternative fired and therefore which kind of link to emit.
    return str.replace(pattern, function(whole, fullUrl, bareHost, email, hashtag){
        if (fullUrl) {
            return anchor(fullUrl, fullUrl);
        }
        if (bareHost) {
            return anchor("http://" + bareHost, bareHost);
        }
        if (email) {
            return anchor("mailto:" + email, email);
        }
        if (hashtag) {
            return anchor("foo?bar=" + hashtag, "#" + hashtag);
        }

        // No group captured anything: hand the matched text back unchanged.
        return whole;
    });
}

Twitler

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM