簡體   English   中英

在 JavaScript 中將特殊字符轉換為 HTML

[英]Convert special characters to HTML in JavaScript

如何在 JavaScript 中將特殊字符轉換為 HTML?

例子:

  • & (與號)變為 &amp;
  • " (雙引號)在未設置 ENT_NOQUOTES 時變為 &quot;
  • ' (單引號)僅在設置了 ENT_QUOTES 時才變為 &#039;
  • < (小於)變為 &lt;
  • > (大於)變為 &gt;

我認為最好的方法是使用瀏覽器內置的 HTML 轉義功能來處理許多情況。 為此,只需在 DOM 樹中創建一個元素,並將該元素的 innerText 設置為您的字符串。 然后檢索該元素的 innerHTML,瀏覽器將返回一個 HTML 編碼的字符串。

/**
 * HTML-encodes a string by letting the browser serialize it.
 *
 * The text is written into a throw-away <div> as plain text and read back
 * through innerHTML.
 *
 * @param {string} s - Text to encode.
 * @returns {string} The innerHTML serialization of the text.
 */
function HtmlEncode(s)
{
  var holder = document.createElement("div");
  // Same order as a chained assignment: textContent first, then innerText.
  holder.textContent = s;
  holder.innerText = s;
  return holder.innerHTML;
}

測試運行:

alert(HtmlEncode('&;\'><"'));

輸出:

&amp;;'&gt;&lt;"

Prototype JS 庫也使用了這種轉義 HTML 的方法,盡管與我給出的簡單示例不同。

注意:您仍然需要自己轉義引號(雙引號和單引號)。 您可以使用此處其他人概述的任何方法。

你需要一個函數來做類似的事情

return mystring.replace(/&/g, "&amp;").replace(/>/g, "&gt;").replace(/</g, "&lt;").replace(/"/g, "&quot;");

但考慮到您對單引號/雙引號的不同處理的願望。

對於那些想要解碼字符串中 &#xxx; 這類十進制數字字符引用的人,可以使用這個函數:

 /**
  * Decodes decimal numeric character references (e.g. "&#8217;") in a string.
  *
  * Uses String.fromCodePoint so references above U+FFFF (emoji, etc.) decode
  * to the right character instead of being truncated to 16 bits.
  *
  * @param {string} str - Text that may contain "&#NNN;" references.
  * @returns {string} The text with every valid reference replaced by its character.
  */
 function decodeHtmlCharCodes(str) {
   return str.replace(/&#(\d+);/g, function (match, digits) {
     var codePoint = parseInt(digits, 10);
     // Values beyond the Unicode range would make fromCodePoint throw;
     // leave such references untouched instead.
     if (codePoint > 0x10FFFF) {
       return match;
     }
     return String.fromCodePoint(codePoint);
   });
 }
 // Example:
 //   decodeHtmlCharCodes('The show that gained int&#8217;l reputation&#8217;!')
 //   returns "The show that gained int’l reputation’!"

ES6

 /**
  * Decodes decimal numeric character references (e.g. "&#8217;") in a string.
  *
  * Uses String.fromCodePoint so references above U+FFFF decode correctly.
  *
  * @param {string} str - Text that may contain "&#NNN;" references.
  * @returns {string} The text with every valid reference replaced by its character.
  */
 const decodeHtmlCharCodes = (str) =>
   str.replace(/&#(\d+);/g, (match, digits) => {
     const codePoint = Number.parseInt(digits, 10);
     // Out-of-range values would make fromCodePoint throw; keep them verbatim.
     return codePoint > 0x10ffff ? match : String.fromCodePoint(codePoint);
   });
 // Example:
 //   decodeHtmlCharCodes('The show that gained int&#8217;l reputation&#8217;!')
 //   returns "The show that gained int’l reputation’!"

此通用函數將每個非字母字符編碼為其 HTML 代碼(數字字符引用(NCR)):

/**
 * Encodes every character that is not an ASCII letter (A-Z, a-z) as a
 * decimal numeric character reference (NCR), e.g. " " becomes "&#32;".
 *
 * Iterates by code point, so characters outside the BMP produce one
 * reference for the whole character rather than one per surrogate half.
 *
 * @param {string} str - Text to encode.
 * @returns {string} The encoded text.
 */
function HTMLEncode(str) {
    var aRet = [];

    // Array.from walks the string by code point, keeping surrogate pairs together.
    Array.from(str).forEach(function (ch) {
        var iC = ch.codePointAt(0);
        var isUpper = iC >= 65 && iC <= 90;
        var isLower = iC >= 97 && iC <= 122;
        // Everything else, including "{", "|", "}" and "~", is encoded.
        aRet.push(isUpper || isLower ? ch : '&#' + iC + ';');
    });
    return aRet.join('');
}

[編輯 2022 ] 更現代的方法:

 /**
  * Converts every character of a string to a decimal numeric character reference.
  *
  * @param {string} str - Text to convert.
  * @param {boolean} [showInHtml=false] - When true the leading "&" is itself
  *   escaped ("&amp;#NN;") so the reference is displayed literally when the
  *   result is inserted as HTML.
  * @returns {string} The concatenated references.
  */
 const toHtmlEntities = (str, showInHtml = false) =>
   [...str]
     // Spreading yields whole code points, so read the code point
     // (not just the first UTF-16 unit) of each piece.
     .map((ch) => `${showInHtml ? `&amp;#` : `&#`}${ch.codePointAt(0)};`)
     .join(``);

 const str = `&Hellõ Wórld`;

 // Demo output; only runs where a DOM is available.
 if (typeof document !== `undefined`) {
   document.body.insertAdjacentHTML(
     `beforeend`,
     `<ul> <li>Show the entities (<code>toHtmlEntities(str, true)</code>): <b>${toHtmlEntities(str, true)}</b></li> <li>Let the browser decide (<code>toHtmlEntities(str)</code>): <b>${toHtmlEntities(str)}</b></li> <li id="textOnly"></li></ul>`
   );
   document.querySelector(`#textOnly`).textContent = `As textContent: ${toHtmlEntities(str)}`;
 }
 body { font: 14px / 18px "normal verdana", arial; margin: 1rem; } code { background-color: #eee; }

創建一個使用字符串replace的函數

/**
 * Escapes the five HTML-significant characters & > < " ' in a string.
 *
 * @param {string} str - Text to escape.
 * @returns {string} The escaped text.
 */
function convert(str)
{
  var entityFor = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    '"': "&quot;",
    "'": "&#039;"
  };
  // One pass is enough: none of the replacement texts contain > < " or '.
  return str.replace(/[&><"']/g, function (ch) {
    return entityFor[ch];
  });
}

來自 Mozilla...

請注意,charCodeAt 將始終返回小於 65,536 的值。 這是因為較高的代碼點由一對(較低值的)“代理”偽字符表示,這些偽字符用於組成真實字符。 因此,為了檢查或再現值為 65,536 及以上的單個字符的完整代碼點,對於此類字符,不僅需要檢索 charCodeAt(i),還需要檢索 charCodeAt(i+1)(就像檢查/復制一個由兩個字符組成的字符串一樣)。

最佳解決方案

/**
 * (c) 2012 Steven Levithan <http://slevithan.com/>
 * MIT license
 */
// Polyfill: only installed when the runtime lacks String.prototype.codePointAt.
if (!String.prototype.codePointAt) {
    /**
     * Returns the code point starting at the given UTF-16 index.
     *
     * @param {number} pos - Index into the string; a NaN-like value is treated as 0.
     * @returns {number} The combined code point when the index starts a valid
     *   surrogate pair, otherwise the single code unit at that index.
     */
    String.prototype.codePointAt = function (pos) {
        var index = isNaN(pos) ? 0 : pos;
        var text = String(this);
        var lead = text.charCodeAt(index);
        var trail = text.charCodeAt(index + 1);
        var isLead = lead >= 0xD800 && lead <= 0xDBFF;
        var isTrail = trail >= 0xDC00 && trail <= 0xDFFF;

        if (!(isLead && isTrail)) {
            return lead;
        }
        // Combine the surrogate pair into a single code point.
        return 0x10000 + (lead - 0xD800) * 0x400 + (trail - 0xDC00);
    };
}

/**
 * Encodes every non-ASCII character (code point above 127) as a decimal
 * numeric character reference; ASCII characters are copied unchanged.
 *
 * @param {string} string - Text to encode.
 * @return {string} The encoded text.
 */
function html_encode(string) {
    var ret_val = '';
    for (var i = 0; i < string.length; i++) {
        var codePoint = string.codePointAt(i);
        if (codePoint > 127) {
            ret_val += '&#' + codePoint + ';';
            // A code point above U+FFFF occupies two UTF-16 units; skip the
            // low surrogate so it is not emitted as a second, bogus reference.
            if (codePoint > 0xFFFF) {
                i++;
            }
        } else {
            ret_val += string.charAt(i);
        }
    }
    return ret_val;
}

使用示例:

html_encode("✈");

正如dragon 提到的,最干凈的方法是使用jQuery

/**
 * HTML-encodes text via jQuery: the text is stored in a detached <div> as
 * plain text and read back as markup.
 *
 * @param {string} s - Text to encode.
 * @returns {string} The markup jQuery reports for that text.
 */
function htmlEncode(s) {
    var $holder = $('<div>');
    var $filled = $holder.text(s);
    return $filled.html();
}

/**
 * Decodes HTML character references in a string.
 *
 * The input is parsed inside a detached <textarea> rather than a <div>:
 * a textarea treats its content as text, so markup in untrusted input
 * (e.g. <img onerror=...> or <script>) is never instantiated. Literal tags
 * in the input are therefore returned as text instead of being stripped.
 *
 * @param {string} s - Text containing character references.
 * @returns {string} The decoded text.
 */
function htmlDecode(s) {
    var scratch = document.createElement('textarea');
    scratch.innerHTML = s;
    return scratch.value;
}
/**
 * Replaces special characters in the `value` of each object passed in
 * (e.g. form fields) with HTML character references, in place.
 *
 * `chars` and `codes` are parallel arrays: chars[n] is replaced by codes[n].
 * Every occurrence is replaced, so when a character appears more than once
 * in `chars`, its first mapping wins.
 *
 * Accepts any number of arguments; each must expose a string `value` property.
 */
function char_convert() {

    var chars = ["©","Û","®","ž","Ü","Ÿ","Ý","$","Þ","%","¡","ß","¢","à","£","á","À","¤","â","Á","¥","ã","Â","¦","ä","Ã","§","å","Ä","¨","æ","Å","©","ç","Æ","ª","è","Ç","«","é","È","¬","ê","É","\u00AD","ë","Ê","®","ì","Ë","¯","í","Ì","°","î","Í","±","ï","Î","²","ð","Ï","³","ñ","Ð","´","ò","Ñ","µ","ó","Õ","¶","ô","Ö","·","õ","Ø","¸","ö","Ù","¹","÷","Ú","º","ø","Û","»","ù","Ü","@","¼","ú","Ý","½","û","Þ","€","¾","ü","ß","¿","ý","à","‚","À","þ","á","ƒ","Á","ÿ","å","„","Â","æ","…","Ã","ç","†","Ä","è","‡","Å","é","ˆ","Æ","ê","‰","Ç","ë","Š","È","ì","‹","É","í","Œ","Ê","î","Ë","ï","Ž","Ì","ð","Í","ñ","Î","ò","‘","Ï","ó","’","Ð","ô","“","Ñ","õ","”","Ò","ö","•","Ó","ø","–","Ô","ù","—","Õ","ú","˜","Ö","û","™","×","ý","š","Ø","þ","›","Ù","ÿ","œ","Ú"];
    var codes = ["&copy;","&#219;","&reg;","&#158;","&#220;","&#159;","&#221;","&#36;","&#222;","&#37;","&#161;","&#223;","&#162;","&#224;","&#163;","&#225;","&Agrave;","&#164;","&#226;","&Aacute;","&#165;","&#227;","&Acirc;","&#166;","&#228;","&Atilde;","&#167;","&#229;","&Auml;","&#168;","&#230;","&Aring;","&#169;","&#231;","&AElig;","&#170;","&#232;","&Ccedil;","&#171;","&#233;","&Egrave;","&#172;","&#234;","&Eacute;","&#173;","&#235;","&Ecirc;","&#174;","&#236;","&Euml;","&#175;","&#237;","&Igrave;","&#176;","&#238;","&Iacute;","&#177;","&#239;","&Icirc;","&#178;","&#240;","&Iuml;","&#179;","&#241;","&ETH;","&#180;","&#242;","&Ntilde;","&#181;","&#243;","&Otilde;","&#182;","&#244;","&Ouml;","&#183;","&#245;","&Oslash;","&#184;","&#246;","&Ugrave;","&#185;","&#247;","&Uacute;","&#186;","&#248;","&Ucirc;","&#187;","&#249;","&Uuml;","&#64;","&#188;","&#250;","&Yacute;","&#189;","&#251;","&THORN;","&#128;","&#190;","&#252;","&szlig;","&#191;","&#253;","&agrave;","&#130;","&#192;","&#254;","&aacute;","&#131;","&#193;","&#255;","&aring;","&#132;","&#194;","&aelig;","&#133;","&#195;","&ccedil;","&#134;","&#196;","&egrave;","&#135;","&#197;","&eacute;","&#136;","&#198;","&ecirc;","&#137;","&#199;","&euml;","&#138;","&#200;","&igrave;","&#139;","&#201;","&iacute;","&#140;","&#202;","&icirc;","&#203;","&iuml;","&#142;","&#204;","&eth;","&#205;","&ntilde;","&#206;","&ograve;","&#145;","&#207;","&oacute;","&#146;","&#208;","&ocirc;","&#147;","&#209;","&otilde;","&#148;","&#210;","&ouml;","&#149;","&#211;","&oslash;","&#150;","&#212;","&ugrave;","&#151;","&#213;","&uacute;","&#152;","&#214;","&ucirc;","&#153;","&#215;","&yacute;","&#154;","&#216;","&thorn;","&#155;","&#217;","&yuml;","&#156;","&#218;"];

    // Loop counters are declared locally so they do not leak as globals.
    for (var x = 0; x < chars.length; x++) {
        for (var i = 0; i < arguments.length; i++) {
            // split/join replaces ALL occurrences; replace() with a string
            // pattern would only touch the first one.
            arguments[i].value = arguments[i].value.split(chars[x]).join(codes[x]);
        }
    }
 }

char_convert(this);
/**
 * Escapes the characters < > & " ' # as HTML character references.
 *
 * @param {string} str - Text to escape.
 * @returns {string} The escaped text.
 */
function ConvChar(str) {
    // Local lookup table (previously leaked as an implicit global).
    var entities = {'<':'&lt;', '>':'&gt;', '&':'&amp;',
                    '"':'&quot;', "'":'&#039;', '#':'&#035;' };

    // The character class lists exactly the keys of the table above, so
    // every match has a replacement.
    return str.replace(/[<>&'"#]/g, function(s) { return entities[s]; });
}

alert(ConvChar('&lt;-"-&-"->-&lt;-\'-#-\'->'));

結果:

&lt;-&quot;-&amp;-&quot;-&gt;-&lt;-&#039;-&#035;-&#039;-&gt;

在 textarea 標簽中:

<-"-&-"->-<-'-#-'->

如果您只是更改長代碼中的幾個字符...

如果您需要支持所有標准化的命名字符引用、Unicode 以及模棱兩可的 & 符號,那么 he 庫是我所知道的唯一 100% 可靠的解決方案!


示例使用

he.encode('foo © bar ≠ baz 𝌆 qux');
// Output: 'foo &#xA9; bar &#x2260; baz &#x1D306; qux'

he.decode('foo &copy; bar &ne; baz &#x1D306; qux');
// Output: 'foo © bar ≠ baz 𝌆 qux'

PRE標記中 - 以及大多數其他 HTML 標記中 - 使用輸出重定向字符( <> )的批處理文件的純文本將破壞 HTML,但這是我的提示:任何內容都包含在TEXTAREA元素中 - 它不會破壞 HTML,主要是因為我們位於由操作系統實例化和處理的控件內部,因此 HTML 引擎不會解析其內容。

例如,假設我想使用 JavaScript 突出顯示批處理文件的語法。 我只是將代碼粘貼到 textarea 中,而不用擔心 HTML 保留字符,並讓腳本處理 textarea 的innerHTML屬性,該屬性評估文本並將 HTML 保留字符替換為相應的ISO 8859-1實體。

當您檢索元素的innerHTML (和outerHTML )屬性時,瀏覽器會自動轉義特殊字符。 使用 textarea(誰知道呢,可能是文本類型的輸入)只是讓您免於進行轉換(手動或通過代碼)。

我使用這個技巧來測試我的語法熒光筆,當我完成創作和測試時,我只是從視圖中隱藏了 textarea。

一種解決方法:

// Create a NEW detached <div> ("<div>") to hold the text. The selector "div"
// would instead match every existing div on the page and overwrite its text.
var temp = $("<div>").text("<");
var afterEscape = temp.html(); // afterEscape == "&lt;"
<!doctype html>
<html lang="en">
    <head>
        <meta charset="utf-8">
        <title>html</title>

        <script>
            // Wait until the DOM is parsed before touching #test. The native
            // DOMContentLoaded event is used because this page never loads
            // jQuery, so $(function () { ... }) would throw a ReferenceError.
            document.addEventListener('DOMContentLoaded', function() {
                document.getElementById('test').innerHTML = "&amp;";
            });
        </script>
    </head>

    <body>
        <div id="test"></div>
    </body>
</html>

您可以使用上面的代碼簡單地將特殊字符轉換為 HTML。

這是一個很好的庫,我發現在這種情況下非常有用。

https://github.com/mathiasbynens/he

據其作者稱:

它支持 HTML 規范中所有標准化的命名字符引用,像瀏覽器一樣處理模棱兩可的 & 符號和其他邊緣情況,具有廣泛的測試套件,並且 - 與許多其他 JavaScript 解決方案相反 - he 可以很好地處理星界平面(astral)的 Unicode 符號。

利用:

// Parallel tables: the character with code swapCodes[n] is replaced by swapStrings[n].
var swapCodes   = [8211, 8212, 8216, 8217, 8220, 8221, 8226, 8230, 8482, 169, 61558, 8226, 61607];
var swapStrings = ["--", "--", "'",  "'",  '"',  '"',  "*",  "...", "&trade;", "&copy;", "&bull;", "&bull;", "&bull;"];

var TextCheck = {
    /**
     * Attaches cleanWord to events of the given field.
     * @param {string} div - jQuery selector of the field to watch.
     */
    doCWBind:function(div){
        $(div).bind({
            // NOTE(review): "bind" is not a DOM event name, so this handler
            // presumably never fires; "blur" may have been intended. Confirm.
            bind:function(){
                TextCheck.cleanWord(div);
            },
            focus:function(){
                TextCheck.cleanWord(div);
            },
            paste:function(){
                TextCheck.cleanWord(div);
            }
        });
    },
    /**
     * Rewrites the field's value, replacing each character listed in
     * swapCodes with the matching entry of swapStrings.
     * @param {string} div - jQuery selector of the field to clean.
     */
    cleanWord:function(div){
        var output = $(div).val();
        for (var i = 0; i < swapCodes.length; i++) {
            // Build the literal character and replace all occurrences.
            // A "\u" + hex pattern is only a valid escape with exactly four
            // hex digits, so codes such as 169 ("a9") were never matched.
            var target = String.fromCharCode(swapCodes[i]);
            output = output.split(target).join(swapStrings[i]);
        }
        $(div).val(output);
    }
};

我們現在使用的另一種有效。 上面的那個我讓它調用一個腳本並返回轉換后的代碼。 它只適用於小文本區域(意味着文章、博客等不完整)


對於上述。 它適用於大多數角色。

// Character codes to replace; swapCodes[n] pairs with swapStrings[n].
var swapCodes = [
    8211, 8212, 8216, 8217, 8220, 8221, 8226, 8230, 8482, 61558, 8226, 61607,
    161, 162, 163, 164, 165, 166, 167, 168, 169, 170,
    171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
    181, 182, 183, 184, 185, 186, 187, 188, 189, 190,
    191, 192, 193, 194, 195, 196, 197, 198, 199, 200,
    201, 202, 203, 204, 205, 206, 207, 208, 209, 210,
    211, 212, 213, 214, 215, 216, 217, 218, 219, 220,
    221, 222, 223, 224, 225, 226, 227, 228, 229, 230,
    231, 232, 233, 234, 235, 236, 237, 238, 239, 240,
    241, 242, 243, 244, 245, 246, 247, 248, 249, 250,
    251, 252, 253, 254, 255,
    338, 339, 352, 353, 376, 402
];
// Replacement text for each code above, in the same order.
var swapStrings = [
    "--", "--", "'", "'", '"', '"', "*", "...", "&trade;", "&bull;", "&bull;", "&bull;",
    "&iexcl;", "&cent;", "&pound;", "&curren;", "&yen;", "&brvbar;", "&sect;", "&uml;", "&copy;", "&ordf;",
    "&laquo;", "&not;", "&shy;", "&reg;", "&macr;", "&deg;", "&plusmn;", "&sup2;", "&sup3;", "&acute;",
    "&micro;", "&para;", "&middot;", "&cedil;", "&sup1;", "&ordm;", "&raquo;", "&frac14;", "&frac12;", "&frac34;",
    "&iquest;", "&Agrave;", "&Aacute;", "&Acirc;", "&Atilde;", "&Auml;", "&Aring;", "&AElig;", "&Ccedil;", "&Egrave;",
    "&Eacute;", "&Ecirc;", "&Euml;", "&Igrave;", "&Iacute;", "&Icirc;", "&Iuml;", "&ETH;", "&Ntilde;", "&Ograve;",
    "&Oacute;", "&Ocirc;", "&Otilde;", "&Ouml;", "&times;", "&Oslash;", "&Ugrave;", "&Uacute;", "&Ucirc;", "&Uuml;",
    "&Yacute;", "&THORN;", "&szlig;", "&agrave;", "&aacute;", "&acirc;", "&atilde;", "&auml;", "&aring;", "&aelig;",
    "&ccedil;", "&egrave;", "&eacute;", "&ecirc;", "&euml;", "&igrave;", "&iacute;", "&icirc;", "&iuml;", "&eth;",
    "&ntilde;", "&ograve;", "&oacute;", "&ocirc;", "&otilde;", "&ouml;", "&divide;", "&oslash;", "&ugrave;", "&uacute;",
    "&ucirc;", "&uuml;", "&yacute;", "&thorn;", "&yuml;",
    "&#338;", "&#339;", "&#352;", "&#353;", "&#376;", "&#402;"
];

我創建了一個具有很多功能的 javascript 文件,包括上述功能。 http://www.neotropicsolutions.com/JSChars.zip

包括所有需要的文件。 我添加了 jQuery 1.4.4。 只是因為我在其他版本中看到了問題,還沒有嘗試過。

Requires: jQuery & jQuery Impromptu from: http://trentrichardson.com/Impromptu/index.php

1. Word Count
2. Character Conversion
3. Checks to ensure this is not passed: "notsomeverylongstringmissingspaces"
4. Checks to make sure ALL IS NOT ALL UPPERCASE.
5. Strip HTML

    // Word Counter
    // Loads js/characters.js and, once it has loaded, wires the TextCheck
    // helpers to the #adtxt field.
    $.getScript('js/characters.js', function(){
        // Re-run the word count on every listed interaction with the field.
        // NOTE(review): 30 is presumably the word limit - confirm against
        // TextCheck.wordCount.
        $('#adtxt').bind("keyup click blur focus change paste",
            function(event){
                TextCheck.wordCount(30, "#adtxt", "#adtxt_count", event);
        });
        $('#adtxt').blur(
            function(event){
                TextCheck.check_length('#adtxt'); // ensures the text is properly spaced - not one long word
                TextCheck.doCWBind('#adtxt'); // char conversion
        });

        // Initial count, passing false in place of an event object.
        TextCheck.wordCount(30, "#adtxt", "#adtxt_count", false);
    });

    //HTML
    <textarea name="adtxt" id="adtxt" rows="10" cols="70" class="wordCount"></textarea>

    <div id="adtxt_count" class="clear"></div>

    // Just Character Conversions:
    TextCheck.doCWBind('#myfield');

    // Case-check the listed form fields; the user is alerted when a field
    // loses focus.
    var checking = ["descr", "title", "fname", "website", "email", "linkback"];
    var labels = ["Brief Description", "Website URL", "Contact Name", "Website", "Email", "Linkback URL"];
    TextCheck.check_it(checking, labels);

    // Validate once more as an extra safeguard before acting on the form.
    var pass = TextCheck.validate(checking, labels);
    if (pass) {
        // Do form actions
    }

    //Strip HTML
    <textarea name="adtxt" id="adtxt" rows="10" cols="70" onblur="TextCheck.stripHTML(this);"></textarea>
/**
 * Replaces < > & " ' with decimal numeric character references.
 *
 * @param {string} text - Text to escape.
 * @returns {string} The escaped text.
 */
function escape (text)
{
  var toNumericReference = function(ch) {
    return '&#' + ch.charCodeAt(0) + ';';
  };
  return text.replace(/[<>\&\"\']/g, toNumericReference);
}

alert(escape("<>&'\""));

這並不能直接回答您的問題,但是如果您使用innerHTML來在元素中編寫文本並且遇到編碼問題,只需使用textContent ,即:

var s = "Foo 'bar' baz <qux>";

var element = document.getElementById('foo');
element.textContent = s;

// <div id="foo">Foo 'bar' baz <qux></div>

以下是我無需jQuery即可使用的幾種方法:

您可以對字符串中的每個字符進行編碼

/**
 * Encodes every character of a string as a decimal numeric character reference.
 *
 * The "u" flag makes the pattern match whole code points, so a character
 * outside the BMP yields one reference rather than two surrogate halves.
 *
 * @param {string} e - Text to encode.
 * @returns {string} The encoded text.
 */
function encode(e) {
  return e.replace(/[^]/gu, function (ch) {
    return "&#" + ch.codePointAt(0) + ";";
  });
}

或者只針對需要關注的主要安全相關字符進行編碼(&、換行符(linebreaks)、<、>、" 和 '),例如:

 /**
  * Encodes the main HTML-sensitive characters (& newline < > ' ") as decimal
  * numeric character references and leaves everything else untouched.
  *
  * \x26 is the ampersand and \x0A is the newline. The order inside the
  * character class does not matter: each input character is visited once,
  * so the "&" of an emitted reference is never encoded again.
  *
  * @param {string} r - Text to encode.
  * @returns {string} The encoded text.
  */
 function encode(r) {
   return r.replace(/[\x26\x0A<>'"]/g, function (ch) {
     return "&#" + ch.charCodeAt(0) + ";";
   });
 }

 // Demo: fill the <textarea id="test"> when the page provides one. The
 // element is looked up explicitly instead of relying on the implicit
 // global that browsers create for element ids.
 if (typeof document !== "undefined") {
   var demoField = document.getElementById("test");
   if (demoField) {
     demoField.value = encode('How to encode\nonly html tags &<>\'" nice & fast!');
   }
 }
 <textarea id=test rows="9" cols="55">www.WHAK.com</textarea>

我們可以使用 JavaScript 的DOMParser進行特殊字符的轉換。

// Let the browser's HTML parser decode the character references.
// "text/html" is required: only HTML documents expose `.body`, and the HTML
// parser tolerates these references written without a trailing semicolon.
const parser = new DOMParser();
const convertedValue = parser.parseFromString("&#039 &amp &#039 &lt &gt", "text/html").body.textContent;

您可以通過將函數 .text() 替換為 .html() 來修復它。 它為我工作。

如果您使用的是Lodash ,您可以這樣做(從文檔中復制粘貼):

_.escape('fred, barney, & pebbles');
// => 'fred, barney, &amp; pebbles'

更多信息:_. escape([string=''])

我自己為此苦苦掙扎了很長一段時間,但我決定使用這個否定匹配正則表達式來匹配所有特殊字符並將它們轉換為相關的字符代碼:

// Encode every character other than A-Z, a-z and 0-9 as a decimal numeric
// character reference. The "u" flag makes the pattern match whole code
// points, so a character outside the BMP becomes one reference instead of
// two surrogate halves.
var encoded = value.replace(/[^A-Za-z0-9]/gu, function(ch) {
    return '&#' + ch.codePointAt(0) + ';';
});

參見 JavaScript htmlentities http://phpjs.org/functions/htmlentities:425

是的,但是如果您需要將結果字符串插入某處而不將其轉換回來,則需要執行以下操作:

str.replace(/'/g,"&amp;amp;#39;"); // and so on

以下是在 JavaScript 中對 XML 轉義字符進行編碼的函數:

Encoder.htmlEncode(unsafeText);
<html>
    <body>
        <script type="text/javascript">
            // Show the sample string before and after escaping the five
            // HTML-significant characters.
            var str = "&\"'<>";
            alert('B4 Change: \n' + str);

            // "&" is handled first so the ampersands of the entities added
            // by the later steps are not escaped again.
            str = str
                .replace(/\&/g, '&amp;')
                .replace(/</g,  '&lt;')
                .replace(/>/g,  '&gt;')
                .replace(/\"/g, '&quot;')
                .replace(/\'/g, '&#039;');

            alert('After change: \n' + str);
        </script>
    </body>
</html>

使用它來測試: http ://www.w3schools.com/js/tryit.asp?filename=tryjs_text

使用 JavaScript 函數escape() ,它可以讓您對字符串進行編碼。

例如,

escape("yourString");
/// <summary>
/// HTML-encodes the given text by writing it through an HtmlTextWriter.
/// </summary>
/// <param name="text">Text to encode.</param>
/// <returns>The text as written by HtmlTextWriter.WriteEncodedText.</returns>
public static string HtmlEncode (string text)
{
    using (StringWriter sw = new StringWriter())
    // Dispose the HtmlTextWriter as well; it was previously left undisposed.
    using (HtmlTextWriter writer = new HtmlTextWriter(sw))
    {
        writer.WriteEncodedText(text);
        // Make sure everything has reached the StringWriter before reading it.
        writer.Flush();
        return sw.ToString();
    }
}

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM