簡體   English   中英

如何使用XML / SGML實體將UTF-16轉換為ASCII / ANSI?

[英]How to convert UTF-16 to ASCII/ANSI with XML/SGML entities?

XML文件:

<?xml version="1.0" encoding="utf-8"?>
<response>
<center>
<b>Need to decode this -> 😉</b>
</center>
</response>

我當前的代碼:

// Loads example.xml, reads the text of <response>/<center>/<b> and shows it in
// Memo1. Every UTF-16 code unit above $04FF is replaced by its 4-digit hex
// value followed by a space; all other code units are appended as-is.
// Code units are examined one at a time, so the two halves of a surrogate pair
// are written as two separate hex values.
procedure TForm1.Button1Click(Sender: TObject);
var
  XmlDoc: IXMLDocument;
  NodeText: WideString;
  Encoded: AnsiString;
  Ch: WideChar;
  Idx: Integer;
begin
  XmlDoc := TXMLDocument.Create(nil);
  XmlDoc.LoadFromFile('example.xml');
  NodeText := XmlDoc.DocumentElement.ChildNodes['center'].ChildNodes['b'].NodeValue;

  Encoded := '';
  for Idx := 1 to Length(NodeText) do
  begin
    Ch := NodeText[Idx];
    if Ord(Ch) <= $04FF then
      Encoded := Encoded + Ch
    else
      Encoded := Encoded + IntToHex(Ord(Ch), 4) + ' ';
  end;

  Memo1.Text := Encoded;
end;

SW以UTF-16(寬字符串)編碼,並包含字符序列#$D83D#$DE09,但我需要將其輸出為XML / SGML實體(如'&#128521;')。我該如何編碼?

使用的字符是這樣的: http://www.fileformat.info/info/unicode/char/1f609/index.htm

使用ANSI Delphi時,您必須手動處理UTF-16代理對(或使用某些第三方庫)。

這應該在ANSI和Unicode Delphi中工作:

uses
  {$IFDEF UNICODE}
  Xml.XMLDoc, Xml.XMLIntf, System.AnsiStrings, System.Character;
  {$ELSE}
  XMLDoc, XMLIntf;
  {$ENDIF}

{$R *.dfm}

type
// String type used for the text that is converted to ASCII: UnicodeString when
// the UNICODE symbol is defined, WideString otherwise.
{$IFDEF UNICODE}
    ValueString = UnicodeString;
{$ELSE}
    ValueString = WideString;
{$ENDIF}

// Assertion helper: does nothing when ATrue holds, otherwise raises an
// Exception whose message is AMessage.
procedure Check(ATrue: Boolean; const AMessage: string);
begin
  if ATrue then
    Exit;
  raise Exception.Create(AMessage);
end;

// Reports whether AChar is a UTF-16 high (leading) surrogate.
function IsHighSurrogate(AChar: WideChar): Boolean;
begin
{$IFDEF UNICODE}
  Result := TCharacter.IsHighSurrogate(AChar);
{$ELSE}
  // Every code unit in $D800..$DBFF, and no other, has 110110 as its top six
  // bits, so masking with $FC00 is equivalent to the range test.
  Result := (Ord(AChar) and $FC00) = $D800;
{$ENDIF}
end;

// Combines the surrogate pair (AHigh, ALow) into the ordinal value of the
// Unicode character it represents.
// When UNICODE is defined the work is delegated to TCharacter.ConvertToUtf32.
// Otherwise both code units are range-checked with Check, which raises an
// Exception if AHigh is outside $D800..$DBFF or ALow is outside $DC00..$DFFF.
function ConvertToUtf32(AHigh, ALow: WideChar): Integer;
begin
  {$IFDEF UNICODE}
  Result := Ord(TCharacter.ConvertToUtf32(AHigh, ALow));
  {$ELSE}
  Check((AHigh >= #$D800) and (AHigh <= #$DBFF), 'Invalid high surrogate code point');
  Check((ALow >= #$DC00) and (ALow <= #$DFFF), 'Invalid low surrogate code point');
  // The high surrogate supplies the upper 10 bits and the low surrogate the
  // lower 10 bits of the offset above $010000.
  Result := $010000 + (((Ord(AHigh) - $D800) shl 10) or (Ord(ALow) - $DC00));
  {$ENDIF}
end;

// Formats AValue as a decimal numeric character reference,
// e.g. 128521 -> '&#128521;'.
function MakeEntity(AValue: Integer): AnsiString;
const
  EntityPattern: AnsiString = '&#%d;';
begin
  Result := Format(EntityPattern, [AValue]);
end;

// Converts UTF-16 text to an ASCII string in which every character outside the
// 7-bit ASCII range is written as a decimal numeric character reference (&#N;).
//
// AInput  - UTF-16 text. A high surrogate and the code unit after it are
//           combined by ConvertToUtf32 into a single reference.
// Returns - the ASCII representation. Code units below $0080 are copied
//           through unchanged, so '&' and '<' already present in AInput are
//           NOT escaped.
// Raises  - Exception (via Check) when a high surrogate is the last code unit
//           of AInput, or when a low surrogate appears without a high
//           surrogate in front of it. ConvertToUtf32 performs its own
//           validation of the pair it is given.
function UnicodeToAsciiWithEntities(const AInput: ValueString): AnsiString;
var
  C: WideChar;
  I: Integer;
begin
  Result := '';
  I := 1;
  while I <= Length(AInput) do
  begin
    C := AInput[I];
    if C < #$0080 then
      Result := Result + AnsiChar(C)
    else if IsHighSurrogate(C) then
    begin
      Check((I + 1) <= Length(AInput), 'String truncated after high surrogate');
      Result := Result + MakeEntity(ConvertToUtf32(C, AInput[I + 1]));
      // Skip the low surrogate that was consumed together with C.
      Inc(I);
    end
    else
    begin
      // A low surrogate that reaches this branch has no high surrogate before
      // it. Writing it out as &#56320;..&#57343; would yield a character
      // reference that XML does not allow, so it is rejected in the same way
      // as the other malformed-input cases.
      Check((C < #$DC00) or (C > #$DFFF), 'Unpaired low surrogate');
      Result := Result + MakeEntity(Ord(C));
    end;
    Inc(I);
  end;
end;

// Loads example.xml, takes the value of <response>/<center>/<b>, converts it
// with UnicodeToAsciiWithEntities and shows the result in Memo1.
procedure TForm1.Button1Click(Sender: TObject);
var
  Doc: IXMLDocument;
  BoldText: ValueString;
begin
  Doc := LoadXMLDocument('example.xml');
  BoldText := Doc.DocumentElement.ChildNodes['center'].ChildNodes['b'].NodeValue;
  Memo1.Lines.Text := string(UnicodeToAsciiWithEntities(BoldText));
end;

我這裡沒有Delphi 7,因此可能需要進行一些細微調整;不過該代碼可在XE2和2007中使用。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM