简体   繁体   English

Delphi 2009-从字符串中去除非字母数字

[英]Delphi 2009 - Strip non alpha numeric from string

I've got the following code, and need to strip all non alpha numeric characters. 我有以下代码,并且需要去除所有非字母数字字符。 It's not working in delphi 2009 在Delphi 2009中不起作用

unit Unit2;
//Used information from
// http://stackoverflow.com/questions/574603/what-is-the-fastest-way-of-stripping-non-alphanumeric-characters-from-a-string-in

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls;
Type
     TExplodeArray = Array Of String;

  TForm2 = class(TForm)
    Memo1: TMemo;
    ListBox1: TListBox;
    Button1: TButton;
    procedure Button1Click(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
    Function Explode ( Const cSeparator, vString : String ) : TExplodeArray;
    Function Implode ( Const cSeparator : String; Const cArray : TExplodeArray ) : String;
    Function StripHTML ( S : String ) : String;
    function allwords(data:string):integer;
  end;

var
  Form2: TForm2;
  allword, allphrase: TExplodeArray;

implementation
{$R *.dfm}
Function TForm2.StripHTML ( S : String ) : String;
Var
     TagBegin, TagEnd, TagLength : Integer;
Begin
     TagBegin := Pos ( '<', S );      // search position of first <

     While ( TagBegin > 0 ) Do
          Begin  // while there is a < in S
          TagEnd := Pos ( '>', S );              // find the matching >
          TagLength := TagEnd - TagBegin + 1;
          Delete ( S, TagBegin, TagLength );     // delete the tag
          TagBegin := Pos ( '<', S );            // search for next <
          End;

     Result := S;                   // give the result
End;
Function TForm2.Implode ( Const cSeparator : String; Const cArray : TExplodeArray ) : String;
Var
     i : Integer;
Begin
     Result := '';
     For i := 0 To Length ( cArray ) - 1 Do
          Begin
          Result := Result + cSeparator + cArray [i];
          End;
     System.Delete ( Result, 1, Length ( cSeparator ) );
End;

Function TForm2.Explode ( Const cSeparator, vString : String ) : TExplodeArray;
Var
     i : Integer;
     S : String;
Begin
     S := vString;
     SetLength ( Result, 0 );
     i := 0;
     While Pos ( cSeparator, S ) > 0 Do
          Begin
          SetLength ( Result, Length ( Result ) + 1 );
          Result[i] := Copy ( S, 1, Pos ( cSeparator, S ) - 1 );
          Inc ( i );
          S := Copy ( S, Pos ( cSeparator, S ) + Length ( cSeparator ), Length ( S ) );
          End;
     SetLength ( Result, Length ( Result ) + 1 );
     Result[i] := Copy ( S, 1, Length ( S ) );
End;
//Copied from JclStrings
function StrKeepChars(const S: AnsiString; const Chars: TSysCharSet): AnsiString;
var
  Source, Dest: PChar;
begin
  SetLength(Result, Length(S));
  UniqueString(Result);
  Source := PChar(S);
  Dest := PChar(Result);
  while (Source <> nil) and (Source^ <> #0) do
  begin
    if Source^ in Chars then
    begin
      Dest^ := Source^;
      Inc(Dest);
    end;
    Inc(Source);
  end;
  SetLength(Result, (Longint(Dest) - Longint(PChar(Result))) div SizeOf(AnsiChar));
end;
function ReplaceNewlines(const AValue: string): string;
var
  SrcPtr, DestPtr: PChar;
begin
  SrcPtr := PChar(AValue);
  SetLength(Result, Length(AValue));
  DestPtr := PChar(Result);
  while SrcPtr <> {greater than less than} #0 do begin
    if (SrcPtr[0] = #13) and (SrcPtr[1] = #10) then begin
      DestPtr[0] := '\';
      DestPtr[1] := 't';
      Inc(SrcPtr);
      Inc(DestPtr);
    end else
      DestPtr[0] := SrcPtr[0];
    Inc(SrcPtr);
    Inc(DestPtr);
  end;
  SetLength(Result, DestPtr - PChar(Result));
end;
function StripNonAlphaNumeric(const AValue: string): string;
var
  SrcPtr, DestPtr: PChar;
begin
  SrcPtr := PChar(AValue);
  SetLength(Result, Length(AValue));
  DestPtr := PChar(Result);
  while SrcPtr <> #0 do begin
    if SrcPtr[0] in ['a'..'z', 'A'..'Z', '0'..'9'] then begin
      DestPtr[0] := SrcPtr[0];
      Inc(DestPtr);
    end;
    Inc(SrcPtr);
  end;
  SetLength(Result, DestPtr - PChar(Result));
end;
function TForm2.allwords(data:string):integer;
var i:integer;
begin
  listbox1.Items.add(data);
  data:= StripHTML ( data );
  listbox1.Items.add(data);
  //////////////////////////////////////////////////////////////
  data := StrKeepChars(data, ['A'..'Z', 'a'..'z', '0'..'9']);
  // Strips out everything data comes back blank in Delphi 2009
  //////////////////////////////////////////////////////////////
  listbox1.Items.add(data);
  data := stringreplace(data,'  ',' ', [rfReplaceAll, rfIgnoreCase] );
  //Replace two spaces with one.
  listbox1.Items.add(data);
  allword:= explode(' ',data);
 { // Converting the following PHP code to Delphi
    $text = ereg_replace("[^[:alnum:]]", " ", $text);
    while(strpos($text,'  ')!==false) $text = ereg_replace("  ", " ", $text);
    $text=$string=strtolower($text);
    $text=explode(" ",$text);
    return count($text);
}
 for I := 0 to Length(allword) - 1 do
 listbox1.Items.Add(allword[i]);
end;
procedure TForm2.Button1Click(Sender: TObject);
begin
//[^[:alnum:]]

allwords(memo1.Text);
end;

end.

How else would I go about doing this? 我还能怎么做呢?

想到的最简单的解决方案是定义一个正则表达式,该正则表达式返回输入字符串减去其中的任何非字母字符。

It's been a while since I did much with Delphi - version 5 was my playground. 自从我对Delphi做很多事情以来已经有一段时间了-版本5是我的游乐场。

Isn't one of the primary features of Delphi 2009 that it's now Unicode throughout, by default. 它不是Delphi 2009的主要功能之一,默认情况下现在始终是Unicode。

This has impact on anything that tries to process character by character. 这会影响尝试逐字符处理的任何内容。 Could it be the source of your problem? 可能是您遇到问题的根源吗?


Uses StrUtils; //StuffString

var
    Regex: TPerlRegEx;
  I:Integer;
begin
Regex := TPerlRegEx.Create(nil);
Regex.RegEx := '[^[:alnum:]]';
Regex.Options := [preMultiLine];
Regex.Subject := data;
if Regex.Match then begin
    repeat
    data := StuffString(data,Regex.MatchedExpressionOffset,Regex.MatchedExpressionLength,' ');
    until not Regex.MatchAgain;
end;

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM