[英]Delphi 2009 - Strip non alpha numeric from string
I've got the following code, and need to strip all non alpha numeric characters. 我有以下代码,并且需要去除所有非字母数字字符。 It's not working in delphi 2009
在Delphi 2009中不起作用
unit Unit2;
//Used information from
// http://stackoverflow.com/questions/574603/what-is-the-fastest-way-of-stripping-non-alphanumeric-characters-from-a-string-in
interface
uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls;
Type
TExplodeArray = Array Of String;
TForm2 = class(TForm)
Memo1: TMemo;
ListBox1: TListBox;
Button1: TButton;
procedure Button1Click(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
Function Explode ( Const cSeparator, vString : String ) : TExplodeArray;
Function Implode ( Const cSeparator : String; Const cArray : TExplodeArray ) : String;
Function StripHTML ( S : String ) : String;
function allwords(data:string):integer;
end;
var
Form2: TForm2;
allword, allphrase: TExplodeArray;
implementation
{$R *.dfm}
Function TForm2.StripHTML ( S : String ) : String;
Var
TagBegin, TagEnd, TagLength : Integer;
Begin
TagBegin := Pos ( '<', S ); // search position of first <
While ( TagBegin > 0 ) Do
Begin // while there is a < in S
TagEnd := Pos ( '>', S ); // find the matching >
TagLength := TagEnd - TagBegin + 1;
Delete ( S, TagBegin, TagLength ); // delete the tag
TagBegin := Pos ( '<', S ); // search for next <
End;
Result := S; // give the result
End;
Function TForm2.Implode ( Const cSeparator : String; Const cArray : TExplodeArray ) : String;
Var
i : Integer;
Begin
Result := '';
For i := 0 To Length ( cArray ) - 1 Do
Begin
Result := Result + cSeparator + cArray [i];
End;
System.Delete ( Result, 1, Length ( cSeparator ) );
End;
Function TForm2.Explode ( Const cSeparator, vString : String ) : TExplodeArray;
Var
i : Integer;
S : String;
Begin
S := vString;
SetLength ( Result, 0 );
i := 0;
While Pos ( cSeparator, S ) > 0 Do
Begin
SetLength ( Result, Length ( Result ) + 1 );
Result[i] := Copy ( S, 1, Pos ( cSeparator, S ) - 1 );
Inc ( i );
S := Copy ( S, Pos ( cSeparator, S ) + Length ( cSeparator ), Length ( S ) );
End;
SetLength ( Result, Length ( Result ) + 1 );
Result[i] := Copy ( S, 1, Length ( S ) );
End;
//Copied from JclStrings
function StrKeepChars(const S: AnsiString; const Chars: TSysCharSet): AnsiString;
var
Source, Dest: PChar;
begin
SetLength(Result, Length(S));
UniqueString(Result);
Source := PChar(S);
Dest := PChar(Result);
while (Source <> nil) and (Source^ <> #0) do
begin
if Source^ in Chars then
begin
Dest^ := Source^;
Inc(Dest);
end;
Inc(Source);
end;
SetLength(Result, (Longint(Dest) - Longint(PChar(Result))) div SizeOf(AnsiChar));
end;
function ReplaceNewlines(const AValue: string): string;
var
SrcPtr, DestPtr: PChar;
begin
SrcPtr := PChar(AValue);
SetLength(Result, Length(AValue));
DestPtr := PChar(Result);
while SrcPtr <> {greater than less than} #0 do begin
if (SrcPtr[0] = #13) and (SrcPtr[1] = #10) then begin
DestPtr[0] := '\';
DestPtr[1] := 't';
Inc(SrcPtr);
Inc(DestPtr);
end else
DestPtr[0] := SrcPtr[0];
Inc(SrcPtr);
Inc(DestPtr);
end;
SetLength(Result, DestPtr - PChar(Result));
end;
function StripNonAlphaNumeric(const AValue: string): string;
var
SrcPtr, DestPtr: PChar;
begin
SrcPtr := PChar(AValue);
SetLength(Result, Length(AValue));
DestPtr := PChar(Result);
while SrcPtr <> #0 do begin
if SrcPtr[0] in ['a'..'z', 'A'..'Z', '0'..'9'] then begin
DestPtr[0] := SrcPtr[0];
Inc(DestPtr);
end;
Inc(SrcPtr);
end;
SetLength(Result, DestPtr - PChar(Result));
end;
function TForm2.allwords(data:string):integer;
var i:integer;
begin
listbox1.Items.add(data);
data:= StripHTML ( data );
listbox1.Items.add(data);
//////////////////////////////////////////////////////////////
data := StrKeepChars(data, ['A'..'Z', 'a'..'z', '0'..'9']);
// Strips out everything data comes back blank in Delphi 2009
//////////////////////////////////////////////////////////////
listbox1.Items.add(data);
data := stringreplace(data,' ',' ', [rfReplaceAll, rfIgnoreCase] );
//Replace two spaces with one.
listbox1.Items.add(data);
allword:= explode(' ',data);
{ // Converting the following PHP code to Delphi
$text = ereg_replace("[^[:alnum:]]", " ", $text);
while(strpos($text,' ')!==false) $text = ereg_replace(" ", " ", $text);
$text=$string=strtolower($text);
$text=explode(" ",$text);
return count($text);
}
for I := 0 to Length(allword) - 1 do
listbox1.Items.Add(allword[i]);
end;
procedure TForm2.Button1Click(Sender: TObject);
begin
//[^[:alnum:]]
allwords(memo1.Text);
end;
end.
How else would I go about doing this? 我还能怎么做呢?
想到的最简单的解决方案是定义一个正则表达式,该正则表达式返回输入字符串减去其中的任何非字母字符。
It's been a while since I did much with Delphi - version 5 was my playground. 自从我对Delphi做很多事情以来已经有一段时间了-版本5是我的游乐场。
Isn't one of the primary features of Delphi 2009 that it's now Unicode throughout, by default. 它不是Delphi 2009的主要功能之一,默认情况下现在始终是Unicode。
This has impact on anything that tries to process character by character. 这会影响尝试逐字符处理的任何内容。 Could it be the source of your problem?
可能是您遇到问题的根源吗?
Uses StrUtils; //StuffString
var
Regex: TPerlRegEx;
I:Integer;
begin
Regex := TPerlRegEx.Create(nil);
Regex.RegEx := '[^[:alnum:]]';
Regex.Options := [preMultiLine];
Regex.Subject := data;
if Regex.Match then begin
repeat
data := StuffString(data,Regex.MatchedExpressionOffset,Regex.MatchedExpressionLength,' ');
until not Regex.MatchAgain;
end;
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.