I have an app that needs to do heavy text manipulation in a TStringList. Basically i need to split text by a delimiter ; for instance, if i have a singe line with 1000 chars and this delimiter occurs 3 times in this line, then i need to split it in 3 lines. The delimiter can contain more than one char, it can be a tag like '[test]' for example.
I've wrote two functions to do this task with 2 different approaches, but both are slow in big amounts of text (more then 2mbytes usually).
How can i achieve this goal in a faster way ?
Here are both functions, both receive 2 paramaters : 'lines' which is the original tstringlist and 'q' which is the delimiter.
function splitlines(lines : tstringlist; q: string) : integer;
var
s, aux, ant : string;
i,j : integer;
flag : boolean;
m2 : tstringlist;
begin
try
m2 := tstringlist.create;
m2.BeginUpdate;
result := 0;
for i := 0 to lines.count-1 do
begin
s := lines[i];
for j := 1 to length(s) do
begin
flag := lowercase(copy(s,j,length(q))) = lowercase(q);
if flag then
begin
inc(result);
m2.add(aux);
aux := s[j];
end
else
aux := aux + s[j];
end;
m2.add(aux);
aux := '';
end;
m2.EndUpdate;
lines.text := m2.text;
finally
m2.free;
end;
end;
function splitLines2(lines : tstringlist; q: string) : integer;
var
aux, p : string;
i : integer;
flag : boolean;
begin
//maux1 and maux2 are already instanced in the parent class
try
maux2.text := lines.text;
p := '';
i := 0;
flag := false;
maux1.BeginUpdate;
maux2.BeginUpdate;
while (pos(lowercase(q),lowercase(maux2.text)) > 0) and (i < 5000) do
begin
flag := true;
aux := p+copy(maux2.text,1,pos(lowercase(q),lowercase(maux2.text))-1);
maux1.add(aux);
maux2.text := copy(maux2.text,pos(lowercase(q),lowercase(maux2.text)),length(maux2.text));
p := copy(maux2.text,1,1);
maux2.text := copy(maux2.text,2,length(maux2.text));
inc(i);
end;
finally
result := i;
maux1.EndUpdate;
maux2.EndUpdate;
if flag then
begin
maux1.add(p+maux2.text);
lines.text := maux1.text;
end;
end;
end;
I've not tested the speed, but for academic purposes, here's an easy way to split the strings:
myStringList.Text :=
StringReplace(myStringList.Text, myDelimiter, #13#10, [rfReplaceAll]);
// Use [rfReplaceAll, rfIgnoreCase] if you want to ignore case
When you set the Text
property of TStringList
, it parses on new lines and splits there, so converting to a string, replacing the delimiter with new lines, then assigning it back to the Text
property works.
The problems with your code (at least second approach) are
I have a tokenizer in my library. Its not the fastest or best but it should do (you can get it from Cromis Library , just use the units Cromis.StringUtils and Cromis.Unicode):
type
TTokens = array of ustring;
TTextTokenizer = class
private
FTokens: TTokens;
FDelimiters: array of ustring;
public
constructor Create;
procedure Tokenize(const Text: ustring);
procedure AddDelimiters(const Delimiters: array of ustring);
property Tokens: TTokens read FTokens;
end;
{ TTextTokenizer }
procedure TTextTokenizer.AddDelimiters(const Delimiters: array of ustring);
var
I: Integer;
begin
if Length(Delimiters) > 0 then
begin
SetLength(FDelimiters, Length(Delimiters));
for I := 0 to Length(Delimiters) - 1 do
FDelimiters[I] := Delimiters[I];
end;
end;
constructor TTextTokenizer.Create;
begin
SetLength(FTokens, 0);
SetLength(FDelimiters, 0);
end;
procedure TTextTokenizer.Tokenize(const Text: ustring);
var
I, K: Integer;
Counter: Integer;
NewToken: ustring;
Position: Integer;
CurrToken: ustring;
begin
SetLength(FTokens, 100);
CurrToken := '';
Counter := 0;
for I := 1 to Length(Text) do
begin
CurrToken := CurrToken + Text[I];
for K := 0 to Length(FDelimiters) - 1 do
begin
Position := Pos(FDelimiters[K], CurrToken);
if Position > 0 then
begin
NewToken := Copy(CurrToken, 1, Position - 1);
if NewToken <> '' then
begin
if Counter > Length(FTokens) then
SetLength(FTokens, Length(FTokens) * 2);
FTokens[Counter] := Trim(NewToken);
Inc(Counter)
end;
CurrToken := '';
end;
end;
end;
if CurrToken <> '' then
begin
if Counter > Length(FTokens) then
SetLength(FTokens, Length(FTokens) * 2);
FTokens[Counter] := Trim(CurrToken);
Inc(Counter)
end;
SetLength(FTokens, Counter);
end;
How about just using StrTokens from the JCL library
procedure StrTokens(const S: string; const List: TStrings);
It's open source http://sourceforge.net/projects/jcl/
As an additional option, you can use regular expressions. Recent versions of Delphi (XE4 and XE5) come with built in regular expression support; older versions can find a free regex library download (zip file) at Regular-Expressions.info .
For the built-in regex support (uses the generic TArray<string>
):
var
RegexObj: TRegEx;
SplitArray: TArray<string>;
begin
SplitArray := nil;
try
RegexObj := TRegEx.Create('\[test\]'); // Your sample expression. Replace with q
SplitArray := RegexObj.Split(Lines, 0);
except
on E: ERegularExpressionError do begin
// Syntax error in the regular expression
end;
end;
// Use SplitArray
end;
For using TPerlRegEx in earlier Delphi versions:
var
Regex: TPerlRegEx;
m2: TStringList;
begin
m2 := TStringList.Create;
try
Regex := TPerlRegEx.Create;
try
Regex.RegEx := '\[test\]'; // Using your sample expression - replace with q
Regex.Options := [];
Regex.State := [preNotEmpty];
Regex.Subject := Lines.Text;
Regex.SplitCapture(m2, 0);
finally
Regex.Free;
end;
// Work with m2
finally
m2.Free;
end;
end;
(For those unaware, the \\
in the sample expression used are because the []
characters are meaningful in regular expressions and need to be escaped to be used in the regular expression text. Typically, they're not required in the text.)
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.