简体   繁体   English

更快的替代InttoStr / StrToInt?

[英]faster alternative to InttoStr/StrToInt?

I wonder if there are faster alternative than System.IntToStr / System.StrToInt . 我想知道是否有比System.IntToStr / System.StrToInt更快的替代方案。 There is a fast version but only UTF8. 有一个快速版本,但只有UTF8。 Which is Int32ToUTF8 from SynCommons.pas and due to slow string conversions it is bound to be slow. 这是Int32ToUTF8SynCommons.pas并且由于缓慢的字符串转换势必会很慢。 The purepascal RTL versions are really slow for 64 bit. purepascal RTL版本对于64位来说真的很慢。

This routine is approximately 40% faster than the routine in the RTL. 此例程比RTL中的例程快约40%。 It could be much faster if you worked with WideChar[] buffers because the string allocation is taking up 75% of the time used by the conversion routine: 如果您使用WideChar []缓冲区可能会快得多,因为字符串分配占用转换例程使用时间的75%:

  • IntS32ToWide: 5,50 ns/item (PWideChar) IntS32ToWide:5,50 ns / item(PWideChar)
  • IntToStr: 34,51 ns/item (RTL) IntToStr:34,51 ns / item(RTL)
  • IntS32ToStr: 24,77 ns/item (RTL replacement) IntS32ToStr:24,77 ns /项(RTL更换)

Please note that the routine below uses SSE2 and only x86 and x64 versions are fully implemented and tested. 请注意,下面的例程使用SSE2,只有x86和x64版本完全实现和测试。

In the initialization: 在初始化中:

function IntU32ToWide( X: Longword; P: PWideChar ): PWideChar; register;
function IntS32ToWide( X: Integer;  P: PWideChar ): PWideChar; register;
function IntS32ToStr ( X: Longword ): UnicodeString; register; inline;

In the implementation: 在实施中:

{$CODEALIGN 16}
{$ALIGN 16}

const
  DigitsClippedW: array [ 0..99 ] of LongWord = (
    $000030, $000031, $000032, $000033, $000034, $000035, $000036, $000037, $000038, $000039,
    $300031, $310031, $320031, $330031, $340031, $350031, $360031, $370031, $380031, $390031,
    $300032, $310032, $320032, $330032, $340032, $350032, $360032, $370032, $380032, $390032,
    $300033, $310033, $320033, $330033, $340033, $350033, $360033, $370033, $380033, $390033,
    $300034, $310034, $320034, $330034, $340034, $350034, $360034, $370034, $380034, $390034,
    $300035, $310035, $320035, $330035, $340035, $350035, $360035, $370035, $380035, $390035,
    $300036, $310036, $320036, $330036, $340036, $350036, $360036, $370036, $380036, $390036,
    $300037, $310037, $320037, $330037, $340037, $350037, $360037, $370037, $380037, $390037,
    $300038, $310038, $320038, $330038, $340038, $350038, $360038, $370038, $380038, $390038,
    $300039, $310039, $320039, $330039, $340039, $350039, $360039, $370039, $380039, $390039 );

// Delphi XE3 has no working alignment for 16 bytes for data but it has alignment for 16 bytes for code!
// So we encode our constants as a procedure and use constant offsets to the data.
const
  Div10000_Shl45d        = $00;
  Shl16_minus_10000d     = $10;
  Div_1000_100_10_1w     = $20;
  Shl_1000_100_10_1w     = $30;
  Mul_10w                = $40;
  To_Asciiw              = $50;
  Mul_10000d             = $60;
  Div100_Shl19w          = $70;
  Mul100w                = $80;
  Div10_shl16w           = $90;
  To_Asciib              = $A0;

procedure IntUToStrConsts();
asm
  {$if defined( CPUX64 )}.NOFRAME{$ifend}
  dd $d1b71759,         $d1b71759,          $d1b71759,         $d1b71759;          // RoundUp( 2^45 / 10000 )
  dd $10000 - 10000,    $10000 - 10000,     $10000 - 10000,    $10000 - 10000;     // 1 shl 16 - 1e4
  dw 8389,    5243,     13108,    $8000,    8389,    5243,     13108,    $8000;    // 1000 100 10 1 div
  dw 1 shl 7, 1 shl 11, 1 shl 13, 1 shl 15, 1 shl 7, 1 shl 11, 1 shl 13, 1 shl 15; // 1000 100 10 1 shr
  dw 10,      10,       10,       10,       10,      10,       10,       10;       // 10
  dw $30,     $30,      $30,      $30,      $30,     $30,      $30,      $30;      // To Unicode / ASCII
  dd 10000,             10000,              10000,             10000;              // 10000
  dw $147b,   $147b,    $147b,    $147b,    $147b,   $147b,    $147b,    $147b     // RoundUp( 2^19 / 100 )
  dw 100,     100,      100,      100,      100,     100,      100,      100       // 100
  dw $199a,   $199a,    $199a,    $199a,    $199a,   $199a,    $199a,    $199a     // RoundUp( 2^16 / 10 )
  dd $30303030,         $30303030,          $30303030,         $30303030           // To bytewise / ASCII
end;


function IntS32ToStr( X: Longword ): UnicodeString; register;
var
  P, Q: PWideChar;
begin
  SetLength( Result, 11 );
  P := PWideChar( Pointer( Result ) );
  // Full string buffer and set the length of the string with no resizing!
  PLongword( ( NativeInt( Result ) - sizeof( Longword ) ) )^ := IntS32ToWide( X, P ) - P;
end;

function IntS32ToWide( X: Integer; P: PWideChar ): PWideChar;
{$if defined( CPUX86 )}
asm // eax = X, edx = P
  cmp  eax, 0
  jge  IntU32ToWide
  mov  word ptr [ edx ], Word( '-' )
  neg  eax
  lea  edx, [ edx + 2 ]
  jmp  IntU32ToWide
end;
{$else if defined( CPUX64 )}
asm // ecx = X, rdx = P
  .NOFRAME
  cmp  ecx, 0
  jge  IntU32ToWide
  mov  word ptr [ rdx ], Word( '-' )
  neg  ecx
  lea  rdx, [ rdx + 2 ]
  jmp  IntU32ToWide
end;
{$else}
begin
  if X >= 0 then begin
    Result := IntU32ToWide( Longword( X ), P );
  end else begin
    P^ := '-';
    Result := IntU32ToWide( Longword( -X ), P + 1 );
  end;
end;
{$ifend}

function IntU32ToWide( X: Longword; P: PWideChar ): PWideChar; register;
{$if defined( CPUX86 )}
asm
  cmp       eax, 100000000
  jb        @Medium
@Large:
  push      edx
  xor       edx, edx
  mov       ecx, 100000000
  div       ecx
  pop       ecx

  //        eax = high one or two digit value, edx = 8 digit value, ecx = pointer
  // Emit the first 2 digits
  mov       eax, dword ptr [ DigitsClippedW + eax * 4 ]
  mov       [ ecx ], eax
  cmp       eax, $10000
  setae     al
  movzx     eax, al
  lea       eax, [ eax * 2 + ecx + 18 ]

  //        edx = 8 digit value, ecx = pointer
  // Emit 8 follow digits
  movd      xmm1, edx          // xmm1 = Value
  movdqa    xmm0, dqword ptr [ IntUToStrConsts + Div10000_Shl45d ]
  pmuludq   xmm0, xmm1
  psrlq     xmm0, 45           // xmm0 = xmm1 div 10000
  pmuludq   xmm0, dqword ptr [ IntUToStrConsts + Shl16_minus_10000d ]
  paddd     xmm0, xmm1         // xmm0 = word( lo digits ), word( hi digit ), 0 (6x)
  psllq     xmm0, 2
  punpcklwd xmm0, xmm0
  punpckldq xmm0, xmm0         // xmm0 *= 4 (lo, lo, lo, lo, hi, hi, hi, hi)W (LSW, MSW)
  pmulhuw   xmm0, dqword ptr [ IntUToStrConsts + Div_1000_100_10_1w ]
  pmulhuw   xmm0, dqword ptr [ IntUToStrConsts + Shl_1000_100_10_1w ] // xmm0 = ( lo, lo div 10, lo div 100, lo div 100, (same with hi) )W
  movdqa    xmm2, dqword ptr [ IntUToStrConsts + Mul_10w ]            // xmm2 := xmm0 * 10; shift to left one word.
  pmullw    xmm2, xmm0
  psllq     xmm2, 16
  psubw     xmm0, xmm2         // Extract digits
  por       xmm0, dqword ptr [ IntUToStrConsts + To_ASCIIw ]          // Digits to ASCII
  shufps    xmm0, xmm0, $4E
  movdqu    [ eax - 16 ], xmm0    // And save 8 digits at once
  ret
@Medium:
  cmp       eax, 100
  jb        @Small
  //        eax 2..8 digits, edx = pointer
  // Emit 2..8 digits
  movd      xmm1, eax          // xmm1 = Value
  movdqa    xmm0, dqword ptr [ IntUToStrConsts + Div10000_Shl45d ]
  pmuludq   xmm0, xmm1
  psrlq     xmm0, 45           // xmm0 = xmm1 div 10000
  pmuludq   xmm0, dqword ptr [ IntUToStrConsts + Shl16_minus_10000d ]
  paddd     xmm0, xmm1         // xmm0 = word( lo digits ), word( hi digit ), 0 (6x)
  psllq     xmm0, 2
  punpcklwd xmm0, xmm0
  punpckldq xmm0, xmm0         // xmm0 *= 4 (lo, lo, lo, lo, hi, hi, hi, hi)W (LSW, MSW)
  pmulhuw   xmm0, dqword ptr [ IntUToStrConsts + Div_1000_100_10_1w ]
  pmulhuw   xmm0, dqword ptr [ IntUToStrConsts + Shl_1000_100_10_1w ] // xmm0 = ( lo, lo div 10, lo div 100, lo div 100, (same with hi) )W
  movdqa    xmm2, dqword ptr [ IntUToStrConsts + Mul_10w ]            // xmm2 := xmm0 * 10; shift to left one word.
  pmullw    xmm2, xmm0
  psllq     xmm2, 16
  psubw     xmm0, xmm2         // Extract digits

  movdqa    xmm1, dqword ptr [ IntUToStrConsts + To_ASCIIw ]          // Digits to ASCII
  por       xmm0, xmm1
  shufps    xmm0, xmm0, $4E
  // Now we have 8 Unicode characters in the xmm0 register in the correct order.
  pcmpeqw   xmm1, xmm0         // scan for zeroes.
  pmovmskb  eax, xmm1
  packuswb  xmm0, xmm0         // convert to bytes
  xor       eax, $FFFF         // change polarity
  bsf       eax, eax           // amount to shift in bytes.
  lea       ecx, [ eax * 4 ]
  movd      xmm1, ecx
  psrlq     xmm0, xmm1         // bytes shifted.
  pxor      xmm2, xmm2
  punpcklbw xmm0, xmm2
  neg       eax
  movdqu    dqword ptr [ edx ], xmm0
  lea       eax, [ edx + 16 + eax ]
  ret
@Small:
  //        eax 1..2 digits, edx = pointer
  // Emit one or two digits
  mov       eax, dword ptr [ DigitsClippedW + eax * 4 ]
  mov       [ edx ], eax
  cmp       eax, $10000
  setae     al
  movzx     eax, al
  lea       eax, [ edx + eax * 2 + 2 ]
end;
{$else if defined( CPUX64 )}
asm
  cmp       ecx, 100000000
  jb        @Medium
@Large:
  mov       r8,  rdx                  // r8 = pointer

  // Split up low 8 digits from high 1 or 2 digits..
  mov       eax, ecx
  mov       r9,  12379400392853802749 // RoundUp( 2^64+26 / 1e8 )
  mul       rax, r9
  shr       rdx, 26
  mov       r10, rdx                  // r10 = eax div 1e8
  mov       rax, rdx
  mov       r9,  100000000
  mul       rax, r9
  sub       ecx, eax                  // ecx = eax mod 1e8

  // Emit the first 2 digits
  lea       r9, [ DigitsClippedW ]
  mov       eax, dword ptr [ r9 + r10 * 4 ]
  mov       dword ptr [ r8 ], eax
  // advance pointer ( also for the next 8 bytes)
  cmp       eax, $10000
  setae     al
  movzx     rax, al
  lea       rax, [ rax * 2 + r8 + 2 + 16 ]

  // ecx = 8 digit value, r8 = pointer + 8
  movd      xmm1, ecx          // xmm1 = Value
  movdqa    xmm0, dqword ptr [ IntUToStrConsts + Div10000_Shl45d ]
  pmuludq   xmm0, xmm1
  psrlq     xmm0, 45           // xmm0 = xmm1 div 10000
  pmuludq   xmm0, dqword ptr [ IntUToStrConsts + Shl16_minus_10000d ]
  paddd     xmm0, xmm1         // xmm0 = word( lo digits ), word( hi digit ), 0 (6x)
  psllq     xmm0, 2
  punpcklwd xmm0, xmm0
  punpckldq xmm0, xmm0         // xmm0 *= 4 (lo, lo, lo, lo, hi, hi, hi, hi)W (LSW, MSW)
  pmulhuw   xmm0, dqword ptr [ IntUToStrConsts + Div_1000_100_10_1w ]
  pmulhuw   xmm0, dqword ptr [ IntUToStrConsts + Shl_1000_100_10_1w ] // xmm0 = ( lo, lo div 10, lo div 100, lo div 100, (same with hi) )W
  movdqa    xmm2, dqword ptr [ IntUToStrConsts + Mul_10w ]            // xmm2 := xmm0 * 10; shift to left one word.
  pmullw    xmm2, xmm0
  psllq     xmm2, 16
  psubw     xmm0, xmm2         // Extract digits
  por       xmm0, dqword ptr [ IntUToStrConsts + To_ASCIIw ]          // Digits to ASCII
  shufps    xmm0, xmm0, $4E
  movdqu    [ rax - 16 ], xmm0    // And save 8 digits at once
  ret
@Medium:
  cmp       ecx, 100
  jb        @Small
  //        eax 2..8 digits, rdx = pointer
  //        Emit 2..8 digits
  movd      xmm1, ecx          // xmm1 = Value
  movdqa    xmm0, dqword ptr [ IntUToStrConsts + Div10000_Shl45d ]
  pmuludq   xmm0, xmm1
  psrlq     xmm0, 45           // xmm0 = xmm1 div 10000
  pmuludq   xmm0, dqword ptr [ IntUToStrConsts + Shl16_minus_10000d ]
  paddd     xmm0, xmm1         // xmm0 = word( lo digits ), word( hi digit ), 0 (6x)
  psllq     xmm0, 2
  punpcklwd xmm0, xmm0
  punpckldq xmm0, xmm0         // xmm0 *= 4 (lo, lo, lo, lo, hi, hi, hi, hi)W (LSW, MSW)
  pmulhuw   xmm0, dqword ptr [ IntUToStrConsts + Div_1000_100_10_1w ]
  pmulhuw   xmm0, dqword ptr [ IntUToStrConsts + Shl_1000_100_10_1w ] // xmm0 = ( lo, lo div 10, lo div 100, lo div 100, (same with hi) )W
  movdqa    xmm2, dqword ptr [ IntUToStrConsts + Mul_10w ]            // xmm2 := xmm0 * 10; shift to left one word.
  pmullw    xmm2, xmm0
  psllq     xmm2, 16
  psubw     xmm0, xmm2         // Extract digits
  movdqa    xmm1, dqword ptr [ IntUToStrConsts + To_ASCIIw ]          // Digits to ASCII
  por       xmm0, xmm1
  shufps    xmm0, xmm0, $4E
  // Now we have 8 Unicode characters in the xmm0 register in the correct order.
  pcmpeqw   xmm1, xmm0         // scan for zeroes.
  pmovmskb  eax, xmm1
  packuswb  xmm0, xmm0         // convert to bytes
  xor       eax, $FFFF         // change polarity
  bsf       eax, eax           // amount to shift in bytes.
  lea       ecx, [ eax * 4 ]
  movd      xmm1, ecx
  psrlq     xmm0, xmm1         // bytes shifted.
  pxor      xmm2, xmm2
  punpcklbw xmm0, xmm2
  neg       rax
  movdqu    dqword ptr [ rdx ], xmm0
  lea       rax, [ rdx + 16 + rax ]
  ret
@Small:
  //        ecx 1..2 digits, rdx = pointer
  // Emit one or two digits
  lea       r9, [ DigitsClippedW ]
  mov       eax, dword ptr [ r9 + rcx * 4 ]
  mov       [ rdx ], eax
  cmp       eax, $10000
  setae     al
  movzx     rax, al
  lea       rax, [ rdx + rax * 2 + 2 ]
end;
{$else}
begin
  Assert( False, 'Not implemented.' );
end;
{$ifend}

In SynCommons.pas , you have also the following function: SynCommons.pas中 ,您还具有以下功能:

function IntToString(Value: integer): string;
var tmp: array[0..15] of AnsiChar;
    P: PAnsiChar;
begin
  P := StrInt32(@tmp[15],Value);
  Ansi7ToString(PWinAnsiChar(P),@tmp[15]-P,result);
end;

I suspect it will be also fast, even on Win64 platform. 我怀疑它也会很快,即使在Win64平台上也是如此。 Slower than asm, but fast enough for small numbers (which tends to be most of the integer in the wild). 比asm慢,但对于小数字来说足够快(这通常是野外的大部分整数)。

There will be only one memory allocation in this function, which is pretty fast even on Win64, thanks to the updated version of FastMM4 , which has its own optimized x64 asm. 由于FastMM4的更新版本具有自己的优化x64 asm,因此在此功能中只有一个内存分配,即使在Win64上也非常快。

In my opinion, the key way to improve performance is to avoid heap allocations. 在我看来,提高性能的关键方法是避免堆分配。 The time spent by IntToStr doing the allocations is greater than the time spent doing the decimal conversion. IntToStr进行分配所花费的时间大于进行十进制转换所花费的时间。 And if you are wanting to use multiple threads then this is even more important because the default Delphi memory manager does not scale well under thread contention. 如果你想使用多个线程,那么这更为重要,因为默认的Delphi内存管理器在线程争用下不能很好地扩展。

It's true that the decimal conversion can also be optimised, but I always try to optimise by picking off the low-hanging fruit first. 确实,十进制转换也可以进行优化,但我总是首先尝试通过挑选低悬的果实进行优化。

So, for the sake of completeness, in case these functions prove useful to others, here are my routines for heap allocation free integer to string conversion: 因此,为了完整起见,如果这些函数对其他函数有用,这里是我的例程,用于堆分配自由整数到字符串转换:

procedure DivMod(Dividend, Divisor: Cardinal; out Quotient, Remainder: Cardinal);
{$IFDEF CPUX86}
asm
        PUSH  EBX
        MOV   EBX,EDX
        XOR   EDX,EDX
        DIV   EBX
        MOV   [ECX],EAX
        MOV   EBX,Remainder
        MOV   [EBX],EDX
        POP   EBX
end;
{$ELSE IF Defined(CPUX64)}
asm
        .NOFRAME
        MOV   EAX,ECX
        MOV   ECX,EDX
        XOR   EDX,EDX
        DIV   ECX
        MOV   [R8],EAX
        MOV   [R9],EDX
end;
{$ELSE}
  {$Message Error 'Unrecognised platform.'}
{$ENDIF}

{$IFOPT R+}
  {$DEFINE RANGECHECKSON}
  {$R-}
{$ENDIF}
{$IFOPT Q+}
  {$DEFINE OVERFLOWCHECKSON}
  {$Q-}
{$ENDIF}

// disable range checks and overflow checks so that abs() functions in case Value = low(Value)

function CopyIntegerToAnsiBuffer(const Value: Integer; var Buffer: array of AnsiChar): Integer;
var
  i, j: Integer;
  val, remainder: Cardinal;
  negative: Boolean;
  tmp: array [0..15] of AnsiChar;
begin
  negative := Value<0;
  val := abs(Value);
  Result := 0;
  repeat
    DivMod(val, 10, val, remainder);
    tmp[Result] := AnsiChar(remainder + ord('0'));
    inc(Result);
  until val=0;
  if negative then begin
    tmp[Result] := '-';
    inc(Result);
  end;
  Assert(Result<=Length(Buffer));

  i := 0;
  j := Result-1;
  while i<Result do begin
    Buffer[i] := tmp[j];
    inc(i);
    dec(j);
  end;
end;

function CopyInt64ToAnsiBuffer(const Value: Int64; var Buffer: array of AnsiChar): Integer;
var
  i, j: Integer;
  val, remainder: UInt64;
  negative: Boolean;
  tmp: array [0..23] of AnsiChar;
begin
  negative := Value<0;
  val := abs(Value);
  Result := 0;
  repeat
    DivMod(val, 10, val, remainder);
    tmp[Result] := AnsiChar(remainder + ord('0'));
    inc(Result);
  until val=0;
  if negative then begin
    tmp[Result] := '-';
    inc(Result);
  end;
  Assert(Result<=Length(Buffer));

  i := 0;
  j := Result-1;
  while i<Result do begin
    Buffer[i] := tmp[j];
    inc(i);
    dec(j);
  end;
end;

{$IFDEF RANGECHECKSON}
  {$R+}
  {$UNDEF RANGECHECKSON}
{$ENDIF}
{$IFDEF OVERFLOWCHECKSON}
  {$Q+}
  {$UNDEF OVERFLOWCHECKSON}
{$ENDIF}

My use case requires an array of AnsiChar , but it is of course simple to amend these functions to populate WideChar arrays. 我的用例需要一个AnsiChar数组,但修改这些函数来填充WideChar数组当然很简单。

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM