在Delphi 2010中使用FPC .o文件

Question

我已經為Delphi和FPC的矩陣運算編寫了一個很大的庫。 現在，該庫有一個針對Intel AVX擴展的擴展，但是我只能設法在FPC中進行編譯。 我的想法是在包含AVX匯編代碼的FPC中創建.o文件，並在Delphi中包含這些文件。 我在這里嘗試遵循此問題：將FPC .o文件鏈接到Delphi中

但沒有成功。 我能夠轉儲函數名稱，並嘗試將其導入Delphi單元中。 問題是，我總是會收到一條錯誤消息，指出.o文件的格式錯誤。

我使用CodeTyphoon進行編譯，該編譯器內部首次使用FPC 3.1.1和Delphi2010。

使用適當的ifdefs在FPC中編譯一次代碼，在Delphi中編譯一次。

我的基本代碼如下（僅摘錄）：

// ###################################################################
// #### This file is part of the mathematics library project, and is
// #### offered under the licence agreement described on
// #### http://www.mrsoft.org/
// ####
// #### Copyright:(c) 2011, Michael R. . All rights reserved.
// ####
// #### Unless required by applicable law or agreed to in writing, software
// #### distributed under the License is distributed on an "AS IS" BASIS,
// #### WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// #### See the License for the specific language governing permissions and
// #### limitations under the License.
// ###################################################################


unit AVXMatrixMultOperations;

interface

{$IFDEF CPUX64}
{$DEFINE x64}
{$ENDIF}
{$IFDEF cpux86_64}
{$DEFINE x64}
{$ENDIF}
{$IFNDEF x64}

uses MatrixConst;

{$IFNDEF FPC}
// this fails -> wrong object format
{$L '.\AVXPrecompiled\win32\AVXMatrixMultOperations.o'}
{$ENDIF}

// full matrix operations
procedure AVXMatrixMultAligned(dest : PDouble; const destLineWidth : TASMNativeInt; mt1, mt2 : PDouble; width1, height1, width2, height2 : TASMNativeInt; const LineWidth1, LineWidth2 : TASMNativeInt);
{$IFNDEF FPC} external '' name 'AVXMATRIXMULTOPERATIONS_$$_AVXMATRIXMULTALIGNED$crc2A67AB04'; {$ENDIF}

{$ENDIF}

implementation

{$IFDEF FPC} {$ASMMODE intel} {$ENDIF}

{$IFNDEF x64}

{$IFDEF FPC}

procedure AVXMatrixMultAligned(dest : PDouble; const destLineWidth : TASMNativeInt; mt1, mt2 : PDouble; width1, height1, width2, height2 : TASMNativeInt; const LineWidth1, LineWidth2 : TASMNativeInt);
var bytesWidth2, destOffset : TASMNativeInt;
    iter : TASMNativeInt;
{$IFDEF FPC}
begin
{$ENDIF}
asm
   // prolog - simulate stack
   push ebx;
   push edi;
   push esi;

   mov ecx, dest;

   mov edi, width1;
   imul edi, -8;
   mov iter, edi;

   sub mt1, edi;

   //destOffset := destLineWidth - Width2*sizeof(double);
   mov ebx, Width2;
   shl ebx, 3;
   mov eax, destLineWidth;
   sub eax, ebx;
   mov destOffset, eax;

   //bytesWidth2 := width2*sizeof(double);
   mov bytesWidth2, ebx;

   // for y := 0 to height1 - 1 do
   @@foryloop:

      // r12 -> counter to width2
      mov esi, width2;
      sub esi, 2;
      jl @LastXColumn;

      @@forxloop:
      // for x := 0 to width2 div 2 - 1
          // esi: mt1 - width1*sizeof(double)
          // mt2: mt2
          mov edx, mt1;
          mov ebx, mt2;
          mov eax, iter;
          mov edi, LineWidth2;

          vxorpd ymm0, ymm0, ymm0;
          vxorpd ymm1, ymm1, ymm1;

          cmp eax, -32;
          jg @@Innerloop2Begin;

          // for z := 0 to width1 - 1do
          // AVX part:
          @@InnerLoop1:
             // 4x4 block
             vmovapd xmm2, [ebx];
             add ebx, edi;
             vmovapd xmm4, xmm2;

             vmovapd xmm3, [ebx];
             add ebx, edi;

             // shuffle so we can multiply

             // swap such that we can immediately multiply
             vmovlhps xmm2, xmm2, xmm3;
             vmovhlps xmm3, xmm3, xmm4;

             // next 4 elements
             vmovapd xmm4, [ebx];
             add ebx, edi;
             vmovapd xmm6, xmm4;

             vmovapd xmm5, [ebx];
             add ebx, edi;

             vmovapd ymm7, [edx + eax]

             vmovlhps xmm4, xmm4, xmm5;
             vmovhlps xmm5, xmm5, xmm6;

             vinsertf128 ymm2, ymm2, xmm4, 1;
             vinsertf128 ymm3, ymm3, xmm5, 1;

             // now multiply and add
             vmulpd ymm2, ymm2, ymm7;
             vmulpd ymm3, ymm3, ymm7;

             vaddpd ymm0, ymm0, ymm2;
             vaddpd ymm1, ymm1, ymm3;
          add eax, 32;
          jl @@InnerLoop1;

          vextractf128 xmm2, ymm0, 1;
          vextractf128 xmm3, ymm1, 1;

          vhaddpd xmm0, xmm0, xmm2;
          vhaddpd xmm1, xmm1, xmm3;

          test eax, eax;
          jz @@InnerLoopEnd2;

          @@Innerloop2Begin:

          // rest in single elements
          @@InnerLoop2:
             vmovapd xmm2, [ebx];
             add ebx, edi;

             vmovddup xmm3, [edx + eax];

             vmulpd xmm2, xmm2, xmm3;
             vmovhlps xmm4, xmm4, xmm2;

             vaddsd xmm0, xmm0, xmm2;
             vaddsd xmm1, xmm1, xmm4;
          add eax, 8;
          jnz @@InnerLoop2;

          @@InnerLoopEnd2:

          // finall horizontal addition
          vhaddpd xmm0, xmm0, xmm1;

          vmovapd [ecx], xmm0;

          // increment the pointers
          // inc(mt2), inc(dest);
          //add dword ptr [mt2], 8;
          add mt2, 16;
          add ecx, 16;

      // end for x := 0 to width2 div 2 - 1
      sub esi, 2;
      jge @@forxloop;

      @LastXColumn:

      cmp esi, -1;
      jne @NextLine;

      // last column of mt2
      mov eax, iter;
      mov ebx, mt2;

      vxorpd xmm0, xmm0, xmm0;

      @InnerLoop2:
         vmovsd xmm1, [edx + eax];
         vmovsd xmm2, [ebx];

         vmulsd xmm1, xmm1, xmm2;
         vaddsd xmm0, xmm0, xmm1;

         add ebx, edi;
      add eax, 8;
      jnz @InnerLoop2;

      vmovsd [ecx], xmm0;
      add ecx, 8;
      add mt2, 8;

      @NextLine:
      // dec(mt2, Width2);
      // inc(PByte(mt1), LineWidth1);
      // inc(PByte(dest), destOffset);
      //mov ebx, bytesWidth2;
      //sub dword ptr [mt2], ebx;
      mov eax, bytesWidth2;
      sub mt2, eax;
      mov eax, LineWidth1;
      add mt1, eax;
      add ecx, destOffset;

   // end for y := 0 to height1 - 1
   //dec eax;
   dec height1;
   jnz @@foryloop;

   // epilog
   vzeroupper;

   pop esi;
   pop edi;
   pop ebx;
end;
{$IFDEF FPC}
end;
{$ENDIF}

{$ENDIF}

{$ENDIF}

end.

Answer 1

由於此處涉及單個功能，最簡單的方法是IMHO，直接轉換FPC AVXMatrixMultOperations.o文件。

使用出色的Object文件轉換器工具。

您可以嘗試將一種二進制格式轉換為Delphi接受的另一種格式。

但是我想最干凈的方法是將其轉換為asm：

objconv -fasm AVXMatrixMultOperations.o

它將創建一個AVXMatrixMultOperations.asm文件，該文件可用於通過簡單的db ..,..,..,..字節來替換未知的AVX指令。 通常，生成的.asm文件在左側具有匯編程序，在右側具有原始十六進制字節。

例如，這就是我處理庫中舊的Delphi編譯器的方式：

function crc32csse42(crc: cardinal; buf: PAnsiChar; len: cardinal): cardinal;
asm // eax=crc, edx=buf, ecx=len
        not     eax
        test    ecx, ecx
        jz      @0
        test    edx, edx
        jz      @0
@3:     test    edx, 3
        jz      @8 // align to 4 bytes boundary
        {$ifdef ISDELPHI2010}
        crc32   eax, byte ptr[edx]
        {$else}
        db      $F2, $0F, $38, $F0, $02
        {$endif}
        inc     edx
        ....

所以在你的情況下

{$ifdef FPC}
vinsertf128 ymm2, ymm2, xmm4, 1;
vinsertf128 ymm3, ymm3, xmm5, 1;
{$else}
db $xx,$yy,$zz
db $xx,$yy,$zz
{$endif}

在Delphi 2010中使用FPC .o文件

問題描述

1 個解決方案

解決方案1
2 已采納 2018-02-09 23:24:14

在Delphi 2010中使用FPC .o文件

問題描述

1 個解決方案

解決方案1 2 已采納 2018-02-09 23:24:14

解決方案1
2 已采納 2018-02-09 23:24:14