繁体   English   中英

装配MMX点产品细分错误

[英]Assembly MMX Dotproduct Segmentation fault

我正在尝试使用汇编语言对两个小数组执行简单的dotproduct计算。 这是我的代码:

#include <cstdio>
#include <cstdint>
#include <cstdlib>


void fillArray(int16_t* a, int16_t* b, int n){
    std::srand(1);
        int i = 0;
        do
        {
            a[i] = rand() % 50;
            b[i] = rand() % 10;
            i++;
        } while (i < n);
}

void printArray(int16_t* a, int16_t* b, int n){
    int i = 0;
    do
    {
        printf("a[%d]: %d; b[%d]: %d\n", i, a[i], i, b[i]);
        i++;
    } while (i < n);
}
//control operation
int16_t dotCpp(int16_t* a, int16_t* b, int n){
    int16_t dotProd;
    int i = 0; 
    do
    {
        dotProd += a[i] * b[i];
        i++;
    } while (i < n);
    return dotProd;
}

extern "C" void dotAsm_(int16_t* a, int16_t* b, int16_t *dotProd);

//dotAsm_ file
section .data


section .text

        global  dotAsm_

dotAsm_:

        push        ebp
        mov         ebp, esp

        mov         eax, [ebp+8]    ;load a
        mov         ebx, [ebp+12]   ;load b
        mov         ecx, [ebp+16]   ;load address of dotProd

        movq        mm0, [eax]      ;move content of eax to mm0
        movq        mm1, [ebx]      ;move content of ebx to mm1

        movq        mm2, mm0        ;copy mm0

        pmaddwd     mm2, mm1        ;multiply and add
        movq        mm3, mm2        ;copy mm2 to mm3
        psrlq       mm3, 32         ;shift mm3 by 32 bits to the right putting the higher-order bits into the lower-order bits
        paddd       mm2, mm3        ;add lower-order bits saving result in mm2

        punpcklwd   mm4, mm2        ;unpack the lower order bits
        psrld       mm4, 16         ;shift right by 16 bits, get the result of the addition

        movq        [ecx], mm4      ;move result back to the register

        pop         ebp
        emms
        ret

int main(int argc, char *argv[])
{

    int n = 4; 
    int16_t sum = 0;
    int16_t *dot;
    int16_t a[n], b[n];

    fillArray(a, b, n);
    printArray(a, b, n);
    sum = dotCpp(a, b,  n);
    printf("dotprod: %d\n", sum);


   dotAsm_(a, b, dot);
    // printf("ASM dotprod: %i\n", &dot);

   return 0;
}

从makefile编译:

CXX = g++
CXXFLGS = -g -Wall -std=c++11
SRC = main.o innerProd.o 
EXEC = innerProd

$(EXEC): $(SRC)
    $(CXX) $(CXXFLGS) $(SRC) -o $(EXEC)

innerProd.o: innerProd.asm
    nasm -f elf -F stabs innerProd.asm -o innerProd.o

main.o: innerProd.cpp
    $(CXX) $(CXXFLGS) -c innerProd.cpp -o main.o

结果是:

a[0]: 33; b[0]: 6
a[1]: 27; b[1]: 5
a[2]: 43; b[2]: 5
a[3]: 36; b[3]: 2
dotprod: 620
Segmentation fault (core dumped)

使用gdb分析原因显示以下内容:

//having successfully performed the neccessary calculations:
mm4 {uint64 = 0x26c, v2_int32 = {0x26c, 0x0} ...}
//hence the correct result 0x26c = (dec) 620 has been obtained however loading it back
//into the register causes the segmentation fault.
(gdb) ni
Program received signal SIGSEGV, Segmentation fault.
0x08048709 in dotAsm_ ()

我不知道为什么不能将结果移回寄存器。 任何建议都将受到高度赞赏。

预先多谢您。

文森特

问题不在于汇编代码,而在于main

int16_t *dot;

这是一个未初始化的指针; 它可以指向任何地方,通常意味着指向的不是您的随机地址。 因此,此处出现段错误:

movq        [ecx], mm4

最快的解决方案是更换

int16_t *dot;

通过:

int16_t dot[1];

尽管我更倾向于使点成为直接的整数变量:

int16_t dot;

然后将其地址传递给汇编例程:

dotAsm_(a, b, &dot);
printf("ASM dotprod: %i\n", dot);

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM