繁体   English   中英

哪个函数在执行时间方面更快? beta() 还是 alpha()?

[英]Which of the functions is faster in terms of the execution time? beta() or alpha()?

您预计哪个更快?(假设数组 a[100] 和 b[100] 是已初始化的全局变量)

/* Element-wise add over indices 0..99: a[i] = a[i] + b[i], one element per
   loop pass. a and b are not declared in this snippet; they must be visible
   at file scope. */
void beta(){

    int i;
    for (i=0;i<100;i++){
    a[i] = a[i] + b[i];
    }

}

/* Hand-unrolled variant of the same element-wise add: four statements per
   loop pass, each one bumping i via b[i++].
   NOTE(review): every statement both reads i (the a[i] index) and modifies
   it (i++) inside one expression with nothing sequencing the two, which is
   undefined behavior in C. Do not rely on any particular result. */
void alpha(){

    int i=0;
    while (i<100){
    a[i] += b[i++];
    a[i] += b[i++];
    a[i] += b[i++];
    a[i] += b[i++];
    }

}

为了避免 UB,我重写了alpha函数:

/* Hand-unrolled element-wise add with the increment split into its own
   statement, so each a[i] += b[i] uses one well-defined value of i.
   Four elements are handled per loop pass; the i<100 test runs only once
   per four elements, which works out exactly because 100 is a multiple
   of 4. */
void alpha(){

    int i=0;
    while (i<100)
    {
        a[i] += b[i];
        i++;
        a[i] += b[i];
        i++;
        a[i] += b[i];
        i++;
        a[i] += b[i];
        i++;
    }
}

生成的代码取决于平台:

对于 x86,两个函数生成的代码完全一样。

# beta for x86-64 (Intel operand order: op dst, src).
# rax is a byte offset into a and b. Each pass processes 16 bytes, i.e. four
# 32-bit elements at once (paddd adds packed doublewords).
beta:
        xor     eax, eax                        # rax = 0; a 32-bit write clears the upper half
.L2:
        movdqa  xmm0, XMMWORD PTR a[rax]        # xmm0 = 16 bytes of a at offset rax (aligned-load form)
        paddd   xmm0, XMMWORD PTR b[rax]        # add the four 32-bit lanes of b at the same offset
        add     rax, 16                         # offset is advanced before the store ...
        movaps  XMMWORD PTR a[rax-16], xmm0     # ... so rax-16 is the block that was just loaded
        cmp     rax, 400                        # 400 = byte size reserved for each array (.zero 400)
        jne     .L2                             # 400 / 16 = 25 passes
        ret
# alpha for x86-64 (Intel operand order: op dst, src).
# Apart from the loop label, this is instruction-for-instruction the same as
# the beta listing: rax is a byte offset, 16 bytes (four 32-bit elements) are
# added per pass.
alpha:
        xor     eax, eax                        # rax = 0; a 32-bit write clears the upper half
.L6:
        movdqa  xmm0, XMMWORD PTR a[rax]        # xmm0 = 16 bytes of a at offset rax (aligned-load form)
        paddd   xmm0, XMMWORD PTR b[rax]        # add the four 32-bit lanes of b at the same offset
        add     rax, 16                         # offset is advanced before the store ...
        movaps  XMMWORD PTR a[rax-16], xmm0     # ... so rax-16 is the block that was just loaded
        cmp     rax, 400                        # 400 = byte size reserved for each array (.zero 400)
        jne     .L6                             # 400 / 16 = 25 passes
        ret
# Storage for the two global arrays: 400 zero-filled bytes each.
# The loops above address them as packed 32-bit elements (100 per array).
b:
        .zero   400
a:
        .zero   400

但如果我们考虑 ARM Cortex,alpha 的执行速度会更快。

@ beta for 32-bit ARM: one element per loop pass.
@ r3 walks a and r1 walks b. Both start 4 bytes before their array because
@ the loads use the pre-indexed writeback form ("[rN, #4]!"), which adds 4 to
@ the pointer first and then loads from the updated address.
beta:
        ldr     r3, .L6             @ r3 = a-4 (first literal-pool word)
        ldr     r1, .L6+4           @ r1 = b-4 (second literal-pool word)
        add     ip, r3, #400        @ ip = a+396: the value r3 holds on the final pass
.L2:
        ldr     r2, [r3, #4]!       @ r3 += 4; r2 = word at r3 (current element of a)
        ldr     r0, [r1, #4]!       @ r1 += 4; r0 = word at r1 (current element of b)
        cmp     r3, ip              @ flags set here; the add/str below leave them alone
        add     r2, r2, r0          @ r2 = a element + b element
        str     r2, [r3]            @ write the sum back to the same element of a
        bne     .L2                 @ loop until r3 has reached ip (400 / 4 = 100 passes)
        bx      lr                  @ return to caller
.L6:
        .word   a-4
        .word   b-4
@ alpha for 32-bit ARM: four elements per loop pass.
@ r3 walks a, r2 walks b, r7 = a+400 marks the end. Each pass loads four words
@ from a and four from b, adds them pairwise, stores the four sums back to a,
@ then advances both pointers by 16 bytes.
alpha:
        ldr     r3, .L13            @ r3 = a (first literal-pool word)
        ldr     r2, .L13+4          @ r2 = b (second literal-pool word)
        push    {r4, r5, r6, r7, r8, lr}    @ save r4-r8 and lr; all are overwritten below
        add     r7, r3, #400        @ r7 = a+400, the loop's end address
.L9:
        ldr     lr, [r3]            @ four consecutive words of a -> lr, ip, r0, r1
        ldr     ip, [r3, #4]
        ldr     r0, [r3, #8]
        ldr     r1, [r3, #12]
        ldr     r8, [r2]            @ four consecutive words of b -> r8, r6, r5, r4
        ldr     r6, [r2, #4]
        ldr     r5, [r2, #8]
        ldr     r4, [r2, #12]
        add     lr, lr, r8          @ pairwise sums, kept in the registers that held a
        add     ip, ip, r6
        add     r0, r0, r5
        add     r1, r1, r4
        str     lr, [r3]            @ store the four sums back over the same words of a
        str     ip, [r3, #4]
        str     r0, [r3, #8]
        str     r1, [r3, #12]
        add     r3, r3, #16         @ advance the a pointer by four words
        cmp     r3, r7              @ flags set here; the add below does not update them
        add     r2, r2, #16         @ advance the b pointer by four words
        bne     .L9                 @ loop until r3 reaches a+400 (400 / 16 = 25 passes)
        pop     {r4, r5, r6, r7, r8, pc}    @ restore r4-r8; saved lr goes into pc, which returns
.L13:
        .word   a
        .word   b

所以一般的答案是:始终对代码进行基准测试。

https://godbolt.org/z/sWjqE1

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM