简体   繁体   English

为什么 GCC -Ofast 会使程序出错,但只有在它打印两次结果时才会出错?

[英]Why GCC -Ofast makes the program wrong but only when it prints the result twice?

Recompiling an old program made it output the wrong result. 重新编译一个旧程序后,它输出了错误的结果。 I'd like to know why. 我想知道为什么。

I know that -Ofast may "disregard strict standards compliance" but I'm curious about what happens under the hood.我知道-Ofast可能“无视严格的标准合规性”,但我很好奇幕后发生的事情。

I reduced the program to this minimal example foo1.c :我将程序简化为这个最小的示例foo1.c

#include <stdio.h>

/*
 * Raise x to the non-negative integer power n by repeated multiplication.
 * Performs n multiplications starting from 1, so n == 0 yields 1.
 */
double my_pow(double x, unsigned n)
{ /* returns x^n */
        double y = 1;

        /* n-- tests the value before the decrement, so the body runs n times */
        while(n--) y *= x;
        return y;
}

/*
 * Compute small^19 with my_pow and print the result twice using "%E".
 * Both printf calls are given the same variable x.
 */
void foo(double small)
{ /* prints small^19 */
        double x = my_pow(small,19);

        printf("%E\n",x);
        printf("%E\n",x);

}

/*
 * Entry point: calls foo with 1-0.8-0.2 and returns 0.
 * 0.8 and 0.2 are not exactly representable in binary floating point, so the
 * argument is a tiny non-zero value (about -5.55E-17) rather than 0.
 */
int main(void)
{
        foo(1-0.8-0.2);

        return 0;
}

When compiled with -Ofast, it gives a different output than with any other optimization level. 当使用 -Ofast 编译时,它给出的输出与任何其他优化级别都不同。

gcc -Ofast foo1.c &&./a.out : gcc -Ofast foo1.c &&./a.out

-0.000000E+00
-0.000000E+00

gcc foo1.c &&./a.out : gcc foo1.c &&./a.out

-1.390671E-309
-1.390671E-309

A strange fact is that when one of the printf calls is commented out (file foo2.c), this behavior does not reproduce, making it a sort of heisenbug. 一个奇怪的事实是,当其中一个 printf 调用被注释掉(文件 foo2.c)时,这种行为不会重现,使其成为一种海森堡 bug(heisenbug)。

gcc -Ofast foo2.c &&./a.out : gcc -Ofast foo2.c &&./a.out

-1.390671E-309

gcc foo2.c &&./a.out : gcc foo2.c &&./a.out

-1.390671E-309

Information that might be useful: 可能有用的信息:

gcc -v : gcc -v

Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-redhat-linux/4.8.5/lto-wrapper
Target: x86_64-redhat-linux
Configured with: ../configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-bootstrap --enable-shared --enable-threads=posix --enable-checking=release --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-linker-build-id --with-linker-hash-style=gnu --enable-languages=c,c++,objc,obj-c++,java,fortran,ada,go,lto --enable-plugin --enable-initfini-array --disable-libgcj --with-isl=/builddir/build/BUILD/gcc-4.8.5-20150702/obj-x86_64-redhat-linux/isl-install --with-cloog=/builddir/build/BUILD/gcc-4.8.5-20150702/obj-x86_64-redhat-linux/cloog-install --enable-gnu-indirect-function --with-tune=generic --with-arch_32=x86-64 --build=x86_64-redhat-linux
Thread model: posix
gcc version 4.8.5 20150623 (Red Hat 4.8.5-39) (GCC)

gcc -Ofast foo1.c -S -o - : gcc -Ofast foo1.c -S -o -

    .file   "foo1.c"
    .text
    .p2align 4,,15
    .globl  my_pow
    .type   my_pow, @function
# my_pow as compiled with -Ofast (System V AMD64: x in %xmm0, n in %edi,
# result in %xmm0).
#   n == 0      -> .L10: return 1.0
#   n <= 9      -> .L11: scalar chain, one mulsd per step
#   otherwise   -> packed loop .L9 multiplying two lanes by x per iteration,
#                  then the two lanes are multiplied together and any
#                  remaining factors are applied with scalar mulsd.
# Conditional jumps that follow a mulsd use the flags set by the cmpl/testl
# just before it; the SSE multiply does not write EFLAGS.
my_pow:
.LFB11:
    .cfi_startproc
    testl   %edi, %edi          # n == 0 ?
    leal    -1(%rdi), %edx      # edx = n - 1 (lea leaves the flags alone)
    je  .L10
    movl    %edi, %ecx
    shrl    %ecx                # ecx = n / 2 = packed iteration count
    movl    %ecx, %esi
    addl    %esi, %esi          # esi = 2 * (n / 2)
    je  .L11                    # nothing for the packed loop to do
    cmpl    $9, %edi
    jbe .L11                    # small n: use the scalar chain
    movapd  %xmm0, %xmm1
    movapd  .LC0(%rip), %xmm2   # xmm2 = {1.0, 1.0}
    xorl    %eax, %eax          # eax = iteration counter
    unpcklpd    %xmm1, %xmm1    # xmm1 = {x, x}
.L9:
    addl    $1, %eax
    mulpd   %xmm1, %xmm2        # both lanes *= x
    cmpl    %eax, %ecx
    ja  .L9                     # repeat while counter < n / 2
    movapd  %xmm2, -24(%rsp)    # spill both lanes to read the high one back
    subl    %esi, %edx          # edx = (n - 1) - 2 * (n / 2)
    cmpl    %esi, %edi          # was n even?
    movsd   -16(%rsp), %xmm1    # xmm1 = high lane
    mulsd   %xmm2, %xmm1        # xmm1 = high lane * low lane
    je  .L2                     # n even: done
    testl   %edx, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
.L35:
    # Unrolled scalar tail: multiply by x, then stop once the compared
    # constant equals the count in edx.
    cmpl    $1, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $2, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $3, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $4, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $5, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $6, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $7, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    mulsd   %xmm0, %xmm1
    .p2align 4,,10
    .p2align 3
.L2:
    movapd  %xmm1, %xmm0        # return value
    ret
    .p2align 4,,10
    .p2align 3
.L11:
    # Scalar path: start from 1.0 and apply the first multiply here.
    movsd   .LC1(%rip), %xmm1
    testl   %edx, %edx          # n - 1 == 0 ?
    mulsd   %xmm0, %xmm1
    je  .L2
    jmp .L35
    .p2align 4,,10
    .p2align 3
.L10:
    movsd   .LC1(%rip), %xmm1   # n == 0: result is 1.0
    jmp .L2
    .cfi_endproc
.LFE11:
    .size   my_pow, .-my_pow
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC2:
    .string "%E\n"
    .text
    .p2align 4,,15
    .globl  foo
    .type   foo, @function
# foo as compiled with -Ofast from foo1.c (two printf calls).
# my_pow(small, 19) has been inlined: there is no call to my_pow, and the
# power is computed here, at run time, with packed and scalar multiplies.
foo:
.LFB12:
    .cfi_startproc
    movapd  %xmm0, %xmm2
    subq    $24, %rsp               # room to keep the result across the call
    .cfi_def_cfa_offset 32
    movl    $.LC2, %edi             # format string "%E\n"
    movl    $1, %eax                # al = 1: one vector register argument
    unpcklpd    %xmm2, %xmm2        # xmm2 = {x, x}
    movapd  %xmm2, %xmm1
    mulpd   %xmm2, %xmm1            # x^2 in both lanes
    mulpd   %xmm1, %xmm1            # x^4
    mulpd   %xmm1, %xmm1            # x^8
    mulpd   %xmm2, %xmm1            # x^9
    movapd  %xmm1, %xmm2
    unpckhpd    %xmm1, %xmm1        # xmm1 = {high lane, high lane}
    mulsd   %xmm1, %xmm2            # low * high = x^18
    mulsd   %xmm0, %xmm2            # x^19
    movapd  %xmm2, %xmm0            # first printf argument
    movsd   %xmm2, 8(%rsp)          # save the result for the second printf
    call    printf
    movsd   8(%rsp), %xmm2          # reload the saved result
    movl    $.LC2, %edi
    movl    $1, %eax
    addq    $24, %rsp
    .cfi_def_cfa_offset 8
    movapd  %xmm2, %xmm0
    jmp printf                      # second printf as a tail call
    .cfi_endproc
.LFE12:
    .size   foo, .-foo
    .section    .text.startup,"ax",@progbits
    .p2align 4,,15
    .globl  main
    .type   main, @function
# main as compiled with -Ofast from foo1.c: loads the precomputed argument
# from .LC3, calls foo as a separate function, and returns 0.
main:
.LFB13:
    .cfi_startproc
    subq    $8, %rsp
    .cfi_def_cfa_offset 16
    movsd   .LC3(%rip), %xmm0       # argument for foo
    call    foo
    xorl    %eax, %eax              # return 0
    addq    $8, %rsp
    .cfi_def_cfa_offset 8
    ret
    .cfi_endproc
.LFE13:
    .size   main, .-main
    # Constant pool. Each double is emitted as two .long words, low word first.
    .section    .rodata.cst16,"aM",@progbits,16
    .align 16
.LC0:
    # Two doubles with high word 1072693248 = 0x3FF00000: {1.0, 1.0}
    .long   0
    .long   1072693248
    .long   0
    .long   1072693248
    .section    .rodata.cst8,"aM",@progbits,8
    .align 8
.LC1:
    # 1.0
    .long   0
    .long   1072693248
    .align 8
.LC3:
    # High word -1131413504 = 0xBC900000: -2^-54, about -5.551115E-17.
    # This is the argument main passes to foo.
    .long   0
    .long   -1131413504
    .ident  "GCC: (GNU) 4.8.5 20150623 (Red Hat 4.8.5-39)"
    .section    .note.GNU-stack,"",@progbits

gcc foo1.c -S -o - : gcc foo1.c -S -o -

    .file   "foo1.c"
    .text
    .globl  my_pow
    .type   my_pow, @function
# my_pow compiled without optimization: a direct translation of the C loop.
# Stack slots: -24(%rbp) = x, -28(%rbp) = n, -8(%rbp) = y.
my_pow:
.LFB0:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    movsd   %xmm0, -24(%rbp)        # store x
    movl    %edi, -28(%rbp)         # store n
    movabsq $4607182418800017408, %rax  # 0x3FF0000000000000 = 1.0
    movq    %rax, -8(%rbp)          # y = 1
    jmp .L2                         # test the loop condition first
.L3:
    movsd   -8(%rbp), %xmm0
    mulsd   -24(%rbp), %xmm0
    movsd   %xmm0, -8(%rbp)         # y = y * x
.L2:
    movl    -28(%rbp), %eax         # eax = n before the decrement
    leal    -1(%rax), %edx
    movl    %edx, -28(%rbp)         # n = n - 1
    testl   %eax, %eax
    jne .L3                         # loop while the old n was non-zero
    movq    -8(%rbp), %rax
    movq    %rax, -40(%rbp)
    movsd   -40(%rbp), %xmm0        # return y
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE0:
    .size   my_pow, .-my_pow
    .section    .rodata
.LC1:
    .string "%E\n"
    .text
    .globl  foo
    .type   foo, @function
# foo compiled without optimization from foo1.c: calls my_pow(small, 19),
# stores the result at -8(%rbp), and calls printf twice with that value.
foo:
.LFB1:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $32, %rsp
    movsd   %xmm0, -24(%rbp)        # store small
    movq    -24(%rbp), %rax
    movl    $19, %edi               # n = 19
    movq    %rax, -32(%rbp)
    movsd   -32(%rbp), %xmm0        # x argument = small
    call    my_pow
    movsd   %xmm0, -32(%rbp)
    movq    -32(%rbp), %rax
    movq    %rax, -8(%rbp)          # x = result of my_pow
    movq    -8(%rbp), %rax
    movq    %rax, -32(%rbp)
    movsd   -32(%rbp), %xmm0        # printf value argument
    movl    $.LC1, %edi             # format string "%E\n"
    movl    $1, %eax                # al = 1: one vector register argument
    call    printf
    movq    -8(%rbp), %rax
    movq    %rax, -32(%rbp)
    movsd   -32(%rbp), %xmm0        # same value again
    movl    $.LC1, %edi
    movl    $1, %eax
    call    printf
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE1:
    .size   foo, .-foo
    .globl  main
    .type   main, @function
# main compiled without optimization from foo1.c: materializes the argument
# as an immediate bit pattern, calls foo, and returns 0.
main:
.LFB2:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $16, %rsp
    movabsq $-4859383997932765184, %rax     # 0xBC90000000000000 = -2^-54
    movq    %rax, -8(%rbp)
    movsd   -8(%rbp), %xmm0         # argument for foo
    call    foo
    movl    $0, %eax                # return 0
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE2:
    .size   main, .-main
    .ident  "GCC: (GNU) 4.8.5 20150623 (Red Hat 4.8.5-39)"
    .section    .note.GNU-stack,"",@progbits

gcc -Ofast foo2.c -S -o - : gcc -Ofast foo2.c -S -o -

    .file   "foo2.c"
    .text
    .p2align 4,,15
    .globl  my_pow
    .type   my_pow, @function
# my_pow as compiled with -Ofast from foo2.c (System V AMD64: x in %xmm0,
# n in %edi, result in %xmm0).
#   n == 0      -> .L10: return 1.0
#   n <= 9      -> .L11: scalar chain, one mulsd per step
#   otherwise   -> packed loop .L9 multiplying two lanes by x per iteration,
#                  then the two lanes are multiplied together and any
#                  remaining factors are applied with scalar mulsd.
# Conditional jumps that follow a mulsd use the flags set by the cmpl/testl
# just before it; the SSE multiply does not write EFLAGS.
my_pow:
.LFB11:
    .cfi_startproc
    testl   %edi, %edi          # n == 0 ?
    leal    -1(%rdi), %edx      # edx = n - 1 (lea leaves the flags alone)
    je  .L10
    movl    %edi, %ecx
    shrl    %ecx                # ecx = n / 2 = packed iteration count
    movl    %ecx, %esi
    addl    %esi, %esi          # esi = 2 * (n / 2)
    je  .L11                    # nothing for the packed loop to do
    cmpl    $9, %edi
    jbe .L11                    # small n: use the scalar chain
    movapd  %xmm0, %xmm1
    movapd  .LC0(%rip), %xmm2   # xmm2 = {1.0, 1.0}
    xorl    %eax, %eax          # eax = iteration counter
    unpcklpd    %xmm1, %xmm1    # xmm1 = {x, x}
.L9:
    addl    $1, %eax
    mulpd   %xmm1, %xmm2        # both lanes *= x
    cmpl    %eax, %ecx
    ja  .L9                     # repeat while counter < n / 2
    movapd  %xmm2, -24(%rsp)    # spill both lanes to read the high one back
    subl    %esi, %edx          # edx = (n - 1) - 2 * (n / 2)
    cmpl    %esi, %edi          # was n even?
    movsd   -16(%rsp), %xmm1    # xmm1 = high lane
    mulsd   %xmm2, %xmm1        # xmm1 = high lane * low lane
    je  .L2                     # n even: done
    testl   %edx, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
.L35:
    # Unrolled scalar tail: multiply by x, then stop once the compared
    # constant equals the count in edx.
    cmpl    $1, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $2, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $3, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $4, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $5, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $6, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    cmpl    $7, %edx
    mulsd   %xmm0, %xmm1
    je  .L2
    mulsd   %xmm0, %xmm1
    .p2align 4,,10
    .p2align 3
.L2:
    movapd  %xmm1, %xmm0        # return value
    ret
    .p2align 4,,10
    .p2align 3
.L11:
    # Scalar path: start from 1.0 and apply the first multiply here.
    movsd   .LC1(%rip), %xmm1
    testl   %edx, %edx          # n - 1 == 0 ?
    mulsd   %xmm0, %xmm1
    je  .L2
    jmp .L35
    .p2align 4,,10
    .p2align 3
.L10:
    movsd   .LC1(%rip), %xmm1   # n == 0: result is 1.0
    jmp .L2
    .cfi_endproc
.LFE11:
    .size   my_pow, .-my_pow
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC2:
    .string "%E\n"
    .text
    .p2align 4,,15
    .globl  foo
    .type   foo, @function
# foo as compiled with -Ofast from foo2.c (single printf).
# my_pow(small, 19) is inlined and computed with packed and scalar
# multiplies; no stack frame is needed because the only printf is a tail call.
foo:
.LFB12:
    .cfi_startproc
    movapd  %xmm0, %xmm2
    movl    $.LC2, %edi             # format string "%E\n"
    movl    $1, %eax                # al = 1: one vector register argument
    unpcklpd    %xmm2, %xmm2        # xmm2 = {x, x}
    movapd  %xmm2, %xmm1
    mulpd   %xmm2, %xmm1            # x^2 in both lanes
    mulpd   %xmm1, %xmm1            # x^4
    mulpd   %xmm1, %xmm1            # x^8
    mulpd   %xmm2, %xmm1            # x^9
    movapd  %xmm1, %xmm2
    unpckhpd    %xmm1, %xmm1        # xmm1 = {high lane, high lane}
    mulsd   %xmm1, %xmm2            # low * high = x^18
    mulsd   %xmm0, %xmm2            # x^19
    movapd  %xmm2, %xmm0            # printf value argument
    jmp printf                      # tail call
    .cfi_endproc
.LFE12:
    .size   foo, .-foo
    .section    .text.startup,"ax",@progbits
    .p2align 4,,15
    .globl  main
    .type   main, @function
# main as compiled with -Ofast from foo2.c: there is no call to foo and no
# multiplication here; the constant .LC3 is passed straight to printf.
main:
.LFB13:
    .cfi_startproc
    subq    $8, %rsp
    .cfi_def_cfa_offset 16
    movl    $.LC2, %edi             # format string "%E\n"
    movl    $1, %eax                # al = 1: one vector register argument
    movsd   .LC3(%rip), %xmm0       # precomputed value to print
    call    printf
    xorl    %eax, %eax              # return 0
    addq    $8, %rsp
    .cfi_def_cfa_offset 8
    ret
    .cfi_endproc
.LFE13:
    .size   main, .-main
    # Constant pool. Each double is emitted as two .long words, low word first.
    .section    .rodata.cst16,"aM",@progbits,16
    .align 16
.LC0:
    # Two doubles with high word 1072693248 = 0x3FF00000: {1.0, 1.0}
    .long   0
    .long   1072693248
    .long   0
    .long   1072693248
    .section    .rodata.cst8,"aM",@progbits,8
    .align 8
.LC1:
    # 1.0
    .long   0
    .long   1072693248
    .align 8
.LC3:
    # High word -2147418112 = 0x80010000: sign bit set, exponent field 0
    # (a denormal), value -2^-1026, about -1.390671E-309.
    # This is the value main passes to printf.
    .long   0
    .long   -2147418112
    .ident  "GCC: (GNU) 4.8.5 20150623 (Red Hat 4.8.5-39)"
    .section    .note.GNU-stack,"",@progbits

gcc foo2.c -S -o - : gcc foo2.c -S -o -

    .file   "foo2.c"
    .text
    .globl  my_pow
    .type   my_pow, @function
# my_pow compiled without optimization from foo2.c: a direct translation of
# the C loop. Stack slots: -24(%rbp) = x, -28(%rbp) = n, -8(%rbp) = y.
my_pow:
.LFB0:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    movsd   %xmm0, -24(%rbp)        # store x
    movl    %edi, -28(%rbp)         # store n
    movabsq $4607182418800017408, %rax  # 0x3FF0000000000000 = 1.0
    movq    %rax, -8(%rbp)          # y = 1
    jmp .L2                         # test the loop condition first
.L3:
    movsd   -8(%rbp), %xmm0
    mulsd   -24(%rbp), %xmm0
    movsd   %xmm0, -8(%rbp)         # y = y * x
.L2:
    movl    -28(%rbp), %eax         # eax = n before the decrement
    leal    -1(%rax), %edx
    movl    %edx, -28(%rbp)         # n = n - 1
    testl   %eax, %eax
    jne .L3                         # loop while the old n was non-zero
    movq    -8(%rbp), %rax
    movq    %rax, -40(%rbp)
    movsd   -40(%rbp), %xmm0        # return y
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE0:
    .size   my_pow, .-my_pow
    .section    .rodata
.LC1:
    .string "%E\n"
    .text
    .globl  foo
    .type   foo, @function
# foo compiled without optimization from foo2.c: calls my_pow(small, 19),
# stores the result at -8(%rbp), and calls printf once with that value.
foo:
.LFB1:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $32, %rsp
    movsd   %xmm0, -24(%rbp)        # store small
    movq    -24(%rbp), %rax
    movl    $19, %edi               # n = 19
    movq    %rax, -32(%rbp)
    movsd   -32(%rbp), %xmm0        # x argument = small
    call    my_pow
    movsd   %xmm0, -32(%rbp)
    movq    -32(%rbp), %rax
    movq    %rax, -8(%rbp)          # x = result of my_pow
    movq    -8(%rbp), %rax
    movq    %rax, -32(%rbp)
    movsd   -32(%rbp), %xmm0        # printf value argument
    movl    $.LC1, %edi             # format string "%E\n"
    movl    $1, %eax                # al = 1: one vector register argument
    call    printf
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE1:
    .size   foo, .-foo
    .globl  main
    .type   main, @function
# main compiled without optimization from foo2.c: materializes the argument
# as an immediate bit pattern, calls foo, and returns 0.
main:
.LFB2:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $16, %rsp
    movabsq $-4859383997932765184, %rax     # 0xBC90000000000000 = -2^-54
    movq    %rax, -8(%rbp)
    movsd   -8(%rbp), %xmm0         # argument for foo
    call    foo
    movl    $0, %eax                # return 0
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE2:
    .size   main, .-main
    .ident  "GCC: (GNU) 4.8.5 20150623 (Red Hat 4.8.5-39)"
    .section    .note.GNU-stack,"",@progbits

Under -ffast-math (and its siblings such as -Ofast), gcc links your program with special startup code from crtfastmath.c, which sets the flush-to-zero (FTZ) and denormals-are-zero (DAZ) flags: 在 -ffast-math(以及 -Ofast 等同类选项)下,gcc 会将您的程序与 crtfastmath.c 中的特殊启动代码链接,该代码会设置 flush-to-zero(FTZ)和 denormals-are-zero(DAZ)标志:

/*
 * Excerpt of the startup hook linked in by the fast-math options.
 * It is declared with the constructor attribute, which GCC uses for
 * functions that run before main. On x86_64 it reads the SSE control/status
 * register MXCSR, sets the DAZ and FTZ bits, and writes the register back.
 * The non-x86_64 branch is elided ("...") in this excerpt.
 */
static void __attribute__((constructor))
set_fast_math (void)
{
#ifndef __x86_64__
...
#else
  /* Read the current MXCSR value. */
  unsigned int mxcsr = __builtin_ia32_stmxcsr ();
  /* Request denormals-are-zero (inputs) and flush-to-zero (results). */
  mxcsr |= MXCSR_DAZ | MXCSR_FTZ;
  /* Install the updated control word. */
  __builtin_ia32_ldmxcsr (mxcsr);
#endif
}

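(from here). (从这里)。 These flags only affect arithmetic performed at run time. In the -Ofast build of foo1.c, foo is a separate function that multiplies at run time, so the denormal result -1.390671E-309 is flushed to -0. In the -Ofast build of foo2.c, the whole computation was folded at compile time: main loads the precomputed constant .LC3 and passes it directly to printf, so no run-time multiplication produces a denormal and the value is printed as is. 这些标志只影响运行时执行的运算。在 foo1.c 的 -Ofast 版本中,foo 是一个独立的函数,乘法在运行时执行,因此非规格化结果 -1.390671E-309 被刷新为 -0。在 foo2.c 的 -Ofast 版本中,整个计算在编译期就被折叠了:main 直接加载预先计算好的常量 .LC3 并传给 printf,运行时没有任何乘法会产生非规格化数,因此该值按原样打印。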

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM