繁体   English   中英

访问堆栈变量比解引用指针慢?

[英]Accessing stack variable slower than dereferencing pointer?

我运行了两个执行相同操作的测试:累加N个随机数。 Test_1使用栈上的int,Test_2使用堆上的int。 出乎意料的是,在我的机器上,Test_1的运行时间约为945毫秒,Test_2的运行时间约为915毫秒(尽管时间可能有所波动,但差异非常一致且非常明显)。 有什么能解释如此显著的差异? 我使用g++编译,优化选项为-O2和-O3。

#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <chrono>

using namespace std;

// Micro-benchmark: adds N values returned by rand() into an int accumulator
// and prints the accumulated value followed by the elapsed time in
// milliseconds.
//
// As written this is the TEST_1 variant (the accumulator is the local
// variable `x`). To build the TEST_2 variant (the accumulator is an int
// obtained from `new int` and reached through `p`), comment out each line
// tagged TEST_1 and enable the commented-out TEST_2 line below it.
//
// NOTE(review): the sum of N rand() results will almost certainly exceed the
// range of int, and signed overflow is undefined behaviour in C++ — confirm
// whether an unsigned accumulator is acceptable for the experiment.
// NOTE(review): in the TEST_2 variant `new int` has no initializer, so `*p`
// is read before it is ever written, and the allocation is never deleted.
int main()
{
  // Number of rand() calls performed inside the timed region.
  constexpr int N = 100000000;
  int x = 0; // TEST_1
  //int* p = new int; //TEST_2
  // Timed region begins here: only the accumulation loop is measured.
  auto start_time = chrono::high_resolution_clock::now();
  for (int i = 0; i < N; ++i)
  {
    x += rand(); //TEST_1
    //*p += rand(); // TEST_2
  }
  auto end_time = chrono::high_resolution_clock::now();

  // Print the accumulated value; presumably this also keeps the loop's
  // result observable so the accumulation cannot be discarded — TODO confirm.
  cout << x << endl; // TEST_1
  //cout << *p << endl; //TEST_2
  cout << "Time: ";
  // Elapsed time, truncated to whole milliseconds by duration_cast.
  cout << chrono::duration_cast<chrono::milliseconds>(end_time - start_time).count() << endl;
}

Test_1的汇编代码:

    .file   "main.cpp"
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC0:
    .string "Time: "
    .section    .text.unlikely,"ax",@progbits
.LCOLDB1:
    .section    .text.startup,"ax",@progbits
.LHOTB1:
    .p2align 4,,15
    .globl  main
    .type   main, @function
# main (Test_1 build): the accumulator is kept in %ebp for the whole loop and
# is never stored to memory; %ebx is the down-counting loop counter and %r12
# holds the start timestamp.
main:
.LFB1578:
    .cfi_startproc
    # Save %r12, %rbp and %rbx; they are restored by the pops before ret.
    pushq   %r12
    .cfi_def_cfa_offset 16
    .cfi_offset 12, -16
    pushq   %rbp
    .cfi_def_cfa_offset 24
    .cfi_offset 6, -24
    # %ebp = 0: the accumulator (printed later via operator<<(int)).
    xorl    %ebp, %ebp
    pushq   %rbx
    .cfi_def_cfa_offset 32
    .cfi_offset 3, -32
    # %ebx = 100000000: remaining iterations.
    movl    $100000000, %ebx
    # std::chrono::_V2::system_clock::now(); result kept in %r12 as start time.
    call    _ZNSt6chrono3_V212system_clock3nowEv
    movq    %rax, %r12
    .p2align 4,,10
    .p2align 3
# Timed loop: one rand() call and one register-to-register add per iteration.
.L2:
    call    rand
    # accumulator += rand() result; destination is a register.
    addl    %eax, %ebp
    subl    $1, %ebx
    # Loop until the counter reaches zero.
    jne .L2
    # Second now() call; end time is kept in %rbx (the counter is finished).
    call    _ZNSt6chrono3_V212system_clock3nowEv
    # std::cout << accumulator (std::ostream::operator<<(int)), then std::endl.
    movl    %ebp, %esi
    movl    $_ZSt4cout, %edi
    movq    %rax, %rbx
    call    _ZNSolsEi
    movq    %rax, %rdi
    call    _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_
    # std::cout << "Time: " (.LC0).
    movl    $.LC0, %esi
    movl    $_ZSt4cout, %edi
    call    _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc
    # %rcx = end - start. The multiply-high by the constant below, the shift
    # by 18 and the sign correction (sarq $63 / subq) form a signed division
    # by a constant; the constant is approximately 2^82 / 10^6, i.e. a divide
    # by 1,000,000, matching the source's duration_cast to milliseconds.
    movq    %rbx, %rcx
    movabsq $4835703278458516699, %rdx
    movl    $_ZSt4cout, %edi
    subq    %r12, %rcx
    movq    %rcx, %rax
    sarq    $63, %rcx
    imulq   %rdx
    sarq    $18, %rdx
    movq    %rdx, %rsi
    subq    %rcx, %rsi
    # std::ostream::_M_insert<long>(quotient), then std::endl.
    call    _ZNSo9_M_insertIlEERSoT_
    movq    %rax, %rdi
    call    _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_
    # Restore the saved registers and return 0 in %eax.
    popq    %rbx
    .cfi_def_cfa_offset 24
    xorl    %eax, %eax
    popq    %rbp
    .cfi_def_cfa_offset 16
    popq    %r12
    .cfi_def_cfa_offset 8
    ret
    .cfi_endproc
.LFE1578:
    .size   main, .-main
    .section    .text.unlikely
.LCOLDE1:
    .section    .text.startup
.LHOTE1:
    .section    .text.unlikely
.LCOLDB2:
    .section    .text.startup
.LHOTB2:
    .p2align 4,,15
    .type   _GLOBAL__sub_I_main, @function
# Static-initialisation stub, referenced from the .init_array entry further
# down in this listing: constructs the file-local std::ios_base::Init object
# (_ZStL8__ioinit) and registers its destructor with __cxa_atexit.
_GLOBAL__sub_I_main:
.LFB1743:
    .cfi_startproc
    subq    $8, %rsp
    .cfi_def_cfa_offset 16
    # std::ios_base::Init::Init() on _ZStL8__ioinit.
    movl    $_ZStL8__ioinit, %edi
    call    _ZNSt8ios_base4InitC1Ev
    # Arguments for __cxa_atexit: destructor in %edi, object in %esi,
    # __dso_handle in %edx.
    movl    $__dso_handle, %edx
    movl    $_ZStL8__ioinit, %esi
    movl    $_ZNSt8ios_base4InitD1Ev, %edi
    addq    $8, %rsp
    .cfi_def_cfa_offset 8
    # Tail call: the stack adjustment is undone first, then control jumps
    # (rather than calls) into __cxa_atexit.
    jmp __cxa_atexit
    .cfi_endproc
.LFE1743:
    .size   _GLOBAL__sub_I_main, .-_GLOBAL__sub_I_main
    .section    .text.unlikely
.LCOLDE2:
    .section    .text.startup
.LHOTE2:
    .section    .init_array,"aw"
    .align 8
    .quad   _GLOBAL__sub_I_main
    .local  _ZStL8__ioinit
    .comm   _ZStL8__ioinit,1,1
    .hidden __dso_handle
    .ident  "GCC: (Ubuntu 4.9.1-16ubuntu6) 4.9.1"
    .section    .note.GNU-stack,"",@progbits

Test_2的汇编代码:

    .file   "main.cpp"
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC0:
    .string "Time: "
    .section    .text.unlikely,"ax",@progbits
.LCOLDB1:
    .section    .text.startup,"ax",@progbits
.LHOTB1:
    .p2align 4,,15
    .globl  main
    .type   main, @function
# main (Test_2 build): %rbp holds the pointer returned by operator new, and
# the accumulator lives in the 4 bytes it points to; every iteration adds
# directly into that memory location. %ebx is the down-counting loop counter
# and %r12 holds the start timestamp.
main:
.LFB1578:
    .cfi_startproc
    # Save %r12, %rbp and %rbx; they are restored by the pops before ret.
    pushq   %r12
    .cfi_def_cfa_offset 16
    .cfi_offset 12, -16
    pushq   %rbp
    .cfi_def_cfa_offset 24
    .cfi_offset 6, -24
    # Argument for operator new: 4 bytes.
    movl    $4, %edi
    pushq   %rbx
    .cfi_def_cfa_offset 32
    .cfi_offset 3, -32
    # %ebx = 100000000: remaining iterations.
    movl    $100000000, %ebx
    # operator new(unsigned long); the returned pointer is kept in %rbp.
    # No store to 0(%rbp) is emitted before the loop starts.
    call    _Znwm
    movq    %rax, %rbp
    # std::chrono::_V2::system_clock::now(); result kept in %r12 as start time.
    call    _ZNSt6chrono3_V212system_clock3nowEv
    movq    %rax, %r12
    .p2align 4,,10
    .p2align 3
# Timed loop: one rand() call and one add with a memory destination per
# iteration.
.L2:
    call    rand
    # *p += rand() result; read-modify-write on the allocated int.
    addl    %eax, 0(%rbp)
    subl    $1, %ebx
    # Loop until the counter reaches zero.
    jne .L2
    # Second now() call; end time is kept in %rbx (the counter is finished).
    call    _ZNSt6chrono3_V212system_clock3nowEv
    # Load *p and print it: std::ostream::operator<<(int), then std::endl.
    movl    0(%rbp), %esi
    movl    $_ZSt4cout, %edi
    movq    %rax, %rbx
    call    _ZNSolsEi
    movq    %rax, %rdi
    call    _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_
    # std::cout << "Time: " (.LC0).
    movl    $.LC0, %esi
    movl    $_ZSt4cout, %edi
    call    _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc
    # %rcx = end - start. The multiply-high by the constant below, the shift
    # by 18 and the sign correction (sarq $63 / subq) form a signed division
    # by a constant; the constant is approximately 2^82 / 10^6, i.e. a divide
    # by 1,000,000, matching the source's duration_cast to milliseconds.
    movq    %rbx, %rcx
    movabsq $4835703278458516699, %rdx
    movl    $_ZSt4cout, %edi
    subq    %r12, %rcx
    movq    %rcx, %rax
    sarq    $63, %rcx
    imulq   %rdx
    sarq    $18, %rdx
    movq    %rdx, %rsi
    subq    %rcx, %rsi
    # std::ostream::_M_insert<long>(quotient), then std::endl.
    call    _ZNSo9_M_insertIlEERSoT_
    movq    %rax, %rdi
    call    _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_
    # Restore the saved registers and return 0 in %eax. No call to operator
    # delete appears anywhere in this function.
    popq    %rbx
    .cfi_def_cfa_offset 24
    xorl    %eax, %eax
    popq    %rbp
    .cfi_def_cfa_offset 16
    popq    %r12
    .cfi_def_cfa_offset 8
    ret
    .cfi_endproc
.LFE1578:
    .size   main, .-main
    .section    .text.unlikely
.LCOLDE1:
    .section    .text.startup
.LHOTE1:
    .section    .text.unlikely
.LCOLDB2:
    .section    .text.startup
.LHOTB2:
    .p2align 4,,15
    .type   _GLOBAL__sub_I_main, @function
# Static-initialisation stub, referenced from the .init_array entry further
# down in this listing: constructs the file-local std::ios_base::Init object
# (_ZStL8__ioinit) and registers its destructor with __cxa_atexit.
_GLOBAL__sub_I_main:
.LFB1743:
    .cfi_startproc
    subq    $8, %rsp
    .cfi_def_cfa_offset 16
    # std::ios_base::Init::Init() on _ZStL8__ioinit.
    movl    $_ZStL8__ioinit, %edi
    call    _ZNSt8ios_base4InitC1Ev
    # Arguments for __cxa_atexit: destructor in %edi, object in %esi,
    # __dso_handle in %edx.
    movl    $__dso_handle, %edx
    movl    $_ZStL8__ioinit, %esi
    movl    $_ZNSt8ios_base4InitD1Ev, %edi
    addq    $8, %rsp
    .cfi_def_cfa_offset 8
    # Tail call: the stack adjustment is undone first, then control jumps
    # (rather than calls) into __cxa_atexit.
    jmp __cxa_atexit
    .cfi_endproc
.LFE1743:
    .size   _GLOBAL__sub_I_main, .-_GLOBAL__sub_I_main
    .section    .text.unlikely
.LCOLDE2:
    .section    .text.startup
.LHOTE2:
    .section    .init_array,"aw"
    .align 8
    .quad   _GLOBAL__sub_I_main
    .local  _ZStL8__ioinit
    .comm   _ZStL8__ioinit,1,1
    .hidden __dso_handle
    .ident  "GCC: (Ubuntu 4.9.1-16ubuntu6) 4.9.1"
    .section    .note.GNU-stack,"",@progbits

使用开启了优化的编译器时,无论变量位于内存中的什么位置,对变量的访问开销都应该相同。 编译器可以设置一个指针,然后解引用该指针来获取该值。

其他因素:

  • 变量位于程序已驻留的内存空间之外,因此操作系统必须先调入相应的内存页。
  • 硬件对于不同的内存地址范围可能具有不同的内存访问时间。
  • 处理器的数据缓存会缓存该值,从而使测量结果失真。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM