[英]Accessing stack variable slower than dereferencing pointer?
我运行了两个执行相同操作的测试:累加 N 个随机数。Test_1 使用栈上的 int,Test_2 使用堆上的 int。出乎意料的是,在我的机器上,Test_1 的运行时间约为 945 毫秒,Test_2 的运行时间约为 915 毫秒(尽管每次运行的时间可能有所不同,但这个差异非常稳定且相当明显)。有什么能解释如此显著的差异?我使用 g++ 编译,-O2 和 -O3 两种优化级别都试过。
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <chrono>
using namespace std;
// Micro-benchmark: adds the results of N calls to rand() into a single int
// and reports how long the loop took in milliseconds.
// Two variants are compared by toggling the commented lines:
//   TEST_1 accumulates into the local variable `x`;
//   TEST_2 accumulates through the pointer `p` obtained from `new int`.
// NOTE(review): with N = 100000000 the running sum very likely exceeds the
// range of a 32-bit int, and signed overflow is undefined behaviour -- confirm
// whether wrap-around is acceptable for this experiment.
// NOTE(review): in TEST_2, `new int` leaves the value uninitialised, so
// `*p += rand()` starts from an indeterminate value, and `p` is never deleted.
int main()
{
// Number of rand() calls to time.
constexpr int N = 100000000;
int x = 0; // TEST_1
//int* p = new int; //TEST_2
// Only the loop is timed; the TEST_2 allocation above happens before this point.
auto start_time = chrono::high_resolution_clock::now();
for (int i = 0; i < N; ++i)
{
x += rand(); //TEST_1
//*p += rand(); // TEST_2
}
auto end_time = chrono::high_resolution_clock::now();
// Print the accumulated sum.
cout << x << endl; // TEST_1
//cout << *p << endl; //TEST_2
// Print the elapsed loop time in whole milliseconds.
cout << "Time: ";
cout << chrono::duration_cast<chrono::milliseconds>(end_time - start_time).count() << endl;
}
Test_1 的汇编代码:
.file "main.cpp"
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
.string "Time: "
.section .text.unlikely,"ax",@progbits
.LCOLDB1:
.section .text.startup,"ax",@progbits
.LHOTB1:
.p2align 4,,15
.globl main
.type main, @function
# main (TEST_1 build): the sum is kept in a register for the whole loop.
main:
.LFB1578:
.cfi_startproc
# Save the callee-saved registers used below (r12, rbp, rbx).
pushq %r12
.cfi_def_cfa_offset 16
.cfi_offset 12, -16
pushq %rbp
.cfi_def_cfa_offset 24
.cfi_offset 6, -24
# %ebp holds the running sum (x), initialised to 0.
xorl %ebp, %ebp
pushq %rbx
.cfi_def_cfa_offset 32
.cfi_offset 3, -32
# %ebx is the loop counter, counting down from 100000000.
movl $100000000, %ebx
# start_time: the call target is std::chrono::system_clock::now(); result saved in %r12.
call _ZNSt6chrono3_V212system_clock3nowEv
movq %rax, %r12
.p2align 4,,10
.p2align 3
# Timed loop: call rand, add its result to the register accumulator, decrement, repeat.
.L2:
call rand
addl %eax, %ebp
subl $1, %ebx
jne .L2
# end_time: second now() call; result saved in %rbx once the counter is no longer needed.
call _ZNSt6chrono3_V212system_clock3nowEv
# std::cout << x (std::ostream::operator<<(int)), followed by std::endl.
movl %ebp, %esi
movl $_ZSt4cout, %edi
movq %rax, %rbx
call _ZNSolsEi
movq %rax, %rdi
call _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_
# std::cout << "Time: ".
movl $.LC0, %esi
movl $_ZSt4cout, %edi
call _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc
# (end_time - start_time) / 1000000 as a signed division by constant:
# multiply-high by ceil(2^82 / 10^6), shift right by 18, then correct for the sign.
# This is the duration_cast to milliseconds.
movq %rbx, %rcx
movabsq $4835703278458516699, %rdx
movl $_ZSt4cout, %edi
subq %r12, %rcx
movq %rcx, %rax
sarq $63, %rcx
imulq %rdx
sarq $18, %rdx
movq %rdx, %rsi
subq %rcx, %rsi
# Print the millisecond count (std::ostream::_M_insert<long>), followed by std::endl.
call _ZNSo9_M_insertIlEERSoT_
movq %rax, %rdi
call _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_
# Restore the saved registers and return 0.
popq %rbx
.cfi_def_cfa_offset 24
xorl %eax, %eax
popq %rbp
.cfi_def_cfa_offset 16
popq %r12
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE1578:
.size main, .-main
.section .text.unlikely
.LCOLDE1:
.section .text.startup
.LHOTE1:
.section .text.unlikely
.LCOLDB2:
.section .text.startup
.LHOTB2:
.p2align 4,,15
.type _GLOBAL__sub_I_main, @function
# Static initialiser for this translation unit (referenced from .init_array):
# constructs the std::ios_base::Init object __ioinit and registers its destructor.
_GLOBAL__sub_I_main:
.LFB1743:
.cfi_startproc
subq $8, %rsp
.cfi_def_cfa_offset 16
# std::ios_base::Init::Init() on __ioinit.
movl $_ZStL8__ioinit, %edi
call _ZNSt8ios_base4InitC1Ev
# __cxa_atexit(&std::ios_base::Init::~Init, &__ioinit, &__dso_handle), made as a tail call.
movl $__dso_handle, %edx
movl $_ZStL8__ioinit, %esi
movl $_ZNSt8ios_base4InitD1Ev, %edi
addq $8, %rsp
.cfi_def_cfa_offset 8
jmp __cxa_atexit
.cfi_endproc
.LFE1743:
.size _GLOBAL__sub_I_main, .-_GLOBAL__sub_I_main
.section .text.unlikely
.LCOLDE2:
.section .text.startup
.LHOTE2:
.section .init_array,"aw"
.align 8
.quad _GLOBAL__sub_I_main
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
.hidden __dso_handle
.ident "GCC: (Ubuntu 4.9.1-16ubuntu6) 4.9.1"
.section .note.GNU-stack,"",@progbits
Test_2 的汇编代码:
.file "main.cpp"
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
.string "Time: "
.section .text.unlikely,"ax",@progbits
.LCOLDB1:
.section .text.startup,"ax",@progbits
.LHOTB1:
.p2align 4,,15
.globl main
.type main, @function
# main (TEST_2 build): the sum lives in a 4-byte heap object and is updated in memory.
main:
.LFB1578:
.cfi_startproc
# Save the callee-saved registers used below (r12, rbp, rbx).
pushq %r12
.cfi_def_cfa_offset 16
.cfi_offset 12, -16
pushq %rbp
.cfi_def_cfa_offset 24
.cfi_offset 6, -24
# Argument for operator new: 4 bytes (one int).
movl $4, %edi
pushq %rbx
.cfi_def_cfa_offset 32
.cfi_offset 3, -32
# %ebx is the loop counter, counting down from 100000000.
movl $100000000, %ebx
# p = operator new(4); the pointer is kept in %rbp. The int is not initialised here.
call _Znwm
movq %rax, %rbp
# start_time: the call target is std::chrono::system_clock::now(); result saved in %r12.
call _ZNSt6chrono3_V212system_clock3nowEv
movq %rax, %r12
.p2align 4,,10
.p2align 3
# Timed loop: call rand, add its result directly to the int at (%rbp)
# (a read-modify-write on memory instead of a register add), decrement, repeat.
.L2:
call rand
addl %eax, 0(%rbp)
subl $1, %ebx
jne .L2
# end_time: second now() call; result saved in %rbx once the counter is no longer needed.
call _ZNSt6chrono3_V212system_clock3nowEv
# std::cout << *p: the sum is loaded back from memory, then std::endl.
movl 0(%rbp), %esi
movl $_ZSt4cout, %edi
movq %rax, %rbx
call _ZNSolsEi
movq %rax, %rdi
call _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_
# std::cout << "Time: ".
movl $.LC0, %esi
movl $_ZSt4cout, %edi
call _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc
# (end_time - start_time) / 1000000 as a signed division by constant:
# multiply-high by ceil(2^82 / 10^6), shift right by 18, then correct for the sign.
# This is the duration_cast to milliseconds.
movq %rbx, %rcx
movabsq $4835703278458516699, %rdx
movl $_ZSt4cout, %edi
subq %r12, %rcx
movq %rcx, %rax
sarq $63, %rcx
imulq %rdx
sarq $18, %rdx
movq %rdx, %rsi
subq %rcx, %rsi
# Print the millisecond count (std::ostream::_M_insert<long>), followed by std::endl.
call _ZNSo9_M_insertIlEERSoT_
movq %rax, %rdi
call _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_
# Restore the saved registers and return 0. No operator delete call appears in this function.
popq %rbx
.cfi_def_cfa_offset 24
xorl %eax, %eax
popq %rbp
.cfi_def_cfa_offset 16
popq %r12
.cfi_def_cfa_offset 8
ret
.cfi_endproc
.LFE1578:
.size main, .-main
.section .text.unlikely
.LCOLDE1:
.section .text.startup
.LHOTE1:
.section .text.unlikely
.LCOLDB2:
.section .text.startup
.LHOTB2:
.p2align 4,,15
.type _GLOBAL__sub_I_main, @function
# Static initialiser for this translation unit (referenced from .init_array):
# constructs the std::ios_base::Init object __ioinit and registers its destructor.
_GLOBAL__sub_I_main:
.LFB1743:
.cfi_startproc
subq $8, %rsp
.cfi_def_cfa_offset 16
# std::ios_base::Init::Init() on __ioinit.
movl $_ZStL8__ioinit, %edi
call _ZNSt8ios_base4InitC1Ev
# __cxa_atexit(&std::ios_base::Init::~Init, &__ioinit, &__dso_handle), made as a tail call.
movl $__dso_handle, %edx
movl $_ZStL8__ioinit, %esi
movl $_ZNSt8ios_base4InitD1Ev, %edi
addq $8, %rsp
.cfi_def_cfa_offset 8
jmp __cxa_atexit
.cfi_endproc
.LFE1743:
.size _GLOBAL__sub_I_main, .-_GLOBAL__sub_I_main
.section .text.unlikely
.LCOLDE2:
.section .text.startup
.LHOTE2:
.section .init_array,"aw"
.align 8
.quad _GLOBAL__sub_I_main
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
.hidden __dso_handle
.ident "GCC: (Ubuntu 4.9.1-16ubuntu6) 4.9.1"
.section .note.GNU-stack,"",@progbits
使用带优化的编译器时,无论变量位于内存中的何处,对变量的访问都应该相同。编译器可以设置一个指针,然后对该指针解引用以获取该值。
其他因素:
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.