简体   繁体   中英

Where is the C language global array located in its disassembly code?

I wrote a very simple C program as follows:

int data_items[] = {3,67,32,4,89,6,34,2,9,0};

int max(int* pt)
{
  int val = *pt;
  while(*pt != 0)
  {
    if (*pt > val)
    {
      val = *pt;
    }
    ++pt;
  }
  return val;
}

int main()
{

  max(data_items);

  return 0;
}

then I compiled it with gcc:

gcc main.c

and then disassembled it with:

objdump -d a.out

Finally, I got the following assembly code:

a.out:     file format elf64-x86-64


Disassembly of section .init:

00000000004003a8 <_init>:
  4003a8:   48 83 ec 08             sub    $0x8,%rsp
  4003ac:   48 8b 05 45 0c 20 00    mov    0x200c45(%rip),%rax        # 600ff8 <_DYNAMIC+0x1d0>
  4003b3:   48 85 c0                test   %rax,%rax
  4003b6:   74 05                   je     4003bd <_init+0x15>
  4003b8:   e8 33 00 00 00          callq  4003f0 <__gmon_start__@plt>
  4003bd:   48 83 c4 08             add    $0x8,%rsp
  4003c1:   c3                      retq   

Disassembly of section .plt:

00000000004003d0 <__libc_start_main@plt-0x10>:
  4003d0:   ff 35 32 0c 20 00       pushq  0x200c32(%rip)        # 601008 <_GLOBAL_OFFSET_TABLE_+0x8>
  4003d6:   ff 25 34 0c 20 00       jmpq   *0x200c34(%rip)        # 601010 <_GLOBAL_OFFSET_TABLE_+0x10>
  4003dc:   0f 1f 40 00             nopl   0x0(%rax)

00000000004003e0 <__libc_start_main@plt>:
  4003e0:   ff 25 32 0c 20 00       jmpq   *0x200c32(%rip)        # 601018 <_GLOBAL_OFFSET_TABLE_+0x18>
  4003e6:   68 00 00 00 00          pushq  $0x0
  4003eb:   e9 e0 ff ff ff          jmpq   4003d0 <_init+0x28>

00000000004003f0 <__gmon_start__@plt>:
  4003f0:   ff 25 2a 0c 20 00       jmpq   *0x200c2a(%rip)        # 601020 <_GLOBAL_OFFSET_TABLE_+0x20>
  4003f6:   68 01 00 00 00          pushq  $0x1
  4003fb:   e9 d0 ff ff ff          jmpq   4003d0 <_init+0x28>

Disassembly of section .text:

0000000000400400 <_start>:
  400400:   31 ed                   xor    %ebp,%ebp
  400402:   49 89 d1                mov    %rdx,%r9
  400405:   5e                      pop    %rsi
  400406:   48 89 e2                mov    %rsp,%rdx
  400409:   48 83 e4 f0             and    $0xfffffffffffffff0,%rsp
  40040d:   50                      push   %rax
  40040e:   54                      push   %rsp
  40040f:   49 c7 c0 b0 05 40 00    mov    $0x4005b0,%r8
  400416:   48 c7 c1 40 05 40 00    mov    $0x400540,%rcx
  40041d:   48 c7 c7 28 05 40 00    mov    $0x400528,%rdi
  400424:   e8 b7 ff ff ff          callq  4003e0 <__libc_start_main@plt>
  400429:   f4                      hlt    
  40042a:   66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)

0000000000400430 <deregister_tm_clones>:
  400430:   b8 8f 10 60 00          mov    $0x60108f,%eax
  400435:   55                      push   %rbp
  400436:   48 2d 88 10 60 00       sub    $0x601088,%rax
  40043c:   48 83 f8 0e             cmp    $0xe,%rax
  400440:   48 89 e5                mov    %rsp,%rbp
  400443:   77 02                   ja     400447 <deregister_tm_clones+0x17>
  400445:   5d                      pop    %rbp
  400446:   c3                      retq   
  400447:   b8 00 00 00 00          mov    $0x0,%eax
  40044c:   48 85 c0                test   %rax,%rax
  40044f:   74 f4                   je     400445 <deregister_tm_clones+0x15>
  400451:   5d                      pop    %rbp
  400452:   bf 88 10 60 00          mov    $0x601088,%edi
  400457:   ff e0                   jmpq   *%rax
  400459:   0f 1f 80 00 00 00 00    nopl   0x0(%rax)

0000000000400460 <register_tm_clones>:
  400460:   b8 88 10 60 00          mov    $0x601088,%eax
  400465:   55                      push   %rbp
  400466:   48 2d 88 10 60 00       sub    $0x601088,%rax
  40046c:   48 c1 f8 03             sar    $0x3,%rax
  400470:   48 89 e5                mov    %rsp,%rbp
  400473:   48 89 c2                mov    %rax,%rdx
  400476:   48 c1 ea 3f             shr    $0x3f,%rdx
  40047a:   48 01 d0                add    %rdx,%rax
  40047d:   48 d1 f8                sar    %rax
  400480:   75 02                   jne    400484 <register_tm_clones+0x24>
  400482:   5d                      pop    %rbp
  400483:   c3                      retq   
  400484:   ba 00 00 00 00          mov    $0x0,%edx
  400489:   48 85 d2                test   %rdx,%rdx
  40048c:   74 f4                   je     400482 <register_tm_clones+0x22>
  40048e:   5d                      pop    %rbp
  40048f:   48 89 c6                mov    %rax,%rsi
  400492:   bf 88 10 60 00          mov    $0x601088,%edi
  400497:   ff e2                   jmpq   *%rdx
  400499:   0f 1f 80 00 00 00 00    nopl   0x0(%rax)

00000000004004a0 <__do_global_dtors_aux>:
  4004a0:   80 3d e1 0b 20 00 00    cmpb   $0x0,0x200be1(%rip)        # 601088 <__TMC_END__>
  4004a7:   75 11                   jne    4004ba <__do_global_dtors_aux+0x1a>
  4004a9:   55                      push   %rbp
  4004aa:   48 89 e5                mov    %rsp,%rbp
  4004ad:   e8 7e ff ff ff          callq  400430 <deregister_tm_clones>
  4004b2:   5d                      pop    %rbp
  4004b3:   c6 05 ce 0b 20 00 01    movb   $0x1,0x200bce(%rip)        # 601088 <__TMC_END__>
  4004ba:   f3 c3                   repz retq 
  4004bc:   0f 1f 40 00             nopl   0x0(%rax)

00000000004004c0 <frame_dummy>:
  4004c0:   48 83 3d 58 09 20 00    cmpq   $0x0,0x200958(%rip)        # 600e20 <__JCR_END__>
  4004c7:   00 
  4004c8:   74 1e                   je     4004e8 <frame_dummy+0x28>
  4004ca:   b8 00 00 00 00          mov    $0x0,%eax
  4004cf:   48 85 c0                test   %rax,%rax
  4004d2:   74 14                   je     4004e8 <frame_dummy+0x28>
  4004d4:   55                      push   %rbp
  4004d5:   bf 20 0e 60 00          mov    $0x600e20,%edi
  4004da:   48 89 e5                mov    %rsp,%rbp
  4004dd:   ff d0                   callq  *%rax
  4004df:   5d                      pop    %rbp
  4004e0:   e9 7b ff ff ff          jmpq   400460 <register_tm_clones>
  4004e5:   0f 1f 00                nopl   (%rax)
  4004e8:   e9 73 ff ff ff          jmpq   400460 <register_tm_clones>

00000000004004ed <max>:
  4004ed:   55                      push   %rbp
  4004ee:   48 89 e5                mov    %rsp,%rbp
  4004f1:   48 89 7d e8             mov    %rdi,-0x18(%rbp)
  4004f5:   48 8b 45 e8             mov    -0x18(%rbp),%rax
  4004f9:   8b 00                   mov    (%rax),%eax
  4004fb:   89 45 fc                mov    %eax,-0x4(%rbp)
  4004fe:   eb 19                   jmp    400519 <max+0x2c>
  400500:   48 8b 45 e8             mov    -0x18(%rbp),%rax
  400504:   8b 00                   mov    (%rax),%eax
  400506:   3b 45 fc                cmp    -0x4(%rbp),%eax
  400509:   7e 09                   jle    400514 <max+0x27>
  40050b:   48 8b 45 e8             mov    -0x18(%rbp),%rax
  40050f:   8b 00                   mov    (%rax),%eax
  400511:   89 45 fc                mov    %eax,-0x4(%rbp)
  400514:   48 83 45 e8 04          addq   $0x4,-0x18(%rbp)
  400519:   48 8b 45 e8             mov    -0x18(%rbp),%rax
  40051d:   8b 00                   mov    (%rax),%eax
  40051f:   85 c0                   test   %eax,%eax
  400521:   75 dd                   jne    400500 <max+0x13>
  400523:   8b 45 fc                mov    -0x4(%rbp),%eax
  400526:   5d                      pop    %rbp
  400527:   c3                      retq   

0000000000400528 <main>:
  400528:   55                      push   %rbp
  400529:   48 89 e5                mov    %rsp,%rbp
  40052c:   bf 60 10 60 00          mov    $0x601060,%edi
  400531:   e8 b7 ff ff ff          callq  4004ed <max>
  400536:   b8 00 00 00 00          mov    $0x0,%eax
  40053b:   5d                      pop    %rbp
  40053c:   c3                      retq   
  40053d:   0f 1f 00                nopl   (%rax)

0000000000400540 <__libc_csu_init>:
  400540:   41 57                   push   %r15
  400542:   41 89 ff                mov    %edi,%r15d
  400545:   41 56                   push   %r14
  400547:   49 89 f6                mov    %rsi,%r14
  40054a:   41 55                   push   %r13
  40054c:   49 89 d5                mov    %rdx,%r13
  40054f:   41 54                   push   %r12
  400551:   4c 8d 25 b8 08 20 00    lea    0x2008b8(%rip),%r12        # 600e10 <__frame_dummy_init_array_entry>
  400558:   55                      push   %rbp
  400559:   48 8d 2d b8 08 20 00    lea    0x2008b8(%rip),%rbp        # 600e18 <__init_array_end>
  400560:   53                      push   %rbx
  400561:   4c 29 e5                sub    %r12,%rbp
  400564:   31 db                   xor    %ebx,%ebx
  400566:   48 c1 fd 03             sar    $0x3,%rbp
  40056a:   48 83 ec 08             sub    $0x8,%rsp
  40056e:   e8 35 fe ff ff          callq  4003a8 <_init>
  400573:   48 85 ed                test   %rbp,%rbp
  400576:   74 1e                   je     400596 <__libc_csu_init+0x56>
  400578:   0f 1f 84 00 00 00 00    nopl   0x0(%rax,%rax,1)
  40057f:   00 
  400580:   4c 89 ea                mov    %r13,%rdx
  400583:   4c 89 f6                mov    %r14,%rsi
  400586:   44 89 ff                mov    %r15d,%edi
  400589:   41 ff 14 dc             callq  *(%r12,%rbx,8)
  40058d:   48 83 c3 01             add    $0x1,%rbx
  400591:   48 39 eb                cmp    %rbp,%rbx
  400594:   75 ea                   jne    400580 <__libc_csu_init+0x40>
  400596:   48 83 c4 08             add    $0x8,%rsp
  40059a:   5b                      pop    %rbx
  40059b:   5d                      pop    %rbp
  40059c:   41 5c                   pop    %r12
  40059e:   41 5d                   pop    %r13
  4005a0:   41 5e                   pop    %r14
  4005a2:   41 5f                   pop    %r15
  4005a4:   c3                      retq   
  4005a5:   66 66 2e 0f 1f 84 00    data32 nopw %cs:0x0(%rax,%rax,1)
  4005ac:   00 00 00 00 

00000000004005b0 <__libc_csu_fini>:
  4005b0:   f3 c3                   repz retq 

Disassembly of section .fini:

00000000004005b4 <_fini>:
  4005b4:   48 83 ec 08             sub    $0x8,%rsp
  4005b8:   48 83 c4 08             add    $0x8,%rsp
  4005bc:   c3                      retq   

My question is that, in main function:

  40052c:   bf 60 10 60 00          mov    $0x601060,%edi

The address $0x601060 should be the address of the array data_items in the C code, but I cannot find the values of the data_items array anywhere in the assembly code. Where are they?

By the way, in the max function code:

4004f1: 48 89 7d e8             mov    %rdi,-0x18(%rbp)

I would have expected the store to be:

mov %rdi, -0x8(%rbp)

Why does the compiler leave a hole in the stack?

my system is Ubuntu 14.04 LTS.

objdump -d only disassembles sections of the executable where there's supposed to be code.

Use objdump -D to disassemble all sections, and you'll find this:

Disassembly of section .data:

08049760 <__data_start>:
        ...

08049780 <data_items>:
 8049780:       03 00                   add    (%eax),%eax
 8049782:       00 00                   add    %al,(%eax)
 8049784:       43                      inc    %ebx
 8049785:       00 00                   add    %al,(%eax)
 8049787:       00 20                   add    %ah,(%eax)
 8049789:       00 00                   add    %al,(%eax)
 804978b:       00 04 00                add    %al,(%eax,%eax,1)
 804978e:       00 00                   add    %al,(%eax)

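That's your array: read as little-endian 32-bit words, the bytes 03 00 00 00, 43 00 00 00, 20 00 00 00, 04 00 00 00 are 3, 67, 32 and 4. (objdump will try to disassemble the data as if it were machine code, so the instructions shown aren't going to make sense. This dump comes from a 32-bit build, which is why the addresses differ from the 0x601060 in your listing.)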

If you run nm a.out, you will see that 0x601060 is the address of data_items.

When it comes to runtime disassembly, gdb itself can be very handy:

(gdb) break main
Breakpoint 1 at 0x4004f3
(gdb) break max
Breakpoint 2 at 0x4004b8
(gdb) run
Starting program: /home/dtarcatu/workspace/ctest/a.out 

Breakpoint 1, 0x00000000004004f3 in main ()
(gdb) disas
Dump of assembler code for function main:
   0x00000000004004ef <+0>: push   %rbp
   0x00000000004004f0 <+1>: mov    %rsp,%rbp
=> 0x00000000004004f3 <+4>: mov    $0x601040,%edi
   0x00000000004004f8 <+9>: callq  0x4004b4 <max>
   0x00000000004004fd <+14>:    mov    $0x0,%eax
   0x0000000000400502 <+19>:    pop    %rbp
   0x0000000000400503 <+20>:    retq   
End of assembler dump.
(gdb) x 0x601040
0x601040 <data_items>:  0x00000003
(gdb) x /10d 0x601040
0x601040 <data_items>:  3   67  32  4
0x601050 <data_items+16>:   89  6   34  2
0x601060 <data_items+32>:   9   0

Now, regarding your stack discipline dilemma, I'm not sure I can come up with a very good answer, but I think it's related to the x86-64 red zone optimization.

(gdb) c
 Continuing.

Breakpoint 2, 0x00000000004004b8 in max ()
(gdb) disas
Dump of assembler code for function max:
   0x00000000004004b4 <+0>: push   %rbp
   0x00000000004004b5 <+1>: mov    %rsp,%rbp
=> 0x00000000004004b8 <+4>: mov    %rdi,-0x18(%rbp)
   0x00000000004004bc <+8>: mov    -0x18(%rbp),%rax
   0x00000000004004c0 <+12>:    mov    (%rax),%eax
   0x00000000004004c2 <+14>:    mov    %eax,-0x4(%rbp)
   0x00000000004004c5 <+17>:    jmp    0x4004e0 <max+44>
   0x00000000004004c7 <+19>:    mov    -0x18(%rbp),%rax
   0x00000000004004cb <+23>:    mov    (%rax),%eax
   0x00000000004004cd <+25>:    cmp    -0x4(%rbp),%eax
   0x00000000004004d0 <+28>:    jle    0x4004db <max+39>
   0x00000000004004d2 <+30>:    mov    -0x18(%rbp),%rax
   0x00000000004004d6 <+34>:    mov    (%rax),%eax
   0x00000000004004d8 <+36>:    mov    %eax,-0x4(%rbp)
   0x00000000004004db <+39>:    addq   $0x4,-0x18(%rbp)
   0x00000000004004e0 <+44>:    mov    -0x18(%rbp),%rax
   0x00000000004004e4 <+48>:    mov    (%rax),%eax
   0x00000000004004e6 <+50>:    test   %eax,%eax
   0x00000000004004e8 <+52>:    jne    0x4004c7 <max+19>
   0x00000000004004ea <+54>:    mov    -0x4(%rbp),%eax
   0x00000000004004ed <+57>:    pop    %rbp
   0x00000000004004ee <+58>:    retq   
End of assembler dump.
(gdb) ni
0x00000000004004bc in max ()
(gdb) print $rbp
$1 = (void *) 0x7fffffffdf10
(gdb) print $rsp
$2 = (void *) 0x7fffffffdf10

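The -0x18(%rbp) location is definitely inside the red zone: %rsp equals %rbp here, so it lies below the stack pointer, and since max calls no other function the compiler can use that area without adjusting %rsp. It can probably do whatever the heck it wants in there. Your local int val is stored at -0x4(%rbp), while -0x18(%rbp) is where the pointer argument pt is kept: it is saved there from %rdi on entry, and ++pt shows up as addq $0x4,-0x18(%rbp).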

You could also ask the compiler to output its emitted assembler code. Notice that there is no observable side effect in your program, so the compiler could optimize it to a nop, or compute the max at compile time, etc.

However, compiling your code (with GCC 4.9.1 on Debian/x86-64) using

 gcc -fverbose-asm -O -S main.c

I get a file main.s containing the following (with several lines before and after):

    .globl  max
    .type   max, @function
max:
.LFB0:
    .file 1 "main.c"
    .loc 1 4 0
    .cfi_startproc
.LVL0:
    .loc 1 5 0
    movl    (%rdi), %eax    # *pt_5(D), val
.LVL1:
    .loc 1 6 0
    testl   %eax, %eax  # val
    je  .L2 #,
    movl    %eax, %edx  # val, val
.L3:
    cmpl    %edx, %eax  # val, val
    cmovl   %edx, %eax  # val,, val, val
.LVL2:
    .loc 1 12 0
    addq    $4, %rdi    #, pt
.LVL3:
    .loc 1 6 0
    movl    (%rdi), %edx    # MEM[base: pt_8, offset: 0B], val
    testl   %edx, %edx  # val
    jne .L3 #,
.L2:
    .loc 1 15 0
    rep ret
    .cfi_endproc
.LFE0:
    .size   max, .-max
    .globl  main
    .type   main, @function
main:
.LFB1:
    .loc 1 18 0
    .cfi_startproc
    .loc 1 20 0
    movl    $data_items, %edi   #,
    call    max #
.LVL4:
    .loc 1 23 0
    movl    $0, %eax    #,
    ret
    .cfi_endproc
.LFE1:
    .size   main, .-main
    .globl  data_items
    .data
    .align 32
    .type   data_items, @object
    .size   data_items, 40
data_items:
    .long   3
    .long   67
    .long   32
    .long   4
    .long   89
    .long   6
    .long   34
    .long   2
    .long   9
    .long   0
    .text

So you see that data_items goes into the data section (because of the .data directive).

BTW, GCC usually emits an assembler file, which is transformed by the as assembler into an ELF file main.o containing the object code. That object file contains relocation directives and several sections, which are later processed by the ld linker.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM