[英]Pointers. *((char *)y +1)
*((char *)y +1) = 42;
真的有用?
#include<stdio.h>
/*
 * Demonstrates overwriting a single byte of a pointer variable through a
 * char * alias.
 *
 * The locals form a chain z -> y -> x -> sum. The program prints the
 * address and the content of every link, stores 42 into the second byte
 * of x (reached through y), and prints everything again so the change in
 * x's value can be observed.
 *
 * Returns 0.
 */
int main(void)
{
    int sum = 10, *x, **y, ***z;

    x = &sum;
    y = &x;
    z = &y;

    /*
     * Pointers are printed with %p and cast to void *: passing a pointer
     * for %d is undefined behavior and loses bits where pointers are wider
     * than int.
     */
    printf("Addresses before : %p %p %p %p\n\n",
           (void *)&z, (void *)&y, (void *)&x, (void *)&sum);
    printf("Content before : %p %p %p %d\n\n",
           (void *)z, (void *)y, (void *)x, sum);

    /*
     * y holds &x, so (char *)y + 1 is the address of the second byte of
     * x's own storage. Writing through a char * is permitted for any
     * object; the store replaces that one byte of the pointer value kept
     * in x. Afterwards x no longer holds &sum and must not be dereferenced.
     */
    *((char *)y + 1) = 42;

    /* The addresses are unchanged; only the content of x differs. */
    printf("Addresses after : %p %p %p %p\n\n",
           (void *)&z, (void *)&y, (void *)&x, (void *)&sum);
    printf("Content after : %p %p %p %d\n\n",
           (void *)z, (void *)y, (void *)x, sum);

    return 0;
}
給定*((char *)y +1) = 42;
評估的第一件事是cast (char *)y
所以, y
被解釋為char指針。 下一步是+1
。 對於指針,加法會按所指類型的大小進行縮放,因此這意味着new_address = address_pointed_by_y + 1*sizeof(char)
。 所以現在我們有第二個字符的地址,假設y
指向一個字符串。 下一步是解引用,所以我們得到了該字符本身,然後將它設置為新值。 所以總而言之,字符串的第二個字符被設置為值42,在ASCII中為*
。
這是真正有用的東西。
我們來看看x86_64匯編代碼是什麼樣子。 這是來自gcc -fverbose-asm -S testcode.c
的輸出,並且我添加了更多註釋。
# Read-only data: the four printf format strings used by main.
.section .rodata
.align 8
.LC0:
.string "Addresses before : %p %p %p %p\n\n"
.align 8
.LC1:
.string "Content before : %p %p %p %d\n\n"
.align 8
.LC2:
.string "Addresses after : %p %p %p %p\n\n"
.align 8
.LC3:
.string "Content after : %p %p %p %d\n\n"
.text
.globl main
.type main, @function
# main: builds the chain z -> y -> x -> sum in its stack frame, prints the
# addresses and contents, overwrites one byte of x through y, then prints
# both again.
# Frame layout (offsets from RBP): sum at -4, z at -16, y at -24, x at -32.
main:
.LFB0:
.cfi_startproc
# Save the caller's base pointer.
pushq %rbp #
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
# Copy the stack pointer into the base pointer register.
movq %rsp, %rbp #,
.cfi_def_cfa_register 6
# Reserve 32 bytes of stack for the local variables.
subq $32, %rsp #,
# Store 10 into sum, which lives 4 bytes below the frame base (RBP - 4).
movl $10, -4(%rbp) #, sum
# Load the Effective Address (LEA) of sum into RAX.
leaq -4(%rbp), %rax #, tmp75
# Write RAX into x, which lives at RBP - 32. x now holds &sum.
movq %rax, -32(%rbp) # tmp75, x
# Load the address of x (RBP - 32) into RAX.
leaq -32(%rbp), %rax #, tmp76
# Write it into y, which lives at RBP - 24. y now holds &x.
movq %rax, -24(%rbp) # tmp76, y
# Load the address of y into RAX.
leaq -24(%rbp), %rax #, tmp77
# Write it into z, which lives at RBP - 16. z now holds &y.
movq %rax, -16(%rbp) # tmp77, z
# Set up the first printf call. .LC0 is the address of the format string.
# The arguments are loaded in reverse order below; at the call they sit in
# RDI (format), RSI (&z), RDX (&y), RCX (&x) and R8 (&sum).
movl $.LC0, %eax #, D.2049
# These are all addresses, i.e. plain numbers: RBP - 4, - 32, - 24 and - 16.
leaq -4(%rbp), %rdi #, tmp78
leaq -32(%rbp), %rcx #, tmp79
leaq -24(%rbp), %rdx #, tmp80
leaq -16(%rbp), %rsi #, tmp81
movq %rdi, %r8 # tmp78,
movq %rax, %rdi # D.2049,
# EAX = 0 before a variadic call (System V AMD64: AL = number of vector
# registers used for arguments; none here).
movl $0, %eax #,
call printf #
# Set up the second printf.
# This time MOV is used instead of LEA, so the values stored in the
# variables are passed rather than their addresses.
# E.g. the next line copies the 4 bytes at RBP - 4 (sum) into EDI.
movl -4(%rbp), %edi # sum, sum.0
movq -32(%rbp), %rcx # x, x.1
movq -24(%rbp), %rdx # y, y.2
movq -16(%rbp), %rsi # z, z.3
movl $.LC1, %eax #, D.2054
movl %edi, %r8d # sum.0,
movq %rax, %rdi # D.2054,
movl $0, %eax #,
call printf #
# Here is the confusing line *((char *)y +1) = 42;
# Load the value of y into RAX. y was set to RBP - 32 above,
# which is the location of x.
movq -24(%rbp), %rax # y, y.4
# Add 1 to RAX, so it is now RBP - 31.
addq $1, %rax #, D.2056
# Write 42 into ONE BYTE at the memory location RAX is holding.
# x86 is little-endian, so RBP - 31 is the second least significant byte
# of x: the byte with weight 256.
# After this store x no longer points to sum but to some other place.
movb $42, (%rax) #, *D.2056_8
# Third printf: the addresses again, set up exactly like the first call.
movl $.LC2, %eax #, D.2057
leaq -4(%rbp), %rdi #, tmp82
leaq -32(%rbp), %rcx #, tmp83
leaq -24(%rbp), %rdx #, tmp84
leaq -16(%rbp), %rsi #, tmp85
movq %rdi, %r8 # tmp82,
movq %rax, %rdi # D.2057,
movl $0, %eax #,
call printf #
# Fourth printf: the contents again; x (RBP - 32) now holds the modified value.
movl -4(%rbp), %edi # sum, sum.5
movq -32(%rbp), %rcx # x, x.6
movq -24(%rbp), %rdx # y, y.7
movq -16(%rbp), %rsi # z, z.8
movl $.LC3, %eax #, D.2062
movl %edi, %r8d # sum.5,
movq %rax, %rdi # D.2062,
movl $0, %eax #,
call printf #
# Return value 0 in EAX, then tear down the frame and return.
movl $0, %eax #, D.2063
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE0:
.size main, .-main
.ident "GCC: (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3"
.section .note.GNU-stack,"",@progbits
注意:所有數字都是十六進制的。 顯示的字節順序是big-endian,盡管大多數PC都具有little-endian架構。 這確實會影響輸出結果,但與解釋無關。
C的內存模型大致如下所示:內存由一系列字節組成,每個字節都有一個地址:
__ ... __ __ __ __ __ __ __ __ __ __ __ __ __ __ __ __ bytes
0 ... A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF addresses
變量代表一些存儲器地址,例如sum
等於地址A0
。 變量也有一個類型,它決定了該地址之后的內存段有多大。 例如, int
可能是4個字節:
__ ...|__ __ __ __|__ __ __ __ __ __ __ __ __ __ __ __ bytes
0 ...|A0 A1 A2 A3|A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF addresses
------|-----------|---------------------------------------------
|"sum" | names
| int | type
&
運算符獲取名稱的地址,例如&sum
這里是A0
。 該值稱為指針 ,即包含其他值的內存地址。 運行初始化代碼后,我們有以下內存內容:
v----------.v----------.v----------.
__ ...|00 00 00 A0|00 00 00 A0|00 00 00 A4|00 00 00 A8|bytes
0 ...|A0 A1 A2 A3|A4 A5 A6 A7|A8 A9 AA AB|AC AD AE AF|addresses
------|-----------|-----------|-----------|-----------|---------
|"sum" |"x" |"y" |"z" |names
| int | int * | int ** | int *** |type
|sum = 0xA0 |x = &sum |y = &x |z = &y |
y
變量的類型為int **
,即它是指向「指向int的指針」的指針。 在上圖中,指針寬度為4個字節。 但是,我們可以用不同的方式重新解釋這個字節序列: (char *)y
表示我們將y
位置的值解釋為指向char的指針(一個字節寬):
v----------.
__ ... 00 00 00 A0|00|00 00 A0|00 00 00 A4|00 00 00 A8 bytes
0 ... A0 A1 A2 A3|A4|A5 A6 A7|A8 A9 AA AB|AC AD AE AF addresses
------------------|--|--------|-----------|---------------------
| | |"y" | names
|ch| | char * | type
(char *)y + 1
將一個字符( 1
)的大小添加到y
處的值,將其解釋為char *
( A4
)。 這就是A5
。
*(...)
“dereferences”(解引用)該值,即訪問指定內存位置的值。 在我們的例子中,這是00
:
v----------.
__ ... 00 00 00 A0|00|00|00 A0|00 00 00 A4|00 00 00 A8 bytes
0 ... A0 A1 A2 A3|A4|A5|A6 A7|A8 A9 AA AB|AC AD AE AF addresses
------------------|--|--|-----|-----------|---------------------
| | | |"y" | names
|ch|ch| | char * | type
*(0xA4) --^ ^-- *(0xA5)
我們將該位置的值設置為十進制42
,即十六進制為2A
:
v----------.
__ ... 00 00 00 A0|00|2A|00 A0|00 00 00 A4|00 00 00 A8 bytes
0 ... A0 A1 A2 A3|A4|A5|A6 A7|A8 A9 AA AB|AC AD AE AF addresses
------------------|--|--|-----|-----------|---------------------
| | | |"y" | names
|ch|ch| | char * | type
*(0xA4) --^ ^-- *(0xA5)
設置完字節后,您將返回原始解釋:
v----------.v----------.
__ ...|00 00 00 A0|00 2A 00 A0|00 00 00 A4|00 00 00 A8|bytes
0 ...|A0 A1 A2 A3|A4 A5 A6 A7|A8 A9 AA AB|AC AD AE AF|addresses
------|-----------|-----------|-----------|-----------|---------
|"sum" |"x" |"y" |"z" |names
| int | int * | int ** | int *** |type
只有 x
的值發生了變化,所有其他值都保持不變。 變量的位置也是不變的。 雖然x
仍然具有int *
類型,但它不再指向sum
的位置。 相反,它指向遠處的某個地方,訪問該位置的值可能會產生段錯誤。
簡而言之, *((char *)y +1)
只是下面這個表達式的一種花哨寫法:((char *)y)[1]
。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.