[英]Calling C function in Assembly Segfaults
我正在尝试编写一个汇编程序,该程序在c中调用一个函数,如果char数组中的当前字符满足某些条件,它将用预定义字符替换字符串中的某些字符。
我的C文件:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
//display *((char *) $edi)
// These functions will be implemented in assembly:
//
int strrepl(char *str, int c, int (* isinsubset) (int c) ) ;
int isvowel (int c) {
if (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u')
return 1 ;
if (c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U')
return 1 ;
return 0 ;
}
int main(){
char *str1;
int r;
// I ran my code through a debugger again, and it seems that when displaying
// the character stored in ecx is listed as "A" (correct) right before the call
// to "add ecx, 1" at which point ecx somehow resets to 0 when it should be "B"
str1 = strdup("ABC 123 779 Hello World") ;
r = strrepl(str1, '#', &isdigit) ;
printf("str1 = \"%s\"\n", str1) ;
printf("%d chararcters were replaced\n", r) ;
free(str1) ;
return 0;
}
和我的.asm文件:
; File: strrepl.asm
; Implements a C function with the prototype:
;
; int strrepl(char *str, int c, int (* isinsubset) (int c) ) ;
;
;
; Result: chars in string are replaced with the replacement character and string is returned.
SECTION .text
global strrepl
_strrepl: nop
strrepl:
push ebp ; set up stack frame
mov ebp, esp
push esi ; save registers
push ebx
xor eax, eax
mov ecx, [ebp + 8] ;load string (char array) into ecx
jecxz end ;jump if [ecx] is zero
mov esi, [ebp + 12] ;move the replacement character into esi
mov edx, [ebp + 16] ;move function pointer into edx
xor bl, bl ;bl will be our counter
firstLoop:
add bl, 1 ;inc bl would work too
add ecx, 1
mov eax, [ecx]
cmp eax, 0
jz end
push eax ; parameter for (*isinsubset)
;BREAK
call edx ; execute (*isinsubset)
add esp, 4 ; "pop off" the parameter
mov ebx, eax ; store return value
end:
pop ebx ; restore registers
pop esi
mov esp, ebp ; take down stack frame
pop ebp
ret
当通过gdb运行此程序并将断点放在; BREAK时,在执行以下错误时调用s命令后会出现段错误:
Program received signal SIGSEGV, Segmentation fault.
0x0081320f in isdigit () from /lib/libc.so.6
isdigit是我包含在我的c文件中的标准c库的一部分,所以我不确定该怎么做。
编辑:我已经编辑了firstLoop,并包含了secondLoop,该第二环应该将所有数字替换为“#”,但是它似乎替换了整个数组。
firstLoop:
xor eax, eax
mov edi, [ecx]
cmp edi, 0
jz end
mov edi, ecx ; save array
movzx eax, byte [ecx] ;load single byte into eax
mov ebp, edx ; save function pointer
push eax ; parameter for (*isinsubset)
call edx ; execute (*isinsubset)
;cmp eax, 0
;jne end
mov ecx, edi ; restore array
cmp eax, 0
jne secondLoop
mov edx, ebp ; restore function pointer
add esp, 4 ; "pop off" the parameter
mov ebx, eax ; store return value
add ecx, 1
jmp firstLoop
secondLoop:
mov [ecx], esi
mov edx, ebp
add esp, 4
mov ebx, eax
add ecx, 1
jmp firstLoop
使用gdb,当代码进入secondloop时,一切都正确。 ecx显示为“ 1”,这是从.c文件传入的字符串中的第一位数字。 Esi应该显示为“#”。 但是,在我执行mov [ecx]之后,esi似乎崩溃了。 ecx此时将显示为“#”,但是一旦我加1到达数组中的下一个字符,它就会显示为“ / 000”。 1后面的每个字符都被替换为“#”,并显示为“ / 000”。 在我让secondLoop尝试用“#”替换字符之前,我只是让firstLoop与它自身进行循环,以查看它是否可以通过整个数组而不会崩溃。 它做到了,并且在每次增量ecx之后都显示为正确的字符。 我不确定为什么要执行mov [ecx],esi会将ecx的其余部分设置为null。
在firstLoop:
您正在使用以下方法从字符串中加载字符:
mov eax, [ecx]
在一条领带上加载4个字节,而不是单个字节。 因此,您要传递给isdigit()
的int
可能远远超出了它的处理范围(它可能使用简单的表查找)。
您可以使用以下Intel asm语法加载单个字节:
movzx eax, byte ptr [ecx]
其他一些事情:
ecx
我已经在您的代码中添加了一些注释:
; this is OK: setting up the stack frame and saving important register
; on Win32, the registers that need saving are: esi, edi and ebx
; the rest can be used without needing to preserve them
push ebp
mov ebp, esp
push esi
push ebx
xor eax, eax
mov ecx, [ebp + 8]
; you said that this checked [ecx] for zero, but I think you've just written
; that wrong, this checks the value of ecx for zero, the [reg] form usually indicates
; the value at the address defined by reg
; so this is effectively doing a null pointer check (which is good)
jecxz end
mov esi, [ebp + 12]
mov edx, [ebp + 16]
xor bl, bl
firstLoop:
add bl, 1
; you increment ecx before loading the first character, this means
; that the function ignores the first character of the string
; and will therefore produce an incorrect result if the string
; starts with a character that needs replacing
add ecx, 1
; characters are 8 bit, not 32 bit (mentioned in comments elsewhere)
mov eax, [ecx]
cmp eax, 0
jz end
push eax
; possibly segfaults due to character out of range
; also, as mentioned elsewhere, the function you call here must conform to the
; the standard calling convention of the system (e.g, preserve esi, edi and ebx for
; Win32 systems), so eax, ecx and edx can change, so next time you call
; [edx] it might be referencing random memory
; either save edx on the stack (push before pushing parameters, pop after add esp)
; or just load edx with [ebp+16] here instead of at the start
call edx
add esp, 4
mov ebx, eax
; more functionality required here!
end:
; restore important values, etc
pop ebx
pop esi
mov esp, ebp
pop ebp
; the result of the function should be in eax, but that's not set up properly yet
ret
对您的内循环的评论:-
firstLoop:
xor eax, eax
; you're loading a 32 bit value and checking for zero,
; strings are terminated with a null character, an 8 bit value,
; not a 32 bit value, so you're reading past the end of the string
; so this is unlikely to correctly test the end of string
mov edi, [ecx]
cmp edi, 0
jz end
mov edi, ecx ; save array
movzx eax, byte [ecx] ;load single byte into eax
; you need to keep ebp! its value must be saved (at the end,
; you do a mov esp,ebp)
mov ebp, edx ; save function pointer
push eax ; parameter for (*isinsubset)
call edx ; execute (*isinsubset)
mov ecx, edi ; restore array
cmp eax, 0
jne secondLoop
mov edx, ebp ; restore function pointer
add esp, 4 ; "pop off" the parameter
mov ebx, eax ; store return value
add ecx, 1
jmp firstLoop
secondLoop:
; again, your accessing the string using a 32 bit value, not an 8 bit value
; so you're replacing the matched character and the three next characters
; with the new value
; the upper 24 bits are probably zero so the loop will terminate on the
; next character
; also, the function seems to be returning a count of characters replaced,
; but you're not recording the fact that characters have been replaced
mov [ecx], esi
mov edx, ebp
add esp, 4
mov ebx, eax
add ecx, 1
jmp firstLoop
您似乎确实对内存的工作方式有麻烦,您对8位和32位内存访问感到困惑。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.