繁体   English   中英

如何在 x86_64 汇编中从 STDIN 读取输入?

[英]How to read input from STDIN in x86_64 assembly?

我正在学习 x86_64 汇编,今天在尝试标准输入输出时,偶然发现了这篇帖子:“学习汇编 - 回显程序名称”。那么,如何用类似的方式(使用 SYSCALL 指令)从 STDIN 读取输入? 特别是,如果我知道输入总是一个整数,并且想把它读到寄存器中,该怎么做?

编辑: @Daniel Kozar 在下面的回答帮助我理解了在 Linux 上如何通过 SYSCALL 指令使用 STDIN 和 STDOUT。 我试着写了一个小程序:它从控制台输入中读取一个数字,并打印与该数字对应的 ASCII 字符。 例如,输入 65,就应该输出 A,后面再跟一个换行符。 希望它也能帮到其他人 :-)

;-----------------------------------------------------------------------
; Read a two-digit decimal number from stdin and print the character
; whose ASCII code is that number, followed by a newline ("65" -> "A").
;
; Target: x86-64 Linux, NASM syntax, raw syscalls (no libc).
; Exit status: 0 on success, 1 if two bytes could not be read.
;-----------------------------------------------------------------------
SYS_READ    equ 0
SYS_WRITE   equ 1
SYS_EXIT    equ 60
STDIN       equ 0
STDOUT      equ 1
NEWLINE     equ 10
NUM_DIGITS  equ 2               ; exactly two ASCII digits are expected

section .text
    global _start

_start:
    ; At process entry [rsp] is argc and [rsp+8] is argv[0], so reserve
    ; a private scratch buffer instead of writing over those slots.
    sub     rsp, 16

    mov     eax, SYS_READ
    mov     edi, STDIN
    mov     rsi, rsp            ; buffer = scratch area
    mov     edx, NUM_DIGITS
    syscall                     ; rax = bytes read, 0 at EOF, or -errno
    cmp     rax, NUM_DIGITS
    jne     .fail               ; short read / EOF / error: nothing valid to convert

    ; value = tens * 10 + units. Only the low byte of the result is
    ; written out, so no masking of the intermediate values is needed.
    movzx   eax, byte [rsp]     ; tens digit character
    sub     eax, '0'
    lea     eax, [rax + rax*4]  ; tens * 5
    add     eax, eax            ; tens * 10
    movzx   ecx, byte [rsp + 1] ; units digit character
    sub     ecx, '0'
    add     eax, ecx

    ; Emit the character and the trailing newline with a single write.
    mov     [rsp], al
    mov     byte [rsp + 1], NEWLINE
    mov     eax, SYS_WRITE
    mov     edi, STDOUT
    mov     rsi, rsp
    mov     edx, 2
    syscall

    xor     edi, edi            ; exit status 0
.exit:
    mov     eax, SYS_EXIT
    syscall

.fail:
    mov     edi, 1              ; exit status 1
    jmp     .exit

编辑 2:这是我尝试从标准输入读取无符号 32 位十进制整数,将其存储为整数以进行计算,然后将其写回标准输出。

;-----------------------------------------------------------------------
; Read an unsigned decimal integer from stdin, convert it to a binary
; value, convert it back to decimal text and echo it on stdout.
;
; Output byte stream: newline, the decimal digits, newline.
;
; Target: x86-64 Linux, NASM syntax, raw syscalls (no libc).
; Exit status: 0 on success, 1 if read() reports an error.
;
; Parsing stops at the first byte that is not '0'..'9' (normally the
; newline typed by the user) or at the end of the data actually read.
;-----------------------------------------------------------------------
SYS_READ    equ 0
SYS_WRITE   equ 1
SYS_EXIT    equ 60
STDIN       equ 0
STDOUT      equ 1
NEWLINE     equ 10
INPUT_MAX   equ 10              ; a 32-bit unsigned value has at most 10 digits
BUF_SIZE    equ 32              ; scratch space shared by input and output text

section .text
        global _start

_start:
        ; At process entry [rsp] is argc and [rsp+8] is argv[0]; reserve
        ; a private buffer rather than overwriting them.
        sub     rsp, BUF_SIZE

; ---- read the text -----------------------------------------------------
        mov     eax, SYS_READ
        mov     edi, STDIN
        mov     rsi, rsp
        mov     edx, INPUT_MAX
        syscall                         ; rax = bytes read, 0 at EOF, or -errno
        test    rax, rax
        js      .fail

; ---- ASCII decimal -> binary ------------------------------------------
; rsi = cursor, rdi = one past the last byte read, rax = value so far
        mov     rsi, rsp
        lea     rdi, [rsp + rax]
        xor     eax, eax
.parse:
        cmp     rsi, rdi
        jae     .parsed                 ; consumed everything read() returned
        movzx   ecx, byte [rsi]         ; load ONE byte, zero-extended
        sub     ecx, '0'
        cmp     ecx, 9
        ja      .parsed                 ; unsigned compare: rejects bytes below '0' too
        lea     rax, [rax + rax*4]      ; value * 5
        lea     rax, [rcx + rax*2]      ; value * 10 + digit
        inc     rsi
        jmp     .parse

; ---- binary -> ASCII decimal ------------------------------------------
; Digits come out least-significant first, so the text is built backwards
; from the end of the buffer: "\n" digits "\n".
.parsed:
        lea     rsi, [rsp + BUF_SIZE - 1]
        mov     byte [rsi], NEWLINE     ; trailing newline
        mov     ecx, 10                 ; divisor
.emit:
        xor     edx, edx                ; div divides rdx:rax, so clear the high half
        div     rcx                     ; rax = quotient, rdx = remainder (0..9)
        add     dl, '0'
        dec     rsi
        mov     [rsi], dl
        test    rax, rax
        jnz     .emit                   ; do-while: a value of 0 still prints "0"

        dec     rsi
        mov     byte [rsi], NEWLINE     ; leading newline

; ---- write the whole line with one syscall ----------------------------
        lea     rdx, [rsp + BUF_SIZE]
        sub     rdx, rsi                ; length = end of buffer - start of text
        mov     edi, STDOUT
        mov     eax, SYS_WRITE
        syscall

        xor     edi, edi                ; exit status 0
.exit:
        mov     eax, SYS_EXIT
        syscall

.fail:
        mov     edi, 1                  ; exit status 1
        jmp     .exit

首先:汇编中没有变量,只有指向某些数据的标签。 按照设计,数据是无类型的——至少在真正的汇编器中是这样,而不是在 HLA(高级汇编器,例如 MASM)中。

从标准输入读取是通过系统调用 read 完成的。 我假设您已经阅读了您提到的帖子,并且知道如何在 x64 Linux 中调用系统调用。 假设您使用 NASM(或语法与之类似的汇编器),并且想把来自 stdin 的输入存储到地址 buffer 处(您已在那里保留了 BUFSIZE 字节的内存),那么执行该系统调用的代码如下所示:

; read(STDIN, buffer, BUFSIZE) via the raw Linux x86-64 syscall interface.
; `buffer` and `BUFSIZE` are defined by the surrounding program.
xor     eax, eax            ; eax = 0 = syscall number of read (a 32-bit write also zeroes the upper half of rax)
xor     edi, edi            ; edi = 0 = stdin file descriptor
lea     rsi, [rel buffer]   ; rsi = address of the buffer, RIP-relative so it is position-independent
mov     edx, BUFSIZE        ; rdx = size of the buffer
syscall                     ; rax = result of read(0, buffer, BUFSIZE)

返回后,rax 将包含系统调用的结果。 如果您想进一步了解它的工作原理,请参阅 man 2 read。 请注意,在 mac 上 read 的系统调用号是 0x2000003 而不是 0,因此第一行要改为 mov rax, 0x2000003。

然而,在汇编语言中解析整数并不是那么简单。 由于 read 只为您提供出现在标准输入上的原始二进制数据,因此您需要自己转换出整数值。 请记住,您在键盘上键入的内容会以 ASCII 码(或您可能使用的其他编码——这里假设为 ASCII)的形式发送给应用程序。 因此,您需要把 ASCII 编码的十进制数字串转换为二进制数值。

C 中用于将此类结构转换为普通 unsigned int 的函数可能如下所示:

/*
 * Convert a run of ASCII decimal digits to an unsigned int.
 *
 * str    - pointer to the text; it does not need to be NUL-terminated.
 * strlen - number of bytes available at str.
 *
 * Digits are consumed from the start of the buffer. Conversion stops at
 * the first byte that is not '0'..'9' (for example the newline that
 * read() leaves at the end of a typed line) or after strlen bytes.
 *
 * Returns the parsed value, or 0 if the buffer does not start with a
 * digit. Values that do not fit in unsigned int wrap around.
 */
unsigned int parse_ascii_decimal(const char *str, unsigned int strlen)
{
    unsigned int total = 0;
    unsigned int pos;

    for (pos = 0; pos < strlen; ++pos)
    {
        /* Bytes below '0' become huge after the unsigned conversion,
           so the single "> 9" test rejects every non-digit. */
        unsigned int digit = (unsigned int)((unsigned char)str[pos] - '0');

        if (digit > 9)
            break;
        total = total * 10 + digit;
    }
    return total;
}

将其转换为汇编(并扩展为支持有符号数)留给读者作为练习。 :)(或者参考问题“NASM 汇编:将输入转换为整数?”——那里有一个更简单的算法,每次迭代只需要 1 次乘法:total = total*10 + digit。 另外,如果事先不知道长度,可以在迭代过程中检查第一个非数字字符,而不必单独先求一次字符串长度。)


最后同样重要的一点:write 系统调用始终需要一个指向缓冲区的指针,该缓冲区中存放着要写入给定文件描述符的数据。 因此,如果要输出换行符,除了创建一个包含换行符的缓冲区之外别无他法。

如果您能够使用scanf ,这里有一个简单的解决方案:

;-----------------------------------------------------------------------
; int main(void)
; Prints a prompt with printf and reads one decimal integer into x
; with scanf.
;
; Target: x86-64 Linux, NASM syntax, System V AMD64 ABI, linked with libc
;         (for example: nasm -f elf64 prog.asm && gcc prog.o).
; Out:    eax = 0 (process exit status)
;-----------------------------------------------------------------------
extern printf,scanf    ;import C functions

SECTION .data
    msg:    db "Enter x: ",10,0
    format: db '%d',0

SECTION .bss
    x:      resb 4          ; storage for one 32-bit int (matches %d)

SECTION .text

global main

main:
    ; On entry rsp % 16 == 8 because the return address was pushed.
    ; The ABI requires rsp % 16 == 0 at every call, so adjust by 8.
    sub     rsp, 8

    lea     rdi, [rel msg]          ; arg 1: format string (contains no conversions)
    xor     eax, eax                ; al = 0: no vector registers used by this variadic call
    call    printf wrt ..plt        ;print a message

    lea     rdi, [rel format]       ; arg 1: "%d"
    lea     rsi, [rel x]            ; arg 2: where scanf stores the value
    xor     eax, eax                ; al = 0: no vector registers used
    call    scanf wrt ..plt         ;input value for x

    ;do whatever with x

    ; Return from main instead of issuing a raw exit syscall, so that
    ; libc runs its normal exit path and flushes buffered stdio output.
    xor     eax, eax                ; return 0
    add     rsp, 8
    ret

就是这样。 字符串的处理方式与此类似。 希望这能帮到别人。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM