简体   繁体   English

使用自己的 python 调试器获取系统调用的问题

[英]Trouble with getting syscalls with own python debugger

I was trying to write a simple debugger in Python 3 on a 32-bit test Linux system (Lubuntu) which should be able to catch all syscalls of an arbitrary program (in this case: /bin/ls ).我试图在 32 位测试 Linux 系统(Lubuntu)上用 Python 3 编写一个简单的调试器,它应该能够捕获任意程序的所有系统调用(在这种情况下: /bin/ls )。 For this I used the ptrace syscall to single-step through the process.为此,我使用 ptrace 系统调用单步执行该进程。 After every step I read the registers to find the instruction pointer eip, and then read 2 bytes of the next instruction at that address.在每一步之后,我读取寄存器以找到指令指针 eip,然后从该地址读取下一条指令的 2 个字节。 If those 2 bytes are 0xcd and 0x80, this indicates an int 0x80, which is the syscall instruction.如果这 2 个字节是 0xcd 和 0x80,则表示这是 int 0x80,即系统调用指令。 I know there is also PTRACE_SYSCALL for this purpose, but I wanted to do this without using it.我知道还有 PTRACE_SYSCALL 用于此目的,但我想在不使用它的情况下执行此操作。

Below I show you the code, and it seems to work, BUT there is some weird behavior:在下面我向您展示代码,它似乎可以工作,但有一些奇怪的行为:

To figure out if this is working I used strace to compare its output with my own syscall list.为了确定这是否有效,我使用 strace 将它的输出与我自己的系统调用列表进行比较。 It seems that my program shows only the first part of the syscalls; the second part is simply missing.似乎我的程序只显示了系统调用的第一部分,第二部分完全丢失了。 To show you, I have posted the output of my program and of strace below.为了向您展示,我在下面发布了我的程序和 strace 的输出。 Does someone have an idea what could be wrong here?有人知道这里有什么问题吗?

import os               # os interaction
from struct import pack # dealing with bytes (ptrace)
import ctypes           # support c data structures

""" ========================================================== """

# register block of a 32 bit process
class UserRegsStruct(ctypes.Structure):
    """ctypes structure holding the register values of the 32 bit tracee.

    Every register is stored as a ``ctypes.c_ulong``.  The order of the
    names below is the field order of the structure, so it must not be
    changed.
    """

    _fields_ = [
        (reg, ctypes.c_ulong)
        for reg in (
            "ebx", "ecx", "edx", "esi", "edi", "ebp", "eax",
            "xds", "xes", "xfs", "xgs",
            "orig_eax", "eip", "xcs", "eflags", "esp", "xss",
        )
    ]

# ptrace request codes (passed as the first argument of libc.ptrace)
PTRACE_TRACEME = 0
PTRACE_PEEKDATA = 2
PTRACE_SINGLESTEP = 9
PTRACE_GETREGS = 12

CPU_WORD_SIZE = 4   # bytes per cpu word of the traced process (32 bit = 4 bytes)

# handle to the C library; ptrace is called through it
libc = ctypes.CDLL('libc.so.6')

# report whether a waitpid() status describes a stopped child (tracee)
def WIFSTOPPED(status):
    """Return True when the low byte of *status* equals 0x7f."""
    low_byte = status & 0xff
    return low_byte == 0x7f

# read from process memory by PTRACE_PEEKDATA
def ReadProcessMemory(pid, address, size):
    """Read *size* bytes starting at *address* from the memory of process *pid*.

    The memory is fetched one cpu word at a time with PTRACE_PEEKDATA, always
    from word-aligned addresses; the pieces that fall inside the requested
    range are collected and returned as one ``bytes`` object.  The return
    value of ptrace is not checked for errors.
    """
    pieces = []

    # an unaligned start: peek the word containing it and keep only its tail
    misalign = address % CPU_WORD_SIZE
    if misalign:
        address -= misalign
        raw = pack("i", libc.ptrace(PTRACE_PEEKDATA, pid, address, 0))
        take = min(CPU_WORD_SIZE - misalign, size)
        pieces.append(raw[misalign:misalign + take])
        size -= take
        address += CPU_WORD_SIZE

    # from here on every peek is aligned
    while size:
        raw = pack("i", libc.ptrace(PTRACE_PEEKDATA, pid, address, 0))
        if size < CPU_WORD_SIZE:
            # final partial word: keep only the leading bytes still wanted
            pieces.append(raw[:size])
            break
        pieces.append(raw)
        size -= CPU_WORD_SIZE
        address += CPU_WORD_SIZE

    return b"".join(pieces)

""" ========================================================== """

# extract syscall names
# syscalls[number] is the syscall name taken from the kernel header; numbers
# the header does not define keep the placeholder 0.  The list starts with
# 400 slots and grows when the header defines larger numbers.
syscalls = [0] * 400

with open("/usr/include/i386-linux-gnu/asm/unistd_32.h", "r") as fp:
    for line in fp:
        parts = line.split()    # any run of blanks/tabs separates the fields
        # only "#define __NR_<name> <number>" lines are of interest
        if len(parts) < 3 or parts[0] != "#define" or not parts[1].startswith("__NR_"):
            continue
        try:
            number = int(parts[2])
        except ValueError:
            continue            # value is an expression, not a plain number
        if number < 0:
            continue
        if number >= len(syscalls):
            syscalls.extend([0] * (number + 1 - len(syscalls)))
        syscalls[number] = parts[1][len("__NR_"):]

# "int 80" asm instruction = (0xCD 0x80)
a0 = 0xcd
a1 = 0x80

# create child tracee
pid = os.fork()

if pid == 0:    # in tracee
    libc.ptrace(PTRACE_TRACEME, 0, 0, 0)    # make child traceable
    os.execv("/bin/ls", [":-P"])            # run test programm
else:           # in tracer
    pid, status = os.waitpid(pid, 0)        # wait for the first stop of the tracee
    regs = UserRegsStruct()


def _syscall_name(number):
    """Return the table name for syscall *number*, or a placeholder if it has none."""
    if 0 <= number < len(syscalls) and syscalls[number]:
        return str(syscalls[number])
    return "unknown(" + str(number) + ")"


# catch all syscalls
# The wait status is examined directly after every waitpid(): once the tracee
# is no longer stopped there are no registers or memory left to inspect, so
# the loop ends before issuing any further ptrace request.
while WIFSTOPPED(status):

    libc.ptrace(PTRACE_SINGLESTEP, pid, 0, 0)               # execute next instruction
    pid, status = os.waitpid(pid, 0)                        # wait for tracee
    if not WIFSTOPPED(status):                              # tracee is gone
        break
    libc.ptrace(PTRACE_GETREGS, pid, 0, ctypes.byref(regs)) # get register values
    data = ReadProcessMemory(pid, regs.eip, 2)              # read 2 bytes from instruction pointer address

    # now check if the instruction about to run is a syscall
    if data[0] == a0 and data[1] == a1:
        print("HEUREKA! SYSCALL at " + hex(regs.eip) + ": " + _syscall_name(regs.eax))

This generated the following output:这生成了以下输出:

HEUREKA! SYSCALL at 0xb7fae2c5: brk
HEUREKA! SYSCALL at 0xb7fa3944: access
HEUREKA! SYSCALL at 0xb7faf7ae: mmap2
HEUREKA! SYSCALL at 0xb7faf689: access
HEUREKA! SYSCALL at 0xb7faf4b5: openat
HEUREKA! SYSCALL at 0xb7faf419: fstat64
HEUREKA! SYSCALL at 0xb7faf7ae: mmap2
HEUREKA! SYSCALL at 0xb7faf755: close
HEUREKA! SYSCALL at 0xb7faa758: access
HEUREKA! SYSCALL at 0xb7faf4b5: openat
HEUREKA! SYSCALL at 0xb7faf57e: read
HEUREKA! SYSCALL at 0xb7faf419: fstat64
HEUREKA! SYSCALL at 0xb7faf7ae: mmap2
HEUREKA! SYSCALL at 0xb7faf7ae: mmap2
HEUREKA! SYSCALL at 0xb7faf7ae: mmap2
HEUREKA! SYSCALL at 0xb7faf755: close
HEUREKA! SYSCALL at 0xb7faa758: access
HEUREKA! SYSCALL at 0xb7faf4b5: openat
HEUREKA! SYSCALL at 0xb7faf57e: read
HEUREKA! SYSCALL at 0xb7faf419: fstat64
HEUREKA! SYSCALL at 0xb7faf7ae: mmap2
HEUREKA! SYSCALL at 0xb7faf822: mprotect
HEUREKA! SYSCALL at 0xb7faf7ae: mmap2
HEUREKA! SYSCALL at 0xb7faf7ae: mmap2
HEUREKA! SYSCALL at 0xb7faf755: close
HEUREKA! SYSCALL at 0xb7faa758: access
HEUREKA! SYSCALL at 0xb7faf4b5: openat
HEUREKA! SYSCALL at 0xb7faf57e: read
HEUREKA! SYSCALL at 0xb7faf419: fstat64
HEUREKA! SYSCALL at 0xb7faf7ae: mmap2
HEUREKA! SYSCALL at 0xb7faf7ae: mmap2
HEUREKA! SYSCALL at 0xb7faf755: close
HEUREKA! SYSCALL at 0xb7faa758: access
HEUREKA! SYSCALL at 0xb7faf4b5: openat
HEUREKA! SYSCALL at 0xb7faf57e: read
HEUREKA! SYSCALL at 0xb7faf419: fstat64
HEUREKA! SYSCALL at 0xb7faf7ae: mmap2
HEUREKA! SYSCALL at 0xb7faf7ae: mmap2
HEUREKA! SYSCALL at 0xb7faf755: close
HEUREKA! SYSCALL at 0xb7faa758: access
HEUREKA! SYSCALL at 0xb7faf4b5: openat
HEUREKA! SYSCALL at 0xb7faf57e: read
HEUREKA! SYSCALL at 0xb7faf419: fstat64
HEUREKA! SYSCALL at 0xb7faf7ae: mmap2
HEUREKA! SYSCALL at 0xb7faf7ae: mmap2
HEUREKA! SYSCALL at 0xb7faf7ae: mmap2
HEUREKA! SYSCALL at 0xb7faf755: close
HEUREKA! SYSCALL at 0xb7faf7ae: mmap2
HEUREKA! SYSCALL at 0xb7f95bd9: set_thread_area
HEUREKA! SYSCALL at 0xb7faf822: mprotect
HEUREKA! SYSCALL at 0xb7faf822: mprotect
HEUREKA! SYSCALL at 0xb7faf822: mprotect
HEUREKA! SYSCALL at 0xb7faf822: mprotect
HEUREKA! SYSCALL at 0xb7faf822: mprotect
HEUREKA! SYSCALL at 0xb7faf822: mprotect
HEUREKA! SYSCALL at 0xb7faf822: mprotect
HEUREKA! SYSCALL at 0xb7faf7ff: munmap
test.py

And here is the output of strace:这是 strace 的输出:

execve("/bin/ls", ["/bin/ls"], 0xbfef5e40 /* 45 vars */) = 0
brk(NULL)                               = 0x220c000
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
mmap2(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f00000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=89915, ...}) = 0
mmap2(NULL, 89915, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7eea000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/i386-linux-gnu/libselinux.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\0L\0\0004\0\0\0"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0644, st_size=169960, ...}) = 0
mmap2(NULL, 179612, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xb7ebe000
mmap2(0xb7ee7000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x28000) = 0xb7ee7000
mmap2(0xb7ee9000, 3484, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xb7ee9000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/i386-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\1\1\1\3\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\20\220\1\0004\0\0\0"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=1942840, ...}) = 0
mmap2(NULL, 1948188, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xb7ce2000
mprotect(0xb7eb7000, 4096, PROT_NONE)   = 0
mmap2(0xb7eb8000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1d5000) = 0xb7eb8000
mmap2(0xb7ebb000, 10780, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xb7ebb000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/i386-linux-gnu/libpcre.so.3", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\360\16\0\0004\0\0\0"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0644, st_size=480564, ...}) = 0
mmap2(NULL, 483512, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xb7c6b000
mmap2(0xb7ce0000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x74000) = 0xb7ce0000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/i386-linux-gnu/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\320\n\0\0004\0\0\0"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0644, st_size=13796, ...}) = 0
mmap2(NULL, 16500, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xb7c66000
mmap2(0xb7c69000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0xb7c69000
close(3)                                = 0
access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/i386-linux-gnu/libpthread.so.0", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\300P\0\0004\0\0\0"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=142820, ...}) = 0
mmap2(NULL, 123544, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xb7c47000
mmap2(0xb7c62000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1a000) = 0xb7c62000
mmap2(0xb7c64000, 4760, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xb7c64000
close(3)                                = 0
mmap2(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7c45000
set_thread_area({entry_number=-1, base_addr=0xb7c45780, limit=0x0fffff, seg_32bit=1, contents=0, read_exec_only=0, limit_in_pages=1, seg_not_present=0, useable=1}) = 0 (entry_number=6)
mprotect(0xb7eb8000, 8192, PROT_READ)   = 0
mprotect(0xb7c62000, 4096, PROT_READ)   = 0
mprotect(0xb7c69000, 4096, PROT_READ)   = 0
mprotect(0xb7ce0000, 4096, PROT_READ)   = 0
mprotect(0xb7ee7000, 4096, PROT_READ)   = 0
mprotect(0x469000, 4096, PROT_READ)     = 0
mprotect(0xb7f2d000, 4096, PROT_READ)   = 0
munmap(0xb7eea000, 89915)               = 0

Up to here the strace output matches my own output completely, but the remaining syscalls never appear in my program.到这里为止 strace 的输出与我自己的输出完全一致,但剩余的系统调用从未出现在我的程序中。 So that's the question.所以这就是问题所在。 I hope someone knows the answer :P If you have any questions, please ask!我希望有人知道答案:P 如果您有任何问题,请提问!

set_tid_address(0xb7c457e8)             = 9767
set_robust_list(0xb7c457f0, 12)         = 0
rt_sigaction(SIGRTMIN, {sa_handler=0xb7c4baf0, sa_mask=[], sa_flags=SA_SIGINFO}, NULL, 8) = 0
rt_sigaction(SIGRT_1, {sa_handler=0xb7c4bb80, sa_mask=[], sa_flags=SA_RESTART|SA_SIGINFO}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
ugetrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM_INFINITY}) = 0
uname({sysname="Linux", nodename="p200300D053D7310F22107AFFFE01D58C", ...}) = 0
statfs("/sys/fs/selinux", 0xbffeddb4)   = -1 ENOENT (No such file or directory)
statfs("/selinux", 0xbffeddb4)          = -1 ENOENT (No such file or directory)
brk(NULL)                               = 0x220c000
brk(0x222d000)                          = 0x222d000
brk(0x222e000)                          = 0x222e000
openat(AT_FDCWD, "/proc/filesystems", O_RDONLY|O_CLOEXEC) = 3
fstat64(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
read(3, "nodev\tsysfs\nnodev\trootfs\nnodev\tr"..., 1024) = 401
read(3, "", 1024)                       = 0
close(3)                                = 0
brk(0x222d000)                          = 0x222d000
access("/etc/selinux/config", F_OK)     = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib/locale/locale-archive", O_RDONLY|O_LARGEFILE|O_CLOEXEC) = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=3365136, ...}) = 0
mmap2(NULL, 2097152, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7a45000
close(3)                                = 0
ioctl(1, TCGETS, {B38400 opost isig icanon echo ...}) = 0
ioctl(1, TIOCGWINSZ, {ws_row=48, ws_col=198, ws_xpixel=0, ws_ypixel=0}) = 0
openat(AT_FDCWD, ".", O_RDONLY|O_NONBLOCK|O_LARGEFILE|O_CLOEXEC|O_DIRECTORY) = 3
fstat64(3, {st_mode=S_IFDIR|0775, st_size=4096, ...}) = 0
getdents64(3, /* 3 entries */, 32768)   = 80
getdents64(3, /* 0 entries */, 32768)   = 0
close(3)                                = 0
fstat64(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 1), ...}) = 0
write(1, "test.py\n", 8test.py
)                = 8
close(1)                                = 0
close(2)                                = 0
exit_group(0)                           = ?
+++ exited with 0 +++


If you're only looking for int 0x80 , you're going to miss normal 32-bit syscalls made with the sysenter instruction (normally via glibc calling into the VDSO page).如果您只查找 int 0x80 ,您将错过使用 sysenter 指令(通常通过 glibc 调用 VDSO 页面)进行的正常 32 位系统调用。 https://blog.packagecloud.io/eng/2016/04/05/the-definitive-guide-to-linux-system-calls/ . https://blog.packagecloud.io/eng/2016/04/05/the-definitive-guide-to-linux-system-calls/ (Also, on old AMD CPUs the 32-bit syscall instruction is possible, and might be used by default if they're too old to support sysenter .) (另外,在旧的 AMD CPU 上也可以使用 32 位的 syscall 指令,如果它们太旧而无法支持 sysenter ,则可能会默认使用它。)

I guess the early ld.so code uses the legacy int 0x80 mechanism instead of calling into the VDSO.我猜早期的 ld.so 代码使用了遗留的int 0x80机制,而不是调用 VDSO。 (Which makes sense; the VDSO presents itself as an ELF shared object mapped into memory; until the dynamic linker sets up function-pointers into it, it can't use it.) (这是有道理的;VDSO 将自己呈现映射到内存中的 ELF 共享对象;直到动态链接器将函数指针设置到其中,它才能使用它。)

64-bit mode is simpler: everything uses syscall for the 64-bit ABI. 64 位模式更简单:一切都使用 64 位 ABI 的syscall


Note that checking machine code before or after an instruction executes could be spoofed by code trying to hide from your tracing.请注意,在指令执行之前或之后检查机器代码可能会被试图隐藏跟踪的代码欺骗。 A 2nd thread could cross-modify the machine code bytes after you look at it, before it executes.在执行之前,第二个线程可以在查看它之后交叉修改机器代码字节。 (Perhaps have one thread store a flag, which will cause another thread to store as soon as it notices. With the right timing, this could sneak in between your ptrace fetch and when you do the next single-step.) (也许有一个线程存储一个标志,这将导致另一个线程在它注意到时立即存储。如果时机合适,这可能会在您的 ptrace 获取和您执行下一个单步操作之间潜入。)

A similar race condition is a problem in real life for PTRACE_SYSCALL used by strace (or a sandbox / syscall logging or filter tool on code that may be trying to trick it) in 64-bit mode trying to figure out whether the 32 or 64-bit ABI was invoked (because the call numbers are different).对于 strace 使用的 PTRACE_SYSCALL (或针对可能试图欺骗它的代码的沙箱/系统调用日志记录或过滤工具)来说,类似的竞争条件在现实中确实是个问题:在 64 位模式下,它需要判断被调用的是 32 位还是 64 位 ABI(因为两者的调用号不同)。 See: Can ptrace tell if an x86 system call used the 64-bit or 32-bit ABI?参见:ptrace 能否判断 x86 系统调用使用的是 64 位还是 32 位 ABI? (or rather, it was a problem until Linux kernel 5.3 added PTRACE_GET_SYSCALL_INFO ). (或者更确切地说,在 Linux 内核 5.3 添加 PTRACE_GET_SYSCALL_INFO 之前,这一直是个问题)。

It is possible to invoke int 0x80 in 64-bit code, even though it's basically never a good idea: "What is the explanation of this x86 Hello World using 32-bit int 0x80 Linux system calls from _start?" has some details on what happens on the kernel side of a system call.在 64 位代码中也可以调用 int 0x80 ,尽管这基本上从来不是一个好主意:“如何解释这个从 _start 使用 32 位 int 0x80 Linux 系统调用的 x86 Hello World?”一文中有一些关于系统调用在内核端发生了什么的细节。


Again, this is only a problem if you care about programs trying to obfuscate their activity from your tracer, eg as an anti-debugging measure .同样,如果您关心程序试图从您的跟踪器中混淆它们的活动,这只是一个问题,例如作为反调试措施 Having another thread overwrite code that's executing won't happen by accident.让另一个线程覆盖正在执行的代码不会偶然发生。 But it's something to be aware of when designing debugging / tracing tools.但在设计调试/跟踪工具时需要注意这一点。

The real danger comes if this code is used as a library where someone might try to build a sandboxing system-call filter out of it.如果将此代码用作库,有人可能会尝试从中构建沙盒系统调用过滤器,那么真正的危险就来了。 eg check paths in all file-access system calls, or reject open calls that aren't opening read-only.例如,检查所有文件访问系统调用中的路径,或拒绝未以只读方式open调用。 Then evading the tracing becomes a real security problem.那么逃避追踪就成为一个真正的安全问题。 (There are much better ways to do sandboxing in general, of course.) (当然,一般来说,有更好的沙盒方法。)

Thank you so much!!非常感谢!! You were right!你是对的!

I now updated the code a little bit.我现在稍微更新了代码。 I used the /proc/(pid)/auxv file to get the address of the sysenter routine in VDSO by the key AT_SYSINFO.我使用/proc/(pid)/auxv文件通过键 AT_SYSINFO 获取 VDSO 中 sysenter 例程的地址。 Now i could detect non-legacy syscalls by comparing this address with eip.现在我可以通过将此地址与 eip 进行比较来检测非遗留系统调用。 Actually pretty easy, learned something again ;P其实很简单,又学到了一些东西;P

Here is my updated code:这是我更新的代码:

import os                       # os interaction
from struct import pack, unpack # dealing with bytes (ptrace)
import ctypes                   # support c data structures

""" ========================================================== """

# register block of a 32 bit process
class UserRegsStruct(ctypes.Structure):
    """ctypes structure holding the register values of the 32 bit tracee.

    Every register is stored as a ``ctypes.c_ulong``.  The order of the
    names below is the field order of the structure, so it must not be
    changed.
    """

    _fields_ = [
        (reg, ctypes.c_ulong)
        for reg in (
            "ebx", "ecx", "edx", "esi", "edi", "ebp", "eax",
            "xds", "xes", "xfs", "xgs",
            "orig_eax", "eip", "xcs", "eflags", "esp", "xss",
        )
    ]

# ptrace request codes (passed as the first argument of libc.ptrace)
PTRACE_TRACEME = 0
PTRACE_PEEKDATA = 2
PTRACE_SINGLESTEP = 9
PTRACE_GETREGS = 12


AT_SYSINFO = 32     # auxv key of the syscall entry address (looked up in /proc/(pid)/auxv)

CPU_WORD_SIZE = 4   # bytes per cpu word of the traced process (32 bit = 4 bytes)

# handle to the C library; ptrace is called through it
libc = ctypes.CDLL('libc.so.6')

# report whether a waitpid() status describes a stopped child (tracee)
def WIFSTOPPED(status):
    """Return True when the low byte of *status* equals 0x7f."""
    low_byte = status & 0xff
    return low_byte == 0x7f

# read from process memory by PTRACE_PEEKDATA
def ReadProcessMemory(pid, address, size):
    """Read *size* bytes starting at *address* from the memory of process *pid*.

    The memory is fetched one cpu word at a time with PTRACE_PEEKDATA, always
    from word-aligned addresses; the pieces that fall inside the requested
    range are collected and returned as one ``bytes`` object.  The return
    value of ptrace is not checked for errors.
    """
    pieces = []

    # an unaligned start: peek the word containing it and keep only its tail
    misalign = address % CPU_WORD_SIZE
    if misalign:
        address -= misalign
        raw = pack("i", libc.ptrace(PTRACE_PEEKDATA, pid, address, 0))
        take = min(CPU_WORD_SIZE - misalign, size)
        pieces.append(raw[misalign:misalign + take])
        size -= take
        address += CPU_WORD_SIZE

    # from here on every peek is aligned
    while size:
        raw = pack("i", libc.ptrace(PTRACE_PEEKDATA, pid, address, 0))
        if size < CPU_WORD_SIZE:
            # final partial word: keep only the leading bytes still wanted
            pieces.append(raw[:size])
            break
        pieces.append(raw)
        size -= CPU_WORD_SIZE
        address += CPU_WORD_SIZE

    return b"".join(pieces)

def GetSyscallEntry(pid):
    """Return the AT_SYSINFO value from the auxiliary vector of process *pid*.

    ``/proc/<pid>/auxv`` is read as consecutive (key, value) pairs of 4-byte
    native-endian unsigned integers, i.e. the layout of a 32 bit process.

    Returns the value stored under the key AT_SYSINFO as an unsigned
    integer, or None when the vector has no such key.  Raises OSError
    (e.g. FileNotFoundError) when the auxv file cannot be opened.
    """
    # the context manager closes the file on every exit path
    with open("/proc/" + str(pid) + "/auxv", "rb") as fd:
        while True:
            entry = fd.read(8)
            if len(entry) < 8:      # end of the vector (or a truncated last pair)
                return None
            k, v = unpack("II", entry)
            if k == AT_SYSINFO:
                return v

""" ========================================================== """

# extract syscall names
# syscalls[number] is the syscall name taken from the kernel header; numbers
# the header does not define keep the placeholder 0.  The list starts with
# 400 slots and grows when the header defines larger numbers.
syscalls = [0] * 400

with open("/usr/include/i386-linux-gnu/asm/unistd_32.h", "r") as fp:
    for line in fp:
        parts = line.split()    # any run of blanks/tabs separates the fields
        # only "#define __NR_<name> <number>" lines are of interest
        if len(parts) < 3 or parts[0] != "#define" or not parts[1].startswith("__NR_"):
            continue
        try:
            number = int(parts[2])
        except ValueError:
            continue            # value is an expression, not a plain number
        if number < 0:
            continue
        if number >= len(syscalls):
            syscalls.extend([0] * (number + 1 - len(syscalls)))
        syscalls[number] = parts[1][len("__NR_"):]

# "int 80" asm instruction = (0xCD 0x80)
a0 = 0xcd
a1 = 0x80

# create child tracee
pid = os.fork()

if pid == 0:    # in tracee
    libc.ptrace(PTRACE_TRACEME, 0, 0, 0)    # make child traceable
    os.execv("/bin/ls", [":-P"])            # run test programm
else:           # in tracer
    pid, status = os.waitpid(pid, 0)        # wait for the first stop of the tracee
    regs = UserRegsStruct()
    sc_entry = GetSyscallEntry(pid)         # get the syscall entry address in child vdso space
    print("child pid: " + str(pid))


def _syscall_name(number):
    """Return the table name for syscall *number*, or a placeholder if it has none."""
    if 0 <= number < len(syscalls) and syscalls[number]:
        return str(syscalls[number])
    return "unknown(" + str(number) + ")"


# catch all syscalls
# The wait status is examined directly after every waitpid(): once the tracee
# is no longer stopped there are no registers or memory left to inspect, so
# the loop ends before issuing any further ptrace request.
while WIFSTOPPED(status):

    libc.ptrace(PTRACE_SINGLESTEP, pid, 0, 0)               # execute next instruction
    pid, status = os.waitpid(pid, 0)                        # wait for tracee
    if not WIFSTOPPED(status):                              # tracee is gone
        break
    libc.ptrace(PTRACE_GETREGS, pid, 0, ctypes.byref(regs)) # get register values
    data = ReadProcessMemory(pid, regs.eip, 2)              # read 2 bytes from instruction pointer address

    # legacy syscall: the instruction about to run is "int 0x80"
    if data[0] == a0 and data[1] == a1:
        print("HEUREKA! SYSCALL (legacy) at " + hex(regs.eip) + ": " + _syscall_name(regs.eax))

    # the tracee has arrived at the syscall entry address taken from its auxv
    if regs.eip == sc_entry:
        print("HEUREKA! SYSCALL (sysenter) at " + hex(regs.eip) + ": " + _syscall_name(regs.eax))

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM