As a learning experiment, I am interested in creating a hashtable in assembly (x86-64 in NASM on OSX). One of the requirements is to be able to dynamically allocate/manage memory.
After looking through many resources on how to allocate memory in assembly, most of them recommend either the brk or mmap syscalls. I haven't learned exactly how these work yet because I found another implementation of memory allocation in BareMetal-OS that doesn't use any system calls (copied their code below).
My question is, how are they doing this? Can you explain the relevant instructions in their assembly that perform the memory allocation, for someone without a systems programming background and who is new to assembly? The reason for wanting to understand how to implement memory allocation in assembly is to be able to implement a hashtable in assembly.
Being new to assembly (I mainly do JavaScript), and having not found any detailed resources yet on memory allocation in assembly, I don't know where to start. It may be obvious to you, but you have the background, which I don't. I have done some assembly the past week or two, so I understand the basics about mov
on registers, and the jump commands, but don't yet understand the additional stuff they are doing to implement this memory stuff. My thinking is, if they can implement memory allocation in assembly without brk
or mmap
, then I want to do it that way because then I really am manipulating the memory directly without any system layers, and it seems like you can really fine-tune stuff.
Here is their code copied from GitHub:
https://github.com/ReturnInfinity/BareMetal-OS/blob/master/os/syscalls/memory.asm
# =============================================================================
# BareMetal -- a 64-bit OS written in Assembly for x86-64 systems
# Copyright (C) 2008-2014 Return Infinity -- see LICENSE.TXT
#
# Memory functions
# =============================================================================
# Marker string: pads to a 16-byte boundary, emits the 14 ASCII bytes
# 'DEBUG: MEMORY ' into the image, then pads to the next 16-byte boundary so
# that whatever follows starts aligned.
# NOTE(review): presumably an eye-catcher for finding this module in a memory
# dump -- confirm with the project.
align 16
db 'DEBUG: MEMORY '
align 16
# -----------------------------------------------------------------------------
# os_mem_allocate -- Allocates the requested number of 2 MiB pages
# IN: RCX = Number of pages to allocate
# OUT: RAX = Starting address (Set to 0 on failure)
# All other registers preserved (RSI, RDX and RBX are pushed/popped, RDI is
# swapped out and put back, RCX is saved around the 'rep stosb'). Flags are
# clobbered. The direction flag is set for the backward scan and cleared again
# on both the success and the failure path.
# This function will only allocate contiguous pages.
# os_MemoryMap is used as one byte per 2 MiB page: a byte equal to 1 is treated
# as a free page, any other value as taken; allocated pages are written as 2.
# The scan walks from the highest page downward, so the run that gets
# allocated is the highest-addressed one that fits.
# NOTE(review): the start-of-map test runs before the byte just loaded is
# examined, so map indices 0 and 1 are never handed out. It is also an equality
# test only -- it assumes the map holds at least 2 entries (os_MemAmount >= 4);
# confirm that is guaranteed by whoever fills in os_MemAmount.
os_mem_allocate:
push rsi
push rdx
push rbx
cmp rcx, 0
je os_mem_allocate_fail # A request for 0 pages is rejected (returns 0)
# Here, we'll point RSI at the map byte of the last existing page of memory.
# RAX and RSI instructions are purposefully interleaved.
xor rax, rax
mov rsi, os_MemoryMap # RSI = address of the start of the page map
mov eax, [os_MemAmount] # Total memory in MiB, read as a double-word
mov rdx, rsi # RDX keeps the map start: scan limit now, index base later
shr eax, 1 # MiB / 2 = number of 2 MiB pages = number of map entries in use
sub rsi, 1
std # Direction flag backward: each lodsb below decrements RSI
add rsi, rax # RSI = os_MemoryMap + pages - 1, the map byte of the last page
os_mem_allocate_start: # (Re)start looking for a run of RCX free pages, scanning downward.
mov rbx, rcx # RBX = free pages still needed to complete the current run
os_mem_allocate_nextpage:
lodsb # AL = map byte at RSI, then RSI = RSI - 1
cmp rsi, rdx # RSI has come down to the start of the memory map, no more pages to try
je os_mem_allocate_fail
cmp al, 1
jne os_mem_allocate_start # Page is taken, the run is broken: start counting from scratch
dec rbx # Page is free and extends the run. Any page left to find?
jnz os_mem_allocate_nextpage
os_mem_allocate_mark: # We have a suitable free series of pages. Allocate them.
cld # Direction flag forward, so stosb below walks upward
xor rdi, rsi # Three-XOR swap of RDI and RSI: RDI gets the scan pointer
xor rsi, rdi # (needed as the stosb destination) and RSI holds the
xor rdi, rsi # caller's RDI until it is restored below.
# Instructions are purposefully swapped at some places here to avoid
# direct dependencies line after line.
push rcx # 'rep stosb' counts RCX down to 0; keep the page count for the caller
add rdi, 1 # The last lodsb left the pointer one below the run; step back onto its lowest page
mov al, 2 # Map value written for an allocated page
mov rbx, rdi # RBX = map address of the first (lowest) page of the run
rep stosb # Write 2 into RCX consecutive map bytes
mov rdi, rsi # Restore the caller's RDI
sub rbx, rdx # Map address - map start = page number
pop rcx # Restore RCX
# Only dependency left is between the two next lines.
shl rbx, 21 # Page number * 2097152 (2 MiB) = starting memory address
mov rax, rbx # Return the starting address in RAX
jmp os_mem_allocate_end
os_mem_allocate_fail:
cld # Leave the direction flag forward (it may have been set by the scan)
xor rax, rax # Failure so set RAX to 0 (No pages allocated)
os_mem_allocate_end:
pop rbx
pop rdx
pop rsi
ret
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# os_mem_release -- Marks a run of 2 MiB pages as free in the memory map
#  IN:  RAX = Starting address of the first page to free
#       RCX = Number of pages to free
# OUT:  RCX = Number of pages freed (the count passed in, restored unchanged)
#       RAX and RDI preserved
# Writes the value 1 into RCX consecutive bytes of os_MemoryMap, starting at
# index (RAX / 2 MiB). The address and the count are used as given; nothing is
# validated here.
# Assumes the direction flag is clear on entry so that 'rep stosb' walks upward.
os_mem_release:
    push rdi
    push rcx
    push rax

    mov rdi, rax                # Work on a copy of the address in RDI
    shr rdi, 21                 # Address / 2097152 (2 MiB) = page number
    add rdi, os_MemoryMap       # RDI = map byte belonging to the first page
    mov al, 1                   # Map value for a free page
    rep stosb                   # Mark RCX pages as free

    pop rax
    pop rcx
    pop rdi
    ret
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# os_mem_get_free -- Returns the number of 2 MiB pages that are available
#  IN:  Nothing
# OUT:  RCX = Number of free 2 MiB pages
#       RAX, RBX and RSI preserved
# Walks 65536 one-byte entries of os_MemoryMap and counts those equal to 1
# (the value the allocator treats as "free").
# Fix: the previous loop advanced the entry counter and left the loop *before*
# testing the byte it had just loaded, so the final entry (index 65535) was
# read but never counted. Each byte is now tested before the exit check, so
# all 65536 entries take part in the count.
# Assumes the direction flag is clear on entry so that lodsb walks upward.
os_mem_get_free:
    push rsi
    push rbx
    push rax

    mov rsi, os_MemoryMap       # RSI = address of the first map entry
    xor rcx, rcx                # RCX = entries examined so far
    xor rbx, rbx                # RBX = free pages found so far

os_mem_get_free_next:
    lodsb                       # AL = current entry, RSI advances to the next
    cmp al, 1
    jne os_mem_get_free_skip    # Not free: nothing to count
    inc rbx                     # Free page found
os_mem_get_free_skip:
    inc rcx
    cmp rcx, 65536              # Number of map entries scanned by this routine
    jne os_mem_get_free_next

os_mem_get_free_end:
    mov rcx, rbx                # Return the count in RCX
    pop rax
    pop rbx
    pop rsi
    ret
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# os_mem_copy -- Copies a block of bytes from one address to another
#  IN:  RSI = Source address
#       RDI = Destination address
#       RCX = Number of bytes to copy (0 copies nothing)
# OUT:  Nothing, all registers preserved
# The copy is a plain byte-at-a-time 'rep movsb'; its direction follows the
# direction flag as it is on entry (this routine does not change it).
os_mem_copy:
    push rcx                    # 'rep movsb' consumes RCX and advances RSI/RDI,
    push rsi                    # so all three are saved for the caller
    push rdi

    rep movsb                   # Byte-wise copy; candidate for a wider-copy optimization

    pop rdi
    pop rsi
    pop rcx
    ret
# -----------------------------------------------------------------------------
# =============================================================================
# EOF
Also note, I have read many resources on creating hashtables in C, one of which I have copied here (which has the C code, and corresponding assembly). However, pretty much all of the C examples use malloc
, which I want to avoid. I am trying to learn assembly without depending on C at all.
Also, this resource from Quora was helpful in pointing to the places in the malloc.c
source code where brk
and mmap
are used. However, I haven't studied that yet because of discovering the BareMetal-OS memory.asm
code, which seems to allocate memory without even using those syscalls. Hence the question, how are they doing that? Can you explain the relevant instructions in their assembly that perform the memory allocation?
Update
This book helps explain pretty much everything about the internals of memory below mmap and brk; it's all in the area of implementing operating systems. http://www.amazon.com/Modern-Operating-Systems-4th-Edition/dp/013359162X
In order to manage memory, your code needs to "own" some memory. The problem is that on any machine that has an operating system, the operating system owns all of the memory. So your code has to ask the operating system for some memory, which it can do with brk
, or mmap
, or malloc
.
So for example, if you want to write a memory manager in assembly, and you have a machine with 4GB of memory, it would not be unreasonable to request 1GB of memory from malloc
at the start of the program, and then manage that memory any way you like.
The assembly code from the BareMetal-OS really doesn't apply to your situation, because BareMetal is the operating system, and therefore doesn't need to ask anyone for memory. It already owns all of the memory, and can manage it any way it likes.
Following on from other comments and answers, the reason BareMetal-OS can implement allocation in this manner is that it relies on two additional symbols that are not defined in the code posted and are not provided by an assembler such as NASM. Specifically, the symbols relied on in the posted code are:
os_MemoryMap
os_MemAmount
They are BareMetal-OS-specific data labels rather than calls: the posted code loads the address of os_MemoryMap into a register and reads a double-word from os_MemAmount, so both must be set up elsewhere by the operating system (or by whatever memory manager the code is part of). Without some external library (e.g. libc or a memory manager library), you are limited to system calls such as brk (system call number 45 on 32-bit x86 Linux and 12 on x86-64 Linux). Hopefully this adds another piece to the puzzle. Good luck.
This post explains the assembly code for the os_mem_allocate
function. The basic idea is that memory is allocated in 2MB chunks. There's an array of 65536 bytes ( os_MemoryMap
) that keeps track of which chunks are free and which are used. A value of 1 is a free chunk, a value of 2 is a used chunk. The total amount of memory that could be managed is 64K * 2MB = 128GB. Since most machines don't have that much memory there's another variable ( os_MemAmount
) that indicates the memory size of the machine (in MB).
The input to the os_mem_allocate
function is a count, ie how many 2MB chunks to allocate. The function is designed to only allocate contiguous chunks. For example, if the input request is 3, then the function attempts to allocate 6MB of memory, and does this by searching the array for three 1's in a row. The return value from the function is a pointer to the allocated memory, or 0 if the request could not be satisfied.
The input count is passed in rcx
. The code verifies that the request is for a non-zero number of chunks. An input of 0 results in a return value of 0.
os_mem_allocate:
push rsi # save some registers
push rdx
push rbx
cmp rcx, 0 # Is the count 0?
je os_mem_allocate_fail # If YES, then return 0
The code does a roundabout calculation to point rsi
to the last usable byte in the 65536 byte array. The last two lines of the following snippet are the most interesting. Setting the direction flag means that subsequent lodsb
instructions will decrement rsi
. And of course pointing rsi
to the last usable byte in the array is the whole point of the calculation.
xor rax, rax
mov rsi, os_MemoryMap # Get the address of the 65536 byte array into RSI
mov eax, [os_MemAmount] # Get the memory size in MB into EAX
mov rdx, rsi # Keep os_MemoryMap in RDX for later use
shr eax, 1 # Divide by 2 because os_MemAmount is in MB, but chunks are 2MB
sub rsi, 1 # in C syntax, we're calculating &array[amount/2-1], which is the address of the last usable byte in the array
std # Set direction flag to backward
add rsi, rax # RSI now points to the last byte
Next the code has a loop that searches for N contiguous free chunks, where N is the count that was passed to the function in rcx. The loop scans backwards through the array looking for N 1's in a row. The loop succeeds if rbx reaches 0. Any time the loop finds a byte other than 1 in the array (for example a 2, meaning "used"), it resets rbx back to N.
os_mem_allocate_start:
mov rbx, rcx # RBX is the number of contiguous free chunks we need to find
os_mem_allocate_nextpage:
lodsb # read a byte into AL, and decrement RSI
cmp rsi, rdx # if RSI has reached the beginning of the array
je os_mem_allocate_fail # then the loop has failed
cmp al, 1 # Is the chunk free?
jne os_mem_allocate_start # If NO, we need to restart the count
dec rbx # If YES, decrement the count
jnz os_mem_allocate_nextpage # If the count reaches zero we've succeeded, otherwise continue looping
At this point the code has found enough contiguous chunks to satisfy the request, so now it marks all of the chunks as "used" by setting the bytes in the array to 2. The direction flag is set to forward so that subsequent stosb
instructions will increment rdi
.
os_mem_allocate_mark: # We have a suitable free series of chunks, mark them as used
cld # Set direction flag to forward
xor rdi, rsi # We swap RDI and RSI to keep RDI contents, but
xor rsi, rdi # more importantly we want RDI to point to the
xor rdi, rsi # location in the array where we want to write 2's
push rcx # Save RCX since 'rep stosb' will modify it
add rdi, 1 # the previous loop decremented RSI too many times
mov al, 2 # the value 2 indicates a "used" chunk
mov rbx, rdi # RBX is going to be used to calculate the return value
rep stosb # store some 2's in the array, using the count in RCX
mov rdi, rsi # Restoring RDI
Finally, the function needs to come up with a pointer to return to the caller.
sub rbx, rdx # RBX is now an index into the 65536 byte array
pop rcx # Restore RCX
shl rbx, 21 # Multiply by 2MB to convert the index to a pointer
mov rax, rbx # Return the pointer in RAX
jmp os_mem_allocate_end
The next snippet handles errors by setting the return value to 0. Clearing the direction flag is important since by convention the direction is forward.
os_mem_allocate_fail:
cld # Set direction flag to forward
xor rax, rax # Failure so set RAX to 0 (No pages allocated)
Finally, restore the registers and return the pointer.
os_mem_allocate_end:
pop rbx
pop rdx
pop rsi
ret
The technical post webpages of this site are licensed under CC BY-SA 4.0. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.