How do I fix “Segmentation fault (core dumped)” in assembly?

I am using ARMv8 for my code. I am almost done with it, except that I have run into a problem: when I run the program, I get a "Segmentation fault (core dumped)" error. The issue arises because the line commented with //THIS ONE A loads a very large number into x24, when it should be loading a number in the range 0-50. Because of this, at the lines labelled //THIS ONE B and C, the code tries to access an address like x29 + 2^40 or so, instead of x29 + (0-50).

I have tried going through the code to find where the wrong number is stored into the i_s pointer, but I cannot find it. I also tried the code with changing x24 in lines B and C to x21, and it runs perfectly fine.

The part that confuses me the most is that earlier in the code, right after the testOut label, I have an almost identical line. The only difference is that where it works, I load into x21, and where it doesn't work, I load into x24. And the value that i_s points to does not change between the working load and the broken load.

NOTE: The lines in question are near the bottom of the code

// m4-style macro definitions: these names are replaced textually wherever
// they appear in the rest of the listing.
define(SIZE, 50)
define(v_base_r, x19)       //stack location of index 0 (address of v[0])
define(ind_r, x20)      //index of array, used by the initialization loop

// Bytes reserved for each local variable.
// NOTE(review): each scalar gets 4 bytes, but main accesses these slots with
// 64-bit x registers (8-byte ldr/str), so every such access also covers the
// slot that follows it.
i_size = 4
j_size = 4
min_size = 4
temp_size = 4
v_size = 50*4
// Negative frame size: 16 bytes for the saved x29/x30 pair plus the locals,
// with the low four bits cleared so the result is a multiple of 16.
alloc = -(16+i_size+j_size+min_size+temp_size+v_size) & -16
dealloc = -alloc

// Offsets of each local from x29. The scalar slots are 4 bytes apart and the
// array v starts at offset 32.
i_s = 16
j_s = 20
min_s = 24
temp_s = 28
v_s = 32

// printf format strings. fmt1 is used by the initialization loop in main.
// NOTE(review): fmt2 and fmt3 are not referenced anywhere in the visible
// part of this listing.
fmt1:       .string "v[%d]: %d\n"       //i, v[i]

fmt2:       .string "\nSorted array:\n"

fmt3:       .string "v[%d]: %d\n"       //i, v[i]
    .balign 4                   // realign to 4 bytes after the strings

    .global main

// main: allocates the stack frame, fills v with SIZE values of rand masked
// to the range 0..255 (printing each one), then runs what appears to be a
// selection sort over v using the locals i, j, min and temp.
// NOTE(review): the branch targets used below (inittest, init, testOut,
// forOut, testIn, forIn, keep) have no label definitions in this listing,
// and no epilogue or ret is shown, so the listing looks incomplete.
main:       stp x29, x30, [sp, alloc]!  // save fp/lr and move sp down by the frame size

    mov x29, sp                 // x29 = frame base for all local offsets

    add v_base_r, x29, v_s      // v_base_r = address of v[0]
mov ind_r, 0            //initialize index to 0
        b inittest
        bl rand
        and w0, w0, 0xFF        // keep only the low 8 bits of the result
        str w0, [v_base_r, ind_r, lsl 2]//stores current rand() & 0xFF into v[ind_r]

        // printf with fmt1, passing the index in x1 and v[ind_r] in w2
        adrp x0, fmt1
        add x0, x0, :lo12:fmt1
        mov x1, ind_r
        ldr w2, [v_base_r, ind_r, lsl 2]

        bl printf           //Printing "v[index]: (value at index)"

        add ind_r, ind_r, 1     //repeats for index + 1

        cmp ind_r, SIZE
        b.lt init
mov x21, 0

    // NOTE(review): a str from an x register writes 8 bytes. This store
    // covers frame offsets 16..23, so it also writes the 4-byte j slot at
    // offset 20.
    str x21, [x29, i_s]     //initialize i to 0

    b testOut
        // NOTE(review): 8-byte store at offset 24 also covers the temp slot
        // at offset 28.
        str x21, [x29, min_s]       //x21 is still holding the value of i from testOut
        add x22, x21, 1
        // NOTE(review): 8-byte store at offset 20 also covers the min slot
        // at offset 24.
        str x22, [x29, j_s]     //initialize j as j = i+1

    b testIn
        ldr x21, [x29, min_s]
        ldr w23, [v_base_r, x22, lsl 2] //x22 still stores value of j from testIn
        ldr w24, [v_base_r, x21, lsl 2] //w23 and w24 hold the values in
                            //v[j] and v[min], respectively
        cmp w23, w24
        b.ge keep               // v[j] >= v[min]: branch to keep

    str x22, [x29, min_s]       //value of j (x22) is stored into min
        add x22, x22, 1         //x22 still stores j, so we can increment
        str x22, [x29, j_s]     //and then store as new j for next iteration
        ldr x22, [x29, j_s]
        cmp x22, SIZE           //j < SIZE

    b.lt forIn

    ldr x21, [x29, min_s]

    // NOTE(review): 8-byte load from offset 16 reads offsets 16..23, so the
    // upper 32 bits of x24 come from the j slot at offset 20. This matches
    // the oversized value described in the question.
    **ldr x24, [x29, i_s]**        //THIS ONE A

    ldr w23, [v_base_r, x21, lsl 2]
    str w23, [x29, temp_s]      //temp = v[min]

    **ldr w23, [v_base_r, x24, lsl 2]**        //THIS ONE B

    str w23, [v_base_r, x21, lsl 2] //v[min] = v[i]
    ldr w23, [x29, temp_s]

    **str w23, [v_base_r, x24, lsl 2]   //v[i] = temp**        //THIS ONE C

    // NOTE(review): x22 was last loaded from j_s above, while i was loaded
    // into x24; confirm that x22 is the intended register here.
    add x22, x22, 1         //x22 still stores i, so we can increment
    str x22, [x29, i_s]     //and then store as new i for next iteration
        ldr x21, [x29, i_s]
        cmp x21, SIZE-1         //i < SIZE-1
        b.lt forOut

This isn't the best way to fix my issue, but it worked for me. When I reserved space for each variable I was storing on the stack, I allocated 4 bytes for each integer; hence the following code:

// Original layout: 4 bytes reserved for each scalar local.
i_size = 4
j_size = 4
min_size = 4
temp_size = 4
v_size = 50*4
// Negative frame size, with the low four bits cleared (multiple of 16).
alloc = -(16+i_size+j_size+min_size+temp_size+v_size) & -16
dealloc = -alloc

// Offsets from x29: consecutive scalar slots are only 4 bytes apart, so an
// 8-byte access at one offset also covers the next slot.
i_s = 16
j_s = 20
min_s = 24
temp_s = 28
v_s = 32

Between the two reads from i_s, I increment j by 1 in a loop that runs 50 times. When I examined the memory at i_s using x/4x $x29+16, the second word was incremented by 1 on each iteration. It incremented every time the code executed the instruction str x22, [x29, j_s], which made me realize what was wrong: the 8-byte load from i_s was also reading the 4-byte j slot stored right after it.

What ended up fixing my issue in the end, was that I changed the beginning block of code to this:

// Revised layout: 8 bytes reserved for each scalar local, the same width as
// an ldr/str that uses an x register.
i_size = 8
j_size = 8
min_size = 8
temp_size = 8
v_size = 50*4
// Negative frame size, with the low four bits cleared (multiple of 16).
alloc = -(16+i_size+j_size+min_size+temp_size+v_size) & -16
dealloc = -alloc

// Offsets from x29: scalar slots are now 8 bytes apart, so an 8-byte access
// to one slot no longer overlaps its neighbour. The array v starts at 48.
i_s = 16
j_s = 24
min_s = 32
temp_s = 40
v_s = 48

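So I ended up changing the size allocated to each integer from 4 bytes to 8 bytes. This works because ldr/str with an x register always transfers 8 bytes, so each slot now matches the access width. It may be overkill; an alternative that keeps the 4-byte slots would be to access them with 32-bit instructions instead (for example str w22, [x29, j_s] to store and ldrsw x24, [x29, i_s] to load).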

Hello, I was getting the same "Segmentation fault (core dumped)" error when running the code below:


; Writes msg and then nine 'x' characters to stdout with two sys_write
; calls, then calls sys_exit, using the 32-bit Linux int-based syscall
; convention (eax = call number, ebx/ecx/edx = arguments).
section .text
  global _start
_start: ;tells linker the entry point
  mov edx, len ;message length
  mov ecx, msg ;message to write
  mov ebx, 1 ;file descriptor (stdout)
  mov eax, 4 ;system call number (sys_write)
  ; NOTE(review): 0*80 is a multiplication that evaluates to 0, so this
  ; assembles as "int 0" rather than the syscall interrupt 0x80 (80h).
  int 0*80 ;call kernel

  mov edx, 9 ;message length
  mov ecx, s2 ;message to write
  mov ebx, 1 ;file descriptor(stdout)
  mov eax, 4 ;system call number (sys_write)
  int 0*80 ;call kernel (same 0*80 issue as above)

  ; NOTE(review): ebx is not reloaded before this call, so it still holds 1
  ; from the write above -- confirm that is the intended exit status.
  mov eax, 1 ;system call number (sys_exit)
  int 0*80 ;call kernel (same 0*80 issue as above)

section .data
msg db 'Dispay 9 stars', 0xa ;a message, followed by a newline byte
len equ $ - msg ;length of message
s2 times 9 db 'x' ;nine 'x' bytes


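Solution: I changed the `int 0*80` instructions (the ones commented `;call kernel`) to `int 80h`, and this cleared my error. The assembler evaluates `0*80` as a multiplication, which gives 0, so the program was executing `int 0` instead of the Linux system-call interrupt `int 0x80`.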

