Sat Mar 25 2023
Recently I started reading Reverse Engineering for Beginners
by Dennis Yurichev to learn x86 and arm64 assembly. Chapter 1.18.1 on conditional jumps has an interesting arm64 exercise, so I thought I’d go through it step by step. The sauce is as follows:
#include <stdio.h>
void f_signed (int a, int b)
{if (a>b)
"a>b\n");
printf (if (a==b)
"a==b\n");
printf (if (a<b)
"a<b\n");
printf (
};void f_unsigned (unsigned int a, unsigned int b)
{if (a>b)
"a>b\n");
printf (if (a==b)
"a==b\n");
printf (if (a<b)
"a<b\n");
printf (
};int main()
{1, 2);
f_signed(1, 2);
f_unsigned(return 0;
};
Dennis’s arm64 compiler (gcc Linaro 4.9) generates this “optimzed” code for the two f_*
functions:
int a, int b):
f_signed (
// W0=a, W1=bcmp w0, w1
if Greater Than (a>b)
bgt .L19 // Branch if Equal (a==b)
beq .L20 // Branch if Greater than or Equal (a>=b) (impossible here)
bge .L15 // Branch
// a<b"a<b"
adrp x0, .LC11 // add x0, x0, :lo12:.LC11
b puts.L19:
"a>b"
adrp x0, .LC9 // add x0, x0, :lo12:.LC9
b puts.L15: // impossible to get here
ret
.L20:
"a==b"
adrp x0, .LC10 // add x0, x0, :lo12:.LC10
b puts
int a, unsigned int b):
f_unsigned (unsigned sp, -48]!
stp x29, x30, [
// W0=a, W1=bcmp w0, w1
add x29, sp, 0
str x19, [sp,16]
mov w19, w0
if HIgher (a>b)
bhi .L25 // Branch cmp w19, w1
if Equal (a==b)
beq .L26 // Branch .L23:
if Carry Clear (if less than) (a<b)
bcc .L27 // Branch
// function epilogue, impossible to be heresp,16]
ldr x19, [sp], 48
ldp x29, x30, [ret
.L27:
sp,16]
ldr x19, ["a<b"
adrp x0, .LC11 // sp], 48
ldp x29, x30, [add x0, x0, :lo12:.LC11
b puts.L25:
"a>b"
adrp x0, .LC9 // str x1, [x29,40]
add x0, x0, :lo12:.LC9
bl puts
40]
ldr x1, [x29,cmp w19, w1
if Not Equal
bne .L23 // Branch .L26:
sp,16]
ldr x19, ["a==b"
adrp x0, .LC10 // sp], 48
ldp x29, x30, [add x0, x0, :lo12:.LC10
b puts
This code contains dead code[1] and redundant instructions which the author asks us to remove.
Before we start I had to add a few things to get the code to assemble and run (with aarch64-gcc 12.2.1):
.LC9:
"a>b"
.string .LC10:
"a==b"
.string .LC11:
"a<b"
.string .LC12:
"f_signed:"
.string .LC13:
"\nf_unsigned:" .string
.globl mainmain:
sp, -16]!
stp x29, x30, [mov x29, sp
"f_signed"
adrp x0, .LC12 // str x1, [x29,40]
add x0, x0, :lo12:.LC12
bl puts
mov w1, 2
mov w0, 1
bl f_signed
mov w1, 1
mov w0, 1
bl f_signed
mov w1, 1
mov w0, 2
bl f_signed
"f_unsigned"
adrp x0, .LC13 // str x1, [x29,40]
add x0, x0, :lo12:.LC13
bl puts
mov w1, 2
mov w0, 1
bl f_unsigned
mov w1, 1
mov w0, 1
bl f_unsigned
mov w1, 1
mov w0, 2
bl f_unsigned
mov w0, 0
sp], 16
ldp x29, x30, [ret
int, int)
f_signed(int, unsigned int) f_unsigned(unsigned
becomes just
f_signed f_unsigned
4
.balign f_signed:
Now we can assemble and run
qemu-aarch64:~$ gcc ex.s -o ex
qemu-aarch64:~$ ./ex
f_signed:
a<b
a==b
a>b
f_unsigned:
a<b
a==b
a>b
Let’s start easy by removing, and move the LDP (Load Pair) call restoring x29, x30 and the stack pointer into label .L25
. The bl
call in .L25
also needs to be converted to a b
call since bl
modifies the r14 return register making us return to the instruction immediately below instead of back to main. These changes results in the following diff:
--- ex1.18.1_arm64.s 2023-03-25 14:20:13.382373022 +0100 +++ tmp.s 2023-03-25 14:20:20.866261321 +0100 @@ -15,8 +15,6 @@ cmp w0, w1 bgt .L19 // Branch if Greater Than (a>b) beq .L20 // Branch if Equal (a==b) - bge .L15 // Branch if Greater than or Equal (a>=b) (impossible - here) // a<b adrp x0, .LC11 // "a<b" add x0, x0, :lo12:.LC11 @@ -25,8 +23,6 @@ adrp x0, .LC9 // "a>b" add x0, x0, :lo12:.LC9 b puts -.L15: // impossible to get here - ret .L20: adrp x0, .LC10 // "a==b" add x0, x0, :lo12:.LC10 @@ -42,12 +38,6 @@ bhi .L25 // Branch if HIgher (a>b) cmp w19, w1 beq .L26 // Branch if Equal (a==b) -.L23: - bcc .L27 // Branch if Carry Clear (if less than) (a<b) - // function epilogue, impossible to be here - ldr x19, [sp,16] - ldp x29, x30, [sp], 48 - ret .L27: ldr x19, [sp,16] adrp x0, .LC11 // "a<b" @@ -57,11 +47,9 @@ .L25: adrp x0, .LC9 // "a>b" str x1, [x29,40] + ldp x29, x30, [sp], 48 add x0, x0, :lo12:.LC9 - bl puts - ldr x1, [x29,40] - cmp w19, w1 - bne .L23 // Branch if Not Equal + b puts .L26: ldr x19, [sp,16] adrp x0, .LC10 // "a==b"Since the variables are stored in registers w0 and w1 and we don’t use the stack we can remove stp (Store Pair) call and the ldp calls, as well as the
add x29, sp, 0
which stores the stack frame in register x29:
--- tmp.s 2023-03-25 14:42:53.258266321 +0100 +++ tmp2.s 2023-03-25 14:46:42.466947431 +0100 @@ -29,10 +29,8 @@ b puts f_unsigned: - stp x29, x30, [sp, -48]! // W0=a, W1=b cmp w0, w1 - add x29, sp, 0 str x19, [sp,16] mov w19, w0 bhi .L25 // Branch if HIgher (a>b) @@ -41,19 +39,16 @@ .L27: ldr x19, [sp,16] adrp x0, .LC11 // "a<b" - ldp x29, x30, [sp], 48 add x0, x0, :lo12:.LC11 b puts .L25: adrp x0, .LC9 // "a>b" str x1, [x29,40] - ldp x29, x30, [sp], 48 add x0, x0, :lo12:.LC9 b puts .L26: ldr x19, [sp,16] adrp x0, .LC10 // "a==b" - ldp x29, x30, [sp], 48 add x0, x0, :lo12:.LC10 b puts
We don’t need to store variable a in register 19 either since we only need to do the comparison once:
--- tmp2.s 2023-03-25 14:46:42.466947431 +0100 +++ tmp3.s 2023-03-25 14:51:52.947461862 +0100 @@ -31,23 +31,17 @@ f_unsigned: // W0=a, W1=b cmp w0, w1 - str x19, [sp,16] - mov w19, w0 bhi .L25 // Branch if HIgher (a>b) - cmp w19, w1 beq .L26 // Branch if Equal (a==b) .L27: - ldr x19, [sp,16] adrp x0, .LC11 // "a<b" add x0, x0, :lo12:.LC11 b puts .L25: adrp x0, .LC9 // "a>b" - str x1, [x29,40] add x0, x0, :lo12:.LC9 b puts .L26: - ldr x19, [sp,16] adrp x0, .LC10 // "a==b" add x0, x0, :lo12:.LC10 b puts
Now we are left with the much smaller f functions:
f_signed:
cmp w0, w1 // W0=a, W1=b
if Greater Than (a>b)
bgt .L19 // Branch if Equal (a==b)
beq .L20 // Branch
// a<b"a<b"
adrp x0, .LC11 // add x0, x0, :lo12:.LC11
b puts.L19:
"a>b"
adrp x0, .LC9 // add x0, x0, :lo12:.LC9
b puts.L20:
"a==b"
adrp x0, .LC10 // add x0, x0, :lo12:.LC10
b puts
f_unsigned:
cmp w0, w1 // W0=a, W1=b
if HIgher (a>b)
bhi .L25 // Branch if Equal (a==b)
beq .L26 // Branch .L27:
"a<b"
adrp x0, .LC11 // add x0, x0, :lo12:.LC11
b puts.L25:
"a>b"
adrp x0, .LC9 // add x0, x0, :lo12:.LC9
b puts.L26:
"a==b"
adrp x0, .LC10 // add x0, x0, :lo12:.LC10
b puts
[1]: dead code is code that will never be executed