Over the weekend I wrote some ARM assembly! Specifically, a program that reads two numbers from argv, parses them, adds them, converts the sum back to a string, and writes it to stdout with a syscall.
I thought it would be peaceful in a zen sort of way (it was not) and maybe a little educational (it was).
This turned out to be a good exercise, going over reading and writing arrays to the stack, looping, function calls and syscalls.
Quick takeaways:
- A program is really just a giant mutable array of bytes in memory and some mutable registers. A debugger is pretty useful here, and separately, it’s interesting to see that code is just data being executed
- Compiling and running ARM on x86 (with qemu) demystified some questions I had about emulation and VMs
- I overwrote all my stack frames trying to write an array because I got the stack direction wrong. Not recommended
- But the stack seems stupid cheap (just add a register and copy, though not sure how expensive the copy is), vs running a whole bunch of allocator code + maybe a syscall
Full gist (quite brittle at points)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Building (on x86):
//
//   arm-linux-gnueabi-as -o strtoi-add-argv.o strtoi-add-argv.s
//   arm-linux-gnueabi-ld -o strtoi-add-argv strtoi-add-argv.o
//
// Running:
//   qemu-arm strtoi-add-argv 100 23; echo $?
//
// Static NUL-terminated strings. `.asciz` appends the terminating zero byte
// itself, so the bytes emitted are the same as `.ascii "...\0"`.
        .data

Input:                                  // sample number string; no live code shown here loads it
        .asciz  "89"

NewlineString:                          // a single line feed, printed after the result
        .asciz  "\n"

TestString:                             // debug string; no live code shown here loads it
        .asciz  "Some string\n"
// Program entry point (32-bit ARM, Linux EABI syscalls via `svc #0`).
        .text
        .global _start

        .equ    SYS_EXIT,    1          // syscall number passed in r7
        .equ    SYS_WRITE,   4
        .equ    STDERR,      2
        .equ    EXIT_OK,     0
        .equ    EXIT_USAGE,  1
        .equ    RESULT_BUF,  16         // 10 digits + NUL fit; 16 keeps sp 8-byte aligned relative to entry

        .pushsection .rodata
UsageString:
        .ascii  "usage: strtoi-add-argv <a> <b>\n"
        .equ    UsageLen, . - UsageString
        .popsection

/*
_start: parse argv[1] and argv[2] as unsigned decimals, print their sum
        (modulo 2^32) followed by a newline, then exit.

Stack on entry:
  [sp]      argc
  [sp, #4]  argv[0]
  [sp, #8]  argv[1]
  [sp, #12] argv[2]

Exit status:
  0 on success, 1 (after a usage message on stderr) when fewer than two
  arguments were supplied.
*/
_start:
        ldr     r4, [sp]                // r4 = argc
        cmp     r4, #3                  // need program name + two operands
        blo     start_usage             // otherwise argv[1]/argv[2] are not valid strings

        ldr     r5, [sp, #8]            // r5 = argv[1]
        ldr     r6, [sp, #12]           // r6 = argv[2]

        sub     sp, sp, #RESULT_BUF     // result string buffer lives at [sp, sp + RESULT_BUF)

        mov     r0, r5
        bl      strtoi
        mov     r5, r0                  // r5 = first operand (strtoi only clobbers r0)

        mov     r0, r6
        bl      strtoi                  // r0 = second operand

        add     r0, r5, r0              // r0 = sum
        mov     r1, sp                  // r1 = result buffer
        bl      itostr

        mov     r0, sp                  // re-derive the buffer address rather than trusting r1
        bl      print
        ldr     r0, =NewlineString
        bl      print

        mov     r0, #EXIT_OK            // report success with status 0
        mov     r7, #SYS_EXIT
        svc     #0

start_usage:
        mov     r0, #STDERR
        ldr     r1, =UsageString
        mov     r2, #UsageLen
        mov     r7, #SYS_WRITE
        svc     #0

        mov     r0, #EXIT_USAGE
        mov     r7, #SYS_EXIT
        svc     #0

        .ltorg                          // literal pool for the `ldr rX, =symbol` loads above (never executed)
/*
strtoi: parse a NUL-terminated string of ASCII decimal digits
  r0: pointer to string
return:
  r0: int (unsigned; wraps modulo 2^32 on overflow)
clobber:
  r0, flags

Parsing stops at the NUL terminator or at the first byte that is not
'0'..'9'. An empty string, or one that starts with a non-digit, yields 0.
*/
        .equ    ASCII_ZERO, 48          // '0'

strtoi:
        push    {r1, r2, r3, lr}        // keep the "only r0 is clobbered" contract
        mov     r1, r0                  // r1 = cursor into the string
        mov     r0, #0                  // r0 = running total
        mov     r3, #10                 // r3 = radix
strtoi_loop:
        ldrb    r2, [r1], #1            // r2 = *cursor++
        sub     r2, r2, #ASCII_ZERO     // digit value; bytes below '0' (incl. NUL) wrap to a huge unsigned number
        cmp     r2, #9
        bhi     strtoi_end              // unsigned test: anything outside 0..9 ends the parse
        mla     r0, r3, r0, r2          // total = 10 * total + digit
        b       strtoi_loop             // plain branch: a loop back-edge must not overwrite lr
strtoi_end:
        pop     {r1, r2, r3, pc}
/*
itostr: write the decimal representation of an unsigned int as a
        NUL-terminated string
  r0: int (treated as unsigned)
  r1: pointer to str; must have room for up to 10 digits plus the NUL
return:
  (nothing; the string is written through r1)
clobber:
  r0, flags

Digits are generated least-significant first, terminated, and then put in
reading order by `reverse`. Uses `udiv`, so the target core must implement
hardware integer divide.
*/
        .equ    ASCII_ZERO, 48          // '0'

itostr:
        push    {r1, r2, r3, r4, r5, lr} // reverse may clobber these; restore them for the caller
        mov     r3, r0                  // r3 = value still to convert
        mov     r0, #10                 // r0 = radix (r0 is the documented scratch register)
        mov     r2, #0                  // r2 = number of digits written so far
itostr_loop:
        udiv    r4, r3, r0              // r4 = value / 10
        mls     r5, r4, r0, r3          // r5 = value - (value / 10) * 10  ==  value % 10
        add     r5, r5, #ASCII_ZERO     // digit -> ASCII
        strb    r5, [r1, r2]
        add     r2, r2, #1
        movs    r3, r4                  // carry on with the quotient; Z is set once it reaches 0
        bne     itostr_loop             // runs at least once, so 0 is rendered as "0"

        mov     r4, #0
        strb    r4, [r1, r2]            // terminate directly after the last digit written
        mov     r0, r1
        bl      reverse
        pop     {r1, r2, r3, r4, r5, pc}
/*
reverse: reverse string in place
  r0: pointer to NUL-terminated str
return:
  r0: the same pointer that was passed in
clobber:
  r1-r3, flags
*/
reverse:
        push    {r4, lr}
        mov     r4, r0                  // strlen clobbers r0-r2, so park the pointer in r4
        bl      strlen                  // r0 = length
        sub     r2, r0, #1              // r2 = index of the last character (-1 for an empty string)
        mov     r1, #0                  // r1 = index of the first character
        mov     r0, r4                  // r0 = string pointer again; r4 is now free as a temporary
        b       reverse_check
reverse_loop:
        ldrb    r3, [r0, r1]            // load front byte
        ldrb    r4, [r0, r2]            // load back byte
        strb    r4, [r0, r1]            // swap them
        strb    r3, [r0, r2]
        add     r1, r1, #1              // move both indices toward the middle
        sub     r2, r2, #1
reverse_check:
        cmp     r1, r2
        blt     reverse_loop            // signed compare so the empty-string case (r2 = -1) exits at once
        pop     {r4, pc}
// print: write a NUL-terminated string to stdout with the write syscall
// r0: address of message
// return: r0 holds whatever the write syscall returned (not checked here,
//         and a short write is not retried)
// clobber r0-r2
        .equ    SYS_WRITE, 4            // syscall number passed in r7
        .equ    STDOUT,    1

print:
        push    {r4, r7, lr}            // r7 is needed for the syscall number; restore it for the caller
        mov     r4, r0                  // strlen clobbers r0-r2, so keep the message pointer in r4
        bl      strlen
        mov     r2, r0                  // r2 = byte count
        mov     r1, r4                  // r1 = message
        mov     r0, #STDOUT             // r0 = fd
        mov     r7, #SYS_WRITE
        svc     #0
        pop     {r4, r7, pc}
/*
strlen: return count of c string (bytes before the NUL terminator)
  r0: pointer to string
return:
  r0: int
clobber:
  r0-2, flags
*/
strlen:
        mov     r1, r0                  // r1 = cursor; r0 keeps the start address
strlen_loop:
        ldrb    r2, [r1], #1            // r2 = *cursor++
        cmp     r2, #0
        bne     strlen_loop             // plain branch: a loop back-edge must not overwrite lr
        sub     r0, r1, r0              // cursor stopped one past the NUL ...
        sub     r0, r0, #1              // ... so drop the terminator from the count
        bx      lr                      // leaf routine: no frame or saved lr needed