Skip to main content
  1. /classes/
  2. Classes, Spring 2026/
  3. CS 4310 Spring 2026: Course Site/

Lecture Notes: 01-28 C to AMD64

·1120 words·6 mins·

Actually writing assembly in practice isn’t super common, but it’s useful to have a clear idea of how C is translated to assembly and how the result is structured.

So we’re going to quickly go through one moderately long example which will show many of the moving parts.

Here’s sum-arg-lens.c:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

// Exchange the characters at indexes ii and jj of text, in place.
void
swap(char* text, int ii, int jj)
{
    char* left = &text[ii];
    char* right = &text[jj];

    char saved = *left;
    *left = *right;
    *right = saved;
}

// Reverse text[ii..jj] (both ends inclusive) in place.
// Swaps the outermost pair, then recurses on the range one step
// further in from each end; stops once the indexes meet or cross.
void
reverse(char* text, int ii, int jj)
{
    if (ii < jj) {
        swap(text, ii, jj);
        reverse(text, ii + 1, jj - 1);
    }
}

// Shared output buffer for ltoa. Every call overwrites it, so the
// returned pointer is only valid until the next call.
char ltoa_buf[24];

// Convert xx to decimal text in ltoa_buf.
//  - xx: value to convert. Zero yields "0". Negative values are not
//    supported: the digit loop never runs, so the result is an empty
//    string and *nn is 0.
//  - nn: out-parameter, receives the number of characters written
//    (not counting the terminating 0 byte).
// Returns ltoa_buf.
char*
ltoa(long xx, int* nn)
{
    // The digit loop below produces no digits for zero, so handle
    // that case up front instead of returning an empty string.
    if (xx == 0) {
        ltoa_buf[0] = '0';
        ltoa_buf[1] = 0;
        *nn = 1;
        return ltoa_buf;
    }

    // Peel digits off the low end; they land in the buffer
    // least-significant first.
    int ii = 0;
    while (xx > 0) {
        ltoa_buf[ii] = '0' + xx % 10;
        xx = xx / 10;
        ii = ii + 1;
    }

    ltoa_buf[ii] = 0;
    *nn = ii;

    // Flip the digits into most-significant-first order.
    reverse(ltoa_buf, 0, ii - 1);
    return ltoa_buf;
}

// Print the combined length of all command-line arguments (program
// name excluded) as a decimal number followed by a newline.
int
main(int argc, char* argv[])
{
    int sum = 0;
    int ii = 1;
    while (ii < argc) {
        sum += strlen(argv[ii]);
        ++ii;
    }

    // ltoa reports the digit count through nn, which tells write()
    // how many bytes of the buffer to emit.
    int nn;
    char* buf = ltoa(sum, &nn);

    write(STDOUT_FILENO, buf, nn);
    write(STDOUT_FILENO, "\n", 1);

    return 0;
}

Some notes
#

Registers: rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp, r8, …, r15

Size variants: rax, eax, ax, ah/al

Calling convention:

  • arguments go in, in order: rdi, rsi, rdx, rcx, r8, r9, (stack)
  • syscall args go in: rdi, rsi, rdx, r10, r8, r9
  • return value comes out in rax
  • (second return in rdx)
  • To call a varargs function, you must first set %al to the number of vector registers used for arguments (zero it if there are none)
  • Safe (callee-saved) registers are: %rbx, %r12-%r15 (and %rbp, which we use as the frame pointer)

We’ll do one function at a time
#

Here’s our Makefile.

# -no-pie: the assembly loads symbol addresses as absolute immediates
#          (e.g. mov $ltoa_buf, %r10), which does not link as a PIE.
# -Wl,-z,noexecstack: mark the stack non-executable.
CFLAGS := -no-pie -Wl,-z,noexecstack

# clean does not produce a file called "clean".
.PHONY: clean

# Recipe lines must begin with a tab character.
sal: sal.c sal.S
	gcc $(CFLAGS) -o sal sal.c sal.S

# -f so that cleaning an already-clean tree is not an error.
clean:
	rm -f sal

Here’s the final assembly code.

/*
void
swap(char* text, int ii, int jj)
{
    char tmp = text[ii];
    text[ii] = text[jj];
    text[jj] = tmp;
}
Vars:
 - text is %rdi
 - ii, jj are ints, so they arrive in %esi, %edx; the upper halves
   of %rsi, %rdx are not defined for int arguments, so we
   sign-extend them before using them as 64-bit indexes
 - tmp is %al
 - implicit temporary is %cl
 - leaf function, so we're fine just using temp regs
*/
    .text
    .global swap
swap:
    // leaf function, can skip stack stuff

    movslq %esi, %rsi       // ii: int -> 64-bit index
    movslq %edx, %rdx       // jj: int -> 64-bit index

    mov (%rdi,%rsi,1), %al  // t1 = text[ii]
    mov (%rdi,%rdx,1), %cl  // t2 = text[jj]
    mov %cl, (%rdi,%rsi,1)  // text[ii] = t2
    mov %al, (%rdi,%rdx,1)  // text[jj] = t1

    ret

/*
void
reverse(char* text, int ii, int jj)
{
    if (ii >= jj) {
        return;
    }

    swap(text, ii, jj);
    reverse(text, ii + 1, jj - 1);
}
Vars:
 - We do function calls, so we need to do stack management and
   register preservation.
 - But our function calls basically just have the same args
   in the same place.
 - So we'll do the caller-save pattern to preserve temp
   registers across function calls.
 - text is %rdi
 - ii, jj are ints, so they live in %esi, %edx; the upper halves
   of %rsi, %rdx are not defined for int arguments, so the compare
   and the +1 / -1 are done at 32-bit width
 - enter sets up stack and allocates stack space
   we don't need extra stack space
*/
    .global reverse
reverse:
    enter $0, $0

    // if ii >= jj: return
    // AT&T operand order: this sets flags from (ii - jj),
    // compared as signed 32-bit ints
    cmp %edx, %esi
    jge reverse_done

    // prepare to call swap
    // save temp registers on stack
    push %rdi
    push %rsi
    push %rdx
    // three pushes = 24 bytes; pad by 8 so the stack is
    // a multiple of 16 again before the call
    sub $8, %rsp
    call swap
    // reverse align, restore temps
    add $8, %rsp
    pop %rdx
    pop %rsi
    pop %rdi

    // prepare to call reverse
    // variables aren't used again after call,
    // so we don't need to save them
    // and we can modify ii and jj in place
    inc %esi        // ii + 1
    dec %edx        // jj - 1
    call reverse
    // we can do TCO here if we want

reverse_done:
    leave
    ret


/*
char ltoa_buf[24];
*/

    .data
    // .global ltoa_buf
ltoa_buf:
    .zero 24

/*
char*
ltoa(long xx, int* nn)
{
    if (xx == 0) {
        ltoa_buf[0] = '0';
        ltoa_buf[1] = 0;
        *nn = 1;
        return ltoa_buf;
    }

    int ii = 0;
    while (xx > 0) {
        ltoa_buf[ii] = '0' + xx % 10;
        xx = xx / 10;
        ii = ii + 1;
    }

    ltoa_buf[ii] = 0;
    *nn = ii;

    reverse(ltoa_buf, 0, ii - 1);
    return ltoa_buf;
}
Vars:
 - The only function call is reverse, at the very end, so args and
   locals can all stay in temp registers.
 - div takes its dividend in %rdx:%rax and leaves the quotient in
   %rax and the remainder in %rdx.
 - xx, nn, ii are %rdi, %rsi, %rcx
 - %r10 holds &ltoa_buf[0] until the call to reverse
 - %r11 holds the divisor 10
*/

    .text
    .global ltoa
ltoa:
    enter $0, $0

    mov $ltoa_buf, %r10         // base address of the output buffer

    test %rdi, %rdi
    jnz ltoa_digits

    // xx == 0: the result is the literal string "0"
    movb $'0', 0(%r10)          // ltoa_buf[0] = '0'
    movb $0, 1(%r10)            // ltoa_buf[1] = 0
    movl $1, (%rsi)             // *nn = 1
    jmp ltoa_return

ltoa_digits:
    xor %ecx, %ecx              // ii = 0
    mov $10, %r11               // divisor stays constant for the loop

ltoa_loop_test:
    test %rdi, %rdi
    jle ltoa_loop_exit          // leave once !(xx > 0)

    //call print_long

    xor %edx, %edx              // high half of the dividend = 0
    mov %rdi, %rax
    div %r11
    // %rax = xx / 10
    // %rdx = xx % 10
    add $'0', %rdx
    mov %dl, (%r10, %rcx, 1)    // ltoa_buf[ii] = '0' + xx % 10
    mov %rax, %rdi              // xx = xx / 10

    inc %rcx                    // ii = ii + 1
    jmp ltoa_loop_test

ltoa_loop_exit:
    movb $0, (%r10, %rcx, 1)    // ltoa_buf[ii] = 0

    mov %ecx, (%rsi)            // *nn = ii (4-byte int store)

    // reverse(ltoa_buf, 0, ii - 1)
    mov %r10, %rdi
    xor %esi, %esi
    lea -1(%rcx), %rdx
    call reverse

ltoa_return:
    // %r10 is a temp register and may not survive the call,
    // so load the buffer address again for the return value
    mov $ltoa_buf, %rax
    leave
    ret

/*
int
main(int argc, char* argv[])
{
    int sum = 0;
    for (int ii = 1; ii < argc; ++ii) {
        sum += strlen(argv[ii]);
    }

    int nn;
    char* buf = ltoa(sum, &nn);

    write(1, buf, nn);
    write(1, "\n", 1);

    return 0;
}
Vars:
 - argc, argv, ii, sum need to survive strlen call
 - we'll put them in safe registers
 - argc, argv, ii, sum are %r12, %r13, %r14, %r15
 - argc is an int, so it arrives in %edi and is sign-extended
   into %r12 before the 64-bit loop compare
 - nn gets its address taken, it goes on stack
 - nn is 0(%rsp); it is a 4-byte int, so it must be read
   with a 4-byte load
 - buf just goes straight from %rax to %rsi
*/

    .data
newline: .string "\n"

    .text
    .global main
main:
    // the safe registers belong to our caller: save before use
    push %r12
    push %r13
    push %r14
    push %r15
    // 4 registers = 32 bytes, which is divisible by 16
    // nn is 4 bytes, but we allocate 16 to keep aligned
    enter $16, $0

    movslq %edi, %r12           // argc (int -> 64-bit)
    mov %rsi, %r13              // argv
    mov $0, %r15                // sum = 0

    mov $1, %r14                // ii = 1
main_for_cond:
    cmp %r12, %r14
    jnl main_for_done           // leave once !(ii < argc)

    mov (%r13, %r14, 8), %rdi   // argv[ii]
    call strlen
    add %rax, %r15              // sum += strlen(argv[ii])

    inc %r14
    jmp main_for_cond

main_for_done:
    mov %r15, %rdi
    lea 0(%rsp), %rsi           // &nn
    call ltoa

debug1:

    // write(1, buf, nn)
    // the first three syscall arguments use the same registers
    // as the first three function-call arguments
    mov $1, %rdi
    mov %rax, %rsi
    // ltoa stored only 4 bytes at nn; an 8-byte load would pull
    // uninitialized stack into the length
    movslq 0(%rsp), %rdx
    // write is syscall #1
    mov $1, %rax
    syscall

    // write(1, "\n", 1)
    mov $1, %rdi
    mov $newline, %rsi
    mov $1, %rdx
    mov $1, %rax
    syscall

    // return 0; without this, %rax would still hold the
    // result of the last write syscall
    mov $0, %eax

    leave
    pop %r15
    pop %r14
    pop %r13
    pop %r12
    ret

// This debug function doesn't follow
// the standard calling convention:
// it prints the long in %rdi and hands every integer temp
// register back unchanged, so a call to it can be dropped into
// the middle of other code. Flags and vector registers are not
// saved, so treat them as clobbered.

    .data
lfmt: .string "long = %ld\n"

    .text
    .global print_long
print_long:
    push %rax
    push %rcx
    push %rdx
    push %rsi
    push %rdi
    push %r8
    push %r9
    push %r10
    push %r11
    enter $0, $0
    // Return address + 9 pushes + saved %rbp is an odd number of
    // 8-byte slots, so %rsp would be off by 8 at the call below.
    // Round it down to a multiple of 16; leave restores %rsp
    // from %rbp, so nothing has to be undone afterwards.
    and $-16, %rsp

    // printf(lfmt, value)
    mov %rdi, %rsi
    mov $lfmt, %rdi
    mov $0, %al                 // varargs call: no vector register args
    call printf

    leave
    pop %r11
    pop %r10
    pop %r9
    pop %r8
    pop %rdi
    pop %rsi
    pop %rdx
    pop %rcx
    pop %rax
    ret
Nat Tuck
Author
Nat Tuck