Code Generation

Helpers

Scratch Registers

/* scrach.h */
int scratch_alloc();
int scratch_free(int reg);
const char *scratch_name(int reg);

Label Generator

  • Use static variable to hold the string, so that the returned value can be reused by other functions
/* label.h */
int label_create();
static char *label_string(int label) {
    static char str[100];
    sprintf(str, "L%d", label);
    return str;
    // return strdup(str);
}

Symbols

/* symbol.h */
const char *symbol_codegen(struct symbol *s);
PARAM  2 "-24(%rbp)"
LOCAL  1 "-40(%rbp)"
GLOBAL x "x"

Generation

Expressions

  • Use scratch registers to hold the value of each expression node.
  • Post-order traversal of AST
  • Allocate regs for each of the leafs, move values into regs.
  • Generate code, and mark the reg in which the result of the node is stored.
  • Free the reg
  • Continue the traversal.
graph TB
MUL ---|Reg 0| a
MUL ---|Reg 1| 10
MOVQ a,    %rbx
MOVQ $10,  %r10
MOVQ %rbx, %rax
IMUL %r10
MOVQ %rax, %rbx
void expr_codegen(struct expr* e) {
    if (!e) return;
    switch (e->kind) {
        case EXPR_INT_LITERAL:
            e->reg = scratch_alloc();
            printf("MOVQ $%d, %s\n",
                   e->int_literal,
                   scratch_name(e->reg));
            break;
        case EXPR_NAME:
            symbol_codegen(...)
        case EXPR_ADD:
            expr_codegen(e->left);
            expr_codegen(e->right);
            printf("ADDQ %s, %s\n",
                    scratch_name(e->left->reg),
                    scratch_name(e->right->reg));
            e->reg = e->right->reg;
            scratch_free(e->left->reg);
            break;
        case EXPR_ASSIGN:
            expr_codegen(e->left);
            printf("MOVQ %s, %s\n",
                    scratch_name(e->left->reg),
                    symbol_codegen(e->right->symbol));
            e->reg = e->left->reg;
        case EXPR_INDEX:
            expr_codegen(e->left);
            expr_codegen(e->right);
            e->reg = scratch_alloc(); // Be sure to allocate after codegen children
            printf("MOVQ (%s, %s, 8), %s\n",
                    scratch_name(e->left->reg),
                    scratch_name(e->right->reg),
                    scratch_name(e->reg));
            scratch_free(e->left->reg);
            scratch_free(e->right->reg);
        case EXPR_CALL:
            // Codegen all the arguments before storing them into registers!
    }
}
  • Fully balanced tree will use more registers. layers will use registers in total.

Statement

stmt_codegen(struct stmt *s) {
    if (!s) return;
    switch (s->kind) {
        case STMT_EXPR:
            expr_codegen(s -> expr);
        case STMT_RETURN:
            expr_codegen(s->expr);
            printf("MOVQ %s, %%rax\n",
                    scratch_name(s->expr_>reg));
            printf("JMP .%s-epilogue)
        case STMT_FOR:
            expr_codegen(s->init);
            scratch_free(s->init->reg);
            t = label_create();
            b = label_create();
            printf(".%s:\n", label_name(t));
            expr_codegen(s->expr);
            printf("CMP %s, $0\n",
                    scratch_name(s->expr->reg));
            printf("JE .%s\n", label_name(b));
            scratch_free(s->expr->reg);
            stmt_codegetn(s->body);
            expr_codegent(s->next);
            scratch_free(s->next->reg);
            printf("JMP .%s\n", label_name(t));
            printf(".%s:\n", label_name(b));
        case STMT_PRINT:
    }
    scratch_free(s->expr->reg);
}
flowchart TB
i[init expr] -- top label: --> e[expr]
s[stmt] --> n[next expr] --> f[bottom label:]
n -- JMP top label --> e
e --> s
e -- JMP bottom label --> f

Declaration

  • Constant folding - evaluate expressions at compile time.
.data
x: .quad 0
f: .float 0
a: .quad 10 20 30
s: .string "hello"
s: .byte 104 101 108 108 111 0
.text
f:
.global f
PUSH %rbp
MOVQ %rsp, %rbp
# arguments
# local variables
# push callee saved registers
.f_epilogue
# pop callee saved regs
MOVQ %rbp, %rsp
POP %rbp
RET
  • For local variables, simply initialize them.
  • Creating string literals on the fly