242 lines
7.8 KiB
Python
242 lines
7.8 KiB
Python
#!/usr/bin/env python3
|
|
import re
|
|
|
|
# Register names and instruction mnemonics treated as valid.
# (Neither set is referenced by the functions visible in this file.)
valid_registers = set("abcdef")
valid_instructions = {
    "ldw", "mov", "add", "sub", "str", "ldr", "int",
    "push", "pop", "jsr", "ret", "xor", "and", "jmp",
    "mul", "div", "bne", "beq", "blt", "ldb", "stb",
}

# Registers available for allocation, tried in this order.
register_pool = list("abcdef")
|
|
|
|
def allocate_register(var_name, context):
    """
    Bind ``var_name`` to the first pool register that no variable occupies.

    Parameters:
        var_name: name of the variable needing a register.
        context: function context; its 'var_to_reg' dict is updated in place.

    Returns the chosen register name.
    Raises Exception when every register in the pool is already mapped.
    """
    mapping = context['var_to_reg']
    occupied = mapping.values()
    free = next((r for r in register_pool if r not in occupied), None)
    if free is None:
        raise Exception("Out of registers!")
    mapping[var_name] = free
    return free
|
|
|
|
def compile_expr(expr, dest, temp, context):
    """
    Compile a simple expression (literals, variables, +, -) into assembly.

    The expression is a flat chain ``term (+|-) term ...`` accumulated into
    ``dest``. Literals are loaded with ``ldw``; variables are read from the
    registers recorded in ``context['var_to_reg']``.

    Parameters:
        expr: string expression (e.g., "5", "x", "x + 3")
        dest: destination register for the result.
        temp: temporary register; the emitted code may overwrite it, so it
              must differ from ``dest`` and should not hold a value that is
              still needed.
        context: dictionary with function context (like var_to_reg).

    Returns:
        List of assembly instruction strings (empty for a blank expression).

    Raises:
        Exception: for an undeclared variable, or an operator that has no
        operand after it.
    """
    var_to_reg = context['var_to_reg']
    mnemonics = {"+": "add", "-": "sub"}

    tokens = [t.strip() for t in re.split(r'(\+|\-)', expr)]
    tokens = [t for t in tokens if t]
    if not tokens:
        return []

    def classify(term):
        """Return ('lit', text) for a digit literal, ('reg', register) for a variable."""
        if term.isdigit():
            return ("lit", term)
        if term not in var_to_reg:
            raise Exception(f"Variable '{term}' not declared")
        return ("reg", var_to_reg[term])

    first_kind, first_value = classify(tokens[0])

    # Parse the remaining tokens as (mnemonic, kind, value) triples.
    rest = []
    for i in range(1, len(tokens), 2):
        op = tokens[i]
        if i + 1 >= len(tokens):
            # Previously surfaced as a bare IndexError.
            raise Exception(f"Missing operand after '{op}' in expression '{expr}'")
        kind, value = classify(tokens[i + 1])
        if op not in mnemonics:
            raise Exception(f"Unsupported operator '{op}'")
        rest.append((mnemonics[op], kind, value))

    # dest keeps its original value until the first instruction that writes
    # it: the first-term load/move, or (when the first term already lives in
    # dest) the first add/sub. Any later operand that reads dest's register
    # would see the partial result instead of the variable, e.g. "x = y - x".
    first_in_dest = first_kind == "reg" and first_value == dest
    stale_from = 1 if first_in_dest else 0
    needs_snapshot = any(
        kind == "reg" and value == dest for _, kind, value in rest[stale_from:]
    )

    instructions = []
    if needs_snapshot:
        # Save the original value of dest and read it from temp afterwards.
        instructions.append(f"mov {temp}, {dest}")
        # Literal operands also use temp as scratch, so emit every register
        # operand first. A chain of + and - is a sum of signed terms, so the
        # order of the non-first terms does not change the result (assuming
        # ordinary integer add/sub on the target).
        rest = ([t for t in rest if t[1] == "reg"]
                + [t for t in rest if t[1] == "lit"])

    # First term: load it into dest unless it is already there.
    if first_kind == "lit":
        instructions.append(f"ldw {dest}, {first_value}")
    elif first_value != dest:
        instructions.append(f"mov {dest}, {first_value}")

    for mnemonic, kind, value in rest:
        if kind == "lit":
            instructions.append(f"ldw {temp}, {value}")
            source = temp
        elif needs_snapshot and value == dest:
            source = temp
        else:
            source = value
        instructions.append(f"{mnemonic} {dest}, {source}")
    return instructions
|
|
|
|
def _pick_temp(dest, var_to_reg):
    """
    Choose a scratch register for compile_expr that differs from ``dest``.

    Prefers a pool register that no variable currently occupies. When every
    other register is occupied, falls back to the first pool register that is
    not ``dest``. Returns None only if the pool has no register besides
    ``dest``.
    """
    others = [r for r in register_pool if r != dest]
    occupied = var_to_reg.values()
    for reg in others:
        if reg not in occupied:
            return reg
    # NOTE(review): the fallback register still holds a variable; code that
    # writes the temp (e.g. a literal operand) will overwrite that variable.
    return others[0] if others else None


def compile_statement(line, context):
    """
    Compile a single statement from our limited C language.

    Supports:
        - Variable declaration: e.g., "int x = 5;"
        - Assignment: e.g., "x = x + 2;"
        - Function call: e.g., "foo();"
        - Return statement: e.g., "return x;"

    Parameters:
        line: the statement text; surrounding whitespace and trailing ';'
              are ignored.
        context: function context; 'var_to_reg' maps variable names to
                 registers and is extended by declarations.

    Returns:
        List of assembly instruction strings (empty for a blank statement).

    Raises:
        Exception: for a declaration without '=', an assignment to an
        undeclared variable, or a statement matching none of the forms
        above. Errors from allocate_register / compile_expr propagate.
    """
    var_to_reg = context['var_to_reg']
    line = line.strip().rstrip(';')
    if not line:
        return []

    # Function call statement pattern: identifier followed by "()"
    call = re.match(r'^(\w+)\s*\(\s*\)\s*$', line)
    if call:
        return [f"jsr {call.group(1)}"]

    # Variable declaration.
    if line.startswith("int "):
        name, sep, expr = line[4:].strip().partition("=")
        if not sep:
            raise Exception("Invalid declaration syntax.")
        reg = allocate_register(name.strip(), context)
        temp = _pick_temp(reg, var_to_reg)
        return list(compile_expr(expr.strip(), reg, temp, context))

    # Return statement. The word boundary keeps identifiers that merely start
    # with "return" (e.g. "returnValue = 5") from being parsed as a return.
    if re.match(r'return\b', line):
        instructions = []
        ret_expr = line[6:].strip()  # Remove "return"
        if ret_expr:
            # Convention: return value in register a.
            temp = _pick_temp("a", var_to_reg)
            instructions.extend(compile_expr(ret_expr, "a", temp, context))
        instructions.append("ret")
        return instructions

    # Assignment statement.
    if "=" in line:
        target, _, expr = line.partition("=")
        target = target.strip()
        if target not in var_to_reg:
            raise Exception(f"Variable '{target}' not declared")
        dest = var_to_reg[target]
        temp = _pick_temp(dest, var_to_reg)
        return list(compile_expr(expr.strip(), dest, temp, context))

    raise Exception(f"Unrecognized statement: {line}")
|
|
|
|
def compile_function(func_name, lines):
    """
    Compile one function body into assembly.

    Parameters:
        func_name: name of the function; emitted as the leading label.
        lines: list of statement strings making up the body.

    Returns the label line followed by the instructions of every statement.
    Blank lines and lines starting with "//" are skipped.
    """
    # Every function gets its own variable-to-register mapping.
    context = {"var_to_reg": {}}
    asm = [f"{func_name}:"]
    for raw in lines:
        stmt = raw.strip()
        if stmt and not stmt.startswith("//"):
            asm.extend(compile_statement(stmt, context))
    # Nothing is appended after the last statement: a body without a
    # "return" statement produces output that does not end in "ret".
    return asm
|
|
|
|
def compile_c_to_asm(c_code):
    """
    Compile a simple C program (with functions) into assembly.

    The program must contain functions defined as:

        int func_name() {
            // statements
        }

    The header (through "{") and the closing "}" must each be on their own
    line; anything following "{" on the header line is ignored. Blank lines,
    "//" comment lines and anything outside a function are skipped.

    The compiled output will start at the main function (if defined); the
    remaining functions follow in definition order.

    Returns:
        List of assembly lines for the whole program.

    Raises:
        Exception: on a function header inside another function, a "}" with
        no open function, or a function still open at end of input.
        Errors from compile_function propagate.
    """
    header = re.compile(r'^int\s+(\w+)\s*\(\s*\)\s*\{')
    functions = {}
    current_func = None  # name of the function being collected, if any
    current_lines = []

    for line in c_code.splitlines():
        stripped = line.strip()
        if not stripped or stripped.startswith("//"):
            continue

        # Detect function start: "int funcName() {"
        m = header.match(stripped)
        if m:
            if current_func is not None:
                raise Exception("Nested functions not supported.")
            current_func = m.group(1)
            current_lines = []
            continue

        # Detect end of function: "}"
        if stripped == "}":
            if current_func is None:
                raise Exception("Unexpected '}'")
            functions[current_func] = compile_function(current_func, current_lines)
            current_func = None
            current_lines = []
            continue

        # Inside a function, keep the line; outside any function, global
        # declarations are ignored for simplicity.
        if current_func is not None:
            current_lines.append(stripped)

    # A function that never saw its "}" used to vanish from the output
    # silently; report it instead.
    if current_func is not None:
        raise Exception(f"Missing closing '}}' for function '{current_func}'")

    # Build the final assembly code: "main" first (if defined), then the rest.
    asm_lines = list(functions.get("main", []))
    for fname, code in functions.items():
        if fname != "main":
            asm_lines.extend(code)
    return asm_lines
|
|
|
|
# Demo: compile a small two-function program and print the assembly.
if __name__ == "__main__":
    sample_c = """
// sample C program with functions.
int main() {
int x = 5;
int y = 10;
x = x + y;
foo();

}

int foo() {
int a = 3;
int b = 7;
a = a + b;
return a;
}
"""
    # One instruction (or label) per output line.
    for asm_line in compile_c_to_asm(sample_c):
        print(asm_line)
|