Python-Cpu-Emulator/c-to-asm.py
2025-03-28 10:10:24 -05:00

242 lines
7.8 KiB
Python

#!/usr/bin/env python3
import re
# Fixed pool of registers; allocate_register walks this list front to back.
register_pool = ["a", "b", "c", "d", "e", "f"]

# Valid registers and instructions.
# The register set is derived from the pool so the two can never disagree.
valid_registers = set(register_pool)
valid_instructions = {
    "ldw", "mov", "add", "sub", "str", "ldr", "int",
    "push", "pop", "jsr", "ret", "xor", "and", "jmp",
    "mul", "div", "bne", "beq", "blt", "ldb", "stb",
}
def allocate_register(var_name, context):
    """Return the register backing *var_name*, allocating one if needed.

    Parameters:
        var_name: name of the variable that needs a register.
        context: per-function dictionary; its 'var_to_reg' mapping is read
            and, when a new register is handed out, updated in place.

    Returns:
        The register name assigned to the variable.

    Raises:
        Exception: if the variable has no register yet and every register in
            register_pool is already taken.
    """
    var_to_reg = context['var_to_reg']

    # A name that already owns a register keeps it. Searching for a fresh one
    # would move the variable (losing its current value) and would fail with
    # "Out of registers!" on a full pool even though no new register is needed.
    if var_name in var_to_reg:
        return var_to_reg[var_name]

    taken = set(var_to_reg.values())
    for reg in register_pool:
        if reg not in taken:
            var_to_reg[var_name] = reg
            return reg
    raise Exception("Out of registers!")
def compile_expr(expr, dest, temp, context):
    """
    Compile a simple expression (literals, variables, +, -) into assembly.

    Parameters:
        expr: string expression (e.g., "5", "x", "x + 3", "-5").
        dest: destination register for the result.
        temp: scratch register; it is overwritten whenever a literal operand
            has to be loaded or the old value of dest has to be preserved,
            so it must not hold a value the caller still needs.
        context: dictionary with function context (like var_to_reg).

    Returns:
        List of assembly instruction strings; empty for an empty expression.

    Raises:
        Exception: for an undeclared variable, an unsupported operator, or a
            malformed expression such as a trailing operator ("x +").
    """
    var_to_reg = context['var_to_reg']
    mnemonics = {"+": "add", "-": "sub"}

    # The capturing group makes re.split keep the operators as tokens.
    tokens = [piece.strip() for piece in re.split(r'(\+|\-)', expr)]
    tokens = [tok for tok in tokens if tok]
    if not tokens:
        return []

    # A leading sign ("-5", "+x") is compiled as "0 - 5" / "0 + x".
    if tokens[0] in mnemonics:
        tokens.insert(0, "0")

    # A well-formed expression alternates term, operator, term, ... and so
    # always has an odd number of tokens.
    if len(tokens) % 2 == 0:
        raise Exception(f"Malformed expression '{expr}'")

    def register_of(name):
        if name not in var_to_reg:
            raise Exception(f"Variable '{name}' not declared")
        return var_to_reg[name]

    first = tokens[0]
    operations = list(zip(tokens[1::2], tokens[2::2]))

    # dest is overwritten while the expression is evaluated. If a later
    # operand lives in dest (e.g. "x = y - x"), it would read the partial
    # result instead of the variable's old value. The one safe case is the
    # first operation of an expression that starts with dest itself
    # ("x + x"), because dest is still untouched at that point.
    first_is_dest = not first.isdigit() and var_to_reg.get(first) == dest
    needs_snapshot = any(
        not operand.isdigit()
        and var_to_reg.get(operand) == dest
        and (index > 0 or not first_is_dest)
        for index, (_, operand) in enumerate(operations)
    )

    instructions = []
    if needs_snapshot:
        # Keep the old value of dest in temp for the later reads.
        instructions.append(f"mov {temp}, {dest}")

    # Process first term.
    if first.isdigit():
        instructions.append(f"ldw {dest}, {first}")
    else:
        src_reg = register_of(first)
        if src_reg != dest:
            instructions.append(f"mov {dest}, {src_reg}")

    deferred_literals = []
    for op, operand in operations:
        if operand.isdigit():
            if op not in mnemonics:
                raise Exception(f"Unsupported operator '{op}'")
            if needs_snapshot:
                # temp is busy holding the snapshot; literal terms are applied
                # after the last variable term. Reordering +/- terms does not
                # change the sum.
                deferred_literals.append((op, operand))
            else:
                instructions.append(f"ldw {temp}, {operand}")
                instructions.append(f"{mnemonics[op]} {dest}, {temp}")
        else:
            operand_reg = register_of(operand)
            if op not in mnemonics:
                raise Exception(f"Unsupported operator '{op}'")
            if needs_snapshot and operand_reg == dest:
                operand_reg = temp  # read the preserved old value
            instructions.append(f"{mnemonics[op]} {dest}, {operand_reg}")

    # The snapshot is no longer needed, so temp is free for literal loads.
    for op, literal in deferred_literals:
        instructions.append(f"ldw {temp}, {literal}")
        instructions.append(f"{mnemonics[op]} {dest}, {temp}")

    return instructions
def _pick_temp_register(avoid, context):
    """Pick a scratch register other than *avoid*, preferring an unused one."""
    in_use = set(context['var_to_reg'].values())
    candidates = [reg for reg in register_pool if reg != avoid]
    for reg in candidates:
        if reg not in in_use:
            return reg
    # Every register holds a variable: fall back to the first one that is not
    # the destination. Its current contents may be overwritten by the caller.
    return candidates[0] if candidates else None


def compile_statement(line, context):
    """
    Compile a single statement from our limited C language.

    Supports:
        - Variable declaration: e.g., "int x = 5;"
        - Assignment: e.g., "x = x + 2;"
        - Function call: e.g., "foo();"
        - Return statement: e.g., "return x;"

    Parameters:
        line: one source statement; surrounding whitespace and trailing
            semicolons are stripped before parsing.
        context: per-function dictionary holding the 'var_to_reg' mapping.

    Returns:
        List of assembly instruction strings (empty for a blank statement).

    Raises:
        Exception: for invalid declaration syntax, assignment to an
            undeclared variable, or an unrecognized statement.
    """
    var_to_reg = context['var_to_reg']
    line = line.strip().rstrip(';')
    if not line:
        return []

    # Function call statement pattern: identifier followed by "()"
    call = re.match(r'^(\w+)\s*\(\s*\)\s*$', line)
    if call:
        return [f"jsr {call.group(1)}"]

    # Variable declaration.
    if line.startswith("int "):
        parts = line[4:].strip().split("=", 1)  # Remove "int "
        if len(parts) != 2:
            raise Exception("Invalid declaration syntax.")
        var_name = parts[0].strip()
        expr = parts[1].strip()
        # Reject empty or non-identifier names ("int = 5", "int x, y = 5")
        # instead of silently giving them a register.
        if not re.fullmatch(r'[A-Za-z_]\w*', var_name):
            raise Exception("Invalid declaration syntax.")
        reg = allocate_register(var_name, context)
        # Choose a temporary register different from the destination.
        temp = _pick_temp_register(reg, context)
        return compile_expr(expr, reg, temp, context)

    # Return statement. The word boundary keeps identifiers that merely start
    # with "return" (e.g. "returned = 5") from being parsed as a return.
    if re.match(r'return\b', line):
        instructions = []
        ret_expr = line[6:].strip()  # Remove "return"
        if ret_expr:
            # Convention: return value in register a.
            temp = _pick_temp_register("a", context)
            instructions.extend(compile_expr(ret_expr, "a", temp, context))
        instructions.append("ret")
        return instructions

    # Assignment statement.
    if "=" in line:
        parts = line.split("=", 1)
        var_name = parts[0].strip()
        expr = parts[1].strip()
        if var_name not in var_to_reg:
            raise Exception(f"Variable '{var_name}' not declared")
        dest = var_to_reg[var_name]
        temp = _pick_temp_register(dest, context)
        return compile_expr(expr, dest, temp, context)

    raise Exception(f"Unrecognized statement: {line}")
def compile_function(func_name, lines):
    """
    Translate one function body into assembly.

    Parameters:
        func_name: name used for the label that opens the function.
        lines: the body's source lines, one statement per entry.

    Returns:
        A list starting with the "<func_name>:" label, followed by the
        instructions produced for each statement in order. Blank lines and
        "//" comment lines are skipped. Nothing is appended after the last
        statement.
    """
    # Each function gets its own variable-to-register mapping.
    scope = {"var_to_reg": {}}
    asm = [f"{func_name}:"]
    for raw in lines:
        statement = raw.strip()
        if statement and not statement.startswith("//"):
            asm.extend(compile_statement(statement, scope))
    return asm
def compile_c_to_asm(c_code):
    """
    Compile a simple C program (with functions) into assembly.

    The program must contain functions defined as:
        int func_name() {
            // statements
        }
    with the opening brace on the header line and the closing brace on a
    line of its own. Blank lines and "//" comment lines are skipped, and
    anything outside a function body is ignored.

    Parameters:
        c_code: the program source as a single string.

    Returns:
        List of assembly lines. The main function (if defined) comes first;
        the remaining functions follow in definition order.

    Raises:
        Exception: on a function header inside another function, a stray
            "}", or a function that is still open at the end of the input.
    """
    functions = {}
    current_func = None  # name of the function being collected, if any
    current_lines = []

    for raw in c_code.splitlines():
        stripped = raw.strip()
        if not stripped or stripped.startswith("//"):
            continue

        # Detect function start: "int funcName() {"
        header = re.match(r'^int\s+(\w+)\s*\(\s*\)\s*\{', stripped)
        if header:
            if current_func is not None:
                raise Exception("Nested functions not supported.")
            current_func = header.group(1)
            current_lines = []
            continue

        # Detect end of function: "}"
        if stripped == "}":
            if current_func is None:
                raise Exception("Unexpected '}'")
            functions[current_func] = compile_function(current_func, current_lines)
            current_func = None
            current_lines = []
            continue

        # Inside a function, keep the line. Outside any function, global
        # declarations are ignored for simplicity.
        if current_func is not None:
            current_lines.append(stripped)

    # A body that never closes used to vanish from the output without any
    # diagnostic; report it instead.
    if current_func is not None:
        raise Exception(f"Function '{current_func}' is missing its closing '}}'")

    # Build the final assembly code: "main" first when present, then the rest.
    asm_lines = []
    if "main" in functions:
        asm_lines.extend(functions["main"])
    for fname, code in functions.items():
        if fname != "main":
            asm_lines.extend(code)
    return asm_lines
# Demo: compile a small two-function program and print one instruction per line.
if __name__ == "__main__":
    demo_source = """
// sample C program with functions.
int main() {
int x = 5;
int y = 10;
x = x + y;
foo();
}
int foo() {
int a = 3;
int b = 7;
a = a + b;
return a;
}
"""
    for asm_line in compile_c_to_asm(demo_source):
        print(asm_line)