did nothing...

This commit is contained in:
OusmBlueNinja 2025-03-28 10:10:24 -05:00
parent b88b8bffc9
commit a86ffb54db
6 changed files with 783 additions and 565 deletions

Binary file not shown.

View File

@ -1,548 +1,241 @@
#!/usr/bin/env python3
import re
import os
from termcolor import colored
# Valid registers and instructions.
valid_registers = {"a", "b", "c", "d", "e", "f"}
valid_instructions = {"ldw", "mov", "add", "sub", "str", "ldr", "int",
"push", "pop", "jsr", "ret", "xor", "and", "jmp",
"mul", "div", "bne", "beq", "blt", "ldb", "stb"}
class Variable:
def __init__(self, name, address, var_type="int"):
self.name = name
self.address = address
self.type = var_type
# Fixed pool of registers.
register_pool = ["a", "b", "c", "d", "e", "f"]
class Compiler:
def __init__(self):
# 1024 bytes total: 0x000 to 0x3FF
self.data_ptr = 0x400
self.variables = {}
self.struct_definitions = {}
self.in_struct_def = False
self.current_struct_name = None
self.current_struct_fields = []
self.defines = {} # For #define macros
self.typedefs = {} # For typedef
self.label_counter = 0
self.block_stack = [] # For if/while blocks
def allocate_register(var_name, context):
    """Bind ``var_name`` to the first unused register and return that register.

    ``context['var_to_reg']`` maps variable names to registers; a register
    counts as used when it appears among that mapping's values. Registers are
    tried in ``register_pool`` order.

    Raises:
        Exception: if every register in the pool is already taken.
    """
    var_to_reg = context['var_to_reg']
    taken = var_to_reg.values()  # live view of the registers currently bound
    free = next((reg for reg in register_pool if reg not in taken), None)
    if free is None:
        raise Exception("Out of registers!")
    var_to_reg[var_name] = free
    return free
def new_label(self, prefix):
lbl = f"{prefix}{self.label_counter}"
self.label_counter += 1
return lbl
def preprocess(self, filename):
lines = self._read_file_recursive(filename)
processed_lines = self._apply_defines(lines)
return processed_lines
def _read_file_recursive(self, filename, included_files=None):
if included_files is None:
included_files = set()
if filename in included_files:
# Prevent infinite recursion on includes
return []
included_files.add(filename)
result_lines = []
try:
with open(filename, "r") as f:
for line in f:
line_stripped = line.strip()
def compile_expr(expr, dest, temp, context):
"""
Compile a simple expression (literals, variables, +, -) into assembly.
# #include "file"
inc_match = re.match(r'#include\s+"([^"]+)"', line_stripped)
if inc_match:
inc_file = inc_match.group(1)
included_content = self._read_file_recursive(inc_file, included_files)
result_lines.extend(included_content)
continue
# #define KEY VALUE
def_match = re.match(r'#define\s+([a-zA-Z_]\w*)\s+(.*)', line_stripped)
if def_match:
key = def_match.group(1)
value = def_match.group(2)
self.defines[key] = value
continue
# typedef oldtype newtype;
tmatch = re.match(r'typedef\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line_stripped)
if tmatch:
oldt = tmatch.group(1)
newt = tmatch.group(2)
# Resolve oldt if it's also a typedef
oldt = self.apply_typedef(oldt)
self.typedefs[newt] = oldt
continue
result_lines.append(line)
except FileNotFoundError as e:
print(colored(f"{filename}:0: error: {e}", "red"))
Parameters:
expr: string expression (e.g., "5", "x", "x + 3")
dest: destination register for the result.
temp: temporary register.
context: dictionary with function context (like var_to_reg).
"""
var_to_reg = context['var_to_reg']
instructions = []
tokens = re.split(r'(\+|\-)', expr)
tokens = [t.strip() for t in tokens if t.strip() != '']
if not tokens:
return instructions
return result_lines
def _apply_defines(self, lines):
token_pattern = re.compile(r'([A-Za-z0-9_]+)')
processed = []
for line in lines:
parts = token_pattern.split(line)
# parts: tokens and separators
for i, part in enumerate(parts):
if part in self.defines:
print(f"Replaced {part} with {self.defines[part]}")
part = self.defines[part]
parts[i] = part
new_line = "".join(parts)
processed.append(new_line)
return processed
def apply_typedef(self, t):
if t in self.typedefs:
return self.typedefs[t]
return t
def allocate_bytes(self, count):
start_addr = self.data_ptr - (count - 1)
if start_addr < 0x000:
raise Exception("Out of memory!")
self.data_ptr = start_addr - 1
return start_addr
def allocate_var(self, name, var_type="int"):
var_type = self.apply_typedef(var_type)
if name in self.variables:
return self.variables[name]
if var_type.startswith("struct:"):
sname = var_type.split(":")[1]
fields = self.struct_definitions[sname]
length = len(fields) # each 1 byte
start_addr = self.allocate_bytes(length)
var = Variable(name, start_addr, var_type)
self.variables[name] = var
return var
# Process first term.
token = tokens[0]
if token.isdigit():
instructions.append(f"ldw {dest}, {token}")
else:
if token not in var_to_reg:
raise Exception(f"Variable '{token}' not declared")
src_reg = var_to_reg[token]
if src_reg != dest:
instructions.append(f"mov {dest}, {src_reg}")
i = 1
while i < len(tokens):
op = tokens[i]
operand = tokens[i+1]
if operand.isdigit():
instructions.append(f"ldw {temp}, {operand}")
if op == "+":
instructions.append(f"add {dest}, {temp}")
elif op == "-":
instructions.append(f"sub {dest}, {temp}")
else:
raise Exception(f"Unsupported operator '{op}'")
else:
start_addr = self.allocate_bytes(1)
var = Variable(name, start_addr, var_type)
self.variables[name] = var
return var
def allocate_array(self, name, length, var_type="int"):
var_type = self.apply_typedef(var_type)
arr_start = self.allocate_bytes(length)
var_addr = self.allocate_bytes(1)
var = Variable(name, var_addr, "array")
self.variables[name] = var
return var, arr_start
def store_string(self, string_value):
string_value = string_value.replace('\\n', '\n')
length = len(string_value) + 1
start_addr = self.allocate_bytes(length)
asm = []
current_addr = start_addr
for ch in string_value:
ascii_val = ord(ch)
asm.append(f"ldw a, {ascii_val}")
asm.append(f"str a, 0x{current_addr:X}")
current_addr += 1
asm.append("ldw a, 0")
asm.append(f"str a, 0x{current_addr:X}")
return asm, start_addr
def get_struct_field_offset(self, struct_type, field_name):
sname = struct_type.split(":")[1]
fields = self.struct_definitions[sname]
for i, (fname, ftype) in enumerate(fields):
if fname == field_name:
return i
raise Exception(f"Field {field_name} not found in {struct_type}")
def parse_condition(self, cond_str):
# cond_str like "a == b" or "a != b"
m = re.match(r'([a-zA-Z_]\w*)\s*(==|!=)\s*([a-zA-Z_]\w*)', cond_str.strip())
if not m:
raise Exception("Unsupported condition: " + cond_str)
var1, op, var2 = m.groups()
return var1, op, var2
def compile_condition(self, var1, op, var2):
asm = []
v1 = self.allocate_var(var1)
v2 = self.allocate_var(var2)
asm.append(f"ldr a, 0x{v1.address:X}")
asm.append(f"ldr b, 0x{v2.address:X}")
# a = a - b
asm.append("sub a, b")
return asm, op
def extract_comment(self, line):
comment_index = line.find('//')
if comment_index != -1:
code_part = line[:comment_index]
comment_part = line[comment_index+2:].strip()
return code_part, comment_part
return line, None
def compile_line(self, code_part):
line = code_part.strip()
asm = []
if self.in_struct_def:
if line.startswith("};"):
self.struct_definitions[self.current_struct_name] = self.current_struct_fields
self.in_struct_def = False
self.current_struct_name = None
self.current_struct_fields = []
return asm
mfield = re.match(r'int\s+([a-zA-Z_]\w*)\s*;', line)
if mfield:
fname = mfield.group(1)
ftype = "int"
self.current_struct_fields.append((fname, ftype))
return asm
# struct definition start
msd = re.match(r'struct\s+([a-zA-Z_]\w*)\s*\{', line)
if msd:
self.in_struct_def = True
self.current_struct_name = msd.group(1)
self.current_struct_fields = []
return asm
# struct var declaration
msv = re.match(r'struct\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line)
if msv:
sname, varname = msv.groups()
var_type = "struct:" + sname
self.allocate_var(varname, var_type)
return asm
# if statement
mif = re.match(r'if\s*\(([^)]+)\)\s*\{', line)
if mif:
cond_str = mif.group(1)
var1, op, var2 = self.parse_condition(cond_str)
end_label = self.new_label("endif")
cond_code, cmp_op = self.compile_condition(var1, op, var2)
asm.extend(cond_code)
# if '==': jump if not zero a != 0
# if '!=': jump if zero a == 0
if cmp_op == '==':
asm.append("bne a, 0, " + end_label)
if operand not in var_to_reg:
raise Exception(f"Variable '{operand}' not declared")
operand_reg = var_to_reg[operand]
if op == "+":
instructions.append(f"add {dest}, {operand_reg}")
elif op == "-":
instructions.append(f"sub {dest}, {operand_reg}")
else:
asm.append("beq a, 0, " + end_label)
self.block_stack.append(('if', end_label))
return asm
raise Exception(f"Unsupported operator '{op}'")
i += 2
return instructions
# while statement
mwhile = re.match(r'while\s*\(([^)]+)\)\s*\{', line)
if mwhile:
cond_str = mwhile.group(1)
var1, op, var2 = self.parse_condition(cond_str)
start_label = self.new_label("whilestart")
end_label = self.new_label("whileend")
asm.append(start_label + ":")
cond_code, cmp_op = self.compile_condition(var1, op, var2)
asm.extend(cond_code)
if cmp_op == '==':
asm.append("bne a, 0, " + end_label)
else:
asm.append("beq a, 0, " + end_label)
self.block_stack.append(('while', start_label, end_label))
return asm
def compile_statement(line, context):
"""
Compile a single statement from our limited C language.
Supports:
- Variable declaration: e.g., "int x = 5;"
- Assignment: e.g., "x = x + 2;"
- Function call: e.g., "foo();"
- Return statement: e.g., "return x;"
"""
var_to_reg = context['var_to_reg']
instructions = []
line = line.strip().rstrip(';')
if not line:
return instructions
# end of block
if line == "}":
if not self.block_stack:
return asm
blk = self.block_stack.pop()
if blk[0] == 'if':
end_label = blk[1]
asm.append(end_label + ":")
elif blk[0] == 'while':
start_label = blk[1]
end_label = blk[2]
# jump back to start
asm.append(f"jmp {start_label}")
asm.append(end_label + ":")
return asm
# Function call statement pattern: identifier followed by "()"
m = re.match(r'^(\w+)\s*\(\s*\)\s*$', line)
if m:
func_name = m.group(1)
instructions.append(f"jsr {func_name}")
return instructions
# p.x = number;
m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
# Variable declaration.
if line.startswith("int "):
line = line[4:].strip() # Remove "int "
parts = line.split("=", 1)
if len(parts) != 2:
raise Exception("Invalid declaration syntax.")
var_name = parts[0].strip()
expr = parts[1].strip()
reg = allocate_register(var_name, context)
# Choose a temporary register different from the destination.
temp = next((r for r in register_pool if r != reg and r not in var_to_reg.values()), None)
if temp is None:
temp = next((r for r in register_pool if r != reg), None)
instructions.extend(compile_expr(expr, reg, temp, context))
return instructions
# Return statement.
if line.startswith("return"):
ret_expr = line[6:].strip() # Remove "return"
if ret_expr:
# Convention: return value in register a.
temp = next((r for r in register_pool if r != "a" and r not in var_to_reg.values()), None)
if temp is None:
temp = next((r for r in register_pool if r != "a"), None)
instructions.extend(compile_expr(ret_expr, "a", temp, context))
instructions.append("ret")
return instructions
# Assignment statement.
if "=" in line:
parts = line.split("=", 1)
var_name = parts[0].strip()
expr = parts[1].strip()
if var_name not in var_to_reg:
raise Exception(f"Variable '{var_name}' not declared")
dest = var_to_reg[var_name]
temp = next((r for r in register_pool if r != dest and r not in var_to_reg.values()), None)
if temp is None:
temp = next((r for r in register_pool if r != dest), None)
instructions.extend(compile_expr(expr, dest, temp, context))
return instructions
raise Exception(f"Unrecognized statement: {line}")
def compile_function(func_name, lines):
    """Translate one function body into a list of assembly lines.

    The result starts with the ``<func_name>:`` label. Each entry of ``lines``
    is stripped; blank entries and entries starting with ``//`` are skipped,
    and everything else is handed to ``compile_statement`` together with a
    register context that is created fresh for this function.
    """
    # Every function starts with an empty variable-to-register mapping.
    context = {"var_to_reg": {}}
    instructions = [f"{func_name}:"]
    for raw in lines:
        statement = raw.strip()
        if statement and not statement.startswith("//"):
            instructions.extend(compile_statement(statement, context))
    return instructions
def compile_c_to_asm(c_code):
"""
Compile a simple C program (with functions) into assembly.
The program must contain functions defined as:
int func_name() {
// statements
}
The compiled output will start at the main function (if defined).
"""
lines = c_code.splitlines()
functions = {}
current_func = None
current_lines = []
in_function = False
for line in lines:
stripped = line.strip()
if not stripped or stripped.startswith("//"):
continue
# Detect function start: "int funcName() {"
m = re.match(r'^int\s+(\w+)\s*\(\s*\)\s*\{', stripped)
if m:
varname, fieldname, value = m.groups()
value = int(value)
v = self.allocate_var(varname)
offset = self.get_struct_field_offset(v.type, fieldname)
asm.append(f"ldr a, 0x{v.address:X}")
if offset != 0:
asm.append(f"ldw b, {offset}")
asm.append("add a, b")
asm.append(f"ldw c, {value}")
asm.append("stb c, a")
return asm
if in_function:
raise Exception("Nested functions not supported.")
current_func = m.group(1)
in_function = True
current_lines = []
continue
# p.x = var + number;
m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
if m:
varname, fieldname, srcvar, number = m.groups()
number = int(number)
v = self.allocate_var(varname)
offset = self.get_struct_field_offset(v.type, fieldname)
asm.append(f"ldr a, 0x{v.address:X}")
if offset != 0:
asm.append(f"ldw b, {offset}")
asm.append("add a, b")
v2 = self.allocate_var(srcvar)
asm.append(f"ldr c, 0x{v2.address:X}")
asm.append(f"ldw d, {number}")
asm.append("add c, d")
asm.append("stb c, a")
return asm
# Detect end of function: "}"
if stripped == "}":
if not in_function:
raise Exception("Unexpected '}'")
functions[current_func] = compile_function(current_func, current_lines)
in_function = False
current_func = None
current_lines = []
continue
# p.x = srcvar;
m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*;', line)
if m:
varname, fieldname, srcvar = m.groups()
v = self.allocate_var(varname)
offset = self.get_struct_field_offset(v.type, fieldname)
asm.append(f"ldr a, 0x{v.address:X}")
if offset != 0:
asm.append(f"ldw b, {offset}")
asm.append("add a, b")
v2 = self.allocate_var(srcvar)
asm.append(f"ldr c, 0x{v2.address:X}")
asm.append("stb c, a")
return asm
# Inside a function, add the line.
if in_function:
current_lines.append(stripped)
else:
# Outside any function; for simplicity, ignore global declarations.
continue
# x = p.x;
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*;', line)
if m:
dst, varname, fieldname = m.groups()
v = self.allocate_var(varname)
offset = self.get_struct_field_offset(v.type, fieldname)
vd = self.allocate_var(dst)
asm.append(f"ldr a, 0x{v.address:X}")
if offset != 0:
asm.append(f"ldw b, {offset}")
asm.append("add a, b")
asm.append("ldb c, a")
asm.append(f"str c, 0x{vd.address:X}")
return asm
# print_int(p.x);
m = re.match(r'print_int\(([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\)\s*;', line)
if m:
varname, fieldname = m.groups()
v = self.allocate_var(varname)
offset = self.get_struct_field_offset(v.type, fieldname)
asm.append(f"ldr a, 0x{v.address:X}")
if offset != 0:
asm.append(f"ldw b, {offset}")
asm.append("add a, b")
asm.append("ldb a, a")
asm.append("int 0x01")
return asm
# int arr[10];
m = re.match(r'int\s+([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
if m:
varname = m.group(1)
length = int(m.group(2))
arr_var, start_addr = self.allocate_array(varname, length)
asm.append(f"ldw a, 0x{start_addr:X}")
asm.append(f"str a, 0x{arr_var.address:X}")
return asm
# int x = number;
m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
if m:
varname = m.group(1)
value = int(m.group(2))
var = self.allocate_var(varname, "int")
asm.append(f"ldw a, {value}")
asm.append(f"str a, 0x{var.address:X}")
return asm
# int y = x + number;
m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
if m:
varname, var2, number = m.groups()
number = int(number)
v1 = self.allocate_var(varname, "int")
v2 = self.allocate_var(var2, "int")
asm.append(f"ldr a, 0x{v2.address:X}")
asm.append(f"ldw b, {number}")
asm.append("add a, b")
asm.append(f"str a, 0x{v1.address:X}")
return asm
# char *msg = "Hello\n";
m = re.match(r'char\s*\*\s*([a-zA-Z_]\w*)\s*=\s*"([^"]*)"\s*;', line)
if m:
varname, string_val = m.groups()
v = self.allocate_var(varname, "char*")
code, start_addr = self.store_string(string_val)
asm.extend(code)
asm.append(f"ldw a, 0x{start_addr:X}")
asm.append(f"str a, 0x{v.address:X}")
return asm
# var = number;
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
if m:
varname, value = m.groups()
value = int(value)
v = self.allocate_var(varname, "int")
asm.append(f"ldw a, {value}")
asm.append(f"str a, 0x{v.address:X}")
return asm
# var = var2 + number;
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
if m:
varname, var2, number = m.groups()
number = int(number)
v1 = self.allocate_var(varname, "int")
v2 = self.allocate_var(var2, "int")
asm.append(f"ldr a, 0x{v2.address:X}")
asm.append(f"ldw b, {number}")
asm.append("add a, b")
asm.append(f"str a, 0x{v1.address:X}")
return asm
# var[index] = number;
m = re.match(r'([a-zA-Z_]\w*)\[(\d+)\]\s*=\s*(\d+)\s*;', line)
if m:
arr, index, value = m.groups()
index = int(index)
value = int(value)
arr_var = self.allocate_var(arr)
asm.append(f"ldr a, 0x{arr_var.address:X}")
asm.append(f"ldw b, {index}")
asm.append("add a, b")
asm.append(f"ldw c, {value}")
asm.append("stb c, a")
return asm
# x = arr[index];
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
if m:
varname, arr, index = m.groups()
index = int(index)
v = self.allocate_var(varname, "int")
arr_var = self.allocate_var(arr)
asm.append(f"ldr a, 0x{arr_var.address:X}")
asm.append(f"ldw b, {index}")
asm.append("add a, b")
asm.append("ldb d, a")
asm.append(f"str d, 0x{v.address:X}")
return asm
# print_char(var);
m = re.match(r'print_char\(([a-zA-Z_]\w*)\)\s*;', line)
if m:
varname = m.group(1)
v = self.allocate_var(varname)
asm.append(f"ldr a, 0x{v.address:X}")
asm.append("int 0x00")
return asm
# print_char(arr[index]);
m = re.match(r'print_char\(([a-zA-Z_]\w*)\[(\d+)\]\)\s*;', line)
if m:
arr, index = m.groups()
index = int(index)
arr_var = self.allocate_var(arr)
asm.append(f"ldr a, 0x{arr_var.address:X}")
asm.append(f"ldw b, {index}")
asm.append("add a, b")
asm.append("ldb a, a")
asm.append("int 0x00")
return asm
# print_int(var);
m = re.match(r'print_int\(([a-zA-Z_]\w*)\)\s*;', line)
if m:
varname = m.group(1)
v = self.allocate_var(varname)
asm.append(f"ldr a, 0x{v.address:X}")
asm.append("int 0x01")
return asm
# print_string(var);
m = re.match(r'print_string\(([a-zA-Z_]\w*)\)\s*;', line)
if m:
varname = m.group(1)
v = self.allocate_var(varname, "char*")
asm.append("ldw d, 0")
asm.append(f"ldr b, 0x{v.address:X}")
asm.append("ldw c, 1")
asm.append("string_loop:")
asm.append("ldb a, b")
asm.append("beq a, d, string_end")
asm.append("int 0x00")
asm.append("add b, c")
asm.append("jmp string_loop")
asm.append("string_end:")
return asm
# return number;
m = re.match(r'return\s+(\d+)\s*;', line)
if m:
asm.append("int 0xFF")
return asm
# Unrecognized line or empty
return asm
def compile_c(self, c_code):
# First, parse everything to detect structs and typedef done in preprocess
all_lines = c_code.split('\n')
# struct definitions might appear outside main
for cline in all_lines:
self.compile_line(cline)
# Extract lines inside main
lines = []
in_main = False
for cline in all_lines:
cline = cline.rstrip()
if 'int main(' in cline:
in_main = True
continue
if in_main:
if cline.startswith('}'):
in_main = False
break
lines.append(cline)
asm = ["main:"]
for line in lines:
code_part, comment_part = self.extract_comment(line)
instructions = self.compile_line(code_part)
if instructions:
for i, instr in enumerate(instructions):
if i == 0 and comment_part:
asm.append(f" {instr} ; {comment_part}")
else:
asm.append(f" {instr}")
else:
if comment_part:
asm.append(f" ; {comment_part}")
return asm
# Build the final assembly code.
# If "main" is defined, list it first.
asm_lines = []
if "main" in functions:
asm_lines.extend(functions["main"])
for fname, code in functions.items():
if fname != "main":
asm_lines.extend(code)
else:
for fname, code in functions.items():
asm_lines.extend(code)
return asm_lines
# Example usage.
if __name__ == "__main__":
compiler = Compiler()
preprocessed_lines = compiler.preprocess("main.c")
c_code = "\n".join(preprocessed_lines)
asm_code = compiler.compile_c(c_code)
with open("test.asm", "w") as out:
for line in asm_code:
out.write(line + "\n")
sample_c = """
// sample C program with functions.
int main() {
int x = 5;
int y = 10;
x = x + y;
foo();
}
int foo() {
int a = 3;
int b = 7;
a = a + b;
return a;
}
"""
asm_output = compile_c_to_asm(sample_c)
for inst in asm_output:
print(inst)

View File

@ -1 +1 @@
program = [1,0,87,5,0,1004,1,0,101,5,0,1005,1,0,108,5,0,1006,1,0,99,5,0,1007,1,0,111,5,0,1008,1,0,109,5,0,1009,1,0,101,5,0,1010,1,0,33,5,0,1011,1,0,10,5,0,1012,1,0,0,5,0,1013,1,0,1004,5,0,1024,1,3,0,6,1,1024,1,2,1,21,0,1,9,0,3,91,10,0,0,3,1,2,17,75,0,10,255,0]
program = [1,0,5,1,1,10,3,0,1,13,15,0,14,0,0,1,0,3,1,1,7,3,0,1,14,0,0]

2
std.h
View File

@ -1,6 +1,6 @@
#define true 1
#define false 0
#define TEST 256
struct vec2 {
int x;

View File

@ -1,34 +1,11 @@
main:
ldw a, 87
str a, 0x3EC
ldw a, 101
str a, 0x3ED
ldw a, 108
str a, 0x3EE
ldw a, 99
str a, 0x3EF
ldw a, 111
str a, 0x3F0
ldw a, 109
str a, 0x3F1
ldw a, 101
str a, 0x3F2
ldw a, 33
str a, 0x3F3
ldw a, 10
str a, 0x3F4
ldw a, 0
str a, 0x3F5
ldw a, 0x3EC
str a, 0x400
ldw d, 0
ldr b, 0x400
ldw c, 1
string_loop:
ldb a, b
beq a, d, string_end
int 0x00
add b, c
jmp string_loop
string_end:
int 0xFF
ldw a, 5
ldw b, 10
add a, b
jsr foo
ret
foo:
ldw a, 3
ldw b, 7
add a, b
retZ

548
tests/c-test02.py Normal file
View File

@ -0,0 +1,548 @@
import re
import os
from termcolor import colored
class Variable:
    """A named storage location known to the compiler.

    Attributes are the symbol ``name``, the ``address`` assigned to it, and a
    ``type`` tag, which is ``"int"`` unless the caller passes ``var_type``.
    """

    def __init__(self, name, address, var_type="int"):
        # The constructor argument is ``var_type``; it is stored as ``type``.
        self.name, self.address, self.type = name, address, var_type
class Compiler:
def __init__(self):
    """Set up an empty compiler: no variables, structs, macros or open blocks."""
    # Data memory is 1024 bytes (0x000 to 0x3FF) and is handed out from the
    # top downwards. allocate_bytes treats data_ptr as the highest free
    # address, so it has to start at 0x3FF; starting at 0x400 put the very
    # first allocation one byte past the end of memory.
    self.data_ptr = 0x3FF
    self.variables = {}            # name -> Variable
    self.struct_definitions = {}   # struct name -> list of (field, type)
    # State of the struct definition currently being read, if any.
    self.in_struct_def = False
    self.current_struct_name = None
    self.current_struct_fields = []
    self.defines = {}   # For #define macros
    self.typedefs = {}  # For typedef
    self.label_counter = 0         # running number used by new_label
    self.block_stack = []  # For if/while blocks
def new_label(self, prefix):
    """Return ``prefix`` followed by the current label number, then advance it."""
    number = self.label_counter
    self.label_counter = number + 1
    return f"{prefix}{number}"
def preprocess(self, filename):
    """Read ``filename`` with its includes, then apply the collected macros.

    Returns the list of lines produced by ``_apply_defines`` from the lines
    gathered by ``_read_file_recursive``.
    """
    return self._apply_defines(self._read_file_recursive(filename))
def _read_file_recursive(self, filename, included_files=None):
    """Read ``filename`` and splice in every file it ``#include``s.

    Directive lines are consumed rather than returned:
      * ``#include "file"`` is replaced by the lines of that file,
      * ``#define KEY VALUE`` is recorded in ``self.defines``,
      * ``typedef old new;`` is recorded in ``self.typedefs``.
    Every other line is returned unchanged, including its line ending.

    Args:
        filename: path of the file to read.
        included_files: set of paths already read; used to read each file at
            most once. Created on the outermost call.

    Returns:
        The list of non-directive lines. A file that cannot be found
        contributes no lines and an error message is printed.
    """
    if included_files is None:
        included_files = set()
    # Normalise so that "std.h" and "./std.h" count as the same file.
    file_key = os.path.normpath(filename)
    if file_key in included_files:
        # Prevent infinite recursion on includes
        return []
    included_files.add(file_key)
    result_lines = []
    try:
        with open(filename, "r") as f:
            for line in f:
                line_stripped = line.strip()
                # #include "file"
                inc_match = re.match(r'#include\s+"([^"]+)"', line_stripped)
                if inc_match:
                    inc_file = inc_match.group(1)
                    # A quoted include is looked up next to the including
                    # file first; if nothing is there, the name is used as
                    # written (relative to the working directory).
                    beside = os.path.join(os.path.dirname(filename), inc_file)
                    if os.path.isfile(beside):
                        inc_file = beside
                    included_content = self._read_file_recursive(inc_file, included_files)
                    result_lines.extend(included_content)
                    continue
                # #define KEY VALUE
                def_match = re.match(r'#define\s+([a-zA-Z_]\w*)\s+(.*)', line_stripped)
                if def_match:
                    self.defines[def_match.group(1)] = def_match.group(2)
                    continue
                # typedef oldtype newtype;
                tmatch = re.match(r'typedef\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line_stripped)
                if tmatch:
                    oldt, newt = tmatch.groups()
                    # Resolve oldt if it's also a typedef
                    self.typedefs[newt] = self.apply_typedef(oldt)
                    continue
                result_lines.append(line)
    except FileNotFoundError as e:
        print(colored(f"{filename}:0: error: {e}", "red"))
    return result_lines
def _apply_defines(self, lines):
    """Replace every macro name in ``lines`` with its ``#define`` value.

    A token is a run of letters, digits and underscores; it is replaced only
    when the whole token is a key of ``self.defines``. Text inside string
    literals, character literals and ``//`` comments is left untouched.
    Each replacement is reported on stdout. Replacement text is not scanned
    again, so a macro that expands to another macro name stays as that name.

    Returns:
        A new list with one processed line per input line.
    """
    token_pattern = re.compile(
        r'"(?:\\.|[^"\\])*"'     # string literal: skipped
        r"|'(?:\\.|[^'\\])*'"    # character literal: skipped
        r"|//.*"                 # comment to end of line: skipped
        r"|[A-Za-z0-9_]+"        # candidate macro name
    )

    def substitute(match):
        part = match.group(0)
        # Literals and comments start with a quote or a slash, so they can
        # never equal a macro name and always fall through unchanged.
        if part in self.defines:
            print(f"Replaced {part} with {self.defines[part]}")
            return self.defines[part]
        return part

    return [token_pattern.sub(substitute, line) for line in lines]
def apply_typedef(self, t):
    """Return the type registered for alias ``t``, or ``t`` itself if none is."""
    return self.typedefs.get(t, t)
def allocate_bytes(self, count):
    """Reserve ``count`` bytes below the allocation pointer.

    The reserved range ends at the current ``self.data_ptr``; afterwards the
    pointer sits one byte below the range.

    Returns:
        The lowest address of the reserved range.

    Raises:
        Exception: if the range would start below address 0. The pointer is
            left unchanged in that case.
    """
    lowest = self.data_ptr - count + 1
    if lowest < 0:
        raise Exception("Out of memory!")
    self.data_ptr = lowest - 1
    return lowest
def allocate_var(self, name, var_type="int"):
    """Return the Variable called ``name``, creating it on first use.

    An existing variable is returned as is, whatever ``var_type`` says.
    A new variable of type ``"struct:<name>"`` gets one byte per field of
    that struct definition; any other type gets a single byte. ``var_type``
    is passed through ``apply_typedef`` first.
    """
    var_type = self.apply_typedef(var_type)
    if name in self.variables:
        return self.variables[name]
    if var_type.startswith("struct:"):
        # each field occupies 1 byte
        size = len(self.struct_definitions[var_type.split(":")[1]])
    else:
        size = 1
    var = Variable(name, self.allocate_bytes(size), var_type)
    self.variables[name] = var
    return var
def allocate_array(self, name, length, var_type="int"):
    """Reserve ``length`` bytes of element storage plus a one-byte variable.

    The variable is registered under ``name`` with type ``"array"``.

    Returns:
        ``(variable, storage_start)`` where ``storage_start`` is the lowest
        address of the element storage.
    """
    # The element type is resolved but does not influence the allocation.
    var_type = self.apply_typedef(var_type)
    storage_start = self.allocate_bytes(length)
    array_var = Variable(name, self.allocate_bytes(1), "array")
    self.variables[name] = array_var
    return array_var, storage_start
def store_string(self, string_value):
    """Emit code that writes ``string_value`` plus a terminating 0 to memory.

    The two-character sequence backslash-n is turned into a newline first.
    One byte per character and one for the terminator are reserved with
    ``allocate_bytes``; the characters are stored at ascending addresses.

    Returns:
        ``(asm, start_addr)``: the instructions and the address of the first
        character.
    """
    text = string_value.replace('\\n', '\n')
    start_addr = self.allocate_bytes(len(text) + 1)
    asm = []
    for position, ch in enumerate(text):
        asm += [f"ldw a, {ord(ch)}", f"str a, 0x{start_addr + position:X}"]
    # terminator directly after the last character
    asm += ["ldw a, 0", f"str a, 0x{start_addr + len(text):X}"]
    return asm, start_addr
def get_struct_field_offset(self, struct_type, field_name):
    """Return the position of ``field_name`` within a struct.

    ``struct_type`` has the form ``"struct:<name>"``; the position is the
    index of the field in that struct's definition list.

    Raises:
        Exception: if the struct has no field with that name.
    """
    struct_name = struct_type.split(":")[1]
    for position, (fname, _ftype) in enumerate(self.struct_definitions[struct_name]):
        if fname == field_name:
            return position
    raise Exception(f"Field {field_name} not found in {struct_type}")
def parse_condition(self, cond_str):
    """Split a condition of the form ``a == b`` or ``a != b`` into its parts.

    Both operands must be identifiers. Surrounding whitespace is ignored.

    Returns:
        ``(var1, op, var2)`` as strings.

    Raises:
        Exception: if ``cond_str`` is anything other than exactly one such
            comparison.
    """
    # fullmatch, not match: a condition such as "a == b + 1" must be rejected
    # instead of being compiled as "a == b" with the rest silently dropped.
    m = re.fullmatch(r'([a-zA-Z_]\w*)\s*(==|!=)\s*([a-zA-Z_]\w*)', cond_str.strip())
    if not m:
        raise Exception("Unsupported condition: " + cond_str)
    return m.groups()
def compile_condition(self, var1, op, var2):
    """Emit code that leaves ``var1 - var2`` in register a.

    Both variables are looked up (or created) with ``allocate_var`` and
    loaded into registers a and b before the subtraction.

    Returns:
        ``(asm, op)``; ``op`` is handed back unchanged so the caller can pick
        the branch instruction.
    """
    left = self.allocate_var(var1)
    right = self.allocate_var(var2)
    code = [
        f"ldr a, 0x{left.address:X}",
        f"ldr b, 0x{right.address:X}",
        "sub a, b",  # a = a - b
    ]
    return code, op
def extract_comment(self, line):
    """Split ``line`` at its ``//`` comment.

    A ``//`` inside a double-quoted string literal does not start a comment.

    Returns:
        ``(code, comment)``: the text before the ``//`` and the stripped text
        after it. When the line has no comment, ``(line, None)``.
    """
    in_string = False
    pos = 0
    while pos < len(line):
        ch = line[pos]
        if in_string:
            if ch == '\\':
                pos += 2  # skip the escaped character, e.g. \" inside a string
                continue
            if ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif line.startswith('//', pos):
            return line[:pos], line[pos + 2:].strip()
        pos += 1
    if in_string:
        # Unbalanced quote: there is no reliable string boundary, so fall
        # back to splitting at the first "//" anywhere in the line.
        comment_index = line.find('//')
        if comment_index != -1:
            return line[:comment_index], line[comment_index + 2:].strip()
    return line, None
def compile_line(self, code_part):
    """Compile one source line (comment already removed) to assembly.

    The line is matched against a fixed sequence of statement patterns and
    the first match wins:
      struct definitions and ``struct T v;`` declarations,
      ``if (a == b) {`` / ``while (a != b) {`` and the closing ``}``,
      struct field stores and loads (``p.x = 1;``, ``p.x = v + 1;``,
      ``p.x = v;``, ``v = p.x;``, ``print_int(p.x);``),
      ``int arr[N];``, ``int x = N;``, ``int y = x + N;``,
      ``char *s = "...";``, ``x = N;``, ``x = y + N;``,
      ``arr[I] = N;``, ``x = arr[I];``,
      ``print_char(x);``, ``print_char(arr[I]);``, ``print_int(x);``,
      ``print_string(s);`` and ``return N;``.

    Returns:
        The list of instructions for the line. Struct definitions,
        declarations without initialiser and unrecognised lines yield an
        empty list.
    """
    line = code_part.strip()
    asm = []

    def field_address(varname, fieldname):
        """Code that leaves the address of ``varname.fieldname`` in a."""
        v = self.allocate_var(varname)
        offset = self.get_struct_field_offset(v.type, fieldname)
        # allocate_var places the struct's fields directly at v.address (no
        # pointer is stored anywhere), so the base must be loaded as an
        # immediate with ldw. ldr would fetch the first field's value and
        # use that as the address.
        code = [f"ldw a, 0x{v.address:X}"]
        if offset != 0:
            code += [f"ldw b, {offset}", "add a, b"]
        return code

    def element_address(arr, index):
        """Code that leaves the address of ``arr[index]`` in a."""
        arr_var = self.allocate_var(arr)
        # An array variable holds a pointer to its storage, hence ldr.
        return [f"ldr a, 0x{arr_var.address:X}", f"ldw b, {index}", "add a, b"]

    def store_constant(varname, value):
        """Code for ``varname = value``."""
        var = self.allocate_var(varname, "int")
        return [f"ldw a, {value}", f"str a, 0x{var.address:X}"]

    def store_sum(varname, source, number):
        """Code for ``varname = source + number``."""
        v1 = self.allocate_var(varname, "int")
        v2 = self.allocate_var(source, "int")
        return [f"ldr a, 0x{v2.address:X}", f"ldw b, {number}",
                "add a, b", f"str a, 0x{v1.address:X}"]

    # Inside "struct X { ... };" only field lines and the closing "};" matter.
    if self.in_struct_def:
        if line.startswith("};"):
            self.struct_definitions[self.current_struct_name] = self.current_struct_fields
            self.in_struct_def = False
            self.current_struct_name = None
            self.current_struct_fields = []
            return asm
        mfield = re.match(r'int\s+([a-zA-Z_]\w*)\s*;', line)
        if mfield:
            self.current_struct_fields.append((mfield.group(1), "int"))
        return asm

    # struct definition start
    m = re.match(r'struct\s+([a-zA-Z_]\w*)\s*\{', line)
    if m:
        self.in_struct_def = True
        self.current_struct_name = m.group(1)
        self.current_struct_fields = []
        return asm

    # struct var declaration
    m = re.match(r'struct\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line)
    if m:
        sname, varname = m.groups()
        self.allocate_var(varname, "struct:" + sname)
        return asm

    # if statement
    m = re.match(r'if\s*\(([^)]+)\)\s*\{', line)
    if m:
        var1, op, var2 = self.parse_condition(m.group(1))
        end_label = self.new_label("endif")
        cond_code, cmp_op = self.compile_condition(var1, op, var2)
        asm.extend(cond_code)
        # a holds var1 - var2. Skip the block when the condition is false:
        # for '==' that is a != 0, for '!=' it is a == 0.
        branch = "bne" if cmp_op == '==' else "beq"
        asm.append(branch + " a, 0, " + end_label)
        self.block_stack.append(('if', end_label))
        return asm

    # while statement
    m = re.match(r'while\s*\(([^)]+)\)\s*\{', line)
    if m:
        var1, op, var2 = self.parse_condition(m.group(1))
        start_label = self.new_label("whilestart")
        end_label = self.new_label("whileend")
        asm.append(start_label + ":")
        cond_code, cmp_op = self.compile_condition(var1, op, var2)
        asm.extend(cond_code)
        branch = "bne" if cmp_op == '==' else "beq"
        asm.append(branch + " a, 0, " + end_label)
        self.block_stack.append(('while', start_label, end_label))
        return asm

    # end of block
    if line == "}":
        if not self.block_stack:
            return asm
        blk = self.block_stack.pop()
        if blk[0] == 'if':
            asm.append(blk[1] + ":")
        elif blk[0] == 'while':
            # jump back to start
            asm.append(f"jmp {blk[1]}")
            asm.append(blk[2] + ":")
        return asm

    # p.x = number;
    m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
    if m:
        varname, fieldname, value = m.groups()
        asm += field_address(varname, fieldname)
        asm += [f"ldw c, {int(value)}", "stb c, a"]
        return asm

    # p.x = var + number;
    m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
    if m:
        varname, fieldname, srcvar, number = m.groups()
        asm += field_address(varname, fieldname)
        v2 = self.allocate_var(srcvar)
        asm += [f"ldr c, 0x{v2.address:X}", f"ldw d, {int(number)}",
                "add c, d", "stb c, a"]
        return asm

    # p.x = srcvar;
    m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*;', line)
    if m:
        varname, fieldname, srcvar = m.groups()
        asm += field_address(varname, fieldname)
        v2 = self.allocate_var(srcvar)
        asm += [f"ldr c, 0x{v2.address:X}", "stb c, a"]
        return asm

    # x = p.x;
    m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*;', line)
    if m:
        dst, varname, fieldname = m.groups()
        address_code = field_address(varname, fieldname)
        vd = self.allocate_var(dst)
        asm += address_code
        asm += ["ldb c, a", f"str c, 0x{vd.address:X}"]
        return asm

    # print_int(p.x);
    m = re.match(r'print_int\(([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\)\s*;', line)
    if m:
        varname, fieldname = m.groups()
        asm += field_address(varname, fieldname)
        asm += ["ldb a, a", "int 0x01"]
        return asm

    # int arr[10];
    m = re.match(r'int\s+([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
    if m:
        arr_var, start_addr = self.allocate_array(m.group(1), int(m.group(2)))
        # store the address of the element storage in the array variable
        asm += [f"ldw a, 0x{start_addr:X}", f"str a, 0x{arr_var.address:X}"]
        return asm

    # int x = number;
    m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
    if m:
        asm += store_constant(m.group(1), int(m.group(2)))
        return asm

    # int y = x + number;
    m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
    if m:
        varname, var2, number = m.groups()
        asm += store_sum(varname, var2, int(number))
        return asm

    # char *msg = "Hello\n";
    m = re.match(r'char\s*\*\s*([a-zA-Z_]\w*)\s*=\s*"([^"]*)"\s*;', line)
    if m:
        varname, string_val = m.groups()
        v = self.allocate_var(varname, "char*")
        code, start_addr = self.store_string(string_val)
        asm.extend(code)
        asm += [f"ldw a, 0x{start_addr:X}", f"str a, 0x{v.address:X}"]
        return asm

    # var = number;
    m = re.match(r'([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
    if m:
        asm += store_constant(m.group(1), int(m.group(2)))
        return asm

    # var = var2 + number;
    m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
    if m:
        varname, var2, number = m.groups()
        asm += store_sum(varname, var2, int(number))
        return asm

    # var[index] = number;
    m = re.match(r'([a-zA-Z_]\w*)\[(\d+)\]\s*=\s*(\d+)\s*;', line)
    if m:
        arr, index, value = m.groups()
        asm += element_address(arr, int(index))
        asm += [f"ldw c, {int(value)}", "stb c, a"]
        return asm

    # x = arr[index];
    m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
    if m:
        varname, arr, index = m.groups()
        v = self.allocate_var(varname, "int")
        asm += element_address(arr, int(index))
        asm += ["ldb d, a", f"str d, 0x{v.address:X}"]
        return asm

    # print_char(var);
    m = re.match(r'print_char\(([a-zA-Z_]\w*)\)\s*;', line)
    if m:
        v = self.allocate_var(m.group(1))
        asm += [f"ldr a, 0x{v.address:X}", "int 0x00"]
        return asm

    # print_char(arr[index]);
    m = re.match(r'print_char\(([a-zA-Z_]\w*)\[(\d+)\]\)\s*;', line)
    if m:
        arr, index = m.groups()
        asm += element_address(arr, int(index))
        asm += ["ldb a, a", "int 0x00"]
        return asm

    # print_int(var);
    m = re.match(r'print_int\(([a-zA-Z_]\w*)\)\s*;', line)
    if m:
        v = self.allocate_var(m.group(1))
        asm += [f"ldr a, 0x{v.address:X}", "int 0x01"]
        return asm

    # print_string(var);
    m = re.match(r'print_string\(([a-zA-Z_]\w*)\)\s*;', line)
    if m:
        v = self.allocate_var(m.group(1), "char*")
        # Fresh labels per call: fixed names would be defined twice as soon
        # as a program prints more than one string.
        loop_label = self.new_label("string_loop")
        done_label = self.new_label("string_end")
        asm += [
            "ldw d, 0",
            f"ldr b, 0x{v.address:X}",
            "ldw c, 1",
            f"{loop_label}:",
            "ldb a, b",
            f"beq a, d, {done_label}",  # stop at the terminating 0
            "int 0x00",
            "add b, c",
            f"jmp {loop_label}",
            f"{done_label}:",
        ]
        return asm

    # return number; (the number itself is not used)
    m = re.match(r'return\s+(\d+)\s*;', line)
    if m:
        asm.append("int 0xFF")
        return asm

    # Unrecognized line or empty
    return asm
def compile_c(self, c_code):
    """Compile the body of ``main`` in ``c_code`` to assembly lines.

    The body of ``main`` runs from the line after the one containing
    ``int main(`` up to the first following line that starts with ``}`` in
    column 0. Lines outside that range are fed to ``compile_line`` only for
    their effect on compiler state (struct definitions, declarations); code
    they produce is discarded.

    Returns:
        A list starting with ``"main:"`` followed by one entry per
        instruction. A source comment is attached to the first instruction
        of its line, or emitted on its own when the line produced no code.
    """
    all_lines = c_code.split('\n')
    main_body = []
    in_main = False
    main_seen = False
    # Lines inside main are only collected here and compiled exactly once
    # below. Compiling them in this pass as well would reserve memory for
    # every string and array twice and use up label numbers.
    for cline in all_lines:
        if not main_seen:
            trimmed = cline.rstrip()
            if in_main:
                if trimmed.startswith('}'):
                    in_main = False
                    main_seen = True
                else:
                    main_body.append(trimmed)
                continue
            if 'int main(' in trimmed:
                in_main = True
                continue
        # struct definitions might appear outside main, before or after it
        self.compile_line(cline)

    asm = ["main:"]
    for line in main_body:
        code_part, comment_part = self.extract_comment(line)
        instructions = self.compile_line(code_part)
        if not instructions:
            if comment_part:
                asm.append(f" ; {comment_part}")
            continue
        for i, instr in enumerate(instructions):
            if i == 0 and comment_part:
                asm.append(f" {instr} ; {comment_part}")
            else:
                asm.append(f" {instr}")
    return asm
if __name__ == "__main__":
    # Script entry point: preprocess main.c, compile it, and write the
    # resulting assembly to test.asm, one instruction per line.
    compiler = Compiler()
    preprocessed_lines = compiler.preprocess("main.c")
    c_code = "\n".join(preprocessed_lines)
    asm_code = compiler.compile_c(c_code)
    with open("test.asm", "w") as out:
        out.writelines(line + "\n" for line in asm_code)