did nothing...

This commit is contained in:
OusmBlueNinja 2025-03-28 10:10:24 -05:00
parent b88b8bffc9
commit a86ffb54db
6 changed files with 783 additions and 565 deletions

Binary file not shown.

View File

@ -1,548 +1,241 @@
#!/usr/bin/env python3
import re import re
import os
from termcolor import colored
# Valid registers and instructions.
valid_registers = {"a", "b", "c", "d", "e", "f"}
valid_instructions = {"ldw", "mov", "add", "sub", "str", "ldr", "int",
"push", "pop", "jsr", "ret", "xor", "and", "jmp",
"mul", "div", "bne", "beq", "blt", "ldb", "stb"}
class Variable: # Fixed pool of registers.
def __init__(self, name, address, var_type="int"): register_pool = ["a", "b", "c", "d", "e", "f"]
self.name = name
self.address = address
self.type = var_type
class Compiler: def allocate_register(var_name, context):
def __init__(self): """Allocate a register for a variable in the given function context."""
# 1024 bytes total: 0x000 to 0x3FF var_to_reg = context['var_to_reg']
self.data_ptr = 0x400 for reg in register_pool:
self.variables = {} if reg not in var_to_reg.values():
self.struct_definitions = {} var_to_reg[var_name] = reg
self.in_struct_def = False return reg
self.current_struct_name = None raise Exception("Out of registers!")
self.current_struct_fields = []
self.defines = {} # For #define macros
self.typedefs = {} # For typedef
self.label_counter = 0
self.block_stack = [] # For if/while blocks
def new_label(self, prefix): def compile_expr(expr, dest, temp, context):
lbl = f"{prefix}{self.label_counter}" """
self.label_counter += 1 Compile a simple expression (literals, variables, +, -) into assembly.
return lbl
def preprocess(self, filename): Parameters:
lines = self._read_file_recursive(filename) expr: string expression (e.g., "5", "x", "x + 3")
processed_lines = self._apply_defines(lines) dest: destination register for the result.
return processed_lines temp: temporary register.
context: dictionary with function context (like var_to_reg).
"""
var_to_reg = context['var_to_reg']
instructions = []
tokens = re.split(r'(\+|\-)', expr)
tokens = [t.strip() for t in tokens if t.strip() != '']
if not tokens:
return instructions
def _read_file_recursive(self, filename, included_files=None): # Process first term.
if included_files is None: token = tokens[0]
included_files = set() if token.isdigit():
instructions.append(f"ldw {dest}, {token}")
else:
if token not in var_to_reg:
raise Exception(f"Variable '{token}' not declared")
src_reg = var_to_reg[token]
if src_reg != dest:
instructions.append(f"mov {dest}, {src_reg}")
i = 1
while i < len(tokens):
op = tokens[i]
operand = tokens[i+1]
if operand.isdigit():
instructions.append(f"ldw {temp}, {operand}")
if op == "+":
instructions.append(f"add {dest}, {temp}")
elif op == "-":
instructions.append(f"sub {dest}, {temp}")
else:
raise Exception(f"Unsupported operator '{op}'")
else:
if operand not in var_to_reg:
raise Exception(f"Variable '{operand}' not declared")
operand_reg = var_to_reg[operand]
if op == "+":
instructions.append(f"add {dest}, {operand_reg}")
elif op == "-":
instructions.append(f"sub {dest}, {operand_reg}")
else:
raise Exception(f"Unsupported operator '{op}'")
i += 2
return instructions
if filename in included_files: def compile_statement(line, context):
# Prevent infinite recursion on includes """
return [] Compile a single statement from our limited C language.
Supports:
- Variable declaration: e.g., "int x = 5;"
- Assignment: e.g., "x = x + 2;"
- Function call: e.g., "foo();"
- Return statement: e.g., "return x;"
"""
var_to_reg = context['var_to_reg']
instructions = []
line = line.strip().rstrip(';')
if not line:
return instructions
included_files.add(filename) # Function call statement pattern: identifier followed by "()"
m = re.match(r'^(\w+)\s*\(\s*\)\s*$', line)
if m:
func_name = m.group(1)
instructions.append(f"jsr {func_name}")
return instructions
result_lines = [] # Variable declaration.
try: if line.startswith("int "):
with open(filename, "r") as f: line = line[4:].strip() # Remove "int "
for line in f: parts = line.split("=", 1)
line_stripped = line.strip() if len(parts) != 2:
raise Exception("Invalid declaration syntax.")
var_name = parts[0].strip()
expr = parts[1].strip()
reg = allocate_register(var_name, context)
# Choose a temporary register different from the destination.
temp = next((r for r in register_pool if r != reg and r not in var_to_reg.values()), None)
if temp is None:
temp = next((r for r in register_pool if r != reg), None)
instructions.extend(compile_expr(expr, reg, temp, context))
return instructions
# #include "file" # Return statement.
inc_match = re.match(r'#include\s+"([^"]+)"', line_stripped) if line.startswith("return"):
if inc_match: ret_expr = line[6:].strip() # Remove "return"
inc_file = inc_match.group(1) if ret_expr:
included_content = self._read_file_recursive(inc_file, included_files) # Convention: return value in register a.
result_lines.extend(included_content) temp = next((r for r in register_pool if r != "a" and r not in var_to_reg.values()), None)
continue if temp is None:
temp = next((r for r in register_pool if r != "a"), None)
instructions.extend(compile_expr(ret_expr, "a", temp, context))
instructions.append("ret")
return instructions
# #define KEY VALUE # Assignment statement.
def_match = re.match(r'#define\s+([a-zA-Z_]\w*)\s+(.*)', line_stripped) if "=" in line:
if def_match: parts = line.split("=", 1)
key = def_match.group(1) var_name = parts[0].strip()
value = def_match.group(2) expr = parts[1].strip()
self.defines[key] = value if var_name not in var_to_reg:
continue raise Exception(f"Variable '{var_name}' not declared")
dest = var_to_reg[var_name]
temp = next((r for r in register_pool if r != dest and r not in var_to_reg.values()), None)
if temp is None:
temp = next((r for r in register_pool if r != dest), None)
instructions.extend(compile_expr(expr, dest, temp, context))
return instructions
# typedef oldtype newtype; raise Exception(f"Unrecognized statement: {line}")
tmatch = re.match(r'typedef\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line_stripped)
if tmatch:
oldt = tmatch.group(1)
newt = tmatch.group(2)
# Resolve oldt if it's also a typedef
oldt = self.apply_typedef(oldt)
self.typedefs[newt] = oldt
continue
result_lines.append(line) def compile_function(func_name, lines):
except FileNotFoundError as e: """
print(colored(f"{filename}:0: error: {e}", "red")) Compile a function given its name and body (as a list of lines).
Returns the assembly instructions for the function.
"""
return result_lines # Create a fresh context for the function.
context = {"var_to_reg": {}}
def _apply_defines(self, lines): instructions = []
# Function label.
token_pattern = re.compile(r'([A-Za-z0-9_]+)') instructions.append(f"{func_name}:")
processed = []
for line in lines: for line in lines:
parts = token_pattern.split(line) line = line.strip()
# parts: tokens and separators if not line or line.startswith("//"):
for i, part in enumerate(parts):
if part in self.defines:
print(f"Replaced {part} with {self.defines[part]}")
part = self.defines[part]
parts[i] = part
new_line = "".join(parts)
processed.append(new_line)
return processed
def apply_typedef(self, t):
    """Return the type registered for alias *t*, or *t* itself if it is not an alias."""
    return self.typedefs[t] if t in self.typedefs else t
def allocate_bytes(self, count):
    """Reserve *count* bytes just below the current data pointer.

    The data area grows downward: ``self.data_ptr`` is the highest free
    address, and after the call it points one byte below the new region.

    Returns the lowest address of the reserved region.
    Raises Exception("Out of memory!") if the region would start below 0.
    """
    lowest = self.data_ptr - count + 1
    if lowest < 0:
        raise Exception("Out of memory!")
    self.data_ptr = lowest - 1
    return lowest
def allocate_var(self, name, var_type="int"):
    """Return the Variable called *name*, creating and registering it on first use.

    *var_type* is resolved through ``apply_typedef`` first. A type of the
    form ``struct:<name>`` reserves one byte per field of that struct
    definition; every other type reserves a single byte. If *name* is
    already registered, the existing Variable is returned unchanged and
    *var_type* is ignored.
    """
    resolved = self.apply_typedef(var_type)
    if name in self.variables:
        return self.variables[name]

    size = 1
    if resolved.startswith("struct:"):
        struct_name = resolved.split(":")[1]
        size = len(self.struct_definitions[struct_name])  # each field is 1 byte

    created = Variable(name, self.allocate_bytes(size), resolved)
    self.variables[name] = created
    return created
def allocate_array(self, name, length, var_type="int"):
    """Reserve storage for an array plus a one-byte variable registered under *name*.

    The *length* element bytes are reserved first, then one more byte for
    the variable itself. The variable is always registered with type
    ``"array"``; *var_type* is resolved through ``apply_typedef`` but the
    result is not used further.

    Returns ``(variable, first_element_address)``.
    """
    self.apply_typedef(var_type)
    elements_start = self.allocate_bytes(length)
    holder = Variable(name, self.allocate_bytes(1), "array")
    self.variables[name] = holder
    return holder, elements_start
def store_string(self, string_value):
    """Emit code that writes *string_value* plus a terminating 0 into fresh memory.

    The two-character sequence backslash-n in the input is turned into a
    real newline first. One ``ldw``/``str`` pair is emitted per character,
    followed by a pair that stores the terminating zero.

    Returns ``(instructions, start_address)``.
    """
    text = string_value.replace('\\n', '\n')
    base = self.allocate_bytes(len(text) + 1)
    code = []
    for offset, char in enumerate(text):
        code.append(f"ldw a, {ord(char)}")
        code.append(f"str a, 0x{base + offset:X}")
    # Terminator goes right after the last character.
    code.append("ldw a, 0")
    code.append(f"str a, 0x{base + len(text):X}")
    return code, base
def get_struct_field_offset(self, struct_type, field_name):
    """Return the position of *field_name* within the struct named by *struct_type*.

    *struct_type* has the form ``struct:<name>``; the offset is the field's
    index in that struct's definition list.

    Raises Exception if the struct has no such field.
    """
    struct_name = struct_type.split(":")[1]
    field_names = [fname for fname, _ftype in self.struct_definitions[struct_name]]
    if field_name in field_names:
        return field_names.index(field_name)
    raise Exception(f"Field {field_name} not found in {struct_type}")
def parse_condition(self, cond_str):
    """Split a condition such as ``a == b`` or ``a != b`` into its parts.

    Both operands must be identifiers. Returns ``(left, operator, right)``.
    Raises Exception for anything the pattern does not recognise.
    """
    found = re.match(r'([a-zA-Z_]\w*)\s*(==|!=)\s*([a-zA-Z_]\w*)', cond_str.strip())
    if found is None:
        raise Exception("Unsupported condition: " + cond_str)
    return found.group(1), found.group(2), found.group(3)
def compile_condition(self, var1, op, var2):
    """Emit code that loads both operands and leaves ``var1 - var2`` in register a.

    Returns ``(instructions, op)``; *op* is handed back untouched so the
    caller can pick the branch instruction.
    """
    left = self.allocate_var(var1)
    right = self.allocate_var(var2)
    code = [
        f"ldr a, 0x{left.address:X}",
        f"ldr b, 0x{right.address:X}",
        "sub a, b",  # a = a - b
    ]
    return code, op
def extract_comment(self, line):
    """Split *line* into its code and its trailing ``//`` comment.

    A ``//`` inside a double-quoted string literal (for example
    ``"http://x"``) belongs to the code and does not start a comment.
    Quote tracking is a plain toggle on ``"``; escaped quotes are not
    interpreted.

    Returns ``(code_part, comment_part)``. ``comment_part`` is the comment
    text with surrounding whitespace stripped, or ``None`` when the line
    has no comment (in which case ``code_part`` is *line* unchanged).
    """
    in_string = False
    # The last character cannot start a two-character "//" marker.
    for pos, ch in enumerate(line[:-1]):
        if ch == '"':
            in_string = not in_string
        elif not in_string and ch == '/' and line[pos + 1] == '/':
            return line[:pos], line[pos + 2:].strip()
    return line, None
def compile_line(self, code_part):
    """Translate one line of C source into a list of assembly instructions.

    The stripped line is tried against a fixed sequence of patterns
    (struct handling, ``if``/``while`` headers, block end, struct field
    access, declarations, assignments, array access, print helpers,
    ``return``); the first pattern that matches produces the result.
    Lines that match nothing yield an empty list.

    Side effects: may update struct-definition state, register variables
    through ``allocate_var``/``allocate_array``, push or pop
    ``block_stack`` and consume labels from ``new_label``.

    Each ``print_string`` gets its own pair of loop labels from
    ``new_label`` so that several calls in one program do not emit
    duplicate labels.
    """
    line = code_part.strip()
    asm = []

    if self.in_struct_def:
        if line.startswith("};"):
            self.struct_definitions[self.current_struct_name] = self.current_struct_fields
            self.in_struct_def = False
            self.current_struct_name = None
            self.current_struct_fields = []
            return asm
        mfield = re.match(r'int\s+([a-zA-Z_]\w*)\s*;', line)
        if mfield:
            fname = mfield.group(1)
            ftype = "int"
            self.current_struct_fields.append((fname, ftype))
        # NOTE(review): indentation was lost in the reviewed copy; this
        # assumes lines inside a struct body are never compiled as
        # statements -- confirm against the original file.
        return asm

    # struct definition start
    msd = re.match(r'struct\s+([a-zA-Z_]\w*)\s*\{', line)
    if msd:
        self.in_struct_def = True
        self.current_struct_name = msd.group(1)
        self.current_struct_fields = []
        return asm

    # struct var declaration
    msv = re.match(r'struct\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line)
    if msv:
        sname, varname = msv.groups()
        var_type = "struct:" + sname
        self.allocate_var(varname, var_type)
        return asm

    # if statement
    mif = re.match(r'if\s*\(([^)]+)\)\s*\{', line)
    if mif:
        cond_str = mif.group(1)
        var1, op, var2 = self.parse_condition(cond_str)
        end_label = self.new_label("endif")
        cond_code, cmp_op = self.compile_condition(var1, op, var2)
        asm.extend(cond_code)
        # if '==': jump if not zero a != 0
        # if '!=': jump if zero a == 0
        if cmp_op == '==':
            asm.append("bne a, 0, " + end_label)
        else:
            asm.append("beq a, 0, " + end_label)
        self.block_stack.append(('if', end_label))
        return asm

    # while statement
    mwhile = re.match(r'while\s*\(([^)]+)\)\s*\{', line)
    if mwhile:
        cond_str = mwhile.group(1)
        var1, op, var2 = self.parse_condition(cond_str)
        start_label = self.new_label("whilestart")
        end_label = self.new_label("whileend")
        asm.append(start_label + ":")
        cond_code, cmp_op = self.compile_condition(var1, op, var2)
        asm.extend(cond_code)
        if cmp_op == '==':
            asm.append("bne a, 0, " + end_label)
        else:
            asm.append("beq a, 0, " + end_label)
        self.block_stack.append(('while', start_label, end_label))
        return asm

    # end of block
    if line == "}":
        if not self.block_stack:
            return asm
        blk = self.block_stack.pop()
        if blk[0] == 'if':
            end_label = blk[1]
            asm.append(end_label + ":")
        elif blk[0] == 'while':
            start_label = blk[1]
            end_label = blk[2]
            # jump back to start
            asm.append(f"jmp {start_label}")
            asm.append(end_label + ":")
        return asm

    # p.x = number;
    m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
    if m:
        varname, fieldname, value = m.groups()
        value = int(value)
        v = self.allocate_var(varname)
        offset = self.get_struct_field_offset(v.type, fieldname)
        asm.append(f"ldr a, 0x{v.address:X}")
        if offset != 0:
            asm.append(f"ldw b, {offset}")
            asm.append("add a, b")
        asm.append(f"ldw c, {value}")
        asm.append("stb c, a")
        return asm

    # p.x = var + number;
    m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
    if m:
        varname, fieldname, srcvar, number = m.groups()
        number = int(number)
        v = self.allocate_var(varname)
        offset = self.get_struct_field_offset(v.type, fieldname)
        asm.append(f"ldr a, 0x{v.address:X}")
        if offset != 0:
            asm.append(f"ldw b, {offset}")
            asm.append("add a, b")
        v2 = self.allocate_var(srcvar)
        asm.append(f"ldr c, 0x{v2.address:X}")
        asm.append(f"ldw d, {number}")
        asm.append("add c, d")
        asm.append("stb c, a")
        return asm

    # p.x = srcvar;
    m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*;', line)
    if m:
        varname, fieldname, srcvar = m.groups()
        v = self.allocate_var(varname)
        offset = self.get_struct_field_offset(v.type, fieldname)
        asm.append(f"ldr a, 0x{v.address:X}")
        if offset != 0:
            asm.append(f"ldw b, {offset}")
            asm.append("add a, b")
        v2 = self.allocate_var(srcvar)
        asm.append(f"ldr c, 0x{v2.address:X}")
        asm.append("stb c, a")
        return asm

    # x = p.x;
    m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*;', line)
    if m:
        dst, varname, fieldname = m.groups()
        v = self.allocate_var(varname)
        offset = self.get_struct_field_offset(v.type, fieldname)
        vd = self.allocate_var(dst)
        asm.append(f"ldr a, 0x{v.address:X}")
        if offset != 0:
            asm.append(f"ldw b, {offset}")
            asm.append("add a, b")
        asm.append("ldb c, a")
        asm.append(f"str c, 0x{vd.address:X}")
        return asm

    # print_int(p.x);
    m = re.match(r'print_int\(([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\)\s*;', line)
    if m:
        varname, fieldname = m.groups()
        v = self.allocate_var(varname)
        offset = self.get_struct_field_offset(v.type, fieldname)
        asm.append(f"ldr a, 0x{v.address:X}")
        if offset != 0:
            asm.append(f"ldw b, {offset}")
            asm.append("add a, b")
        asm.append("ldb a, a")
        asm.append("int 0x01")
        return asm

    # int arr[10];
    m = re.match(r'int\s+([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
    if m:
        varname = m.group(1)
        length = int(m.group(2))
        arr_var, start_addr = self.allocate_array(varname, length)
        asm.append(f"ldw a, 0x{start_addr:X}")
        asm.append(f"str a, 0x{arr_var.address:X}")
        return asm

    # int x = number;
    m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
    if m:
        varname = m.group(1)
        value = int(m.group(2))
        var = self.allocate_var(varname, "int")
        asm.append(f"ldw a, {value}")
        asm.append(f"str a, 0x{var.address:X}")
        return asm

    # int y = x + number;
    m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
    if m:
        varname, var2, number = m.groups()
        number = int(number)
        v1 = self.allocate_var(varname, "int")
        v2 = self.allocate_var(var2, "int")
        asm.append(f"ldr a, 0x{v2.address:X}")
        asm.append(f"ldw b, {number}")
        asm.append("add a, b")
        asm.append(f"str a, 0x{v1.address:X}")
        return asm

    # char *msg = "Hello\n";
    m = re.match(r'char\s*\*\s*([a-zA-Z_]\w*)\s*=\s*"([^"]*)"\s*;', line)
    if m:
        varname, string_val = m.groups()
        v = self.allocate_var(varname, "char*")
        code, start_addr = self.store_string(string_val)
        asm.extend(code)
        asm.append(f"ldw a, 0x{start_addr:X}")
        asm.append(f"str a, 0x{v.address:X}")
        return asm

    # var = number;
    m = re.match(r'([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
    if m:
        varname, value = m.groups()
        value = int(value)
        v = self.allocate_var(varname, "int")
        asm.append(f"ldw a, {value}")
        asm.append(f"str a, 0x{v.address:X}")
        return asm

    # var = var2 + number;
    m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
    if m:
        varname, var2, number = m.groups()
        number = int(number)
        v1 = self.allocate_var(varname, "int")
        v2 = self.allocate_var(var2, "int")
        asm.append(f"ldr a, 0x{v2.address:X}")
        asm.append(f"ldw b, {number}")
        asm.append("add a, b")
        asm.append(f"str a, 0x{v1.address:X}")
        return asm

    # var[index] = number;
    m = re.match(r'([a-zA-Z_]\w*)\[(\d+)\]\s*=\s*(\d+)\s*;', line)
    if m:
        arr, index, value = m.groups()
        index = int(index)
        value = int(value)
        arr_var = self.allocate_var(arr)
        asm.append(f"ldr a, 0x{arr_var.address:X}")
        asm.append(f"ldw b, {index}")
        asm.append("add a, b")
        asm.append(f"ldw c, {value}")
        asm.append("stb c, a")
        return asm

    # x = arr[index];
    m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
    if m:
        varname, arr, index = m.groups()
        index = int(index)
        v = self.allocate_var(varname, "int")
        arr_var = self.allocate_var(arr)
        asm.append(f"ldr a, 0x{arr_var.address:X}")
        asm.append(f"ldw b, {index}")
        asm.append("add a, b")
        asm.append("ldb d, a")
        asm.append(f"str d, 0x{v.address:X}")
        return asm

    # print_char(var);
    m = re.match(r'print_char\(([a-zA-Z_]\w*)\)\s*;', line)
    if m:
        varname = m.group(1)
        v = self.allocate_var(varname)
        asm.append(f"ldr a, 0x{v.address:X}")
        asm.append("int 0x00")
        return asm

    # print_char(arr[index]);
    m = re.match(r'print_char\(([a-zA-Z_]\w*)\[(\d+)\]\)\s*;', line)
    if m:
        arr, index = m.groups()
        index = int(index)
        arr_var = self.allocate_var(arr)
        asm.append(f"ldr a, 0x{arr_var.address:X}")
        asm.append(f"ldw b, {index}")
        asm.append("add a, b")
        asm.append("ldb a, a")
        asm.append("int 0x00")
        return asm

    # print_int(var);
    m = re.match(r'print_int\(([a-zA-Z_]\w*)\)\s*;', line)
    if m:
        varname = m.group(1)
        v = self.allocate_var(varname)
        asm.append(f"ldr a, 0x{v.address:X}")
        asm.append("int 0x01")
        return asm

    # print_string(var);
    m = re.match(r'print_string\(([a-zA-Z_]\w*)\)\s*;', line)
    if m:
        varname = m.group(1)
        v = self.allocate_var(varname, "char*")
        # Fresh labels per call: fixed names would be defined twice as soon
        # as a program prints more than one string.
        loop_label = self.new_label("string_loop")
        end_label = self.new_label("string_end")
        asm.append("ldw d, 0")
        asm.append(f"ldr b, 0x{v.address:X}")
        asm.append("ldw c, 1")
        asm.append(loop_label + ":")
        asm.append("ldb a, b")
        asm.append(f"beq a, d, {end_label}")
        asm.append("int 0x00")
        asm.append("add b, c")
        asm.append(f"jmp {loop_label}")
        asm.append(end_label + ":")
        return asm

    # return number;
    m = re.match(r'return\s+(\d+)\s*;', line)
    if m:
        asm.append("int 0xFF")
        return asm

    # Unrecognized line or empty
    return asm
def compile_c(self, c_code):
# First, parse everything to detect structs and typedef done in preprocess
all_lines = c_code.split('\n')
# struct definitions might appear outside main
for cline in all_lines:
self.compile_line(cline)
# Extract lines inside main
lines = []
in_main = False
for cline in all_lines:
cline = cline.rstrip()
if 'int main(' in cline:
in_main = True
continue continue
if in_main: stmt_instructions = compile_statement(line, context)
if cline.startswith('}'): instructions.extend(stmt_instructions)
in_main = False return instructions
break
lines.append(cline) def compile_c_to_asm(c_code):
"""
Compile a simple C program (with functions) into assembly.
The program must contain functions defined as:
int func_name() {
// statements
}
The compiled output will start at the main function (if defined).
"""
lines = c_code.splitlines()
functions = {}
current_func = None
current_lines = []
in_function = False
asm = ["main:"]
for line in lines: for line in lines:
code_part, comment_part = self.extract_comment(line) stripped = line.strip()
instructions = self.compile_line(code_part) if not stripped or stripped.startswith("//"):
if instructions: continue
for i, instr in enumerate(instructions):
if i == 0 and comment_part:
asm.append(f" {instr} ; {comment_part}")
else:
asm.append(f" {instr}")
else:
if comment_part:
asm.append(f" ; {comment_part}")
return asm # Detect function start: "int funcName() {"
m = re.match(r'^int\s+(\w+)\s*\(\s*\)\s*\{', stripped)
if m:
if in_function:
raise Exception("Nested functions not supported.")
current_func = m.group(1)
in_function = True
current_lines = []
continue
# Detect end of function: "}"
if stripped == "}":
if not in_function:
raise Exception("Unexpected '}'")
functions[current_func] = compile_function(current_func, current_lines)
in_function = False
current_func = None
current_lines = []
continue
# Inside a function, add the line.
if in_function:
current_lines.append(stripped)
else:
# Outside any function; for simplicity, ignore global declarations.
continue
# Build the final assembly code.
# If "main" is defined, list it first.
asm_lines = []
if "main" in functions:
asm_lines.extend(functions["main"])
for fname, code in functions.items():
if fname != "main":
asm_lines.extend(code)
else:
for fname, code in functions.items():
asm_lines.extend(code)
return asm_lines
# Example usage.
if __name__ == "__main__": if __name__ == "__main__":
compiler = Compiler() sample_c = """
preprocessed_lines = compiler.preprocess("main.c") // sample C program with functions.
c_code = "\n".join(preprocessed_lines) int main() {
asm_code = compiler.compile_c(c_code) int x = 5;
int y = 10;
x = x + y;
foo();
with open("test.asm", "w") as out: }
for line in asm_code:
out.write(line + "\n") int foo() {
int a = 3;
int b = 7;
a = a + b;
return a;
}
"""
asm_output = compile_c_to_asm(sample_c)
for inst in asm_output:
print(inst)

View File

@ -1 +1 @@
program = [1,0,87,5,0,1004,1,0,101,5,0,1005,1,0,108,5,0,1006,1,0,99,5,0,1007,1,0,111,5,0,1008,1,0,109,5,0,1009,1,0,101,5,0,1010,1,0,33,5,0,1011,1,0,10,5,0,1012,1,0,0,5,0,1013,1,0,1004,5,0,1024,1,3,0,6,1,1024,1,2,1,21,0,1,9,0,3,91,10,0,0,3,1,2,17,75,0,10,255,0] program = [1,0,5,1,1,10,3,0,1,13,15,0,14,0,0,1,0,3,1,1,7,3,0,1,14,0,0]

2
std.h
View File

@ -1,6 +1,6 @@
#define true 1 #define true 1
#define false 0 #define false 0
#define TEST 256
struct vec2 { struct vec2 {
int x; int x;

View File

@ -1,34 +1,11 @@
main: main:
ldw a, 87 ldw a, 5
str a, 0x3EC ldw b, 10
ldw a, 101 add a, b
str a, 0x3ED jsr foo
ldw a, 108 ret
str a, 0x3EE foo:
ldw a, 99 ldw a, 3
str a, 0x3EF ldw b, 7
ldw a, 111 add a, b
str a, 0x3F0 retZ
ldw a, 109
str a, 0x3F1
ldw a, 101
str a, 0x3F2
ldw a, 33
str a, 0x3F3
ldw a, 10
str a, 0x3F4
ldw a, 0
str a, 0x3F5
ldw a, 0x3EC
str a, 0x400
ldw d, 0
ldr b, 0x400
ldw c, 1
string_loop:
ldb a, b
beq a, d, string_end
int 0x00
add b, c
jmp string_loop
string_end:
int 0xFF

548
tests/c-test02.py Normal file
View File

@ -0,0 +1,548 @@
import re
import os
from termcolor import colored
class Variable:
    """A named storage location known to the compiler."""

    def __init__(self, name, address, var_type="int"):
        """Record the variable's name, its memory address and its type string."""
        self.name, self.address, self.type = name, address, var_type
class Compiler:
def __init__(self):
    """Set up an empty compiler state.

    Data memory is 1024 bytes, addresses 0x000 to 0x3FF, and is handed
    out from the top downward. ``data_ptr`` holds the highest address
    that is still free, so it starts at 0x3FF; starting at 0x400 would
    place the first allocation one byte past the end of memory.
    """
    # 1024 bytes total: 0x000 to 0x3FF
    self.data_ptr = 0x3FF
    self.variables = {}
    self.struct_definitions = {}
    self.in_struct_def = False
    self.current_struct_name = None
    self.current_struct_fields = []
    self.defines = {}   # For #define macros
    self.typedefs = {}  # For typedef
    self.label_counter = 0
    self.block_stack = []  # For if/while blocks
def new_label(self, prefix):
    """Return a label made of *prefix* and the current counter, then advance the counter."""
    index = self.label_counter
    self.label_counter = index + 1
    return f"{prefix}{index}"
def preprocess(self, filename):
    """Read *filename* through ``_read_file_recursive`` and expand defines in the result.

    Returns the list of processed source lines.
    """
    return self._apply_defines(self._read_file_recursive(filename))
def _read_file_recursive(self, filename, included_files=None):
    """Read *filename*, handling preprocessor-style lines, and return the remaining lines.

    - ``#include "file"`` is replaced by that file's lines (read the same way).
    - ``#define KEY VALUE`` is recorded in ``self.defines`` and dropped.
    - ``typedef old new;`` is recorded in ``self.typedefs`` (with ``old``
      resolved through ``apply_typedef``) and dropped.

    *included_files* is the set of names already visited; a file that is
    requested again contributes no lines. A missing file is reported with
    a printed error message and contributes whatever was read so far.
    """
    if included_files is None:
        included_files = set()

    # Prevent infinite recursion on includes
    if filename in included_files:
        return []
    included_files.add(filename)

    collected = []
    try:
        with open(filename, "r") as handle:
            for raw in handle:
                text = raw.strip()

                # #include "file"
                include = re.match(r'#include\s+"([^"]+)"', text)
                if include:
                    collected.extend(
                        self._read_file_recursive(include.group(1), included_files)
                    )
                    continue

                # #define KEY VALUE
                define = re.match(r'#define\s+([a-zA-Z_]\w*)\s+(.*)', text)
                if define:
                    macro, body = define.groups()
                    self.defines[macro] = body
                    continue

                # typedef oldtype newtype;
                alias = re.match(r'typedef\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', text)
                if alias:
                    source_type, alias_name = alias.groups()
                    # Resolve the source type in case it is itself an alias.
                    self.typedefs[alias_name] = self.apply_typedef(source_type)
                    continue

                collected.append(raw)
    except FileNotFoundError as e:
        print(colored(f"{filename}:0: error: {e}", "red"))

    return collected
def _apply_defines(self, lines):
    """Replace every whole token that names a define with the define's value.

    Each line is split into runs of ``[A-Za-z0-9_]`` and the separators
    between them; only a run that equals a define key exactly is replaced.
    A "Replaced ..." message is printed for each substitution.

    Returns the list of rewritten lines.
    """
    splitter = re.compile(r'([A-Za-z0-9_]+)')
    expanded = []
    for text in lines:
        # pieces: tokens and separators
        pieces = splitter.split(text)
        for idx, piece in enumerate(pieces):
            if piece not in self.defines:
                continue
            print(f"Replaced {piece} with {self.defines[piece]}")
            pieces[idx] = self.defines[piece]
        expanded.append("".join(pieces))
    return expanded
def apply_typedef(self, t):
    """Return the type registered for alias *t*, or *t* itself if it is not an alias."""
    return self.typedefs[t] if t in self.typedefs else t
def allocate_bytes(self, count):
    """Reserve *count* bytes just below the current data pointer.

    The data area grows downward: ``self.data_ptr`` is the highest free
    address, and after the call it points one byte below the new region.

    Returns the lowest address of the reserved region.
    Raises Exception("Out of memory!") if the region would start below 0.
    """
    lowest = self.data_ptr - count + 1
    if lowest < 0:
        raise Exception("Out of memory!")
    self.data_ptr = lowest - 1
    return lowest
def allocate_var(self, name, var_type="int"):
    """Return the Variable called *name*, creating and registering it on first use.

    *var_type* is resolved through ``apply_typedef`` first. A type of the
    form ``struct:<name>`` reserves one byte per field of that struct
    definition; every other type reserves a single byte. If *name* is
    already registered, the existing Variable is returned unchanged and
    *var_type* is ignored.
    """
    resolved = self.apply_typedef(var_type)
    if name in self.variables:
        return self.variables[name]

    size = 1
    if resolved.startswith("struct:"):
        struct_name = resolved.split(":")[1]
        size = len(self.struct_definitions[struct_name])  # each field is 1 byte

    created = Variable(name, self.allocate_bytes(size), resolved)
    self.variables[name] = created
    return created
def allocate_array(self, name, length, var_type="int"):
    """Reserve storage for an array plus a one-byte variable registered under *name*.

    The *length* element bytes are reserved first, then one more byte for
    the variable itself. The variable is always registered with type
    ``"array"``; *var_type* is resolved through ``apply_typedef`` but the
    result is not used further.

    Returns ``(variable, first_element_address)``.
    """
    self.apply_typedef(var_type)
    elements_start = self.allocate_bytes(length)
    holder = Variable(name, self.allocate_bytes(1), "array")
    self.variables[name] = holder
    return holder, elements_start
def store_string(self, string_value):
    """Emit code that writes *string_value* plus a terminating 0 into fresh memory.

    The two-character sequence backslash-n in the input is turned into a
    real newline first. One ``ldw``/``str`` pair is emitted per character,
    followed by a pair that stores the terminating zero.

    Returns ``(instructions, start_address)``.
    """
    text = string_value.replace('\\n', '\n')
    base = self.allocate_bytes(len(text) + 1)
    code = []
    for offset, char in enumerate(text):
        code.append(f"ldw a, {ord(char)}")
        code.append(f"str a, 0x{base + offset:X}")
    # Terminator goes right after the last character.
    code.append("ldw a, 0")
    code.append(f"str a, 0x{base + len(text):X}")
    return code, base
def get_struct_field_offset(self, struct_type, field_name):
    """Return the position of *field_name* within the struct named by *struct_type*.

    *struct_type* has the form ``struct:<name>``; the offset is the field's
    index in that struct's definition list.

    Raises Exception if the struct has no such field.
    """
    struct_name = struct_type.split(":")[1]
    field_names = [fname for fname, _ftype in self.struct_definitions[struct_name]]
    if field_name in field_names:
        return field_names.index(field_name)
    raise Exception(f"Field {field_name} not found in {struct_type}")
def parse_condition(self, cond_str):
    """Split a condition such as ``a == b`` or ``a != b`` into its parts.

    Both operands must be identifiers. Returns ``(left, operator, right)``.
    Raises Exception for anything the pattern does not recognise.
    """
    found = re.match(r'([a-zA-Z_]\w*)\s*(==|!=)\s*([a-zA-Z_]\w*)', cond_str.strip())
    if found is None:
        raise Exception("Unsupported condition: " + cond_str)
    return found.group(1), found.group(2), found.group(3)
def compile_condition(self, var1, op, var2):
    """Emit code that loads both operands and leaves ``var1 - var2`` in register a.

    Returns ``(instructions, op)``; *op* is handed back untouched so the
    caller can pick the branch instruction.
    """
    left = self.allocate_var(var1)
    right = self.allocate_var(var2)
    code = [
        f"ldr a, 0x{left.address:X}",
        f"ldr b, 0x{right.address:X}",
        "sub a, b",  # a = a - b
    ]
    return code, op
def extract_comment(self, line):
    """Split *line* into its code and its trailing ``//`` comment.

    A ``//`` inside a double-quoted string literal (for example
    ``"http://x"``) belongs to the code and does not start a comment.
    Quote tracking is a plain toggle on ``"``; escaped quotes are not
    interpreted.

    Returns ``(code_part, comment_part)``. ``comment_part`` is the comment
    text with surrounding whitespace stripped, or ``None`` when the line
    has no comment (in which case ``code_part`` is *line* unchanged).
    """
    in_string = False
    # The last character cannot start a two-character "//" marker.
    for pos, ch in enumerate(line[:-1]):
        if ch == '"':
            in_string = not in_string
        elif not in_string and ch == '/' and line[pos + 1] == '/':
            return line[:pos], line[pos + 2:].strip()
    return line, None
def compile_line(self, code_part):
line = code_part.strip()
asm = []
if self.in_struct_def:
if line.startswith("};"):
self.struct_definitions[self.current_struct_name] = self.current_struct_fields
self.in_struct_def = False
self.current_struct_name = None
self.current_struct_fields = []
return asm
mfield = re.match(r'int\s+([a-zA-Z_]\w*)\s*;', line)
if mfield:
fname = mfield.group(1)
ftype = "int"
self.current_struct_fields.append((fname, ftype))
return asm
# struct definition start
msd = re.match(r'struct\s+([a-zA-Z_]\w*)\s*\{', line)
if msd:
self.in_struct_def = True
self.current_struct_name = msd.group(1)
self.current_struct_fields = []
return asm
# struct var declaration
msv = re.match(r'struct\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line)
if msv:
sname, varname = msv.groups()
var_type = "struct:" + sname
self.allocate_var(varname, var_type)
return asm
# if statement
mif = re.match(r'if\s*\(([^)]+)\)\s*\{', line)
if mif:
cond_str = mif.group(1)
var1, op, var2 = self.parse_condition(cond_str)
end_label = self.new_label("endif")
cond_code, cmp_op = self.compile_condition(var1, op, var2)
asm.extend(cond_code)
# if '==': jump if not zero a != 0
# if '!=': jump if zero a == 0
if cmp_op == '==':
asm.append("bne a, 0, " + end_label)
else:
asm.append("beq a, 0, " + end_label)
self.block_stack.append(('if', end_label))
return asm
# while statement
mwhile = re.match(r'while\s*\(([^)]+)\)\s*\{', line)
if mwhile:
cond_str = mwhile.group(1)
var1, op, var2 = self.parse_condition(cond_str)
start_label = self.new_label("whilestart")
end_label = self.new_label("whileend")
asm.append(start_label + ":")
cond_code, cmp_op = self.compile_condition(var1, op, var2)
asm.extend(cond_code)
if cmp_op == '==':
asm.append("bne a, 0, " + end_label)
else:
asm.append("beq a, 0, " + end_label)
self.block_stack.append(('while', start_label, end_label))
return asm
# end of block
if line == "}":
if not self.block_stack:
return asm
blk = self.block_stack.pop()
if blk[0] == 'if':
end_label = blk[1]
asm.append(end_label + ":")
elif blk[0] == 'while':
start_label = blk[1]
end_label = blk[2]
# jump back to start
asm.append(f"jmp {start_label}")
asm.append(end_label + ":")
return asm
# p.x = number;
m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
if m:
varname, fieldname, value = m.groups()
value = int(value)
v = self.allocate_var(varname)
offset = self.get_struct_field_offset(v.type, fieldname)
asm.append(f"ldr a, 0x{v.address:X}")
if offset != 0:
asm.append(f"ldw b, {offset}")
asm.append("add a, b")
asm.append(f"ldw c, {value}")
asm.append("stb c, a")
return asm
# p.x = var + number;
m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
if m:
varname, fieldname, srcvar, number = m.groups()
number = int(number)
v = self.allocate_var(varname)
offset = self.get_struct_field_offset(v.type, fieldname)
asm.append(f"ldr a, 0x{v.address:X}")
if offset != 0:
asm.append(f"ldw b, {offset}")
asm.append("add a, b")
v2 = self.allocate_var(srcvar)
asm.append(f"ldr c, 0x{v2.address:X}")
asm.append(f"ldw d, {number}")
asm.append("add c, d")
asm.append("stb c, a")
return asm
# p.x = srcvar;
m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*;', line)
if m:
varname, fieldname, srcvar = m.groups()
v = self.allocate_var(varname)
offset = self.get_struct_field_offset(v.type, fieldname)
asm.append(f"ldr a, 0x{v.address:X}")
if offset != 0:
asm.append(f"ldw b, {offset}")
asm.append("add a, b")
v2 = self.allocate_var(srcvar)
asm.append(f"ldr c, 0x{v2.address:X}")
asm.append("stb c, a")
return asm
# x = p.x;
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*;', line)
if m:
dst, varname, fieldname = m.groups()
v = self.allocate_var(varname)
offset = self.get_struct_field_offset(v.type, fieldname)
vd = self.allocate_var(dst)
asm.append(f"ldr a, 0x{v.address:X}")
if offset != 0:
asm.append(f"ldw b, {offset}")
asm.append("add a, b")
asm.append("ldb c, a")
asm.append(f"str c, 0x{vd.address:X}")
return asm
# print_int(p.x);
m = re.match(r'print_int\(([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\)\s*;', line)
if m:
varname, fieldname = m.groups()
v = self.allocate_var(varname)
offset = self.get_struct_field_offset(v.type, fieldname)
asm.append(f"ldr a, 0x{v.address:X}")
if offset != 0:
asm.append(f"ldw b, {offset}")
asm.append("add a, b")
asm.append("ldb a, a")
asm.append("int 0x01")
return asm
# int arr[10];
m = re.match(r'int\s+([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
if m:
varname = m.group(1)
length = int(m.group(2))
arr_var, start_addr = self.allocate_array(varname, length)
asm.append(f"ldw a, 0x{start_addr:X}")
asm.append(f"str a, 0x{arr_var.address:X}")
return asm
# int x = number;
m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
if m:
varname = m.group(1)
value = int(m.group(2))
var = self.allocate_var(varname, "int")
asm.append(f"ldw a, {value}")
asm.append(f"str a, 0x{var.address:X}")
return asm
# int y = x + number;
m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
if m:
varname, var2, number = m.groups()
number = int(number)
v1 = self.allocate_var(varname, "int")
v2 = self.allocate_var(var2, "int")
asm.append(f"ldr a, 0x{v2.address:X}")
asm.append(f"ldw b, {number}")
asm.append("add a, b")
asm.append(f"str a, 0x{v1.address:X}")
return asm
# char *msg = "Hello\n";
m = re.match(r'char\s*\*\s*([a-zA-Z_]\w*)\s*=\s*"([^"]*)"\s*;', line)
if m:
varname, string_val = m.groups()
v = self.allocate_var(varname, "char*")
code, start_addr = self.store_string(string_val)
asm.extend(code)
asm.append(f"ldw a, 0x{start_addr:X}")
asm.append(f"str a, 0x{v.address:X}")
return asm
# var = number;
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
if m:
varname, value = m.groups()
value = int(value)
v = self.allocate_var(varname, "int")
asm.append(f"ldw a, {value}")
asm.append(f"str a, 0x{v.address:X}")
return asm
# var = var2 + number;
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
if m:
varname, var2, number = m.groups()
number = int(number)
v1 = self.allocate_var(varname, "int")
v2 = self.allocate_var(var2, "int")
asm.append(f"ldr a, 0x{v2.address:X}")
asm.append(f"ldw b, {number}")
asm.append("add a, b")
asm.append(f"str a, 0x{v1.address:X}")
return asm
# var[index] = number;
m = re.match(r'([a-zA-Z_]\w*)\[(\d+)\]\s*=\s*(\d+)\s*;', line)
if m:
arr, index, value = m.groups()
index = int(index)
value = int(value)
arr_var = self.allocate_var(arr)
asm.append(f"ldr a, 0x{arr_var.address:X}")
asm.append(f"ldw b, {index}")
asm.append("add a, b")
asm.append(f"ldw c, {value}")
asm.append("stb c, a")
return asm
# x = arr[index];
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
if m:
varname, arr, index = m.groups()
index = int(index)
v = self.allocate_var(varname, "int")
arr_var = self.allocate_var(arr)
asm.append(f"ldr a, 0x{arr_var.address:X}")
asm.append(f"ldw b, {index}")
asm.append("add a, b")
asm.append("ldb d, a")
asm.append(f"str d, 0x{v.address:X}")
return asm
# print_char(var);
m = re.match(r'print_char\(([a-zA-Z_]\w*)\)\s*;', line)
if m:
varname = m.group(1)
v = self.allocate_var(varname)
asm.append(f"ldr a, 0x{v.address:X}")
asm.append("int 0x00")
return asm
# print_char(arr[index]);
m = re.match(r'print_char\(([a-zA-Z_]\w*)\[(\d+)\]\)\s*;', line)
if m:
arr, index = m.groups()
index = int(index)
arr_var = self.allocate_var(arr)
asm.append(f"ldr a, 0x{arr_var.address:X}")
asm.append(f"ldw b, {index}")
asm.append("add a, b")
asm.append("ldb a, a")
asm.append("int 0x00")
return asm
# print_int(var);
m = re.match(r'print_int\(([a-zA-Z_]\w*)\)\s*;', line)
if m:
varname = m.group(1)
v = self.allocate_var(varname)
asm.append(f"ldr a, 0x{v.address:X}")
asm.append("int 0x01")
return asm
# print_string(var);
m = re.match(r'print_string\(([a-zA-Z_]\w*)\)\s*;', line)
if m:
varname = m.group(1)
v = self.allocate_var(varname, "char*")
asm.append("ldw d, 0")
asm.append(f"ldr b, 0x{v.address:X}")
asm.append("ldw c, 1")
asm.append("string_loop:")
asm.append("ldb a, b")
asm.append("beq a, d, string_end")
asm.append("int 0x00")
asm.append("add b, c")
asm.append("jmp string_loop")
asm.append("string_end:")
return asm
# return number;
m = re.match(r'return\s+(\d+)\s*;', line)
if m:
asm.append("int 0xFF")
return asm
# Unrecognized line or empty
return asm
def compile_c(self, c_code):
    """Compile preprocessed C source text into a list of assembly lines.

    The source is partitioned into two groups of lines:

    * the body of ``main``: every line after the first one containing
      ``int main(`` up to, but not including, the first following line
      that begins with ``}``;
    * everything else (before and after ``main``).

    Lines outside ``main`` are handed to ``compile_line`` first and the
    instructions it returns are discarded; that pass is run only for its
    effect on the compiler's state (presumably file-scope declarations
    such as struct definitions). The body of ``main`` is then compiled
    exactly once.

    Args:
        c_code: Newline-separated source text.

    Returns:
        A list of strings. The first entry is the ``main:`` label; each
        following entry is one emitted instruction or a comment-only line.
        A trailing source comment is attached to the first instruction
        generated for its line.
    """
    all_lines = c_code.split('\n')

    outside_lines = []
    main_lines = []
    in_main = False
    main_closed = False
    for raw_line in all_lines:
        cline = raw_line.rstrip()
        if not main_closed and 'int main(' in cline:
            # The header line itself is never compiled.
            in_main = True
            continue
        if in_main:
            # Only a '}' in the very first column ends main, so closing
            # braces of nested blocks have to be indented in the source.
            if cline.startswith('}'):
                in_main = False
                main_closed = True
                continue
            main_lines.append(cline)
            continue
        outside_lines.append(raw_line)

    # Declaration pass. main's own lines are deliberately left out:
    # feeding them here as well would run compile_line twice for every
    # statement and repeat whatever state changes it makes.
    for cline in outside_lines:
        self.compile_line(cline)

    asm = ["main:"]
    for line in main_lines:
        code_part, comment_part = self.extract_comment(line)
        instructions = self.compile_line(code_part)
        if not instructions:
            # Nothing was generated; keep a stand-alone comment if present.
            if comment_part:
                asm.append(f" ; {comment_part}")
            continue
        for i, instr in enumerate(instructions):
            if i == 0 and comment_part:
                asm.append(f" {instr} ; {comment_part}")
            else:
                asm.append(f" {instr}")
    return asm
if __name__ == "__main__":
    # Script entry point: preprocess main.c, compile the result, and write
    # the generated assembly to test.asm with one entry per line.
    c_compiler = Compiler()
    source_text = "\n".join(c_compiler.preprocess("main.c"))
    # Compile before opening the output so a failure leaves test.asm alone.
    generated = c_compiler.compile_c(source_text)
    with open("test.asm", "w") as asm_file:
        asm_file.writelines(f"{asm_line}\n" for asm_line in generated)