From a86ffb54dbbe13f6777a8f2a54adc9c50ee7e8d8 Mon Sep 17 00:00:00 2001 From: OusmBlueNinja <89956790+OusmBlueNinja@users.noreply.github.com> Date: Fri, 28 Mar 2025 10:10:24 -0500 Subject: [PATCH] did nothing... --- __pycache__/program.cpython-311.pyc | Bin 0 -> 336 bytes c-to-asm.py | 753 ++++++++-------------------- program.py | 2 +- std.h | 2 +- test.asm | 43 +- tests/c-test02.py | 548 ++++++++++++++++++++ 6 files changed, 783 insertions(+), 565 deletions(-) create mode 100644 __pycache__/program.cpython-311.pyc create mode 100644 tests/c-test02.py diff --git a/__pycache__/program.cpython-311.pyc b/__pycache__/program.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..16c15f6ab5836f401548955bfddc462b616c806a GIT binary patch literal 336 zcmZ3^%ge<81mAZ)OLqg(k3k$5V1hC}^8p#t8PXY27#1-`F{CgCGiWktO21?T3cm!Y zdC3YSia^T2#7mHoFPTAHFaeTy$qOR*VPY>qMnYuaN+4V|kOaagb`TdVhET_eqSsH8 z@fLeQQGR++V(vn&Ma4121*v(-G5&d}E=8GTsWC43$)&lec_qa$ z?wKVSrAaXXl_eSZdAiO8rMj-Ur8$Wu`9(2M!}JO&e{tC4=BJeAq}mmMoDTAEu?Ud( Vz|6?Vc!SIB0vI*0gJ2QJhXBwyNV5O{ literal 0 HcmV?d00001 diff --git a/c-to-asm.py b/c-to-asm.py index 70e98b5..2ec3156 100644 --- a/c-to-asm.py +++ b/c-to-asm.py @@ -1,548 +1,241 @@ +#!/usr/bin/env python3 import re -import os -from termcolor import colored +# Valid registers and instructions. +valid_registers = {"a", "b", "c", "d", "e", "f"} +valid_instructions = {"ldw", "mov", "add", "sub", "str", "ldr", "int", + "push", "pop", "jsr", "ret", "xor", "and", "jmp", + "mul", "div", "bne", "beq", "blt", "ldb", "stb"} -class Variable: - def __init__(self, name, address, var_type="int"): - self.name = name - self.address = address - self.type = var_type +# Fixed pool of registers. +register_pool = ["a", "b", "c", "d", "e", "f"] -class Compiler: - def __init__(self): - # 1024 bytes total: 0x000 to 0x3FF - self.data_ptr = 0x400 - self.variables = {} - self.struct_definitions = {} - self.in_struct_def = False - self.current_struct_name = None - self.current_struct_fields = [] - self.defines = {} # For #define macros - self.typedefs = {} # For typedef - self.label_counter = 0 - self.block_stack = [] # For if/while blocks +def allocate_register(var_name, context): + """Allocate a register for a variable in the given function context.""" + var_to_reg = context['var_to_reg'] + for reg in register_pool: + if reg not in var_to_reg.values(): + var_to_reg[var_name] = reg + return reg + raise Exception("Out of registers!") - def new_label(self, prefix): - lbl = f"{prefix}{self.label_counter}" - self.label_counter += 1 - return lbl - - def preprocess(self, filename): - lines = self._read_file_recursive(filename) - processed_lines = self._apply_defines(lines) - return processed_lines - - def _read_file_recursive(self, filename, included_files=None): - if included_files is None: - included_files = set() - - if filename in included_files: - # Prevent infinite recursion on includes - return [] - - included_files.add(filename) - - result_lines = [] - try: - with open(filename, "r") as f: - for line in f: - line_stripped = line.strip() +def compile_expr(expr, dest, temp, context): + """ + Compile a simple expression (literals, variables, +, -) into assembly. - # #include "file" - inc_match = re.match(r'#include\s+"([^"]+)"', line_stripped) - if inc_match: - inc_file = inc_match.group(1) - included_content = self._read_file_recursive(inc_file, included_files) - result_lines.extend(included_content) - continue - - # #define KEY VALUE - def_match = re.match(r'#define\s+([a-zA-Z_]\w*)\s+(.*)', line_stripped) - if def_match: - key = def_match.group(1) - value = def_match.group(2) - self.defines[key] = value - continue - - # typedef oldtype newtype; - tmatch = re.match(r'typedef\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line_stripped) - if tmatch: - oldt = tmatch.group(1) - newt = tmatch.group(2) - # Resolve oldt if it's also a typedef - oldt = self.apply_typedef(oldt) - self.typedefs[newt] = oldt - continue - - result_lines.append(line) - except FileNotFoundError as e: - print(colored(f"{filename}:0: error: {e}", "red")) - + Parameters: + expr: string expression (e.g., "5", "x", "x + 3") + dest: destination register for the result. + temp: temporary register. + context: dictionary with function context (like var_to_reg). + """ + var_to_reg = context['var_to_reg'] + instructions = [] + tokens = re.split(r'(\+|\-)', expr) + tokens = [t.strip() for t in tokens if t.strip() != ''] + if not tokens: + return instructions - return result_lines - - def _apply_defines(self, lines): - - token_pattern = re.compile(r'([A-Za-z0-9_]+)') - - processed = [] - for line in lines: - parts = token_pattern.split(line) - # parts: tokens and separators - for i, part in enumerate(parts): - if part in self.defines: - print(f"Replaced {part} with {self.defines[part]}") - part = self.defines[part] - parts[i] = part - new_line = "".join(parts) - processed.append(new_line) - return processed - - def apply_typedef(self, t): - if t in self.typedefs: - return self.typedefs[t] - return t - - def allocate_bytes(self, count): - start_addr = self.data_ptr - (count - 1) - if start_addr < 0x000: - raise Exception("Out of memory!") - self.data_ptr = start_addr - 1 - return start_addr - - def allocate_var(self, name, var_type="int"): - var_type = self.apply_typedef(var_type) - if name in self.variables: - return self.variables[name] - - if var_type.startswith("struct:"): - sname = var_type.split(":")[1] - fields = self.struct_definitions[sname] - length = len(fields) # each 1 byte - start_addr = self.allocate_bytes(length) - var = Variable(name, start_addr, var_type) - self.variables[name] = var - return var + # Process first term. + token = tokens[0] + if token.isdigit(): + instructions.append(f"ldw {dest}, {token}") + else: + if token not in var_to_reg: + raise Exception(f"Variable '{token}' not declared") + src_reg = var_to_reg[token] + if src_reg != dest: + instructions.append(f"mov {dest}, {src_reg}") + i = 1 + while i < len(tokens): + op = tokens[i] + operand = tokens[i+1] + if operand.isdigit(): + instructions.append(f"ldw {temp}, {operand}") + if op == "+": + instructions.append(f"add {dest}, {temp}") + elif op == "-": + instructions.append(f"sub {dest}, {temp}") + else: + raise Exception(f"Unsupported operator '{op}'") else: - start_addr = self.allocate_bytes(1) - var = Variable(name, start_addr, var_type) - self.variables[name] = var - return var - - def allocate_array(self, name, length, var_type="int"): - var_type = self.apply_typedef(var_type) - arr_start = self.allocate_bytes(length) - var_addr = self.allocate_bytes(1) - var = Variable(name, var_addr, "array") - self.variables[name] = var - return var, arr_start - - def store_string(self, string_value): - string_value = string_value.replace('\\n', '\n') - length = len(string_value) + 1 - start_addr = self.allocate_bytes(length) - asm = [] - current_addr = start_addr - for ch in string_value: - ascii_val = ord(ch) - asm.append(f"ldw a, {ascii_val}") - asm.append(f"str a, 0x{current_addr:X}") - current_addr += 1 - asm.append("ldw a, 0") - asm.append(f"str a, 0x{current_addr:X}") - return asm, start_addr - - def get_struct_field_offset(self, struct_type, field_name): - sname = struct_type.split(":")[1] - fields = self.struct_definitions[sname] - for i, (fname, ftype) in enumerate(fields): - if fname == field_name: - return i - raise Exception(f"Field {field_name} not found in {struct_type}") - - def parse_condition(self, cond_str): - # cond_str like "a == b" or "a != b" - m = re.match(r'([a-zA-Z_]\w*)\s*(==|!=)\s*([a-zA-Z_]\w*)', cond_str.strip()) - if not m: - raise Exception("Unsupported condition: " + cond_str) - var1, op, var2 = m.groups() - return var1, op, var2 - - def compile_condition(self, var1, op, var2): - asm = [] - v1 = self.allocate_var(var1) - v2 = self.allocate_var(var2) - asm.append(f"ldr a, 0x{v1.address:X}") - asm.append(f"ldr b, 0x{v2.address:X}") - # a = a - b - asm.append("sub a, b") - return asm, op - - def extract_comment(self, line): - comment_index = line.find('//') - if comment_index != -1: - code_part = line[:comment_index] - comment_part = line[comment_index+2:].strip() - return code_part, comment_part - return line, None - - def compile_line(self, code_part): - line = code_part.strip() - asm = [] - - if self.in_struct_def: - if line.startswith("};"): - self.struct_definitions[self.current_struct_name] = self.current_struct_fields - self.in_struct_def = False - self.current_struct_name = None - self.current_struct_fields = [] - return asm - mfield = re.match(r'int\s+([a-zA-Z_]\w*)\s*;', line) - if mfield: - fname = mfield.group(1) - ftype = "int" - self.current_struct_fields.append((fname, ftype)) - return asm - - # struct definition start - msd = re.match(r'struct\s+([a-zA-Z_]\w*)\s*\{', line) - if msd: - self.in_struct_def = True - self.current_struct_name = msd.group(1) - self.current_struct_fields = [] - return asm - - # struct var declaration - msv = re.match(r'struct\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line) - if msv: - sname, varname = msv.groups() - var_type = "struct:" + sname - self.allocate_var(varname, var_type) - return asm - - # if statement - mif = re.match(r'if\s*\(([^)]+)\)\s*\{', line) - if mif: - cond_str = mif.group(1) - var1, op, var2 = self.parse_condition(cond_str) - end_label = self.new_label("endif") - cond_code, cmp_op = self.compile_condition(var1, op, var2) - asm.extend(cond_code) - # if '==': jump if not zero a != 0 - # if '!=': jump if zero a == 0 - if cmp_op == '==': - asm.append("bne a, 0, " + end_label) + if operand not in var_to_reg: + raise Exception(f"Variable '{operand}' not declared") + operand_reg = var_to_reg[operand] + if op == "+": + instructions.append(f"add {dest}, {operand_reg}") + elif op == "-": + instructions.append(f"sub {dest}, {operand_reg}") else: - asm.append("beq a, 0, " + end_label) - self.block_stack.append(('if', end_label)) - return asm + raise Exception(f"Unsupported operator '{op}'") + i += 2 + return instructions - # while statement - mwhile = re.match(r'while\s*\(([^)]+)\)\s*\{', line) - if mwhile: - cond_str = mwhile.group(1) - var1, op, var2 = self.parse_condition(cond_str) - start_label = self.new_label("whilestart") - end_label = self.new_label("whileend") - asm.append(start_label + ":") - cond_code, cmp_op = self.compile_condition(var1, op, var2) - asm.extend(cond_code) - if cmp_op == '==': - asm.append("bne a, 0, " + end_label) - else: - asm.append("beq a, 0, " + end_label) - self.block_stack.append(('while', start_label, end_label)) - return asm +def compile_statement(line, context): + """ + Compile a single statement from our limited C language. + Supports: + - Variable declaration: e.g., "int x = 5;" + - Assignment: e.g., "x = x + 2;" + - Function call: e.g., "foo();" + - Return statement: e.g., "return x;" + """ + var_to_reg = context['var_to_reg'] + instructions = [] + line = line.strip().rstrip(';') + if not line: + return instructions - # end of block - if line == "}": - if not self.block_stack: - return asm - blk = self.block_stack.pop() - if blk[0] == 'if': - end_label = blk[1] - asm.append(end_label + ":") - elif blk[0] == 'while': - start_label = blk[1] - end_label = blk[2] - # jump back to start - asm.append(f"jmp {start_label}") - asm.append(end_label + ":") - return asm + # Function call statement pattern: identifier followed by "()" + m = re.match(r'^(\w+)\s*\(\s*\)\s*$', line) + if m: + func_name = m.group(1) + instructions.append(f"jsr {func_name}") + return instructions - # p.x = number; - m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line) + # Variable declaration. + if line.startswith("int "): + line = line[4:].strip() # Remove "int " + parts = line.split("=", 1) + if len(parts) != 2: + raise Exception("Invalid declaration syntax.") + var_name = parts[0].strip() + expr = parts[1].strip() + reg = allocate_register(var_name, context) + # Choose a temporary register different from the destination. + temp = next((r for r in register_pool if r != reg and r not in var_to_reg.values()), None) + if temp is None: + temp = next((r for r in register_pool if r != reg), None) + instructions.extend(compile_expr(expr, reg, temp, context)) + return instructions + + # Return statement. + if line.startswith("return"): + ret_expr = line[6:].strip() # Remove "return" + if ret_expr: + # Convention: return value in register a. + temp = next((r for r in register_pool if r != "a" and r not in var_to_reg.values()), None) + if temp is None: + temp = next((r for r in register_pool if r != "a"), None) + instructions.extend(compile_expr(ret_expr, "a", temp, context)) + instructions.append("ret") + return instructions + + # Assignment statement. + if "=" in line: + parts = line.split("=", 1) + var_name = parts[0].strip() + expr = parts[1].strip() + if var_name not in var_to_reg: + raise Exception(f"Variable '{var_name}' not declared") + dest = var_to_reg[var_name] + temp = next((r for r in register_pool if r != dest and r not in var_to_reg.values()), None) + if temp is None: + temp = next((r for r in register_pool if r != dest), None) + instructions.extend(compile_expr(expr, dest, temp, context)) + return instructions + + raise Exception(f"Unrecognized statement: {line}") + +def compile_function(func_name, lines): + """ + Compile a function given its name and body (as a list of lines). + Returns the assembly instructions for the function. + """ + # Create a fresh context for the function. + context = {"var_to_reg": {}} + instructions = [] + # Function label. + instructions.append(f"{func_name}:") + for line in lines: + line = line.strip() + if not line or line.startswith("//"): + continue + stmt_instructions = compile_statement(line, context) + instructions.extend(stmt_instructions) + return instructions + +def compile_c_to_asm(c_code): + """ + Compile a simple C program (with functions) into assembly. + The program must contain functions defined as: + + int func_name() { + // statements + } + + The compiled output will start at the main function (if defined). + """ + lines = c_code.splitlines() + functions = {} + current_func = None + current_lines = [] + in_function = False + + for line in lines: + stripped = line.strip() + if not stripped or stripped.startswith("//"): + continue + + # Detect function start: "int funcName() {" + m = re.match(r'^int\s+(\w+)\s*\(\s*\)\s*\{', stripped) if m: - varname, fieldname, value = m.groups() - value = int(value) - v = self.allocate_var(varname) - offset = self.get_struct_field_offset(v.type, fieldname) - asm.append(f"ldr a, 0x{v.address:X}") - if offset != 0: - asm.append(f"ldw b, {offset}") - asm.append("add a, b") - asm.append(f"ldw c, {value}") - asm.append("stb c, a") - return asm + if in_function: + raise Exception("Nested functions not supported.") + current_func = m.group(1) + in_function = True + current_lines = [] + continue - # p.x = var + number; - m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line) - if m: - varname, fieldname, srcvar, number = m.groups() - number = int(number) - v = self.allocate_var(varname) - offset = self.get_struct_field_offset(v.type, fieldname) - asm.append(f"ldr a, 0x{v.address:X}") - if offset != 0: - asm.append(f"ldw b, {offset}") - asm.append("add a, b") - v2 = self.allocate_var(srcvar) - asm.append(f"ldr c, 0x{v2.address:X}") - asm.append(f"ldw d, {number}") - asm.append("add c, d") - asm.append("stb c, a") - return asm + # Detect end of function: "}" + if stripped == "}": + if not in_function: + raise Exception("Unexpected '}'") + functions[current_func] = compile_function(current_func, current_lines) + in_function = False + current_func = None + current_lines = [] + continue - # p.x = srcvar; - m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*;', line) - if m: - varname, fieldname, srcvar = m.groups() - v = self.allocate_var(varname) - offset = self.get_struct_field_offset(v.type, fieldname) - asm.append(f"ldr a, 0x{v.address:X}") - if offset != 0: - asm.append(f"ldw b, {offset}") - asm.append("add a, b") - v2 = self.allocate_var(srcvar) - asm.append(f"ldr c, 0x{v2.address:X}") - asm.append("stb c, a") - return asm + # Inside a function, add the line. + if in_function: + current_lines.append(stripped) + else: + # Outside any function; for simplicity, ignore global declarations. + continue - # x = p.x; - m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*;', line) - if m: - dst, varname, fieldname = m.groups() - v = self.allocate_var(varname) - offset = self.get_struct_field_offset(v.type, fieldname) - vd = self.allocate_var(dst) - asm.append(f"ldr a, 0x{v.address:X}") - if offset != 0: - asm.append(f"ldw b, {offset}") - asm.append("add a, b") - asm.append("ldb c, a") - asm.append(f"str c, 0x{vd.address:X}") - return asm - - # print_int(p.x); - m = re.match(r'print_int\(([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\)\s*;', line) - if m: - varname, fieldname = m.groups() - v = self.allocate_var(varname) - offset = self.get_struct_field_offset(v.type, fieldname) - asm.append(f"ldr a, 0x{v.address:X}") - if offset != 0: - asm.append(f"ldw b, {offset}") - asm.append("add a, b") - asm.append("ldb a, a") - asm.append("int 0x01") - return asm - - # int arr[10]; - m = re.match(r'int\s+([a-zA-Z_]\w*)\[(\d+)\]\s*;', line) - if m: - varname = m.group(1) - length = int(m.group(2)) - arr_var, start_addr = self.allocate_array(varname, length) - asm.append(f"ldw a, 0x{start_addr:X}") - asm.append(f"str a, 0x{arr_var.address:X}") - return asm - - # int x = number; - m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line) - if m: - varname = m.group(1) - value = int(m.group(2)) - var = self.allocate_var(varname, "int") - asm.append(f"ldw a, {value}") - asm.append(f"str a, 0x{var.address:X}") - return asm - - # int y = x + number; - m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line) - if m: - varname, var2, number = m.groups() - number = int(number) - v1 = self.allocate_var(varname, "int") - v2 = self.allocate_var(var2, "int") - asm.append(f"ldr a, 0x{v2.address:X}") - asm.append(f"ldw b, {number}") - asm.append("add a, b") - asm.append(f"str a, 0x{v1.address:X}") - return asm - - # char *msg = "Hello\n"; - m = re.match(r'char\s*\*\s*([a-zA-Z_]\w*)\s*=\s*"([^"]*)"\s*;', line) - if m: - varname, string_val = m.groups() - v = self.allocate_var(varname, "char*") - code, start_addr = self.store_string(string_val) - asm.extend(code) - asm.append(f"ldw a, 0x{start_addr:X}") - asm.append(f"str a, 0x{v.address:X}") - return asm - - # var = number; - m = re.match(r'([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line) - if m: - varname, value = m.groups() - value = int(value) - v = self.allocate_var(varname, "int") - asm.append(f"ldw a, {value}") - asm.append(f"str a, 0x{v.address:X}") - return asm - - # var = var2 + number; - m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line) - if m: - varname, var2, number = m.groups() - number = int(number) - v1 = self.allocate_var(varname, "int") - v2 = self.allocate_var(var2, "int") - asm.append(f"ldr a, 0x{v2.address:X}") - asm.append(f"ldw b, {number}") - asm.append("add a, b") - asm.append(f"str a, 0x{v1.address:X}") - return asm - - # var[index] = number; - m = re.match(r'([a-zA-Z_]\w*)\[(\d+)\]\s*=\s*(\d+)\s*;', line) - if m: - arr, index, value = m.groups() - index = int(index) - value = int(value) - arr_var = self.allocate_var(arr) - asm.append(f"ldr a, 0x{arr_var.address:X}") - asm.append(f"ldw b, {index}") - asm.append("add a, b") - asm.append(f"ldw c, {value}") - asm.append("stb c, a") - return asm - - # x = arr[index]; - m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\[(\d+)\]\s*;', line) - if m: - varname, arr, index = m.groups() - index = int(index) - v = self.allocate_var(varname, "int") - arr_var = self.allocate_var(arr) - asm.append(f"ldr a, 0x{arr_var.address:X}") - asm.append(f"ldw b, {index}") - asm.append("add a, b") - asm.append("ldb d, a") - asm.append(f"str d, 0x{v.address:X}") - return asm - - # print_char(var); - m = re.match(r'print_char\(([a-zA-Z_]\w*)\)\s*;', line) - if m: - varname = m.group(1) - v = self.allocate_var(varname) - asm.append(f"ldr a, 0x{v.address:X}") - asm.append("int 0x00") - return asm - - # print_char(arr[index]); - m = re.match(r'print_char\(([a-zA-Z_]\w*)\[(\d+)\]\)\s*;', line) - if m: - arr, index = m.groups() - index = int(index) - arr_var = self.allocate_var(arr) - asm.append(f"ldr a, 0x{arr_var.address:X}") - asm.append(f"ldw b, {index}") - asm.append("add a, b") - asm.append("ldb a, a") - asm.append("int 0x00") - return asm - - # print_int(var); - m = re.match(r'print_int\(([a-zA-Z_]\w*)\)\s*;', line) - if m: - varname = m.group(1) - v = self.allocate_var(varname) - asm.append(f"ldr a, 0x{v.address:X}") - asm.append("int 0x01") - return asm - - # print_string(var); - m = re.match(r'print_string\(([a-zA-Z_]\w*)\)\s*;', line) - if m: - varname = m.group(1) - v = self.allocate_var(varname, "char*") - asm.append("ldw d, 0") - asm.append(f"ldr b, 0x{v.address:X}") - asm.append("ldw c, 1") - asm.append("string_loop:") - asm.append("ldb a, b") - asm.append("beq a, d, string_end") - asm.append("int 0x00") - asm.append("add b, c") - asm.append("jmp string_loop") - asm.append("string_end:") - return asm - - # return number; - m = re.match(r'return\s+(\d+)\s*;', line) - if m: - asm.append("int 0xFF") - return asm - - # Unrecognized line or empty - return asm - - def compile_c(self, c_code): - # First, parse everything to detect structs and typedef done in preprocess - all_lines = c_code.split('\n') - # struct definitions might appear outside main - for cline in all_lines: - self.compile_line(cline) - - # Extract lines inside main - lines = [] - in_main = False - for cline in all_lines: - cline = cline.rstrip() - if 'int main(' in cline: - in_main = True - continue - if in_main: - if cline.startswith('}'): - in_main = False - break - lines.append(cline) - - asm = ["main:"] - for line in lines: - code_part, comment_part = self.extract_comment(line) - instructions = self.compile_line(code_part) - if instructions: - for i, instr in enumerate(instructions): - if i == 0 and comment_part: - asm.append(f" {instr} ; {comment_part}") - else: - asm.append(f" {instr}") - else: - if comment_part: - asm.append(f" ; {comment_part}") - - return asm + # Build the final assembly code. + # If "main" is defined, list it first. + asm_lines = [] + if "main" in functions: + asm_lines.extend(functions["main"]) + for fname, code in functions.items(): + if fname != "main": + asm_lines.extend(code) + else: + for fname, code in functions.items(): + asm_lines.extend(code) + return asm_lines +# Example usage. if __name__ == "__main__": - compiler = Compiler() - preprocessed_lines = compiler.preprocess("main.c") - c_code = "\n".join(preprocessed_lines) - asm_code = compiler.compile_c(c_code) - - with open("test.asm", "w") as out: - for line in asm_code: - out.write(line + "\n") + sample_c = """ + // sample C program with functions. + int main() { + int x = 5; + int y = 10; + x = x + y; + foo(); + + } + + int foo() { + int a = 3; + int b = 7; + a = a + b; + return a; + } + """ + asm_output = compile_c_to_asm(sample_c) + for inst in asm_output: + print(inst) diff --git a/program.py b/program.py index 66bb3e2..e7cb60c 100644 --- a/program.py +++ b/program.py @@ -1 +1 @@ -program = [1,0,87,5,0,1004,1,0,101,5,0,1005,1,0,108,5,0,1006,1,0,99,5,0,1007,1,0,111,5,0,1008,1,0,109,5,0,1009,1,0,101,5,0,1010,1,0,33,5,0,1011,1,0,10,5,0,1012,1,0,0,5,0,1013,1,0,1004,5,0,1024,1,3,0,6,1,1024,1,2,1,21,0,1,9,0,3,91,10,0,0,3,1,2,17,75,0,10,255,0] \ No newline at end of file +program = [1,0,5,1,1,10,3,0,1,13,15,0,14,0,0,1,0,3,1,1,7,3,0,1,14,0,0] \ No newline at end of file diff --git a/std.h b/std.h index 2bc2643..c1db181 100644 --- a/std.h +++ b/std.h @@ -1,6 +1,6 @@ #define true 1 #define false 0 -#define TEST 256 + struct vec2 { int x; diff --git a/test.asm b/test.asm index 3479949..deb9f81 100644 --- a/test.asm +++ b/test.asm @@ -1,34 +1,11 @@ main: - ldw a, 87 - str a, 0x3EC - ldw a, 101 - str a, 0x3ED - ldw a, 108 - str a, 0x3EE - ldw a, 99 - str a, 0x3EF - ldw a, 111 - str a, 0x3F0 - ldw a, 109 - str a, 0x3F1 - ldw a, 101 - str a, 0x3F2 - ldw a, 33 - str a, 0x3F3 - ldw a, 10 - str a, 0x3F4 - ldw a, 0 - str a, 0x3F5 - ldw a, 0x3EC - str a, 0x400 - ldw d, 0 - ldr b, 0x400 - ldw c, 1 - string_loop: - ldb a, b - beq a, d, string_end - int 0x00 - add b, c - jmp string_loop - string_end: - int 0xFF + ldw a, 5 + ldw b, 10 + add a, b + jsr foo + ret +foo: + ldw a, 3 + ldw b, 7 + add a, b + retZ \ No newline at end of file diff --git a/tests/c-test02.py b/tests/c-test02.py new file mode 100644 index 0000000..70e98b5 --- /dev/null +++ b/tests/c-test02.py @@ -0,0 +1,548 @@ +import re +import os +from termcolor import colored + + +class Variable: + def __init__(self, name, address, var_type="int"): + self.name = name + self.address = address + self.type = var_type + +class Compiler: + def __init__(self): + # 1024 bytes total: 0x000 to 0x3FF + self.data_ptr = 0x400 + self.variables = {} + self.struct_definitions = {} + self.in_struct_def = False + self.current_struct_name = None + self.current_struct_fields = [] + self.defines = {} # For #define macros + self.typedefs = {} # For typedef + self.label_counter = 0 + self.block_stack = [] # For if/while blocks + + def new_label(self, prefix): + lbl = f"{prefix}{self.label_counter}" + self.label_counter += 1 + return lbl + + def preprocess(self, filename): + lines = self._read_file_recursive(filename) + processed_lines = self._apply_defines(lines) + return processed_lines + + def _read_file_recursive(self, filename, included_files=None): + if included_files is None: + included_files = set() + + if filename in included_files: + # Prevent infinite recursion on includes + return [] + + included_files.add(filename) + + result_lines = [] + try: + with open(filename, "r") as f: + for line in f: + line_stripped = line.strip() + + # #include "file" + inc_match = re.match(r'#include\s+"([^"]+)"', line_stripped) + if inc_match: + inc_file = inc_match.group(1) + included_content = self._read_file_recursive(inc_file, included_files) + result_lines.extend(included_content) + continue + + # #define KEY VALUE + def_match = re.match(r'#define\s+([a-zA-Z_]\w*)\s+(.*)', line_stripped) + if def_match: + key = def_match.group(1) + value = def_match.group(2) + self.defines[key] = value + continue + + # typedef oldtype newtype; + tmatch = re.match(r'typedef\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line_stripped) + if tmatch: + oldt = tmatch.group(1) + newt = tmatch.group(2) + # Resolve oldt if it's also a typedef + oldt = self.apply_typedef(oldt) + self.typedefs[newt] = oldt + continue + + result_lines.append(line) + except FileNotFoundError as e: + print(colored(f"{filename}:0: error: {e}", "red")) + + + return result_lines + + def _apply_defines(self, lines): + + token_pattern = re.compile(r'([A-Za-z0-9_]+)') + + processed = [] + for line in lines: + parts = token_pattern.split(line) + # parts: tokens and separators + for i, part in enumerate(parts): + if part in self.defines: + print(f"Replaced {part} with {self.defines[part]}") + part = self.defines[part] + parts[i] = part + new_line = "".join(parts) + processed.append(new_line) + return processed + + def apply_typedef(self, t): + if t in self.typedefs: + return self.typedefs[t] + return t + + def allocate_bytes(self, count): + start_addr = self.data_ptr - (count - 1) + if start_addr < 0x000: + raise Exception("Out of memory!") + self.data_ptr = start_addr - 1 + return start_addr + + def allocate_var(self, name, var_type="int"): + var_type = self.apply_typedef(var_type) + if name in self.variables: + return self.variables[name] + + if var_type.startswith("struct:"): + sname = var_type.split(":")[1] + fields = self.struct_definitions[sname] + length = len(fields) # each 1 byte + start_addr = self.allocate_bytes(length) + var = Variable(name, start_addr, var_type) + self.variables[name] = var + return var + else: + start_addr = self.allocate_bytes(1) + var = Variable(name, start_addr, var_type) + self.variables[name] = var + return var + + def allocate_array(self, name, length, var_type="int"): + var_type = self.apply_typedef(var_type) + arr_start = self.allocate_bytes(length) + var_addr = self.allocate_bytes(1) + var = Variable(name, var_addr, "array") + self.variables[name] = var + return var, arr_start + + def store_string(self, string_value): + string_value = string_value.replace('\\n', '\n') + length = len(string_value) + 1 + start_addr = self.allocate_bytes(length) + asm = [] + current_addr = start_addr + for ch in string_value: + ascii_val = ord(ch) + asm.append(f"ldw a, {ascii_val}") + asm.append(f"str a, 0x{current_addr:X}") + current_addr += 1 + asm.append("ldw a, 0") + asm.append(f"str a, 0x{current_addr:X}") + return asm, start_addr + + def get_struct_field_offset(self, struct_type, field_name): + sname = struct_type.split(":")[1] + fields = self.struct_definitions[sname] + for i, (fname, ftype) in enumerate(fields): + if fname == field_name: + return i + raise Exception(f"Field {field_name} not found in {struct_type}") + + def parse_condition(self, cond_str): + # cond_str like "a == b" or "a != b" + m = re.match(r'([a-zA-Z_]\w*)\s*(==|!=)\s*([a-zA-Z_]\w*)', cond_str.strip()) + if not m: + raise Exception("Unsupported condition: " + cond_str) + var1, op, var2 = m.groups() + return var1, op, var2 + + def compile_condition(self, var1, op, var2): + asm = [] + v1 = self.allocate_var(var1) + v2 = self.allocate_var(var2) + asm.append(f"ldr a, 0x{v1.address:X}") + asm.append(f"ldr b, 0x{v2.address:X}") + # a = a - b + asm.append("sub a, b") + return asm, op + + def extract_comment(self, line): + comment_index = line.find('//') + if comment_index != -1: + code_part = line[:comment_index] + comment_part = line[comment_index+2:].strip() + return code_part, comment_part + return line, None + + def compile_line(self, code_part): + line = code_part.strip() + asm = [] + + if self.in_struct_def: + if line.startswith("};"): + self.struct_definitions[self.current_struct_name] = self.current_struct_fields + self.in_struct_def = False + self.current_struct_name = None + self.current_struct_fields = [] + return asm + mfield = re.match(r'int\s+([a-zA-Z_]\w*)\s*;', line) + if mfield: + fname = mfield.group(1) + ftype = "int" + self.current_struct_fields.append((fname, ftype)) + return asm + + # struct definition start + msd = re.match(r'struct\s+([a-zA-Z_]\w*)\s*\{', line) + if msd: + self.in_struct_def = True + self.current_struct_name = msd.group(1) + self.current_struct_fields = [] + return asm + + # struct var declaration + msv = re.match(r'struct\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line) + if msv: + sname, varname = msv.groups() + var_type = "struct:" + sname + self.allocate_var(varname, var_type) + return asm + + # if statement + mif = re.match(r'if\s*\(([^)]+)\)\s*\{', line) + if mif: + cond_str = mif.group(1) + var1, op, var2 = self.parse_condition(cond_str) + end_label = self.new_label("endif") + cond_code, cmp_op = self.compile_condition(var1, op, var2) + asm.extend(cond_code) + # if '==': jump if not zero a != 0 + # if '!=': jump if zero a == 0 + if cmp_op == '==': + asm.append("bne a, 0, " + end_label) + else: + asm.append("beq a, 0, " + end_label) + self.block_stack.append(('if', end_label)) + return asm + + # while statement + mwhile = re.match(r'while\s*\(([^)]+)\)\s*\{', line) + if mwhile: + cond_str = mwhile.group(1) + var1, op, var2 = self.parse_condition(cond_str) + start_label = self.new_label("whilestart") + end_label = self.new_label("whileend") + asm.append(start_label + ":") + cond_code, cmp_op = self.compile_condition(var1, op, var2) + asm.extend(cond_code) + if cmp_op == '==': + asm.append("bne a, 0, " + end_label) + else: + asm.append("beq a, 0, " + end_label) + self.block_stack.append(('while', start_label, end_label)) + return asm + + # end of block + if line == "}": + if not self.block_stack: + return asm + blk = self.block_stack.pop() + if blk[0] == 'if': + end_label = blk[1] + asm.append(end_label + ":") + elif blk[0] == 'while': + start_label = blk[1] + end_label = blk[2] + # jump back to start + asm.append(f"jmp {start_label}") + asm.append(end_label + ":") + return asm + + # p.x = number; + m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line) + if m: + varname, fieldname, value = m.groups() + value = int(value) + v = self.allocate_var(varname) + offset = self.get_struct_field_offset(v.type, fieldname) + asm.append(f"ldr a, 0x{v.address:X}") + if offset != 0: + asm.append(f"ldw b, {offset}") + asm.append("add a, b") + asm.append(f"ldw c, {value}") + asm.append("stb c, a") + return asm + + # p.x = var + number; + m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line) + if m: + varname, fieldname, srcvar, number = m.groups() + number = int(number) + v = self.allocate_var(varname) + offset = self.get_struct_field_offset(v.type, fieldname) + asm.append(f"ldr a, 0x{v.address:X}") + if offset != 0: + asm.append(f"ldw b, {offset}") + asm.append("add a, b") + v2 = self.allocate_var(srcvar) + asm.append(f"ldr c, 0x{v2.address:X}") + asm.append(f"ldw d, {number}") + asm.append("add c, d") + asm.append("stb c, a") + return asm + + # p.x = srcvar; + m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*;', line) + if m: + varname, fieldname, srcvar = m.groups() + v = self.allocate_var(varname) + offset = self.get_struct_field_offset(v.type, fieldname) + asm.append(f"ldr a, 0x{v.address:X}") + if offset != 0: + asm.append(f"ldw b, {offset}") + asm.append("add a, b") + v2 = self.allocate_var(srcvar) + asm.append(f"ldr c, 0x{v2.address:X}") + asm.append("stb c, a") + return asm + + # x = p.x; + m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*;', line) + if m: + dst, varname, fieldname = m.groups() + v = self.allocate_var(varname) + offset = self.get_struct_field_offset(v.type, fieldname) + vd = self.allocate_var(dst) + asm.append(f"ldr a, 0x{v.address:X}") + if offset != 0: + asm.append(f"ldw b, {offset}") + asm.append("add a, b") + asm.append("ldb c, a") + asm.append(f"str c, 0x{vd.address:X}") + return asm + + # print_int(p.x); + m = re.match(r'print_int\(([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\)\s*;', line) + if m: + varname, fieldname = m.groups() + v = self.allocate_var(varname) + offset = self.get_struct_field_offset(v.type, fieldname) + asm.append(f"ldr a, 0x{v.address:X}") + if offset != 0: + asm.append(f"ldw b, {offset}") + asm.append("add a, b") + asm.append("ldb a, a") + asm.append("int 0x01") + return asm + + # int arr[10]; + m = re.match(r'int\s+([a-zA-Z_]\w*)\[(\d+)\]\s*;', line) + if m: + varname = m.group(1) + length = int(m.group(2)) + arr_var, start_addr = self.allocate_array(varname, length) + asm.append(f"ldw a, 0x{start_addr:X}") + asm.append(f"str a, 0x{arr_var.address:X}") + return asm + + # int x = number; + m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line) + if m: + varname = m.group(1) + value = int(m.group(2)) + var = self.allocate_var(varname, "int") + asm.append(f"ldw a, {value}") + asm.append(f"str a, 0x{var.address:X}") + return asm + + # int y = x + number; + m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line) + if m: + varname, var2, number = m.groups() + number = int(number) + v1 = self.allocate_var(varname, "int") + v2 = self.allocate_var(var2, "int") + asm.append(f"ldr a, 0x{v2.address:X}") + asm.append(f"ldw b, {number}") + asm.append("add a, b") + asm.append(f"str a, 0x{v1.address:X}") + return asm + + # char *msg = "Hello\n"; + m = re.match(r'char\s*\*\s*([a-zA-Z_]\w*)\s*=\s*"([^"]*)"\s*;', line) + if m: + varname, string_val = m.groups() + v = self.allocate_var(varname, "char*") + code, start_addr = self.store_string(string_val) + asm.extend(code) + asm.append(f"ldw a, 0x{start_addr:X}") + asm.append(f"str a, 0x{v.address:X}") + return asm + + # var = number; + m = re.match(r'([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line) + if m: + varname, value = m.groups() + value = int(value) + v = self.allocate_var(varname, "int") + asm.append(f"ldw a, {value}") + asm.append(f"str a, 0x{v.address:X}") + return asm + + # var = var2 + number; + m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line) + if m: + varname, var2, number = m.groups() + number = int(number) + v1 = self.allocate_var(varname, "int") + v2 = self.allocate_var(var2, "int") + asm.append(f"ldr a, 0x{v2.address:X}") + asm.append(f"ldw b, {number}") + asm.append("add a, b") + asm.append(f"str a, 0x{v1.address:X}") + return asm + + # var[index] = number; + m = re.match(r'([a-zA-Z_]\w*)\[(\d+)\]\s*=\s*(\d+)\s*;', line) + if m: + arr, index, value = m.groups() + index = int(index) + value = int(value) + arr_var = self.allocate_var(arr) + asm.append(f"ldr a, 0x{arr_var.address:X}") + asm.append(f"ldw b, {index}") + asm.append("add a, b") + asm.append(f"ldw c, {value}") + asm.append("stb c, a") + return asm + + # x = arr[index]; + m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\[(\d+)\]\s*;', line) + if m: + varname, arr, index = m.groups() + index = int(index) + v = self.allocate_var(varname, "int") + arr_var = self.allocate_var(arr) + asm.append(f"ldr a, 0x{arr_var.address:X}") + asm.append(f"ldw b, {index}") + asm.append("add a, b") + asm.append("ldb d, a") + asm.append(f"str d, 0x{v.address:X}") + return asm + + # print_char(var); + m = re.match(r'print_char\(([a-zA-Z_]\w*)\)\s*;', line) + if m: + varname = m.group(1) + v = self.allocate_var(varname) + asm.append(f"ldr a, 0x{v.address:X}") + asm.append("int 0x00") + return asm + + # print_char(arr[index]); + m = re.match(r'print_char\(([a-zA-Z_]\w*)\[(\d+)\]\)\s*;', line) + if m: + arr, index = m.groups() + index = int(index) + arr_var = self.allocate_var(arr) + asm.append(f"ldr a, 0x{arr_var.address:X}") + asm.append(f"ldw b, {index}") + asm.append("add a, b") + asm.append("ldb a, a") + asm.append("int 0x00") + return asm + + # print_int(var); + m = re.match(r'print_int\(([a-zA-Z_]\w*)\)\s*;', line) + if m: + varname = m.group(1) + v = self.allocate_var(varname) + asm.append(f"ldr a, 0x{v.address:X}") + asm.append("int 0x01") + return asm + + # print_string(var); + m = re.match(r'print_string\(([a-zA-Z_]\w*)\)\s*;', line) + if m: + varname = m.group(1) + v = self.allocate_var(varname, "char*") + asm.append("ldw d, 0") + asm.append(f"ldr b, 0x{v.address:X}") + asm.append("ldw c, 1") + asm.append("string_loop:") + asm.append("ldb a, b") + asm.append("beq a, d, string_end") + asm.append("int 0x00") + asm.append("add b, c") + asm.append("jmp string_loop") + asm.append("string_end:") + return asm + + # return number; + m = re.match(r'return\s+(\d+)\s*;', line) + if m: + asm.append("int 0xFF") + return asm + + # Unrecognized line or empty + return asm + + def compile_c(self, c_code): + # First, parse everything to detect structs and typedef done in preprocess + all_lines = c_code.split('\n') + # struct definitions might appear outside main + for cline in all_lines: + self.compile_line(cline) + + # Extract lines inside main + lines = [] + in_main = False + for cline in all_lines: + cline = cline.rstrip() + if 'int main(' in cline: + in_main = True + continue + if in_main: + if cline.startswith('}'): + in_main = False + break + lines.append(cline) + + asm = ["main:"] + for line in lines: + code_part, comment_part = self.extract_comment(line) + instructions = self.compile_line(code_part) + if instructions: + for i, instr in enumerate(instructions): + if i == 0 and comment_part: + asm.append(f" {instr} ; {comment_part}") + else: + asm.append(f" {instr}") + else: + if comment_part: + asm.append(f" ; {comment_part}") + + return asm + +if __name__ == "__main__": + compiler = Compiler() + preprocessed_lines = compiler.preprocess("main.c") + c_code = "\n".join(preprocessed_lines) + asm_code = compiler.compile_c(c_code) + + with open("test.asm", "w") as out: + for line in asm_code: + out.write(line + "\n")