"""Tiny compiler translating a small C subset into assembly text.

The generated code targets a machine with 1024 bytes of memory
(0x000-0x3FF).  Instructions are emitted as plain strings; judging by how
they are used in this file, ``ldw r, imm`` loads an immediate, ``ldr r, addr``
/ ``str r, addr`` read/write a fixed address, and ``ldb r, p`` / ``stb r, p``
read/write the address held in register ``p``.
"""
import os
import re

try:
    from termcolor import colored
except ImportError:
    # termcolor only colours diagnostics; degrade to plain text without it.
    def colored(text, *args, **kwargs):
        """Return ``text`` unchanged (fallback when termcolor is missing)."""
        return text


class Variable:
    """A named storage location known to the compiler."""

    def __init__(self, name, address, var_type="int"):
        # Source-level identifier.
        self.name = name
        # Address of the variable (first byte for multi-byte struct values).
        self.address = address
        # "int", "char*", "array" or "struct:<name>".
        self.type = var_type


class Compiler:
    """Line-oriented compiler for a small C subset.

    Data is allocated downwards from the top of memory, one byte per int or
    struct field.  Each source line is matched against a fixed list of
    statement patterns; unrecognised lines produce no code.
    """

    def __init__(self):
        # 1024 bytes total: 0x000 to 0x3FF.  data_ptr is the highest free
        # address, so it starts at the last valid byte.
        self.data_ptr = 0x3FF
        self.variables = {}
        self.struct_definitions = {}
        self.in_struct_def = False
        self.current_struct_name = None
        self.current_struct_fields = []
        self.defines = {}   # For #define macros
        self.typedefs = {}  # For typedef
        self.label_counter = 0
        self.block_stack = []  # For if/while blocks

    def new_label(self, prefix):
        """Return a unique label made of ``prefix`` plus a running counter."""
        lbl = f"{prefix}{self.label_counter}"
        self.label_counter += 1
        return lbl

    # ------------------------------------------------------------------
    # Preprocessing
    # ------------------------------------------------------------------
    def preprocess(self, filename):
        """Read ``filename`` (following includes) and expand #define macros.

        Returns the list of remaining source lines; each keeps the line
        ending it had in the file.
        """
        lines = self._read_file_recursive(filename)
        return self._apply_defines(lines)

    def _read_file_recursive(self, filename, included_files=None):
        """Collect the lines of ``filename``, inlining ``#include "..."``.

        ``#define`` and ``typedef`` lines are recorded in ``self.defines`` /
        ``self.typedefs`` and dropped from the output.  A missing file is
        reported on stdout and contributes whatever was read so far.
        """
        if included_files is None:
            included_files = set()
        if filename in included_files:
            # Prevent infinite recursion on includes
            return []
        included_files.add(filename)

        result_lines = []
        try:
            with open(filename, "r") as f:
                for line in f:
                    line_stripped = line.strip()

                    # #include "file"
                    inc_match = re.match(r'#include\s+"([^"]+)"', line_stripped)
                    if inc_match:
                        result_lines.extend(
                            self._read_file_recursive(inc_match.group(1), included_files)
                        )
                        continue

                    # #define KEY VALUE
                    def_match = re.match(r'#define\s+([a-zA-Z_]\w*)\s+(.*)', line_stripped)
                    if def_match:
                        self.defines[def_match.group(1)] = def_match.group(2)
                        continue

                    # typedef oldtype newtype;
                    tmatch = re.match(
                        r'typedef\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line_stripped
                    )
                    if tmatch:
                        # Resolve the old type first in case it is itself a typedef.
                        self.typedefs[tmatch.group(2)] = self.apply_typedef(tmatch.group(1))
                        continue

                    result_lines.append(line)
        except FileNotFoundError as e:
            print(colored(f"{filename}:0: error: {e}", "red"))
        return result_lines

    def _apply_defines(self, lines):
        """Replace every identifier token that names a macro by its value."""
        token_pattern = re.compile(r'([A-Za-z0-9_]+)')
        processed = []
        for line in lines:
            # The capturing group makes split() return tokens and separators
            # alternately, so joining the parts rebuilds the line.
            parts = token_pattern.split(line)
            for i, part in enumerate(parts):
                if part in self.defines:
                    print(f"Replaced {part} with {self.defines[part]}")
                    parts[i] = self.defines[part]
            processed.append("".join(parts))
        return processed

    def apply_typedef(self, t):
        """Return the type ``t`` aliases, or ``t`` itself if it is no alias."""
        return self.typedefs.get(t, t)

    # ------------------------------------------------------------------
    # Memory allocation
    # ------------------------------------------------------------------
    def allocate_bytes(self, count):
        """Reserve ``count`` bytes below ``data_ptr`` and return the lowest address.

        Raises:
            Exception: if the request does not fit above address 0x000.
        """
        start_addr = self.data_ptr - (count - 1)
        if start_addr < 0x000:
            raise Exception("Out of memory!")
        self.data_ptr = start_addr - 1
        return start_addr

    def allocate_var(self, name, var_type="int"):
        """Return the variable ``name``, allocating it on first use.

        A struct variable (``"struct:<name>"``) takes one byte per field;
        every other type takes a single byte.  An existing variable is
        returned unchanged, whatever ``var_type`` is passed.
        """
        var_type = self.apply_typedef(var_type)
        if name in self.variables:
            return self.variables[name]

        if var_type.startswith("struct:"):
            sname = var_type.split(":")[1]
            size = len(self.struct_definitions[sname])  # each field is 1 byte
        else:
            size = 1
        var = Variable(name, self.allocate_bytes(size), var_type)
        self.variables[name] = var
        return var

    def allocate_array(self, name, length, var_type="int"):
        """Allocate ``length`` element bytes plus a one-byte pointer variable.

        ``var_type`` is accepted for interface compatibility; elements are
        always one byte.  Returns ``(variable, first_element_address)``; the
        caller emits the code that stores the element address in the variable.
        """
        arr_start = self.allocate_bytes(length)
        var = Variable(name, self.allocate_bytes(1), "array")
        self.variables[name] = var
        return var, arr_start

    def store_string(self, string_value):
        """Allocate a NUL-terminated string and emit code that initialises it.

        The two-character sequence ``\\n`` in the literal becomes a newline.
        Returns ``(asm_lines, start_address)``.
        """
        string_value = string_value.replace('\\n', '\n')
        start_addr = self.allocate_bytes(len(string_value) + 1)
        asm = []
        current_addr = start_addr
        for ch in string_value:
            asm.append(f"ldw a, {ord(ch)}")
            asm.append(f"str a, 0x{current_addr:X}")
            current_addr += 1
        # Terminating NUL byte.
        asm.append("ldw a, 0")
        asm.append(f"str a, 0x{current_addr:X}")
        return asm, start_addr

    def get_struct_field_offset(self, struct_type, field_name):
        """Return the byte offset of ``field_name`` inside ``struct_type``.

        Raises:
            Exception: if the struct has no field of that name.
        """
        sname = struct_type.split(":")[1]
        for i, (fname, _ftype) in enumerate(self.struct_definitions[sname]):
            if fname == field_name:
                return i
        raise Exception(f"Field {field_name} not found in {struct_type}")

    # ------------------------------------------------------------------
    # Conditions
    # ------------------------------------------------------------------
    def parse_condition(self, cond_str):
        """Split ``"a == b"`` / ``"a != b"`` into ``(var1, op, var2)``.

        Raises:
            Exception: for any other form of condition.
        """
        m = re.match(r'([a-zA-Z_]\w*)\s*(==|!=)\s*([a-zA-Z_]\w*)', cond_str.strip())
        if not m:
            raise Exception("Unsupported condition: " + cond_str)
        var1, op, var2 = m.groups()
        return var1, op, var2

    def compile_condition(self, var1, op, var2):
        """Emit code leaving ``var1 - var2`` in register a; return ``(asm, op)``."""
        v1 = self.allocate_var(var1)
        v2 = self.allocate_var(var2)
        asm = [
            f"ldr a, 0x{v1.address:X}",
            f"ldr b, 0x{v2.address:X}",
            "sub a, b",  # a = a - b
        ]
        return asm, op

    @staticmethod
    def _branch_if_false(cmp_op, target):
        """Return the jump to ``target`` taken when the condition fails.

        Register a holds ``var1 - var2``: ``==`` fails when a is non-zero,
        ``!=`` fails when a is zero.
        """
        mnemonic = "bne" if cmp_op == '==' else "beq"
        return f"{mnemonic} a, 0, " + target

    # ------------------------------------------------------------------
    # Statement compilation
    # ------------------------------------------------------------------
    def extract_comment(self, line):
        """Split ``line`` at the first ``//`` into ``(code, comment)``.

        ``comment`` is stripped; it is ``None`` when the line has no ``//``.
        """
        comment_index = line.find('//')
        if comment_index != -1:
            return line[:comment_index], line[comment_index + 2:].strip()
        return line, None

    def _field_address_code(self, varname, fieldname):
        """Emit code leaving the address of ``varname.fieldname`` in register a.

        The struct's storage starts at the variable's own address, so that
        address is loaded as an immediate (``ldw``) rather than read from
        memory.
        """
        v = self.allocate_var(varname)
        offset = self.get_struct_field_offset(v.type, fieldname)
        asm = [f"ldw a, 0x{v.address:X}"]
        if offset != 0:
            asm.append(f"ldw b, {offset}")
            asm.append("add a, b")
        return asm

    def _element_address_code(self, arr, index):
        """Emit code leaving the address of ``arr[index]`` in register a.

        The array variable holds a pointer to the first element, so it is
        read from memory with ``ldr``.
        """
        arr_var = self.allocate_var(arr)
        return [
            f"ldr a, 0x{arr_var.address:X}",
            f"ldw b, {index}",
            "add a, b",
        ]

    def compile_line(self, code_part):
        """Compile one comment-free source line and return its assembly lines.

        Struct definitions, declarations and block bookkeeping update the
        compiler state; unrecognised or empty lines return an empty list.
        """
        line = code_part.strip()
        asm = []

        if self.in_struct_def:
            if line.startswith("};"):
                self.struct_definitions[self.current_struct_name] = self.current_struct_fields
                self.in_struct_def = False
                self.current_struct_name = None
                self.current_struct_fields = []
                return asm
            mfield = re.match(r'int\s+([a-zA-Z_]\w*)\s*;', line)
            if mfield:
                self.current_struct_fields.append((mfield.group(1), "int"))
                return asm

        # struct definition start
        msd = re.match(r'struct\s+([a-zA-Z_]\w*)\s*\{', line)
        if msd:
            self.in_struct_def = True
            self.current_struct_name = msd.group(1)
            self.current_struct_fields = []
            return asm

        # struct var declaration
        msv = re.match(r'struct\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line)
        if msv:
            sname, varname = msv.groups()
            self.allocate_var(varname, "struct:" + sname)
            return asm

        # if statement
        mif = re.match(r'if\s*\(([^)]+)\)\s*\{', line)
        if mif:
            var1, op, var2 = self.parse_condition(mif.group(1))
            end_label = self.new_label("endif")
            cond_code, cmp_op = self.compile_condition(var1, op, var2)
            asm.extend(cond_code)
            asm.append(self._branch_if_false(cmp_op, end_label))
            self.block_stack.append(('if', end_label))
            return asm

        # while statement
        mwhile = re.match(r'while\s*\(([^)]+)\)\s*\{', line)
        if mwhile:
            var1, op, var2 = self.parse_condition(mwhile.group(1))
            start_label = self.new_label("whilestart")
            end_label = self.new_label("whileend")
            asm.append(start_label + ":")
            cond_code, cmp_op = self.compile_condition(var1, op, var2)
            asm.extend(cond_code)
            asm.append(self._branch_if_false(cmp_op, end_label))
            self.block_stack.append(('while', start_label, end_label))
            return asm

        # end of block
        if line == "}":
            if not self.block_stack:
                return asm
            blk = self.block_stack.pop()
            if blk[0] == 'if':
                asm.append(blk[1] + ":")
            elif blk[0] == 'while':
                # jump back to start
                asm.append(f"jmp {blk[1]}")
                asm.append(blk[2] + ":")
            return asm

        # p.x = number;
        m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
        if m:
            varname, fieldname, value = m.groups()
            asm.extend(self._field_address_code(varname, fieldname))
            asm.append(f"ldw c, {int(value)}")
            asm.append("stb c, a")
            return asm

        # p.x = var + number;
        m = re.match(
            r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line
        )
        if m:
            varname, fieldname, srcvar, number = m.groups()
            asm.extend(self._field_address_code(varname, fieldname))
            v2 = self.allocate_var(srcvar)
            asm.append(f"ldr c, 0x{v2.address:X}")
            asm.append(f"ldw d, {int(number)}")
            asm.append("add c, d")
            asm.append("stb c, a")
            return asm

        # p.x = srcvar;
        m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*;', line)
        if m:
            varname, fieldname, srcvar = m.groups()
            asm.extend(self._field_address_code(varname, fieldname))
            v2 = self.allocate_var(srcvar)
            asm.append(f"ldr c, 0x{v2.address:X}")
            asm.append("stb c, a")
            return asm

        # x = p.x;
        m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*;', line)
        if m:
            dst, varname, fieldname = m.groups()
            address_code = self._field_address_code(varname, fieldname)
            vd = self.allocate_var(dst)
            asm.extend(address_code)
            asm.append("ldb c, a")
            asm.append(f"str c, 0x{vd.address:X}")
            return asm

        # print_int(p.x);
        m = re.match(r'print_int\(([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\)\s*;', line)
        if m:
            varname, fieldname = m.groups()
            asm.extend(self._field_address_code(varname, fieldname))
            asm.append("ldb a, a")
            asm.append("int 0x01")
            return asm

        # int arr[10];
        m = re.match(r'int\s+([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
        if m:
            arr_var, start_addr = self.allocate_array(m.group(1), int(m.group(2)))
            # Store the address of element 0 in the array variable.
            asm.append(f"ldw a, 0x{start_addr:X}")
            asm.append(f"str a, 0x{arr_var.address:X}")
            return asm

        # int x = number;
        m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
        if m:
            value = int(m.group(2))
            var = self.allocate_var(m.group(1), "int")
            asm.append(f"ldw a, {value}")
            asm.append(f"str a, 0x{var.address:X}")
            return asm

        # int y = x + number;
        m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
        if m:
            varname, var2, number = m.groups()
            v1 = self.allocate_var(varname, "int")
            v2 = self.allocate_var(var2, "int")
            asm.append(f"ldr a, 0x{v2.address:X}")
            asm.append(f"ldw b, {int(number)}")
            asm.append("add a, b")
            asm.append(f"str a, 0x{v1.address:X}")
            return asm

        # char *msg = "Hello\n";
        m = re.match(r'char\s*\*\s*([a-zA-Z_]\w*)\s*=\s*"([^"]*)"\s*;', line)
        if m:
            varname, string_val = m.groups()
            v = self.allocate_var(varname, "char*")
            code, start_addr = self.store_string(string_val)
            asm.extend(code)
            # The pointer variable holds the address of the first character.
            asm.append(f"ldw a, 0x{start_addr:X}")
            asm.append(f"str a, 0x{v.address:X}")
            return asm

        # var = number;
        m = re.match(r'([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
        if m:
            varname, value = m.groups()
            v = self.allocate_var(varname, "int")
            asm.append(f"ldw a, {int(value)}")
            asm.append(f"str a, 0x{v.address:X}")
            return asm

        # var = var2 + number;
        m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
        if m:
            varname, var2, number = m.groups()
            v1 = self.allocate_var(varname, "int")
            v2 = self.allocate_var(var2, "int")
            asm.append(f"ldr a, 0x{v2.address:X}")
            asm.append(f"ldw b, {int(number)}")
            asm.append("add a, b")
            asm.append(f"str a, 0x{v1.address:X}")
            return asm

        # var[index] = number;
        m = re.match(r'([a-zA-Z_]\w*)\[(\d+)\]\s*=\s*(\d+)\s*;', line)
        if m:
            arr, index, value = m.groups()
            asm.extend(self._element_address_code(arr, int(index)))
            asm.append(f"ldw c, {int(value)}")
            asm.append("stb c, a")
            return asm

        # x = arr[index];
        m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
        if m:
            varname, arr, index = m.groups()
            v = self.allocate_var(varname, "int")
            asm.extend(self._element_address_code(arr, int(index)))
            asm.append("ldb d, a")
            asm.append(f"str d, 0x{v.address:X}")
            return asm

        # print_char(var);
        m = re.match(r'print_char\(([a-zA-Z_]\w*)\)\s*;', line)
        if m:
            v = self.allocate_var(m.group(1))
            asm.append(f"ldr a, 0x{v.address:X}")
            asm.append("int 0x00")
            return asm

        # print_char(arr[index]);
        m = re.match(r'print_char\(([a-zA-Z_]\w*)\[(\d+)\]\)\s*;', line)
        if m:
            arr, index = m.groups()
            asm.extend(self._element_address_code(arr, int(index)))
            asm.append("ldb a, a")
            asm.append("int 0x00")
            return asm

        # print_int(var);
        m = re.match(r'print_int\(([a-zA-Z_]\w*)\)\s*;', line)
        if m:
            v = self.allocate_var(m.group(1))
            asm.append(f"ldr a, 0x{v.address:X}")
            asm.append("int 0x01")
            return asm

        # print_string(var);
        m = re.match(r'print_string\(([a-zA-Z_]\w*)\)\s*;', line)
        if m:
            v = self.allocate_var(m.group(1), "char*")
            # Fresh labels per call so several print_string statements can
            # coexist in one program.
            loop_label = self.new_label("string_loop")
            end_label = self.new_label("string_end")
            asm.append("ldw d, 0")
            asm.append(f"ldr b, 0x{v.address:X}")
            asm.append("ldw c, 1")
            asm.append(loop_label + ":")
            asm.append("ldb a, b")
            asm.append(f"beq a, d, {end_label}")
            asm.append("int 0x00")
            asm.append("add b, c")
            asm.append(f"jmp {loop_label}")
            asm.append(end_label + ":")
            return asm

        # return number;  (the value itself is not used)
        m = re.match(r'return\s+(\d+)\s*;', line)
        if m:
            asm.append("int 0xFF")
            return asm

        # Unrecognized line or empty
        return asm

    @staticmethod
    def _split_main(all_lines):
        """Partition source lines into ``(outside_main, main_body)``.

        main starts after the first line containing ``int main(`` and ends
        at the next line that begins with ``}`` in column 0; neither
        delimiter line is returned.  Trailing whitespace is stripped.
        """
        outside, body = [], []
        state = "before"
        for raw in all_lines:
            cline = raw.rstrip()
            if state == "before" and 'int main(' in cline:
                state = "inside"
            elif state == "inside":
                if cline.startswith('}'):
                    state = "after"
                else:
                    body.append(cline)
            else:
                outside.append(cline)
        return outside, body

    def compile_c(self, c_code):
        """Compile the body of ``main`` in ``c_code`` to a list of asm lines.

        Lines outside ``main`` are compiled first purely for their effect on
        compiler state (struct definitions, declarations); their code is
        discarded.  Main's lines are compiled exactly once, so arrays and
        string literals are allocated a single time.  A ``//`` comment is
        attached to the first instruction of its line, or emitted on its own
        when the line produced no code.
        """
        outside_lines, main_lines = self._split_main(c_code.split('\n'))

        # struct definitions might appear outside main
        for cline in outside_lines:
            code_part, _comment = self.extract_comment(cline)
            self.compile_line(code_part)

        asm = ["main:"]
        for line in main_lines:
            code_part, comment_part = self.extract_comment(line)
            instructions = self.compile_line(code_part)
            if instructions:
                for i, instr in enumerate(instructions):
                    if i == 0 and comment_part:
                        asm.append(f" {instr} ; {comment_part}")
                    else:
                        asm.append(f" {instr}")
            elif comment_part:
                asm.append(f" ; {comment_part}")
        return asm


if __name__ == "__main__":
    compiler = Compiler()
    preprocessed_lines = compiler.preprocess("main.c")
    c_code = "\n".join(preprocessed_lines)
    asm_code = compiler.compile_c(c_code)
    with open("test.asm", "w") as out:
        for line in asm_code:
            out.write(line + "\n")