# Python-Cpu-Emulator/c-to-asm.py
# OusmBlueNinja b88b8bffc9 Main
# 2024-12-23 23:17:07 -06:00
#
# 549 lines
# 19 KiB
# Python

import re
import os
from termcolor import colored
class Variable:
    """A named storage location allocated by the compiler.

    Attributes:
        name: Identifier used in the C source.
        address: Address handed out by ``Compiler.allocate_bytes``.
        type: Type tag; the compiler uses ``"int"``, ``"char*"``,
            ``"array"`` and ``"struct:<name>"``.
    """

    def __init__(self, name, address, var_type="int"):
        self.name = name
        self.address = address
        self.type = var_type

    def __repr__(self):
        # Addresses appear in hex in the generated assembly, so show them
        # the same way when debugging.
        return (
            f"{type(self).__name__}(name={self.name!r}, "
            f"address=0x{self.address:X}, type={self.type!r})"
        )
class Compiler:
    """Translate a small subset of C into assembly text.

    The compiler works line by line: every supported C statement form is
    recognised by a regular expression in ``compile_line`` and turned into a
    short list of assembly instructions.  Storage is handed out from the top
    of memory downwards by ``allocate_bytes``.

    Instruction usage in this file (inferred from the emitted code itself --
    TODO confirm against the emulator's ISA):
        ``ldw r, imm``   load an immediate into a register
        ``ldr r, addr``  load a register from memory
        ``str r, addr``  store a register to memory
        ``ldb r, r2``    load from the address held in ``r2``
        ``stb r, r2``    store to the address held in ``r2``
    """

    # C escape sequences decoded inside string literals.
    _ESCAPES = {
        "n": "\n",
        "t": "\t",
        "r": "\r",
        "0": "\0",
        "\\": "\\",
        '"': '"',
        "'": "'",
    }

    def __init__(self):
        # 1024 bytes total: 0x000 to 0x3FF
        # NOTE(review): data_ptr is the highest free address, so the first
        # allocation returns 0x400 -- one past the range stated above.
        # Confirm the emulator's memory size before changing this value.
        self.data_ptr = 0x400
        self.variables = {}
        self.struct_definitions = {}
        self.in_struct_def = False
        self.current_struct_name = None
        self.current_struct_fields = []
        self.defines = {}  # For #define macros
        self.typedefs = {}  # For typedef
        self.label_counter = 0
        self.block_stack = []  # For if/while blocks

    def new_label(self, prefix):
        """Return a label unique to this compiler: ``prefix`` + a counter."""
        lbl = f"{prefix}{self.label_counter}"
        self.label_counter += 1
        return lbl

    # ------------------------------------------------------------------
    # Preprocessing
    # ------------------------------------------------------------------
    def preprocess(self, filename):
        """Read ``filename`` (following includes) and expand ``#define`` macros.

        Returns the list of resulting source lines.
        """
        return self._apply_defines(self._read_file_recursive(filename))

    @staticmethod
    def _resolve_include(base_dir, inc_file):
        """Pick the path to open for ``#include "inc_file"``.

        The including file's directory is tried first (the usual rule for
        quoted includes); if the file is not there but exists relative to
        the working directory, that path is used instead, which preserves
        the previous behaviour.
        """
        candidate = os.path.join(base_dir, inc_file)
        if base_dir and not os.path.exists(candidate) and os.path.exists(inc_file):
            return inc_file
        return candidate

    def _read_file_recursive(self, filename, included_files=None):
        """Return the lines of ``filename`` with preprocessor lines consumed.

        ``#include "file"`` lines are replaced by the included file's lines,
        ``#define`` lines are recorded in ``self.defines`` and simple
        ``typedef old new;`` lines in ``self.typedefs``; none of these are
        passed on.  A missing file is reported on stdout and contributes no
        lines.  ``included_files`` tracks paths already read so that each
        file is included at most once.
        """
        if included_files is None:
            included_files = set()
        if filename in included_files:
            # Prevent infinite recursion on includes
            return []
        included_files.add(filename)

        result_lines = []
        base_dir = os.path.dirname(filename)
        try:
            with open(filename, "r") as f:
                for line in f:
                    stripped = line.strip()

                    # #include "file"
                    inc_match = re.match(r'#include\s+"([^"]+)"', stripped)
                    if inc_match:
                        inc_file = self._resolve_include(base_dir, inc_match.group(1))
                        result_lines.extend(
                            self._read_file_recursive(inc_file, included_files)
                        )
                        continue

                    # #define KEY [VALUE] -- a missing value defines an
                    # empty macro.
                    def_match = re.match(
                        r'#define\s+([a-zA-Z_]\w*)(?:\s+(.*))?$', stripped
                    )
                    if def_match:
                        self.defines[def_match.group(1)] = def_match.group(2) or ""
                        continue

                    # typedef oldtype newtype;
                    tmatch = re.match(
                        r'typedef\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', stripped
                    )
                    if tmatch:
                        oldt, newt = tmatch.groups()
                        # Resolve oldt if it's also a typedef, so chains are
                        # flattened at definition time.
                        self.typedefs[newt] = self.apply_typedef(oldt)
                        continue

                    result_lines.append(line)
        except FileNotFoundError as e:
            print(colored(f"{filename}:0: error: {e}", "red"))
        return result_lines

    def _apply_defines(self, lines):
        """Replace every identifier token that names a macro with its value.

        Substitution is purely textual and single-pass; it also applies
        inside string literals and comments.  Each replacement is logged to
        stdout.
        """
        token_pattern = re.compile(r'([A-Za-z0-9_]+)')
        processed = []
        for line in lines:
            # split() with a capturing group keeps both tokens and separators
            parts = token_pattern.split(line)
            for i, part in enumerate(parts):
                if part in self.defines:
                    print(f"Replaced {part} with {self.defines[part]}")
                    parts[i] = self.defines[part]
            processed.append("".join(parts))
        return processed

    def apply_typedef(self, t):
        """Return the type ``t`` aliases, or ``t`` itself if it is no alias."""
        return self.typedefs.get(t, t)

    # ------------------------------------------------------------------
    # Storage allocation
    # ------------------------------------------------------------------
    def allocate_bytes(self, count):
        """Reserve ``count`` consecutive bytes and return the lowest address.

        Memory is handed out downwards from ``self.data_ptr``.

        Raises:
            Exception: if the request would extend below address 0.
        """
        start_addr = self.data_ptr - (count - 1)
        if start_addr < 0x000:
            raise Exception("Out of memory!")
        self.data_ptr = start_addr - 1
        return start_addr

    def allocate_var(self, name, var_type="int"):
        """Return the ``Variable`` called ``name``, allocating it on first use.

        An existing variable is returned unchanged (``var_type`` is then
        ignored).  Struct variables get one byte per field, everything else
        a single byte.

        Raises:
            Exception: if ``var_type`` names a struct that is not defined.
        """
        var_type = self.apply_typedef(var_type)
        if name in self.variables:
            return self.variables[name]

        if var_type.startswith("struct:"):
            sname = var_type.split(":")[1]
            if sname not in self.struct_definitions:
                raise Exception(f"Unknown struct {sname}")
            size = len(self.struct_definitions[sname])  # each 1 byte
        else:
            size = 1

        var = Variable(name, self.allocate_bytes(size), var_type)
        self.variables[name] = var
        return var

    def allocate_array(self, name, length, var_type="int"):
        """Allocate ``length`` bytes of array storage plus a pointer variable.

        Returns ``(var, arr_start)`` where ``var`` is the one-byte variable
        (typed ``"array"``) meant to hold the array's start address and
        ``arr_start`` is that address.  ``var_type`` is accepted for
        interface compatibility but the element type is not recorded.
        """
        arr_start = self.allocate_bytes(length)
        var = Variable(name, self.allocate_bytes(1), "array")
        self.variables[name] = var
        return var, arr_start

    @classmethod
    def _decode_escapes(cls, text):
        """Decode the escape sequences listed in ``_ESCAPES``.

        Unknown escapes are left as written (backslash included).
        """
        return re.sub(
            r'\\(.)', lambda m: cls._ESCAPES.get(m.group(1), m.group(0)), text
        )

    def store_string(self, string_value):
        """Allocate a NUL-terminated string and emit code that fills it in.

        ``string_value`` is the literal's text as written in the C source
        (escape sequences not yet decoded).

        Returns ``(asm, start_addr)``.
        """
        text = self._decode_escapes(string_value)
        start_addr = self.allocate_bytes(len(text) + 1)  # +1 for the terminator
        asm = []
        for current_addr, ch in enumerate(text, start=start_addr):
            asm.append(f"ldw a, {ord(ch)}")
            asm.append(f"str a, 0x{current_addr:X}")
        asm.append("ldw a, 0")
        asm.append(f"str a, 0x{start_addr + len(text):X}")
        return asm, start_addr

    def get_struct_field_offset(self, struct_type, field_name):
        """Return the byte offset of ``field_name`` inside ``struct_type``.

        ``struct_type`` is a ``"struct:<name>"`` tag; every field occupies
        one byte, so the offset is the field's index.

        Raises:
            Exception: if ``struct_type`` is not a known struct tag or the
                field does not exist.
        """
        prefix, _, sname = struct_type.partition(":")
        if prefix != "struct" or sname not in self.struct_definitions:
            raise Exception(f"{struct_type} is not a known struct type")
        for i, (fname, _ftype) in enumerate(self.struct_definitions[sname]):
            if fname == field_name:
                return i
        raise Exception(f"Field {field_name} not found in {struct_type}")

    # ------------------------------------------------------------------
    # Conditions
    # ------------------------------------------------------------------
    def parse_condition(self, cond_str):
        """Split ``"a == b"`` / ``"a != b"`` into ``(left, op, right)``.

        The left operand must be an identifier; the right operand may be an
        identifier or a decimal literal (which is what a ``#define``
        constant turns into after substitution).

        Raises:
            Exception: for anything else, including trailing text such as
                ``a == b + 1``.
        """
        # fullmatch so that unsupported trailing expressions are rejected
        # instead of being silently dropped.
        m = re.fullmatch(
            r'([a-zA-Z_]\w*)\s*(==|!=)\s*([a-zA-Z_]\w*|\d+)', cond_str.strip()
        )
        if not m:
            raise Exception("Unsupported condition: " + cond_str)
        var1, op, var2 = m.groups()
        return var1, op, var2

    def compile_condition(self, var1, op, var2):
        """Emit code leaving ``var1 - var2`` in register ``a``.

        ``var2`` may be a variable name or a decimal literal.  Returns
        ``(asm, op)``; the caller picks the branch instruction from ``op``.
        """
        left = self.allocate_var(var1)
        if var2.isdigit():
            load_right = f"ldw b, {var2}"
        else:
            load_right = f"ldr b, 0x{self.allocate_var(var2).address:X}"
        asm = [
            f"ldr a, 0x{left.address:X}",
            load_right,
            # a = a - b
            "sub a, b",
        ]
        return asm, op

    @staticmethod
    def _branch_past(op, label):
        """Return the branch that skips to ``label`` when ``op`` is false.

        Register ``a`` holds ``left - right``: for ``==`` the block is
        skipped when ``a`` is non-zero, for ``!=`` when it is zero.
        """
        mnemonic = "bne" if op == '==' else "beq"
        return f"{mnemonic} a, 0, {label}"

    def extract_comment(self, line):
        """Split ``line`` at the first ``//`` into ``(code, comment)``.

        ``comment`` is stripped; it is ``None`` when the line has no ``//``.
        """
        code_part, marker, comment_part = line.partition('//')
        if marker:
            return code_part, comment_part.strip()
        return line, None

    # ------------------------------------------------------------------
    # Statement compilation
    # ------------------------------------------------------------------
    def _field_address_asm(self, varname, fieldname):
        """Return the instruction loading the address of ``varname.fieldname``.

        A struct variable's address is the address of its first field (no
        pointer is stored for it), so the field address is a compile-time
        constant and is loaded as an immediate.
        """
        v = self.allocate_var(varname)
        offset = self.get_struct_field_offset(v.type, fieldname)
        return f"ldw a, 0x{v.address + offset:X}"

    @staticmethod
    def _element_address_asm(arr_var, index):
        """Return code leaving the address of ``arr[index]`` in register ``a``.

        The array variable holds a pointer to the array's storage, so it is
        read from memory and the index added to it.
        """
        return [
            f"ldr a, 0x{arr_var.address:X}",
            f"ldw b, {index}",
            "add a, b",
        ]

    def compile_line(self, code_part):
        """Compile one line of C (comment already removed) to assembly.

        Returns the list of instructions for the line; unrecognised or
        empty lines yield an empty list.  Struct definitions and
        declarations update compiler state without emitting code.  The
        patterns are tried in a fixed order, most specific first.
        """
        line = code_part.strip()
        asm = []
        ident = r'([a-zA-Z_]\w*)'

        # Inside "struct Name { ... };": collect fields, emit nothing.
        if self.in_struct_def:
            if line.startswith("};"):
                self.struct_definitions[self.current_struct_name] = self.current_struct_fields
                self.in_struct_def = False
                self.current_struct_name = None
                self.current_struct_fields = []
                return asm
            mfield = re.match(r'int\s+' + ident + r'\s*;', line)
            if mfield:
                self.current_struct_fields.append((mfield.group(1), "int"))
            return asm

        # struct definition start
        msd = re.match(r'struct\s+' + ident + r'\s*\{', line)
        if msd:
            self.in_struct_def = True
            self.current_struct_name = msd.group(1)
            self.current_struct_fields = []
            return asm

        # struct var declaration
        msv = re.match(r'struct\s+' + ident + r'\s+' + ident + r'\s*;', line)
        if msv:
            sname, varname = msv.groups()
            self.allocate_var(varname, "struct:" + sname)
            return asm

        # if statement
        mif = re.match(r'if\s*\(([^)]+)\)\s*\{', line)
        if mif:
            var1, op, var2 = self.parse_condition(mif.group(1))
            end_label = self.new_label("endif")
            cond_code, cmp_op = self.compile_condition(var1, op, var2)
            asm.extend(cond_code)
            asm.append(self._branch_past(cmp_op, end_label))
            self.block_stack.append(('if', end_label))
            return asm

        # while statement
        mwhile = re.match(r'while\s*\(([^)]+)\)\s*\{', line)
        if mwhile:
            var1, op, var2 = self.parse_condition(mwhile.group(1))
            start_label = self.new_label("whilestart")
            end_label = self.new_label("whileend")
            asm.append(start_label + ":")
            cond_code, cmp_op = self.compile_condition(var1, op, var2)
            asm.extend(cond_code)
            asm.append(self._branch_past(cmp_op, end_label))
            self.block_stack.append(('while', start_label, end_label))
            return asm

        # end of block
        if line == "}":
            if not self.block_stack:
                return asm
            blk = self.block_stack.pop()
            if blk[0] == 'if':
                asm.append(blk[1] + ":")
            elif blk[0] == 'while':
                _, start_label, end_label = blk
                # jump back to start
                asm.append(f"jmp {start_label}")
                asm.append(end_label + ":")
            return asm

        # p.x = number;
        m = re.match(ident + r'\.' + ident + r'\s*=\s*(\d+)\s*;', line)
        if m:
            varname, fieldname, value = m.groups()
            asm.append(self._field_address_asm(varname, fieldname))
            asm.append(f"ldw c, {int(value)}")
            asm.append("stb c, a")
            return asm

        # p.x = var + number;
        m = re.match(
            ident + r'\.' + ident + r'\s*=\s*' + ident + r'\s*\+\s*(\d+)\s*;', line
        )
        if m:
            varname, fieldname, srcvar, number = m.groups()
            asm.append(self._field_address_asm(varname, fieldname))
            v2 = self.allocate_var(srcvar)
            asm.append(f"ldr c, 0x{v2.address:X}")
            asm.append(f"ldw d, {int(number)}")
            asm.append("add c, d")
            asm.append("stb c, a")
            return asm

        # p.x = srcvar;
        m = re.match(ident + r'\.' + ident + r'\s*=\s*' + ident + r'\s*;', line)
        if m:
            varname, fieldname, srcvar = m.groups()
            asm.append(self._field_address_asm(varname, fieldname))
            v2 = self.allocate_var(srcvar)
            asm.append(f"ldr c, 0x{v2.address:X}")
            asm.append("stb c, a")
            return asm

        # x = p.x;
        m = re.match(ident + r'\s*=\s*' + ident + r'\.' + ident + r'\s*;', line)
        if m:
            dst, varname, fieldname = m.groups()
            load_addr = self._field_address_asm(varname, fieldname)
            vd = self.allocate_var(dst)
            asm.append(load_addr)
            asm.append("ldb c, a")
            asm.append(f"str c, 0x{vd.address:X}")
            return asm

        # print_int(p.x);
        m = re.match(r'print_int\(' + ident + r'\.' + ident + r'\)\s*;', line)
        if m:
            varname, fieldname = m.groups()
            asm.append(self._field_address_asm(varname, fieldname))
            asm.append("ldb a, a")
            asm.append("int 0x01")
            return asm

        # int arr[10];
        m = re.match(r'int\s+' + ident + r'\[(\d+)\]\s*;', line)
        if m:
            arr_var, start_addr = self.allocate_array(m.group(1), int(m.group(2)))
            # Store the array's start address in its pointer variable.
            asm.append(f"ldw a, 0x{start_addr:X}")
            asm.append(f"str a, 0x{arr_var.address:X}")
            return asm

        # int x = number;
        m = re.match(r'int\s+' + ident + r'\s*=\s*(\d+)\s*;', line)
        if m:
            var = self.allocate_var(m.group(1), "int")
            asm.append(f"ldw a, {int(m.group(2))}")
            asm.append(f"str a, 0x{var.address:X}")
            return asm

        # int y = x + number;
        m = re.match(r'int\s+' + ident + r'\s*=\s*' + ident + r'\s*\+\s*(\d+)\s*;', line)
        if m:
            varname, var2, number = m.groups()
            v1 = self.allocate_var(varname, "int")
            v2 = self.allocate_var(var2, "int")
            asm.append(f"ldr a, 0x{v2.address:X}")
            asm.append(f"ldw b, {int(number)}")
            asm.append("add a, b")
            asm.append(f"str a, 0x{v1.address:X}")
            return asm

        # char *msg = "Hello\n";
        m = re.match(r'char\s*\*\s*' + ident + r'\s*=\s*"([^"]*)"\s*;', line)
        if m:
            varname, string_val = m.groups()
            v = self.allocate_var(varname, "char*")
            code, start_addr = self.store_string(string_val)
            asm.extend(code)
            asm.append(f"ldw a, 0x{start_addr:X}")
            asm.append(f"str a, 0x{v.address:X}")
            return asm

        # var = number;
        m = re.match(ident + r'\s*=\s*(\d+)\s*;', line)
        if m:
            varname, value = m.groups()
            v = self.allocate_var(varname, "int")
            asm.append(f"ldw a, {int(value)}")
            asm.append(f"str a, 0x{v.address:X}")
            return asm

        # var = var2 + number;
        m = re.match(ident + r'\s*=\s*' + ident + r'\s*\+\s*(\d+)\s*;', line)
        if m:
            varname, var2, number = m.groups()
            v1 = self.allocate_var(varname, "int")
            v2 = self.allocate_var(var2, "int")
            asm.append(f"ldr a, 0x{v2.address:X}")
            asm.append(f"ldw b, {int(number)}")
            asm.append("add a, b")
            asm.append(f"str a, 0x{v1.address:X}")
            return asm

        # var[index] = number;
        m = re.match(ident + r'\[(\d+)\]\s*=\s*(\d+)\s*;', line)
        if m:
            arr, index, value = m.groups()
            arr_var = self.allocate_var(arr)
            asm.extend(self._element_address_asm(arr_var, int(index)))
            asm.append(f"ldw c, {int(value)}")
            asm.append("stb c, a")
            return asm

        # x = arr[index];
        m = re.match(ident + r'\s*=\s*' + ident + r'\[(\d+)\]\s*;', line)
        if m:
            varname, arr, index = m.groups()
            v = self.allocate_var(varname, "int")
            arr_var = self.allocate_var(arr)
            asm.extend(self._element_address_asm(arr_var, int(index)))
            asm.append("ldb d, a")
            asm.append(f"str d, 0x{v.address:X}")
            return asm

        # print_char(var);
        m = re.match(r'print_char\(' + ident + r'\)\s*;', line)
        if m:
            v = self.allocate_var(m.group(1))
            asm.append(f"ldr a, 0x{v.address:X}")
            asm.append("int 0x00")
            return asm

        # print_char(arr[index]);
        m = re.match(r'print_char\(' + ident + r'\[(\d+)\]\)\s*;', line)
        if m:
            arr, index = m.groups()
            arr_var = self.allocate_var(arr)
            asm.extend(self._element_address_asm(arr_var, int(index)))
            asm.append("ldb a, a")
            asm.append("int 0x00")
            return asm

        # print_int(var);
        m = re.match(r'print_int\(' + ident + r'\)\s*;', line)
        if m:
            v = self.allocate_var(m.group(1))
            asm.append(f"ldr a, 0x{v.address:X}")
            asm.append("int 0x01")
            return asm

        # print_string(var);
        m = re.match(r'print_string\(' + ident + r'\)\s*;', line)
        if m:
            v = self.allocate_var(m.group(1), "char*")
            # Fresh labels per call: fixed names would be defined twice as
            # soon as a program prints two strings.
            loop_label = self.new_label("string_loop")
            end_label = self.new_label("string_end")
            asm.append("ldw d, 0")
            asm.append(f"ldr b, 0x{v.address:X}")
            asm.append("ldw c, 1")
            asm.append(loop_label + ":")
            asm.append("ldb a, b")
            asm.append(f"beq a, d, {end_label}")
            asm.append("int 0x00")
            asm.append("add b, c")
            asm.append(f"jmp {loop_label}")
            asm.append(end_label + ":")
            return asm

        # return number; (the value itself is not used)
        m = re.match(r'return\s+(\d+)\s*;', line)
        if m:
            asm.append("int 0xFF")
            return asm

        # Unrecognized line or empty
        return asm

    @staticmethod
    def _split_main(all_lines):
        """Separate the body of ``main`` from the rest of the source.

        ``main`` starts after the first line containing ``int main(`` and
        ends at the next line that begins with ``}`` in column 0.  Returns
        ``(body, outside)``; neither list contains those two delimiter
        lines.
        """
        body, outside = [], []
        state = "before"
        for raw in all_lines:
            cline = raw.rstrip()
            if state == "before" and 'int main(' in cline:
                state = "inside"
            elif state == "inside":
                if cline.startswith('}'):
                    state = "after"
                else:
                    body.append(cline)
            else:
                outside.append(raw)
        return body, outside

    def compile_c(self, c_code):
        """Compile preprocessed C source and return the assembly lines.

        Lines outside ``main`` are processed first so that struct
        definitions and file-scope declarations are known; any code they
        would generate is discarded.  The body of ``main`` is then compiled
        exactly once -- compiling it twice would allocate every string and
        array twice and skew label numbering.  ``//`` comments are carried
        over as ``;`` comments on the first instruction of their line.
        """
        body, outside = self._split_main(c_code.split('\n'))

        # struct definitions might appear outside main
        for cline in outside:
            self.compile_line(cline)

        asm = ["main:"]
        for line in body:
            code_part, comment_part = self.extract_comment(line)
            instructions = self.compile_line(code_part)
            if not instructions:
                if comment_part:
                    asm.append(f" ; {comment_part}")
                continue
            for i, instr in enumerate(instructions):
                if i == 0 and comment_part:
                    asm.append(f" {instr} ; {comment_part}")
                else:
                    asm.append(f" {instr}")
        return asm
if __name__ == "__main__":
    # Usage: c-to-asm.py [input.c] [output.asm]
    # Both arguments are optional; the defaults are the file names that
    # used to be hard-coded.
    import sys

    source_path = sys.argv[1] if len(sys.argv) > 1 else "main.c"
    output_path = sys.argv[2] if len(sys.argv) > 2 else "test.asm"

    compiler = Compiler()
    preprocessed_lines = compiler.preprocess(source_path)
    c_code = "\n".join(preprocessed_lines)
    asm_code = compiler.compile_c(c_code)

    # One instruction (or label/comment) per output line.
    with open(output_path, "w") as out:
        out.writelines(line + "\n" for line in asm_code)