# Listing metadata: 549 lines, 20 KiB, Python
import re
|
|
import os
|
|
from termcolor import colored
|
|
|
|
|
|
class Variable:
    """Symbol-table record for one named storage location.

    Attributes are set directly from the constructor arguments:
    ``name`` (source identifier), ``address`` (location in data memory) and
    ``type`` (type tag string; defaults to ``"int"``).
    """

    def __init__(self, name, address, var_type="int"):
        # The type tag is stored under ``type`` (not ``var_type``); other code
        # in this file reads ``.type``.
        self.name, self.address, self.type = name, address, var_type
|
|
|
|
class Compiler:
    """Translate a small, line-oriented C subset into assembly text.

    The compiler works one source line at a time using regular expressions.
    Data memory is handed out from the top of a 1 KiB region downwards, and
    every scalar or struct field occupies one byte.

    NOTE(review): the meaning of the emitted mnemonics is inferred from how
    this file uses them -- ``ldw r, imm`` loads an immediate, ``ldr r, addr``
    loads from a fixed memory address, ``str r, addr`` stores to a fixed
    address, and ``ldb``/``stb`` access the address held in a register.
    Confirm against the target ISA documentation.
    """

    # Highest valid data address: 1024 bytes total, 0x000 to 0x3FF.
    MEMORY_TOP = 0x3FF

    def __init__(self):
        """Create empty symbol tables and point the allocator at the top of memory."""
        # data_ptr is the highest address that is still free; allocations
        # grow downwards from it.  It must start at 0x3FF (not 0x400),
        # otherwise the very first byte handed out lies outside memory.
        self.data_ptr = self.MEMORY_TOP
        self.variables = {}
        self.struct_definitions = {}
        self.in_struct_def = False
        self.current_struct_name = None
        self.current_struct_fields = []
        self.defines = {}  # For #define macros
        self.typedefs = {}  # For typedef
        self.label_counter = 0
        self.block_stack = []  # For if/while blocks

    def new_label(self, prefix):
        """Return a unique label made of *prefix* plus a running counter."""
        lbl = f"{prefix}{self.label_counter}"
        self.label_counter += 1
        return lbl

    def preprocess(self, filename):
        """Read *filename* (following includes) and return its lines with macros expanded."""
        lines = self._read_file_recursive(filename)
        return self._apply_defines(lines)

    @staticmethod
    def _resolve_include(including_file, inc_file):
        """Pick the path to open for ``#include "inc_file"``.

        The directory of the including file is tried first; if nothing exists
        there the name is returned unchanged, which keeps the previous
        behaviour of resolving against the current working directory.
        """
        candidate = os.path.join(os.path.dirname(including_file), inc_file)
        if os.path.exists(candidate):
            return candidate
        return inc_file

    def _read_file_recursive(self, filename, included_files=None):
        """Return the code lines of *filename*, inlining ``#include "..."`` files.

        ``#define`` and ``typedef`` lines are recorded in ``self.defines`` /
        ``self.typedefs`` and removed from the output.  A file that cannot be
        found is reported on stdout and contributes no lines.

        Args:
            filename: Path of the file to read.
            included_files: Set of paths already visited (used internally to
                stop include cycles).

        Returns:
            List of source lines, each still carrying its line ending.
        """
        if included_files is None:
            included_files = set()

        if filename in included_files:
            # Prevent infinite recursion on includes
            return []
        included_files.add(filename)

        result_lines = []
        try:
            with open(filename, "r") as f:
                for line in f:
                    line_stripped = line.strip()

                    # #include "file"
                    inc_match = re.match(r'#include\s+"([^"]+)"', line_stripped)
                    if inc_match:
                        inc_file = self._resolve_include(filename, inc_match.group(1))
                        result_lines.extend(
                            self._read_file_recursive(inc_file, included_files)
                        )
                        continue

                    # #define KEY VALUE
                    def_match = re.match(r'#define\s+([a-zA-Z_]\w*)\s+(.*)', line_stripped)
                    if def_match:
                        self.defines[def_match.group(1)] = def_match.group(2)
                        continue

                    # typedef oldtype newtype;
                    tmatch = re.match(
                        r'typedef\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line_stripped
                    )
                    if tmatch:
                        oldt, newt = tmatch.groups()
                        # Resolve oldt if it's also a typedef, so chains stay flat.
                        self.typedefs[newt] = self.apply_typedef(oldt)
                        continue

                    result_lines.append(line)
        except FileNotFoundError as e:
            print(colored(f"{filename}:0: error: {e}", "red"))

        return result_lines

    def _apply_defines(self, lines):
        """Replace every identifier token that names a ``#define`` with its value.

        Replacement is a single, non-recursive pass over each line; each
        substitution is reported on stdout.
        """
        token_pattern = re.compile(r'([A-Za-z0-9_]+)')

        processed = []
        for line in lines:
            # split() with a capturing group keeps both tokens and separators.
            parts = token_pattern.split(line)
            for i, part in enumerate(parts):
                if part in self.defines:
                    print(f"Replaced {part} with {self.defines[part]}")
                    parts[i] = self.defines[part]
            processed.append("".join(parts))
        return processed

    def apply_typedef(self, t):
        """Return the type that *t* aliases, or *t* itself when it is not a typedef."""
        return self.typedefs.get(t, t)

    def allocate_bytes(self, count):
        """Reserve *count* consecutive bytes and return the lowest address.

        Raises:
            Exception: if the request does not fit in the remaining memory.
        """
        start_addr = self.data_ptr - (count - 1)
        if start_addr < 0x000:
            raise Exception("Out of memory!")
        self.data_ptr = start_addr - 1
        return start_addr

    def allocate_var(self, name, var_type="int"):
        """Return the variable called *name*, allocating it on first use.

        A ``"struct:<name>"`` type reserves one byte per field of that
        struct; any other type reserves a single byte.  If *name* already
        exists the existing record is returned and *var_type* is ignored.
        """
        var_type = self.apply_typedef(var_type)
        existing = self.variables.get(name)
        if existing is not None:
            return existing

        if var_type.startswith("struct:"):
            sname = var_type.split(":")[1]
            size = len(self.struct_definitions[sname])  # each field is 1 byte
        else:
            size = 1

        var = Variable(name, self.allocate_bytes(size), var_type)
        self.variables[name] = var
        return var

    def allocate_array(self, name, length, var_type="int"):
        """Reserve *length* bytes of array storage plus a one-byte pointer cell.

        The variable registered under *name* has type ``"array"`` and its
        address is the pointer cell; *var_type* is accepted for interface
        compatibility but the element type is not recorded.

        Returns:
            ``(variable, arr_start)`` where ``arr_start`` is the address of
            element 0.
        """
        arr_start = self.allocate_bytes(length)
        var = Variable(name, self.allocate_bytes(1), "array")
        self.variables[name] = var
        return var, arr_start

    def store_string(self, string_value):
        """Reserve memory for a NUL-terminated string and emit code that fills it.

        Only the ``\\n`` escape sequence is translated.

        Returns:
            ``(asm, start_addr)``: the store instructions and the address of
            the first character.
        """
        string_value = string_value.replace('\\n', '\n')
        start_addr = self.allocate_bytes(len(string_value) + 1)

        asm = []
        current_addr = start_addr
        for ch in string_value:
            asm.append(f"ldw a, {ord(ch)}")
            asm.append(f"str a, 0x{current_addr:X}")
            current_addr += 1
        # Terminating NUL byte.
        asm.append("ldw a, 0")
        asm.append(f"str a, 0x{current_addr:X}")
        return asm, start_addr

    def get_struct_field_offset(self, struct_type, field_name):
        """Return the byte offset of *field_name* inside ``"struct:<name>"``.

        Raises:
            Exception: if *struct_type* is not a struct type or has no such field.
            KeyError: if the struct has not been defined.
        """
        if not struct_type.startswith("struct:"):
            raise Exception(f"{struct_type} is not a struct type")
        sname = struct_type.split(":")[1]
        for i, (fname, _ftype) in enumerate(self.struct_definitions[sname]):
            if fname == field_name:
                return i
        raise Exception(f"Field {field_name} not found in {struct_type}")

    def parse_condition(self, cond_str):
        """Split a condition such as ``a == b`` or ``a != b`` into ``(var1, op, var2)``.

        Raises:
            Exception: for anything other than ``identifier ==/!= identifier``.
        """
        m = re.match(r'([a-zA-Z_]\w*)\s*(==|!=)\s*([a-zA-Z_]\w*)', cond_str.strip())
        if not m:
            raise Exception("Unsupported condition: " + cond_str)
        return m.groups()

    def compile_condition(self, var1, op, var2):
        """Emit code leaving ``var1 - var2`` in register ``a``.

        Returns:
            ``(asm, op)`` with *op* passed through unchanged.
        """
        v1 = self.allocate_var(var1)
        v2 = self.allocate_var(var2)
        asm = [
            f"ldr a, 0x{v1.address:X}",
            f"ldr b, 0x{v2.address:X}",
            "sub a, b",  # a = a - b
        ]
        return asm, op

    def extract_comment(self, line):
        """Split *line* into ``(code, comment)`` at the first ``//`` outside a string.

        ``//`` inside a double-quoted string literal (for example a URL) is
        not a comment.  ``comment`` is ``None`` when the line has none.
        """
        in_string = False
        i = 0
        last = len(line) - 1
        while i < last:
            ch = line[i]
            if in_string:
                if ch == "\\":
                    i += 2  # skip the escaped character, e.g. \"
                    continue
                if ch == '"':
                    in_string = False
            elif ch == '"':
                in_string = True
            elif ch == "/" and line[i + 1] == "/":
                return line[:i], line[i + 2:].strip()
            i += 1
        return line, None

    @staticmethod
    def _skip_branch(cmp_op, label):
        """Return the branch that jumps to *label* when the condition is false.

        Register ``a`` holds ``var1 - var2``: for ``==`` skip when it is
        non-zero, for ``!=`` skip when it is zero.
        """
        if cmp_op == '==':
            return "bne a, 0, " + label
        return "beq a, 0, " + label

    def _field_address(self, varname, fieldname):
        """Return the static address of ``varname.fieldname``.

        A struct variable's ``address`` is the start of its field storage
        (there is no pointer cell), so the field address is known at compile
        time.
        """
        v = self.allocate_var(varname)
        return v.address + self.get_struct_field_offset(v.type, fieldname)

    def _element_address_code(self, arr, index):
        """Emit code leaving the address of ``arr[index]`` in register ``a``.

        An array variable is a pointer cell holding the address of element 0,
        so the base is loaded from memory before the index is added.
        """
        arr_var = self.allocate_var(arr)
        return [
            f"ldr a, 0x{arr_var.address:X}",
            f"ldw b, {index}",
            "add a, b",
        ]

    def compile_line(self, code_part):
        """Compile one source line (comment already removed) to instructions.

        Patterns are tried in a fixed order and the first match wins.
        Unrecognised or empty lines produce an empty list.

        Returns:
            List of assembly instruction strings (without indentation).
        """
        line = code_part.strip()
        asm = []

        if self.in_struct_def:
            if line.startswith("};"):
                self.struct_definitions[self.current_struct_name] = self.current_struct_fields
                self.in_struct_def = False
                self.current_struct_name = None
                self.current_struct_fields = []
                return asm
            mfield = re.match(r'int\s+([a-zA-Z_]\w*)\s*;', line)
            if mfield:
                self.current_struct_fields.append((mfield.group(1), "int"))
            return asm

        # struct definition start
        msd = re.match(r'struct\s+([a-zA-Z_]\w*)\s*\{', line)
        if msd:
            self.in_struct_def = True
            self.current_struct_name = msd.group(1)
            self.current_struct_fields = []
            return asm

        # struct var declaration
        msv = re.match(r'struct\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line)
        if msv:
            sname, varname = msv.groups()
            self.allocate_var(varname, "struct:" + sname)
            return asm

        # if statement
        mif = re.match(r'if\s*\(([^)]+)\)\s*\{', line)
        if mif:
            var1, op, var2 = self.parse_condition(mif.group(1))
            end_label = self.new_label("endif")
            cond_code, cmp_op = self.compile_condition(var1, op, var2)
            asm.extend(cond_code)
            asm.append(self._skip_branch(cmp_op, end_label))
            self.block_stack.append(('if', end_label))
            return asm

        # while statement
        mwhile = re.match(r'while\s*\(([^)]+)\)\s*\{', line)
        if mwhile:
            var1, op, var2 = self.parse_condition(mwhile.group(1))
            start_label = self.new_label("whilestart")
            end_label = self.new_label("whileend")
            asm.append(start_label + ":")
            cond_code, cmp_op = self.compile_condition(var1, op, var2)
            asm.extend(cond_code)
            asm.append(self._skip_branch(cmp_op, end_label))
            self.block_stack.append(('while', start_label, end_label))
            return asm

        # end of block
        if line == "}":
            if not self.block_stack:
                return asm
            blk = self.block_stack.pop()
            if blk[0] == 'if':
                asm.append(blk[1] + ":")
            elif blk[0] == 'while':
                # jump back to start, then place the exit label
                asm.append(f"jmp {blk[1]}")
                asm.append(blk[2] + ":")
            return asm

        # p.x = number;
        m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
        if m:
            varname, fieldname, value = m.groups()
            field_addr = self._field_address(varname, fieldname)
            asm.append(f"ldw a, 0x{field_addr:X}")
            asm.append(f"ldw c, {int(value)}")
            asm.append("stb c, a")
            return asm

        # p.x = var + number;
        m = re.match(
            r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line
        )
        if m:
            varname, fieldname, srcvar, number = m.groups()
            field_addr = self._field_address(varname, fieldname)
            asm.append(f"ldw a, 0x{field_addr:X}")
            v2 = self.allocate_var(srcvar)
            asm.append(f"ldr c, 0x{v2.address:X}")
            asm.append(f"ldw d, {int(number)}")
            asm.append("add c, d")
            asm.append("stb c, a")
            return asm

        # p.x = srcvar;
        m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*;', line)
        if m:
            varname, fieldname, srcvar = m.groups()
            field_addr = self._field_address(varname, fieldname)
            asm.append(f"ldw a, 0x{field_addr:X}")
            v2 = self.allocate_var(srcvar)
            asm.append(f"ldr c, 0x{v2.address:X}")
            asm.append("stb c, a")
            return asm

        # x = p.x;
        m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*;', line)
        if m:
            dst, varname, fieldname = m.groups()
            field_addr = self._field_address(varname, fieldname)
            vd = self.allocate_var(dst)
            asm.append(f"ldw a, 0x{field_addr:X}")
            asm.append("ldb c, a")
            asm.append(f"str c, 0x{vd.address:X}")
            return asm

        # print_int(p.x);
        m = re.match(r'print_int\(([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\)\s*;', line)
        if m:
            varname, fieldname = m.groups()
            field_addr = self._field_address(varname, fieldname)
            asm.append(f"ldw a, 0x{field_addr:X}")
            asm.append("ldb a, a")
            asm.append("int 0x01")
            return asm

        # int arr[10];
        m = re.match(r'int\s+([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
        if m:
            arr_var, start_addr = self.allocate_array(m.group(1), int(m.group(2)))
            # Initialise the pointer cell with the address of element 0.
            asm.append(f"ldw a, 0x{start_addr:X}")
            asm.append(f"str a, 0x{arr_var.address:X}")
            return asm

        # int x = number;
        m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
        if m:
            var = self.allocate_var(m.group(1), "int")
            asm.append(f"ldw a, {int(m.group(2))}")
            asm.append(f"str a, 0x{var.address:X}")
            return asm

        # int y = x + number;
        m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
        if m:
            varname, var2, number = m.groups()
            v1 = self.allocate_var(varname, "int")
            v2 = self.allocate_var(var2, "int")
            asm.append(f"ldr a, 0x{v2.address:X}")
            asm.append(f"ldw b, {int(number)}")
            asm.append("add a, b")
            asm.append(f"str a, 0x{v1.address:X}")
            return asm

        # char *msg = "Hello\n";
        m = re.match(r'char\s*\*\s*([a-zA-Z_]\w*)\s*=\s*"([^"]*)"\s*;', line)
        if m:
            varname, string_val = m.groups()
            v = self.allocate_var(varname, "char*")
            code, start_addr = self.store_string(string_val)
            asm.extend(code)
            asm.append(f"ldw a, 0x{start_addr:X}")
            asm.append(f"str a, 0x{v.address:X}")
            return asm

        # var = number;
        m = re.match(r'([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
        if m:
            varname, value = m.groups()
            v = self.allocate_var(varname, "int")
            asm.append(f"ldw a, {int(value)}")
            asm.append(f"str a, 0x{v.address:X}")
            return asm

        # var = var2 + number;
        m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
        if m:
            varname, var2, number = m.groups()
            v1 = self.allocate_var(varname, "int")
            v2 = self.allocate_var(var2, "int")
            asm.append(f"ldr a, 0x{v2.address:X}")
            asm.append(f"ldw b, {int(number)}")
            asm.append("add a, b")
            asm.append(f"str a, 0x{v1.address:X}")
            return asm

        # var[index] = number;
        m = re.match(r'([a-zA-Z_]\w*)\[(\d+)\]\s*=\s*(\d+)\s*;', line)
        if m:
            arr, index, value = m.groups()
            asm.extend(self._element_address_code(arr, int(index)))
            asm.append(f"ldw c, {int(value)}")
            asm.append("stb c, a")
            return asm

        # x = arr[index];
        m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
        if m:
            varname, arr, index = m.groups()
            # Allocate the destination first to keep the address order stable.
            v = self.allocate_var(varname, "int")
            asm.extend(self._element_address_code(arr, int(index)))
            asm.append("ldb d, a")
            asm.append(f"str d, 0x{v.address:X}")
            return asm

        # print_char(var);
        m = re.match(r'print_char\(([a-zA-Z_]\w*)\)\s*;', line)
        if m:
            v = self.allocate_var(m.group(1))
            asm.append(f"ldr a, 0x{v.address:X}")
            asm.append("int 0x00")
            return asm

        # print_char(arr[index]);
        m = re.match(r'print_char\(([a-zA-Z_]\w*)\[(\d+)\]\)\s*;', line)
        if m:
            arr, index = m.groups()
            asm.extend(self._element_address_code(arr, int(index)))
            asm.append("ldb a, a")
            asm.append("int 0x00")
            return asm

        # print_int(var);
        m = re.match(r'print_int\(([a-zA-Z_]\w*)\)\s*;', line)
        if m:
            v = self.allocate_var(m.group(1))
            asm.append(f"ldr a, 0x{v.address:X}")
            asm.append("int 0x01")
            return asm

        # print_string(var);
        m = re.match(r'print_string\(([a-zA-Z_]\w*)\)\s*;', line)
        if m:
            v = self.allocate_var(m.group(1), "char*")
            # Fresh labels per call: fixed names would collide as soon as a
            # program prints more than one string.
            loop_label = self.new_label("string_loop")
            end_label = self.new_label("string_end")
            asm.append("ldw d, 0")
            asm.append(f"ldr b, 0x{v.address:X}")
            asm.append("ldw c, 1")
            asm.append(f"{loop_label}:")
            asm.append("ldb a, b")
            asm.append(f"beq a, d, {end_label}")
            asm.append("int 0x00")
            asm.append("add b, c")
            asm.append(f"jmp {loop_label}")
            asm.append(f"{end_label}:")
            return asm

        # return number;
        m = re.match(r'return\s+(\d+)\s*;', line)
        if m:
            # The returned value itself is not used; only the interrupt is emitted.
            asm.append("int 0xFF")
            return asm

        # Unrecognized line or empty
        return asm

    def compile_c(self, c_code):
        """Compile preprocessed C text and return the assembly listing.

        Lines outside ``main`` are compiled first for their side effects only
        (struct definitions, global allocations); their instructions are
        discarded.  The body of ``main`` -- from the line containing
        ``int main(`` up to the first line starting with ``}`` -- is then
        compiled exactly once, so string and array storage is not reserved
        twice.

        Returns:
            List of lines beginning with ``"main:"``.
        """
        outside_lines = []
        body_lines = []
        state = "before"  # "before" -> "inside" -> "after" main
        for raw in c_code.split('\n'):
            cline = raw.rstrip()
            if state == "before" and 'int main(' in cline:
                state = "inside"
                continue
            if state == "inside":
                if 'int main(' in cline:
                    continue
                if cline.startswith('}'):
                    state = "after"
                    continue
                body_lines.append(cline)
                continue
            outside_lines.append(raw)

        # struct definitions might appear outside main
        for cline in outside_lines:
            self.compile_line(cline)

        asm = ["main:"]
        for line in body_lines:
            code_part, comment_part = self.extract_comment(line)
            instructions = self.compile_line(code_part)
            if instructions:
                # The source comment is attached to the first instruction only.
                for i, instr in enumerate(instructions):
                    if i == 0 and comment_part:
                        asm.append(f" {instr} ; {comment_part}")
                    else:
                        asm.append(f" {instr}")
            elif comment_part:
                asm.append(f" ; {comment_part}")

        return asm
|
|
|
|
if __name__ == "__main__":
    # Script entry point: preprocess main.c, compile it, and write the
    # resulting assembly listing to test.asm, one instruction per line.
    compiler = Compiler()
    source_text = "\n".join(compiler.preprocess("main.c"))
    listing = compiler.compile_c(source_text)

    with open("test.asm", "w") as out:
        out.writelines(f"{row}\n" for row in listing)
|