did nothing...
This commit is contained in:
parent
b88b8bffc9
commit
a86ffb54db
BIN
__pycache__/program.cpython-311.pyc
Normal file
BIN
__pycache__/program.cpython-311.pyc
Normal file
Binary file not shown.
745
c-to-asm.py
745
c-to-asm.py
@ -1,548 +1,241 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
import re
|
import re
|
||||||
import os
|
|
||||||
from termcolor import colored
|
|
||||||
|
|
||||||
|
# Valid registers and instructions.
|
||||||
|
valid_registers = {"a", "b", "c", "d", "e", "f"}
|
||||||
|
valid_instructions = {"ldw", "mov", "add", "sub", "str", "ldr", "int",
|
||||||
|
"push", "pop", "jsr", "ret", "xor", "and", "jmp",
|
||||||
|
"mul", "div", "bne", "beq", "blt", "ldb", "stb"}
|
||||||
|
|
||||||
class Variable:
|
# Fixed pool of registers.
|
||||||
def __init__(self, name, address, var_type="int"):
|
register_pool = ["a", "b", "c", "d", "e", "f"]
|
||||||
self.name = name
|
|
||||||
self.address = address
|
|
||||||
self.type = var_type
|
|
||||||
|
|
||||||
class Compiler:
|
def allocate_register(var_name, context):
|
||||||
def __init__(self):
|
"""Allocate a register for a variable in the given function context."""
|
||||||
# 1024 bytes total: 0x000 to 0x3FF
|
var_to_reg = context['var_to_reg']
|
||||||
self.data_ptr = 0x400
|
for reg in register_pool:
|
||||||
self.variables = {}
|
if reg not in var_to_reg.values():
|
||||||
self.struct_definitions = {}
|
var_to_reg[var_name] = reg
|
||||||
self.in_struct_def = False
|
return reg
|
||||||
self.current_struct_name = None
|
raise Exception("Out of registers!")
|
||||||
self.current_struct_fields = []
|
|
||||||
self.defines = {} # For #define macros
|
|
||||||
self.typedefs = {} # For typedef
|
|
||||||
self.label_counter = 0
|
|
||||||
self.block_stack = [] # For if/while blocks
|
|
||||||
|
|
||||||
def new_label(self, prefix):
|
def compile_expr(expr, dest, temp, context):
|
||||||
lbl = f"{prefix}{self.label_counter}"
|
"""
|
||||||
self.label_counter += 1
|
Compile a simple expression (literals, variables, +, -) into assembly.
|
||||||
return lbl
|
|
||||||
|
|
||||||
def preprocess(self, filename):
|
Parameters:
|
||||||
lines = self._read_file_recursive(filename)
|
expr: string expression (e.g., "5", "x", "x + 3")
|
||||||
processed_lines = self._apply_defines(lines)
|
dest: destination register for the result.
|
||||||
return processed_lines
|
temp: temporary register.
|
||||||
|
context: dictionary with function context (like var_to_reg).
|
||||||
|
"""
|
||||||
|
var_to_reg = context['var_to_reg']
|
||||||
|
instructions = []
|
||||||
|
tokens = re.split(r'(\+|\-)', expr)
|
||||||
|
tokens = [t.strip() for t in tokens if t.strip() != '']
|
||||||
|
if not tokens:
|
||||||
|
return instructions
|
||||||
|
|
||||||
def _read_file_recursive(self, filename, included_files=None):
|
# Process first term.
|
||||||
if included_files is None:
|
token = tokens[0]
|
||||||
included_files = set()
|
if token.isdigit():
|
||||||
|
instructions.append(f"ldw {dest}, {token}")
|
||||||
|
else:
|
||||||
|
if token not in var_to_reg:
|
||||||
|
raise Exception(f"Variable '{token}' not declared")
|
||||||
|
src_reg = var_to_reg[token]
|
||||||
|
if src_reg != dest:
|
||||||
|
instructions.append(f"mov {dest}, {src_reg}")
|
||||||
|
i = 1
|
||||||
|
while i < len(tokens):
|
||||||
|
op = tokens[i]
|
||||||
|
operand = tokens[i+1]
|
||||||
|
if operand.isdigit():
|
||||||
|
instructions.append(f"ldw {temp}, {operand}")
|
||||||
|
if op == "+":
|
||||||
|
instructions.append(f"add {dest}, {temp}")
|
||||||
|
elif op == "-":
|
||||||
|
instructions.append(f"sub {dest}, {temp}")
|
||||||
|
else:
|
||||||
|
raise Exception(f"Unsupported operator '{op}'")
|
||||||
|
else:
|
||||||
|
if operand not in var_to_reg:
|
||||||
|
raise Exception(f"Variable '{operand}' not declared")
|
||||||
|
operand_reg = var_to_reg[operand]
|
||||||
|
if op == "+":
|
||||||
|
instructions.append(f"add {dest}, {operand_reg}")
|
||||||
|
elif op == "-":
|
||||||
|
instructions.append(f"sub {dest}, {operand_reg}")
|
||||||
|
else:
|
||||||
|
raise Exception(f"Unsupported operator '{op}'")
|
||||||
|
i += 2
|
||||||
|
return instructions
|
||||||
|
|
||||||
if filename in included_files:
|
def compile_statement(line, context):
|
||||||
# Prevent infinite recursion on includes
|
"""
|
||||||
return []
|
Compile a single statement from our limited C language.
|
||||||
|
Supports:
|
||||||
|
- Variable declaration: e.g., "int x = 5;"
|
||||||
|
- Assignment: e.g., "x = x + 2;"
|
||||||
|
- Function call: e.g., "foo();"
|
||||||
|
- Return statement: e.g., "return x;"
|
||||||
|
"""
|
||||||
|
var_to_reg = context['var_to_reg']
|
||||||
|
instructions = []
|
||||||
|
line = line.strip().rstrip(';')
|
||||||
|
if not line:
|
||||||
|
return instructions
|
||||||
|
|
||||||
included_files.add(filename)
|
# Function call statement pattern: identifier followed by "()"
|
||||||
|
m = re.match(r'^(\w+)\s*\(\s*\)\s*$', line)
|
||||||
|
if m:
|
||||||
|
func_name = m.group(1)
|
||||||
|
instructions.append(f"jsr {func_name}")
|
||||||
|
return instructions
|
||||||
|
|
||||||
result_lines = []
|
# Variable declaration.
|
||||||
try:
|
if line.startswith("int "):
|
||||||
with open(filename, "r") as f:
|
line = line[4:].strip() # Remove "int "
|
||||||
for line in f:
|
parts = line.split("=", 1)
|
||||||
line_stripped = line.strip()
|
if len(parts) != 2:
|
||||||
|
raise Exception("Invalid declaration syntax.")
|
||||||
|
var_name = parts[0].strip()
|
||||||
|
expr = parts[1].strip()
|
||||||
|
reg = allocate_register(var_name, context)
|
||||||
|
# Choose a temporary register different from the destination.
|
||||||
|
temp = next((r for r in register_pool if r != reg and r not in var_to_reg.values()), None)
|
||||||
|
if temp is None:
|
||||||
|
temp = next((r for r in register_pool if r != reg), None)
|
||||||
|
instructions.extend(compile_expr(expr, reg, temp, context))
|
||||||
|
return instructions
|
||||||
|
|
||||||
# #include "file"
|
# Return statement.
|
||||||
inc_match = re.match(r'#include\s+"([^"]+)"', line_stripped)
|
if line.startswith("return"):
|
||||||
if inc_match:
|
ret_expr = line[6:].strip() # Remove "return"
|
||||||
inc_file = inc_match.group(1)
|
if ret_expr:
|
||||||
included_content = self._read_file_recursive(inc_file, included_files)
|
# Convention: return value in register a.
|
||||||
result_lines.extend(included_content)
|
temp = next((r for r in register_pool if r != "a" and r not in var_to_reg.values()), None)
|
||||||
continue
|
if temp is None:
|
||||||
|
temp = next((r for r in register_pool if r != "a"), None)
|
||||||
|
instructions.extend(compile_expr(ret_expr, "a", temp, context))
|
||||||
|
instructions.append("ret")
|
||||||
|
return instructions
|
||||||
|
|
||||||
# #define KEY VALUE
|
# Assignment statement.
|
||||||
def_match = re.match(r'#define\s+([a-zA-Z_]\w*)\s+(.*)', line_stripped)
|
if "=" in line:
|
||||||
if def_match:
|
parts = line.split("=", 1)
|
||||||
key = def_match.group(1)
|
var_name = parts[0].strip()
|
||||||
value = def_match.group(2)
|
expr = parts[1].strip()
|
||||||
self.defines[key] = value
|
if var_name not in var_to_reg:
|
||||||
continue
|
raise Exception(f"Variable '{var_name}' not declared")
|
||||||
|
dest = var_to_reg[var_name]
|
||||||
|
temp = next((r for r in register_pool if r != dest and r not in var_to_reg.values()), None)
|
||||||
|
if temp is None:
|
||||||
|
temp = next((r for r in register_pool if r != dest), None)
|
||||||
|
instructions.extend(compile_expr(expr, dest, temp, context))
|
||||||
|
return instructions
|
||||||
|
|
||||||
# typedef oldtype newtype;
|
raise Exception(f"Unrecognized statement: {line}")
|
||||||
tmatch = re.match(r'typedef\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line_stripped)
|
|
||||||
if tmatch:
|
|
||||||
oldt = tmatch.group(1)
|
|
||||||
newt = tmatch.group(2)
|
|
||||||
# Resolve oldt if it's also a typedef
|
|
||||||
oldt = self.apply_typedef(oldt)
|
|
||||||
self.typedefs[newt] = oldt
|
|
||||||
continue
|
|
||||||
|
|
||||||
result_lines.append(line)
|
def compile_function(func_name, lines):
|
||||||
except FileNotFoundError as e:
|
"""
|
||||||
print(colored(f"{filename}:0: error: {e}", "red"))
|
Compile a function given its name and body (as a list of lines).
|
||||||
|
Returns the assembly instructions for the function.
|
||||||
|
"""
|
||||||
return result_lines
|
# Create a fresh context for the function.
|
||||||
|
context = {"var_to_reg": {}}
|
||||||
def _apply_defines(self, lines):
|
instructions = []
|
||||||
|
# Function label.
|
||||||
token_pattern = re.compile(r'([A-Za-z0-9_]+)')
|
instructions.append(f"{func_name}:")
|
||||||
|
|
||||||
processed = []
|
|
||||||
for line in lines:
|
for line in lines:
|
||||||
parts = token_pattern.split(line)
|
line = line.strip()
|
||||||
# parts: tokens and separators
|
if not line or line.startswith("//"):
|
||||||
for i, part in enumerate(parts):
|
|
||||||
if part in self.defines:
|
|
||||||
print(f"Replaced {part} with {self.defines[part]}")
|
|
||||||
part = self.defines[part]
|
|
||||||
parts[i] = part
|
|
||||||
new_line = "".join(parts)
|
|
||||||
processed.append(new_line)
|
|
||||||
return processed
|
|
||||||
|
|
||||||
def apply_typedef(self, t):
|
|
||||||
if t in self.typedefs:
|
|
||||||
return self.typedefs[t]
|
|
||||||
return t
|
|
||||||
|
|
||||||
def allocate_bytes(self, count):
|
|
||||||
start_addr = self.data_ptr - (count - 1)
|
|
||||||
if start_addr < 0x000:
|
|
||||||
raise Exception("Out of memory!")
|
|
||||||
self.data_ptr = start_addr - 1
|
|
||||||
return start_addr
|
|
||||||
|
|
||||||
def allocate_var(self, name, var_type="int"):
|
|
||||||
var_type = self.apply_typedef(var_type)
|
|
||||||
if name in self.variables:
|
|
||||||
return self.variables[name]
|
|
||||||
|
|
||||||
if var_type.startswith("struct:"):
|
|
||||||
sname = var_type.split(":")[1]
|
|
||||||
fields = self.struct_definitions[sname]
|
|
||||||
length = len(fields) # each 1 byte
|
|
||||||
start_addr = self.allocate_bytes(length)
|
|
||||||
var = Variable(name, start_addr, var_type)
|
|
||||||
self.variables[name] = var
|
|
||||||
return var
|
|
||||||
else:
|
|
||||||
start_addr = self.allocate_bytes(1)
|
|
||||||
var = Variable(name, start_addr, var_type)
|
|
||||||
self.variables[name] = var
|
|
||||||
return var
|
|
||||||
|
|
||||||
def allocate_array(self, name, length, var_type="int"):
|
|
||||||
var_type = self.apply_typedef(var_type)
|
|
||||||
arr_start = self.allocate_bytes(length)
|
|
||||||
var_addr = self.allocate_bytes(1)
|
|
||||||
var = Variable(name, var_addr, "array")
|
|
||||||
self.variables[name] = var
|
|
||||||
return var, arr_start
|
|
||||||
|
|
||||||
def store_string(self, string_value):
|
|
||||||
string_value = string_value.replace('\\n', '\n')
|
|
||||||
length = len(string_value) + 1
|
|
||||||
start_addr = self.allocate_bytes(length)
|
|
||||||
asm = []
|
|
||||||
current_addr = start_addr
|
|
||||||
for ch in string_value:
|
|
||||||
ascii_val = ord(ch)
|
|
||||||
asm.append(f"ldw a, {ascii_val}")
|
|
||||||
asm.append(f"str a, 0x{current_addr:X}")
|
|
||||||
current_addr += 1
|
|
||||||
asm.append("ldw a, 0")
|
|
||||||
asm.append(f"str a, 0x{current_addr:X}")
|
|
||||||
return asm, start_addr
|
|
||||||
|
|
||||||
def get_struct_field_offset(self, struct_type, field_name):
|
|
||||||
sname = struct_type.split(":")[1]
|
|
||||||
fields = self.struct_definitions[sname]
|
|
||||||
for i, (fname, ftype) in enumerate(fields):
|
|
||||||
if fname == field_name:
|
|
||||||
return i
|
|
||||||
raise Exception(f"Field {field_name} not found in {struct_type}")
|
|
||||||
|
|
||||||
def parse_condition(self, cond_str):
|
|
||||||
# cond_str like "a == b" or "a != b"
|
|
||||||
m = re.match(r'([a-zA-Z_]\w*)\s*(==|!=)\s*([a-zA-Z_]\w*)', cond_str.strip())
|
|
||||||
if not m:
|
|
||||||
raise Exception("Unsupported condition: " + cond_str)
|
|
||||||
var1, op, var2 = m.groups()
|
|
||||||
return var1, op, var2
|
|
||||||
|
|
||||||
def compile_condition(self, var1, op, var2):
|
|
||||||
asm = []
|
|
||||||
v1 = self.allocate_var(var1)
|
|
||||||
v2 = self.allocate_var(var2)
|
|
||||||
asm.append(f"ldr a, 0x{v1.address:X}")
|
|
||||||
asm.append(f"ldr b, 0x{v2.address:X}")
|
|
||||||
# a = a - b
|
|
||||||
asm.append("sub a, b")
|
|
||||||
return asm, op
|
|
||||||
|
|
||||||
def extract_comment(self, line):
|
|
||||||
comment_index = line.find('//')
|
|
||||||
if comment_index != -1:
|
|
||||||
code_part = line[:comment_index]
|
|
||||||
comment_part = line[comment_index+2:].strip()
|
|
||||||
return code_part, comment_part
|
|
||||||
return line, None
|
|
||||||
|
|
||||||
def compile_line(self, code_part):
|
|
||||||
line = code_part.strip()
|
|
||||||
asm = []
|
|
||||||
|
|
||||||
if self.in_struct_def:
|
|
||||||
if line.startswith("};"):
|
|
||||||
self.struct_definitions[self.current_struct_name] = self.current_struct_fields
|
|
||||||
self.in_struct_def = False
|
|
||||||
self.current_struct_name = None
|
|
||||||
self.current_struct_fields = []
|
|
||||||
return asm
|
|
||||||
mfield = re.match(r'int\s+([a-zA-Z_]\w*)\s*;', line)
|
|
||||||
if mfield:
|
|
||||||
fname = mfield.group(1)
|
|
||||||
ftype = "int"
|
|
||||||
self.current_struct_fields.append((fname, ftype))
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# struct definition start
|
|
||||||
msd = re.match(r'struct\s+([a-zA-Z_]\w*)\s*\{', line)
|
|
||||||
if msd:
|
|
||||||
self.in_struct_def = True
|
|
||||||
self.current_struct_name = msd.group(1)
|
|
||||||
self.current_struct_fields = []
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# struct var declaration
|
|
||||||
msv = re.match(r'struct\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line)
|
|
||||||
if msv:
|
|
||||||
sname, varname = msv.groups()
|
|
||||||
var_type = "struct:" + sname
|
|
||||||
self.allocate_var(varname, var_type)
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# if statement
|
|
||||||
mif = re.match(r'if\s*\(([^)]+)\)\s*\{', line)
|
|
||||||
if mif:
|
|
||||||
cond_str = mif.group(1)
|
|
||||||
var1, op, var2 = self.parse_condition(cond_str)
|
|
||||||
end_label = self.new_label("endif")
|
|
||||||
cond_code, cmp_op = self.compile_condition(var1, op, var2)
|
|
||||||
asm.extend(cond_code)
|
|
||||||
# if '==': jump if not zero a != 0
|
|
||||||
# if '!=': jump if zero a == 0
|
|
||||||
if cmp_op == '==':
|
|
||||||
asm.append("bne a, 0, " + end_label)
|
|
||||||
else:
|
|
||||||
asm.append("beq a, 0, " + end_label)
|
|
||||||
self.block_stack.append(('if', end_label))
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# while statement
|
|
||||||
mwhile = re.match(r'while\s*\(([^)]+)\)\s*\{', line)
|
|
||||||
if mwhile:
|
|
||||||
cond_str = mwhile.group(1)
|
|
||||||
var1, op, var2 = self.parse_condition(cond_str)
|
|
||||||
start_label = self.new_label("whilestart")
|
|
||||||
end_label = self.new_label("whileend")
|
|
||||||
asm.append(start_label + ":")
|
|
||||||
cond_code, cmp_op = self.compile_condition(var1, op, var2)
|
|
||||||
asm.extend(cond_code)
|
|
||||||
if cmp_op == '==':
|
|
||||||
asm.append("bne a, 0, " + end_label)
|
|
||||||
else:
|
|
||||||
asm.append("beq a, 0, " + end_label)
|
|
||||||
self.block_stack.append(('while', start_label, end_label))
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# end of block
|
|
||||||
if line == "}":
|
|
||||||
if not self.block_stack:
|
|
||||||
return asm
|
|
||||||
blk = self.block_stack.pop()
|
|
||||||
if blk[0] == 'if':
|
|
||||||
end_label = blk[1]
|
|
||||||
asm.append(end_label + ":")
|
|
||||||
elif blk[0] == 'while':
|
|
||||||
start_label = blk[1]
|
|
||||||
end_label = blk[2]
|
|
||||||
# jump back to start
|
|
||||||
asm.append(f"jmp {start_label}")
|
|
||||||
asm.append(end_label + ":")
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# p.x = number;
|
|
||||||
m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
|
|
||||||
if m:
|
|
||||||
varname, fieldname, value = m.groups()
|
|
||||||
value = int(value)
|
|
||||||
v = self.allocate_var(varname)
|
|
||||||
offset = self.get_struct_field_offset(v.type, fieldname)
|
|
||||||
asm.append(f"ldr a, 0x{v.address:X}")
|
|
||||||
if offset != 0:
|
|
||||||
asm.append(f"ldw b, {offset}")
|
|
||||||
asm.append("add a, b")
|
|
||||||
asm.append(f"ldw c, {value}")
|
|
||||||
asm.append("stb c, a")
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# p.x = var + number;
|
|
||||||
m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
|
|
||||||
if m:
|
|
||||||
varname, fieldname, srcvar, number = m.groups()
|
|
||||||
number = int(number)
|
|
||||||
v = self.allocate_var(varname)
|
|
||||||
offset = self.get_struct_field_offset(v.type, fieldname)
|
|
||||||
asm.append(f"ldr a, 0x{v.address:X}")
|
|
||||||
if offset != 0:
|
|
||||||
asm.append(f"ldw b, {offset}")
|
|
||||||
asm.append("add a, b")
|
|
||||||
v2 = self.allocate_var(srcvar)
|
|
||||||
asm.append(f"ldr c, 0x{v2.address:X}")
|
|
||||||
asm.append(f"ldw d, {number}")
|
|
||||||
asm.append("add c, d")
|
|
||||||
asm.append("stb c, a")
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# p.x = srcvar;
|
|
||||||
m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*;', line)
|
|
||||||
if m:
|
|
||||||
varname, fieldname, srcvar = m.groups()
|
|
||||||
v = self.allocate_var(varname)
|
|
||||||
offset = self.get_struct_field_offset(v.type, fieldname)
|
|
||||||
asm.append(f"ldr a, 0x{v.address:X}")
|
|
||||||
if offset != 0:
|
|
||||||
asm.append(f"ldw b, {offset}")
|
|
||||||
asm.append("add a, b")
|
|
||||||
v2 = self.allocate_var(srcvar)
|
|
||||||
asm.append(f"ldr c, 0x{v2.address:X}")
|
|
||||||
asm.append("stb c, a")
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# x = p.x;
|
|
||||||
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*;', line)
|
|
||||||
if m:
|
|
||||||
dst, varname, fieldname = m.groups()
|
|
||||||
v = self.allocate_var(varname)
|
|
||||||
offset = self.get_struct_field_offset(v.type, fieldname)
|
|
||||||
vd = self.allocate_var(dst)
|
|
||||||
asm.append(f"ldr a, 0x{v.address:X}")
|
|
||||||
if offset != 0:
|
|
||||||
asm.append(f"ldw b, {offset}")
|
|
||||||
asm.append("add a, b")
|
|
||||||
asm.append("ldb c, a")
|
|
||||||
asm.append(f"str c, 0x{vd.address:X}")
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# print_int(p.x);
|
|
||||||
m = re.match(r'print_int\(([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\)\s*;', line)
|
|
||||||
if m:
|
|
||||||
varname, fieldname = m.groups()
|
|
||||||
v = self.allocate_var(varname)
|
|
||||||
offset = self.get_struct_field_offset(v.type, fieldname)
|
|
||||||
asm.append(f"ldr a, 0x{v.address:X}")
|
|
||||||
if offset != 0:
|
|
||||||
asm.append(f"ldw b, {offset}")
|
|
||||||
asm.append("add a, b")
|
|
||||||
asm.append("ldb a, a")
|
|
||||||
asm.append("int 0x01")
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# int arr[10];
|
|
||||||
m = re.match(r'int\s+([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
|
|
||||||
if m:
|
|
||||||
varname = m.group(1)
|
|
||||||
length = int(m.group(2))
|
|
||||||
arr_var, start_addr = self.allocate_array(varname, length)
|
|
||||||
asm.append(f"ldw a, 0x{start_addr:X}")
|
|
||||||
asm.append(f"str a, 0x{arr_var.address:X}")
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# int x = number;
|
|
||||||
m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
|
|
||||||
if m:
|
|
||||||
varname = m.group(1)
|
|
||||||
value = int(m.group(2))
|
|
||||||
var = self.allocate_var(varname, "int")
|
|
||||||
asm.append(f"ldw a, {value}")
|
|
||||||
asm.append(f"str a, 0x{var.address:X}")
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# int y = x + number;
|
|
||||||
m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
|
|
||||||
if m:
|
|
||||||
varname, var2, number = m.groups()
|
|
||||||
number = int(number)
|
|
||||||
v1 = self.allocate_var(varname, "int")
|
|
||||||
v2 = self.allocate_var(var2, "int")
|
|
||||||
asm.append(f"ldr a, 0x{v2.address:X}")
|
|
||||||
asm.append(f"ldw b, {number}")
|
|
||||||
asm.append("add a, b")
|
|
||||||
asm.append(f"str a, 0x{v1.address:X}")
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# char *msg = "Hello\n";
|
|
||||||
m = re.match(r'char\s*\*\s*([a-zA-Z_]\w*)\s*=\s*"([^"]*)"\s*;', line)
|
|
||||||
if m:
|
|
||||||
varname, string_val = m.groups()
|
|
||||||
v = self.allocate_var(varname, "char*")
|
|
||||||
code, start_addr = self.store_string(string_val)
|
|
||||||
asm.extend(code)
|
|
||||||
asm.append(f"ldw a, 0x{start_addr:X}")
|
|
||||||
asm.append(f"str a, 0x{v.address:X}")
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# var = number;
|
|
||||||
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
|
|
||||||
if m:
|
|
||||||
varname, value = m.groups()
|
|
||||||
value = int(value)
|
|
||||||
v = self.allocate_var(varname, "int")
|
|
||||||
asm.append(f"ldw a, {value}")
|
|
||||||
asm.append(f"str a, 0x{v.address:X}")
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# var = var2 + number;
|
|
||||||
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
|
|
||||||
if m:
|
|
||||||
varname, var2, number = m.groups()
|
|
||||||
number = int(number)
|
|
||||||
v1 = self.allocate_var(varname, "int")
|
|
||||||
v2 = self.allocate_var(var2, "int")
|
|
||||||
asm.append(f"ldr a, 0x{v2.address:X}")
|
|
||||||
asm.append(f"ldw b, {number}")
|
|
||||||
asm.append("add a, b")
|
|
||||||
asm.append(f"str a, 0x{v1.address:X}")
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# var[index] = number;
|
|
||||||
m = re.match(r'([a-zA-Z_]\w*)\[(\d+)\]\s*=\s*(\d+)\s*;', line)
|
|
||||||
if m:
|
|
||||||
arr, index, value = m.groups()
|
|
||||||
index = int(index)
|
|
||||||
value = int(value)
|
|
||||||
arr_var = self.allocate_var(arr)
|
|
||||||
asm.append(f"ldr a, 0x{arr_var.address:X}")
|
|
||||||
asm.append(f"ldw b, {index}")
|
|
||||||
asm.append("add a, b")
|
|
||||||
asm.append(f"ldw c, {value}")
|
|
||||||
asm.append("stb c, a")
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# x = arr[index];
|
|
||||||
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
|
|
||||||
if m:
|
|
||||||
varname, arr, index = m.groups()
|
|
||||||
index = int(index)
|
|
||||||
v = self.allocate_var(varname, "int")
|
|
||||||
arr_var = self.allocate_var(arr)
|
|
||||||
asm.append(f"ldr a, 0x{arr_var.address:X}")
|
|
||||||
asm.append(f"ldw b, {index}")
|
|
||||||
asm.append("add a, b")
|
|
||||||
asm.append("ldb d, a")
|
|
||||||
asm.append(f"str d, 0x{v.address:X}")
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# print_char(var);
|
|
||||||
m = re.match(r'print_char\(([a-zA-Z_]\w*)\)\s*;', line)
|
|
||||||
if m:
|
|
||||||
varname = m.group(1)
|
|
||||||
v = self.allocate_var(varname)
|
|
||||||
asm.append(f"ldr a, 0x{v.address:X}")
|
|
||||||
asm.append("int 0x00")
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# print_char(arr[index]);
|
|
||||||
m = re.match(r'print_char\(([a-zA-Z_]\w*)\[(\d+)\]\)\s*;', line)
|
|
||||||
if m:
|
|
||||||
arr, index = m.groups()
|
|
||||||
index = int(index)
|
|
||||||
arr_var = self.allocate_var(arr)
|
|
||||||
asm.append(f"ldr a, 0x{arr_var.address:X}")
|
|
||||||
asm.append(f"ldw b, {index}")
|
|
||||||
asm.append("add a, b")
|
|
||||||
asm.append("ldb a, a")
|
|
||||||
asm.append("int 0x00")
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# print_int(var);
|
|
||||||
m = re.match(r'print_int\(([a-zA-Z_]\w*)\)\s*;', line)
|
|
||||||
if m:
|
|
||||||
varname = m.group(1)
|
|
||||||
v = self.allocate_var(varname)
|
|
||||||
asm.append(f"ldr a, 0x{v.address:X}")
|
|
||||||
asm.append("int 0x01")
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# print_string(var);
|
|
||||||
m = re.match(r'print_string\(([a-zA-Z_]\w*)\)\s*;', line)
|
|
||||||
if m:
|
|
||||||
varname = m.group(1)
|
|
||||||
v = self.allocate_var(varname, "char*")
|
|
||||||
asm.append("ldw d, 0")
|
|
||||||
asm.append(f"ldr b, 0x{v.address:X}")
|
|
||||||
asm.append("ldw c, 1")
|
|
||||||
asm.append("string_loop:")
|
|
||||||
asm.append("ldb a, b")
|
|
||||||
asm.append("beq a, d, string_end")
|
|
||||||
asm.append("int 0x00")
|
|
||||||
asm.append("add b, c")
|
|
||||||
asm.append("jmp string_loop")
|
|
||||||
asm.append("string_end:")
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# return number;
|
|
||||||
m = re.match(r'return\s+(\d+)\s*;', line)
|
|
||||||
if m:
|
|
||||||
asm.append("int 0xFF")
|
|
||||||
return asm
|
|
||||||
|
|
||||||
# Unrecognized line or empty
|
|
||||||
return asm
|
|
||||||
|
|
||||||
def compile_c(self, c_code):
|
|
||||||
# First, parse everything to detect structs and typedef done in preprocess
|
|
||||||
all_lines = c_code.split('\n')
|
|
||||||
# struct definitions might appear outside main
|
|
||||||
for cline in all_lines:
|
|
||||||
self.compile_line(cline)
|
|
||||||
|
|
||||||
# Extract lines inside main
|
|
||||||
lines = []
|
|
||||||
in_main = False
|
|
||||||
for cline in all_lines:
|
|
||||||
cline = cline.rstrip()
|
|
||||||
if 'int main(' in cline:
|
|
||||||
in_main = True
|
|
||||||
continue
|
continue
|
||||||
if in_main:
|
stmt_instructions = compile_statement(line, context)
|
||||||
if cline.startswith('}'):
|
instructions.extend(stmt_instructions)
|
||||||
in_main = False
|
return instructions
|
||||||
break
|
|
||||||
lines.append(cline)
|
def compile_c_to_asm(c_code):
|
||||||
|
"""
|
||||||
|
Compile a simple C program (with functions) into assembly.
|
||||||
|
The program must contain functions defined as:
|
||||||
|
|
||||||
|
int func_name() {
|
||||||
|
// statements
|
||||||
|
}
|
||||||
|
|
||||||
|
The compiled output will start at the main function (if defined).
|
||||||
|
"""
|
||||||
|
lines = c_code.splitlines()
|
||||||
|
functions = {}
|
||||||
|
current_func = None
|
||||||
|
current_lines = []
|
||||||
|
in_function = False
|
||||||
|
|
||||||
asm = ["main:"]
|
|
||||||
for line in lines:
|
for line in lines:
|
||||||
code_part, comment_part = self.extract_comment(line)
|
stripped = line.strip()
|
||||||
instructions = self.compile_line(code_part)
|
if not stripped or stripped.startswith("//"):
|
||||||
if instructions:
|
continue
|
||||||
for i, instr in enumerate(instructions):
|
|
||||||
if i == 0 and comment_part:
|
|
||||||
asm.append(f" {instr} ; {comment_part}")
|
|
||||||
else:
|
|
||||||
asm.append(f" {instr}")
|
|
||||||
else:
|
|
||||||
if comment_part:
|
|
||||||
asm.append(f" ; {comment_part}")
|
|
||||||
|
|
||||||
return asm
|
# Detect function start: "int funcName() {"
|
||||||
|
m = re.match(r'^int\s+(\w+)\s*\(\s*\)\s*\{', stripped)
|
||||||
|
if m:
|
||||||
|
if in_function:
|
||||||
|
raise Exception("Nested functions not supported.")
|
||||||
|
current_func = m.group(1)
|
||||||
|
in_function = True
|
||||||
|
current_lines = []
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Detect end of function: "}"
|
||||||
|
if stripped == "}":
|
||||||
|
if not in_function:
|
||||||
|
raise Exception("Unexpected '}'")
|
||||||
|
functions[current_func] = compile_function(current_func, current_lines)
|
||||||
|
in_function = False
|
||||||
|
current_func = None
|
||||||
|
current_lines = []
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Inside a function, add the line.
|
||||||
|
if in_function:
|
||||||
|
current_lines.append(stripped)
|
||||||
|
else:
|
||||||
|
# Outside any function; for simplicity, ignore global declarations.
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Build the final assembly code.
|
||||||
|
# If "main" is defined, list it first.
|
||||||
|
asm_lines = []
|
||||||
|
if "main" in functions:
|
||||||
|
asm_lines.extend(functions["main"])
|
||||||
|
for fname, code in functions.items():
|
||||||
|
if fname != "main":
|
||||||
|
asm_lines.extend(code)
|
||||||
|
else:
|
||||||
|
for fname, code in functions.items():
|
||||||
|
asm_lines.extend(code)
|
||||||
|
return asm_lines
|
||||||
|
|
||||||
|
# Example usage.
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
compiler = Compiler()
|
sample_c = """
|
||||||
preprocessed_lines = compiler.preprocess("main.c")
|
// sample C program with functions.
|
||||||
c_code = "\n".join(preprocessed_lines)
|
int main() {
|
||||||
asm_code = compiler.compile_c(c_code)
|
int x = 5;
|
||||||
|
int y = 10;
|
||||||
|
x = x + y;
|
||||||
|
foo();
|
||||||
|
|
||||||
with open("test.asm", "w") as out:
|
}
|
||||||
for line in asm_code:
|
|
||||||
out.write(line + "\n")
|
int foo() {
|
||||||
|
int a = 3;
|
||||||
|
int b = 7;
|
||||||
|
a = a + b;
|
||||||
|
return a;
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
asm_output = compile_c_to_asm(sample_c)
|
||||||
|
for inst in asm_output:
|
||||||
|
print(inst)
|
||||||
|
@ -1 +1 @@
|
|||||||
program = [1,0,87,5,0,1004,1,0,101,5,0,1005,1,0,108,5,0,1006,1,0,99,5,0,1007,1,0,111,5,0,1008,1,0,109,5,0,1009,1,0,101,5,0,1010,1,0,33,5,0,1011,1,0,10,5,0,1012,1,0,0,5,0,1013,1,0,1004,5,0,1024,1,3,0,6,1,1024,1,2,1,21,0,1,9,0,3,91,10,0,0,3,1,2,17,75,0,10,255,0]
|
program = [1,0,5,1,1,10,3,0,1,13,15,0,14,0,0,1,0,3,1,1,7,3,0,1,14,0,0]
|
2
std.h
2
std.h
@ -1,6 +1,6 @@
|
|||||||
#define true 1
|
#define true 1
|
||||||
#define false 0
|
#define false 0
|
||||||
#define TEST 256
|
|
||||||
|
|
||||||
struct vec2 {
|
struct vec2 {
|
||||||
int x;
|
int x;
|
||||||
|
43
test.asm
43
test.asm
@ -1,34 +1,11 @@
|
|||||||
main:
|
main:
|
||||||
ldw a, 87
|
ldw a, 5
|
||||||
str a, 0x3EC
|
ldw b, 10
|
||||||
ldw a, 101
|
add a, b
|
||||||
str a, 0x3ED
|
jsr foo
|
||||||
ldw a, 108
|
ret
|
||||||
str a, 0x3EE
|
foo:
|
||||||
ldw a, 99
|
ldw a, 3
|
||||||
str a, 0x3EF
|
ldw b, 7
|
||||||
ldw a, 111
|
add a, b
|
||||||
str a, 0x3F0
|
retZ
|
||||||
ldw a, 109
|
|
||||||
str a, 0x3F1
|
|
||||||
ldw a, 101
|
|
||||||
str a, 0x3F2
|
|
||||||
ldw a, 33
|
|
||||||
str a, 0x3F3
|
|
||||||
ldw a, 10
|
|
||||||
str a, 0x3F4
|
|
||||||
ldw a, 0
|
|
||||||
str a, 0x3F5
|
|
||||||
ldw a, 0x3EC
|
|
||||||
str a, 0x400
|
|
||||||
ldw d, 0
|
|
||||||
ldr b, 0x400
|
|
||||||
ldw c, 1
|
|
||||||
string_loop:
|
|
||||||
ldb a, b
|
|
||||||
beq a, d, string_end
|
|
||||||
int 0x00
|
|
||||||
add b, c
|
|
||||||
jmp string_loop
|
|
||||||
string_end:
|
|
||||||
int 0xFF
|
|
548
tests/c-test02.py
Normal file
548
tests/c-test02.py
Normal file
@ -0,0 +1,548 @@
|
|||||||
|
import re
|
||||||
|
import os
|
||||||
|
from termcolor import colored
|
||||||
|
|
||||||
|
|
||||||
|
class Variable:
|
||||||
|
def __init__(self, name, address, var_type="int"):
|
||||||
|
self.name = name
|
||||||
|
self.address = address
|
||||||
|
self.type = var_type
|
||||||
|
|
||||||
|
class Compiler:
|
||||||
|
def __init__(self):
|
||||||
|
# 1024 bytes total: 0x000 to 0x3FF
|
||||||
|
self.data_ptr = 0x400
|
||||||
|
self.variables = {}
|
||||||
|
self.struct_definitions = {}
|
||||||
|
self.in_struct_def = False
|
||||||
|
self.current_struct_name = None
|
||||||
|
self.current_struct_fields = []
|
||||||
|
self.defines = {} # For #define macros
|
||||||
|
self.typedefs = {} # For typedef
|
||||||
|
self.label_counter = 0
|
||||||
|
self.block_stack = [] # For if/while blocks
|
||||||
|
|
||||||
|
def new_label(self, prefix):
|
||||||
|
lbl = f"{prefix}{self.label_counter}"
|
||||||
|
self.label_counter += 1
|
||||||
|
return lbl
|
||||||
|
|
||||||
|
def preprocess(self, filename):
|
||||||
|
lines = self._read_file_recursive(filename)
|
||||||
|
processed_lines = self._apply_defines(lines)
|
||||||
|
return processed_lines
|
||||||
|
|
||||||
|
def _read_file_recursive(self, filename, included_files=None):
|
||||||
|
if included_files is None:
|
||||||
|
included_files = set()
|
||||||
|
|
||||||
|
if filename in included_files:
|
||||||
|
# Prevent infinite recursion on includes
|
||||||
|
return []
|
||||||
|
|
||||||
|
included_files.add(filename)
|
||||||
|
|
||||||
|
result_lines = []
|
||||||
|
try:
|
||||||
|
with open(filename, "r") as f:
|
||||||
|
for line in f:
|
||||||
|
line_stripped = line.strip()
|
||||||
|
|
||||||
|
# #include "file"
|
||||||
|
inc_match = re.match(r'#include\s+"([^"]+)"', line_stripped)
|
||||||
|
if inc_match:
|
||||||
|
inc_file = inc_match.group(1)
|
||||||
|
included_content = self._read_file_recursive(inc_file, included_files)
|
||||||
|
result_lines.extend(included_content)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# #define KEY VALUE
|
||||||
|
def_match = re.match(r'#define\s+([a-zA-Z_]\w*)\s+(.*)', line_stripped)
|
||||||
|
if def_match:
|
||||||
|
key = def_match.group(1)
|
||||||
|
value = def_match.group(2)
|
||||||
|
self.defines[key] = value
|
||||||
|
continue
|
||||||
|
|
||||||
|
# typedef oldtype newtype;
|
||||||
|
tmatch = re.match(r'typedef\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line_stripped)
|
||||||
|
if tmatch:
|
||||||
|
oldt = tmatch.group(1)
|
||||||
|
newt = tmatch.group(2)
|
||||||
|
# Resolve oldt if it's also a typedef
|
||||||
|
oldt = self.apply_typedef(oldt)
|
||||||
|
self.typedefs[newt] = oldt
|
||||||
|
continue
|
||||||
|
|
||||||
|
result_lines.append(line)
|
||||||
|
except FileNotFoundError as e:
|
||||||
|
print(colored(f"{filename}:0: error: {e}", "red"))
|
||||||
|
|
||||||
|
|
||||||
|
return result_lines
|
||||||
|
|
||||||
|
def _apply_defines(self, lines):
|
||||||
|
|
||||||
|
token_pattern = re.compile(r'([A-Za-z0-9_]+)')
|
||||||
|
|
||||||
|
processed = []
|
||||||
|
for line in lines:
|
||||||
|
parts = token_pattern.split(line)
|
||||||
|
# parts: tokens and separators
|
||||||
|
for i, part in enumerate(parts):
|
||||||
|
if part in self.defines:
|
||||||
|
print(f"Replaced {part} with {self.defines[part]}")
|
||||||
|
part = self.defines[part]
|
||||||
|
parts[i] = part
|
||||||
|
new_line = "".join(parts)
|
||||||
|
processed.append(new_line)
|
||||||
|
return processed
|
||||||
|
|
||||||
|
def apply_typedef(self, t):
|
||||||
|
if t in self.typedefs:
|
||||||
|
return self.typedefs[t]
|
||||||
|
return t
|
||||||
|
|
||||||
|
def allocate_bytes(self, count):
|
||||||
|
start_addr = self.data_ptr - (count - 1)
|
||||||
|
if start_addr < 0x000:
|
||||||
|
raise Exception("Out of memory!")
|
||||||
|
self.data_ptr = start_addr - 1
|
||||||
|
return start_addr
|
||||||
|
|
||||||
|
def allocate_var(self, name, var_type="int"):
|
||||||
|
var_type = self.apply_typedef(var_type)
|
||||||
|
if name in self.variables:
|
||||||
|
return self.variables[name]
|
||||||
|
|
||||||
|
if var_type.startswith("struct:"):
|
||||||
|
sname = var_type.split(":")[1]
|
||||||
|
fields = self.struct_definitions[sname]
|
||||||
|
length = len(fields) # each 1 byte
|
||||||
|
start_addr = self.allocate_bytes(length)
|
||||||
|
var = Variable(name, start_addr, var_type)
|
||||||
|
self.variables[name] = var
|
||||||
|
return var
|
||||||
|
else:
|
||||||
|
start_addr = self.allocate_bytes(1)
|
||||||
|
var = Variable(name, start_addr, var_type)
|
||||||
|
self.variables[name] = var
|
||||||
|
return var
|
||||||
|
|
||||||
|
def allocate_array(self, name, length, var_type="int"):
    """Reserve ``length`` bytes of array storage plus a one-byte cell for the array variable.

    Returns:
        ``(variable, data_start)``: the registered variable (type
        ``"array"``, addressed at the extra cell) and the lowest address
        of the element storage.
    """
    # Kept from the original: the resolved element type is not used below.
    self.apply_typedef(var_type)

    data_start = self.allocate_bytes(length)
    pointer_cell = self.allocate_bytes(1)

    array_var = Variable(name, pointer_cell, "array")
    self.variables[name] = array_var
    return array_var, data_start
|
||||||
|
|
||||||
|
def store_string(self, string_value):
|
||||||
|
string_value = string_value.replace('\\n', '\n')
|
||||||
|
length = len(string_value) + 1
|
||||||
|
start_addr = self.allocate_bytes(length)
|
||||||
|
asm = []
|
||||||
|
current_addr = start_addr
|
||||||
|
for ch in string_value:
|
||||||
|
ascii_val = ord(ch)
|
||||||
|
asm.append(f"ldw a, {ascii_val}")
|
||||||
|
asm.append(f"str a, 0x{current_addr:X}")
|
||||||
|
current_addr += 1
|
||||||
|
asm.append("ldw a, 0")
|
||||||
|
asm.append(f"str a, 0x{current_addr:X}")
|
||||||
|
return asm, start_addr
|
||||||
|
|
||||||
|
def get_struct_field_offset(self, struct_type, field_name):
|
||||||
|
sname = struct_type.split(":")[1]
|
||||||
|
fields = self.struct_definitions[sname]
|
||||||
|
for i, (fname, ftype) in enumerate(fields):
|
||||||
|
if fname == field_name:
|
||||||
|
return i
|
||||||
|
raise Exception(f"Field {field_name} not found in {struct_type}")
|
||||||
|
|
||||||
|
def parse_condition(self, cond_str):
    """Split a condition such as ``a == b`` or ``a != b`` into ``(lhs, operator, rhs)``.

    Both operands must be identifiers; surrounding whitespace is ignored.

    Raises:
        Exception: when the text does not start with such a comparison.
    """
    matched = re.match(
        r'([a-zA-Z_]\w*)\s*(==|!=)\s*([a-zA-Z_]\w*)', cond_str.strip()
    )
    if matched is None:
        raise Exception("Unsupported condition: " + cond_str)
    return matched.group(1, 2, 3)
|
||||||
|
|
||||||
|
def compile_condition(self, var1, op, var2):
|
||||||
|
asm = []
|
||||||
|
v1 = self.allocate_var(var1)
|
||||||
|
v2 = self.allocate_var(var2)
|
||||||
|
asm.append(f"ldr a, 0x{v1.address:X}")
|
||||||
|
asm.append(f"ldr b, 0x{v2.address:X}")
|
||||||
|
# a = a - b
|
||||||
|
asm.append("sub a, b")
|
||||||
|
return asm, op
|
||||||
|
|
||||||
|
def extract_comment(self, line):
    """Split ``line`` into its code and its trailing ``//`` comment.

    A ``//`` inside a double- or single-quoted literal is part of the
    literal and does not start a comment; a backslash inside a literal
    escapes the next character.

    Returns:
        ``(code, comment)``: the text before the comment marker (not
        stripped) and the stripped comment text, or ``(line, None)`` when
        the line has no comment.
    """
    quote = None  # the quote character of the literal we are inside, if any
    pos = 0
    end = len(line)
    while pos < end:
        ch = line[pos]
        if quote is not None:
            if ch == "\\":
                pos += 2  # skip the escaped character as well
                continue
            if ch == quote:
                quote = None
        elif ch in "\"'":
            quote = ch
        elif line.startswith("//", pos):
            return line[:pos], line[pos + 2:].strip()
        pos += 1
    return line, None
|
||||||
|
|
||||||
|
def compile_line(self, code_part):
|
||||||
|
line = code_part.strip()
|
||||||
|
asm = []
|
||||||
|
|
||||||
|
if self.in_struct_def:
|
||||||
|
if line.startswith("};"):
|
||||||
|
self.struct_definitions[self.current_struct_name] = self.current_struct_fields
|
||||||
|
self.in_struct_def = False
|
||||||
|
self.current_struct_name = None
|
||||||
|
self.current_struct_fields = []
|
||||||
|
return asm
|
||||||
|
mfield = re.match(r'int\s+([a-zA-Z_]\w*)\s*;', line)
|
||||||
|
if mfield:
|
||||||
|
fname = mfield.group(1)
|
||||||
|
ftype = "int"
|
||||||
|
self.current_struct_fields.append((fname, ftype))
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# struct definition start
|
||||||
|
msd = re.match(r'struct\s+([a-zA-Z_]\w*)\s*\{', line)
|
||||||
|
if msd:
|
||||||
|
self.in_struct_def = True
|
||||||
|
self.current_struct_name = msd.group(1)
|
||||||
|
self.current_struct_fields = []
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# struct var declaration
|
||||||
|
msv = re.match(r'struct\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line)
|
||||||
|
if msv:
|
||||||
|
sname, varname = msv.groups()
|
||||||
|
var_type = "struct:" + sname
|
||||||
|
self.allocate_var(varname, var_type)
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# if statement
|
||||||
|
mif = re.match(r'if\s*\(([^)]+)\)\s*\{', line)
|
||||||
|
if mif:
|
||||||
|
cond_str = mif.group(1)
|
||||||
|
var1, op, var2 = self.parse_condition(cond_str)
|
||||||
|
end_label = self.new_label("endif")
|
||||||
|
cond_code, cmp_op = self.compile_condition(var1, op, var2)
|
||||||
|
asm.extend(cond_code)
|
||||||
|
# if '==': jump if not zero a != 0
|
||||||
|
# if '!=': jump if zero a == 0
|
||||||
|
if cmp_op == '==':
|
||||||
|
asm.append("bne a, 0, " + end_label)
|
||||||
|
else:
|
||||||
|
asm.append("beq a, 0, " + end_label)
|
||||||
|
self.block_stack.append(('if', end_label))
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# while statement
|
||||||
|
mwhile = re.match(r'while\s*\(([^)]+)\)\s*\{', line)
|
||||||
|
if mwhile:
|
||||||
|
cond_str = mwhile.group(1)
|
||||||
|
var1, op, var2 = self.parse_condition(cond_str)
|
||||||
|
start_label = self.new_label("whilestart")
|
||||||
|
end_label = self.new_label("whileend")
|
||||||
|
asm.append(start_label + ":")
|
||||||
|
cond_code, cmp_op = self.compile_condition(var1, op, var2)
|
||||||
|
asm.extend(cond_code)
|
||||||
|
if cmp_op == '==':
|
||||||
|
asm.append("bne a, 0, " + end_label)
|
||||||
|
else:
|
||||||
|
asm.append("beq a, 0, " + end_label)
|
||||||
|
self.block_stack.append(('while', start_label, end_label))
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# end of block
|
||||||
|
if line == "}":
|
||||||
|
if not self.block_stack:
|
||||||
|
return asm
|
||||||
|
blk = self.block_stack.pop()
|
||||||
|
if blk[0] == 'if':
|
||||||
|
end_label = blk[1]
|
||||||
|
asm.append(end_label + ":")
|
||||||
|
elif blk[0] == 'while':
|
||||||
|
start_label = blk[1]
|
||||||
|
end_label = blk[2]
|
||||||
|
# jump back to start
|
||||||
|
asm.append(f"jmp {start_label}")
|
||||||
|
asm.append(end_label + ":")
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# p.x = number;
|
||||||
|
m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
|
||||||
|
if m:
|
||||||
|
varname, fieldname, value = m.groups()
|
||||||
|
value = int(value)
|
||||||
|
v = self.allocate_var(varname)
|
||||||
|
offset = self.get_struct_field_offset(v.type, fieldname)
|
||||||
|
asm.append(f"ldr a, 0x{v.address:X}")
|
||||||
|
if offset != 0:
|
||||||
|
asm.append(f"ldw b, {offset}")
|
||||||
|
asm.append("add a, b")
|
||||||
|
asm.append(f"ldw c, {value}")
|
||||||
|
asm.append("stb c, a")
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# p.x = var + number;
|
||||||
|
m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
|
||||||
|
if m:
|
||||||
|
varname, fieldname, srcvar, number = m.groups()
|
||||||
|
number = int(number)
|
||||||
|
v = self.allocate_var(varname)
|
||||||
|
offset = self.get_struct_field_offset(v.type, fieldname)
|
||||||
|
asm.append(f"ldr a, 0x{v.address:X}")
|
||||||
|
if offset != 0:
|
||||||
|
asm.append(f"ldw b, {offset}")
|
||||||
|
asm.append("add a, b")
|
||||||
|
v2 = self.allocate_var(srcvar)
|
||||||
|
asm.append(f"ldr c, 0x{v2.address:X}")
|
||||||
|
asm.append(f"ldw d, {number}")
|
||||||
|
asm.append("add c, d")
|
||||||
|
asm.append("stb c, a")
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# p.x = srcvar;
|
||||||
|
m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*;', line)
|
||||||
|
if m:
|
||||||
|
varname, fieldname, srcvar = m.groups()
|
||||||
|
v = self.allocate_var(varname)
|
||||||
|
offset = self.get_struct_field_offset(v.type, fieldname)
|
||||||
|
asm.append(f"ldr a, 0x{v.address:X}")
|
||||||
|
if offset != 0:
|
||||||
|
asm.append(f"ldw b, {offset}")
|
||||||
|
asm.append("add a, b")
|
||||||
|
v2 = self.allocate_var(srcvar)
|
||||||
|
asm.append(f"ldr c, 0x{v2.address:X}")
|
||||||
|
asm.append("stb c, a")
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# x = p.x;
|
||||||
|
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*;', line)
|
||||||
|
if m:
|
||||||
|
dst, varname, fieldname = m.groups()
|
||||||
|
v = self.allocate_var(varname)
|
||||||
|
offset = self.get_struct_field_offset(v.type, fieldname)
|
||||||
|
vd = self.allocate_var(dst)
|
||||||
|
asm.append(f"ldr a, 0x{v.address:X}")
|
||||||
|
if offset != 0:
|
||||||
|
asm.append(f"ldw b, {offset}")
|
||||||
|
asm.append("add a, b")
|
||||||
|
asm.append("ldb c, a")
|
||||||
|
asm.append(f"str c, 0x{vd.address:X}")
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# print_int(p.x);
|
||||||
|
m = re.match(r'print_int\(([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\)\s*;', line)
|
||||||
|
if m:
|
||||||
|
varname, fieldname = m.groups()
|
||||||
|
v = self.allocate_var(varname)
|
||||||
|
offset = self.get_struct_field_offset(v.type, fieldname)
|
||||||
|
asm.append(f"ldr a, 0x{v.address:X}")
|
||||||
|
if offset != 0:
|
||||||
|
asm.append(f"ldw b, {offset}")
|
||||||
|
asm.append("add a, b")
|
||||||
|
asm.append("ldb a, a")
|
||||||
|
asm.append("int 0x01")
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# int arr[10];
|
||||||
|
m = re.match(r'int\s+([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
|
||||||
|
if m:
|
||||||
|
varname = m.group(1)
|
||||||
|
length = int(m.group(2))
|
||||||
|
arr_var, start_addr = self.allocate_array(varname, length)
|
||||||
|
asm.append(f"ldw a, 0x{start_addr:X}")
|
||||||
|
asm.append(f"str a, 0x{arr_var.address:X}")
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# int x = number;
|
||||||
|
m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
|
||||||
|
if m:
|
||||||
|
varname = m.group(1)
|
||||||
|
value = int(m.group(2))
|
||||||
|
var = self.allocate_var(varname, "int")
|
||||||
|
asm.append(f"ldw a, {value}")
|
||||||
|
asm.append(f"str a, 0x{var.address:X}")
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# int y = x + number;
|
||||||
|
m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
|
||||||
|
if m:
|
||||||
|
varname, var2, number = m.groups()
|
||||||
|
number = int(number)
|
||||||
|
v1 = self.allocate_var(varname, "int")
|
||||||
|
v2 = self.allocate_var(var2, "int")
|
||||||
|
asm.append(f"ldr a, 0x{v2.address:X}")
|
||||||
|
asm.append(f"ldw b, {number}")
|
||||||
|
asm.append("add a, b")
|
||||||
|
asm.append(f"str a, 0x{v1.address:X}")
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# char *msg = "Hello\n";
|
||||||
|
m = re.match(r'char\s*\*\s*([a-zA-Z_]\w*)\s*=\s*"([^"]*)"\s*;', line)
|
||||||
|
if m:
|
||||||
|
varname, string_val = m.groups()
|
||||||
|
v = self.allocate_var(varname, "char*")
|
||||||
|
code, start_addr = self.store_string(string_val)
|
||||||
|
asm.extend(code)
|
||||||
|
asm.append(f"ldw a, 0x{start_addr:X}")
|
||||||
|
asm.append(f"str a, 0x{v.address:X}")
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# var = number;
|
||||||
|
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
|
||||||
|
if m:
|
||||||
|
varname, value = m.groups()
|
||||||
|
value = int(value)
|
||||||
|
v = self.allocate_var(varname, "int")
|
||||||
|
asm.append(f"ldw a, {value}")
|
||||||
|
asm.append(f"str a, 0x{v.address:X}")
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# var = var2 + number;
|
||||||
|
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
|
||||||
|
if m:
|
||||||
|
varname, var2, number = m.groups()
|
||||||
|
number = int(number)
|
||||||
|
v1 = self.allocate_var(varname, "int")
|
||||||
|
v2 = self.allocate_var(var2, "int")
|
||||||
|
asm.append(f"ldr a, 0x{v2.address:X}")
|
||||||
|
asm.append(f"ldw b, {number}")
|
||||||
|
asm.append("add a, b")
|
||||||
|
asm.append(f"str a, 0x{v1.address:X}")
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# var[index] = number;
|
||||||
|
m = re.match(r'([a-zA-Z_]\w*)\[(\d+)\]\s*=\s*(\d+)\s*;', line)
|
||||||
|
if m:
|
||||||
|
arr, index, value = m.groups()
|
||||||
|
index = int(index)
|
||||||
|
value = int(value)
|
||||||
|
arr_var = self.allocate_var(arr)
|
||||||
|
asm.append(f"ldr a, 0x{arr_var.address:X}")
|
||||||
|
asm.append(f"ldw b, {index}")
|
||||||
|
asm.append("add a, b")
|
||||||
|
asm.append(f"ldw c, {value}")
|
||||||
|
asm.append("stb c, a")
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# x = arr[index];
|
||||||
|
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
|
||||||
|
if m:
|
||||||
|
varname, arr, index = m.groups()
|
||||||
|
index = int(index)
|
||||||
|
v = self.allocate_var(varname, "int")
|
||||||
|
arr_var = self.allocate_var(arr)
|
||||||
|
asm.append(f"ldr a, 0x{arr_var.address:X}")
|
||||||
|
asm.append(f"ldw b, {index}")
|
||||||
|
asm.append("add a, b")
|
||||||
|
asm.append("ldb d, a")
|
||||||
|
asm.append(f"str d, 0x{v.address:X}")
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# print_char(var);
|
||||||
|
m = re.match(r'print_char\(([a-zA-Z_]\w*)\)\s*;', line)
|
||||||
|
if m:
|
||||||
|
varname = m.group(1)
|
||||||
|
v = self.allocate_var(varname)
|
||||||
|
asm.append(f"ldr a, 0x{v.address:X}")
|
||||||
|
asm.append("int 0x00")
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# print_char(arr[index]);
|
||||||
|
m = re.match(r'print_char\(([a-zA-Z_]\w*)\[(\d+)\]\)\s*;', line)
|
||||||
|
if m:
|
||||||
|
arr, index = m.groups()
|
||||||
|
index = int(index)
|
||||||
|
arr_var = self.allocate_var(arr)
|
||||||
|
asm.append(f"ldr a, 0x{arr_var.address:X}")
|
||||||
|
asm.append(f"ldw b, {index}")
|
||||||
|
asm.append("add a, b")
|
||||||
|
asm.append("ldb a, a")
|
||||||
|
asm.append("int 0x00")
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# print_int(var);
|
||||||
|
m = re.match(r'print_int\(([a-zA-Z_]\w*)\)\s*;', line)
|
||||||
|
if m:
|
||||||
|
varname = m.group(1)
|
||||||
|
v = self.allocate_var(varname)
|
||||||
|
asm.append(f"ldr a, 0x{v.address:X}")
|
||||||
|
asm.append("int 0x01")
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# print_string(var);
|
||||||
|
m = re.match(r'print_string\(([a-zA-Z_]\w*)\)\s*;', line)
|
||||||
|
if m:
|
||||||
|
varname = m.group(1)
|
||||||
|
v = self.allocate_var(varname, "char*")
|
||||||
|
asm.append("ldw d, 0")
|
||||||
|
asm.append(f"ldr b, 0x{v.address:X}")
|
||||||
|
asm.append("ldw c, 1")
|
||||||
|
asm.append("string_loop:")
|
||||||
|
asm.append("ldb a, b")
|
||||||
|
asm.append("beq a, d, string_end")
|
||||||
|
asm.append("int 0x00")
|
||||||
|
asm.append("add b, c")
|
||||||
|
asm.append("jmp string_loop")
|
||||||
|
asm.append("string_end:")
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# return number;
|
||||||
|
m = re.match(r'return\s+(\d+)\s*;', line)
|
||||||
|
if m:
|
||||||
|
asm.append("int 0xFF")
|
||||||
|
return asm
|
||||||
|
|
||||||
|
# Unrecognized line or empty
|
||||||
|
return asm
|
||||||
|
|
||||||
|
def compile_c(self, c_code):
|
||||||
|
# First, parse everything to detect structs and typedef done in preprocess
|
||||||
|
all_lines = c_code.split('\n')
|
||||||
|
# struct definitions might appear outside main
|
||||||
|
for cline in all_lines:
|
||||||
|
self.compile_line(cline)
|
||||||
|
|
||||||
|
# Extract lines inside main
|
||||||
|
lines = []
|
||||||
|
in_main = False
|
||||||
|
for cline in all_lines:
|
||||||
|
cline = cline.rstrip()
|
||||||
|
if 'int main(' in cline:
|
||||||
|
in_main = True
|
||||||
|
continue
|
||||||
|
if in_main:
|
||||||
|
if cline.startswith('}'):
|
||||||
|
in_main = False
|
||||||
|
break
|
||||||
|
lines.append(cline)
|
||||||
|
|
||||||
|
asm = ["main:"]
|
||||||
|
for line in lines:
|
||||||
|
code_part, comment_part = self.extract_comment(line)
|
||||||
|
instructions = self.compile_line(code_part)
|
||||||
|
if instructions:
|
||||||
|
for i, instr in enumerate(instructions):
|
||||||
|
if i == 0 and comment_part:
|
||||||
|
asm.append(f" {instr} ; {comment_part}")
|
||||||
|
else:
|
||||||
|
asm.append(f" {instr}")
|
||||||
|
else:
|
||||||
|
if comment_part:
|
||||||
|
asm.append(f" ; {comment_part}")
|
||||||
|
|
||||||
|
return asm
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Preprocess main.c, compile it, then write one instruction per line
    # to test.asm.  Compilation finishes before the output file is opened.
    compiler = Compiler()
    c_code = "\n".join(compiler.preprocess("main.c"))
    asm_code = compiler.compile_c(c_code)

    with open("test.asm", "w") as out:
        out.writelines(instruction + "\n" for instruction in asm_code)
|
Loading…
Reference in New Issue
Block a user