did nothing...
This commit is contained in:
parent
b88b8bffc9
commit
a86ffb54db
BIN
__pycache__/program.cpython-311.pyc
Normal file
BIN
__pycache__/program.cpython-311.pyc
Normal file
Binary file not shown.
753
c-to-asm.py
753
c-to-asm.py
@ -1,548 +1,241 @@
|
||||
#!/usr/bin/env python3
|
||||
import re
|
||||
import os
|
||||
from termcolor import colored
|
||||
|
||||
# Valid registers and instructions.
|
||||
valid_registers = {"a", "b", "c", "d", "e", "f"}
|
||||
valid_instructions = {"ldw", "mov", "add", "sub", "str", "ldr", "int",
|
||||
"push", "pop", "jsr", "ret", "xor", "and", "jmp",
|
||||
"mul", "div", "bne", "beq", "blt", "ldb", "stb"}
|
||||
|
||||
class Variable:
    """Record describing one compiled variable.

    Stores the three constructor arguments unchanged as ``name``,
    ``address`` and ``type`` (``type`` defaults to ``"int"``).
    """

    def __init__(self, name, address, var_type="int"):
        self.name, self.address, self.type = name, address, var_type
|
||||
# Fixed pool of registers.
|
||||
register_pool = ["a", "b", "c", "d", "e", "f"]
|
||||
|
||||
class Compiler:
|
||||
def __init__(self):
    """Start with empty symbol tables and the allocation cursor at 0x400."""
    # Allocation cursor; allocate_bytes() moves it towards address 0.
    # Original note: "1024 bytes total: 0x000 to 0x3FF".
    # NOTE(review): 0x400 itself lies just past that range, yet the first
    # one-byte allocation returns it -- confirm the real memory size.
    self.data_ptr = 0x400

    # Symbol tables: variables, struct layouts, #define macros, typedefs.
    self.variables, self.struct_definitions = {}, {}
    self.defines, self.typedefs = {}, {}

    # State of the struct definition currently being read, if any.
    self.in_struct_def = False
    self.current_struct_name = None
    self.current_struct_fields = []

    # Running label number and the stack of open if/while blocks.
    self.label_counter = 0
    self.block_stack = []
|
||||
def allocate_register(var_name, context):
    """Bind *var_name* to the first unused register of ``register_pool``.

    The binding is recorded in ``context['var_to_reg']`` and the chosen
    register name is returned.  Raises ``Exception("Out of registers!")``
    when every register in the pool is already bound.
    """
    bindings = context['var_to_reg']
    taken = set(bindings.values())
    free = next((reg for reg in register_pool if reg not in taken), None)
    if free is None:
        raise Exception("Out of registers!")
    bindings[var_name] = free
    return free
|
||||
|
||||
def new_label(self, prefix):
    """Return *prefix* followed by the current label number, then bump it."""
    serial = self.label_counter
    self.label_counter = serial + 1
    return f"{prefix}{serial}"
|
||||
|
||||
def preprocess(self, filename):
    """Read *filename* (following includes) and return its macro-expanded lines."""
    return self._apply_defines(self._read_file_recursive(filename))
|
||||
|
||||
def _read_file_recursive(self, filename, included_files=None):
|
||||
if included_files is None:
|
||||
included_files = set()
|
||||
|
||||
if filename in included_files:
|
||||
# Prevent infinite recursion on includes
|
||||
return []
|
||||
|
||||
included_files.add(filename)
|
||||
|
||||
result_lines = []
|
||||
try:
|
||||
with open(filename, "r") as f:
|
||||
for line in f:
|
||||
line_stripped = line.strip()
|
||||
def compile_expr(expr, dest, temp, context):
|
||||
"""
|
||||
Compile a simple expression (literals, variables, +, -) into assembly.
|
||||
|
||||
# #include "file"
|
||||
inc_match = re.match(r'#include\s+"([^"]+)"', line_stripped)
|
||||
if inc_match:
|
||||
inc_file = inc_match.group(1)
|
||||
included_content = self._read_file_recursive(inc_file, included_files)
|
||||
result_lines.extend(included_content)
|
||||
continue
|
||||
|
||||
# #define KEY VALUE
|
||||
def_match = re.match(r'#define\s+([a-zA-Z_]\w*)\s+(.*)', line_stripped)
|
||||
if def_match:
|
||||
key = def_match.group(1)
|
||||
value = def_match.group(2)
|
||||
self.defines[key] = value
|
||||
continue
|
||||
|
||||
# typedef oldtype newtype;
|
||||
tmatch = re.match(r'typedef\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line_stripped)
|
||||
if tmatch:
|
||||
oldt = tmatch.group(1)
|
||||
newt = tmatch.group(2)
|
||||
# Resolve oldt if it's also a typedef
|
||||
oldt = self.apply_typedef(oldt)
|
||||
self.typedefs[newt] = oldt
|
||||
continue
|
||||
|
||||
result_lines.append(line)
|
||||
except FileNotFoundError as e:
|
||||
print(colored(f"{filename}:0: error: {e}", "red"))
|
||||
|
||||
Parameters:
|
||||
expr: string expression (e.g., "5", "x", "x + 3")
|
||||
dest: destination register for the result.
|
||||
temp: temporary register.
|
||||
context: dictionary with function context (like var_to_reg).
|
||||
"""
|
||||
var_to_reg = context['var_to_reg']
|
||||
instructions = []
|
||||
tokens = re.split(r'(\+|\-)', expr)
|
||||
tokens = [t.strip() for t in tokens if t.strip() != '']
|
||||
if not tokens:
|
||||
return instructions
|
||||
|
||||
return result_lines
|
||||
|
||||
def _apply_defines(self, lines):
    """Expand ``#define`` macros in *lines* and return the new list of lines.

    Every word (run of letters, digits and underscores) that is a key of
    ``self.defines`` is replaced by its value.  Double-quoted string
    literals are copied through untouched, so ``"TEST"`` stays as written
    while a bare ``TEST`` is expanded.  Replacement text is not rescanned.
    Each replacement is reported on stdout.
    """
    # First alternative swallows a whole string literal (with escapes) so
    # that the words inside it are never offered for replacement.
    scanner = re.compile(r'"(?:\\.|[^"\\])*"|[A-Za-z0-9_]+')

    def expand(match):
        token = match.group(0)
        if token[0] != '"' and token in self.defines:
            print(f"Replaced {token} with {self.defines[token]}")
            return self.defines[token]
        return token

    return [scanner.sub(expand, line) for line in lines]
|
||||
|
||||
def apply_typedef(self, t):
    """Return the type registered for alias *t*, or *t* itself if it is no alias."""
    return self.typedefs.get(t, t)
|
||||
|
||||
def allocate_bytes(self, count):
    """Reserve *count* bytes ending at ``self.data_ptr`` and return the lowest address.

    Afterwards ``self.data_ptr`` sits one byte below the returned address.
    Raises ``Exception("Out of memory!")`` if the block would start below 0.
    """
    first = self.data_ptr + 1 - count
    if first < 0:
        raise Exception("Out of memory!")
    self.data_ptr = first - 1
    return first
|
||||
|
||||
def allocate_var(self, name, var_type="int"):
|
||||
var_type = self.apply_typedef(var_type)
|
||||
if name in self.variables:
|
||||
return self.variables[name]
|
||||
|
||||
if var_type.startswith("struct:"):
|
||||
sname = var_type.split(":")[1]
|
||||
fields = self.struct_definitions[sname]
|
||||
length = len(fields) # each 1 byte
|
||||
start_addr = self.allocate_bytes(length)
|
||||
var = Variable(name, start_addr, var_type)
|
||||
self.variables[name] = var
|
||||
return var
|
||||
# Process first term.
|
||||
token = tokens[0]
|
||||
if token.isdigit():
|
||||
instructions.append(f"ldw {dest}, {token}")
|
||||
else:
|
||||
if token not in var_to_reg:
|
||||
raise Exception(f"Variable '{token}' not declared")
|
||||
src_reg = var_to_reg[token]
|
||||
if src_reg != dest:
|
||||
instructions.append(f"mov {dest}, {src_reg}")
|
||||
i = 1
|
||||
while i < len(tokens):
|
||||
op = tokens[i]
|
||||
operand = tokens[i+1]
|
||||
if operand.isdigit():
|
||||
instructions.append(f"ldw {temp}, {operand}")
|
||||
if op == "+":
|
||||
instructions.append(f"add {dest}, {temp}")
|
||||
elif op == "-":
|
||||
instructions.append(f"sub {dest}, {temp}")
|
||||
else:
|
||||
raise Exception(f"Unsupported operator '{op}'")
|
||||
else:
|
||||
start_addr = self.allocate_bytes(1)
|
||||
var = Variable(name, start_addr, var_type)
|
||||
self.variables[name] = var
|
||||
return var
|
||||
|
||||
def allocate_array(self, name, length, var_type="int"):
    """Reserve storage for an array and register *name* as an ``"array"`` variable.

    *length* bytes are reserved for the elements first, then one more byte
    whose address becomes the variable's address.  Returns the tuple
    ``(variable, address_of_element_storage)``.

    *var_type* is accepted for signature compatibility only: the variable
    is always recorded with type ``"array"``, so the previous typedef
    lookup on it was dead code and has been dropped.
    """
    arr_start = self.allocate_bytes(length)
    var = Variable(name, self.allocate_bytes(1), "array")
    self.variables[name] = var
    return var, arr_start
|
||||
|
||||
def store_string(self, string_value):
    """Emit code that writes *string_value* plus a terminating 0 into fresh storage.

    Only the two-character sequence backslash-n is translated (to a
    newline).  Returns ``(instructions, start_address)``; every byte is
    loaded into register ``a`` and stored with ``str``.
    """
    text = string_value.replace('\\n', '\n')
    base = self.allocate_bytes(len(text) + 1)
    code = []
    for offset, ch in enumerate(text):
        code += [f"ldw a, {ord(ch)}", f"str a, 0x{base + offset:X}"]
    # Terminator goes directly after the last character.
    code += ["ldw a, 0", f"str a, 0x{base + len(text):X}"]
    return code, base
|
||||
|
||||
def get_struct_field_offset(self, struct_type, field_name):
    """Return the position of *field_name* within the struct named by *struct_type*.

    *struct_type* has the form ``"struct:<name>"``; the part after the
    colon selects the layout in ``self.struct_definitions``.  Raises
    ``Exception`` when the struct has no such field.
    """
    layout = self.struct_definitions[struct_type.split(":")[1]]
    position = next(
        (i for i, (fname, _ftype) in enumerate(layout) if fname == field_name),
        None,
    )
    if position is None:
        raise Exception(f"Field {field_name} not found in {struct_type}")
    return position
|
||||
|
||||
def parse_condition(self, cond_str):
    """Split a comparison such as ``a == b`` or ``a != b`` into its parts.

    Returns ``(left, op, right)`` where both operands are identifiers and
    *op* is ``"=="`` or ``"!="``.  The whole trimmed string must be the
    comparison: trailing text (for example ``a == b && c == d``) used to be
    ignored silently and is now rejected.

    Raises ``Exception("Unsupported condition: ...")`` for anything else.
    """
    m = re.fullmatch(r'([a-zA-Z_]\w*)\s*(==|!=)\s*([a-zA-Z_]\w*)', cond_str.strip())
    if not m:
        raise Exception("Unsupported condition: " + cond_str)
    return m.groups()
|
||||
|
||||
def compile_condition(self, var1, op, var2):
    """Emit code leaving ``var1 - var2`` in register ``a``; return ``(code, op)``.

    Both names go through ``self.allocate_var``; *op* is passed back
    unchanged for the caller to pick the branch instruction.
    """
    left = self.allocate_var(var1)
    right = self.allocate_var(var2)
    code = [
        f"ldr a, 0x{left.address:X}",
        f"ldr b, 0x{right.address:X}",
        "sub a, b",  # a = a - b
    ]
    return code, op
|
||||
|
||||
def extract_comment(self, line):
    """Split *line* at its first ``//`` comment marker.

    Returns ``(code, comment)`` where *comment* is the stripped text after
    the marker, or ``(line, None)`` when there is no comment.  A ``//``
    inside a double-quoted string literal (e.g. ``"http://x"``) is part of
    the code and no longer mistaken for a comment.
    """
    in_string = False
    i = 0
    while i < len(line):
        ch = line[i]
        if in_string:
            if ch == "\\":
                i += 1  # skip the escaped character as well
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif line.startswith("//", i):
            return line[:i], line[i + 2:].strip()
        i += 1
    return line, None
|
||||
|
||||
def compile_line(self, code_part):
|
||||
line = code_part.strip()
|
||||
asm = []
|
||||
|
||||
if self.in_struct_def:
|
||||
if line.startswith("};"):
|
||||
self.struct_definitions[self.current_struct_name] = self.current_struct_fields
|
||||
self.in_struct_def = False
|
||||
self.current_struct_name = None
|
||||
self.current_struct_fields = []
|
||||
return asm
|
||||
mfield = re.match(r'int\s+([a-zA-Z_]\w*)\s*;', line)
|
||||
if mfield:
|
||||
fname = mfield.group(1)
|
||||
ftype = "int"
|
||||
self.current_struct_fields.append((fname, ftype))
|
||||
return asm
|
||||
|
||||
# struct definition start
|
||||
msd = re.match(r'struct\s+([a-zA-Z_]\w*)\s*\{', line)
|
||||
if msd:
|
||||
self.in_struct_def = True
|
||||
self.current_struct_name = msd.group(1)
|
||||
self.current_struct_fields = []
|
||||
return asm
|
||||
|
||||
# struct var declaration
|
||||
msv = re.match(r'struct\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line)
|
||||
if msv:
|
||||
sname, varname = msv.groups()
|
||||
var_type = "struct:" + sname
|
||||
self.allocate_var(varname, var_type)
|
||||
return asm
|
||||
|
||||
# if statement
|
||||
mif = re.match(r'if\s*\(([^)]+)\)\s*\{', line)
|
||||
if mif:
|
||||
cond_str = mif.group(1)
|
||||
var1, op, var2 = self.parse_condition(cond_str)
|
||||
end_label = self.new_label("endif")
|
||||
cond_code, cmp_op = self.compile_condition(var1, op, var2)
|
||||
asm.extend(cond_code)
|
||||
# if '==': jump if not zero a != 0
|
||||
# if '!=': jump if zero a == 0
|
||||
if cmp_op == '==':
|
||||
asm.append("bne a, 0, " + end_label)
|
||||
if operand not in var_to_reg:
|
||||
raise Exception(f"Variable '{operand}' not declared")
|
||||
operand_reg = var_to_reg[operand]
|
||||
if op == "+":
|
||||
instructions.append(f"add {dest}, {operand_reg}")
|
||||
elif op == "-":
|
||||
instructions.append(f"sub {dest}, {operand_reg}")
|
||||
else:
|
||||
asm.append("beq a, 0, " + end_label)
|
||||
self.block_stack.append(('if', end_label))
|
||||
return asm
|
||||
raise Exception(f"Unsupported operator '{op}'")
|
||||
i += 2
|
||||
return instructions
|
||||
|
||||
# while statement
|
||||
mwhile = re.match(r'while\s*\(([^)]+)\)\s*\{', line)
|
||||
if mwhile:
|
||||
cond_str = mwhile.group(1)
|
||||
var1, op, var2 = self.parse_condition(cond_str)
|
||||
start_label = self.new_label("whilestart")
|
||||
end_label = self.new_label("whileend")
|
||||
asm.append(start_label + ":")
|
||||
cond_code, cmp_op = self.compile_condition(var1, op, var2)
|
||||
asm.extend(cond_code)
|
||||
if cmp_op == '==':
|
||||
asm.append("bne a, 0, " + end_label)
|
||||
else:
|
||||
asm.append("beq a, 0, " + end_label)
|
||||
self.block_stack.append(('while', start_label, end_label))
|
||||
return asm
|
||||
def compile_statement(line, context):
|
||||
"""
|
||||
Compile a single statement from our limited C language.
|
||||
Supports:
|
||||
- Variable declaration: e.g., "int x = 5;"
|
||||
- Assignment: e.g., "x = x + 2;"
|
||||
- Function call: e.g., "foo();"
|
||||
- Return statement: e.g., "return x;"
|
||||
"""
|
||||
var_to_reg = context['var_to_reg']
|
||||
instructions = []
|
||||
line = line.strip().rstrip(';')
|
||||
if not line:
|
||||
return instructions
|
||||
|
||||
# end of block
|
||||
if line == "}":
|
||||
if not self.block_stack:
|
||||
return asm
|
||||
blk = self.block_stack.pop()
|
||||
if blk[0] == 'if':
|
||||
end_label = blk[1]
|
||||
asm.append(end_label + ":")
|
||||
elif blk[0] == 'while':
|
||||
start_label = blk[1]
|
||||
end_label = blk[2]
|
||||
# jump back to start
|
||||
asm.append(f"jmp {start_label}")
|
||||
asm.append(end_label + ":")
|
||||
return asm
|
||||
# Function call statement pattern: identifier followed by "()"
|
||||
m = re.match(r'^(\w+)\s*\(\s*\)\s*$', line)
|
||||
if m:
|
||||
func_name = m.group(1)
|
||||
instructions.append(f"jsr {func_name}")
|
||||
return instructions
|
||||
|
||||
# p.x = number;
|
||||
m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
|
||||
# Variable declaration.
|
||||
if line.startswith("int "):
|
||||
line = line[4:].strip() # Remove "int "
|
||||
parts = line.split("=", 1)
|
||||
if len(parts) != 2:
|
||||
raise Exception("Invalid declaration syntax.")
|
||||
var_name = parts[0].strip()
|
||||
expr = parts[1].strip()
|
||||
reg = allocate_register(var_name, context)
|
||||
# Choose a temporary register different from the destination.
|
||||
temp = next((r for r in register_pool if r != reg and r not in var_to_reg.values()), None)
|
||||
if temp is None:
|
||||
temp = next((r for r in register_pool if r != reg), None)
|
||||
instructions.extend(compile_expr(expr, reg, temp, context))
|
||||
return instructions
|
||||
|
||||
# Return statement.
|
||||
if line.startswith("return"):
|
||||
ret_expr = line[6:].strip() # Remove "return"
|
||||
if ret_expr:
|
||||
# Convention: return value in register a.
|
||||
temp = next((r for r in register_pool if r != "a" and r not in var_to_reg.values()), None)
|
||||
if temp is None:
|
||||
temp = next((r for r in register_pool if r != "a"), None)
|
||||
instructions.extend(compile_expr(ret_expr, "a", temp, context))
|
||||
instructions.append("ret")
|
||||
return instructions
|
||||
|
||||
# Assignment statement.
|
||||
if "=" in line:
|
||||
parts = line.split("=", 1)
|
||||
var_name = parts[0].strip()
|
||||
expr = parts[1].strip()
|
||||
if var_name not in var_to_reg:
|
||||
raise Exception(f"Variable '{var_name}' not declared")
|
||||
dest = var_to_reg[var_name]
|
||||
temp = next((r for r in register_pool if r != dest and r not in var_to_reg.values()), None)
|
||||
if temp is None:
|
||||
temp = next((r for r in register_pool if r != dest), None)
|
||||
instructions.extend(compile_expr(expr, dest, temp, context))
|
||||
return instructions
|
||||
|
||||
raise Exception(f"Unrecognized statement: {line}")
|
||||
|
||||
def compile_function(func_name, lines):
    """Compile the body *lines* of one function into assembly instructions.

    The result starts with the label ``"<func_name>:"``.  Blank lines and
    lines starting with ``//`` are skipped; every other line is handed to
    ``compile_statement`` together with a context that is fresh for this
    function (an empty ``var_to_reg`` map).
    """
    context = {"var_to_reg": {}}
    instructions = [f"{func_name}:"]
    for raw in lines:
        stmt = raw.strip()
        if stmt and not stmt.startswith("//"):
            instructions.extend(compile_statement(stmt, context))
    return instructions
|
||||
|
||||
def compile_c_to_asm(c_code):
|
||||
"""
|
||||
Compile a simple C program (with functions) into assembly.
|
||||
The program must contain functions defined as:
|
||||
|
||||
int func_name() {
|
||||
// statements
|
||||
}
|
||||
|
||||
The compiled output will start at the main function (if defined).
|
||||
"""
|
||||
lines = c_code.splitlines()
|
||||
functions = {}
|
||||
current_func = None
|
||||
current_lines = []
|
||||
in_function = False
|
||||
|
||||
for line in lines:
|
||||
stripped = line.strip()
|
||||
if not stripped or stripped.startswith("//"):
|
||||
continue
|
||||
|
||||
# Detect function start: "int funcName() {"
|
||||
m = re.match(r'^int\s+(\w+)\s*\(\s*\)\s*\{', stripped)
|
||||
if m:
|
||||
varname, fieldname, value = m.groups()
|
||||
value = int(value)
|
||||
v = self.allocate_var(varname)
|
||||
offset = self.get_struct_field_offset(v.type, fieldname)
|
||||
asm.append(f"ldr a, 0x{v.address:X}")
|
||||
if offset != 0:
|
||||
asm.append(f"ldw b, {offset}")
|
||||
asm.append("add a, b")
|
||||
asm.append(f"ldw c, {value}")
|
||||
asm.append("stb c, a")
|
||||
return asm
|
||||
if in_function:
|
||||
raise Exception("Nested functions not supported.")
|
||||
current_func = m.group(1)
|
||||
in_function = True
|
||||
current_lines = []
|
||||
continue
|
||||
|
||||
# p.x = var + number;
|
||||
m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
|
||||
if m:
|
||||
varname, fieldname, srcvar, number = m.groups()
|
||||
number = int(number)
|
||||
v = self.allocate_var(varname)
|
||||
offset = self.get_struct_field_offset(v.type, fieldname)
|
||||
asm.append(f"ldr a, 0x{v.address:X}")
|
||||
if offset != 0:
|
||||
asm.append(f"ldw b, {offset}")
|
||||
asm.append("add a, b")
|
||||
v2 = self.allocate_var(srcvar)
|
||||
asm.append(f"ldr c, 0x{v2.address:X}")
|
||||
asm.append(f"ldw d, {number}")
|
||||
asm.append("add c, d")
|
||||
asm.append("stb c, a")
|
||||
return asm
|
||||
# Detect end of function: "}"
|
||||
if stripped == "}":
|
||||
if not in_function:
|
||||
raise Exception("Unexpected '}'")
|
||||
functions[current_func] = compile_function(current_func, current_lines)
|
||||
in_function = False
|
||||
current_func = None
|
||||
current_lines = []
|
||||
continue
|
||||
|
||||
# p.x = srcvar;
|
||||
m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*;', line)
|
||||
if m:
|
||||
varname, fieldname, srcvar = m.groups()
|
||||
v = self.allocate_var(varname)
|
||||
offset = self.get_struct_field_offset(v.type, fieldname)
|
||||
asm.append(f"ldr a, 0x{v.address:X}")
|
||||
if offset != 0:
|
||||
asm.append(f"ldw b, {offset}")
|
||||
asm.append("add a, b")
|
||||
v2 = self.allocate_var(srcvar)
|
||||
asm.append(f"ldr c, 0x{v2.address:X}")
|
||||
asm.append("stb c, a")
|
||||
return asm
|
||||
# Inside a function, add the line.
|
||||
if in_function:
|
||||
current_lines.append(stripped)
|
||||
else:
|
||||
# Outside any function; for simplicity, ignore global declarations.
|
||||
continue
|
||||
|
||||
# x = p.x;
|
||||
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*;', line)
|
||||
if m:
|
||||
dst, varname, fieldname = m.groups()
|
||||
v = self.allocate_var(varname)
|
||||
offset = self.get_struct_field_offset(v.type, fieldname)
|
||||
vd = self.allocate_var(dst)
|
||||
asm.append(f"ldr a, 0x{v.address:X}")
|
||||
if offset != 0:
|
||||
asm.append(f"ldw b, {offset}")
|
||||
asm.append("add a, b")
|
||||
asm.append("ldb c, a")
|
||||
asm.append(f"str c, 0x{vd.address:X}")
|
||||
return asm
|
||||
|
||||
# print_int(p.x);
|
||||
m = re.match(r'print_int\(([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\)\s*;', line)
|
||||
if m:
|
||||
varname, fieldname = m.groups()
|
||||
v = self.allocate_var(varname)
|
||||
offset = self.get_struct_field_offset(v.type, fieldname)
|
||||
asm.append(f"ldr a, 0x{v.address:X}")
|
||||
if offset != 0:
|
||||
asm.append(f"ldw b, {offset}")
|
||||
asm.append("add a, b")
|
||||
asm.append("ldb a, a")
|
||||
asm.append("int 0x01")
|
||||
return asm
|
||||
|
||||
# int arr[10];
|
||||
m = re.match(r'int\s+([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
|
||||
if m:
|
||||
varname = m.group(1)
|
||||
length = int(m.group(2))
|
||||
arr_var, start_addr = self.allocate_array(varname, length)
|
||||
asm.append(f"ldw a, 0x{start_addr:X}")
|
||||
asm.append(f"str a, 0x{arr_var.address:X}")
|
||||
return asm
|
||||
|
||||
# int x = number;
|
||||
m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
|
||||
if m:
|
||||
varname = m.group(1)
|
||||
value = int(m.group(2))
|
||||
var = self.allocate_var(varname, "int")
|
||||
asm.append(f"ldw a, {value}")
|
||||
asm.append(f"str a, 0x{var.address:X}")
|
||||
return asm
|
||||
|
||||
# int y = x + number;
|
||||
m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
|
||||
if m:
|
||||
varname, var2, number = m.groups()
|
||||
number = int(number)
|
||||
v1 = self.allocate_var(varname, "int")
|
||||
v2 = self.allocate_var(var2, "int")
|
||||
asm.append(f"ldr a, 0x{v2.address:X}")
|
||||
asm.append(f"ldw b, {number}")
|
||||
asm.append("add a, b")
|
||||
asm.append(f"str a, 0x{v1.address:X}")
|
||||
return asm
|
||||
|
||||
# char *msg = "Hello\n";
|
||||
m = re.match(r'char\s*\*\s*([a-zA-Z_]\w*)\s*=\s*"([^"]*)"\s*;', line)
|
||||
if m:
|
||||
varname, string_val = m.groups()
|
||||
v = self.allocate_var(varname, "char*")
|
||||
code, start_addr = self.store_string(string_val)
|
||||
asm.extend(code)
|
||||
asm.append(f"ldw a, 0x{start_addr:X}")
|
||||
asm.append(f"str a, 0x{v.address:X}")
|
||||
return asm
|
||||
|
||||
# var = number;
|
||||
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
|
||||
if m:
|
||||
varname, value = m.groups()
|
||||
value = int(value)
|
||||
v = self.allocate_var(varname, "int")
|
||||
asm.append(f"ldw a, {value}")
|
||||
asm.append(f"str a, 0x{v.address:X}")
|
||||
return asm
|
||||
|
||||
# var = var2 + number;
|
||||
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
|
||||
if m:
|
||||
varname, var2, number = m.groups()
|
||||
number = int(number)
|
||||
v1 = self.allocate_var(varname, "int")
|
||||
v2 = self.allocate_var(var2, "int")
|
||||
asm.append(f"ldr a, 0x{v2.address:X}")
|
||||
asm.append(f"ldw b, {number}")
|
||||
asm.append("add a, b")
|
||||
asm.append(f"str a, 0x{v1.address:X}")
|
||||
return asm
|
||||
|
||||
# var[index] = number;
|
||||
m = re.match(r'([a-zA-Z_]\w*)\[(\d+)\]\s*=\s*(\d+)\s*;', line)
|
||||
if m:
|
||||
arr, index, value = m.groups()
|
||||
index = int(index)
|
||||
value = int(value)
|
||||
arr_var = self.allocate_var(arr)
|
||||
asm.append(f"ldr a, 0x{arr_var.address:X}")
|
||||
asm.append(f"ldw b, {index}")
|
||||
asm.append("add a, b")
|
||||
asm.append(f"ldw c, {value}")
|
||||
asm.append("stb c, a")
|
||||
return asm
|
||||
|
||||
# x = arr[index];
|
||||
m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
|
||||
if m:
|
||||
varname, arr, index = m.groups()
|
||||
index = int(index)
|
||||
v = self.allocate_var(varname, "int")
|
||||
arr_var = self.allocate_var(arr)
|
||||
asm.append(f"ldr a, 0x{arr_var.address:X}")
|
||||
asm.append(f"ldw b, {index}")
|
||||
asm.append("add a, b")
|
||||
asm.append("ldb d, a")
|
||||
asm.append(f"str d, 0x{v.address:X}")
|
||||
return asm
|
||||
|
||||
# print_char(var);
|
||||
m = re.match(r'print_char\(([a-zA-Z_]\w*)\)\s*;', line)
|
||||
if m:
|
||||
varname = m.group(1)
|
||||
v = self.allocate_var(varname)
|
||||
asm.append(f"ldr a, 0x{v.address:X}")
|
||||
asm.append("int 0x00")
|
||||
return asm
|
||||
|
||||
# print_char(arr[index]);
|
||||
m = re.match(r'print_char\(([a-zA-Z_]\w*)\[(\d+)\]\)\s*;', line)
|
||||
if m:
|
||||
arr, index = m.groups()
|
||||
index = int(index)
|
||||
arr_var = self.allocate_var(arr)
|
||||
asm.append(f"ldr a, 0x{arr_var.address:X}")
|
||||
asm.append(f"ldw b, {index}")
|
||||
asm.append("add a, b")
|
||||
asm.append("ldb a, a")
|
||||
asm.append("int 0x00")
|
||||
return asm
|
||||
|
||||
# print_int(var);
|
||||
m = re.match(r'print_int\(([a-zA-Z_]\w*)\)\s*;', line)
|
||||
if m:
|
||||
varname = m.group(1)
|
||||
v = self.allocate_var(varname)
|
||||
asm.append(f"ldr a, 0x{v.address:X}")
|
||||
asm.append("int 0x01")
|
||||
return asm
|
||||
|
||||
# print_string(var);
|
||||
m = re.match(r'print_string\(([a-zA-Z_]\w*)\)\s*;', line)
|
||||
if m:
|
||||
varname = m.group(1)
|
||||
v = self.allocate_var(varname, "char*")
|
||||
asm.append("ldw d, 0")
|
||||
asm.append(f"ldr b, 0x{v.address:X}")
|
||||
asm.append("ldw c, 1")
|
||||
asm.append("string_loop:")
|
||||
asm.append("ldb a, b")
|
||||
asm.append("beq a, d, string_end")
|
||||
asm.append("int 0x00")
|
||||
asm.append("add b, c")
|
||||
asm.append("jmp string_loop")
|
||||
asm.append("string_end:")
|
||||
return asm
|
||||
|
||||
# return number;
|
||||
m = re.match(r'return\s+(\d+)\s*;', line)
|
||||
if m:
|
||||
asm.append("int 0xFF")
|
||||
return asm
|
||||
|
||||
# Unrecognized line or empty
|
||||
return asm
|
||||
|
||||
def compile_c(self, c_code):
    """Compile the body of ``main`` in *c_code* and return the assembly lines.

    The text is split into the lines of ``main`` (everything after the
    first line containing ``int main(`` up to the first line that starts
    with ``}``) and all remaining lines.  The remaining lines are fed to
    ``compile_line`` once, with their output discarded, so that struct
    definitions outside ``main`` get registered.  The lines of ``main`` are
    then compiled exactly once.

    Previously the registration pass ran over every line, including those
    of ``main``, so storage and labels for ``main`` were allocated twice.
    """
    outside = []
    body = []
    in_main = False
    finished = False
    for raw in c_code.split('\n'):
        cline = raw.rstrip()
        if finished:
            outside.append(raw)
        elif 'int main(' in cline:
            in_main = True
        elif not in_main:
            outside.append(raw)
        elif cline.startswith('}'):
            # Only an unindented closing brace ends main.
            finished = True
        else:
            body.append(cline)

    # struct definitions might appear outside main
    for raw in outside:
        self.compile_line(raw)

    asm = ["main:"]
    for line in body:
        code_part, comment_part = self.extract_comment(line)
        instructions = self.compile_line(code_part)
        if not instructions:
            if comment_part:
                asm.append(f" ; {comment_part}")
            continue
        for i, instr in enumerate(instructions):
            # The source comment is attached to the first instruction only.
            if i == 0 and comment_part:
                asm.append(f" {instr} ; {comment_part}")
            else:
                asm.append(f" {instr}")

    return asm
|
||||
# Build the final assembly code.
|
||||
# If "main" is defined, list it first.
|
||||
asm_lines = []
|
||||
if "main" in functions:
|
||||
asm_lines.extend(functions["main"])
|
||||
for fname, code in functions.items():
|
||||
if fname != "main":
|
||||
asm_lines.extend(code)
|
||||
else:
|
||||
for fname, code in functions.items():
|
||||
asm_lines.extend(code)
|
||||
return asm_lines
|
||||
|
||||
# Example usage.
|
||||
if __name__ == "__main__":
|
||||
compiler = Compiler()
|
||||
preprocessed_lines = compiler.preprocess("main.c")
|
||||
c_code = "\n".join(preprocessed_lines)
|
||||
asm_code = compiler.compile_c(c_code)
|
||||
|
||||
with open("test.asm", "w") as out:
|
||||
for line in asm_code:
|
||||
out.write(line + "\n")
|
||||
sample_c = """
|
||||
// sample C program with functions.
|
||||
int main() {
|
||||
int x = 5;
|
||||
int y = 10;
|
||||
x = x + y;
|
||||
foo();
|
||||
|
||||
}
|
||||
|
||||
int foo() {
|
||||
int a = 3;
|
||||
int b = 7;
|
||||
a = a + b;
|
||||
return a;
|
||||
}
|
||||
"""
|
||||
asm_output = compile_c_to_asm(sample_c)
|
||||
for inst in asm_output:
|
||||
print(inst)
|
||||
|
@ -1 +1 @@
|
||||
program = [1,0,87,5,0,1004,1,0,101,5,0,1005,1,0,108,5,0,1006,1,0,99,5,0,1007,1,0,111,5,0,1008,1,0,109,5,0,1009,1,0,101,5,0,1010,1,0,33,5,0,1011,1,0,10,5,0,1012,1,0,0,5,0,1013,1,0,1004,5,0,1024,1,3,0,6,1,1024,1,2,1,21,0,1,9,0,3,91,10,0,0,3,1,2,17,75,0,10,255,0]
|
||||
program = [1,0,5,1,1,10,3,0,1,13,15,0,14,0,0,1,0,3,1,1,7,3,0,1,14,0,0]
|
2
std.h
2
std.h
@ -1,6 +1,6 @@
|
||||
#define true 1
|
||||
#define false 0
|
||||
#define TEST 256
|
||||
|
||||
|
||||
struct vec2 {
|
||||
int x;
|
||||
|
43
test.asm
43
test.asm
@ -1,34 +1,11 @@
|
||||
main:
|
||||
ldw a, 87
|
||||
str a, 0x3EC
|
||||
ldw a, 101
|
||||
str a, 0x3ED
|
||||
ldw a, 108
|
||||
str a, 0x3EE
|
||||
ldw a, 99
|
||||
str a, 0x3EF
|
||||
ldw a, 111
|
||||
str a, 0x3F0
|
||||
ldw a, 109
|
||||
str a, 0x3F1
|
||||
ldw a, 101
|
||||
str a, 0x3F2
|
||||
ldw a, 33
|
||||
str a, 0x3F3
|
||||
ldw a, 10
|
||||
str a, 0x3F4
|
||||
ldw a, 0
|
||||
str a, 0x3F5
|
||||
ldw a, 0x3EC
|
||||
str a, 0x400
|
||||
ldw d, 0
|
||||
ldr b, 0x400
|
||||
ldw c, 1
|
||||
string_loop:
|
||||
ldb a, b
|
||||
beq a, d, string_end
|
||||
int 0x00
|
||||
add b, c
|
||||
jmp string_loop
|
||||
string_end:
|
||||
int 0xFF
|
||||
ldw a, 5
|
||||
ldw b, 10
|
||||
add a, b
|
||||
jsr foo
|
||||
ret
|
||||
foo:
|
||||
ldw a, 3
|
||||
ldw b, 7
|
||||
add a, b
|
||||
ret
|
548
tests/c-test02.py
Normal file
548
tests/c-test02.py
Normal file
@ -0,0 +1,548 @@
|
||||
import re
|
||||
import os
|
||||
from termcolor import colored
|
||||
|
||||
|
||||
class Variable:
    """Record describing one compiled variable.

    Stores the three constructor arguments unchanged as ``name``,
    ``address`` and ``type`` (``type`` defaults to ``"int"``).
    """

    def __init__(self, name, address, var_type="int"):
        self.name, self.address, self.type = name, address, var_type
|
||||
|
||||
class Compiler:
|
||||
def __init__(self):
    """Start with empty symbol tables and the allocation cursor at 0x400."""
    # Allocation cursor; allocate_bytes() moves it towards address 0.
    # Original note: "1024 bytes total: 0x000 to 0x3FF".
    # NOTE(review): 0x400 itself lies just past that range, yet the first
    # one-byte allocation returns it -- confirm the real memory size.
    self.data_ptr = 0x400

    # Symbol tables: variables, struct layouts, #define macros, typedefs.
    self.variables, self.struct_definitions = {}, {}
    self.defines, self.typedefs = {}, {}

    # State of the struct definition currently being read, if any.
    self.in_struct_def = False
    self.current_struct_name = None
    self.current_struct_fields = []

    # Running label number and the stack of open if/while blocks.
    self.label_counter = 0
    self.block_stack = []
|
||||
|
||||
def new_label(self, prefix):
    """Return *prefix* followed by the current label number, then bump it."""
    serial = self.label_counter
    self.label_counter = serial + 1
    return f"{prefix}{serial}"
|
||||
|
||||
def preprocess(self, filename):
    """Read *filename* (following includes) and return its macro-expanded lines."""
    return self._apply_defines(self._read_file_recursive(filename))
|
||||
|
||||
def _read_file_recursive(self, filename, included_files=None):
    """Read *filename* and return its lines with directives processed.

    * ``#include "file"`` lines are replaced by the (recursively
      processed) lines of that file.  The path is tried as written first;
      if nothing exists there, it is tried relative to the directory of the
      including file, so a source file outside the working directory can
      still include the headers next to it.
    * ``#define KEY VALUE`` lines are recorded in ``self.defines``.
    * ``typedef old new;`` lines are recorded in ``self.typedefs``.

    Directive lines are not part of the returned list.  *included_files*
    is the set of paths already read; a path found in it yields ``[]`` so
    that include cycles terminate.  A missing file is reported on stdout
    and contributes no lines.
    """
    if included_files is None:
        included_files = set()

    if filename in included_files:
        # Prevent infinite recursion on includes
        return []
    included_files.add(filename)

    result_lines = []
    try:
        with open(filename, "r") as f:
            for line in f:
                line_stripped = line.strip()

                # #include "file"
                inc_match = re.match(r'#include\s+"([^"]+)"', line_stripped)
                if inc_match:
                    inc_file = inc_match.group(1)
                    if not os.path.exists(inc_file):
                        # Fall back to the including file's own directory.
                        sibling = os.path.join(os.path.dirname(filename), inc_file)
                        if os.path.exists(sibling):
                            inc_file = sibling
                    result_lines.extend(
                        self._read_file_recursive(inc_file, included_files)
                    )
                    continue

                # #define KEY VALUE
                def_match = re.match(r'#define\s+([a-zA-Z_]\w*)\s+(.*)', line_stripped)
                if def_match:
                    self.defines[def_match.group(1)] = def_match.group(2)
                    continue

                # typedef oldtype newtype;
                tmatch = re.match(
                    r'typedef\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line_stripped
                )
                if tmatch:
                    # Resolve the old type first in case it is itself an alias.
                    self.typedefs[tmatch.group(2)] = self.apply_typedef(tmatch.group(1))
                    continue

                result_lines.append(line)
    except FileNotFoundError as e:
        print(colored(f"{filename}:0: error: {e}", "red"))

    return result_lines
|
||||
|
||||
def _apply_defines(self, lines):
    """Expand ``#define`` macros in *lines* and return the new list of lines.

    Every word (run of letters, digits and underscores) that is a key of
    ``self.defines`` is replaced by its value.  Double-quoted string
    literals are copied through untouched, so ``"TEST"`` stays as written
    while a bare ``TEST`` is expanded.  Replacement text is not rescanned.
    Each replacement is reported on stdout.
    """
    # First alternative swallows a whole string literal (with escapes) so
    # that the words inside it are never offered for replacement.
    scanner = re.compile(r'"(?:\\.|[^"\\])*"|[A-Za-z0-9_]+')

    def expand(match):
        token = match.group(0)
        if token[0] != '"' and token in self.defines:
            print(f"Replaced {token} with {self.defines[token]}")
            return self.defines[token]
        return token

    return [scanner.sub(expand, line) for line in lines]
|
||||
|
||||
def apply_typedef(self, t):
    """Return the type registered for alias *t*, or *t* itself if it is no alias."""
    return self.typedefs.get(t, t)
|
||||
|
||||
def allocate_bytes(self, count):
    """Reserve *count* bytes ending at ``self.data_ptr`` and return the lowest address.

    Afterwards ``self.data_ptr`` sits one byte below the returned address.
    Raises ``Exception("Out of memory!")`` if the block would start below 0.
    """
    first = self.data_ptr + 1 - count
    if first < 0:
        raise Exception("Out of memory!")
    self.data_ptr = first - 1
    return first
|
||||
|
||||
def allocate_var(self, name, var_type="int"):
    """Return the variable called *name*, creating and registering it if needed.

    An existing variable is returned as is, whatever *var_type* says.  A
    new one gets one byte per field when its (typedef-resolved) type is
    ``"struct:<name>"`` and a single byte otherwise.
    """
    resolved = self.apply_typedef(var_type)
    try:
        return self.variables[name]
    except KeyError:
        pass

    size = 1
    if resolved.startswith("struct:"):
        # each field occupies 1 byte
        size = len(self.struct_definitions[resolved.split(":")[1]])

    slot = Variable(name, self.allocate_bytes(size), resolved)
    self.variables[name] = slot
    return slot
|
||||
|
||||
def allocate_array(self, name, length, var_type="int"):
    """Reserve storage for an array and register *name* as an ``"array"`` variable.

    *length* bytes are reserved for the elements first, then one more byte
    whose address becomes the variable's address.  Returns the tuple
    ``(variable, address_of_element_storage)``.

    *var_type* is accepted for signature compatibility only: the variable
    is always recorded with type ``"array"``, so the previous typedef
    lookup on it was dead code and has been dropped.
    """
    arr_start = self.allocate_bytes(length)
    var = Variable(name, self.allocate_bytes(1), "array")
    self.variables[name] = var
    return var, arr_start
|
||||
|
||||
def store_string(self, string_value):
    """Emit code that writes a NUL-terminated string into data memory.

    ``string_value`` is the text between the quotes of a C string literal.
    The simple escapes ``\\n``, ``\\t``, ``\\r``, ``\\0``, ``\\\\``, ``\\"``
    and ``\\'`` are decoded in a single left-to-right pass (so ``\\\\n`` is
    a backslash followed by ``n``); any other backslash sequence is kept
    verbatim. One byte per decoded character plus one terminator byte is
    reserved through ``allocate_bytes``.

    Returns:
        ``(asm, start_addr)``: the instruction list, which stores each
        character code through register ``a`` at ascending addresses, and
        the address of the first character.
    """
    simple_escapes = {
        "n": "\n",
        "t": "\t",
        "r": "\r",
        "0": "\0",
        "\\": "\\",
        '"': '"',
        "'": "'",
    }
    decoded = re.sub(
        r"\\(.)",
        lambda m: simple_escapes.get(m.group(1), m.group(0)),
        string_value,
    )

    start_addr = self.allocate_bytes(len(decoded) + 1)
    asm = []
    # The trailing 0 is the string terminator.
    for offset, code in enumerate([ord(ch) for ch in decoded] + [0]):
        asm.append(f"ldw a, {code}")
        asm.append(f"str a, 0x{start_addr + offset:X}")
    return asm, start_addr
|
||||
|
||||
def get_struct_field_offset(self, struct_type, field_name):
    """Return the byte offset of ``field_name`` inside ``struct_type``.

    ``struct_type`` has the form ``"struct:<name>"``. Fields are stored in
    declaration order, one byte each, so the offset is the field's index
    in ``self.struct_definitions[<name>]``.

    Raises:
        Exception: if ``struct_type`` is not a ``"struct:<name>"`` type
            (for example when a field is accessed on a plain ``int``), if
            the struct is not defined, or if it has no such field.
    """
    type_parts = struct_type.split(":")
    if len(type_parts) < 2:
        raise Exception(
            f"Cannot access field {field_name}: {struct_type} is not a struct type"
        )
    sname = type_parts[1]
    if sname not in self.struct_definitions:
        raise Exception(f"Unknown struct {sname}")

    for index, (fname, _ftype) in enumerate(self.struct_definitions[sname]):
        if fname == field_name:
            return index
    raise Exception(f"Field {field_name} not found in {struct_type}")
|
||||
|
||||
def parse_condition(self, cond_str):
    """Split a condition of the form ``a == b`` or ``a != b``.

    Both operands must be identifiers and the whole (whitespace-trimmed)
    string must consist of exactly that comparison. Anything else,
    including trailing text such as ``a == b && c``, is rejected rather
    than silently truncated to its first comparison.

    Returns:
        ``(var1, op, var2)`` as strings.

    Raises:
        Exception: "Unsupported condition: ..." when the text does not match.
    """
    m = re.fullmatch(
        r'([a-zA-Z_]\w*)\s*(==|!=)\s*([a-zA-Z_]\w*)', cond_str.strip()
    )
    if not m:
        raise Exception("Unsupported condition: " + cond_str)
    return m.groups()
|
||||
|
||||
def compile_condition(self, var1, op, var2):
    """Emit code that leaves ``var1 - var2`` in register ``a``.

    Both names are resolved through ``allocate_var`` (``var1`` first).
    ``op`` is not interpreted here; it is handed back so the caller can
    pick the branch instruction.

    Returns:
        ``(asm, op)``.
    """
    left = self.allocate_var(var1)
    right = self.allocate_var(var2)
    code = [
        f"ldr a, 0x{left.address:X}",
        f"ldr b, 0x{right.address:X}",
        "sub a, b",  # a = a - b
    ]
    return code, op
|
||||
|
||||
def extract_comment(self, line):
    """Split ``line`` at its first ``//`` comment marker.

    A ``//`` that lies inside a complete double-quoted string literal
    (for example ``"http://host"``) is not treated as a comment start.

    Returns:
        ``(code_part, comment_part)``. ``code_part`` is everything before
        the marker, unstripped; ``comment_part`` is the stripped text after
        it. When the line has no comment the result is ``(line, None)``.
    """
    # Match string literals first so that any '//' they contain is consumed
    # as part of the literal and never reported as a comment marker.
    for found in re.finditer(r'"(?:\\.|[^"\\])*"|//', line):
        if found.group(0) == "//":
            return line[:found.start()], line[found.end():].strip()
    return line, None
|
||||
|
||||
def compile_line(self, code_part):
    """Translate one line of C (comment already removed) into assembly.

    The stripped line is tried against a fixed, ordered list of statement
    shapes; the first one that matches produces the result. Lines that
    match nothing, and lines that only update compiler state (struct
    definitions, struct variable declarations), yield an empty list.

    Two code-generation defects of the earlier version are corrected:

    * Struct fields are addressed from the struct's own address, loaded as
      an immediate with ``ldw``. Previously the base was fetched with
      ``ldr``, i.e. read from memory, although no pointer is ever stored
      for a struct variable.
      NOTE(review): this relies on ``ldw`` loading an immediate and ``ldr``
      reading memory, which is how every other statement form below uses
      them - confirm against the assembler.
    * ``print_string`` takes its two labels from ``new_label`` so that a
      second ``print_string`` in the same program does not redefine
      ``string_loop`` / ``string_end``.

    Args:
        code_part: source text of one line, without its ``//`` comment.

    Returns:
        A list of assembly lines (instructions and ``label:`` lines).

    Raises:
        Exception: whatever ``parse_condition`` or
            ``get_struct_field_offset`` raise for the statement.
    """
    line = code_part.strip()
    asm = []

    def field_address(varname, fieldname):
        """Code that leaves the address of ``varname.fieldname`` in ``a``."""
        struct_var = self.allocate_var(varname)
        offset = self.get_struct_field_offset(struct_var.type, fieldname)
        code = [f"ldw a, 0x{struct_var.address:X}"]
        if offset != 0:
            code.append(f"ldw b, {offset}")
            code.append("add a, b")
        return code

    def element_address(arr, index):
        """Code that leaves the address of ``arr[index]`` in ``a``."""
        arr_var = self.allocate_var(arr)
        # The array variable holds a pointer to its elements, hence ldr.
        return [f"ldr a, 0x{arr_var.address:X}", f"ldw b, {index}", "add a, b"]

    def branch_past_block(var1, op, var2, end_label):
        """Code that jumps to ``end_label`` when the condition is false."""
        cond_code, cmp_op = self.compile_condition(var1, op, var2)
        # a holds var1 - var2: for '==' skip the block when a != 0,
        # for '!=' skip it when a == 0.
        mnemonic = "bne" if cmp_op == '==' else "beq"
        return cond_code + [mnemonic + " a, 0, " + end_label]

    # Inside "struct X {": collect int fields until the closing "};".
    if self.in_struct_def:
        if line.startswith("};"):
            self.struct_definitions[self.current_struct_name] = self.current_struct_fields
            self.in_struct_def = False
            self.current_struct_name = None
            self.current_struct_fields = []
            return asm
        mfield = re.match(r'int\s+([a-zA-Z_]\w*)\s*;', line)
        if mfield:
            self.current_struct_fields.append((mfield.group(1), "int"))
        return asm

    # struct definition start
    msd = re.match(r'struct\s+([a-zA-Z_]\w*)\s*\{', line)
    if msd:
        self.in_struct_def = True
        self.current_struct_name = msd.group(1)
        self.current_struct_fields = []
        return asm

    # struct var declaration
    msv = re.match(r'struct\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line)
    if msv:
        sname, varname = msv.groups()
        self.allocate_var(varname, "struct:" + sname)
        return asm

    # if statement
    mif = re.match(r'if\s*\(([^)]+)\)\s*\{', line)
    if mif:
        var1, op, var2 = self.parse_condition(mif.group(1))
        end_label = self.new_label("endif")
        asm.extend(branch_past_block(var1, op, var2, end_label))
        self.block_stack.append(('if', end_label))
        return asm

    # while statement
    mwhile = re.match(r'while\s*\(([^)]+)\)\s*\{', line)
    if mwhile:
        var1, op, var2 = self.parse_condition(mwhile.group(1))
        start_label = self.new_label("whilestart")
        end_label = self.new_label("whileend")
        asm.append(start_label + ":")
        asm.extend(branch_past_block(var1, op, var2, end_label))
        self.block_stack.append(('while', start_label, end_label))
        return asm

    # end of block
    if line == "}":
        if not self.block_stack:
            return asm
        blk = self.block_stack.pop()
        if blk[0] == 'if':
            asm.append(blk[1] + ":")
        elif blk[0] == 'while':
            # jump back to start, then place the exit label
            asm.append(f"jmp {blk[1]}")
            asm.append(blk[2] + ":")
        return asm

    # p.x = number;
    m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
    if m:
        varname, fieldname, value = m.groups()
        asm.extend(field_address(varname, fieldname))
        asm.append(f"ldw c, {int(value)}")
        asm.append("stb c, a")
        return asm

    # p.x = var + number;
    m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
    if m:
        varname, fieldname, srcvar, number = m.groups()
        asm.extend(field_address(varname, fieldname))
        src = self.allocate_var(srcvar)
        asm.append(f"ldr c, 0x{src.address:X}")
        asm.append(f"ldw d, {int(number)}")
        asm.append("add c, d")
        asm.append("stb c, a")
        return asm

    # p.x = srcvar;
    m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*;', line)
    if m:
        varname, fieldname, srcvar = m.groups()
        asm.extend(field_address(varname, fieldname))
        src = self.allocate_var(srcvar)
        asm.append(f"ldr c, 0x{src.address:X}")
        asm.append("stb c, a")
        return asm

    # x = p.x;
    m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*;', line)
    if m:
        dst, varname, fieldname = m.groups()
        address_code = field_address(varname, fieldname)
        dst_var = self.allocate_var(dst)
        asm.extend(address_code)
        asm.append("ldb c, a")
        asm.append(f"str c, 0x{dst_var.address:X}")
        return asm

    # print_int(p.x);
    m = re.match(r'print_int\(([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\)\s*;', line)
    if m:
        varname, fieldname = m.groups()
        asm.extend(field_address(varname, fieldname))
        asm.append("ldb a, a")
        asm.append("int 0x01")
        return asm

    # int arr[10];
    m = re.match(r'int\s+([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
    if m:
        arr_var, start_addr = self.allocate_array(m.group(1), int(m.group(2)))
        # Initialise the pointer slot with the address of element 0.
        asm.append(f"ldw a, 0x{start_addr:X}")
        asm.append(f"str a, 0x{arr_var.address:X}")
        return asm

    # int x = number;
    m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
    if m:
        value = int(m.group(2))
        var = self.allocate_var(m.group(1), "int")
        asm.append(f"ldw a, {value}")
        asm.append(f"str a, 0x{var.address:X}")
        return asm

    # int y = x + number;
    m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
    if m:
        varname, var2, number = m.groups()
        dst_var = self.allocate_var(varname, "int")
        src = self.allocate_var(var2, "int")
        asm.append(f"ldr a, 0x{src.address:X}")
        asm.append(f"ldw b, {int(number)}")
        asm.append("add a, b")
        asm.append(f"str a, 0x{dst_var.address:X}")
        return asm

    # char *msg = "Hello\n";
    m = re.match(r'char\s*\*\s*([a-zA-Z_]\w*)\s*=\s*"([^"]*)"\s*;', line)
    if m:
        varname, string_val = m.groups()
        ptr_var = self.allocate_var(varname, "char*")
        code, start_addr = self.store_string(string_val)
        asm.extend(code)
        asm.append(f"ldw a, 0x{start_addr:X}")
        asm.append(f"str a, 0x{ptr_var.address:X}")
        return asm

    # var = number;
    m = re.match(r'([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
    if m:
        varname, value = m.groups()
        var = self.allocate_var(varname, "int")
        asm.append(f"ldw a, {int(value)}")
        asm.append(f"str a, 0x{var.address:X}")
        return asm

    # var = var2 + number;
    m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
    if m:
        varname, var2, number = m.groups()
        dst_var = self.allocate_var(varname, "int")
        src = self.allocate_var(var2, "int")
        asm.append(f"ldr a, 0x{src.address:X}")
        asm.append(f"ldw b, {int(number)}")
        asm.append("add a, b")
        asm.append(f"str a, 0x{dst_var.address:X}")
        return asm

    # var[index] = number;
    m = re.match(r'([a-zA-Z_]\w*)\[(\d+)\]\s*=\s*(\d+)\s*;', line)
    if m:
        arr, index, value = m.groups()
        asm.extend(element_address(arr, int(index)))
        asm.append(f"ldw c, {int(value)}")
        asm.append("stb c, a")
        return asm

    # x = arr[index];
    m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
    if m:
        varname, arr, index = m.groups()
        dst_var = self.allocate_var(varname, "int")
        asm.extend(element_address(arr, int(index)))
        asm.append("ldb d, a")
        asm.append(f"str d, 0x{dst_var.address:X}")
        return asm

    # print_char(var);
    m = re.match(r'print_char\(([a-zA-Z_]\w*)\)\s*;', line)
    if m:
        var = self.allocate_var(m.group(1))
        asm.append(f"ldr a, 0x{var.address:X}")
        asm.append("int 0x00")
        return asm

    # print_char(arr[index]);
    m = re.match(r'print_char\(([a-zA-Z_]\w*)\[(\d+)\]\)\s*;', line)
    if m:
        arr, index = m.groups()
        asm.extend(element_address(arr, int(index)))
        asm.append("ldb a, a")
        asm.append("int 0x00")
        return asm

    # print_int(var);
    m = re.match(r'print_int\(([a-zA-Z_]\w*)\)\s*;', line)
    if m:
        var = self.allocate_var(m.group(1))
        asm.append(f"ldr a, 0x{var.address:X}")
        asm.append("int 0x01")
        return asm

    # print_string(var);
    m = re.match(r'print_string\(([a-zA-Z_]\w*)\)\s*;', line)
    if m:
        ptr_var = self.allocate_var(m.group(1), "char*")
        # Fresh labels per call site; fixed names would be defined twice
        # as soon as a program prints two strings.
        loop_label = self.new_label("string_loop")
        end_label = self.new_label("string_end")
        asm.append("ldw d, 0")
        asm.append(f"ldr b, 0x{ptr_var.address:X}")
        asm.append("ldw c, 1")
        asm.append(loop_label + ":")
        asm.append("ldb a, b")
        asm.append(f"beq a, d, {end_label}")
        asm.append("int 0x00")
        asm.append("add b, c")
        asm.append(f"jmp {loop_label}")
        asm.append(end_label + ":")
        return asm

    # return number;  (the value itself is not used)
    m = re.match(r'return\s+(\d+)\s*;', line)
    if m:
        asm.append("int 0xFF")
        return asm

    # Unrecognized line or empty
    return asm
|
||||
|
||||
def compile_c(self, c_code):
    """Compile preprocessed C source into a list of assembly lines.

    Lines outside ``main`` are fed to ``compile_line`` only for their
    effect on compiler state (struct definitions may appear there); any
    code they produce is discarded. The body of ``main`` - the lines
    between the first line containing ``int main(`` and the next line that
    starts with ``}`` - is then compiled exactly once.

    Earlier the declaration pass ran over the body of ``main`` as well, so
    every statement was compiled twice: arrays and string literals were
    allocated twice and label numbers were consumed by the discarded pass.

    Args:
        c_code: the whole source as one string.

    Returns:
        Assembly lines starting with ``"main:"``. A source comment is
        attached to the first instruction of its line, or emitted on its
        own when the line produced no code.
    """
    main_body = []
    in_main = False
    main_seen = False
    for raw in c_code.split('\n'):
        cline = raw.rstrip()
        if not main_seen and 'int main(' in cline:
            main_seen = True
            in_main = True
            continue
        if in_main:
            # The body is expected to be indented, so the first line that
            # starts with '}' closes main.
            if cline.startswith('}'):
                in_main = False
            else:
                main_body.append(cline)
            continue
        # Outside main: only the side effects matter.
        self.compile_line(raw)

    asm = ["main:"]
    for line in main_body:
        code_part, comment_part = self.extract_comment(line)
        instructions = self.compile_line(code_part)
        if not instructions:
            if comment_part:
                asm.append(f" ; {comment_part}")
            continue
        for position, instr in enumerate(instructions):
            if position == 0 and comment_part:
                asm.append(f" {instr} ; {comment_part}")
            else:
                asm.append(f" {instr}")
    return asm
|
||||
|
||||
# Script entry point: compile main.c from the current directory into test.asm.
if __name__ == "__main__":
    compiler = Compiler()
    source_text = "\n".join(compiler.preprocess("main.c"))
    # Compile before opening the output so a failure leaves test.asm untouched.
    asm_code = compiler.compile_c(source_text)

    with open("test.asm", "w") as out:
        out.writelines(asm_line + "\n" for asm_line in asm_code)
|
Loading…
Reference in New Issue
Block a user