309 lines
9.0 KiB
Python
309 lines
9.0 KiB
Python
|
|
|
|
|
|
# ---- User settings ----

# Path of the C source file to compile; it is also the file name shown in
# compiler diagnostics.
filename = 'main.c'

# Set to True to let DEBUG_MODE_PRINT emit its trace output.
DEBUG = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Type keywords that mark the start of a declaration.
types = ["int", "void"]

# Maps each declared variable name to the memory address assigned to it.
variable_data = dict()

# Counter used as the "line" part of diagnostics. The main loop increments it
# once per parsed chunk.
CurrentCompilerLine = 1
|
|
|
|
|
|
import re
|
|
|
|
|
|
def CompileError(message):
    """Print a diagnostic in ``file:line:column: Error: message`` form.

    The file name and line come from the module-level ``filename`` and
    ``CurrentCompilerLine``; the column is always reported as 0. The function
    only prints: it neither raises nor stops the compilation.
    """
    location = ":".join((str(filename), str(CurrentCompilerLine), "0"))
    print(f"{location}: Error: {message}")
|
|
|
|
|
|
def DEBUG_MODE_PRINT(*args):
    """Forward ``args`` to ``print`` when the module-level ``DEBUG`` flag is truthy."""
    if not DEBUG:
        return
    print(*args)
|
|
|
|
|
|
|
|
# Matches one token: a run of word characters, or a single punctuation
# character from the set ( ) , ; = + - * / { }. Anything else in the input is
# not matched and therefore never becomes a token.
tokenizer = re.compile(r'(\w+|\(|\)|,|;|=|\+|-|\*|/|{|})')


def parse_code_to_chunks(code):
    """Split C source text into statement-level chunks of tokens.

    A chunk is the list of tokens found between statement boundaries:

    * ``;`` closes the current chunk and is not itself stored;
    * ``{`` and ``}`` close the current chunk and are each stored as a
      one-token chunk of their own;
    * tokens left over at the end of the input form a final chunk.

    Empty chunks are never emitted.

    Args:
        code: The source text to tokenize.

    Returns:
        A list of chunks, each chunk being a list of token strings.
    """
    statements = []
    pending = []

    def flush():
        # Move the tokens collected so far into the result, if there are any.
        nonlocal pending
        if pending:
            statements.append(pending)
            pending = []

    for tok in tokenizer.findall(code):
        if tok == ';':
            flush()
        elif tok in ('{', '}'):
            flush()
            statements.append([tok])
        else:
            pending.append(tok)

    flush()
    return statements
|
|
|
|
|
|
def classify_assignment(assignment):
    """Classify the right-hand side of an assignment statement.

    The text is split at the first ``=``; only the part after it is inspected.

    Args:
        assignment: Statement text such as ``"int x = y + 5"``.

    Returns:
        An integer category:

        * 1 - the value consists only of digits;
        * 2 - an operator with digit-only text on both sides;
        * 3 - an operator together with parentheses, or an operator with an
          identifier on at least one side;
        * 4 - the value is a single identifier;
        * 0 - none of the above.

    Raises:
        ValueError: If ``assignment`` contains no ``=``.
    """
    arithmetic_ops = ('+', '-', '*', '/')
    _, rhs = (part.strip() for part in assignment.split('=', 1))

    if rhs.isdigit():
        return 1

    if not any(symbol in rhs for symbol in arithmetic_ops):
        return 4 if rhs.isidentifier() else 0

    if '(' in rhs and ')' in rhs:
        return 3

    # The split point is the first operator in the order + - * / that occurs
    # anywhere in the value, not necessarily the leftmost one in the text.
    split_op = next(symbol for symbol in arithmetic_ops if symbol in rhs)
    first, second = (piece.strip() for piece in rhs.split(split_op, 1))

    if first.isdigit() and second.isdigit():
        return 2
    if first.isidentifier() or second.isidentifier():
        return 3
    return 0
|
|
|
|
|
|
|
|
import re
|
|
|
|
# An innermost parenthesised group: "(" + text containing no parentheses + ")".
_INNERMOST_GROUP = re.compile(r'\(([^()]+)\)')


def parse_operands(equation: str):
    """Return the two operands of the leading binary operation in ``equation``.

    Spaces are removed, then parentheses are dropped from the innermost
    groups outwards until none remain, and the flattened text is handed to
    ``parse_simple_expression``. For example ``"(y + 5)"`` yields
    ``['y', '5']``.

    Args:
        equation: Expression text such as ``"y + 5"`` or ``"(a) * (b)"``.

    Returns:
        ``[operand1, operand2]`` as strings.

    Raises:
        ValueError: If an opening parenthesis has no well-formed matching
            group (for example ``"()"`` or ``"(a+b"``), or if the flattened
            text does not start with ``operand operator operand``.
    """
    equation = equation.replace(" ", "")

    while '(' in equation:
        flattened = _INNERMOST_GROUP.sub(lambda group: group.group(1), equation)
        if flattened == equation:
            # Nothing could be unwrapped: an empty "()" or an unclosed "(".
            # Fail loudly instead of looping forever.
            raise ValueError(f"Invalid equation format: {equation}")
        equation = flattened

    return parse_simple_expression(equation)


def parse_simple_expression(equation):
    """Extract the operands of a parenthesis-free binary expression.

    The pattern is matched at the start of ``equation`` only, so any text
    after the second operand is ignored.

    Args:
        equation: Text of the form ``operand operator operand`` where the
            operator is one of ``+ - * /``.

    Returns:
        ``[operand1, operand2]`` as strings; the operator is not returned.

    Raises:
        ValueError: If ``equation`` does not start with such an expression.
    """
    match = re.match(r'([a-zA-Z0-9_]+|\d+)\s*([+\-*/])\s*([a-zA-Z0-9_]+|\d+)', equation)
    if match is None:
        raise ValueError(f"Invalid equation format: {equation}")
    return [match.group(1), match.group(3)]
|
|
|
|
|
|
|
|
def create_new_variable(name, address):
    """Record ``address`` as the storage location of variable ``name``.

    The mapping is kept in the module-level ``variable_data`` dict; an
    existing entry for ``name`` is overwritten.
    """
    variable_data.update({name: address})
|
|
|
|
def equasion_to_asm(equasion:list, output_address:int) -> list[list[str]]:
    """Translate one assignment chunk into assembly instructions.

    The tokens are joined with spaces and classified by
    ``classify_assignment``; the instructions emitted depend on the category:

    * 1 (digits only): ``ldw a, <value>`` then ``str a, <output_address>``;
    * 4 (single variable): ``ldr a, <address of variable>`` then the store;
    * 3 (two-operand expression): each operand is loaded into ``a`` / ``b``
      with ``ldr`` (variable) or ``ldw`` (anything else), followed by
      ``add a, b`` and the store;
    * anything else: nothing is emitted, only a debug trace.

    NOTE(review): category 3 always emits ``add a, b`` whatever operator the
    expression uses, and category 2 (two numeric operands) falls into the
    "not implemented" path. Both look unintended - confirm against the
    target instruction set before changing.

    Args:
        equasion: Token list of the statement, containing an ``=`` token.
        output_address: Address the result is stored to.

    Returns:
        A list of instructions, each wrapped in its own one-element list.

    Raises:
        IndexError: If the joined statement contains no ``=``.
        KeyError: If a referenced variable is not in ``variable_data``.
    """
    statement = " ".join(equasion)
    # Text between the first and second "=" (or the end of the statement).
    value = statement.split("=")[1].strip()
    category = classify_assignment(statement)
    store_result = [f'str a, {output_address}']

    if category == 1:
        DEBUG_MODE_PRINT("1 >", value)
        return [[f'ldw a, {value}'], store_result]

    if category == 4:
        DEBUG_MODE_PRINT("4 >", value)
        return [[f'ldr a, {variable_data[value]}'], store_result]

    if category == 3:
        DEBUG_MODE_PRINT("3 >", value)
        instructions = []
        # First operand goes to register a, second to register b.
        for register, operand in zip("ab", parse_operands(value)):
            if operand.isidentifier():
                instructions.append([f'ldr {register}, {variable_data[operand]}'])
            else:
                instructions.append([f'ldw {register}, {operand}'])
        instructions.append(['add a, b'])
        instructions.append(store_result)
        return instructions

    DEBUG_MODE_PRINT("NotImpl: ", statement)
    return []
|
|
|
|
|
|
# Read the C code from the file
with open(filename, "r") as f:
    code = f.read()

# Preprocess the code: collapse newlines and runs of whitespace to one space
code = re.sub(r'\s+', ' ', code.strip())

# Parse the code into statement-level chunks
chunks = parse_code_to_chunks(code)

# Each declared variable takes one address, handed out downwards from 0xFF
free_memory_address = 0xFF

# Generated instructions; every entry is joined into one line of output
output = []

for chunk in chunks:
    if chunk[0] in types:
        # Declaration. Only declarations with an initializer produce code;
        # other chunks starting with a type (e.g. "int main ( )") are skipped.
        if "=" in chunk:
            name = chunk[1]
            # Look up the variable NAME (chunk[1]); chunk[0] is the type
            # keyword and is never a key of variable_data.
            if name in variable_data:
                CompileError(f"Redefinition of variable: '{name}'")
            else:
                create_new_variable(name, free_memory_address)
                output += equasion_to_asm(chunk, free_memory_address)
                free_memory_address -= 1
    elif "=" in chunk:
        # Assignment to an existing variable: store to the address that was
        # allocated for it, not to the next free address.
        if chunk[0] in variable_data:
            output += equasion_to_asm(chunk, variable_data[chunk[0]])
        else:
            CompileError(f"Undefined Variable: '{chunk[0]}'")
    else:
        # Brace chunks collapse to [''] here and are ignored below.
        chunk[:] = [token.strip("{}") for token in chunk]

        if chunk != ['']:
            if chunk[0] == 'return':
                # return statements are not handled yet
                pass
            elif chunk[0].isidentifier():
                function_name = chunk[0]
                # Keep only real arguments. Build a new list rather than
                # popping while iterating, which skips elements.
                arguments = [
                    token for token in chunk[1:]
                    if token not in (',', '(', ')')
                ]

                if function_name == 'syscall':
                    # The first argument is read as a hexadecimal number and
                    # emitted in decimal.
                    try:
                        number = int(arguments[0], 16)
                    except (IndexError, ValueError):
                        CompileError("syscall expects a hexadecimal number as its first argument")
                    else:
                        output.append([f'int {number}'])

    CurrentCompilerLine += 1

with open("output.asm", "w") as f:
    # Write the assembly code to the output file
    f.write("main:\n ")
    for line in output:
        f.write("".join(line))
        f.write("\n ")