# Py502/C-Parcer.py
#
# 309 lines
# 9.0 KiB
# Python

# --- User settings ---
# Path of the C source file that gets compiled.
filename = "main.c"
# When true, DEBUG_MODE_PRINT forwards its arguments to print().
DEBUG = False
# Type keywords recognized at the start of a declaration chunk.
types = ["int", "void"]

# --- Compiler state ---
# Maps each declared variable name to the memory address assigned to it.
variable_data = dict()
# Counter reported as the line number in CompileError diagnostics.
CurrentCompilerLine = 1
import re
def CompileError(message):
    """Print a compiler diagnostic for the chunk currently being processed.

    The diagnostic has the form ``<filename>:<line>:<column>: Error: <message>``
    and is built from the module-level ``filename`` and ``CurrentCompilerLine``.
    Nothing is raised; compilation continues after the message is printed.
    """
    column = 0  # column tracking is not implemented, so 0 is always reported
    diagnostic = f"{filename}:{CurrentCompilerLine}:{column}: Error: {message}"
    print(diagnostic)
def DEBUG_MODE_PRINT(*args):
    """Forward *args* to print() only when the module-level DEBUG flag is set."""
    if not DEBUG:
        return
    print(*args)
# Tokenizer for the flattened C source. Each match is one token: either a run
# of word characters (identifiers, keywords, numbers) or one punctuation
# character out of ( ) , ; = + - * / { }. Any other character (whitespace,
# quotes, <, >, %, ...) matches nothing, so findall() silently skips it.
tokenizer = re.compile(r'(\w+|\(|\)|,|;|=|\+|-|\*|/|{|})')
# Split C source text into statement chunks and standalone brace chunks
def parse_code_to_chunks(code):
    """Group the tokens of *code* into chunks.

    The text is tokenized with the module-level ``tokenizer``. Tokens are
    collected until a boundary token is seen:

    * ``;`` closes the current chunk; the semicolon itself is dropped.
    * ``{`` or ``}`` closes the current chunk and is then emitted as a chunk
      of its own.

    Empty chunks are never emitted. Tokens left over after the last boundary
    form a final chunk.

    Returns:
        A list of chunks, each chunk being a non-empty list of token strings.
    """
    chunks = []
    pending = []  # tokens gathered since the last boundary
    for token in tokenizer.findall(code):
        if token not in (';', '{', '}'):
            pending.append(token)
            continue
        # Boundary token: flush whatever has been gathered so far.
        if pending:
            chunks.append(pending)
            pending = []
        # Braces, unlike semicolons, are kept as standalone chunks.
        if token != ';':
            chunks.append([token])
    # Trailing tokens with no terminating boundary still form a chunk.
    if pending:
        chunks.append(pending)
    return chunks
def classify_assignment(assignment):
    """Classify the right-hand side of an assignment statement.

    Args:
        assignment: Statement text such as ``"int x = y + 5"``. Everything
            after the first ``=`` is treated as the value; the left-hand side
            is not inspected.

    Returns:
        An integer category:

        * 1 -- the value consists only of digits (``x = 5``)
        * 2 -- arithmetic between two all-digit operands (``x = 2 + 3``)
        * 3 -- arithmetic where at least one side is an identifier, or any
          arithmetic that contains both ``(`` and ``)``
        * 4 -- the value is a single identifier (``x = y``)
        * 0 -- anything else, including text that contains no ``=`` at all
    """
    _, separator, value = assignment.partition('=')
    if not separator:
        # No '=' present: not an assignment, so it matches no category.
        return 0
    value = value.strip()

    if value.isdigit():
        return 1

    # The first operator (in this fixed order) that occurs anywhere in the
    # value decides where the expression is split into left/right.
    operator = next((op for op in ('+', '-', '*', '/') if op in value), None)
    if operator is not None:
        if '(' in value and ')' in value:
            return 3
        left, right = (part.strip() for part in value.split(operator, 1))
        if left.isdigit() and right.isdigit():
            return 2
        if left.isidentifier() or right.isidentifier():
            return 3
        # e.g. a leading sign ("-5"): arithmetic that fits neither shape.
        return 0

    if value.isidentifier():
        return 4
    return 0
import re
def parse_operands(equation: str):
    """Return the two operands of a binary arithmetic expression.

    Spaces are removed first. Redundant parentheses are then stripped, either
    around a single operand (``(y) + (5)``) or around the whole expression
    (``((y + 5))``), and the remaining text is handed to
    ``parse_simple_expression``.

    Args:
        equation: Expression text such as ``"y + 5"`` or ``"(y + 5)"``.

    Returns:
        ``[left_operand, right_operand]`` as strings.

    Raises:
        ValueError: If the parentheses are unbalanced or empty, if a
            parenthesised sub-expression is combined with further terms
            (``(a + b) * c`` cannot be described by two operands), or if the
            remaining text is not a simple binary expression.
    """
    original = equation
    equation = equation.replace(" ", "")

    # Each pass removes one innermost pair of parentheses or raises, so the
    # loop always terminates.
    while '(' in equation:
        innermost = re.search(r'\(([^()]+)\)', equation)
        if innermost is None:
            raise ValueError(f"Unbalanced or empty parentheses: {original}")
        inner_expr = innermost.group(1)
        before = equation[:innermost.start()]
        after = equation[innermost.end():]
        if re.fullmatch(r'[a-zA-Z0-9_]+', inner_expr):
            # "(y)" -> "y": parentheses around a lone operand carry no meaning.
            equation = before + inner_expr + after
        elif set(before) <= {'('} and set(after) <= {')'}:
            # Only parentheses surround the inner expression, e.g. "((y+5))".
            if len(before) != len(after):
                raise ValueError(f"Unbalanced or empty parentheses: {original}")
            equation = inner_expr
        else:
            raise ValueError(f"Unsupported nested expression: {original}")

    if ')' in equation:
        raise ValueError(f"Unbalanced or empty parentheses: {original}")
    return parse_simple_expression(equation)


def parse_simple_expression(equation):
    """Return the operands of ``<operand> <op> <operand>`` without parentheses.

    An operand is a run of ASCII letters, digits and underscores; ``<op>`` is
    one of ``+ - * /``. The pattern is matched at the start of *equation*
    only, so anything after the second operand is ignored.

    Returns:
        ``[left_operand, right_operand]`` as strings; the operator itself is
        not returned.

    Raises:
        ValueError: If *equation* does not start with such an expression.
    """
    match = re.match(r'([a-zA-Z0-9_]+)\s*([+\-*/])\s*([a-zA-Z0-9_]+)', equation)
    if match is None:
        raise ValueError(f"Invalid equation format: {equation}")
    # Group 2 is the operator; callers currently only need the operands.
    return [match.group(1), match.group(3)]
def create_new_variable(name, address):
    """Record *address* as the storage location of variable *name*.

    Writes into the module-level ``variable_data`` mapping; an existing entry
    for the same name is overwritten.
    """
    variable_data.update({name: address})
def _load_operand(register: str, operand: str):
    """Return the instruction that loads *operand* into *register*, or None.

    An identifier is looked up in ``variable_data`` and loaded from its
    address with ``ldr``; anything else is emitted as a literal with ``ldw``.
    An identifier that was never declared is reported through CompileError
    and yields None.
    """
    if not operand.isidentifier():
        return [f'ldw {register}, {operand}']
    if operand not in variable_data:
        CompileError(f"Undefined Variable: '{operand}'")
        return None
    return [f'ldr {register}, {variable_data[operand]}']


def equasion_to_asm(equasion: list, output_address: int) -> list[list[str]]:
    """Translate one assignment chunk into assembly lines.

    Args:
        equasion: Tokens of the statement, e.g. ``['int', 'x', '=', 'y', '+', '5']``.
            They are joined with spaces; the text between the first and the
            second ``=`` (or the end) is the value.
        output_address: Memory address the result is stored to.

    Returns:
        A list of one-element lists, each holding one assembly line. The list
        is empty when the statement cannot be translated, in which case a
        diagnostic has been printed or, for unclassified statements, only a
        debug message.

    Supported forms, as classified by ``classify_assignment``:
        * 1 -- constant: ``ldw a, <value>``
        * 4 -- copy of another variable: ``ldr a, <address>``
        * 2/3 -- addition of two operands: load into ``a`` and ``b``, then
          ``add a, b``
    Each is followed by ``str a, <output_address>``.
    """
    equasion = " ".join(equasion)
    value = equasion.split("=")[1].strip()
    kind = classify_assignment(equasion)  # classify once, not per branch
    output = []

    if kind == 1:
        # constant, e.g. int x = 1;
        DEBUG_MODE_PRINT("1 >", value)
        output.append([f'ldw a, {value}'])
    elif kind == 4:
        # copy of another variable, e.g. int x = y;
        DEBUG_MODE_PRINT("4 >", value)
        load = _load_operand('a', value)
        if load is None:
            return []
        output.append(load)
    elif kind in (2, 3):
        # arithmetic expression, e.g. x = y + 5; or x = 2 + 3;
        DEBUG_MODE_PRINT(f"{kind} >", value)
        # Only 'add' is emitted below, so any other operator would be
        # silently miscompiled as an addition. Report it instead.
        if any(op in value for op in ('-', '*', '/')):
            CompileError(f"Unsupported operator in expression: '{value}'")
            return []
        try:
            operands = parse_operands(value)
        except ValueError as err:
            CompileError(str(err))
            return []
        loads = [_load_operand('a', operands[0]), _load_operand('b', operands[1])]
        if None in loads:
            return []
        output.extend(loads)
        output.append(['add a, b'])
    else:
        # Not implemented
        DEBUG_MODE_PRINT("NotImpl: ", equasion)
        return output

    # Store register a at the destination address.
    output.append([f'str a, {output_address}'])
    return output
# Read the C code from the file
with open(filename, "r") as f:
    code = f.read()
# Preprocess the code: collapse newlines and runs of whitespace into single spaces
code = re.sub(r'\s+', ' ', code.strip())
# Parse the code into chunks
chunks = parse_code_to_chunks(code)
#DEBUG_MODE_PRINT("Chunks:", chunks)
# Variable storage is handed out downwards from 0xFF, one address per variable
free_memory_address = 0xFF
output = []
for chunk in chunks:
    if chunk[0] in types:
        # Chunks starting with a type keyword that are neither of the two
        # forms below (e.g. function headers) are ignored.
        if "=" in chunk or (len(chunk) == 2 and chunk[1].isidentifier()):
            # Declaration: "<type> <name> = <value>" or plain "<type> <name>"
            name = chunk[1]
            if name in variable_data:
                # The variable NAME is checked; chunk[0] is the type keyword.
                CompileError(f"Redefinition of variable: '{name}'")
            else:
                create_new_variable(name, free_memory_address)
                if "=" in chunk:
                    output += equasion_to_asm(chunk, free_memory_address)
                free_memory_address -= 1
    elif "=" in chunk:
        # Assignment to an existing variable: store to that variable's own
        # address, not to the next free one.
        target = chunk[0]
        if target in variable_data:
            output += equasion_to_asm(chunk, variable_data[target])
        else:
            CompileError(f"Undefined Variable: '{target}'")
    else:
        # Everything else: braces, return statements and function calls
        chunk[:] = [token.strip("{}") for token in chunk]
        if chunk != ['']:
            if chunk[0] == 'return':
                # handle return statements
                pass
            elif chunk[0].isidentifier():
                function_name = chunk[0]
                # Drop call punctuation. A new list is built because removing
                # items from a list while iterating over it skips elements.
                arguments = [
                    argument for argument in chunk[1:]
                    if argument not in (',', '(', ')')
                ]
                if function_name == 'syscall':
                    if not arguments:
                        CompileError("syscall requires an interrupt number")
                    else:
                        try:
                            # The argument is read as hexadecimal ("10" and "0x10" both work)
                            interrupt = int(arguments[0], 16)
                        except ValueError:
                            CompileError(f"Invalid syscall number: '{arguments[0]}'")
                        else:
                            # One-element list, like every other entry in output
                            output.append([f'int {interrupt}'])
    # Newlines were removed above, so this counts chunks rather than real
    # source lines.
    CurrentCompilerLine += 1
with open("output.asm", "w") as f:
    # Write the assembly code to the output file
    f.write("main:\n ")
    for line in output:
        f.write("".join(line))
        f.write("\n ")