"""Tiny compiler that translates a small subset of C into assembly text.

The input file named by ``filename`` is tokenized, split into statement
chunks, and every supported statement is turned into instructions that are
written to ``output.asm``.  Supported statements, as implemented below:

* ``int x;`` / ``int x = <value>;``  - declare (and optionally initialize)
* ``x = <value>;``                    - assign to a declared variable
* ``syscall(<hex>);``                 - emit an ``int`` instruction

``<value>`` is a decimal constant, a variable, or a two-operand expression.
"""

import re

# User settings
filename = "main.c"
DEBUG = False
types = ['int', 'void']  # Recognized types

# Maps every declared variable name to the memory address assigned to it.
variable_data = {}

# Position reported in error messages.  It is advanced once per chunk, and
# newlines are collapsed before parsing, so it counts statements/braces
# rather than physical source lines.
CurrentCompilerLine = 1

# Tokenization regex for splitting code into meaningful segments
tokenizer = re.compile(r'(\w+|\(|\)|,|;|=|\+|-|\*|/|{|})')

# Arithmetic operators, in the priority order used to classify an expression.
_OPERATORS = ('+', '-', '*', '/')

# Operator -> instruction mnemonic.  Only addition was ever emitted by this
# compiler; add further entries here once the target's mnemonics are known.
_ARITHMETIC_MNEMONICS = {'+': 'add'}

_OPERAND = re.compile(r'[a-zA-Z0-9_]+')
_BINARY_EXPRESSION = re.compile(
    r'([a-zA-Z0-9_]+)\s*([+\-*/])\s*([a-zA-Z0-9_]+)'
)
_INNERMOST_PARENS = re.compile(r'\(([^()]+)\)')


def CompileError(message):
    """Print a compiler diagnostic for the current position.

    The error is only reported; compilation carries on with the next chunk.
    """
    column = 0  # Column tracking is not implemented.
    print(f"{filename}:{CurrentCompilerLine}:{column}: Error: {message}")


def DEBUG_MODE_PRINT(*args):
    """Print ``args`` only when the ``DEBUG`` setting is enabled."""
    if DEBUG:
        print(*args)


def parse_code_to_chunks(code):
    """Split C source text into chunks of tokens.

    A chunk is the list of tokens of one statement (the terminating ``;`` is
    dropped).  Every ``{`` and ``}`` becomes a standalone one-token chunk and
    also terminates the chunk in progress.  Characters the tokenizer does not
    recognize are ignored.

    Returns:
        A list of chunks, each a non-empty list of token strings.
    """
    chunks = []
    current_chunk = []

    for token in tokenizer.findall(code):
        if token == ';':  # End of a statement
            if current_chunk:
                chunks.append(current_chunk)
                current_chunk = []
        elif token in ('{', '}'):  # Start or end of a block
            if current_chunk:
                chunks.append(current_chunk)
                current_chunk = []
            chunks.append([token])  # Braces are standalone chunks
        else:
            current_chunk.append(token)

    # Add any remaining tokens as a final chunk
    if current_chunk:
        chunks.append(current_chunk)

    return chunks


def classify_assignment(assignment):
    """Classify the right-hand side of an assignment statement.

    Args:
        assignment: Statement text containing at least one ``=``.

    Returns:
        1 for a single decimal number, 2 for arithmetic between two numbers,
        3 for arithmetic involving a variable or parentheses, 4 for a plain
        variable, and 0 for anything else.

    Raises:
        ValueError: If ``assignment`` contains no ``=``.
    """
    _, value = assignment.split('=', 1)
    value = value.strip()

    if value.isdigit():
        return 1

    # The first operator *in priority order* (not in position) decides where
    # the expression is split.
    operator = next((op for op in _OPERATORS if op in value), None)
    if operator is not None:
        if '(' in value and ')' in value:
            return 3
        left, right = (part.strip() for part in value.split(operator, 1))
        if left.isdigit() and right.isdigit():
            return 2
        if left.isidentifier() or right.isidentifier():
            return 3
        return 0

    if value.isidentifier():
        return 4

    return 0


def _split_binary_expression(equation):
    """Return ``(left, operator, right)`` for ``<operand> <op> <operand>``.

    The whole text must match, so trailing terms such as the ``+c`` in
    ``a+b+c`` are rejected instead of being silently dropped.
    """
    match = _BINARY_EXPRESSION.fullmatch(equation.strip())
    if match is None:
        raise ValueError(f"Invalid equation format: {equation}")
    return match.group(1), match.group(2), match.group(3)


def _parse_binary_expression(equation):
    """Parse a two-operand expression that may carry redundant parentheses.

    Parentheses around a single operand (``(y)+5``) or around the whole
    expression (``((y+5))``) are removed.  A parenthesized sub-expression
    used as an operand (``(a+b)*c``) cannot be expressed as two operands and
    is rejected, as are empty or unbalanced parentheses.

    Returns:
        ``(left, operator, right)``.

    Raises:
        ValueError: If the text is not a supported two-operand expression.
    """
    expr = equation.replace(" ", "")

    # Every iteration removes one pair of parentheses or raises, so the loop
    # always terminates.
    while '(' in expr or ')' in expr:
        innermost = _INNERMOST_PARENS.search(expr)
        if innermost is None:
            raise ValueError(f"Invalid equation format: {equation}")

        inner = innermost.group(1)
        prefix = expr[:innermost.start()]
        suffix = expr[innermost.end():]

        if _OPERAND.fullmatch(inner):
            expr = prefix + inner + suffix
        elif prefix == '(' * len(suffix) and suffix == ')' * len(prefix):
            # Only matching wrapper parentheses surround the group.
            expr = inner
        else:
            raise ValueError(f"Invalid equation format: {equation}")

    return _split_binary_expression(expr)


def parse_operands(equation: str):
    """Return the two operands of a simple arithmetic expression.

    Spaces are ignored and redundant parentheses are unwrapped.

    Returns:
        ``[left_operand, right_operand]``.

    Raises:
        ValueError: If ``equation`` is not ``<operand> <op> <operand>``.
    """
    left, _, right = _parse_binary_expression(equation)
    return [left, right]


def parse_simple_expression(equation):
    """Return the two operands of a parenthesis-free binary expression.

    Returns:
        ``[left_operand, right_operand]``.

    Raises:
        ValueError: If ``equation`` is not ``<operand> <op> <operand>``.
    """
    left, _, right = _split_binary_expression(equation)
    return [left, right]


def create_new_variable(name, address):
    """Record that variable ``name`` lives at memory ``address``."""
    variable_data[name] = address


def _load_operand(register, operand):
    """Return the instruction that loads ``operand`` into ``register``.

    Identifiers are loaded from their variable address with ``ldr``; any
    other operand is loaded as a constant with ``ldw``.  An undefined
    variable is reported and ``None`` is returned.
    """
    if not operand.isidentifier():
        return f'ldw {register}, {operand}'
    if operand not in variable_data:
        CompileError(f"Undefined Variable: '{operand}'")
        return None
    return f'ldr {register}, {variable_data[operand]}'


def equasion_to_asm(equasion: list, output_address: int) -> list[list[str]]:
    """Translate one assignment chunk into assembly instructions.

    Args:
        equasion: Tokens of the statement; must contain ``=``.
        output_address: Memory address that receives the computed value.

    Returns:
        A list of single-instruction lists.  It is empty when the statement
        is not supported or an error was reported through ``CompileError``.
    """
    statement = " ".join(equasion)
    value = statement.split("=", 1)[1].strip()
    category = classify_assignment(statement)
    store = [f'str a, {output_address}']

    if category == 1:  # constant, e.g. int x = 1;
        DEBUG_MODE_PRINT("1 >", value)
        return [[f'ldw a, {value}'], store]

    if category == 4:  # copy of another variable, e.g. int x = y;
        DEBUG_MODE_PRINT("4 >", value)
        load = _load_operand('a', value)
        return [] if load is None else [[load], store]

    if category in (2, 3):  # arithmetic, e.g. x = y + 5; or x = 2 + 3;
        DEBUG_MODE_PRINT(f"{category} >", value)
        try:
            left, operator, right = _parse_binary_expression(value)
        except ValueError as error:
            CompileError(str(error))
            return []

        mnemonic = _ARITHMETIC_MNEMONICS.get(operator)
        if mnemonic is None:
            # Refuse to emit a wrong instruction for an unknown operator.
            CompileError(f"Unsupported operator: '{operator}'")
            return []

        load_left = _load_operand('a', left)
        load_right = _load_operand('b', right)
        if load_left is None or load_right is None:
            return []
        return [[load_left], [load_right], [f'{mnemonic} a, b'], store]

    # Not Implemented
    DEBUG_MODE_PRINT("NotImpl: ", statement)
    return []


def compile_chunks(chunks, start_address=0xFF):
    """Compile parsed chunks into a list of assembly instructions.

    Variables are assigned addresses counting down from ``start_address``.
    ``variable_data`` and ``CurrentCompilerLine`` are updated as a side
    effect; errors are reported through ``CompileError`` and the offending
    chunk is skipped.

    Args:
        chunks: Output of ``parse_code_to_chunks``.
        start_address: Address given to the first declared variable.

    Returns:
        A list of single-instruction lists, in emission order.
    """
    global CurrentCompilerLine

    free_memory_address = start_address
    output = []

    for chunk in chunks:
        head = chunk[0]

        if head in types:
            has_initializer = "=" in chunk
            is_plain_declaration = len(chunk) == 2 and chunk[1].isidentifier()
            # Anything else starting with a type (e.g. a function header)
            # is ignored.
            if has_initializer or is_plain_declaration:
                name = chunk[1]
                if name in variable_data:
                    CompileError(f"Redefinition of variable: '{name}'")
                else:
                    create_new_variable(name, free_memory_address)
                    if has_initializer:
                        output += equasion_to_asm(chunk, free_memory_address)
                    free_memory_address -= 1
        elif "=" in chunk:
            if head in variable_data:
                # Store into the variable's own address.
                output += equasion_to_asm(chunk, variable_data[head])
            else:
                CompileError(f"Undefined Variable: '{head}'")
        elif head not in ('{', '}'):
            if head == 'return':
                # Return statements are not handled yet.
                pass
            elif head.isidentifier():
                arguments = [
                    token for token in chunk[1:]
                    if token not in (',', '(', ')')
                ]
                if head == 'syscall':
                    try:
                        interrupt = int(arguments[0], 16)
                    except (IndexError, ValueError):
                        CompileError(
                            "syscall expects a hexadecimal interrupt number"
                        )
                    else:
                        output.append([f'int {interrupt}'])

        CurrentCompilerLine += 1

    return output


def main():
    """Compile the file named by ``filename`` and write ``output.asm``."""
    # Read the C code from the file
    with open(filename, "r") as f:
        code = f.read()

    # Preprocess the code: remove newlines and excess whitespace
    code = re.sub(r'\s+', ' ', code.strip())

    output = compile_chunks(parse_code_to_chunks(code))

    # Write the assembly code to the output file
    with open("output.asm", "w") as f:
        f.write("main:\n ")
        f.writelines("".join(line) + "\n " for line in output)


if __name__ == "__main__":
    main()