"""Compile a tiny subset of C into assembly for the custom CPU.

Run as a script: the C source named by ``filename`` is read and the generated
assembly is written to ``output.asm``.
"""

import re

# User settings

filename = "main.c"
DEBUG = False

types = ['int', 'void']  # Recognized types

# Maps every declared variable name to the memory address that stores it.
variable_data = {}

# Position reported by CompileError.  The source is whitespace-collapsed before
# it is parsed, so this counts chunks (statements and braces), not file lines.
CurrentCompilerLine = 1

# Assembly mnemonic emitted for each supported binary operator.
_MNEMONICS = {'+': 'add', '-': 'sub', '*': 'mul', '/': 'div'}

# A single operand: a variable name or a numeric constant.
_OPERAND = r'[a-zA-Z0-9_]+'
_BINARY_EXPRESSION = re.compile(rf'({_OPERAND})\s*([+\-*/])\s*({_OPERAND})')


def CompileError(message):
    """Print a diagnostic for the chunk currently being compiled.

    The error is only reported; compilation carries on with the next chunk.
    """
    column = 0  # column tracking is not implemented
    print(f"{filename}:{CurrentCompilerLine}:{column}: Error: {message}")


def DEBUG_MODE_PRINT(*args):
    """Forward ``args`` to ``print`` when the ``DEBUG`` setting is enabled."""
    if DEBUG:
        print(*args)


# Tokenization regex for splitting code into meaningful segments
tokenizer = re.compile(r'(\w+|\(|\)|,|;|=|\+|-|\*|/|{|})')


def parse_code_to_chunks(code):
    """Split C source text into chunks, one token list per statement.

    A ``;`` ends the current chunk (the semicolon itself is dropped) and every
    ``{`` or ``}`` becomes a chunk of its own.  Characters the tokenizer does
    not recognise are skipped.
    """
    chunks = []
    current_chunk = []
    for token in tokenizer.findall(code):
        if token in (';', '{', '}'):
            if current_chunk:
                chunks.append(current_chunk)
                current_chunk = []
            if token != ';':
                chunks.append([token])  # Braces are standalone chunks
        else:
            current_chunk.append(token)

    # Keep a trailing statement that was not terminated by ';'
    if current_chunk:
        chunks.append(current_chunk)
    return chunks


def classify_assignment(assignment):
    """Classify the right-hand side of an assignment statement.

    Returns:
        1 for a single number, 2 for arithmetic between two numbers, 3 for
        arithmetic involving a variable (or any parenthesised arithmetic),
        4 for a plain copy of another variable, and 0 for anything else,
        including text without an ``=``.
    """
    _, separator, value = assignment.partition('=')
    if not separator:
        return 0
    value = value.strip()

    if value.isdigit():
        return 1

    if any(op in value for op in _MNEMONICS):
        if '(' in value and ')' in value:
            return 3
        operator = next(op for op in ('+', '-', '*', '/') if op in value)
        left, right = (part.strip() for part in value.split(operator, 1))
        if left.isdigit() and right.isdigit():
            return 2
        if left.isidentifier() or right.isidentifier():
            return 3
        return 0

    if value.isidentifier():
        return 4
    return 0


def _split_binary(equation):
    """Return ``(left, operator, right)`` for a parenthesis-free expression.

    The whole text must be one binary operation; anything left over (such as
    the ``+c`` in ``a+b+c``) is rejected instead of being silently dropped.
    """
    match = _BINARY_EXPRESSION.fullmatch(equation.strip())
    if match is None:
        raise ValueError(f"Invalid equation format: {equation}")
    return match.group(1), match.group(2), match.group(3)


def _unwrap_parentheses(equation):
    """Remove parentheses that do not change the meaning of ``equation``.

    Parentheses around a single operand, or around the whole expression, are
    dropped.  A parenthesised sub-expression used as an operand cannot be
    expressed as one binary operation, so it raises ``ValueError``.
    """
    if equation.count('(') != equation.count(')'):
        raise ValueError(f"Unbalanced parentheses: {equation}")

    # Each pass removes one pair of parentheses or raises, so this terminates.
    while '(' in equation:
        innermost = re.search(r'\(([^()]*)\)', equation)
        if innermost is None:  # e.g. ")(": balanced count, wrong order
            raise ValueError(f"Unbalanced parentheses: {equation}")
        inner = innermost.group(1)
        before = equation[:innermost.start()]
        after = equation[innermost.end():]

        # A group must sit where an operand may appear; "f(a)" is a call.
        if (before and before[-1] not in '(+-*/') or \
                (after and after[0] not in ')+-*/'):
            raise ValueError(f"Invalid equation format: {equation}")

        wraps_everything = not (before + after).strip('()')
        if not (wraps_everything or re.fullmatch(_OPERAND, inner)):
            raise ValueError(
                f"Nested expressions are not supported: {equation}")
        equation = before + inner + after
    return equation


def _parse_binary(equation):
    """Parse ``equation`` into ``(left, operator, right)``, ignoring spaces."""
    return _split_binary(_unwrap_parentheses(equation.replace(" ", "")))


def parse_operands(equation: str):
    """Return the two operands of a binary expression as ``[left, right]``.

    Spaces and redundant parentheses are ignored, so ``"( y + 5 )"`` yields
    ``['y', '5']``.

    Raises:
        ValueError: if the text is not a single binary operation.
    """
    left, _, right = _parse_binary(equation)
    return [left, right]


def parse_simple_expression(equation):
    """Return ``[left, right]`` for an expression without parentheses.

    Raises:
        ValueError: if the text is not exactly ``operand operator operand``.
    """
    left, _, right = _split_binary(equation)
    return [left, right]


def create_new_variable(name, address):
    """Record that variable ``name`` is stored at memory ``address``."""
    variable_data[name] = address


def _load_operand(register, operand):
    """Return the instruction that loads ``operand`` into ``register``."""
    if not operand.isidentifier():
        return [f'ldw {register}, {operand}']  # immediate constant
    if operand not in variable_data:
        raise ValueError(f"Undefined Variable: '{operand}'")
    return [f'ldr {register}, {variable_data[operand]}']


def equasion_to_asm(equasion: list, output_address: int) -> list[list[str]]:
    """Translate one assignment chunk into assembly instructions.

    Args:
        equasion: tokens of the statement, e.g. ``['int', 'x', '=', '1']``.
        output_address: memory address that receives the result.

    Returns:
        One single-element list per instruction.  The list is empty when the
        statement is unsupported or invalid; invalid statements are also
        reported through ``CompileError``.
    """
    statement = " ".join(equasion)
    value = statement.partition("=")[2].strip()
    category = classify_assignment(statement)

    if category == 1:
        # constant: int x = 1;
        DEBUG_MODE_PRINT("1 >", value)
        return [[f'ldw a, {value}'], [f'str a, {output_address}']]

    if category == 4:
        # copy of another variable: int x = y;
        DEBUG_MODE_PRINT("4 >", value)
        if value not in variable_data:
            CompileError(f"Undefined Variable: '{value}'")
            return []
        return [[f'ldr a, {variable_data[value]}'],
                [f'str a, {output_address}']]

    if category in (2, 3):
        # binary arithmetic: x = y + 5;  left goes to a, right goes to b
        DEBUG_MODE_PRINT(f"{category} >", value)
        try:
            left, operator, right = _parse_binary(value)
            loads = [_load_operand('a', left), _load_operand('b', right)]
        except ValueError as error:
            CompileError(str(error))
            return []
        # The result accumulates in the first register.
        return loads + [[f'{_MNEMONICS[operator]} a, b'],
                        [f'str a, {output_address}']]

    # Not Implemented
    DEBUG_MODE_PRINT("NotImpl: ", statement)
    return []


def compile_chunks(chunks):
    """Compile parsed chunks into a list of assembly instructions.

    Variables are given addresses counting down from 0xFF and are recorded in
    ``variable_data``.  ``CurrentCompilerLine`` advances once per chunk.
    """
    global CurrentCompilerLine
    free_memory_address = 0xFF
    output = []

    for chunk in chunks:
        if chunk[0] in types:
            if "=" in chunk:
                name = chunk[1]
                if name in variable_data:
                    CompileError(f"Redefinition of variable: '{name}'")
                else:
                    create_new_variable(name, free_memory_address)
                    output += equasion_to_asm(chunk, free_memory_address)
                    free_memory_address -= 1
        elif "=" in chunk:
            name = chunk[0]
            if name in variable_data:
                # Write back to the variable's own address.
                output += equasion_to_asm(chunk, variable_data[name])
            else:
                CompileError(f"Undefined Variable: '{name}'")
        else:
            tokens = [token.strip("{}") for token in chunk]
            if tokens != [''] and tokens[0] != 'return' \
                    and tokens[0].isidentifier():
                function_name = tokens[0]
                arguments = [token for token in tokens[1:]
                             if token not in ('(', ')', ',')]
                if function_name == 'syscall':
                    try:
                        # The interrupt number is read as hexadecimal.
                        number = int(arguments[0], 16)
                    except (IndexError, ValueError):
                        CompileError(
                            "syscall expects one hexadecimal interrupt number")
                    else:
                        output.append([f'int {number}'])

        CurrentCompilerLine += 1

    return output


def main():
    """Read ``filename``, compile it and write ``output.asm``."""
    with open(filename, "r") as f:
        code = f.read()

    # Preprocess the code: remove newlines and excess whitespace
    code = re.sub(r'\s+', ' ', code.strip())
    output = compile_chunks(parse_code_to_chunks(code))

    with open("output.asm", "w") as f:
        f.write("main:\n ")
        for line in output:
            f.write("".join(line))
            f.write("\n ")


if __name__ == "__main__":
    main()
"""Translate simple math expressions and assignments into assembly."""

import re

# Assembly mnemonic emitted for each supported binary operator.
_MNEMONICS = {'+': 'add', '-': 'sub', '*': 'mul', '/': 'div'}

# Words (numbers, names) or any single non-space character.
_TOKEN = re.compile(r'\w+|\S')

# First address handed out to a variable; later ones count downwards.
_TOP_VARIABLE_ADDRESS = 0xFF


def _next_free_address(memory_map):
    """Return the address just below the lowest one already in use."""
    address = min(memory_map.values(),
                  default=_TOP_VARIABLE_ADDRESS + 1) - 1
    if address < 0:
        raise ValueError("Out of memory for variables")
    return address


def parse_math_to_instructions(math_expression, memory_map):
    """
    Converts a simple math expression or variable assignment to assembly instructions.

    :param math_expression: The math expression to convert, optionally of the
        form ``[type] name = expression;``.
    :param memory_map: A dictionary tracking variable memory locations. A newly
        assigned variable is added to it.
    :return: A list of assembly instructions.
    :raises ValueError: If the expression is malformed or the assignment has
        no target name.
    """
    statement = math_expression.strip().rstrip(';')  # Remove trailing semicolon
    target, assignment, expr = statement.partition('=')

    if not assignment:
        instructions, _ = parse_expression(
            statement, memory_map, _next_free_address(memory_map))
        return instructions

    # The variable name is the last word, so an optional leading type keyword
    # is dropped without eating letters of the name itself.
    target_words = target.split()
    if not target_words:
        raise ValueError(f"Assignment without a variable name: {math_expression}")
    var_name = target_words[-1]

    # Evaluate first: the expression may only use already known variables.
    instructions, result_register = parse_expression(
        expr, memory_map, _next_free_address(memory_map))

    # Each new variable gets its own address, derived from the shared map.
    if var_name not in memory_map:
        memory_map[var_name] = _next_free_address(memory_map)
    instructions.append(f"str {result_register}, 0x{memory_map[var_name]:X} ; Save variable {var_name} in memory")
    return instructions


def precedence(op):
    """
    Returns the precedence of the given operator (0 for anything unknown).
    """
    if op in ('*', '/'):
        return 2
    if op in ('+', '-'):
        return 1
    return 0


def _load(instructions, register, operand):
    """Emit the load of a pending ``(opcode, argument)`` operand."""
    opcode, argument = operand
    instructions.append(f"{opcode} {register}, {argument}")


def apply_operator(instructions, operand_stack, operator):
    """
    Applies an operator to the top two operands in the operand stack and generates instructions.

    Operand stack entries are ``'a'``/``'b'`` (value already in that register),
    ``'stack'`` (value saved with ``push``) or an ``(opcode, argument)`` tuple
    for a constant or variable that has not been loaded yet. The left operand
    ends up in register a, the right one in register b, and the result is left
    in register a.

    :raises ValueError: If the operator is unknown or an operand is missing.
    """
    if operator not in _MNEMONICS:
        raise ValueError(f"Unsupported operator: {operator!r}")
    if len(operand_stack) < 2:
        raise ValueError(f"Operator {operator!r} is missing an operand")
    right = operand_stack.pop()
    left = operand_stack.pop()

    # Bring a computed right operand into b; push/pop moves a into b.
    if right == 'a':
        instructions.extend(["push a", "pop b"])
    elif right == 'stack':
        instructions.append("pop b")

    if left == 'stack':
        instructions.append("pop a")
    elif isinstance(left, tuple):
        if 'a' in operand_stack:
            # An earlier result still lives in a: save it before reusing a.
            instructions.append("push a")
            operand_stack[operand_stack.index('a')] = 'stack'
        _load(instructions, 'a', left)

    if isinstance(right, tuple):
        _load(instructions, 'b', right)

    instructions.append(f"{_MNEMONICS[operator]} a, b")
    operand_stack.append('a')


def parse_expression(expr, memory_map, temp_memory_address):
    """
    Parses a math expression and generates instructions.

    :param expr: The math expression. Operators and parentheses do not need to
        be separated by spaces.
    :param memory_map: Memory map for variables.
    :param temp_memory_address: Memory address to use for new variables
        (accepted for compatibility; not used).
    :return: (list of instructions, result_register)
    :raises ValueError: If the expression is empty, contains an unknown token
        or has unbalanced parentheses.
    """
    instructions = []
    operator_stack = []
    operand_stack = []

    for token in _TOKEN.findall(expr):
        if token.isdigit():
            operand_stack.append(('ldw', token))
        elif token in memory_map:
            operand_stack.append(('ldr', f"0x{memory_map[token]:X}"))
        elif token in _MNEMONICS:
            while operator_stack and precedence(operator_stack[-1]) >= precedence(token):
                apply_operator(instructions, operand_stack, operator_stack.pop())
            operator_stack.append(token)
        elif token == '(':
            operator_stack.append(token)
        elif token == ')':
            while operator_stack and operator_stack[-1] != '(':
                apply_operator(instructions, operand_stack, operator_stack.pop())
            if not operator_stack:
                raise ValueError(f"Unbalanced ')' in expression: {expr}")
            operator_stack.pop()  # Remove '('
        else:
            raise ValueError(f"Unknown token {token!r} in expression: {expr}")

    while operator_stack:
        operator = operator_stack.pop()
        if operator == '(':
            raise ValueError(f"Unbalanced '(' in expression: {expr}")
        apply_operator(instructions, operand_stack, operator)

    if len(operand_stack) != 1:
        raise ValueError(f"Malformed expression: {expr}")
    result = operand_stack.pop()
    if isinstance(result, tuple):
        # A lone constant or variable still has to be loaded.
        _load(instructions, 'a', result)

    return instructions, 'a'


if __name__ == "__main__":
    # Example Usage
    memory_map = {}
    expressions = [
        "1 + ( 3 + 8 )",
        "int variable = 1 + ( 3 + 8 );",
        "int variable_dose = variable + ( 4 + 4 );"
    ]

    for expr in expressions:
        instructions = parse_math_to_instructions(expr, memory_map)
        for instr in instructions:
            print(instr)
        print('; - - - - - - - - - -')
print a register to terminal as a char (debug) @@ -59,7 +59,7 @@ main: ldw a, 600 ; screen height str a, 0xF2 ; copy A register to memory address - jsr _init_graphics_mode + jsr init_graphics_mode @@ -68,7 +68,7 @@ init_graphics_mode: push b push c - ldw g, 0xFF ; This is never used + ldw a, 0xFF ; This is never used ldr a, 0xF0 ; copy data from memory address to register 'A' ldr b, 0xF1 ; copy data from memory address to register 'B' @@ -77,6 +77,7 @@ init_graphics_mode: int 0x70 ; all interupts in 0x70 - 0x7F are for graphics, ; only 0x70 to 0x72 are implemented + pop c pop b pop a