added files from previous

This commit is contained in:
Spencer Conlon 2024-12-24 05:12:44 +00:00
parent 5ea18b5958
commit ddd64d397a
4 changed files with 460 additions and 3 deletions

309
C-Parcer.py Normal file
View File

@ -0,0 +1,309 @@
# --- Compiler configuration and shared state ---
filename = "main.c"            # C source file that gets compiled
DEBUG = False                  # when true, DEBUG_MODE_PRINT produces output
types = ["int", "void"]        # type keywords that may start a declaration
variable_data = dict()         # variable name -> memory address
CurrentCompilerLine = 1        # position counter shown in error messages
import re
def CompileError(message):
    """Print a diagnostic in ``file:line:column: Error: message`` form.

    The position comes from the module-level ``filename`` and
    ``CurrentCompilerLine``; the column is not tracked and is always 0.
    The function only prints: it neither raises nor stops compilation.
    """
    line = CurrentCompilerLine
    column = 0
    print(f"{filename}:{line}:{column}: Error: {message}")
def DEBUG_MODE_PRINT(*args):
    """Forward ``args`` to ``print`` when the module-level ``DEBUG`` is truthy."""
    if not DEBUG:
        return
    print(*args)
# Splits source text into words/numbers and single-character punctuation.
# Characters the pattern does not match are skipped by findall().
tokenizer = re.compile(r'(\w+|\(|\)|,|;|=|\+|-|\*|/|{|})')


def parse_code_to_chunks(code):
    """Group the tokens of ``code`` into statement-sized chunks.

    A ``;`` closes the chunk being collected and is itself discarded.
    A ``{`` or ``}`` closes the chunk being collected and is then emitted as
    a one-token chunk of its own. Empty chunks are never emitted.

    :param code: Source text to tokenize.
    :return: List of chunks, each a list of token strings.
    """
    chunks = []
    pending = []
    for tok in tokenizer.findall(code):
        if tok not in (';', '{', '}'):
            pending.append(tok)
            continue
        # A delimiter was reached: flush whatever has been collected so far.
        if pending:
            chunks.append(pending)
            pending = []
        if tok != ';':
            chunks.append([tok])
    # Tokens after the last delimiter still form a chunk.
    if pending:
        chunks.append(pending)
    return chunks
def classify_assignment(assignment):
    """Classify the right-hand side of an ``name = value`` statement.

    :param assignment: Statement text containing at least one ``=``.
    :return: ``1`` for a decimal constant, ``2`` for arithmetic between two
        decimal constants, ``3`` for arithmetic involving an identifier or
        parentheses, ``4`` for a plain identifier, ``0`` for anything else.
    :raises ValueError: If ``assignment`` contains no ``=``.
    """
    _, value = assignment.split('=', 1)
    value = value.strip()

    if value.isdigit():
        return 1

    # Operators are looked up in the fixed order + - * /, not by position.
    present = [symbol for symbol in ('+', '-', '*', '/') if symbol in value]
    if not present:
        return 4 if value.isidentifier() else 0

    if '(' in value and ')' in value:
        return 3

    left, right = (side.strip() for side in value.split(present[0], 1))
    if left.isdigit() and right.isdigit():
        return 2
    if left.isidentifier() or right.isidentifier():
        return 3
    return 0
import re
def parse_operands(equation: str):
    """Return the two operands of a binary expression as ``[left, right]``.

    Spaces are ignored. Redundant parentheses are removed first: a group
    around a single operand (``(y)+5``) or around the whole expression
    (``((y+5))``). A parenthesised sub-expression that is only part of the
    equation (``(a+b)*c``) cannot be described by two operands and is
    rejected.

    :param equation: Expression text such as ``"y + 5"``.
    :return: ``[left_operand, right_operand]`` as strings.
    :raises ValueError: On unbalanced parentheses, nested sub-expressions, or
        text that is not ``operand operator operand``.
    """
    equation = equation.replace(" ", "")
    if equation.count('(') != equation.count(')'):
        raise ValueError(f"Unbalanced parentheses: {equation}")

    # Every pass removes one pair of parentheses or raises, so this ends.
    while '(' in equation:
        innermost = re.search(r'\(([^()]*)\)', equation)
        if innermost is None:
            # Counts match but the order is wrong, e.g. ")(".
            raise ValueError(f"Unbalanced parentheses: {equation}")
        inner_expr = innermost.group(1)
        before = equation[:innermost.start()]
        after = equation[innermost.end():]
        if re.fullmatch(r'[a-zA-Z0-9_]+', inner_expr):
            # "(y)" -> "y": parentheses around a lone operand carry no meaning.
            equation = before + inner_expr + after
        elif not before.strip('(') and not after.strip(')'):
            # Only parentheses surround the group: it is the whole expression.
            equation = inner_expr
        else:
            raise ValueError(
                f"Nested sub-expressions are not supported: {equation}")
    return parse_simple_expression(equation)


def parse_simple_expression(equation):
    """Extract the operands of ``operand operator operand``.

    The pattern is matched at the start of ``equation`` only, so any text
    after the second operand is ignored.

    :param equation: Expression text without parentheses.
    :return: ``[left_operand, right_operand]`` as strings.
    :raises ValueError: If ``equation`` does not start with such an expression.
    """
    match = re.match(r'([a-zA-Z0-9_]+|\d+)\s*([+\-*/])\s*([a-zA-Z0-9_]+|\d+)', equation)
    if match is None:
        raise ValueError(f"Invalid equation format: {equation}")
    return [match.group(1), match.group(3)]
def create_new_variable(name, address):
    """Record ``address`` for variable ``name`` in ``variable_data``.

    An existing entry with the same name is overwritten.
    """
    variable_data.update({name: address})
def equasion_to_asm(equasion: list, output_address: int) -> list[list[str]]:
    """Translate one assignment statement into assembly instructions.

    :param equasion: Tokens of the statement, e.g.
        ``['int', 'x', '=', 'y', '+', '5']``.
    :param output_address: Memory address the computed value is stored to.
    :return: One single-element list per emitted instruction. Empty when
        ``classify_assignment`` does not recognise the statement.
    :raises KeyError: If the right-hand side names a variable that is not in
        ``variable_data``.
    """
    # Mnemonic for each arithmetic operator the classifier recognises.
    opcodes = {'+': 'add', '-': 'sub', '*': 'mul', '/': 'div'}

    statement = " ".join(equasion)
    value = statement.split("=")[1].strip()
    # Classify once; the result decides which code shape is emitted.
    kind = classify_assignment(statement)
    output = []

    if kind == 1:
        # Constant: int x = 1;
        DEBUG_MODE_PRINT("1 >", value)
        output.append([f'ldw a, {value}'])
        output.append([f'str a, {output_address}'])
    elif kind == 4:
        # Copy of another variable: int x = y;
        DEBUG_MODE_PRINT("4 >", value)
        address = variable_data[value]
        output.append([f'ldr a, {address}'])
        output.append([f'str a, {output_address}'])
    elif kind in (2, 3):
        # Binary arithmetic: x = y + 5;  or  x = 1 + 2;
        DEBUG_MODE_PRINT(f"{kind} >", value)
        operands = parse_operands(value)
        # Left operand goes to register a, right operand to register b.
        for register, operand in zip('ab', operands):
            if operand.isidentifier():
                output.append([f'ldr {register}, {variable_data[operand]}'])
            else:
                output.append([f'ldw {register}, {operand}'])
        # parse_operands splits around the first operator in the text, so the
        # first operator character is the one joining the two operands.
        operator = next(ch for ch in value if ch in opcodes)
        output.append([f'{opcodes[operator]} a, b'])
        output.append([f'str a, {output_address}'])
    else:
        # Not implemented
        DEBUG_MODE_PRINT("NotImpl: ", statement)
    return output
# Read the C source and collapse every run of whitespace (newlines included)
# into a single space, so the tokenizer works on one flat line.
with open(filename, "r") as f:
    code = f.read()
code = re.sub(r'\s+', ' ', code.strip())
chunks = parse_code_to_chunks(code)

# Variables are handed one address each, counting down from 0xFF.
free_memory_address = 0xFF
output = []
for chunk in chunks:
    chunk: list
    if chunk[0] in types:
        # Declaration. Only declarations with an initialiser produce code;
        # anything else starting with a type keyword is skipped here.
        if "=" in chunk:
            name = chunk[1]
            if name in variable_data:
                CompileError(f"Redefinition of variable: '{name}'")
            else:
                create_new_variable(name, free_memory_address)
                output += equasion_to_asm(chunk, free_memory_address)
                free_memory_address -= 1
    elif "=" in chunk:
        # Assignment to an existing variable: write to that variable's own
        # address, not to the next free slot.
        if chunk[0] in variable_data:
            output += equasion_to_asm(chunk, variable_data[chunk[0]])
        else:
            CompileError(f"Undefined Variable: '{chunk[0]}'")
    else:
        chunk = [token.strip("{}") for token in chunk]
        # A lone brace becomes [''] after stripping and carries no statement.
        if chunk != ['']:
            if chunk[0] == 'return':
                # handle return statements
                pass
            elif chunk[0].isidentifier():
                function_name = chunk[0]
                # Keep only the real arguments; build a new list rather than
                # popping from the one being iterated.
                arguments = [token for token in chunk[1:]
                             if token not in (',', '(', ')')]
                if function_name == 'syscall':
                    if arguments:
                        # The argument is read as hexadecimal and emitted in
                        # decimal.
                        output += [f'int {int(arguments[0], 16)}']
                    else:
                        CompileError("syscall expects an interrupt number")
    # Counts processed chunks; real line numbers are gone because newlines
    # were collapsed above.
    CurrentCompilerLine += 1

with open("output.asm", "w") as f:
    # Write the assembly code to the output file
    f.write("main:\n ")
    for line in output:
        f.write("".join(line))
        f.write("\n ")

View File

@ -1,3 +1,5 @@
;; Interrupt list
; 0x00 -> print register a as char
; 0x01 -> print register a as int
;
@ -43,6 +45,34 @@
; (C) byte offset
; (D) byte to write
; Instructions
;
; add ; adds 2 registers together, accumulating the result in the first register
; sub ; same as add, except it subtracts
; ldw ; load word: load an immediate value into a register
; ldr ; load a byte from an address into a register
; ldb ; load a byte from an address (address stored in a register) into another register
; str ; store the value of a register at an address
; jsr ; jump to a label
; push/pop ; stack stuff, takes 1 register
; ret ; return (pops off of stack)
; mov ; move a value from one register to another
; mul ; multiply takes 2 registers
; div ; divide takes 2 registers
; bne/beq ; branch if not equal / branch if equal, example: b(n)e a, b, Label1
;
;
;
;
;
;
; Compiler options
; %include include another asm file
; %define all occurrences of the define will be replaced
main:

117
lang-to-asm.py Normal file
View File

@ -0,0 +1,117 @@
def parse_math_to_instructions(math_expression, memory_map):
    """
    Converts a simple math expression or variable assignment to assembly instructions.

    For an assignment, the expression is evaluated and the result stored at
    the variable's address. A variable seen for the first time is given the
    address just below the lowest one already in ``memory_map`` (0xFF when
    the map is empty), so no two variables share a slot.

    :param math_expression: The math expression to convert, e.g.
        ``"int total = 1 + 2;"`` or ``"1 + 2"``.
    :param memory_map: A dictionary tracking variable memory locations;
        updated in place when a new variable is assigned.
    :return: A list of assembly instructions.
    :raises ValueError: If the text contains more than one ``=``.
    """
    instructions = []
    # Next free slot, derived from the map so it survives across calls.
    temp_memory_address = min(memory_map.values(), default=0x100) - 1

    if '=' in math_expression:
        var_name, expr = map(str.strip, math_expression.split('='))
        expr = expr.strip(';')  # Remove trailing semicolon
        # Drop a leading "int" keyword. str.strip("int ") would instead
        # remove those characters from both ends of the name itself.
        keyword, _, rest = var_name.partition(' ')
        if keyword == "int" and rest:
            var_name = rest.strip()

        # Generate instructions for the expression
        expr_instructions, result_register = parse_expression(expr, memory_map, temp_memory_address)
        instructions.extend(expr_instructions)

        # Assign the result to the variable
        if var_name not in memory_map:
            memory_map[var_name] = temp_memory_address
        instructions.append(f"str {result_register}, 0x{memory_map[var_name]:X} ; Save variable {var_name} in memory")
    else:
        # Generate instructions for the expression
        expr_instructions, _ = parse_expression(math_expression, memory_map, temp_memory_address)
        instructions.extend(expr_instructions)
    return instructions
def precedence(op):
    """
    Returns the precedence of the given operator.

    :param op: Operator token.
    :return: 2 for ``*`` and ``/``, 1 for ``+`` and ``-``, 0 for anything else.
    """
    if op in ('+', '-'):
        return 1
    if op in ('*', '/'):
        return 2
    return 0


def apply_operator(instructions, operand_stack, operator):
    """
    Applies an operator to the top two operands and generates instructions.

    Every evaluated operand ends with ``push a``, so the two operands sit on
    the machine stack with the right-hand one on top. This function brings
    them into ``a`` (left) and ``b`` (right), emits the arithmetic
    instruction and pushes the result again. Two shortcuts avoid needless
    stack traffic:

    * a right operand that was just loaded (``ldw``/``ldr`` followed by
      ``push a``) is loaded straight into ``b`` instead;
    * a ``push a`` that would be followed directly by ``pop a`` is dropped.

    :param instructions: Instruction list to extend (modified in place).
    :param operand_stack: Compile-time record of pending operands.
    :param operator: One of ``+``, ``-``, ``*``, ``/``.
    :raises ValueError: If ``operator`` is not arithmetic (for example a
        ``(`` that was never closed) or fewer than two operands are pending.
    """
    mnemonics = {'+': 'add', '-': 'sub', '*': 'mul', '/': 'div'}
    if operator not in mnemonics:
        raise ValueError(
            f"Cannot apply {operator!r}: not an arithmetic operator "
            "(unbalanced parentheses?)")
    if len(operand_stack) < 2:
        raise ValueError(f"Operator {operator!r} is missing an operand")
    operand_stack.pop()
    operand_stack.pop()

    right_is_leaf = (
        len(instructions) >= 2
        and instructions[-1] == "push a"
        and instructions[-2].startswith(("ldw a, ", "ldr a, "))
    )
    if right_is_leaf:
        load = instructions[-2]
        del instructions[-2:]
        # The left operand is now the most recent push; its value is still
        # in register a, so cancelling the push replaces "push a / pop a".
        if instructions and instructions[-1] == "push a":
            instructions.pop()
        else:
            instructions.append("pop a")
        # Same load as before, retargeted from register a to register b.
        instructions.append(f"{load[:3]} b, {load[7:]}")
    else:
        # The right operand is a computed value sitting on top of the stack.
        instructions.append("pop b")
        instructions.append("pop a")
    instructions.append(f"{mnemonics[operator]} a, b")
    instructions.append("push a")
    operand_stack.append('a')


def parse_expression(expr, memory_map, temp_memory_address):
    """
    Parses a math expression and generates instructions.

    Operators are ordered with two stacks (operators and operands), honouring
    precedence and parentheses. Intermediate values are kept on the machine
    stack, so nested expressions such as ``1 + ( 3 + 8 )`` do not overwrite
    each other's registers. The final value is left in register ``a``.

    :param expr: The math expression; spaces around operators are optional.
    :param memory_map: Memory map for variables.
    :param temp_memory_address: Memory address to use for new variables
        (accepted for interface compatibility; not used here).
    :return: (list of instructions, result_register)
    :raises ValueError: On unknown tokens (including variables missing from
        ``memory_map``), unbalanced parentheses, or operators/operands that
        do not pair up.
    """
    instructions = []
    # Pad operators and parentheses so "1+(3+8)" splits like "1 + ( 3 + 8 )".
    for symbol in "()+-*/":
        expr = expr.replace(symbol, f" {symbol} ")
    tokens = expr.split()
    operator_stack = []
    operand_stack = []

    for token in tokens:
        if token.isdigit():
            instructions.append(f"ldw a, {token}")
            instructions.append("push a")
            operand_stack.append('a')
        elif token in memory_map:
            instructions.append(f"ldr a, 0x{memory_map[token]:X}")
            instructions.append("push a")
            operand_stack.append('a')
        elif token in ('+', '-', '*', '/'):
            # Operators of equal or higher precedence bind first.
            while operator_stack and precedence(operator_stack[-1]) >= precedence(token):
                apply_operator(instructions, operand_stack, operator_stack.pop())
            operator_stack.append(token)
        elif token == '(':
            operator_stack.append(token)
        elif token == ')':
            while operator_stack and operator_stack[-1] != '(':
                apply_operator(instructions, operand_stack, operator_stack.pop())
            if not operator_stack:
                raise ValueError("Unbalanced ')' in expression")
            operator_stack.pop()  # Remove '('
        else:
            raise ValueError(f"Unknown token in expression: {token!r}")

    while operator_stack:
        apply_operator(instructions, operand_stack, operator_stack.pop())
    if len(operand_stack) > 1:
        raise ValueError("Expression has operands without an operator")

    # The result is already in register a; the trailing push is not needed.
    if instructions and instructions[-1] == "push a":
        instructions.pop()
    return instructions, 'a'
# Demo: translate a few sample statements. One memory map is shared, so a
# later statement can read a variable assigned by an earlier one.
memory_map = {}
expressions = [
    "1 + ( 3 + 8 )",
    "int variable = 1 + ( 3 + 8 );",
    "int variable_dose = variable + ( 4 + 4 );"
]
for expr in expressions:
    instructions = parse_math_to_instructions(expr, memory_map)
    if instructions:
        print("\n".join(instructions))
    # Separator line between the listings of consecutive statements.
    print('; - - - - - - - - - -')

View File

@ -47,7 +47,7 @@
main:
ldw a, 0xFF ; load 255 to A register, Unused
str a, 0x0F ; will cause an error because its writing to program memory
str a, 0xEF ; will cause an error because its writing to program memory
; You will have to make the address bigger than the program
int 0x00 ; interupt to print a register to terminal as a char (debug)
@ -59,7 +59,7 @@ main:
ldw a, 600 ; screen height
str a, 0xF2 ; copy A register to memory address
jsr _init_graphics_mode
jsr init_graphics_mode
@ -68,7 +68,7 @@ init_graphics_mode:
push b
push c
ldw g, 0xFF ; This is never used
ldw a, 0xFF ; This is never used
ldr a, 0xF0 ; copy data from memory address to register 'A'
ldr b, 0xF1 ; copy data from memory address to register 'B'
@ -77,6 +77,7 @@ init_graphics_mode:
int 0x70 ; all interupts in 0x70 - 0x7F are for graphics,
; only 0x70 to 0x72 are implemented
pop c
pop b
pop a