added files from previous
This commit is contained in:
parent
5ea18b5958
commit
ddd64d397a
309
C-Parcer.py
Normal file
309
C-Parcer.py
Normal file
@ -0,0 +1,309 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# User settings
filename = "main.c"  # C source file opened and compiled by this script
DEBUG = False  # when truthy, DEBUG_MODE_PRINT() forwards its arguments to print()

# Type keywords; a chunk whose first token is one of these is a declaration
types = ["int", "void"]

# Variable name -> memory address, filled in by create_new_variable()
variable_data = dict()

# Position shown in CompileError() messages; advanced by the main loop
CurrentCompilerLine = 1
|
||||||
|
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
def CompileError(message):
    """Print a compiler diagnostic for the current position.

    The line has the form ``<filename>:<line>:<column>: Error: <message>``,
    built from the module-level ``filename`` and ``CurrentCompilerLine``.
    The column is always reported as 0.  Nothing is raised or returned, so
    the caller carries on after the report.
    """
    print("{}:{}:{}: Error: {}".format(filename, CurrentCompilerLine, 0, message))
|
||||||
|
|
||||||
|
|
||||||
|
def DEBUG_MODE_PRINT(*args):
    """Forward *args* to ``print`` only while the module-level ``DEBUG`` flag is truthy."""
    if not DEBUG:
        return
    print(*args)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Tokenization regex for splitting code into meaningful segments
tokenizer = re.compile(r'(\w+|\(|\)|,|;|=|\+|-|\*|/|{|})')


def parse_code_to_chunks(code):
    """Group the tokens of *code* into statement-sized chunks.

    Tokens are whatever ``tokenizer`` matches; any character it does not
    match (whitespace included) is dropped.  A ``;`` closes the chunk being
    built and is not kept.  ``{`` and ``}`` close the chunk being built and
    are then emitted as one-token chunks of their own.  Tokens left over at
    the end of the input form a final chunk.  Empty chunks are never emitted.

    :param code: source text to split.
    :return: list of chunks, each a list of token strings.
    """
    chunks = []
    pending = []

    def flush():
        # Move the tokens gathered so far into the result, if there are any.
        nonlocal pending
        if pending:
            chunks.append(pending)
            pending = []

    for tok in tokenizer.findall(code):
        if tok == ';':
            flush()
        elif tok in ('{', '}'):
            flush()
            chunks.append([tok])
        else:
            pending.append(tok)

    flush()
    return chunks
|
||||||
|
|
||||||
|
|
||||||
|
def classify_assignment(assignment):
    """Classify the right-hand side of an assignment statement.

    :param assignment: statement text such as ``"int x = y + 5"``; everything
        after the first ``=`` is treated as the value.
    :return: one of
        0 - not recognised (this includes text that has no ``=`` at all),
        1 - a single unsigned integer constant,
        2 - ``<number> <op> <number>``,
        3 - arithmetic that involves an identifier, or any arithmetic that
            contains both ``(`` and ``)``,
        4 - a single identifier (copy of another variable).
    """
    _, separator, value = assignment.partition('=')
    if not separator:
        # Not an assignment: report "unknown" instead of failing on unpacking.
        return 0
    value = value.strip()

    if value.isdigit():
        return 1

    present = [op for op in ('+', '-', '*', '/') if op in value]
    if present:
        if '(' in value and ')' in value:
            return 3
        # The split happens at the first operator in the order + - * /
        # (not the leftmost one in the text), at its first occurrence.
        left, right = (part.strip() for part in value.split(present[0], 1))
        if left.isdigit() and right.isdigit():
            return 2
        if left.isidentifier() or right.isidentifier():
            return 3
        return 0

    if value.isidentifier():
        return 4

    return 0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
def _is_fully_parenthesized(text):
    """Return True when the first '(' of *text* is closed by its very last character."""
    if len(text) < 2 or text[0] != '(' or text[-1] != ')':
        return False
    depth = 0
    for index, char in enumerate(text):
        if char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
            if depth == 0:
                # The opening parenthesis closed here; it wraps everything
                # only if this is the last character.
                return index == len(text) - 1
    return False


def parse_operands(equation: str):
    """Return the two operands of a binary expression as ``[left, right]``.

    Spaces are removed first.  Parentheses that enclose the whole expression
    (``"(y + 5)"``, ``"((y + 5))"``) are peeled off.  Parentheses anywhere
    else describe a nested sub-expression, which a two-operand result cannot
    represent, so they are rejected rather than partially parsed.

    :param equation: expression text, e.g. ``"y + 5"``.
    :return: ``[operand1, operand2]`` as strings.
    :raises ValueError: for nested or unbalanced parentheses, or when the
        remaining text is not ``<operand> <op> <operand>``.
    """
    # Remove spaces for easier handling
    equation = equation.replace(" ", "")

    while _is_fully_parenthesized(equation):
        equation = equation[1:-1]

    if '(' in equation or ')' in equation:
        raise ValueError(
            f"Parenthesised sub-expressions are not supported: {equation}"
        )

    return parse_simple_expression(equation)


def parse_simple_expression(equation):
    """Extract the operands of a parenthesis-free ``<operand> <op> <operand>``.

    Only the start of *equation* has to match, so in ``"a+b+c"`` the trailing
    ``+c`` is ignored and ``['a', 'b']`` is returned.

    :param equation: expression text without parentheses.
    :return: ``[operand1, operand2]`` as strings.
    :raises ValueError: when *equation* does not start with such a pattern.
    """
    match = re.match(r'([a-zA-Z0-9_]+|\d+)\s*([+\-*/])\s*([a-zA-Z0-9_]+|\d+)', equation)
    if match is None:
        raise ValueError(f"Invalid equation format: {equation}")
    # Group 2 is the operator; callers of this function only need the operands.
    return [match.group(1), match.group(3)]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def create_new_variable(name, address):
    """Record *address* as the location of variable *name* in ``variable_data``.

    An existing entry for *name* is overwritten.
    """
    variable_data.update({name: address})
|
||||||
|
|
||||||
|
def equasion_to_asm(equasion:list, output_address:int) -> list[list[str]]:
    """Translate one assignment chunk into assembly instructions.

    The tokens are joined with spaces, the text after the first ``=`` is
    taken as the value, and ``classify_assignment`` picks the code shape:

    * 1 (constant):   ``ldw a, <value>`` then ``str a, <output_address>``
    * 4 (variable):   ``ldr a, <address of value>`` then ``str``
    * 2/3 (binary arithmetic): each operand is loaded into ``a`` / ``b``
      (``ldw`` for constants, ``ldr`` for variables), then the instruction
      matching the operator is emitted, then ``str``
    * anything else:  nothing is emitted (reported via DEBUG_MODE_PRINT)

    :param equasion: token list of the statement, e.g. ``['x', '=', 'y', '+', '5']``.
    :param output_address: memory address that receives the result.
    :return: list of instructions, each wrapped in a one-element list.
    :raises KeyError: when an operand names a variable missing from ``variable_data``.
    :raises ValueError: propagated from ``parse_operands`` for expressions it cannot split.
    """
    text = " ".join(equasion)
    value = text.partition("=")[2].strip()
    # Classify once; the result selects exactly one branch below.
    category = classify_assignment(text)
    output = []

    if category == 1:
        # constant, e.g. int x = 1;
        DEBUG_MODE_PRINT("1 >", value)
        output.append([f'ldw a, {value}'])
        output.append([f'str a, {output_address}'])

    elif category == 4:
        # copy of another variable, e.g. int x = y
        DEBUG_MODE_PRINT("4 >", value)
        address = variable_data[value]
        output.append([f'ldr a, {address}'])  # load the source variable into a
        output.append([f'str a, {output_address}'])  # store a at the destination

    elif category in (2, 3):
        # binary arithmetic, e.g. x = y + 5 or x = 1 + 2
        DEBUG_MODE_PRINT(f"{category} >", value)
        operands = parse_operands(value)

        for register, operand in zip(('a', 'b'), operands):
            if operand.isidentifier():
                output.append([f'ldr {register}, {variable_data[operand]}'])
            else:
                output.append([f'ldw {register}, {operand}'])

        # Categories 2 and 3 guarantee that an operator character is present.
        # Mnemonics follow the instruction list in example.asm; sub/mul/div
        # are assumed to leave their result in the first register like add
        # does - TODO confirm against the assembler.
        symbol = next(char for char in value if char in '+-*/')
        mnemonic = {'+': 'add', '-': 'sub', '*': 'mul', '/': 'div'}[symbol]
        output.append([f'{mnemonic} a, b'])

        output.append([f'str a, {output_address}'])  # store the result

    else:
        # Not Implemented
        DEBUG_MODE_PRINT("NotImpl: ", text)

    return output
|
||||||
|
|
||||||
|
|
||||||
|
# Read the C code from the file
with open(filename, "r") as f:
    code = f.read()

# Preprocess the code: collapse every run of whitespace (newlines included)
# into a single space
code = re.sub(r'\s+', ' ', code.strip())
# Parse the code into chunks
chunks = parse_code_to_chunks(code)
#DEBUG_MODE_PRINT("Chunks:", chunks)

# Variables receive addresses counting down from 0xFF, one per declaration
free_memory_address = 0xFF

output = []

for chunk in chunks:
    if chunk[0] in types:
        # Declaration.  Only "<type> <name> = <value>" emits code; any other
        # chunk starting with a type (e.g. a function header) is ignored.
        if "=" in chunk:
            # chunk[0] is the type keyword, so the variable name is chunk[1].
            if chunk[1] in variable_data:
                CompileError(f"Redefinition of variable: '{chunk[1]}'")
            create_new_variable(chunk[1], free_memory_address)
            output += equasion_to_asm(chunk, free_memory_address)
            free_memory_address -= 1
    elif "=" in chunk:
        # Assignment to an existing variable: store to that variable's own
        # address, not to the next free slot.
        if chunk[0] in variable_data:
            output += equasion_to_asm(chunk, variable_data[chunk[0]])
        else:
            CompileError(f"Undefined Variable: '{chunk[0]}'")
    else:
        chunk[:] = [token.strip("{}") for token in chunk]

        if chunk != ['']:
            if chunk[0] == 'return':
                # handle return statements
                pass
            elif chunk[0].isidentifier():
                function_name = chunk[0]
                # Keep only the argument tokens.  Building a new list avoids
                # skipping elements, which popping while enumerating did.
                arguments = [
                    token for token in chunk[1:] if token not in (',', '(', ')')
                ]

                if function_name == 'syscall':
                    if arguments:
                        # The argument is read as hexadecimal and written out in decimal.
                        output.append([f'int {int(arguments[0], 16)}'])
                    else:
                        CompileError("syscall requires an interrupt number")

    # Newlines were collapsed above, so this counts chunks, not source lines.
    CurrentCompilerLine += 1

with open("output.asm", "w") as f:
    # Write the assembly code to the output file
    f.write("main:\n ")
    for line in output:
        f.write("".join(line))
        f.write("\n ")
|
30
example.asm
30
example.asm
@ -1,3 +1,5 @@
|
|||||||
|
;; Interrupt list
|
||||||
|
|
||||||
; 0x00 -> print register a as char
|
; 0x00 -> print register a as char
|
||||||
; 0x01 -> print register a as int
|
; 0x01 -> print register a as int
|
||||||
;
|
;
|
||||||
@ -43,6 +45,34 @@
|
|||||||
; (C) byte offset
|
; (C) byte offset
|
||||||
; (D) byte to write
|
; (D) byte to write
|
||||||
|
|
||||||
|
; Instructions
|
||||||
|
;
|
||||||
|
; add ; adds 2 registers together, accumulating in the first register
|
||||||
|
; sub ; same as add ; except subtracts
|
||||||
|
; ldw ; load word, load an immediate into a register
|
||||||
|
; ldr ; load a byte from an address into a register
|
||||||
|
; ldb ; load a byte from an address (address stored in a register) into another register
|
||||||
|
; str ; stores from a register to an address
|
||||||
|
; jsr ; jump to a label
|
||||||
|
; push/pop ; stack stuff, takes 1 register
|
||||||
|
; ret ; return (pops off of stack)
|
||||||
|
; mov ; move a value from one register to another
|
||||||
|
; mul ; multiply takes 2 registers
|
||||||
|
; div ; divide takes 2 registers
|
||||||
|
; bne/beq ; branch not equal example: b(n)e a, b, Label1
|
||||||
|
;
|
||||||
|
;
|
||||||
|
;
|
||||||
|
;
|
||||||
|
;
|
||||||
|
;
|
||||||
|
|
||||||
|
; Compiler options
|
||||||
|
; %include include another asm file
|
||||||
|
; %define all occurrences of the define will be replaced
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
main:
|
main:
|
||||||
|
117
lang-to-asm.py
Normal file
117
lang-to-asm.py
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
def parse_math_to_instructions(math_expression, memory_map):
    """
    Converts a simple math expression or variable assignment to assembly instructions.

    For an assignment (``[int] name = expr[;]``) the expression is compiled
    and its result register is stored at the variable's address.  A variable
    seen for the first time gets the address one below the lowest address
    already in *memory_map* (0xFF when the map is empty), so each variable
    has its own slot.  Without ``=`` only the expression code is produced.

    :param math_expression: The math expression to convert.
    :param memory_map: A dictionary tracking variable memory locations
        (name -> integer address); new variables are added to it in place.
    :return: A list of assembly instructions.
    """
    instructions = []
    # Next free slot.  Deriving it from the map (instead of restarting at
    # 0xFF on every call) keeps separate variables from sharing an address.
    temp_memory_address = min(memory_map.values(), default=0x100) - 1

    # Check for assignment
    if '=' in math_expression:
        var_name, expr = (part.strip() for part in math_expression.split('=', 1))
        expr = expr.strip(';')  # Remove trailing semicolon

        # Drop a leading "int" keyword.  str.strip("int ") would remove the
        # characters i/n/t from both ends of the name itself.
        declaration = var_name.split(None, 1)
        if len(declaration) == 2 and declaration[0] == 'int':
            var_name = declaration[1].strip()

        # Generate instructions for the expression
        expr_instructions, result_register = parse_expression(expr, memory_map, temp_memory_address)
        instructions.extend(expr_instructions)

        # Assign the result to the variable
        if var_name not in memory_map:
            memory_map[var_name] = temp_memory_address
        instructions.append(f"str {result_register}, 0x{memory_map[var_name]:X} ; Save variable {var_name} in memory")
    else:
        # Generate instructions for the expression
        expr_instructions, _ = parse_expression(math_expression, memory_map, temp_memory_address)
        instructions.extend(expr_instructions)

    return instructions
|
||||||
|
|
||||||
|
def precedence(op):
    """
    Returns the precedence of the given operator: 2 for ``*`` and ``/``,
    1 for ``+`` and ``-``, 0 for anything else (including ``(``).
    """
    if op in ('*', '/'):
        return 2
    if op in ('+', '-'):
        return 1
    return 0


def apply_operator(instructions, operand_stack, operator):
    """
    Applies an operator to the top two operands and generates instructions.

    Both operands are expected on the machine stack (see parse_expression).
    The right operand is popped into ``b``, the left into ``a``, the
    operation is emitted as ``<mnemonic> a, b`` and the result is pushed
    back, so nested sub-expressions cannot overwrite each other's registers.

    :param instructions: list the generated instructions are appended to.
    :param operand_stack: compile-time mirror of the machine stack, one
        entry per pending value; two entries are consumed, one is added.
    :param operator: one of ``+ - * /``.
    :raises ValueError: for an unknown operator or fewer than two operands.
    """
    mnemonics = {'+': 'add', '-': 'sub', '*': 'mul', '/': 'div'}
    if operator not in mnemonics:
        raise ValueError(f"Unsupported operator: {operator!r}")
    if len(operand_stack) < 2:
        raise ValueError(f"Operator {operator!r} is missing an operand")

    operand_stack.pop()
    operand_stack.pop()
    # Assumes push/pop act as a LIFO register stack, as listed in
    # example.asm - TODO confirm against the assembler.
    instructions.append("pop b")
    instructions.append("pop a")
    instructions.append(f"{mnemonics[operator]} a, b")
    instructions.append("push a")
    operand_stack.append('a')


def parse_expression(expr, memory_map, temp_memory_address):
    """
    Parses a math expression and generates instructions.

    Operator precedence is resolved with an operator stack.  Every operand
    is loaded into ``a`` and pushed; every operator pops its two operands,
    computes into ``a`` and pushes the result.  The final push is dropped so
    the value of the whole expression is left in ``a``.

    :param expr: The math expression.  Operators and parentheses do not
        need surrounding spaces.
    :param memory_map: Memory map for variables (name -> integer address).
    :param temp_memory_address: Memory address to use for new variables
        (accepted for interface compatibility; not used here).
    :return: (list of instructions, result_register) - the register is always 'a'.
    :raises ValueError: for unknown tokens, unmatched parentheses, or a
        missing operand/operator.
    """
    instructions = []
    for symbol in "()+-*/":
        expr = expr.replace(symbol, f" {symbol} ")
    tokens = expr.split()
    operator_stack = []
    operand_stack = []

    for token in tokens:
        if token.isdigit():
            instructions.append(f"ldw a, {token}")
            instructions.append("push a")
            operand_stack.append('a')
        elif token in memory_map:
            instructions.append(f"ldr a, 0x{memory_map[token]:X}")
            instructions.append("push a")
            operand_stack.append('a')
        elif token in ('+', '-', '*', '/'):
            while operator_stack and precedence(operator_stack[-1]) >= precedence(token):
                apply_operator(instructions, operand_stack, operator_stack.pop())
            operator_stack.append(token)
        elif token == '(':
            operator_stack.append(token)
        elif token == ')':
            while operator_stack and operator_stack[-1] != '(':
                apply_operator(instructions, operand_stack, operator_stack.pop())
            if not operator_stack:
                raise ValueError("Unmatched ')' in expression")
            operator_stack.pop()  # Remove '('
        else:
            # Silently skipping the token would leave an operator short of
            # an operand and produce wrong code.
            raise ValueError(f"Unknown token in expression: {token!r}")

    while operator_stack:
        operator = operator_stack.pop()
        if operator == '(':
            raise ValueError("Unmatched '(' in expression")
        apply_operator(instructions, operand_stack, operator)

    if len(operand_stack) > 1:
        raise ValueError("Expression has operands without an operator between them")

    # The last step always ends with "push a"; the value is still in a, so
    # drop that push instead of emitting a matching pop.
    if instructions and instructions[-1] == "push a":
        instructions.pop()

    return instructions, 'a'
|
||||||
|
|
||||||
|
# Example Usage: compile a few expressions against one shared memory map and
# print the instructions of each, followed by a separator line.
memory_map = dict()
expressions = [
    "1 + ( 3 + 8 )",
    "int variable = 1 + ( 3 + 8 );",
    "int variable_dose = variable + ( 4 + 4 );",
]

for expr in expressions:
    instructions = parse_math_to_instructions(expr, memory_map)
    for instr in instructions:
        print(instr)
    print('; - - - - - - - - - -')
|
@ -47,7 +47,7 @@
|
|||||||
|
|
||||||
main:
|
main:
|
||||||
ldw a, 0xFF ; load 255 to A register, Unused
|
ldw a, 0xFF ; load 255 to A register, Unused
|
||||||
str a, 0x0F ; will cause an error because its writing to program memory
|
str a, 0xEF ; will cause an error because its writing to program memory
|
||||||
; You will have to make the address bigger than the program
|
; You will have to make the address bigger than the program
|
||||||
int 0x00 ; interupt to print a register to terminal as a char (debug)
|
int 0x00 ; interupt to print a register to terminal as a char (debug)
|
||||||
|
|
||||||
@ -59,7 +59,7 @@ main:
|
|||||||
|
|
||||||
ldw a, 600 ; screen height
|
ldw a, 600 ; screen height
|
||||||
str a, 0xF2 ; copy A register to memory address
|
str a, 0xF2 ; copy A register to memory address
|
||||||
jsr _init_graphics_mode
|
jsr init_graphics_mode
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -68,7 +68,7 @@ init_graphics_mode:
|
|||||||
push b
|
push b
|
||||||
push c
|
push c
|
||||||
|
|
||||||
ldw g, 0xFF ; This is never used
|
ldw a, 0xFF ; This is never used
|
||||||
|
|
||||||
ldr a, 0xF0 ; copy data from memory address to register 'A'
|
ldr a, 0xF0 ; copy data from memory address to register 'A'
|
||||||
ldr b, 0xF1 ; copy data from memory address to register 'B'
|
ldr b, 0xF1 ; copy data from memory address to register 'B'
|
||||||
@ -77,6 +77,7 @@ init_graphics_mode:
|
|||||||
int 0x70 ; all interupts in 0x70 - 0x7F are for graphics,
|
int 0x70 ; all interupts in 0x70 - 0x7F are for graphics,
|
||||||
; only 0x70 to 0x72 are implemented
|
; only 0x70 to 0x72 are implemented
|
||||||
|
|
||||||
|
pop c
|
||||||
pop b
|
pop b
|
||||||
pop a
|
pop a
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user