Python-Cpu-Emulator/asm-to-prg.py
OusmBlueNinja b88b8bffc9 Main
2024-12-23 23:17:07 -06:00

914 lines
31 KiB
Python

# Path of the assembly source file that this script assembles.
filename = "test.asm"
#
# Change the filename here to the path of your asm file,
# then copy the output to 'main.py' and replace the 'program' variable
# with the list, then run the 'main.py' file with Python 3.11+.
#
# There are 2 example programs: one that demonstrates bitmap mode
# and one that demonstrates text mode, with typing using the BIOS
# interrupts.
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
import re
from termcolor import colored
# Load every line of the assembly source (newlines kept) before preprocessing.
with open(filename, "r") as f:
    lines = list(f)
def convert_to_int(value):
    """Convert an assembly operand to an integer.

    Strings are parsed as hexadecimal when they carry a ``0x``/``0X`` prefix
    (optionally preceded by a sign) and as decimal otherwise. Integers are
    returned unchanged.

    Args:
        value: A numeric string such as ``"42"``, ``"0x2A"`` or ``"-0x10"``,
            or an ``int``.

    Returns:
        The integer value of *value*.

    Raises:
        ValueError: If a string is not a valid number, or if *value* is
            neither a ``str`` nor an ``int``.
    """
    if isinstance(value, int):
        return value
    if not isinstance(value, str):
        raise ValueError(f"Unsupported type for conversion: {type(value)}")

    text = value.strip().lower()
    # Look past an optional sign so "-0x10" is still recognised as hex;
    # int(..., 16) itself accepts both the sign and the "0x" prefix.
    if text.lstrip("+-").startswith("0x"):
        return int(text, 16)
    return int(text)
import os
from termcolor import colored
# Maps each %define macro name to its replacement text. preprocess() fills it
# in and the assembly loop substitutes matching tokens from it.
macro_definitions = {}
def preprocess(lines, filename="main.asm", included_files=None):
    """Expand directives in *lines* and validate the resulting assembly.

    ``%include "file"`` lines are replaced by the recursively preprocessed
    contents of that file. ``%define NAME VALUE`` lines are recorded in the
    module-level ``macro_definitions`` dict and removed from the output.
    The expanded source is then checked in two passes: the first records
    label offsets and the total program size, the second validates operands,
    stack usage and label references. Diagnostics are printed with
    ``colored``.

    Args:
        lines: Source lines of the file being processed.
        filename: Name used as the prefix of printed diagnostics.
        included_files: Set of include paths that were already expanded. It
            is shared with recursive calls, so a path can only be included
            once.

    Returns:
        The expanded list of source lines (includes inlined, ``%define``
        lines removed).

    Raises:
        SystemExit: With status 1, after printing the diagnostics, if any
            error was found or the program does not fit in memory.
    """
    if included_files is None:
        included_files = set()  # Tracks included files to prevent recursion

    errors = []
    warnings = []
    error_flag = False

    # Memory and stack tracking
    instruction_count = 0
    memory_limit = 1024  # Total memory available
    stack_balance = 0

    # Valid registers and instructions
    valid_registers = {"a", "b", "c", "d", "e", "f"}
    valid_instructions = {"ldw", "mov", "add", "sub", "str", "ldr", "int",
                          "push", "pop", "jsr", "ret", "xor", "and", "jmp",
                          "mul", "div", "bne", "beq", "blt", "ldb", "stb"}
    branch_instructions = {"bne", "beq", "blt"}
    label_references = []
    labels = {}

    # Expand include directives and collect macro definitions
    expanded_lines = []
    for line_number, line in enumerate(lines, start=1):
        code = line.strip()
        if code.startswith("%include"):
            parts = code.split(maxsplit=1)
            if len(parts) != 2:
                errors.append((line_number, "Invalid %include syntax", line))
                error_flag = True
                continue
            include_file = parts[1].strip("\"")
            if include_file in included_files:
                errors.append((line_number, f"Recursive inclusion detected for file '{include_file}'", line))
                error_flag = True
                continue
            if not os.path.exists(include_file):
                errors.append((line_number, f"Included file '{include_file}' not found", line))
                error_flag = True
                continue
            try:
                included_files.add(include_file)
                with open(include_file, 'r') as f:
                    included_lines = f.readlines()
                expanded_lines.extend(preprocess(included_lines, filename=include_file, included_files=included_files))
            except Exception as e:
                # SystemExit raised by the nested call is not an Exception
                # subclass, so a failed include still aborts the whole run.
                errors.append((line_number, f"Failed to include file '{include_file}': {str(e)}", line))
                error_flag = True
        elif code.startswith("%define"):
            parts = code.split(maxsplit=2)
            if len(parts) != 3:
                errors.append((line_number, "Invalid %define syntax", line))
                error_flag = True
                continue
            macro_name, macro_value = parts[1], parts[2]
            if macro_name in macro_definitions:
                errors.append((line_number, f"Macro '{macro_name}' redefined", line))
                error_flag = True
                continue
            macro_definitions[macro_name] = macro_value
            # The %define line itself is not part of the output.
        else:
            expanded_lines.append(line)

    # First pass: record label offsets and calculate program length
    for line_number, line in enumerate(expanded_lines, start=1):
        code = line.split(";")[0].strip()  # Strip comments and whitespace
        if not code:
            continue

        if code.endswith(":"):
            label_name = code[:-1]
            if label_name in labels:
                warnings.append((line_number, f"Duplicate label '{label_name}'", line))
            labels[label_name] = instruction_count
            continue

        parts = code.split()
        instruction = parts[0].lower()
        if instruction == "db":
            if len(parts) < 2:
                errors.append((line_number, f"Missing operand for '{instruction}'", line))
                error_flag = True
                continue
            string_literal = " ".join(parts[1:]).strip("\"")
            instruction_count += len(string_literal) + 1  # Include null terminator
        elif instruction in valid_instructions:
            # Branches carry two registers and a target (4 bytes); every
            # other instruction is encoded in 3 bytes.
            instruction_count += 4 if instruction in branch_instructions else 3
        else:
            errors.append((line_number, f"Unknown instruction '{instruction}'", line))
            error_flag = True

    program_length = instruction_count  # Final length of the program

    # Second pass: validate instructions and operands
    for line_number, line in enumerate(expanded_lines, start=1):
        code = line.split(";")[0].strip()
        if not code or code.endswith(":"):
            continue

        parts = code.split()
        instruction = parts[0].lower()
        operands = parts[1:]

        if instruction == "db":
            string_literal = " ".join(operands).strip("\"")
            if not string_literal:
                errors.append((line_number, "Empty string literal in 'db'", line))
                error_flag = True
            continue

        # Strip commas, then resolve %define macros so that operands are
        # validated in the form the assembly loop will actually encode.
        operands = [op.replace(",", "") for op in operands]
        operands = [macro_definitions.get(op, op) for op in operands]

        if instruction == "ldw" and len(operands) == 2:
            reg = operands[0]
            if reg not in valid_registers:
                errors.append((line_number, f"Invalid register '{reg}'", line))
                error_flag = True
        elif instruction == "str" and len(operands) == 2:
            reg, address = operands
            if reg not in valid_registers:
                errors.append((line_number, f"Invalid register '{reg}'", line))
                error_flag = True
            try:
                if str(address).startswith("0x"):
                    mem_address = int(address, 16)
                else:
                    mem_address = int(address)
            except ValueError:
                errors.append((line_number, f"Invalid memory address '{address}'", line))
                error_flag = True
            else:
                if mem_address < program_length:
                    errors.append((line_number, f"Illegal memory write to program space in '{code}'", line))
                    error_flag = True
                # NOTE(review): address == memory_limit is accepted here;
                # confirm whether the emulator's last valid address is
                # memory_limit or memory_limit - 1.
                if mem_address > memory_limit:
                    errors.append((line_number, f"Illegal memory write out of bounds in '{code}'", line))
                    error_flag = True
        elif instruction in {"add", "sub", "mov", "xor", "and", "mul", "div", "ldb", "stb"} and len(operands) == 2:
            reg1, reg2 = operands
            if reg1 not in valid_registers or reg2 not in valid_registers:
                errors.append((line_number, f"Invalid register(s) in '{code}'", line))
                error_flag = True
        elif instruction in {"push", "pop"} and len(operands) == 1:
            reg = operands[0]
            if reg not in valid_registers:
                errors.append((line_number, f"Invalid register '{reg}'", line))
                error_flag = True
            if instruction == "push":
                stack_balance += 1
                if stack_balance > 16:  # Example stack limit
                    warnings.append((line_number, "Stack overflow detected", line))
            else:
                stack_balance -= 1
                if stack_balance < 0:
                    errors.append((line_number, f"Stack underflow detected at '{code}'", line))
                    error_flag = True
        elif instruction in branch_instructions:
            if len(operands) != 3:
                errors.append((line_number, f"Branch instruction '{instruction}' should have 2 registers and 1 label", line))
                error_flag = True
            else:
                reg1, reg2, label = operands
                if reg1 not in valid_registers or reg2 not in valid_registers:
                    errors.append((line_number, f"Invalid register(s) in '{instruction}'", line))
                    error_flag = True
                label_references.append((line_number, label, line))  # The third operand should be a label
        elif instruction in {"jmp", "jsr"}:
            if len(operands) != 1:
                errors.append((line_number, f"'{instruction}' instruction should have 1 operand (label)", line))
                error_flag = True
            else:
                # Only index the operand once its presence is confirmed; a
                # bare "jmp" would otherwise raise IndexError here.
                label_references.append((line_number, operands[0], line))

    # Check undefined labels
    for line_number, label, line in label_references:
        if label not in labels:
            errors.append((line_number, f"Undefined label '{label}'", line))
            error_flag = True

    # Check stack balance
    if stack_balance != 0:
        warnings.append((0, "Stack imbalance detected, unbalanced push/pop operations", ""))

    # Print errors and warnings. Source lines still carry their trailing
    # newline, so trim it before echoing and sizing the underline.
    for line_number, message, code_line in errors:
        shown = code_line.rstrip()
        print(colored(f"{filename}:{line_number}: error: {message}", "red"))
        print(colored(f" {line_number} | {shown}", "white"))
        print(colored(f" | {'^' * len(shown)}", "cyan"))
    for line_number, message, code_line in warnings:
        if line_number == 0:
            print(colored(f"{filename}: warning: {message}", "yellow"))
        else:
            print(colored(f"{filename}:{line_number}: warning: {message}", "yellow"))
            print(colored(f" {line_number} | {code_line.rstrip()}", "white"))

    if program_length >= memory_limit:
        error_flag = True
        print(colored(f"GLOBAL: error: Program too big, size: {program_length}", "red"))

    if error_flag:
        # Same effect as exit(1), without relying on the site-injected
        # helper, which is absent under "python -S" and in frozen builds.
        raise SystemExit(1)
    print(colored(f"{filename}: Done!", "green"))
    return expanded_lines
# Expand %include/%define directives and validate the source. preprocess()
# terminates the process if it finds errors.
lines = preprocess(lines, filename=filename)
# Counter printed by the error helpers; the assembly loop increments it once
# per encoded instruction.
lineNumber = 0
# Set to True by the error helpers; when set, program.py is not written.
COMPILE_ERROR = False
def _ValueError(message):
    """Mark the build as failed and print *message* as a ValueError diagnostic.

    The current value of the module-level ``lineNumber`` counter is printed
    after the message.
    """
    global COMPILE_ERROR
    COMPILE_ERROR = True
    diagnostic = "ValueError: %s on line:" % message
    print(diagnostic, lineNumber)
def _IndexError(message):
    """Mark the build as failed and print *message* as an IndexError diagnostic.

    The current value of the module-level ``lineNumber`` counter is printed
    after the message.
    """
    global COMPILE_ERROR
    COMPILE_ERROR = True
    diagnostic = "IndexError: %s on line:" % message
    print(diagnostic, lineNumber)
def _InstructionError(message):
    """Mark the build as failed and print *message* as an InstructionError diagnostic.

    The current value of the module-level ``lineNumber`` counter is printed
    after the message.
    """
    global COMPILE_ERROR
    COMPILE_ERROR = True
    diagnostic = "InstructionError: %s on line:" % message
    print(diagnostic, lineNumber)
def load_include(filename):
    """Return the lines of the file at *filename*, newline characters kept."""
    with open(filename, "r") as source:
        return source.readlines()
# Group the preprocessed source by label: each (upper-cased) label maps to
# the instruction lines that follow it, in source order. Lines that appear
# before the first label are not stored.
label_to_instructions = {}
current_label = None
itterrator = 0
for line in lines:
    stripped_line = line.strip()
    # Classify the line with its ';' comment removed, as preprocess() does.
    # Otherwise "loop:  ; comment" is missed as a label, and an instruction
    # whose comment happens to end in ':' is mistaken for one.
    code = stripped_line.split(";")[0].strip()

    if code.endswith(':'):
        # It's a label: start a new, empty instruction list under its name.
        current_label = code[:-1].upper()
        label_to_instructions[current_label] = []
    elif stripped_line.startswith('%'):
        # Directive name: everything before the first space, without '%'.
        command = stripped_line.split(" ", 1)[0].replace("%", "")

        if command == "define":  #! defines are handled in the preprocessor
            pass
        elif command == "include":
            # The path is the text found between double quotes.
            include_filepath = "".join(stripped_line.split('"')[1::2])
            if include_filepath == '':
                continue
            # Appending while iterating is intentional: the included lines
            # are visited later by this same loop.
            lines += ["\n"] + load_include(include_filepath)
    elif stripped_line:
        # It's an instruction; add it to the current label's list.
        if current_label is not None:
            label_to_instructions[current_label].append(stripped_line)
    itterrator += 1
# Register letter to identifier
registerDict = {'a': 0x0, 'b': 0x1, 'c': 0x2, 'd': 0x3, 'e': 0x4, 'f': 0x5}

# Sizing pass: walk every instruction once to learn the byte address at which
# each label starts, so the emit pass can resolve forward references.
current_byte_offset = 0  # Tracks the current byte address
label_addresses = {}  # Maps label names to their resolved byte addresses
for label in label_to_instructions:
    label_addresses[label] = current_byte_offset
    for line in label_to_instructions[label]:
        line = line.strip().split(";")[0]  # Strip comments
        line = line.rstrip(" ")  # Strip spaces at end
        line = line.replace(",", "")  # Remove commas
        line = line.split(" ")  # Get each part of the instruction
        line[0] = line[0].lower()  # Make instruction lowercase
        if line[0] == '':
            continue
        # Substitute macros exactly as the emit pass does, so both passes
        # size the same tokens.
        line = [macro_definitions.get(token, token) for token in line]

        if line[0] == 'db':
            # The emit pass writes one byte per character plus a null
            # terminator; count them here too, otherwise every label after
            # a 'db' gets the wrong address and the emit pass aborts with
            # "address mismatch".
            raw_data = " ".join(line[1:]).strip("'\"")
            current_byte_offset += len(raw_data) + 1
        elif line[0] in {"ldw", "mov", "add", "sub", "str", "ldr", "int", "push", "pop", "jsr", "ret", 'xor', 'and', 'jmp', 'mul', 'div', 'ldb', 'stb'}:  # 3 byte instructions
            current_byte_offset += 3
        elif line[0] in {'bne', 'beq', 'blt'}:  # 4 byte instructions
            current_byte_offset += 4

current_byte_offset = 0  # Reset; the emit pass recounts from the start
# Opcode tables, grouped by operand layout. Every instruction is 3 bytes
# except the branches, which are 4.
# NOTE(review): the original marked 0x15 (ldb) as an assumed value; confirm
# the ldb/stb opcodes against the emulator.
_REG_REG_OPCODES = {      # opcode, register, register
    'mov': 0x2, 'add': 0x3, 'sub': 0x4, 'xor': 0xF, 'and': 0x10,
    'mul': 0x12, 'div': 0x13, 'ldb': 0x15, 'stb': 0x16,
}
_REG_VALUE_OPCODES = {    # opcode, register, numeric operand
    'ldw': 0x1, 'str': 0x5, 'ldr': 0x6,
}
_BRANCH_OPCODES = {       # opcode, register, register, label address
    'bne': 0x8, 'beq': 0x9, 'blt': 0x14,
}
_STACK_OPCODES = {        # opcode, register, padding
    'push': 0xB, 'pop': 0xC,
}
_JUMP_OPCODES = {         # opcode, label address, padding
    'jsr': 0xD, 'jmp': 0x11,
}


def _append_register(encoded, token):
    """Append the id of register *token* to *encoded*, or report an error."""
    register = registerDict.get(token.lower(), -1)
    if 0 <= register <= 5:
        encoded.append(register)
    else:
        _ValueError("Invalid Register")


def _append_label(encoded, token):
    """Append the address of label *token* to *encoded*, or report an error."""
    label = token.upper()
    if label in label_to_instructions:
        encoded.append(label_addresses[label])
    else:
        _InstructionError("Unknown Label")


def _encode_instruction(line, encoded):
    """Append the bytes for the tokenised instruction *line* to *encoded*.

    Bytes are appended one at a time, so whatever was encoded before a
    failure stays in *encoded*. A missing operand raises IndexError and a
    non-numeric value raises ValueError; the caller reports both.
    """
    mnemonic = line[0]
    if mnemonic in _REG_REG_OPCODES:
        encoded.append(_REG_REG_OPCODES[mnemonic])
        _append_register(encoded, line[1])
        _append_register(encoded, line[2])
    elif mnemonic in _REG_VALUE_OPCODES:
        encoded.append(_REG_VALUE_OPCODES[mnemonic])
        _append_register(encoded, line[1])
        encoded.append(convert_to_int(line[2]))  # the actual value as an int
    elif mnemonic == 'int':
        encoded.append(0xA)
        encoded.append(convert_to_int(line[1]))  # the actual value as an int
        encoded.append(0x0)  #! NEED THIS TO KEEP THE INSTRUCTION AT 3 BYTES
    elif mnemonic in _BRANCH_OPCODES:
        encoded.append(_BRANCH_OPCODES[mnemonic])
        _append_register(encoded, line[1])
        _append_register(encoded, line[2])
        _append_label(encoded, line[3])
    elif mnemonic in _STACK_OPCODES:
        encoded.append(_STACK_OPCODES[mnemonic])
        _append_register(encoded, line[1])
        encoded.append(0x0)  # padding
    elif mnemonic in _JUMP_OPCODES:
        encoded.append(_JUMP_OPCODES[mnemonic])
        _append_label(encoded, line[1])
        encoded.append(0x0)  # padding
    elif mnemonic == 'ret':
        encoded.extend((0xE, 0x0, 0x0))  # opcode + two padding bytes
    else:
        print(line)
        _InstructionError("Unknown Instruction")


# Emit pass: encode every instruction, label by label, into outputBytes.
outputBytes = []
for label in label_to_instructions:
    # The sizing pass and this pass must agree on where each label starts.
    # The check is skipped after an error because a failed instruction
    # emits fewer bytes than its nominal size.
    if label_addresses[label] != current_byte_offset and not COMPILE_ERROR:
        raise IndexError(f"address mismatch, expected {label_addresses[label]}, got {current_byte_offset}")

    for line in label_to_instructions[label]:
        line = line.strip().split(";")[0]  # Strip comments
        line = line.rstrip(" ")  # Strip trailing spaces
        # NOTE(review): this also removes commas inside 'db' string data.
        line = line.replace(",", "")  # Remove commas
        line = line.split(" ")  # Split into instruction parts
        if not line[0]:
            continue
        line[0] = line[0].lower()  # Normalize instruction to lowercase

        # Replace every token that names a %define macro with its value.
        line = [macro_definitions.get(token, token) for token in line]

        if line[0] == 'db':
            # 'db' needs at least one data token after the directive. The
            # previous "< 3" check wrongly rejected one-word strings.
            if len(line) < 2:
                _InstructionError("Missing data for 'db' directive")
            raw_data = " ".join(line[1:]).strip("'\"")
            encoded = [ord(char) for char in raw_data]  # Character codes
            encoded.append(0)  # Null terminator
            outputBytes += encoded
            current_byte_offset += len(encoded)
            continue

        encoded = []
        try:
            _encode_instruction(line, encoded)
        except IndexError:
            _IndexError("Malformed Instruction")
        except ValueError:
            print(line)
            _ValueError("Unknown Error")

        current_byte_offset += len(encoded)
        lineNumber += 1
        outputBytes += encoded
# Write the assembled bytes as a Python list literal, or explain why nothing
# was written.
if COMPILE_ERROR:
    print("Compilation Error")
    print("This is most likely due to invalid macro, please check your code for typos ")
else:
    with open("program.py", "w") as f:
        bytecode = [str(value) for value in outputBytes]
        prg = "program = [" + ",".join(bytecode) + "]"
        f.write(prg)