filename = "test.asm"
#
# Change the filename here to the path of your asm file.
# The assembled program is written to 'program.py'; copy that output to
# 'main.py' and replace the 'program' variable with the list, then run
# the 'main.py' file with python 3.11+
#
# there are 2 example programs, one that demonstrates bitmap mode
# and one that demonstrates text mode, with typing using the bios
# interrupts.
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

import os
import re  # kept from the original import list (currently unused)
import sys

try:
    from termcolor import colored
except ImportError:
    # termcolor only adds colour to diagnostics; without it the assembler
    # still works and simply prints plain text.
    def colored(text, *args, **kwargs):
        return text


# Macros collected from '%define NAME VALUE' lines by preprocess().
macro_definitions = {}

# register letter to identifier
registerDict = {'a': 0x0, 'b': 0x1, 'c': 0x2, 'd': 0x3, 'e': 0x4, 'f': 0x5}

# Line (in the preprocessed source) of the instruction being encoded;
# read by the _ValueError/_IndexError/_InstructionError reporters.
lineNumber = 0

# Set by any reporter below; when True no output file is written.
COMPILE_ERROR = False

# mnemonic -> (opcode, operand kinds, encoded size in bytes)
# operand kinds: 'r' = register, 'i' = immediate integer, 'l' = label.
# Bytes not used by the opcode or an operand are zero padding.
_INSTRUCTION_SET = {
    "ldw": (0x01, "ri", 3),
    "mov": (0x02, "rr", 3),
    "add": (0x03, "rr", 3),
    "sub": (0x04, "rr", 3),
    "str": (0x05, "ri", 3),
    "ldr": (0x06, "ri", 3),
    "bne": (0x08, "rrl", 4),
    "beq": (0x09, "rrl", 4),
    "int": (0x0A, "i", 3),
    "push": (0x0B, "r", 3),
    "pop": (0x0C, "r", 3),
    "jsr": (0x0D, "l", 3),
    "ret": (0x0E, "", 3),
    "xor": (0x0F, "rr", 3),
    "and": (0x10, "rr", 3),
    "jmp": (0x11, "l", 3),
    "mul": (0x12, "rr", 3),
    "div": (0x13, "rr", 3),
    "blt": (0x14, "rrl", 4),
    "ldb": (0x15, "rr", 3),
    "stb": (0x16, "rr", 3),
}


def convert_to_int(value):
    """Convert a decimal or '0x'-prefixed hexadecimal string to an int.

    Integers are returned unchanged.

    Raises:
        ValueError: if the string is not a valid number, or if *value* is
            neither a str nor an int.
    """
    if isinstance(value, int):
        return value
    if isinstance(value, str):
        return int(value, 16) if value.startswith("0x") else int(value)
    raise ValueError(f"Unsupported type for conversion: {type(value)}")


def load_include(filename):
    """Return the lines of *filename* as a list (newlines preserved)."""
    with open(filename, "r") as f:
        return f.readlines()


def _strip_comment(raw_line):
    """Return *raw_line* without its ';' comment and surrounding whitespace.

    Note that a ';' inside a 'db' string is also treated as a comment start.
    """
    return raw_line.split(";")[0].strip()


def _tokenize(code):
    """Split a comment-free source line into [mnemonic, operand, ...].

    The mnemonic is lower-cased.  For ordinary instructions commas and any
    whitespace separate operands.  For 'db' the data is split on single
    spaces only, so spacing and commas inside the string survive when the
    tokens are joined again.  Operands that exactly match a macro name are
    replaced by the macro value.  Returns [] for an empty line.
    """
    pieces = code.split(None, 1)
    if not pieces:
        return []
    mnemonic = pieces[0].lower()
    tail = pieces[1] if len(pieces) > 1 else ""
    if mnemonic == "db":
        operands = tail.split(" ") if tail else []
    else:
        operands = tail.replace(",", " ").split()
    return [mnemonic] + [macro_definitions.get(op, op) for op in operands]


def _db_payload(tokens):
    """Return the string defined by a tokenized 'db' line, quotes removed."""
    return " ".join(tokens[1:]).strip("'\"")


def _encoded_size(tokens):
    """Return how many bytes a tokenized line occupies in the output.

    'db' takes one byte per character plus a null terminator; unknown
    mnemonics take no space.
    """
    if not tokens:
        return 0
    if tokens[0] == "db":
        return len(_db_payload(tokens)) + 1
    spec = _INSTRUCTION_SET.get(tokens[0])
    return spec[2] if spec else 0


def preprocess(lines, filename="main.asm", included_files=None):
    """Expand directives in *lines* and validate the resulting program.

    '%include "file"' lines are replaced by the preprocessed contents of
    the file, '%define NAME VALUE' lines are recorded in
    ``macro_definitions`` and removed.  The expanded program is then
    checked (instruction names, registers, 'str' addresses, push/pop
    balance, label references) and all errors and warnings are printed.
    Line numbers in diagnostics refer to the expanded program.

    Args:
        lines: source lines, as returned by ``readlines()``.
        filename: name shown in diagnostics.
        included_files: set of files already included; used to reject a
            file that is included again.

    Returns:
        The expanded list of lines.

    Raises:
        SystemExit: with status 1 if any error was found.
    """
    if included_files is None:
        included_files = set()  # Tracks included files to prevent recursion

    errors = []
    warnings = []
    memory_limit = 1024  # Total memory available
    stack_limit = 16  # Example stack limit
    valid_registers = set(registerDict)

    def fail(line_number, message, line):
        errors.append((line_number, message, line))

    # Expand include directives and collect macros
    expanded_lines = []
    for line_number, line in enumerate(lines, start=1):
        code = line.strip()
        if code.startswith("%include"):
            parts = code.split(maxsplit=1)
            if len(parts) != 2:
                fail(line_number, "Invalid %include syntax", line)
                continue
            include_file = parts[1].strip("\"")
            if include_file in included_files:
                fail(line_number, f"Recursive inclusion detected for file '{include_file}'", line)
                continue
            if not os.path.exists(include_file):
                fail(line_number, f"Included file '{include_file}' not found", line)
                continue
            included_files.add(include_file)
            try:
                included_lines = load_include(include_file)
            except (OSError, UnicodeDecodeError) as e:
                fail(line_number, f"Failed to include file '{include_file}': {str(e)}", line)
                continue
            # The included file is preprocessed (and validated) on its own
            # before its lines are spliced in.
            expanded_lines.extend(
                preprocess(included_lines, filename=include_file, included_files=included_files)
            )
        elif code.startswith("%define"):
            parts = code.split(maxsplit=2)
            if len(parts) != 3:
                fail(line_number, "Invalid %define syntax", line)
                continue
            macro_name, macro_value = parts[1], parts[2]
            if macro_name in macro_definitions:
                fail(line_number, f"Macro '{macro_name}' redefined", line)
                continue
            macro_definitions[macro_name] = macro_value
            # %define lines are not copied to the output
        else:
            expanded_lines.append(line)

    # First pass: record label addresses and compute the program length.
    # Labels are stored upper-cased because the assembler treats them
    # case-insensitively.
    labels = {}
    program_length = 0
    for line_number, line in enumerate(expanded_lines, start=1):
        code = _strip_comment(line)
        if not code:
            continue
        if code.endswith(":"):
            label_name = code[:-1]
            if label_name.upper() in labels:
                warnings.append((line_number, f"Duplicate label '{label_name}'", line))
            labels[label_name.upper()] = program_length
            continue
        tokens = _tokenize(code)
        mnemonic = tokens[0]
        if mnemonic == "db" and len(tokens) < 2:
            fail(line_number, f"Missing operand for '{mnemonic}'", line)
        elif mnemonic == "db" or mnemonic in _INSTRUCTION_SET:
            program_length += _encoded_size(tokens)
        else:
            fail(line_number, f"Unknown instruction '{mnemonic}'", line)

    # Second pass: validate operands.  Macros are already expanded by
    # _tokenize, so a macro may stand for a register or an address.
    label_references = []
    stack_balance = 0
    for line_number, line in enumerate(expanded_lines, start=1):
        code = _strip_comment(line)
        if not code or code.endswith(":"):
            continue
        tokens = _tokenize(code)
        mnemonic, operands = tokens[0], tokens[1:]

        if mnemonic == "db":
            if not _db_payload(tokens):
                fail(line_number, "Empty string literal in 'db'", line)
            continue

        if mnemonic == "ldw" and len(operands) == 2:
            reg = operands[0]
            if reg.lower() not in valid_registers:
                fail(line_number, f"Invalid register '{reg}'", line)

        elif mnemonic == "str" and len(operands) == 2:
            reg, address = operands
            if reg.lower() not in valid_registers:
                fail(line_number, f"Invalid register '{reg}'", line)
            try:
                mem_address = convert_to_int(address)
            except ValueError:
                fail(line_number, f"Invalid memory address '{address}'", line)
            else:
                if mem_address < program_length:
                    fail(line_number, f"Illegal memory write to program space in '{code}'", line)
                # NOTE(review): address == memory_limit is accepted here
                # while a program of exactly memory_limit bytes is rejected
                # below -- confirm the intended boundary against the VM.
                if mem_address > memory_limit:
                    fail(line_number, f"Illegal memory write out of bounds in '{code}'", line)

        elif mnemonic in {"add", "sub", "mov", "xor", "and", "mul", "div", "ldb", "stb"} and len(operands) == 2:
            if any(op.lower() not in valid_registers for op in operands):
                fail(line_number, f"Invalid register(s) in '{code}'", line)

        elif mnemonic in {"push", "pop"} and len(operands) == 1:
            reg = operands[0]
            if reg.lower() not in valid_registers:
                fail(line_number, f"Invalid register '{reg}'", line)
            if mnemonic == "push":
                stack_balance += 1
                if stack_balance > stack_limit:
                    warnings.append((line_number, "Stack overflow detected", line))
            else:
                stack_balance -= 1
                if stack_balance < 0:
                    fail(line_number, f"Stack underflow detected at '{code}'", line)

        elif mnemonic in {"bne", "beq", "blt"}:
            # Branches take two registers and one label
            if len(operands) != 3:
                fail(line_number, f"Branch instruction '{mnemonic}' should have 2 registers and 1 label", line)
            else:
                reg1, reg2, label = operands
                if reg1.lower() not in valid_registers or reg2.lower() not in valid_registers:
                    fail(line_number, f"Invalid register(s) in '{mnemonic}'", line)
                label_references.append((line_number, label, line))

        elif mnemonic in {"jmp", "jsr"}:
            if len(operands) != 1:
                fail(line_number, f"'{mnemonic}' instruction should have 1 operand (label)", line)
            if operands:  # nothing to look up when the operand is missing
                label_references.append((line_number, operands[0], line))

    # Check undefined labels
    for line_number, label, line in label_references:
        if label.upper() not in labels:
            fail(line_number, f"Undefined label '{label}'", line)

    # Check stack balance
    if stack_balance != 0:
        warnings.append((0, "Stack imbalance detected, unbalanced push/pop operations", ""))

    # Print errors and warnings (source text shown without its newline)
    for line_number, message, code_line in errors:
        shown = code_line.rstrip("\r\n")
        print(colored(f"{filename}:{line_number}: error: {message}", "red"))
        print(colored(f" {line_number} | {shown}", "white"))
        print(colored(f" | {'^' * len(shown)}", "cyan"))

    for line_number, message, code_line in warnings:
        if line_number == 0:
            print(colored(f"{filename}: warning: {message}", "yellow"))
        else:
            shown = code_line.rstrip("\r\n")
            print(colored(f"{filename}:{line_number}: warning: {message}", "yellow"))
            print(colored(f" {line_number} | {shown}", "white"))

    error_flag = bool(errors)
    if program_length >= memory_limit:
        error_flag = True
        print(colored(f"GLOBAL: error: Program too big, size: {program_length}", "red"))

    if error_flag:
        sys.exit(1)
    print(colored(f"{filename}: Done!", "green"))
    return expanded_lines


def _report(kind, message):
    """Print an assembler diagnostic and mark the compilation as failed."""
    global COMPILE_ERROR
    COMPILE_ERROR = True
    print(f"{kind}: {message} on line:", lineNumber)


def _ValueError(message):
    """Report a bad operand value for the instruction being encoded."""
    _report("ValueError", message)


def _IndexError(message):
    """Report a missing operand for the instruction being encoded."""
    _report("IndexError", message)


def _InstructionError(message):
    """Report an unknown instruction, label or malformed directive."""
    _report("InstructionError", message)


def _encode_instruction(tokens, label_addresses, encoded):
    """Append the bytes for one tokenized line to *encoded*.

    Invalid registers and unknown labels are reported and their byte is
    left out.  A missing operand raises IndexError and a non-numeric
    immediate raises ValueError; bytes appended before the exception stay
    in *encoded*.
    """
    mnemonic = tokens[0]

    if mnemonic == "db":
        if len(tokens) < 2:
            _InstructionError("Missing data for 'db' directive")
        encoded.extend(ord(char) for char in _db_payload(tokens))
        encoded.append(0)  # Null terminator
        return

    spec = _INSTRUCTION_SET.get(mnemonic)
    if spec is None:
        print(tokens)
        _InstructionError("Unknown Instruction")
        return

    opcode, operand_kinds, size = spec
    encoded.append(opcode)
    for position, kind in enumerate(operand_kinds, start=1):
        operand = tokens[position]
        if kind == "r":
            register = registerDict.get(operand.lower())
            if register is None:
                _ValueError("Invalid Register")
            else:
                encoded.append(register)
        elif kind == "i":
            encoded.append(convert_to_int(operand))
        else:
            address = label_addresses.get(operand.upper())
            if address is None:
                _InstructionError("Unknown Label")
            else:
                encoded.append(address)
    # Padding keeps every instruction at its fixed size
    encoded.extend([0] * (size - 1 - len(operand_kinds)))


def assemble(lines):
    """Assemble preprocessed source *lines* into a list of integers.

    Instructions are grouped under the (upper-cased) label that precedes
    them, label addresses are computed from the encoded sizes (including
    'db' data), and each instruction is then encoded.  Problems are
    printed and recorded in the module-level ``COMPILE_ERROR`` flag, which
    is reset at the start of every call.

    Args:
        lines: source lines, normally the result of ``preprocess()``.

    Returns:
        The encoded program as a list of ints.

    Raises:
        IndexError: if a label's computed address disagrees with the
            number of bytes actually emitted and no error was reported.
    """
    global lineNumber, COMPILE_ERROR
    lineNumber = 0
    COMPILE_ERROR = False

    # Group tokenized instructions under their label.  A private copy of
    # the input is walked by index because '%include' may append lines.
    label_to_instructions = {}
    current_label = None
    queue = list(lines)
    position = 0
    while position < len(queue):
        raw_line = queue[position]
        position += 1
        stripped_line = raw_line.strip()
        code = _strip_comment(raw_line)

        if code.endswith(":"):
            current_label = code[:-1].upper()
            # A repeated label starts over with an empty instruction list
            label_to_instructions[current_label] = []
        elif stripped_line.startswith("%"):
            command = stripped_line.split(" ", 1)[0].replace("%", "")
            if command == "include":
                # The path is whatever sits between double quotes
                include_filepath = "".join(stripped_line.split('"')[1::2])
                if include_filepath:
                    queue.extend(["\n"] + load_include(include_filepath))
            # '%define' is handled in the preprocessor pass
        elif code:
            if current_label is None:
                # NOTE(review): such lines have always been dropped; now
                # at least say so.  Confirm whether they should be emitted.
                print(colored(
                    f"warning: line {position} is before the first label and is ignored: {code}",
                    "yellow",
                ))
            else:
                label_to_instructions[current_label].append((position, _tokenize(code)))

    # First pass: resolve every label to its byte address
    label_addresses = {}
    offset = 0
    for label, instructions in label_to_instructions.items():
        label_addresses[label] = offset
        offset += sum(_encoded_size(tokens) for _, tokens in instructions)

    # Second pass: encode
    outputBytes = []
    for label, instructions in label_to_instructions.items():
        # Sanity check that both passes agree on instruction sizes
        if label_addresses[label] != len(outputBytes) and not COMPILE_ERROR:
            raise IndexError(
                f"address mismatch, expected {label_addresses[label]}, got {len(outputBytes)}"
            )
        for source_line, tokens in instructions:
            lineNumber = source_line
            encoded = []
            try:
                _encode_instruction(tokens, label_addresses, encoded)
            except IndexError:
                _IndexError("Maformed Instruction")
            except ValueError:
                print(tokens)
                _ValueError("Unknown Error")
            outputBytes += encoded

    return outputBytes


def main():
    """Assemble ``filename`` and write the result to 'program.py'."""
    with open(filename, "r") as f:
        source_lines = f.readlines()

    outputBytes = assemble(preprocess(source_lines, filename=filename))

    if not COMPILE_ERROR:
        with open("program.py", "w") as f:
            f.write("program = [" + ",".join(str(value) for value in outputBytes) + "]")
    else:
        print("Compilation Error")
        print("This is most likely due to invalid macro, please check your code for typos ")


if __name__ == "__main__":
    main()