From 5ea18b59585403adadd62dc43799b498894b4a3e Mon Sep 17 00:00:00 2001
From: Spencer Conlon <gigabitestudios@noreply.localhost>
Date: Tue, 24 Dec 2024 05:11:53 +0000
Subject: [PATCH] added files from previous

---
 asm-to-prg.py | 355 +++++++++++++++++++++++++-------
 c-to-asm.py   | 548 ++++++++++++++++++++++++++++++++++++++++++++++++++
 drive8.bin    | Bin 0 -> 68 bytes
 main.asm      |  27 +--
 main.c        |  12 ++
 5 files changed, 845 insertions(+), 97 deletions(-)
 create mode 100644 c-to-asm.py
 create mode 100644 main.c

diff --git a/asm-to-prg.py b/asm-to-prg.py
index 61a55eb..5a3c8b1 100644
--- a/asm-to-prg.py
+++ b/asm-to-prg.py
@@ -1,5 +1,5 @@
 
-filename = "try_to_fix_me.asm"
+filename = "test.asm"
 
 
 #
@@ -86,15 +86,24 @@ def convert_to_int(value):
 
 
 
+import os
+from termcolor import colored
+
+
+macro_definitions = {}
+
+
+def preprocess(lines, filename="main.asm", included_files=None):
+    if included_files is None:
+        included_files = set()  # Tracks included files to prevent recursion
 
-def preprocess(lines, filename="main.asm"):
     errors = []
     warnings = []
     error_flag = False
 
     # Memory and stack tracking
     instruction_count = 0
-    memory_limit = 256  # Total memory available
+    memory_limit = 1024  # Total memory available
     stack_balance = 0
     program_length = 0  # To calculate and validate memory access
 
@@ -102,12 +111,65 @@ def preprocess(lines, filename="main.asm"):
     valid_registers = {"a", "b", "c", "d", "e", "f"}
     valid_instructions = {"ldw", "mov", "add", "sub", "str", "ldr", "int",
                           "push", "pop", "jsr", "ret", "xor", "and", "jmp",
-                          "mul", "div", "bne", "beq", "blt"}
+                          "mul", "div", "bne", "beq", "blt", "ldb", "stb"}
     label_references = []
     labels = {}
 
-    # First pass: Parse instructions and calculate program length
+    # Expand include directives
+    expanded_lines = []
     for line_number, line in enumerate(lines, start=1):
+        code = line.strip()
+
+        if code.startswith("%include"):
+            # Handle include directives
+            parts = code.split(maxsplit=1)
+            if len(parts) != 2:
+                errors.append((line_number, "Invalid %include syntax", line))
+                error_flag = True
+                continue
+
+            include_file = parts[1].strip("\"")
+            if include_file in included_files:
+                errors.append((line_number, f"Recursive inclusion detected for file '{include_file}'", line))
+                error_flag = True
+                continue
+
+            if not os.path.exists(include_file):
+                errors.append((line_number, f"Included file '{include_file}' not found", line))
+                error_flag = True
+                continue
+
+            try:
+                included_files.add(include_file)
+                with open(include_file, 'r') as f:
+                    included_lines = f.readlines()
+                expanded_lines.extend(preprocess(included_lines, filename=include_file, included_files=included_files))
+            except Exception as e:
+                errors.append((line_number, f"Failed to include file '{include_file}': {str(e)}", line))
+                error_flag = True
+
+        elif code.startswith("%define"):
+            # Handle macros
+            parts = code.split(maxsplit=2)
+            if len(parts) != 3:
+                errors.append((line_number, "Invalid %define syntax", line))
+                error_flag = True
+                continue
+
+            macro_name, macro_value = parts[1], parts[2]
+            if macro_name in macro_definitions:
+                errors.append((line_number, f"Macro '{macro_name}' redefined", line))
+                error_flag = True
+                continue
+
+            macro_definitions[macro_name] = macro_value
+            continue  # Skip adding %define line to the output
+
+        else:
+            expanded_lines.append(line)
+
+    # First pass: Parse instructions and calculate program length
+    for line_number, line in enumerate(expanded_lines, start=1):
         code = line.split(";")[0].strip()  # Strip comments and whitespace
         if not code:
             continue
@@ -116,94 +178,114 @@ def preprocess(lines, filename="main.asm"):
         if code.endswith(":"):
             label_name = code[:-1]
             if label_name in labels:
-                warnings.append((line_number, f"duplicate label '{label_name}'", line))
+                warnings.append((line_number, f"Duplicate label '{label_name}'", line))
             labels[label_name] = instruction_count
             continue
 
         # Parse instruction
-        parts = re.split(r"\s+", code)
+        parts = code.split()
         instruction = parts[0].lower()
-        if instruction in valid_instructions:
-            if instruction in {"ldw", "mov", "add", "sub", "str", "ldr", "xor", "and", "mul", "div"}:
-                instruction_count += 3  # These are 3-byte instructions
+        if instruction == "db":
+            # Handle string definitions
+            if len(parts) < 2:
+                errors.append((line_number, f"Missing operand for '{instruction}'", line))
+                error_flag = True
+                continue
+
+            string_literal = " ".join(parts[1:]).strip("\"")
+            instruction_count += len(string_literal) + 1  # Include null terminator
+        elif instruction in valid_instructions:
+            if instruction in {"ldw", "mov", "add", "sub", "str", "ldr", "xor", "and", "mul", "div", "ldb", "stb"}:
+                instruction_count += 3
             elif instruction in {"bne", "beq", "blt"}:
-                instruction_count += 4  # Conditional branches are 4-byte instructions
+                instruction_count += 4
             elif instruction in {"push", "pop", "int", "jmp", "jsr", "ret"}:
-                instruction_count += 3  # Fixed size for other instructions
+                instruction_count += 3
         else:
-            errors.append((line_number, f"unknown instruction '{instruction}'", line))
+            errors.append((line_number, f"Unknown instruction '{instruction}'", line))
             error_flag = True
 
     program_length = instruction_count  # Final length of the program
 
     # Second pass: Validate instructions and operands
-    for line_number, line in enumerate(lines, start=1):
-        code = line.split(";")[0].strip()  # Strip comments and whitespace
+    for line_number, line in enumerate(expanded_lines, start=1):
+        code = line.split(";")[0].strip()
         if not code:
             continue
 
-        # Skip labels
+        # Skip labels (already recorded in the first pass)
         if code.endswith(":"):
             continue
 
-        parts = re.split(r"\s+", code)
+        parts = code.split()
         instruction = parts[0].lower()
         operands = parts[1:] if len(parts) > 1 else []
 
-        # Strip commas from registers and operands
-        operands = [op.replace(",", "") for op in operands]
+        # Handle db strings
+        if instruction == "db":
+            string_literal = " ".join(operands).strip("\"")
+            if not string_literal:
+                errors.append((line_number, "Empty string literal in 'db'", line))
+                error_flag = True
+            continue
 
-        # Validate instruction and operands
+        # Strip commas from operands
+        operands = [op.replace(",", "") for op in operands]
+        # Validate instruction and operands
         if instruction == "ldw" and len(operands) == 2:
             reg, value = operands
             if reg not in valid_registers:
-                errors.append((line_number, f"invalid register '{reg}'", line))
+                errors.append((line_number, f"Invalid register '{reg}'", line))
                 error_flag = True
         elif instruction == "str" and len(operands) == 2:
             reg, address = operands
             if reg not in valid_registers:
-                errors.append((line_number, f"invalid register '{reg}'", line))
+                errors.append((line_number, f"Invalid register '{reg}'", line))
                 error_flag = True
             try:
-                mem_address = int(address, 16)
+                if str(address).startswith("0x"):
+                    mem_address = int(address, 16)
+                else:
+                    mem_address = int(address)
+                
+                
                 if mem_address < program_length:
-                    errors.append((line_number, f"illegal memory write to program space in '{code}'", line))
+                    errors.append((line_number, f"Illegal memory write to program space in '{code}'", line))
                     error_flag = True
                 if mem_address > memory_limit:
-                    errors.append((line_number, f"illegal memory write out of bounds in '{code}'", line))
+                    errors.append((line_number, f"Illegal memory write out of bounds in '{code}'", line))
                     error_flag = True
-                    
             except ValueError:
-                errors.append((line_number, f"invalid memory address '{address}'", line))
+                errors.append((line_number, f"Invalid memory address '{address}'", line))
                 error_flag = True
-        elif instruction in {"add", "sub", "mov", "xor", "and", "mul", "div"} and len(operands) == 2:
+        elif instruction in {"add", "sub", "mov", "xor", "and", "mul", "div", "ldb", "stb"} and len(operands) == 2:
             reg1, reg2 = operands
             if reg1 not in valid_registers or reg2 not in valid_registers:
-                errors.append((line_number, f"invalid register(s) in '{code}'", line))
+                errors.append((line_number, f"Invalid register(s) in '{code}'", line))
                 error_flag = True
         elif instruction in {"push", "pop"} and len(operands) == 1:
             reg = operands[0]
             if reg not in valid_registers:
-                errors.append((line_number, f"invalid register '{reg}'", line))
+                errors.append((line_number, f"Invalid register '{reg}'", line))
                 error_flag = True
             if instruction == "push":
                 stack_balance += 1
                 if stack_balance > 16:  # Example stack limit
-                    warnings.append((line_number, "stack overflow detected", line))
+                    warnings.append((line_number, "Stack overflow detected", line))
             elif instruction == "pop":
                 stack_balance -= 1
                 if stack_balance < 0:
-                    errors.append((line_number, f"stack underflow detected at '{code}'", line))
+                    errors.append((line_number, f"Stack underflow detected at '{code}'", line))
                     error_flag = True
         # Validate branch instructions with two registers and one label
         elif instruction in {"bne", "beq", "blt"}:
             if len(operands) != 3:
-                errors.append((line_number, f"branch instruction '{instruction}' should have 2 registers and 1 label", line))
+                errors.append((line_number, f"Branch instruction '{instruction}' should have 2 registers and 1 label", line))
                 error_flag = True
             else:
                 reg1, reg2, label = operands
                 if reg1 not in valid_registers or reg2 not in valid_registers:
-                    errors.append((line_number, f"invalid register(s) in '{instruction}'", line))
+                    errors.append((line_number, f"Invalid register(s) in '{instruction}'", line))
                     error_flag = True
                 label_references.append((line_number, label, line))  # The third operand should be a label
         
@@ -217,12 +299,12 @@ def preprocess(lines, filename="main.asm"):
     # Check undefined labels
     for line_number, label, line in label_references:
         if label not in labels:
-            errors.append((line_number, f"undefined label '{label}'", line))
+            errors.append((line_number, f"Undefined label '{label}'", line))
             error_flag = True
 
-    # Check stack balance at the end
+    # Check stack balance
     if stack_balance != 0:
-        warnings.append((0, "stack imbalance detected, unbalanced push/pop operations", ""))
+        warnings.append((0, "Stack imbalance detected, unbalanced push/pop operations", ""))
 
     # Print errors and warnings
     for line_number, message, code_line in errors:
@@ -236,25 +318,22 @@ def preprocess(lines, filename="main.asm"):
         else:
             print(colored(f"{filename}:{line_number}: warning: {message}", "yellow"))
             print(colored(f"   {line_number} | {code_line}", "white"))
-            
+
     if program_length >= memory_limit:
         error_flag = True
         print(colored(f"GLOBAL: error: Program too big, size: {program_length}", "red"))
-        
 
-    # Final success message
     if not error_flag:
-        print(colored("Preprocessing complete. No errors detected!", "green"))
+        print(colored(f"{filename}: Done!", "green"))
     else:
         exit(1)
 
+    return expanded_lines
 
 
-
-preprocess(lines)
+lines = preprocess(lines, filename=filename)
 
 
-    
 
 
 
@@ -263,19 +342,39 @@ preprocess(lines)
 
 lineNumber = 0
 
+COMPILE_ERROR = False
+
 def _ValueError(message):
+    global COMPILE_ERROR
+    COMPILE_ERROR = True
     print("ValueError: %s on line:" % message, lineNumber)
     
 def _IndexError(message):
+    global COMPILE_ERROR
+    COMPILE_ERROR = True
+
     print("IndexError: %s on line:" % message, lineNumber)
 
 def _InstructionError(message):
+    global COMPILE_ERROR
+    COMPILE_ERROR = True
+
     print("InstructionError: %s on line:" % message, lineNumber)
     
     
     
 
 
+
+
+
+
+def load_include(filename):
+    with open(filename, "r") as f:
+        lines = f.readlines()
+        
+    return lines
+
 # for line in lines:
 #     line = line.split(";")[0] # filter out comments
 #     print(line)
@@ -287,6 +386,9 @@ current_label = None
 
 
 
+
+
+itterrator = 0
 for line in lines:
     # Remove leading and trailing whitespace
     stripped_line = line.strip()
@@ -296,10 +398,54 @@ for line in lines:
         current_label = stripped_line[:-1]  # Remove the colon
         current_label = current_label.upper()
         label_to_instructions[current_label] = []  # Initialize empty instruction list
+    elif stripped_line.startswith('%'):
+        # Get the command within the current line
+        
+        command = ""
+        
+        for char in stripped_line:
+            if char == "%":
+                continue
+                
+            if char == " ": # Space
+                break
+            
+            command += char
+            
+        #print(command)
+            
+        if command == "define": #! defines are handled in the preprocessor
+            pass
+        
+        elif command == "include":
+            # Get value between quotes
+            inQuote = False
+            include_filepath = ""
+            for char in stripped_line:
+                if char == '"':
+                    inQuote = not inQuote
+                elif inQuote:
+                    include_filepath += char
+                    
+            if include_filepath == '':
+                continue
+            
+                    
+            
+            lines += ["\n"] + load_include(include_filepath)
+        
+        
     elif stripped_line:
+        
+            
         # It's an instruction; add it to the current label's list
         if current_label is not None:
             label_to_instructions[current_label].append(stripped_line)
+        
+
+            
+    itterrator+=1
+            
             
             
             
@@ -307,68 +453,111 @@ for line in lines:
 # register letter to identifyer
 registerDict = {'a':0x0,'b':0x1,'c':0x2,'d':0x3,'e':0x4,'f':0x5}
 
-
-
 current_byte_offset = 0  # Tracks the current byte address
 label_addresses = {}  # Maps label names to their resolved byte addresses
 
 for label in label_to_instructions:
     label_addresses[label] = current_byte_offset
     for line in label_to_instructions[label]:
-        line = line.strip().split(";")[0] # strip comments
-        line = line.rstrip(" ") # strip spaces at end
+        line = line.strip().split(";")[0]  # Strip comments
+        line = line.rstrip(" ")           # Strip spaces at end
+        line = line.replace(",", "")      # Remove commas
+        line = line.split(" ")            # Get each part of the instruction
         
-        
-        line = line.replace(",", "") # stupid way to remove commas but it works
-        
-        line = line.split(" ") # get each part of the instruction
-        
-        
-        line[0] = line[0].lower() # make instruction lowercase
+        line[0] = line[0].lower()  # Make instruction lowercase
         
         if line[0] == '':
             continue
         
-        if line[0] in {"ldw","mov","add","sub","str","ldr","int","push","pop","jsr", "ret", 'xor', 'and', 'jmp', 'mul', 'div'}: # 3 byte instructions
+        if line[0] in {"ldw", "mov", "add", "sub", "str", "ldr", "int", "push", "pop", "jsr", "ret", 'xor', 'and', 'jmp', 'mul', 'div', 'ldb', 'stb'}:  # 3 byte instructions
             current_byte_offset += 3
-            
-        elif line[0] in {'bne', 'beq', 'blt'}: # 4 byte instructions
+        
+        elif line[0] in {'bne', 'beq', 'blt'}:  # 4 byte instructions
             current_byte_offset += 4
-    
-    
-    
 
 current_byte_offset = 0  # Tracks the current byte address
 #print(label_addresses)
 
+
+
+
 outputBytes = []
 for label in label_to_instructions:
     #print(label)
-    if label_addresses[label] != current_byte_offset:
+    if label_addresses[label] != current_byte_offset and not COMPILE_ERROR:
         raise IndexError(f"address mismatch, expected {label_addresses[label]}, got {current_byte_offset}")
     # Output the results
     for line in label_to_instructions[label]:
-        line = line.strip().split(";")[0] # strip comments
-        line = line.rstrip(" ") # strip spaces at end
+    
+        line = line.strip().split(";")[0]  # Strip comments
+        line = line.rstrip(" ")           # Strip trailing spaces
+        line = line.replace(",", "")      # Remove commas
+        line = line.split(" ")            # Split into instruction parts
         
-        
-        line = line.replace(",", "") # stupid way to remove commas but it works
-        
-        line = line.split(" ") # get each part of the instruction
-        
-        
-        line[0] = line[0].lower() # make instruction lowercase
-        
-        if line[0] == '':
+        if not line[0]:
             continue
+
+        line[0] = line[0].lower()  # Normalize instruction to lowercase
+        
+
+        # Replace any token that names a %define macro with its defined value
+        index = 0
+        for operator in line:
+            if operator in macro_definitions:
+                line[index] = macro_definitions[line[index]]
+            index += 1
+        
+        
             
         #print(line)
         
-        
         #! Code to convert to bytes
         bytes = []
         try:
-            if line[0] == 'ldw': # Load immediate to register
+        
+            # Handle 'db' directive for defining strings or raw bytes
+            if line[0] == 'db':
+                if len(line) < 3:
+                    _InstructionError("Missing data for 'db' directive")
+                raw_data = " ".join(line[1:]).strip("'\"")
+                bytes = [ord(char) for char in raw_data]  # Convert characters to ASCII values
+                bytes.append(0)  # Null terminator
+                outputBytes += bytes
+                current_byte_offset += len(bytes)
+                continue
+            
+            # Handle the 'ldb' instruction
+            elif line[0] == 'ldb':  # Load byte to register
+                
+                bytes.append(0x15)  # Assuming 0x15 for 'ldb'
+                register = registerDict.get(line[1].lower(), -1)
+                if register >= 0 and register <= 5:
+                    bytes.append(register)
+                else:
+                    _ValueError("Invalid Register")
+                register = registerDict.get(line[2].lower(), -1)
+                if register >= 0 and register <= 5:
+                    bytes.append(register)
+                else:
+                    _ValueError("Invalid Register")
+                    
+                    
+            elif line[0] == 'stb':  # Store byte (two register operands)
+                
+                bytes.append(0x16)  # Assuming 0x16 for 'stb'
+                register = registerDict.get(line[1].lower(), -1)
+                if register >= 0 and register <= 5:
+                    bytes.append(register)
+                else:
+                    _ValueError("Invalid Register")
+                register = registerDict.get(line[2].lower(), -1)
+                if register >= 0 and register <= 5:
+                    bytes.append(register)
+                else:
+                    _ValueError("Invalid Register")
+            
+            
+            elif line[0] == 'ldw': # Load immediate to register
                 bytes.append(0x1) # byte for load immediate value
                 # set register ID:
                 register = registerDict.get(line[1].lower(),-1)
@@ -571,7 +760,6 @@ for label in label_to_instructions:
                 
             elif line[0] == 'ret': # Load immediate to register
                 bytes.append(0xE) 
-                # set register ID:
                 bytes.append(0x0) # padding
                 bytes.append(0x0) # padding
                 
@@ -692,6 +880,7 @@ for label in label_to_instructions:
                 
                 
             else:
+                print(line)
                 _InstructionError("Unknown Instruction")
                 
                 
@@ -699,6 +888,7 @@ for label in label_to_instructions:
         except IndexError:
             _IndexError("Maformed Instruction")
         except ValueError:
+            print(line)
             _ValueError("Unknown Error")
             
         current_byte_offset += len(bytes)
@@ -707,9 +897,18 @@ for label in label_to_instructions:
         lineNumber+=1
         outputBytes += bytes
         
-
-print(outputBytes)
+if not COMPILE_ERROR:
+    with open("program.py", "w") as f:
+        bytecode  = []   
+        for _, y in enumerate(outputBytes):
+            bytecode.append(str(y))
             
+        prg = "program = [" + ",".join(bytecode) + "]"
+        f.write(prg)
+else:
+    print(f"Compilation Error")     
+    print(f"This is most likely due to invalid macro, please check your code for typos ")           
+    
         
     
     
\ No newline at end of file
diff --git a/c-to-asm.py b/c-to-asm.py
new file mode 100644
index 0000000..55baac0
--- /dev/null
+++ b/c-to-asm.py
@@ -0,0 +1,548 @@
+import re
+import os
+from termcolor import colored
+
+
+class Variable:
+    def __init__(self, name, address, var_type="int"):
+        self.name = name
+        self.address = address
+        self.type = var_type
+
+class Compiler:
+    def __init__(self):
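+        # 1024 bytes total: 0x000 to 0x3FF. NOTE(review): data_ptr starts at 0x400, so the first allocate_bytes(1) returns 0x400 - confirm this is intended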
+        self.data_ptr = 0x400
+        self.variables = {}
+        self.struct_definitions = {}
+        self.in_struct_def = False
+        self.current_struct_name = None
+        self.current_struct_fields = []
+        self.defines = {}  # For #define macros
+        self.typedefs = {} # For typedef
+        self.label_counter = 0
+        self.block_stack = []  # For if/while blocks
+
+    def new_label(self, prefix):
+        lbl = f"{prefix}{self.label_counter}"
+        self.label_counter += 1
+        return lbl
+
+    def preprocess(self, filename):
+        lines = self._read_file_recursive(filename)
+        processed_lines = self._apply_defines(lines)
+        return processed_lines
+
+    def _read_file_recursive(self, filename, included_files=None):
+        if included_files is None:
+            included_files = set()
+
+        if filename in included_files:
+            # Prevent infinite recursion on includes
+            return []
+
+        included_files.add(filename)
+
+        result_lines = []
+        try:
+            with open(filename, "r") as f:
+                for line in f:
+                    line_stripped = line.strip()
+    
+                    # #include "file"
+                    inc_match = re.match(r'#include\s+"([^"]+)"', line_stripped)
+                    if inc_match:
+                        inc_file = inc_match.group(1)
+                        included_content = self._read_file_recursive(inc_file, included_files)
+                        result_lines.extend(included_content)
+                        continue
+    
+                    # #define KEY VALUE
+                    def_match = re.match(r'#define\s+([a-zA-Z_]\w*)\s+(.*)', line_stripped)
+                    if def_match:
+                        key = def_match.group(1)
+                        value = def_match.group(2)
+                        self.defines[key] = value
+                        continue
+    
+                    # typedef oldtype newtype;
+                    tmatch = re.match(r'typedef\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;',     line_stripped)
+                    if tmatch:
+                        oldt = tmatch.group(1)
+                        newt = tmatch.group(2)
+                        # Resolve oldt if it's also a typedef
+                        oldt = self.apply_typedef(oldt)
+                        self.typedefs[newt] = oldt
+                        continue
+    
+                    result_lines.append(line)
+        except FileNotFoundError as e:
+            print(colored(f"{filename}:0: error: {e}", "red"))
+            
+
+        return result_lines
+
+    def _apply_defines(self, lines):
+        
+        token_pattern = re.compile(r'([A-Za-z0-9_]+)')
+
+        processed = []
+        for line in lines:
+            parts = token_pattern.split(line)
+            # parts: tokens and separators
+            for i, part in enumerate(parts):
+                if part in self.defines:
+                    print(f"Replaced {part} with {self.defines[part]}")
+                    part = self.defines[part]
+                parts[i] = part
+            new_line = "".join(parts)
+            processed.append(new_line)
+        return processed
+
+    def apply_typedef(self, t):
+        if t in self.typedefs:
+            return self.typedefs[t]
+        return t
+
+    def allocate_bytes(self, count):
+        start_addr = self.data_ptr - (count - 1)
+        if start_addr < 0x000:
+            raise Exception("Out of memory!")
+        self.data_ptr = start_addr - 1
+        return start_addr
+
+    def allocate_var(self, name, var_type="int"):
+        var_type = self.apply_typedef(var_type)
+        if name in self.variables:
+            return self.variables[name]
+
+        if var_type.startswith("struct:"):
+            sname = var_type.split(":")[1]
+            fields = self.struct_definitions[sname]
+            length = len(fields)  # each 1 byte
+            start_addr = self.allocate_bytes(length)
+            var = Variable(name, start_addr, var_type)
+            self.variables[name] = var
+            return var
+        else:
+            start_addr = self.allocate_bytes(1)
+            var = Variable(name, start_addr, var_type)
+            self.variables[name] = var
+            return var
+
+    def allocate_array(self, name, length, var_type="int"):
+        var_type = self.apply_typedef(var_type)
+        arr_start = self.allocate_bytes(length)
+        var_addr = self.allocate_bytes(1)
+        var = Variable(name, var_addr, "array")
+        self.variables[name] = var
+        return var, arr_start
+
+    def store_string(self, string_value):
+        string_value = string_value.replace('\\n', '\n')
+        length = len(string_value) + 1
+        start_addr = self.allocate_bytes(length)
+        asm = []
+        current_addr = start_addr
+        for ch in string_value:
+            ascii_val = ord(ch)
+            asm.append(f"ldw a, {ascii_val}")
+            asm.append(f"str a, 0x{current_addr:X}")
+            current_addr += 1
+        asm.append("ldw a, 0")
+        asm.append(f"str a, 0x{current_addr:X}")
+        return asm, start_addr
+
+    def get_struct_field_offset(self, struct_type, field_name):
+        sname = struct_type.split(":")[1]
+        fields = self.struct_definitions[sname]
+        for i, (fname, ftype) in enumerate(fields):
+            if fname == field_name:
+                return i
+        raise Exception(f"Field {field_name} not found in {struct_type}")
+
+    def parse_condition(self, cond_str):
+        # cond_str like "a == b" or "a != b"
+        m = re.match(r'([a-zA-Z_]\w*)\s*(==|!=)\s*([a-zA-Z_]\w*)', cond_str.strip())
+        if not m:
+            raise Exception("Unsupported condition: " + cond_str)
+        var1, op, var2 = m.groups()
+        return var1, op, var2
+
+    def compile_condition(self, var1, op, var2):
+        asm = []
+        v1 = self.allocate_var(var1)
+        v2 = self.allocate_var(var2)
+        asm.append(f"ldr a, 0x{v1.address:X}")
+        asm.append(f"ldr b, 0x{v2.address:X}")
+        # a = a - b
+        asm.append("sub a, b")
+        return asm, op
+
+    def extract_comment(self, line):
+        comment_index = line.find('//')
+        if comment_index != -1:
+            code_part = line[:comment_index]
+            comment_part = line[comment_index+2:].strip()
+            return code_part, comment_part
+        return line, None
+
+    def compile_line(self, code_part):
+        """Translate one line of C source into a list of assembly lines.
+
+        Returns the emitted instructions; [] for struct lines and for blank
+        or unrecognized input.  Raises if parse_condition rejects a condition.
+        """
+        line = code_part.strip()
+        asm = []
+
+        if self.in_struct_def:
+            if line.startswith("};"):
+                self.struct_definitions[self.current_struct_name] = self.current_struct_fields
+                self.in_struct_def = False
+                self.current_struct_name = None
+                self.current_struct_fields = []
+                return asm
+            mfield = re.match(r'int\s+([a-zA-Z_]\w*)\s*;', line)
+            if mfield:
+                fname = mfield.group(1)
+                ftype = "int"
+                self.current_struct_fields.append((fname, ftype))
+            return asm
+
+        # struct definition start
+        msd = re.match(r'struct\s+([a-zA-Z_]\w*)\s*\{', line)
+        if msd:
+            self.in_struct_def = True
+            self.current_struct_name = msd.group(1)
+            self.current_struct_fields = []
+            return asm
+
+        # struct var declaration
+        msv = re.match(r'struct\s+([a-zA-Z_]\w*)\s+([a-zA-Z_]\w*)\s*;', line)
+        if msv:
+            sname, varname = msv.groups()
+            var_type = "struct:" + sname
+            self.allocate_var(varname, var_type)
+            return asm
+
+        # if statement
+        mif = re.match(r'if\s*\(([^)]+)\)\s*\{', line)
+        if mif:
+            cond_str = mif.group(1)
+            var1, op, var2 = self.parse_condition(cond_str)
+            end_label = self.new_label("endif")
+            cond_code, cmp_op = self.compile_condition(var1, op, var2)
+            asm.extend(cond_code)
+            # if '==': jump if not zero a != 0
+            # if '!=': jump if zero a == 0
+            if cmp_op == '==':
+                asm.append("bne a, 0, " + end_label)
+            else:
+                asm.append("beq a, 0, " + end_label)
+            self.block_stack.append(('if', end_label))
+            return asm
+
+        # while statement
+        mwhile = re.match(r'while\s*\(([^)]+)\)\s*\{', line)
+        if mwhile:
+            cond_str = mwhile.group(1)
+            var1, op, var2 = self.parse_condition(cond_str)
+            start_label = self.new_label("whilestart")
+            end_label = self.new_label("whileend")
+            asm.append(start_label + ":")
+            cond_code, cmp_op = self.compile_condition(var1, op, var2)
+            asm.extend(cond_code)
+            if cmp_op == '==':
+                asm.append("bne a, 0, " + end_label)
+            else:
+                asm.append("beq a, 0, " + end_label)
+            self.block_stack.append(('while', start_label, end_label))
+            return asm
+
+        # end of block
+        if line == "}":
+            if not self.block_stack:
+                return asm
+            blk = self.block_stack.pop()
+            if blk[0] == 'if':
+                end_label = blk[1]
+                asm.append(end_label + ":")
+            elif blk[0] == 'while':
+                start_label = blk[1]
+                end_label = blk[2]
+                # jump back to start
+                asm.append(f"jmp {start_label}")
+                asm.append(end_label + ":")
+            return asm
+
+        # p.x = number;
+        m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
+        if m:
+            varname, fieldname, value = m.groups()
+            value = int(value)
+            v = self.allocate_var(varname)
+            offset = self.get_struct_field_offset(v.type, fieldname)
+            asm.append(f"ldr a, 0x{v.address:X}")
+            if offset != 0:
+                asm.append(f"ldw b, {offset}")
+                asm.append("add a, b")
+            asm.append(f"ldw c, {value}")
+            asm.append("stb c, a")
+            return asm
+
+        # p.x = var + number;
+        m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
+        if m:
+            varname, fieldname, srcvar, number = m.groups()
+            number = int(number)
+            v = self.allocate_var(varname)
+            offset = self.get_struct_field_offset(v.type, fieldname)
+            asm.append(f"ldr a, 0x{v.address:X}")
+            if offset != 0:
+                asm.append(f"ldw b, {offset}")
+                asm.append("add a, b")
+            v2 = self.allocate_var(srcvar)
+            asm.append(f"ldr c, 0x{v2.address:X}")
+            asm.append(f"ldw d, {number}")
+            asm.append("add c, d")
+            asm.append("stb c, a")
+            return asm
+
+        # p.x = srcvar;
+        m = re.match(r'([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*;', line)
+        if m:
+            varname, fieldname, srcvar = m.groups()
+            v = self.allocate_var(varname)
+            offset = self.get_struct_field_offset(v.type, fieldname)
+            asm.append(f"ldr a, 0x{v.address:X}")
+            if offset != 0:
+                asm.append(f"ldw b, {offset}")
+                asm.append("add a, b")
+            v2 = self.allocate_var(srcvar)
+            asm.append(f"ldr c, 0x{v2.address:X}")
+            asm.append("stb c, a")
+            return asm
+
+        # x = p.x;
+        m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\s*;', line)
+        if m:
+            dst, varname, fieldname = m.groups()
+            v = self.allocate_var(varname)
+            offset = self.get_struct_field_offset(v.type, fieldname)
+            vd = self.allocate_var(dst)
+            asm.append(f"ldr a, 0x{v.address:X}")
+            if offset != 0:
+                asm.append(f"ldw b, {offset}")
+                asm.append("add a, b")
+            asm.append("ldb c, a")
+            asm.append(f"str c, 0x{vd.address:X}")
+            return asm
+
+        # print_int(p.x);
+        m = re.match(r'print_int\(([a-zA-Z_]\w*)\.([a-zA-Z_]\w*)\)\s*;', line)
+        if m:
+            varname, fieldname = m.groups()
+            v = self.allocate_var(varname)
+            offset = self.get_struct_field_offset(v.type, fieldname)
+            asm.append(f"ldr a, 0x{v.address:X}")
+            if offset != 0:
+                asm.append(f"ldw b, {offset}")
+                asm.append("add a, b")
+            asm.append("ldb a, a")
+            asm.append("int 0x01")
+            return asm
+
+        # int arr[10];
+        m = re.match(r'int\s+([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
+        if m:
+            varname = m.group(1)
+            length = int(m.group(2))
+            arr_var, start_addr = self.allocate_array(varname, length)
+            asm.append(f"ldw a, 0x{start_addr:X}")
+            asm.append(f"str a, 0x{arr_var.address:X}")
+            return asm
+
+        # int x = number;
+        m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
+        if m:
+            varname = m.group(1)
+            value = int(m.group(2))
+            var = self.allocate_var(varname, "int")
+            asm.append(f"ldw a, {value}")
+            asm.append(f"str a, 0x{var.address:X}")
+            return asm
+
+        # int y = x + number;
+        m = re.match(r'int\s+([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
+        if m:
+            varname, var2, number = m.groups()
+            number = int(number)
+            v1 = self.allocate_var(varname, "int")
+            v2 = self.allocate_var(var2, "int")
+            asm.append(f"ldr a, 0x{v2.address:X}")
+            asm.append(f"ldw b, {number}")
+            asm.append("add a, b")
+            asm.append(f"str a, 0x{v1.address:X}")
+            return asm
+
+        # char *msg = "Hello\n";
+        m = re.match(r'char\s*\*\s*([a-zA-Z_]\w*)\s*=\s*"([^"]*)"\s*;', line)
+        if m:
+            varname, string_val = m.groups()
+            v = self.allocate_var(varname, "char*")
+            code, start_addr = self.store_string(string_val)
+            asm.extend(code)
+            asm.append(f"ldw a, 0x{start_addr:X}")
+            asm.append(f"str a, 0x{v.address:X}")
+            return asm
+
+        # var = number;
+        m = re.match(r'([a-zA-Z_]\w*)\s*=\s*(\d+)\s*;', line)
+        if m:
+            varname, value = m.groups()
+            value = int(value)
+            v = self.allocate_var(varname, "int")
+            asm.append(f"ldw a, {value}")
+            asm.append(f"str a, 0x{v.address:X}")
+            return asm
+
+        # var = var2 + number;
+        m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\s*\+\s*(\d+)\s*;', line)
+        if m:
+            varname, var2, number = m.groups()
+            number = int(number)
+            v1 = self.allocate_var(varname, "int")
+            v2 = self.allocate_var(var2, "int")
+            asm.append(f"ldr a, 0x{v2.address:X}")
+            asm.append(f"ldw b, {number}")
+            asm.append("add a, b")
+            asm.append(f"str a, 0x{v1.address:X}")
+            return asm
+
+        # var[index] = number;
+        m = re.match(r'([a-zA-Z_]\w*)\[(\d+)\]\s*=\s*(\d+)\s*;', line)
+        if m:
+            arr, index, value = m.groups()
+            index = int(index)
+            value = int(value)
+            arr_var = self.allocate_var(arr)
+            asm.append(f"ldr a, 0x{arr_var.address:X}")
+            asm.append(f"ldw b, {index}")
+            asm.append("add a, b")
+            asm.append(f"ldw c, {value}")
+            asm.append("stb c, a")
+            return asm
+
+        # x = arr[index];
+        m = re.match(r'([a-zA-Z_]\w*)\s*=\s*([a-zA-Z_]\w*)\[(\d+)\]\s*;', line)
+        if m:
+            varname, arr, index = m.groups()
+            index = int(index)
+            v = self.allocate_var(varname, "int")
+            arr_var = self.allocate_var(arr)
+            asm.append(f"ldr a, 0x{arr_var.address:X}")
+            asm.append(f"ldw b, {index}")
+            asm.append("add a, b")
+            asm.append("ldb d, a")
+            asm.append(f"str d, 0x{v.address:X}")
+            return asm
+
+        # print_char(var);
+        m = re.match(r'print_char\(([a-zA-Z_]\w*)\)\s*;', line)
+        if m:
+            varname = m.group(1)
+            v = self.allocate_var(varname)
+            asm.append(f"ldr a, 0x{v.address:X}")
+            asm.append("int 0x00")
+            return asm
+
+        # print_char(arr[index]);
+        m = re.match(r'print_char\(([a-zA-Z_]\w*)\[(\d+)\]\)\s*;', line)
+        if m:
+            arr, index = m.groups()
+            index = int(index)
+            arr_var = self.allocate_var(arr)
+            asm.append(f"ldr a, 0x{arr_var.address:X}")
+            asm.append(f"ldw b, {index}")
+            asm.append("add a, b")
+            asm.append("ldb a, a")
+            asm.append("int 0x00")
+            return asm
+
+        # print_int(var);
+        m = re.match(r'print_int\(([a-zA-Z_]\w*)\)\s*;', line)
+        if m:
+            varname = m.group(1)
+            v = self.allocate_var(varname)
+            asm.append(f"ldr a, 0x{v.address:X}")
+            asm.append("int 0x01")
+            return asm
+
+        # print_string(var);
+        m = re.match(r'print_string\(([a-zA-Z_]\w*)\)\s*;', line)
+        if m:
+            v = self.allocate_var(m.group(1), "char*")
+            # Unique labels per call; fixed names clash on a second print_string.
+            loop_label = self.new_label("string_loop")
+            done_label = self.new_label("string_end")
+            asm.extend(["ldw d, 0", f"ldr b, 0x{v.address:X}", "ldw c, 1",
+                        loop_label + ":", "ldb a, b", f"beq a, d, {done_label}",
+                        "int 0x00", "add b, c", f"jmp {loop_label}", done_label + ":"])
+            return asm
+
+        # return number;
+        m = re.match(r'return\s+(\d+)\s*;', line)
+        if m:
+            asm.append("int 0xFF")
+            return asm
+
+        # Unrecognized line or empty
+        return asm
+
+    def compile_c(self, c_code):
+        """Compile preprocessed C source text into a list of assembly lines.
+
+        Lines outside main (e.g. struct definitions) are compiled only for
+        their side effects; the body of main is compiled exactly once, so
+        compile_line does not store strings or take labels twice.
+        """
+        body, outside, in_main, done = [], [], False, False
+        for cline in c_code.split('\n'):
+            cline = cline.rstrip()
+            if done or not (in_main or 'int main(' in cline):
+                outside.append(cline)
+            elif 'int main(' in cline:
+                in_main = True  # the header line itself is never compiled
+            elif cline.startswith('}'):
+                in_main, done = False, True  # first column-0 brace ends main
+            else:
+                body.append(cline)
+
+        for cline in outside:
+            self.compile_line(cline)  # output discarded; registers structs
+
+        asm = ["main:"]
+        for line in body:
+            code_part, comment_part = self.extract_comment(line)
+            instructions = self.compile_line(code_part)
+            if not instructions:
+                if comment_part:
+                    asm.append(f"    ; {comment_part}")
+                continue
+            for i, instr in enumerate(instructions):
+                # The source comment rides on the first emitted instruction.
+                suffix = f"    ; {comment_part}" if i == 0 and comment_part else ""
+                asm.append(f"    {instr}{suffix}")
+
+        return asm
+
+if __name__ == "__main__":
+    # Script entry point: compile main.c into test.asm.
+    compiler = Compiler()
+    c_code = "\n".join(compiler.preprocess("main.c"))
+    asm_code = compiler.compile_c(c_code)
+
+    # One assembly statement per output line.
+    with open("test.asm", "w") as out:
+        out.writelines(f"{line}\n" for line in asm_code)
diff --git a/drive8.bin b/drive8.bin
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..77b77dec0732f82c6f724c88a614c8cc9a52fb71 100644
GIT binary patch
literal 68
jcmZQ5Pb^MBMmgD8i79Dtlmll08QDM#l7*@OQ6PB$l2sb4

literal 0
HcmV?d00001

diff --git a/main.asm b/main.asm
index 6610d1c..b69d3d4 100644
--- a/main.asm
+++ b/main.asm
@@ -1,9 +1,12 @@
 ; Initialize text mode
+
+
 main:
-    ldw a, 1        ; Mode: 1 for text mode
+    ldw a, 0        ; Mode: 0 (1 = text mode) - NOTE(review): confirm 0 is intended
     ldw b, 800      ; Horizontal resolution
     ldw c, 600      ; Vertical resolution
     int 0x70        ; Initialize display
+    jsr test
     ldw b, 0        ; Cursor position (character cell index)
     ldw c, 0xFFFFFF ; White color
     
@@ -25,22 +28,8 @@ main_loop:
 
     ; Render the character
     int 0x72        ; Render the character (using the keycode from register 0x0 at position b)
-    
-        ;  Write letter to disk at index   ;push a
-        ;  Write letter to disk at index   ;push b
-        ;  Write letter to disk at index   ;push c
-        ;  Write letter to disk at index   ;push d
-        ;  Write letter to disk at index   ;
-        ;  Write letter to disk at index   ;ldw a, 8        ; disk number
-        ;  Write letter to disk at index   ;ldw b, 0       ; sector number
-        ;  Write letter to disk at index   ;ldr c, 0xEE     ; byte offset
-        ;  Write letter to disk at index   ;ldr d, 0xEF     ; value to write
-        ;  Write letter to disk at index   ;
-        ;  Write letter to disk at index   ;int 0x81        ; Write
-        ;  Write letter to disk at index   ;
-        ;  Write letter to disk at index   ;pop d
-        ;  Write letter to disk at index   ;pop c
-        ;  Write letter to disk at index   ;pop b
-        ;  Write letter to disk at index   ;pop a
-    ; Reset cursor to the top of the screen
     jmp main_loop   ; Jump back to the main loop
+
+
+
+%include "std.asm"
\ No newline at end of file
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..40bf4cc
--- /dev/null
+++ b/main.c
@@ -0,0 +1,12 @@
+int main()
+{
+    // Print the greeting once.
+    char *msg = "Welcome!\n";
+    print_string(msg);
+
+    while (1) // Not implemented - NOTE(review): loop body or compiler support? confirm
+    {
+    }
+
+    return 0;
+}