// main.cpp
#include <cstdio>
#include <cstdint>
#include <cstring>
#include <sstream>
#include <string>
#include <vector>
#include <chrono>
#include <thread>

// OpenGL/GLFW and GLEW headers.
#include <GL/glew.h>
#include <GLFW/glfw3.h>

// ImGui headers.
#include "imgui.h"
#include "imgui_impl_glfw.h"
#include "imgui_impl_opengl3.h"

#include "out.h"





// --------------------------------------------------------------------------------
// CPU Flags structure (for debugging and visualization)
// --------------------------------------------------------------------------------
struct CPUFlags {
    bool zero;      // Result of the last flag-setting operation was 0.
    bool negative;  // Result, interpreted as a signed 32-bit value, was < 0.
    bool carry;     // Unsigned carry-out (addition) or borrow (subtraction).
    bool overflow;  // Signed overflow occurred.
};

// --------------------------------------------------------------------------------
// RISC-V Emulator Class.
// Implements a subset of RV32I (register/immediate ALU operations, branches,
// JAL/JALR, LUI/AUIPC, LW/SW) plus ECALL, which is used for simple system calls
// that append text to a virtual terminal buffer.
//
// Notes on behavior:
//   - Memory is word-addressed internally; byte address A lives in memory[A / 4].
//   - Only ADD, SUB and ADDI update `flags` (RISC-V itself has no flags register;
//     they exist purely for the debugger display).
//   - Unsupported encodings inside a known opcode (e.g. loads other than LW) and
//     misaligned/out-of-range LW/SW accesses are skipped: the PC advances and no
//     state changes. Unknown opcodes and reserved branch encodings halt the CPU.
// --------------------------------------------------------------------------------
class RiscVEmulator {
public:
    // 32 general-purpose registers and the program counter.
    uint32_t registers[32];
    uint32_t pc;
    // Memory implemented as an array of 32-bit words.
    std::vector<uint32_t> memory;
    // CPU flags for arithmetic operations.
    CPUFlags flags;
    // Set when execution cannot continue (bad PC, unknown opcode, ...).
    bool halted;
    // A buffer to collect terminal output (our "screen").
    std::string terminalOutput;

    // Constructor: allocate zeroed memory (memSize in 32-bit words) and reset the CPU.
    explicit RiscVEmulator(size_t memSize)
        : pc(0), memory(memSize, 0), halted(false)
    {
        reset();
    }

    // Reset CPU state (PC, registers, flags, halt state, terminal buffer).
    // When clearMemory is true every memory word is zeroed as well; by default
    // memory is left untouched.
    void reset(bool clearMemory = false) {
        pc = 0;
        std::memset(registers, 0, sizeof(registers));
        flags = CPUFlags{};
        halted = false;
        terminalOutput.clear();
        if (clearMemory)
            memory.assign(memory.size(), 0);
    }

    // Load a program (vector of 32-bit instructions) into memory starting at
    // word 0. Words that do not fit into memory are dropped.
    void loadProgram(const std::vector<uint32_t>& program) {
        for (size_t i = 0; i < program.size() && i < memory.size(); i++) {
            memory[i] = program[i];
        }
    }

    // Update flags for result = op1 + op2 (32-bit wrap-around addition).
    void updateFlagsForAddition(uint32_t op1, uint32_t op2, uint32_t result) {
        const bool neg1 = static_cast<int32_t>(op1) < 0;
        const bool neg2 = static_cast<int32_t>(op2) < 0;
        const bool negResult = static_cast<int32_t>(result) < 0;
        flags.zero = (result == 0);
        flags.negative = negResult;
        // The sum wrapped around iff it is smaller than an operand.
        flags.carry = (result < op1);
        // Signed overflow: operands share a sign that the result does not.
        flags.overflow = (neg1 == neg2) && (negResult != neg1);
    }

    // Update flags for result = op1 - op2 (32-bit wrap-around subtraction).
    void updateFlagsForSubtraction(uint32_t op1, uint32_t op2, uint32_t result) {
        const bool neg1 = static_cast<int32_t>(op1) < 0;
        const bool neg2 = static_cast<int32_t>(op2) < 0;
        const bool negResult = static_cast<int32_t>(result) < 0;
        flags.zero = (result == 0);
        flags.negative = negResult;
        // Carry is used as a borrow flag here.
        flags.carry = (op1 < op2);
        // Signed overflow: operands differ in sign and the result's sign
        // differs from the minuend's.
        flags.overflow = (neg1 != neg2) && (negResult != neg1);
    }

    // Read a NUL-terminated string starting at the given byte address.
    // Bytes are taken from each word in little-endian order. The address does
    // not have to be word-aligned. Reading stops at the first NUL byte or at
    // the end of memory, whichever comes first.
    std::string readString(uint32_t address) const {
        std::string result;
        // 64-bit cursor: cannot wrap around, so the loop always terminates.
        for (uint64_t byteAddr = address; byteAddr / 4 < memory.size(); ++byteAddr) {
            const uint32_t word = memory[static_cast<size_t>(byteAddr / 4)];
            const unsigned shift = static_cast<unsigned>(byteAddr % 4) * 8;
            const char ch = static_cast<char>((word >> shift) & 0xFF);
            if (ch == '\0')
                break;
            result.push_back(ch);
        }
        return result;
    }

    // Execute one instruction (fetch-decode-execute).
    // Halts (without executing anything) when the PC is misaligned or outside
    // memory. Does nothing if the CPU is already halted.
    void step() {
        if (halted) return;
        if (pc % 4 != 0 || (pc / 4) >= memory.size()) {
            halted = true;
            return;
        }

        // Fetch and decode the fields shared by the instruction formats.
        const uint32_t inst   = memory[pc / 4];
        const uint32_t opcode = inst & 0x7F;
        const uint32_t rd     = (inst >> 7)  & 0x1F;
        const uint32_t funct3 = (inst >> 12) & 0x7;
        const uint32_t rs1    = (inst >> 15) & 0x1F;
        const uint32_t rs2    = (inst >> 20) & 0x1F;
        const uint32_t funct7 = (inst >> 25) & 0x7F;
        // I-type immediate, sign-extended; immU is the same bit pattern unsigned.
        const int32_t  immI = signExtend(inst >> 20, 12);
        const uint32_t immU = static_cast<uint32_t>(immI);

        // Latch the source operands BEFORE any register is written, so that an
        // instruction whose rd aliases rs1/rs2 still sees its original inputs
        // (this matters for the flag computation of ADD/SUB/ADDI).
        const uint32_t a = registers[rs1];
        const uint32_t b = registers[rs2];

        switch (opcode) {

        // R-type instructions.
        case 0x33: {
            switch (funct3) {
            case 0x0:
                if (funct7 == 0x00) { // ADD
                    const uint32_t sum = a + b;
                    registers[rd] = sum;
                    updateFlagsForAddition(a, b, sum);
                } else if (funct7 == 0x20) { // SUB
                    const uint32_t diff = a - b;
                    registers[rd] = diff;
                    updateFlagsForSubtraction(a, b, diff);
                }
                break;
            case 0x1: // SLL
                registers[rd] = a << (b & 0x1F);
                break;
            case 0x2: // SLT
                registers[rd] = (static_cast<int32_t>(a) < static_cast<int32_t>(b)) ? 1 : 0;
                break;
            case 0x3: // SLTU
                registers[rd] = (a < b) ? 1 : 0;
                break;
            case 0x4: // XOR
                registers[rd] = a ^ b;
                break;
            case 0x5:
                if (funct7 == 0x00) { // SRL
                    registers[rd] = a >> (b & 0x1F);
                } else if (funct7 == 0x20) { // SRA
                    registers[rd] = static_cast<uint32_t>(static_cast<int32_t>(a) >> (b & 0x1F));
                }
                break;
            case 0x6: // OR
                registers[rd] = a | b;
                break;
            case 0x7: // AND
                registers[rd] = a & b;
                break;
            }
            pc += 4;
            break;
        }

        // I-type arithmetic instructions.
        case 0x13: {
            // Shift amount of SLLI/SRLI/SRAI: low five bits of the immediate.
            const uint32_t shamt = immU & 0x1F;
            switch (funct3) {
            case 0x0: { // ADDI
                const uint32_t sum = a + immU;
                registers[rd] = sum;
                updateFlagsForAddition(a, immU, sum);
                break;
            }
            case 0x1: // SLLI
                registers[rd] = a << shamt;
                break;
            case 0x2: // SLTI
                registers[rd] = (static_cast<int32_t>(a) < immI) ? 1 : 0;
                break;
            case 0x3: // SLTIU
                registers[rd] = (a < immU) ? 1 : 0;
                break;
            case 0x4: // XORI
                registers[rd] = a ^ immU;
                break;
            case 0x5:
                if (funct7 == 0x00) { // SRLI
                    registers[rd] = a >> shamt;
                } else if (funct7 == 0x20) { // SRAI
                    registers[rd] = static_cast<uint32_t>(static_cast<int32_t>(a) >> shamt);
                }
                break;
            case 0x6: // ORI
                registers[rd] = a | immU;
                break;
            case 0x7: // ANDI
                registers[rd] = a & immU;
                break;
            }
            pc += 4;
            break;
        }

        // Branch instructions (B-type).
        case 0x63: {
            // imm[12|10:5] come from inst[31:25], imm[4:1|11] from inst[11:7].
            const uint32_t immBits = (((inst >> 31) & 0x1)  << 12) |
                                     (((inst >> 7)  & 0x1)  << 11) |
                                     (((inst >> 25) & 0x3F) << 5)  |
                                     (((inst >> 8)  & 0xF)  << 1);
            const int32_t offset = signExtend(immBits, 13);
            bool taken = false;
            switch (funct3) {
            case 0x0: taken = (a == b); break;                                             // BEQ
            case 0x1: taken = (a != b); break;                                             // BNE
            case 0x4: taken = (static_cast<int32_t>(a) <  static_cast<int32_t>(b)); break; // BLT
            case 0x5: taken = (static_cast<int32_t>(a) >= static_cast<int32_t>(b)); break; // BGE
            case 0x6: taken = (a <  b); break;                                             // BLTU
            case 0x7: taken = (a >= b); break;                                             // BGEU
            default:
                // funct3 2 and 3 are not branch encodings. Halt instead of
                // leaving the PC in place, which would re-execute this word forever.
                halted = true;
                break;
            }
            if (!halted)
                pc = taken ? (pc + offset) : (pc + 4);
            break;
        }

        // JAL: J-type jump.
        case 0x6F: {
            // imm[20|10:1|11|19:12] come from inst[31:12].
            const uint32_t immBits = (((inst >> 31) & 0x1)   << 20) |
                                     (((inst >> 12) & 0xFF)  << 12) |
                                     (((inst >> 20) & 0x1)   << 11) |
                                     (((inst >> 21) & 0x3FF) << 1);
            registers[rd] = pc + 4; // link register receives the return address
            pc += signExtend(immBits, 21);
            break;
        }

        // JALR: I-type jump. The target's lowest bit is cleared.
        case 0x67: {
            const uint32_t target = (a + immU) & ~1u;
            registers[rd] = pc + 4;
            pc = target;
            break;
        }

        // LUI: U-type.
        case 0x37:
            registers[rd] = inst & 0xFFFFF000;
            pc += 4;
            break;

        // AUIPC: U-type.
        case 0x17:
            registers[rd] = pc + (inst & 0xFFFFF000);
            pc += 4;
            break;

        // Load instructions (LW only).
        case 0x03: {
            if (funct3 == 0x2) { // LW
                const uint32_t addr = a + immU;
                if (addr % 4 == 0 && (addr / 4) < memory.size())
                    registers[rd] = memory[addr / 4];
            }
            pc += 4;
            break;
        }

        // Store instructions (SW only).
        case 0x23: {
            // S-type immediate: imm[11:5] = inst[31:25], imm[4:0] = inst[11:7].
            const int32_t immS = signExtend((funct7 << 5) | rd, 12);
            if (funct3 == 0x2) { // SW
                const uint32_t addr = a + static_cast<uint32_t>(immS);
                if (addr % 4 == 0 && (addr / 4) < memory.size())
                    memory[addr / 4] = b;
            }
            pc += 4;
            break;
        }

        // SYSTEM opcode 0x73. Any encoding with funct3 == 0 is treated as an
        // ECALL; other funct3 values are skipped.
        case 0x73:
            if (funct3 == 0)
                executeEcall();
            pc += 4;
            break;

        default:
            // Unknown opcode - halt the CPU.
            halted = true;
            break;
        }

        // Enforce that register x0 is always 0.
        registers[0] = 0;
    }

    // Produce a short textual description of the instruction word stored at the
    // given byte address: the raw word in hex followed by a mnemonic. Only
    // ADD/SUB are fully decoded; other opcodes get a generic label.
    // Returns "Invalid address" for a misaligned or out-of-range address.
    std::string disassembleInstruction(uint32_t address) const {
        if (address % 4 != 0 || (address / 4) >= memory.size())
            return "Invalid address";
        const uint32_t inst = memory[address / 4];
        const uint32_t opcode = inst & 0x7F;
        std::stringstream ss;
        ss << "0x" << std::hex << inst << std::dec << "  ";
        switch (opcode) {
        case 0x33: { // R-type
            const uint32_t rd     = (inst >> 7)  & 0x1F;
            const uint32_t rs1    = (inst >> 15) & 0x1F;
            const uint32_t rs2    = (inst >> 20) & 0x1F;
            const uint32_t funct3 = (inst >> 12) & 0x7;
            const uint32_t funct7 = (inst >> 25) & 0x7F;
            if (funct3 == 0x0 && funct7 == 0x00)
                ss << "ADD  x" << rd << ", x" << rs1 << ", x" << rs2;
            else if (funct3 == 0x0 && funct7 == 0x20)
                ss << "SUB  x" << rd << ", x" << rs1 << ", x" << rs2;
            else
                ss << "R-type (0x" << std::hex << inst << ")";
            break;
        }
        case 0x13: ss << "ADDI (or other I-type)"; break;
        case 0x63: ss << "Branch/Compare";         break;
        case 0x6F: ss << "JAL";                    break;
        case 0x67: ss << "JALR";                   break;
        case 0x37: ss << "LUI";                    break;
        case 0x17: ss << "AUIPC";                  break;
        case 0x03: ss << "LW";                     break;
        case 0x23: ss << "SW";                     break;
        case 0x73: ss << "ECALL";                  break;
        default:   ss << "Unknown opcode";         break;
        }
        return ss.str();
    }

private:
    // Sign-extend the low `bits` bits of `value` to a signed 32-bit integer.
    // `value` must have no bits set above bit (bits - 1).
    static int32_t signExtend(uint32_t value, unsigned bits) {
        const uint32_t signBit = 1u << (bits - 1);
        return static_cast<int32_t>((value ^ signBit) - signBit);
    }

    // Perform the system call selected by x17 (a7), with its argument in x10 (a0):
    //   1: print a0 as a signed decimal integer followed by a newline
    //   2: print the low-order 8 bits of a0 as a character
    //   3: print the NUL-terminated string at byte address a0
    // Any other number appends an "Unknown ECALL" message to the terminal.
    void executeEcall() {
        const uint32_t syscall = registers[17]; // a7
        const uint32_t arg = registers[10];     // a0
        switch (syscall) {
        case 1: // Print integer.
            terminalOutput += std::to_string(static_cast<int>(arg)) + "\n";
            break;
        case 2: // Print character.
            terminalOutput.push_back(static_cast<char>(arg & 0xFF));
            break;
        case 3: // Print string (pointer in a0).
            terminalOutput += readString(arg);
            break;
        default:
            terminalOutput += "Unknown ECALL: " + std::to_string(syscall) + "\n";
            break;
        }
    }
};

// --------------------------------------------------------------------------------
// Main: OpenGL/GLFW + ImGui Debugger and Terminal UI.
// --------------------------------------------------------------------------------
int main(int, char**) {
    // Initialize GLFW.
    if (!glfwInit()) {
        fprintf(stderr, "Failed to initialize GLFW\n");
        return 1;
    }
    const char* glsl_version = "#version 130";
    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 0);
    GLFWwindow* window = glfwCreateWindow(1280, 720, "RISC-V Emulator with Terminal", NULL, NULL);
    if (window == nullptr)
        return 1;
    glfwMakeContextCurrent(window);
    glfwSwapInterval(1);

    // Initialize ImGui.
    IMGUI_CHECKVERSION();
    ImGui::CreateContext();
    ImGuiIO& io = ImGui::GetIO(); (void)io;
    ImGui::StyleColorsDark();
    ImGui_ImplGlfw_InitForOpenGL(window, true);
    ImGui_ImplOpenGL3_Init(glsl_version);

    // Create the emulator instance with 1024 words of memory.
    RiscVEmulator emulator(1024);


    emulator.loadProgram(program_data);

    // Debugger control flags.
    bool running = false;
    double lastStepTime = glfwGetTime();
    const double stepInterval = 0.1; // Auto-step every 0.1 sec in "running" mode.

    // Main loop.
    while (!glfwWindowShouldClose(window)) {
        glfwPollEvents();
        double currentTime = glfwGetTime();
        if (running && (currentTime - lastStepTime) >= stepInterval && !emulator.halted) {
            emulator.step();
            lastStepTime = currentTime;
        }

        ImGui_ImplOpenGL3_NewFrame();
        ImGui_ImplGlfw_NewFrame();
        ImGui::NewFrame();

        // CPU Control Panel.
        ImGui::Begin("CPU Control");
        if (ImGui::Button("Reset")) {
            emulator.reset();
            emulator.loadProgram(program);
            running = false;
        }
        ImGui::SameLine();
        if (ImGui::Button("Start"))
            running = true;
        ImGui::SameLine();
        if (ImGui::Button("Stop"))
            running = false;
        ImGui::SameLine();
        if (ImGui::Button("Step") && !emulator.halted)
            emulator.step();
        ImGui::Text("PC: 0x%08X", emulator.pc);
        if (emulator.halted)
            ImGui::TextColored(ImVec4(1, 0, 0, 1), "CPU Halted");
        ImGui::Separator();

        // Registers display.
        if (ImGui::BeginTable("Registers", 4, ImGuiTableFlags_Borders | ImGuiTableFlags_RowBg)) {
            ImGui::TableSetupColumn("Reg");
            ImGui::TableSetupColumn("Value");
            ImGui::TableSetupColumn("Reg");
            ImGui::TableSetupColumn("Value");
            ImGui::TableHeadersRow();
            for (int i = 0; i < 32; i += 2) {
                ImGui::TableNextRow();
                ImGui::TableSetColumnIndex(0);
                ImGui::Text("x%d", i);
                ImGui::TableSetColumnIndex(1);
                ImGui::Text("0x%08X", emulator.registers[i]);
                ImGui::TableSetColumnIndex(2);
                ImGui::Text("x%d", i+1);
                ImGui::TableSetColumnIndex(3);
                ImGui::Text("0x%08X", emulator.registers[i+1]);
            }
            ImGui::EndTable();
        }
        ImGui::Text("Flags: Z[%d] N[%d] C[%d] O[%d]",
            emulator.flags.zero, emulator.flags.negative,
            emulator.flags.carry, emulator.flags.overflow);
        ImGui::Separator();

        // Disassembly view.
        ImGui::BeginChild("Disassembly", ImVec2(0, 150), true);
        ImGui::Text("Disassembly:");
        for (int i = 0; i < 10; i++) {
            uint32_t addr = emulator.pc + i * 4;
            std::string line = emulator.disassembleInstruction(addr);
            if (i == 0)
                ImGui::TextColored(ImVec4(1, 1, 0, 1), "0x%08X: %s", addr, line.c_str());
            else
                ImGui::Text("0x%08X: %s", addr, line.c_str());
        }
        ImGui::EndChild();
        ImGui::End();

        // Memory Viewer in a grid.
        ImGui::Begin("Memory Viewer");
        const int columns = 16, rows = 16;
        if (ImGui::BeginTable("MemoryGrid", columns, ImGuiTableFlags_Borders)) {
            for (int col = 0; col < columns; col++) {
                ImGui::TableSetupColumn("");
            }
            ImGui::TableHeadersRow();
            int wordIndex = 0;
            for (int r = 0; r < rows; r++) {
                ImGui::TableNextRow();
                for (int c = 0; c < columns; c++) {
                    ImGui::TableSetColumnIndex(c);
                    if (wordIndex < emulator.memory.size())
                        ImGui::Text("0x%08X", emulator.memory[wordIndex]);
                    else
                        ImGui::Text("----");
                    wordIndex++;
                }
            }
            ImGui::EndTable();
        }
        ImGui::End();

        // Terminal screen to display ECALL outputs.
        ImGui::Begin("Terminal");
        // Optionally, you can add a "Clear" button.
        if (ImGui::Button("Clear"))
            emulator.terminalOutput.clear();
        ImGui::Separator();
        // Display the terminal output in a scrolling region.
        ImGui::BeginChild("ScrollingRegion", ImVec2(0, 150), false, ImGuiWindowFlags_HorizontalScrollbar);
        ImGui::TextUnformatted(emulator.terminalOutput.c_str());
        ImGui::EndChild();
        ImGui::End();

        // Rendering.
        ImGui::Render();
        int display_w, display_h;
        glfwGetFramebufferSize(window, &display_w, &display_h);
        glViewport(0, 0, display_w, display_h);
        glClearColor(0.45f, 0.55f, 0.60f, 1.00f);
        glClear(GL_COLOR_BUFFER_BIT);
        ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());
        glfwSwapBuffers(window);
    }
    
    // Cleanup.
    ImGui_ImplOpenGL3_Shutdown();
    ImGui_ImplGlfw_Shutdown();
    ImGui::DestroyContext();
    glfwDestroyWindow(window);
    glfwTerminate();
    return 0;
}