diff --git a/docs/debug_mode.md b/docs/debug_mode.md new file mode 100644 index 0000000000..b88e6b5272 --- /dev/null +++ b/docs/debug_mode.md @@ -0,0 +1,193 @@ +# Vortex Debug Mode with GDB + +This guide explains how to debug Vortex programs using GDB and OpenOCD with the RISC-V debug interface. + +## Prerequisites + +Before running the debugger, ensure you have the following dependencies installed: + +- **OpenOCD**: Open On-Chip Debugger for JTAG communication +- **RISC-V GDB**: RISC-V cross-debugger (part of RISC-V toolchain, e.g., `riscv64-unknown-elf-gdb`) +- **Build tools**: Make and C++ compiler (g++) + +## Building the Simulator and Kernel Files + +### Building the Simulator + +The simulator must be built with `XLEN=64` to support 64-bit RISC-V binaries (including double-precision floating point): + +```bash +cd /vortex +cd build/sim/simx +make clean +make XLEN=64 +``` + +This builds the simulator with: +- **XLEN=64**: 64-bit integer registers +- **EXT_D enabled**: Double-precision floating point support (FLEN=64) +- **RISC-V Debug Module**: Full debug interface support + +### Building the Kernel Library + +The kernel library (`libvortex.a`) must be built with the same `XLEN` value as the simulator. For 64-bit support: + +```bash +cd /vortex/build/kernel +make clean +make XLEN=64 +``` + +This builds the kernel library that provides system calls, startup code, and runtime support for Vortex programs. + +### Building Test Binaries + +All test binaries must also be built with `XLEN=64` to match the simulator and kernel library: + +```bash +# Build a specific test (e.g., fibonacci) +cd /vortex/build/tests/kernel/fibonacci +make clean +make XLEN=64 + +# Or build all kernel tests +cd /vortex/build/tests/kernel +for dir in */; do + cd "$dir" + make clean + make XLEN=64 + cd .. 
+done +``` + +**Important:** The `XLEN` value must be consistent across: +- Simulator (`build/sim/simx`) +- Kernel library (`build/kernel`) +- All test binaries (`build/tests/kernel/*`) + +Mismatched `XLEN` values will cause linker errors or runtime failures. + +## Quick Start: Debugging Fibonacci + +### Step 1: Start Simulator in Debug Mode + +```bash +cd /vortex +./build/sim/simx/simx -d build/tests/kernel/fibonacci/fibonacci.bin +``` + +For verbose debug logging (optional, shows detailed debug module operations): +```bash +./build/sim/simx/simx -d -V -p 9824 build/tests/kernel/fibonacci/fibonacci.bin +``` + +The simulator starts halted, waiting for a debugger connection. + +### Step 2: Start OpenOCD + +```bash +openocd -f vortex.cfg +``` + +**Note:** `vortex.cfg` uses port 9824, while the simulator listens on port 9823 by default. To make the two match, either: +- Start the simulator with `-p 9824`, or +- Update `vortex.cfg` to use port 9823 + +### Step 3: Connect GDB + +```bash +riscv64-unknown-elf-gdb build/tests/kernel/fibonacci/fibonacci.elf +``` + +In GDB: +``` +(gdb) target remote localhost:3333 +(gdb) monitor reset halt +(gdb) set $pc = 0x80000000 +(gdb) break main +(gdb) continue +``` + +## Common GDB Commands + +```bash +# Breakpoints +(gdb) break main +(gdb) break fibonacci +(gdb) break main.cpp:16 + +# Execution control +(gdb) continue # Continue execution +(gdb) step # Step into function +(gdb) next # Step over function +(gdb) stepi # Step one instruction +(gdb) nexti # Next instruction + +# Inspection +(gdb) print variable +(gdb) info registers +(gdb) x/10i $pc # Disassemble 10 instructions +(gdb) x/s 0x80005740 # Print string at address +``` + +## Command-Line Options + +```bash +./build/sim/simx/simx [options] + +Options: + -d Enable debug mode + -p Remote bitbang port (default: 9823) + -V Enable verbose debug module logging (shows detailed debug operations) + -c Number of cores + -w Number of warps per core + -t Number of threads per warp +``` + +**Note:** The `-V` flag enables verbose logging from 
the debug module, which shows detailed information about register accesses, memory operations, and debug commands. This is useful for debugging the debugger itself, but can produce a lot of output. Use it when you need to see what the debug module is doing internally. + +## Key Addresses (Fibonacci Binary) + +| Address | Function/Data | +|---------|---------------| +| 0x80000000 | `_start` (entry point) | +| 0x80000094 | `fibonacci()` | +| 0x80000114 | `main()` | +| 0x800001ac | `init_regs()` (final PC) | +| 0x80005740 | `"fibonacci(%d) = %d\n"` | +| 0x80005754 | `"Passed!\n"` | +| 0x8000575c | `"Failed! value=%d, expected=%d\n"` | + +## Troubleshooting + +**OpenOCD can't connect:** +- Verify simulator is running with `-d` flag +- Check port numbers match (default 9823, config uses 9824) +- Check simulator output for "Remote bitbang server ready" + +## Example Session + +```bash +# Terminal 1 (add -V for verbose debug logging) +./build/sim/simx/simx -d -p 9824 build/tests/kernel/fibonacci/fibonacci.bin + +# Terminal 2 +openocd -f vortex.cfg + +# Terminal 3 +riscv64-unknown-elf-gdb +(gdb) target remote localhost:3333 +(gdb) stepi +(gdb) b *0x80000094 +(gdb) continue +(gdb) i r +(gdb) continue +Continuing. 
+Program Stopped +``` + +## Additional Resources + +- [RISC-V Debug Specification](https://github.com/riscv/riscv-debug-spec) +- [OpenOCD Documentation](http://openocd.org/doc/html/index.html) +- [GDB User Manual](https://sourceware.org/gdb/current/onlinedocs/gdb/) diff --git a/hw/rtl/tcu/VX_tcu_fedp_bhf.sv b/hw/rtl/tcu/VX_tcu_fedp_bhf.sv index d11cfc0f07..2f71e2398e 100644 --- a/hw/rtl/tcu/VX_tcu_fedp_bhf.sv +++ b/hw/rtl/tcu/VX_tcu_fedp_bhf.sv @@ -47,12 +47,16 @@ module VX_tcu_fedp_bhf #( wire [TCK-1:0][15:0] a_row16; wire [TCK-1:0][15:0] b_col16; + wire [N-1:0][18:0] a_row_tf32; + wire [N-1:0][18:0] b_col_tf32; for (genvar i = 0; i < N; i++) begin : g_unpack assign a_row16[2*i] = a_row[i][15:0]; assign a_row16[2*i+1] = a_row[i][31:16]; assign b_col16[2*i] = b_col[i][15:0]; assign b_col16[2*i+1] = b_col[i][31:16]; + assign a_row_tf32[i] = a_row[i][18:0]; + assign b_col_tf32[i] = b_col[i][18:0]; end // Transprecision Multiply @@ -75,6 +79,7 @@ module VX_tcu_fedp_bhf #( for (genvar i = 0; i < TCK; i++) begin : g_prod wire [32:0] mult_result_fp16; wire [32:0] mult_result_bf16; + wire [32:0] mult_result_tf32; // FP16 multiplication VX_tcu_bhf_fmul #( @@ -118,11 +123,37 @@ module VX_tcu_fedp_bhf #( `UNUSED_PIN(fflags) ); + if ((i % 2) == 0) begin : g_tf32_even + localparam int TF32_IDX = i / 2; + VX_tcu_bhf_fmul #( + .IN_EXPW (8), + .IN_SIGW (10+1), + .OUT_EXPW(8), + .OUT_SIGW(24), + .IN_REC (0), // TF32 stored in IEEE-like format + .OUT_REC (1), // output in recoded format + .MUL_LATENCY (FMUL_LATENCY), + .RND_LATENCY (FRND_LATENCY) + ) tf32_mul ( + .clk (clk), + .reset (reset), + .enable (enable), + .frm (frm), + .a (a_row_tf32[TF32_IDX]), + .b (b_col_tf32[TF32_IDX]), + .y (mult_result_tf32), + `UNUSED_PIN(fflags) + ); + end else begin : g_tf32_odd + assign mult_result_tf32 = '0; + end + logic [32:0] mult_result_mux; always_comb begin case(fmt_s_delayed) 3'd1: mult_result_mux = mult_result_fp16; 3'd2: mult_result_mux = mult_result_bf16; + 3'd3: mult_result_mux = 
mult_result_tf32; default: mult_result_mux = 'x; endcase end diff --git a/hw/rtl/tcu/VX_tcu_pkg.sv b/hw/rtl/tcu/VX_tcu_pkg.sv index bd82aaa309..e9917d42d7 100644 --- a/hw/rtl/tcu/VX_tcu_pkg.sv +++ b/hw/rtl/tcu/VX_tcu_pkg.sv @@ -29,6 +29,7 @@ package VX_tcu_pkg; localparam TCU_FP32_ID = 0; localparam TCU_FP16_ID = 1; localparam TCU_BF16_ID = 2; + localparam TCU_TF32_ID = 3; localparam TCU_I32_ID = 8; localparam TCU_I8_ID = 9; localparam TCU_U8_ID = 10; @@ -82,18 +83,23 @@ package VX_tcu_pkg; // Tracing info `ifdef SIMULATION - task trace_fmt(input int level, input [3:0] fmt); + function automatic string fmt_string(input [3:0] fmt); case (fmt) - TCU_FP32_ID: `TRACE(level, ("fp32")) - TCU_FP16_ID: `TRACE(level, ("fp16")) - TCU_BF16_ID: `TRACE(level, ("bf16")) - TCU_I32_ID: `TRACE(level, ("i32")) - TCU_I8_ID: `TRACE(level, ("i8")) - TCU_U8_ID: `TRACE(level, ("u8")) - TCU_I4_ID: `TRACE(level, ("i4")) - TCU_U4_ID: `TRACE(level, ("u4")) - default: `TRACE(level, ("?")) + TCU_FP32_ID: fmt_string = "fp32"; + TCU_FP16_ID: fmt_string = "fp16"; + TCU_BF16_ID: fmt_string = "bf16"; + TCU_TF32_ID: fmt_string = "tf32"; + TCU_I32_ID: fmt_string = "i32"; + TCU_I8_ID: fmt_string = "i8"; + TCU_U8_ID: fmt_string = "u8"; + TCU_I4_ID: fmt_string = "i4"; + TCU_U4_ID: fmt_string = "u4"; + default: fmt_string = "unknown"; endcase + endfunction + + task trace_fmt(input int level, input [3:0] fmt); + `TRACE(level, (fmt_string(fmt))) endtask task trace_ex_op(input int level, diff --git a/sim/common/tensor_cfg.h b/sim/common/tensor_cfg.h index 9622ba922a..556bc6838d 100644 --- a/sim/common/tensor_cfg.h +++ b/sim/common/tensor_cfg.h @@ -43,6 +43,13 @@ struct bf16 { static constexpr const char* name = "bf16"; }; +struct tf32 { + using dtype = uint32_t; + static constexpr uint32_t id = 3; + static constexpr uint32_t bits = 32; + static constexpr const char* name = "tf32"; +}; + struct int32 { using dtype = int32_t; static constexpr uint32_t id = 8; @@ -83,12 +90,13 @@ inline const char* 
fmt_string(uint32_t fmt) { case fp32::id: return fp32::name; case fp16::id: return fp16::name; case bf16::id: return bf16::name; + case tf32::id: return tf32::name; case int32::id: return int32::name; case int8::id: return int8::name; case uint8::id: return uint8::name; case int4::id: return int4::name; case uint4::id: return uint4::name; - default: return ""; + default: return "unknown"; } } diff --git a/sim/simx/Makefile b/sim/simx/Makefile index 1eca622701..4efec311f8 100644 --- a/sim/simx/Makefile +++ b/sim/simx/Makefile @@ -26,6 +26,7 @@ SRCS += $(SRC_DIR)/decode.cpp $(SRC_DIR)/opc_unit.cpp $(SRC_DIR)/dispatcher.cpp SRCS += $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp SRCS += $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp SRCS += $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp +SRCS += $(SRC_DIR)/jtag_dtm.cpp $(SRC_DIR)/debug_module.cpp $(SRC_DIR)/remote_bitbang.cpp # Add V extension sources ifneq ($(findstring -DEXT_V_ENABLE, $(CONFIGS)),) diff --git a/sim/simx/cluster.h b/sim/simx/cluster.h index d31aa1672b..213eebf630 100644 --- a/sim/simx/cluster.h +++ b/sim/simx/cluster.h @@ -69,6 +69,10 @@ class Cluster : public SimObject { PerfStats perf_stats() const; + const std::vector& sockets() const { + return sockets_; + } + private: uint32_t cluster_id_; ProcessorImpl* processor_; diff --git a/sim/simx/core.h b/sim/simx/core.h index a8b674d0bb..aa938ac1df 100644 --- a/sim/simx/core.h +++ b/sim/simx/core.h @@ -159,6 +159,10 @@ class Core : public SimObject { return emulator_.dcache_write(data, addr, size); } + Emulator& emulator() { + return emulator_; + } + #ifdef EXT_TCU_ENABLE TensorUnit::Ptr& tensor_unit() { return tensor_unit_; diff --git a/sim/simx/debug_module.cpp b/sim/simx/debug_module.cpp new file mode 100644 index 0000000000..6c1f406323 --- /dev/null +++ b/sim/simx/debug_module.cpp @@ -0,0 +1,1126 @@ +#include "debug_module.h" +#include +#include +#include +#include "emulator.h" +#include +#include + 
+namespace { + +std::atomic g_debug_module_verbose{false}; + +void dm_log(const char* fmt, ...) { + if (!g_debug_module_verbose.load(std::memory_order_relaxed)) { + return; + } + va_list args; + va_start(args, fmt); + vprintf(fmt, args); + va_end(args); +} + +} + +// Enables or disables verbose logging for debug module operations. +// Use case: Used to control debug output during development and troubleshooting. +void DebugModule::set_verbose_logging(bool enable) { + g_debug_module_verbose.store(enable, std::memory_order_relaxed); +} + +bool DebugModule::verbose_logging() { + return g_debug_module_verbose.load(std::memory_order_relaxed); +} + +// Constructor: Initializes the RISC-V Debug Module with a simulated memory space. +// Use case: Creates a debug module instance that implements the RISC-V Debug Specification 0.13. +DebugModule::DebugModule(vortex::Emulator* emulator, size_t mem_size) + : emulator_(emulator), + command(0), + resumereq_prev(false), + data1(0), + data2(0), + data3(0), + memory(mem_size, 0), + access_mem_addr(0), + halt_requested_(false), + single_step_active_(false), + debug_mode_enabled_(false) +{ + for (unsigned i = 0; i < datacount; i++) { + dmdata[i] = 0; + } + + reset(); +} + +// Resets the debug module to its initial state: clears all registers and resets the hart. +// Use case: Called when dmactive is set from 0 to 1, or during initialization. 
+void DebugModule::reset() +{ + dmcontrol = dmcontrol_t(); + dmstatus = dmstatus_t(); + abstractcs = abstractcs_t(); + + // Initialize debug state + dcsr_ = DCSR(); + dpc_ = 0; + resumeack_ = false; + havereset_ = false; + is_halted_ = false; + + dmcontrol.dmactive = true; + dmstatus.authenticated = true; + dmstatus.authbusy = false; + dmstatus.version = 2; + // Set XLEN support bits: sr32, sr64, sr128 (bits 20, 21, 22) + // OpenOCD checks these FIRST to determine XLEN + dmstatus.sr32 = (XLEN == 32); + dmstatus.sr64 = (XLEN == 64); + dmstatus.sr128 = false; // No 128-bit support + + dmstatus.allnonexistent = false; + dmstatus.anynonexistent = false; + dmstatus.allunavail = false; + dmstatus.anyunavail = false; + + access_mem_addr = 0; + access_mem_addr_valid = false; + + update_dmstatus(); + + dmstatus.authenticated = true; + dmstatus.authbusy = false; +} + +// Updates the dmstatus register fields based on current hart state. +// Use case: Called before reading dmstatus to ensure it reflects current hart state (halted/running/etc.). +// Preserves authentication state which must remain true for OpenOCD compatibility. 
+void DebugModule::update_dmstatus() +{ + bool saved_authenticated = dmstatus.authenticated; + bool saved_authbusy = dmstatus.authbusy; + unsigned saved_version = dmstatus.version; + + // Check running state - emulator notifies us when program completes, so we just check our flags + if (is_halted_ || halt_requested_) { + dmstatus.allhalted = true; + dmstatus.anyhalted = true; + dmstatus.allrunning = false; + dmstatus.anyrunning = false; + } else { + dmstatus.allhalted = false; + dmstatus.anyhalted = false; + dmstatus.allrunning = true; + dmstatus.anyrunning = true; + } + + dmstatus.allresumeack = resumeack_; + dmstatus.anyresumeack = resumeack_; + dmstatus.allhavereset = havereset_; + dmstatus.anyhavereset = havereset_; + + // Check if selected hartsel (thread) is valid (0-31) + // In our implementation, we have 32 threads, so hartsel must be 0-31 + unsigned thread_id = dmcontrol.hartsel & 0x1F; + bool hart_exists = (thread_id < 32); // We support threads 0-31 + + dmstatus.allnonexistent = !hart_exists; + dmstatus.anynonexistent = !hart_exists; + dmstatus.allunavail = false; + dmstatus.anyunavail = false; + + + dmstatus.authenticated = saved_authenticated; + dmstatus.authbusy = saved_authbusy; + dmstatus.version = saved_version; +} + +// Reads a value from a DMI (Debug Module Interface) register by address. +// Use case: Called by JTAG DTM to read debug module registers (dmcontrol, dmstatus, abstractcs, etc.). +// Returns true on success, false for unimplemented addresses. 
+bool DebugModule::dmi_read(unsigned address, uint32_t *value) +{ + switch (address) { + case DM_DMCONTROL: + *value = read_dmcontrol(); + break; + case DM_DMSTATUS: { + update_dmstatus(); + *value = read_dmstatus(); + // Log DMSTATUS reads to help debug thread discovery + unsigned current_thread = dmcontrol.hartsel & 0x1F; + bool exists = (current_thread < 32); + // dm_log("[DM] DMSTATUS read: hartsel=0x%x (thread=%u), anynonexistent=%d, value=0x%08x\n", + // dmcontrol.hartsel, current_thread, dmstatus.anynonexistent ? 1 : 0, *value); + + + if (exists) { + *value &= ~((1U << 14) | (1U << 15)); // Clear anynonexistent and allnonexistent bits + } + + // Ensure authenticated bit (bit 7) is always set - critical for OpenOCD compatibility + if ((*value & (1U << 7)) == 0) { + dm_log("[DM] ERROR: authenticated bit (bit 7) not set! value=0x%x\n", *value); + *value |= (1U << 7); + } + break; + } + case DM_HARTINFO: + // Hart info: nscratch=1, dataaccess=1, datasize=datacount, dataaddr=0x380 + *value = (1 << 20) | (1 << 19) | (datacount << 16) | 0x380; + break; + case DM_ABSTRACTCS: + *value = read_abstractcs(); + break; + case DM_COMMAND: + *value = 0; + break; + case DM_ABSTRACTAUTO: + *value = 0; + break; + case DM_DATA0: + *value = read_data0(); + break; + case 0x5: // DATA1 + *value = data1; + dm_log("[DM] DATA1 read: 0x%08x\n", data1); + break; + case 0x6: // DATA2 + *value = data2; + break; + case 0x7: // DATA3 + *value = data3; + break; + case DM_AUTHDATA: + *value = read_authdata(); + break; + case DM_SBCS: + // System Bus Control and Status: return 0 to indicate no system bus access available + // This is optional functionality, so returning 0 is acceptable + *value = 0; + break; + default: + *value = 0; + dm_log("[DM] DMI READ addr=0x%x -> 0x%x (unimplemented)\n", address, *value); + return false; + } + + // dm_log("[DM] DMI READ addr=0x%x -> 0x%x\n", address, *value); + return true; +} + +// Writes a value to a DMI (Debug Module Interface) register by address. 
+// Use case: Called by JTAG DTM to write debug module registers (dmcontrol, command, data0, etc.). +// Returns true on success, false for unimplemented addresses. +bool DebugModule::dmi_write(unsigned address, uint32_t value) +{ + dm_log("[DM] DMI WRITE addr=0x%x data=0x%x\n", address, value); + + switch (address) { + case DM_DMCONTROL: + return write_dmcontrol(value); + case DM_COMMAND: + return write_command(value); + case DM_DATA0: + return write_data0(value); + case 0x5: // DATA1 + data1 = value; + dm_log("[DM] DATA1 written: 0x%08x\n", value); + return true; + case 0x6: // DATA2 + data2 = value; + dm_log("[DM] DATA2 written: 0x%08x\n", value); + return true; + case 0x7: // DATA3 + data3 = value; + dm_log("[DM] DATA3 written: 0x%08x\n", value); + return true; + case DM_AUTHDATA: + return write_authdata(value); + case DM_ABSTRACTAUTO: + // Auto-execute not implemented in stub + return true; + case DM_ABSTRACTCS: + // Clear command error if writing 1 to error bits (bits [10:8]) + if (value & (7 << 8)) { + abstractcs.cmderr = 0; + } + return true; + case DM_SBCS: + // System Bus Control and Status: accept writes but do nothing (no system bus access) + return true; + default: + dm_log("[DM] DMI WRITE addr=0x%x unimplemented\n", address); + return false; + } +} + +// Reads the dmcontrol register, encoding all control fields into a 32-bit value. +// Use case: Returns the current debug module control state (dmactive, haltreq, resumereq, hartsel, etc.). +uint32_t DebugModule::read_dmcontrol() +{ + uint32_t result = 0; + result = set_field_pos(result, 0x1U, 0, dmcontrol.dmactive ? 1U : 0U); + result = set_field_pos(result, 0x1U, 1, dmcontrol.ndmreset ? 1U : 0U); + result = set_field_pos(result, 0x1U, 2, dmcontrol.clrresethaltreq ? 1U : 0U); + result = set_field_pos(result, 0x1U, 3, dmcontrol.setresethaltreq ? 1U : 0U); + result = set_field_pos(result, 0x1U, 16, dmcontrol.hartreset ? 1U : 0U); + result = set_field_pos(result, 0x1U, 28, dmcontrol.ackhavereset ? 
1U : 0U); + result = set_field_pos(result, 0x1U, 30, dmcontrol.resumereq ? 1U : 0U); + result = set_field_pos(result, 0x1U, 31, dmcontrol.haltreq ? 1U : 0U); + + + result = set_field_pos(result, 0x3ffU << 6, 6, dmcontrol.hartsel); + result = set_field_pos(result, 0x1U, 26, dmcontrol.hasel ? 1U : 0U); + + + result |= 1; + + return result; +} + +// Reads the dmstatus register, encoding all status fields into a 32-bit value per RISC-V Debug Spec 0.13.2. +// Use case: Returns the current debug module status (version, authenticated, halted/running state, etc.). +// Always ensures authenticated bit (bit 7) is set - critical for OpenOCD compatibility. +uint32_t DebugModule::read_dmstatus() +{ + + dmstatus.authenticated = true; + dmstatus.authbusy = false; + + uint32_t result = 0; + + + + result |= (dmstatus.version & 0xf); + + + if (dmstatus.confstrptrvalid) result |= (1U << 4); + + + if (dmstatus.hasresethaltreq) result |= (1U << 5); + + + + + + result |= (1U << 7); + + + if (dmstatus.anyhalted) result |= (1U << 8); + + + if (dmstatus.allhalted) result |= (1U << 9); + + + if (dmstatus.anyrunning) result |= (1U << 10); + + + if (dmstatus.allrunning) result |= (1U << 11); + + + if (dmstatus.anyunavail) result |= (1U << 12); + + + if (dmstatus.allunavail) result |= (1U << 13); + + + if (dmstatus.anynonexistent) result |= (1U << 14); + + + if (dmstatus.allnonexistent) result |= (1U << 15); + + + if (dmstatus.anyresumeack) result |= (1U << 16); + + + if (dmstatus.allresumeack) result |= (1U << 17); + + + if (dmstatus.anyhavereset) result |= (1U << 18); + + + if (dmstatus.allhavereset) result |= (1U << 19); + + + + + if (dmstatus.impebreak) result |= (1U << 22); + + + + + if ((result & (1U << 7)) == 0) { + dm_log("[DM] ERROR: authenticated bit (bit 7) not set! result=0x%x\n", result); + result |= (1U << 7); + } + + return result; +} + +// Reads the abstractcs register, encoding abstract command status fields. 
+// Use case: Returns abstract command status (datacount, progbufsize, busy flag, cmderr). +uint32_t DebugModule::read_abstractcs() +{ + uint32_t result = 0; + result = set_field_pos(result, 0x1fU << 8, 8, abstractcs.datacount); + result = set_field_pos(result, 0xffU << 16, 16, abstractcs.progbufsize); + result = set_field_pos(result, 0x1U, 28, abstractcs.busy ? 1U : 0U); + result = set_field_pos(result, 0x7U << 8, 8, abstractcs.cmderr); + result = set_field_pos(result, 0xfU << 24, 24, 1); + return result; +} + +// Reads the DATA0 register value (used for abstract command data transfer). +// Use case: Returns the value stored in DATA0, typically used to read register/memory values. +uint32_t DebugModule::read_data0() +{ + uint32_t value = dmdata[0]; + dm_log("[DM] DATA0 read: 0x%08x\n", value); + return value; +} + +// Reads the authdata register (authentication not implemented in stub). +// Use case: Returns 0 since authentication protocol is not implemented. +uint32_t DebugModule::read_authdata() +{ + return 0; +} + +// Writes the dmcontrol register, updating control fields and processing requests (halt/resume). +// Use case: Called to control the debug module (halt/resume hart, select hart, reset, etc.). +// Processes haltreq and resumereq immediately, and resets module if dmactive transitions from 0 to 1. 
+bool DebugModule::write_dmcontrol(uint32_t value) +{ + // If setting dmactive from 0 to 1, reset the module + if (!dmcontrol.dmactive && (value & 1)) { + reset(); + } + + + dmcontrol.dmactive = (value & 0x1) != 0; + dmcontrol.ndmreset = (value & (0x1 << 1)) != 0; + dmcontrol.clrresethaltreq = (value & (0x1 << 2)) != 0; + dmcontrol.setresethaltreq = (value & (0x1 << 3)) != 0; + dmcontrol.hartreset = (value & (0x1 << 16)) != 0; + dmcontrol.ackhavereset = (value & (0x1 << 28)) != 0; + dmcontrol.resumereq = (value & (0x1 << 30)) != 0; + dmcontrol.haltreq = (value & (0x1 << 31)) != 0; + + // Extract hartsel (hart selection) and hasel (hart array select) fields + dmcontrol.hartsel = (value >> 6) & 0x3ff; + dmcontrol.hasel = (value & (0x1 << 26)) != 0; + + // Use lower 5 bits of hartsel to select thread within warp (0-31) + // Note: We always use thread 0 from emulator's warp 0 + unsigned thread_id = dmcontrol.hartsel & 0x1F; + if (thread_id < 32) { + dm_log("[DM] Thread selection: hartsel=0x%x, selected thread=%u (using thread 0)\n", dmcontrol.hartsel, thread_id); + } else { + dm_log("[DM] Invalid thread selection: hartsel=0x%x, thread_id=%u (max 31)\n", dmcontrol.hartsel, thread_id); + } + + // Always keep dmactive set for stub (always active) + dmcontrol.dmactive = true; + + // Handle halt request immediately (cause = 3 per spec) + if (dmcontrol.haltreq) { + halt_hart(3); + dmcontrol.haltreq = false; + } + + if (dmcontrol.resumereq && !resumereq_prev) { + resumeack_ = false; + resume_hart(false); + } + + resumereq_prev = dmcontrol.resumereq; + + if (dmcontrol.ackhavereset) { + havereset_ = false; + } + + update_dmstatus(); + return true; +} + +// Writes the abstract command register and executes the command if not busy. +// Use case: Called to execute abstract commands (e.g., access register, quick access). +// Returns false if busy, otherwise executes command and returns true. 
+bool DebugModule::write_command(uint32_t value) +{ + command = value; + dm_log("[DM] COMMAND written: 0x%08x\n", value); + + + // Execute command immediately if not busy (stub implementation) + if (!abstractcs.busy) { + return perform_abstract_command(); + } else { + abstractcs.cmderr = 1; // BUSY error + dm_log("[DM] COMMAND error: BUSY (cmderr=1)\n"); + return false; + } +} + +// Writes the DATA0 register value (used for abstract command data transfer). +// Use case: Called to set data for abstract commands (e.g., register value to write). +bool DebugModule::write_data0(uint32_t value) +{ + dmdata[0] = value; + dm_log("[DM] DATA0 written: 0x%08x\n", value); + return true; +} + +// Writes the authdata register (authentication not implemented in stub). +// Use case: Accepts any authdata write and marks as authenticated (stub always authenticates). +bool DebugModule::write_authdata(uint32_t) +{ + dmstatus.authenticated = true; + dmstatus.authbusy = false; + return true; +} + +// Performs the abstract command stored in the command register. +// Use case: Executes abstract commands (supports Access Register cmdtype=0 and Access Memory cmdtype=0x02). +// Returns false if busy or unsupported command type, sets cmderr accordingly. +bool DebugModule::perform_abstract_command() +{ + if (abstractcs.busy) { + abstractcs.cmderr = 1; + dm_log("[DM] COMMAND error: BUSY (cmderr=1)\n"); + return false; + } + + + unsigned cmdtype = (command >> 24) & 0xff; + + if (cmdtype == 0 || cmdtype == 0x02) { + // Access Register (cmdtype=0) or Access Memory (cmdtype=0x02) + abstractcs.busy = true; + execute_command(command); + abstractcs.busy = false; + return true; + } else { + abstractcs.cmderr = 2; + dm_log("[DM] COMMAND error: NOTSUP (cmderr=2), cmdtype=0x%02x\n", cmdtype); + return false; + } +} + +// Executes an abstract command (supports Access Register and Access Memory commands). 
+// Use case: Processes abstract commands to read/write hart registers or memory, optionally with postexec step. +// Command format: [cmdtype][aarsize/aamsize][postexec][transfer][write][regaddr/aamaddress] +void DebugModule::execute_command(uint32_t value) +{ + uint8_t cmdtype = (value >> 24) & 0xFF; + if (cmdtype == 0) { + // Access Register command + uint8_t aarsize = (value >> 20) & 0x7; + bool postexec = value & (1 << 18); + bool transfer = value & (1 << 17); + bool write = value & (1 << 16); + uint16_t regaddr = value & 0xFFFF; + + dm_log("[DM] EXECUTE COMMAND: Access Register, regaddr=0x%04x, write=%d, transfer=%d, postexec=%d, aarsize=%d\n", + regaddr, write ? 1 : 0, transfer ? 1 : 0, postexec ? 1 : 0, aarsize); + + if (transfer) { + if (write) { + // For 64-bit systems, combine data0 (low) and data1 (high) if available + vortex::Word val; + if (XLEN == 64 && abstractcs.datacount >= 2) { + val = static_cast(data0()) | (static_cast(data1) << 32); + } else { + val = static_cast(data0()); + } + write_register(regaddr, val); + } else { + vortex::Word val = read_register(regaddr); + // For 64-bit systems, split into data0 (low) and data1 (high) if available + if (XLEN == 64 && abstractcs.datacount >= 2) { + data0() = static_cast(val); + data1 = static_cast(val >> 32); + } else { + data0() = static_cast(val); + } + } + } + + if (postexec) { + // Get PC from emulator + vortex::Word pc = 0; + if (emulator_ != nullptr) { + auto& warp0 = emulator_->get_warp(0); + pc = warp0.PC; + } + + // Check for software breakpoint: if instruction at PC is EBREAK, halt + vortex::Word instruction = read_mem(pc, sizeof(uint32_t)); // Read 4-byte instruction + if ((instruction & 0xFFFFFFFF) == 0x00100073) { + // EBREAK instruction - software breakpoint + dm_log("[DM] Software breakpoint hit at 0x%0*llx (EBREAK), halting hart\n", + (XLEN == 64) ? 
16 : 8, (unsigned long long)pc); + halt_hart(1); // Cause 1 = ebreak instruction + return; // Don't execute the instruction + } + + // Note: Step is handled by emulator, not here + dm_log("[DM] STEP: PC=0x%08x (step handled by emulator)\n", pc); + } + } else if (cmdtype == 0x02) { + // Access Memory command (per updated RISC-V Debug Spec) + // Fields: + // [31:24] cmdtype (0x02) + // [23] aamvirtual + // [22:20] aamsize (0=8-bit, 1=16-bit, 2=32-bit, 3=64-bit) + // [19] aampostincrement + // [18:17] 0 + // [16] write (1=write, 0=read) + // [15:14] target-specific-info + // [13:0] 0 + + uint8_t aamvirtual = (value >> 23) & 0x1; // currently unused + uint8_t aamsize = (value >> 20) & 0x7; + bool aampostincrement = ((value >> 19) & 0x1) != 0; + bool write = ((value >> 16) & 0x1) != 0; + (void)aamvirtual; // suppress unused warning for now + + size_t access_size = (aamsize == 0) ? 1 : + (aamsize == 1) ? 2 : + (aamsize == 2) ? 4 : 8; + + // Decide base address and remember where it came from so we can apply postincrement correctly. + enum AddrSource { + ADDR_NONE, + ADDR_DATA2, + ADDR_DATA1, + ADDR_DATA0, + ADDR_PREV + } addr_src = ADDR_NONE; + + vortex::Word mem_addr = 0; + + // If this looks like a continuation of a postincrement sequence (no explicit address + // in DATA[0-3]), reuse the last address. 
+ if (aampostincrement && + access_mem_addr_valid && + data3 == 0 && data2 == 0 && data1 == 0 && data0() == 0) { + mem_addr = access_mem_addr; + addr_src = ADDR_PREV; + } else if (data3 != 0 || data2 != 0) { + // For 64-bit addresses, combine DATA3 (high) and DATA2 (low) + if (XLEN == 64 && data3 != 0) { + mem_addr = (static_cast(data3) << 32) | static_cast(data2); + addr_src = ADDR_DATA2; + dm_log("[DM] Access Memory: Combining DATA3 (0x%08x) and DATA2 (0x%08x) -> addr=0x%016llx\n", + data3, data2, (unsigned long long)mem_addr); + } else { + mem_addr = static_cast(data2); + addr_src = ADDR_DATA2; + } + } else if (data1 != 0 || data0() != 0) { + // For 64-bit addresses, combine DATA1 (high) and DATA0 (low) + if (XLEN == 64 && data1 != 0) { + mem_addr = (static_cast(data1) << 32) | static_cast(data0()); + addr_src = ADDR_DATA0; + dm_log("[DM] Access Memory: Combining DATA1 (0x%08x) and DATA0 (0x%08x) -> addr=0x%016llx\n", + data1, data0(), (unsigned long long)mem_addr); + } else if (data1 != 0) { + mem_addr = static_cast(data1); + addr_src = ADDR_DATA1; + } else { + mem_addr = static_cast(data0()); + addr_src = ADDR_DATA0; + } + } else if (access_mem_addr_valid) { + // Fallback to previous address if we have one. + mem_addr = access_mem_addr; + addr_src = ADDR_PREV; + } else { + mem_addr = 0; + addr_src = ADDR_NONE; + } + + dm_log("[DM] EXECUTE COMMAND: Access Memory, addr=0x%0*llx, write=%d, aamsize=%u, postinc=%d\n", + (XLEN == 64) ? 16 : 8, (unsigned long long)mem_addr, + write ? 1 : 0, aamsize, aampostincrement ? 1 : 0); + + // Always perform one memory access per command. 
+ if (write) { + // Write memory: For 64-bit, combine data0 (low) and data1 (high) if available + vortex::Word write_data; + if (access_size == 8 && XLEN == 64 && abstractcs.datacount >= 2) { + write_data = static_cast(data0()) | (static_cast(data1) << 32); + } else { + write_data = static_cast(data0()); + } + + dm_log("[DM] Access Memory WRITE: addr=0x%llx, data=0x%0*llx, size=%zu\n", + (unsigned long long)mem_addr, (access_size == 8) ? 16 : 8, + (unsigned long long)write_data, access_size); + + if (access_size == 1) { + vortex::Word old_val = read_mem(mem_addr, access_size); + write_mem(mem_addr, (old_val & ~0xFF) | (write_data & 0xFF), access_size); + } else if (access_size == 2) { + // Detect compressed EBREAK (0x9002) being written - save original instruction + if ((write_data & 0xFFFF) == 0x9002 && !has_breakpoint(mem_addr)) { + add_breakpoint(mem_addr); + } + vortex::Word old_val = read_mem(mem_addr, access_size); + write_mem(mem_addr, (old_val & ~0xFFFF) | (write_data & 0xFFFF), access_size); + } else if (access_size == 4) { + // Detect EBREAK instruction (32-bit: 0x00100073 or compressed: 0x00009002) being written + bool is_ebreak = (write_data == 0x00100073) || ((write_data & 0xFFFF) == 0x9002); + if (is_ebreak && !has_breakpoint(mem_addr)) { + add_breakpoint(mem_addr); + } + write_mem(mem_addr, write_data, access_size); + } else if (access_size == 8) { + // 64-bit write + write_mem(mem_addr, write_data, access_size); + } else { + dm_log("[DM] Access Memory: unsupported write size %zu\n", access_size); + } + } else { + // Read memory: result goes into DATA0 (and DATA1 for 64-bit if available) + vortex::Word read_val = read_mem(mem_addr, access_size); + + if (access_size == 1) { + data0() = static_cast(read_val & 0xFF); + } else if (access_size == 2) { + data0() = static_cast(read_val & 0xFFFF); + } else if (access_size == 4) { + data0() = static_cast(read_val); + } else if (access_size == 8) { + // 64-bit read: split into data0 (low) and data1 (high) if 
available + if (XLEN == 64 && abstractcs.datacount >= 2) { + data0() = static_cast(read_val); + data1 = static_cast(read_val >> 32); + } else { + // Fallback: only return low 32 bits + data0() = static_cast(read_val); + } + } else { + dm_log("[DM] Access Memory: unsupported read size %zu\n", access_size); + data0() = 0; + } + } + + // Implement aampostincrement: advance the address and write it back to the same source. + vortex::Word new_addr = mem_addr; + if (aampostincrement) { + new_addr = mem_addr + access_size; + switch (addr_src) { + case ADDR_DATA2: + // If address came from DATA3+DATA2 (64-bit), write back both parts + if (XLEN == 64 && data3 != 0) { + data3 = static_cast(new_addr >> 32); + data2 = static_cast(new_addr); + } else { + data2 = static_cast(new_addr); + } + break; + case ADDR_DATA1: + data1 = static_cast(new_addr); + break; + case ADDR_DATA0: + // If address came from DATA1+DATA0 (64-bit), write back both parts + if (XLEN == 64 && data1 != 0) { + data1 = static_cast(new_addr >> 32); + data0() = static_cast(new_addr); + } else { + data0() = static_cast(new_addr); + } + break; + case ADDR_PREV: + // When address came from a previous implicit address sequence (access_mem_addr), + // DO NOT write back to data registers - only update internal state. + // This prevents overwriting data registers that OpenOCD might use for other purposes. + // The incremented address is stored in access_mem_addr for the next postincrement operation. + dm_log("[DM] Access Memory postincrement: address from access_mem_addr, NOT writing back to data registers (prev=0x%0*llx, new=0x%0*llx)\n", + (XLEN == 64) ? 16 : 8, (unsigned long long)mem_addr, + (XLEN == 64) ? 
16 : 8, (unsigned long long)new_addr); + break; + case ADDR_NONE: + default: + break; + } + access_mem_addr = new_addr; + access_mem_addr_valid = true; + } else { + access_mem_addr = mem_addr; + access_mem_addr_valid = true; + } + } else { + abstractcs.cmderr = 2; // NOTSUP + dm_log("[DM] COMMAND error: NOTSUP (cmderr=2), cmdtype=0x%02x\n", cmdtype); + } +} + +// Reads a hart register by abstract register address (used by access register commands). +// Use case: Called during abstract command execution to read GPRs, PC, DCSR, DPC, or CSRs. +// Register address mapping: 0x1000-0x101F (GPRs), 0x1020 (PC), 0x7B0 (DCSR), 0x7B1 (DPC), 0x0000-0x0FFF/0xC000-0xFFFF (CSRs). +vortex::Word DebugModule::read_register(uint16_t regaddr) +{ + // General purpose registers (x0–x31) at addresses 0x1000–0x101F + if (regaddr >= 0x1000 && regaddr <= 0x101F) { + int gpr_index = regaddr - 0x1000; + vortex::Word value; + if (emulator_ != nullptr) { + // Use emulator's warp 0, thread 0 register + auto& warp0 = emulator_->get_warp(0); + value = warp0.ireg_file.at(gpr_index).at(0); // Direct assignment, no cast needed + } else { + // No emulator available + value = 0; + } + dm_log("[DM] READ REG x%d (0x%04x) -> 0x%0*llx\n", gpr_index, regaddr, + (XLEN == 64) ? 16 : 8, (unsigned long long)value); + return value; + } + + if (regaddr == 0x1020) { + vortex::Word value; + if (emulator_ != nullptr) { + // Use emulator's warp 0 PC + auto& warp0 = emulator_->get_warp(0); + value = warp0.PC; // PC is already Word type + } else { + // No emulator available + value = 0; + } + dm_log("[DM] READ REG pc (0x1020) -> 0x%0*llx\n", (XLEN == 64) ? 
16 : 8, (unsigned long long)value); + return value; + } + + if (regaddr == 0x07b0 || regaddr == 0x7B0) { + vortex::Word value = dcsr_.to_u32(); // DCSR is always 32-bit + dm_log("[DM] READ REG dcsr (0x7B0) -> 0x%08x\n", (uint32_t)value); + return value; + } + + if (regaddr == 0x07b1 || regaddr == 0x7B1) { + vortex::Word value = dpc_; + dm_log("[DM] READ REG dpc (0x7B1) -> 0x%0*llx\n", (XLEN == 64) ? 16 : 8, (unsigned long long)value); + return value; + } + + // Helper function to read CSR by number + // Note: CSRs are always 32-bit per RISC-V spec, but we return Word for consistency + auto read_csr = [this](uint16_t csr_num, uint16_t regaddr) -> vortex::Word { + if (csr_num == 0x0301) { + // Calculate MISA based on configured extensions + // MXL field (bits 31:30): 1=RV32, 2=RV64, 3=RV128 + uint32_t mxl = (vortex::log2floor(XLEN) - 4); + uint32_t value = (mxl << 30) | MISA_STD; + dm_log("[DM] READ REG misa (0x%03x via 0x%04x) -> 0x%08x (RV%d, MXL=%d, MISA_STD=0x%08x)\n", + csr_num, regaddr, value, XLEN, mxl, MISA_STD); + return value; + } + + if (csr_num == 0x0c22) { + uint32_t value = 0; + dm_log("[DM] READ REG vlenb (0x%03x via 0x%04x) -> 0x%08x (no vector support)\n", + csr_num, regaddr, value); + return value; + } + + dm_log("[DM] READ REG csr[0x%03x] (0x%04x) -> 0x00000000\n", csr_num, regaddr); + return 0; + }; + + // Direct CSR access: 0x0000-0x0FFF (CSR number = regaddr) + if (regaddr >= 0x0000 && regaddr <= 0x0FFF) { + return read_csr(static_cast(regaddr), regaddr); + } + + dm_log("[DM] READ REG unknown regaddr=0x%04x -> 0x00000000\n", regaddr); + return vortex::Word(0); +} + +void DebugModule::write_register(uint16_t regaddr, vortex::Word val) +{ + if (regaddr >= 0x1000 && regaddr <= 0x101F) { + int gpr_index = regaddr - 0x1000; + if (gpr_index == 0) { + dm_log("[DM] WRITE REG x0 (0x%04x) <- 0x%0*llx (ignored, x0 is read-only)\n", + regaddr, (XLEN == 64) ? 
16 : 8, (unsigned long long)val); + return; + } + if (emulator_ != nullptr) { + auto& warp0 = emulator_->get_warp(0); + warp0.ireg_file.at(gpr_index).at(0) = val; // Direct assignment + } + dm_log("[DM] WRITE REG x%d (0x%04x) <- 0x%0*llx\n", + gpr_index, regaddr, (XLEN == 64) ? 16 : 8, (unsigned long long)val); + return; + } + + if (regaddr == 0x1020) { + if (emulator_ != nullptr) { + auto& warp0 = emulator_->get_warp(0); + warp0.PC = val; // Direct assignment, PC is Word type + } + dm_log("[DM] WRITE REG pc (0x1020) <- 0x%0*llx\n", + (XLEN == 64) ? 16 : 8, (unsigned long long)val); + return; + } + + if (regaddr == 0x07b0 || regaddr == 0x7B0) { + dcsr_.from_u32(static_cast(val)); // DCSR is always 32-bit + dm_log("[DM] WRITE REG dcsr (0x7B0) <- 0x%08x\n", (uint32_t)val); + return; + } + + if (regaddr == 0x07b1 || regaddr == 0x7B1) { + dpc_ = val; + dm_log("[DM] WRITE REG dpc (0x7B1) <- 0x%0*llx\n", + (XLEN == 64) ? 16 : 8, (unsigned long long)val); + return; + } + + if (regaddr >= 0xC000 && regaddr <= 0xFFFF) { + dm_log("[DM] WRITE REG csr[0x%04x] (0x%04x) <- 0x%0*llx (ignored)\n", + regaddr - 0xC000, regaddr, (XLEN == 64) ? 16 : 8, (unsigned long long)val); + return; + } + + dm_log("[DM] WRITE REG unknown regaddr=0x%04x <- 0x%0*llx (ignored)\n", + regaddr, (XLEN == 64) ? 16 : 8, (unsigned long long)val); +} + +vortex::Word DebugModule::read_mem(vortex::Word addr, size_t size) +{ + vortex::Word val = read_program_memory(addr, size); + dm_log("[DM] READ MEM addr=0x%0*llx -> 0x%0*llx (size=%zu)\n", + (XLEN == 64) ? 16 : 8, (unsigned long long)addr, + (XLEN == 64) ? 16 : 8, (unsigned long long)val, size); + return val; +} + +void DebugModule::write_mem(vortex::Word addr, vortex::Word val, size_t size) +{ + write_program_memory(addr, val, size); + dm_log("[DM] WRITE MEM addr=0x%0*llx <- 0x%0*llx (size=%zu)\n", + (XLEN == 64) ? 16 : 8, (unsigned long long)addr, + (XLEN == 64) ? 
16 : 8, (unsigned long long)val, size); +} + +vortex::Word DebugModule::read_program_memory(vortex::Word addr, size_t size) const +{ + if (!emulator_) { + return 0; + } + // Read the specified number of bytes + uint8_t buffer[8] = {0}; // Max 8 bytes for 64-bit access + emulator_->dcache_read(buffer, static_cast(addr), size); + + // Convert to Word based on size + vortex::Word value = 0; + for (size_t i = 0; i < size && i < sizeof(vortex::Word); ++i) { + value |= static_cast(buffer[i]) << (i * 8); + } + return value; +} + +void DebugModule::write_program_memory(vortex::Word addr, vortex::Word value, size_t size) +{ + if (!emulator_) { + return; + } + + // Write the specified number of bytes + uint8_t buffer[8]; // Max 8 bytes for 64-bit access + for (size_t i = 0; i < size && i < sizeof(vortex::Word); ++i) { + buffer[i] = static_cast((value >> (i * 8)) & 0xFF); + } + emulator_->dcache_write(buffer, static_cast(addr), size); +} + +vortex::Word DebugModule::direct_read_register(uint16_t regaddr) +{ + return read_register(regaddr); +} + +void DebugModule::direct_write_register(uint16_t regaddr, vortex::Word value) +{ + write_register(regaddr, value); +} + +bool DebugModule::read_memory_block(uint64_t addr, uint8_t* dest, size_t len) const +{ + if (addr + len > memory.size()) { + return false; + } + std::memcpy(dest, memory.data() + addr, len); + return true; +} + +bool DebugModule::write_memory_block(uint64_t addr, const uint8_t* src, size_t len) +{ + if (addr + len > memory.size()) { + return false; + } + std::memcpy(memory.data() + addr, src, len); + return true; +} + + +// Halts the hart (CPU core) and enters debug mode with the specified cause. +// Use case: Called when debugger requests a halt or a breakpoint is hit. +// Cause values: 0=reserved, 1=ebreak, 2=trigger, 3=haltreq, 4=step, 5=resume after step, etc. 
+void DebugModule::halt_hart(uint8_t cause) +{ + dm_log("[DM] Halt requested - hart halted (cause=%u)\n", cause); + // Enter debug mode: update DCSR (DPC will be updated by emulator when it actually halts) + dcsr_.cause = cause & 0xF; + is_halted_ = true; + // Set halt flag so emulator will stop execution and update DPC + set_halt_requested(true); + update_dmstatus(); + // Log DCSR value after setting cause to verify encoding + uint32_t dcsr_val = dcsr_.to_u32(); + uint8_t cause_field = (dcsr_val >> 8) & 0xF; + dm_log("[DM] DCSR after halt: 0x%08x, cause field: 0x%x (should be 0x%x)\n", dcsr_val, cause_field, cause); +} + +// Resumes the hart execution, optionally in single-step mode. +// Use case: Called when debugger requests resume or step execution. +// If single_step is true or hart is in step mode, executes one instruction then halts again. +void DebugModule::resume_hart(bool single_step) +{ + dm_log("[DM] Resume requested (single_step=%d)\n", single_step ? 1 : 0); + is_halted_ = false; + + // Log current program state before resuming + if (emulator_ != nullptr) { + auto& warp0 = emulator_->get_warp(0); + vortex::Word current_pc = warp0.PC; + vortex::Word dpc = dpc_; + dm_log("[DM] Resume state: PC=0x%0*llx, DPC=0x%0*llx, halt_requested=%d\n", + (XLEN == 64) ? 16 : 8, (unsigned long long)current_pc, + (XLEN == 64) ? 16 : 8, (unsigned long long)dpc, + halt_requested_ ? 
1 : 0); + } + + bool do_step = single_step || dcsr_.step; + if (do_step) { + // Set single-step flag so emulator will execute one instruction then halt + set_single_step_active(true); + set_halt_requested(false); // Clear halt to allow execution + // No need to resume - just clearing halt_requested_ is enough + dm_log("[DM] Single-step mode: halt_requested cleared, will execute one instruction\n"); + resumeack_ = true; + } else { + // Clear halt flag to allow continuous execution + set_halt_requested(false); + set_single_step_active(false); + // No need to resume - just clearing halt_requested_ is enough + dm_log("[DM] Continuous execution resumed: halt_requested=%d, single_step_active=%d\n", + halt_requested_ ? 1 : 0, single_step_active_ ? 1 : 0); + resumeack_ = true; + } + update_dmstatus(); +} + +bool DebugModule::hart_is_halted() const +{ + return is_halted_ || halt_requested_; +} + +bool DebugModule::is_halt_requested() const +{ + return halt_requested_; +} + +bool DebugModule::is_single_step_active() const +{ + return single_step_active_; +} + +bool DebugModule::is_debug_mode_enabled() const +{ + return debug_mode_enabled_; +} + +void DebugModule::set_halt_requested(bool halt) +{ + halt_requested_ = halt; +} + +void DebugModule::set_single_step_active(bool step) +{ + single_step_active_ = step; +} + +void DebugModule::set_debug_mode_enabled(bool enabled) +{ + debug_mode_enabled_ = enabled; +} + +bool DebugModule::has_breakpoint(uint32_t addr) const +{ + return software_breakpoints_.find(addr) != software_breakpoints_.end(); +} + +void DebugModule::add_breakpoint(uint32_t addr) +{ + if (has_breakpoint(addr)) { + return; // Already has breakpoint + } + // Read and store the original instruction (should be called before EBREAK is written) + uint32_t original = static_cast(read_program_memory(addr, sizeof(uint32_t))); // Read 4-byte instruction + software_breakpoints_[addr] = original; +} + +void DebugModule::remove_breakpoint(uint32_t addr) +{ + auto it = 
software_breakpoints_.find(addr); + if (it == software_breakpoints_.end()) { + return; // No breakpoint at this address + } + // Restore the original instruction + write_program_memory(addr, it->second, sizeof(uint32_t)); // Write 4-byte instruction + software_breakpoints_.erase(it); +} + +// Notification from emulator when program completes naturally +void DebugModule::notify_program_completed(vortex::Word final_pc) +{ + // Only process if we weren't already explicitly halted + if (!is_halted_ && !halt_requested_) { + dm_log("[DM] Program completed naturally at PC=0x%0*llx, halting hart\n", + (XLEN == 64) ? 16 : 8, (unsigned long long)final_pc); + + // Update DPC to final PC + direct_write_register(0x7B1, final_pc); + + // Mark as halted (cause 0 = reserved, but we use it for natural completion) + is_halted_ = true; + set_halt_requested(true); + dcsr_.cause = 0; // Natural completion + } +} + + +void DebugModule::run_test_idle() +{ + static uint64_t log_counter = 0; + if (!is_halted_ && !halt_requested_) { + if ((log_counter++ % 1000) == 0 && emulator_ != nullptr) { + auto& warp0 = emulator_->get_warp(0); + vortex::Word pc = warp0.PC; + dm_log("[DM] run_test_idle: hart running, PC=0x%0*llx\n", + (XLEN == 64) ? 
16 : 8, (unsigned long long)pc); + } + } else { + // Only log occasionally when halted too + if ((log_counter++ % 1000) == 0) { + dm_log("[DM] run_test_idle: hart is halted, nothing to do\n"); + } + } +} + diff --git a/sim/simx/debug_module.h b/sim/simx/debug_module.h new file mode 100644 index 0000000000..d7ca5bd1d2 --- /dev/null +++ b/sim/simx/debug_module.h @@ -0,0 +1,308 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include "types.h" + +namespace vortex { + class Emulator; +} + + +#define DM_DATA0 0x04 +#define DM_DMCONTROL 0x10 +#define DM_DMSTATUS 0x11 +#define DM_HARTINFO 0x12 +#define DM_ABSTRACTCS 0x16 +#define DM_COMMAND 0x17 +#define DM_ABSTRACTAUTO 0x18 +#define DM_DMCONTROL2 0x1a +#define DM_AUTHDATA 0x30 +#define DM_SBCS 0x38 +#define DM_SBADDRESS0 0x39 +#define DM_SBDATA0 0x3c + + +template +static inline T set_field(T reg, T mask, T val) { + return (reg & ~mask) | (val & mask); +} + +template +static inline T get_field(T reg, T mask) { + return (reg & mask); +} + +template +static inline T set_field_pos(T reg, T mask, unsigned pos, unsigned val) { + return set_field(reg, mask, static_cast(val) << pos); +} + +template +static inline T get_field_pos(T reg, T mask, unsigned pos) { + return (reg & mask) >> pos; +} + + +struct dmcontrol_t { + bool dmactive; + bool ndmreset; + bool clrresethaltreq; + bool setresethaltreq; + bool hartreset; + bool ackhavereset; + bool resumereq; + bool haltreq; + unsigned hartsel; + bool hasel; + + dmcontrol_t() : dmactive(true), ndmreset(false), clrresethaltreq(false), + setresethaltreq(false), hartreset(false), ackhavereset(false), + resumereq(false), haltreq(false), hartsel(0), hasel(false) {} +}; + + +struct dmstatus_t { + unsigned version; + bool confstrptrvalid; + bool hasresethaltreq; + bool authbusy; + bool authenticated; + bool anyhalted; + bool allhalted; + bool anyrunning; + bool allrunning; + bool anyunavail; + bool allunavail; + bool anynonexistent; + bool 
allnonexistent; + bool anyresumeack; + bool allresumeack; + bool anyhavereset; + bool allhavereset; + + bool impebreak; + bool sr32; // hart supports 32-bit abstract register access + bool sr64; // hart supports 64-bit abstract register access + bool sr128; // hart supports 128-bit abstract register access + + + dmstatus_t() : version(2), confstrptrvalid(false), hasresethaltreq(false), + authbusy(false), authenticated(true), + anyhalted(false), allhalted(false), + anyrunning(false), allrunning(false), + anyunavail(false), allunavail(false), + anynonexistent(false), allnonexistent(false), + anyresumeack(false), allresumeack(true), + anyhavereset(false), allhavereset(false), + impebreak(false), + sr32(false), sr64(false), sr128(false) {} +}; + + +struct abstractcs_t { + unsigned datacount; + unsigned progbufsize; + bool busy; + unsigned cmderr; + + // datacount: number of data registers (1 for RV32, 2 for RV64) + // OpenOCD uses this to determine XLEN + abstractcs_t() : datacount((XLEN == 64) ? 2 : 1), progbufsize(0), busy(false), cmderr(0) {} +}; + +class DebugModule { +public: + // Constructor: Initializes the RISC-V Debug Module with a simulated memory space. + // Use case: Creates a debug module instance that implements the RISC-V Debug Specification 0.13. + DebugModule(vortex::Emulator* emulator = nullptr, size_t mem_size = 4096); + + // Reads a value from a DMI (Debug Module Interface) register by address. + // Use case: Called by JTAG DTM to read debug module registers (dmcontrol, dmstatus, abstractcs, etc.). + bool dmi_read(unsigned address, uint32_t *value); + + // Writes a value to a DMI (Debug Module Interface) register by address. + // Use case: Called by JTAG DTM to write debug module registers (dmcontrol, command, data0, etc.). 
+ bool dmi_write(unsigned address, uint32_t value); + + + static void set_verbose_logging(bool enable); + static bool verbose_logging(); + + + vortex::Word direct_read_register(uint16_t regaddr); + void direct_write_register(uint16_t regaddr, vortex::Word value); + bool read_memory_block(uint64_t addr, uint8_t* dest, size_t len) const; + bool write_memory_block(uint64_t addr, const uint8_t* src, size_t len); + // Halts the warp (SIMD thread group) and enters debug mode with the specified cause. + // Use case: Called when debugger requests a halt or a breakpoint is hit. + void halt_hart(uint8_t cause); + + // Resumes the warp execution, optionally in single-step mode. + // Use case: Called when debugger requests resume or step execution. + void resume_hart(bool single_step); + + // Returns true if the warp is currently halted. + // Use case: Used to check warp state for status reporting. + bool hart_is_halted() const; + + // Called periodically when JTAG is in Run-Test-Idle state. + // Use case: Allows the debug module to process state updates during idle periods. 
+ void run_test_idle(); + + // Debug flag query methods (read-only) + bool is_halt_requested() const; + bool is_single_step_active() const; + bool is_debug_mode_enabled() const; + + // Debug flag control methods (set flags) + void set_halt_requested(bool halt); + void set_single_step_active(bool step); + void set_debug_mode_enabled(bool enabled); + + // Software breakpoint management + bool has_breakpoint(uint32_t addr) const; + void add_breakpoint(uint32_t addr); + void remove_breakpoint(uint32_t addr); + + // Notification from emulator when program completes + void notify_program_completed(vortex::Word final_pc); + +private: + + dmcontrol_t dmcontrol; + dmstatus_t dmstatus; + abstractcs_t abstractcs; + + vortex::Emulator* emulator_; + + // Debug state flags + bool halt_requested_; + bool single_step_active_; + bool debug_mode_enabled_; + + // Debug Control and Status Register (DCSR) + struct DCSR { + uint32_t prv : 2; + uint32_t step : 1; + uint32_t ebreakm : 1; + uint32_t ebreaks : 1; + uint32_t ebreaku : 1; + uint32_t stopcount : 1; + uint32_t stoptime : 1; + uint32_t cause : 4; + uint32_t mprven : 1; + uint32_t nmip : 1; + uint32_t reserved : 14; + uint32_t xdebugver : 4; + + DCSR() : prv(3), step(0), ebreakm(0), ebreaks(0), ebreaku(0), + stopcount(0), stoptime(0), cause(0), mprven(0), + nmip(0), reserved(0), xdebugver(4) {} + + uint32_t to_u32() const { + uint32_t value = 0; + value |= (prv & 0x3); + value |= (step & 0x1) << 2; + value |= (ebreakm & 0x1) << 3; + value |= (ebreaks & 0x1) << 4; + value |= (ebreaku & 0x1) << 5; + value |= (stopcount & 0x1) << 6; + value |= (stoptime & 0x1) << 7; + value |= (cause & 0xF) << 8; + value |= (mprven & 0x1) << 12; + value |= (nmip & 0x1) << 13; + value |= (xdebugver & 0xF) << 28; + return value; + } + + void from_u32(uint32_t value) { + prv = value & 0x3; + step = (value >> 2) & 0x1; + ebreakm = (value >> 3) & 0x1; + ebreaks = (value >> 4) & 0x1; + ebreaku = (value >> 5) & 0x1; + stopcount = (value >> 6) & 0x1; + 
stoptime = (value >> 7) & 0x1; + cause = (value >> 8) & 0xF; + mprven = (value >> 12) & 0x1; + nmip = (value >> 13) & 0x1; + xdebugver = 4; + reserved = 0; + } + } dcsr_; + + // Debug Program Counter (DPC) - PC value when entering debug mode + vortex::Word dpc_; + + // Debug state tracking + bool resumeack_; + bool havereset_; + bool is_halted_; + + // Software breakpoint storage: address -> original instruction + std::map software_breakpoints_; + + // datacount: number of data registers (1 for RV32, 2 for RV64) + // OpenOCD uses this to determine XLEN + static constexpr unsigned datacount = (XLEN == 64) ? 2 : 1; + uint32_t dmdata[datacount]; + uint32_t data1; // DATA1 register (address 0x5) + uint32_t data2; // DATA2 register (address 0x6) + uint32_t data3; // DATA3 register (address 0x7) + + uint32_t& data0() { return dmdata[0]; } + + + static constexpr unsigned progbufsize = 0; + + + uint32_t command; + + + bool resumereq_prev; + + + std::vector memory; + + // Temporary storage for Access Memory command address + // OpenOCD sets address in DATA0, then data, then executes command + vortex::Word access_mem_addr; + bool access_mem_addr_valid; + + + void reset(); + void update_dmstatus(); + + + bool perform_abstract_command(); + void execute_command(uint32_t cmd); + + + vortex::Word read_register(uint16_t regaddr); + void write_register(uint16_t regaddr, vortex::Word val); + + + vortex::Word read_mem(vortex::Word addr, size_t size = sizeof(vortex::Word)); + void write_mem(vortex::Word addr, vortex::Word val, size_t size = sizeof(vortex::Word)); + + // Program memory access (via emulator) + vortex::Word read_program_memory(vortex::Word addr, size_t size = sizeof(uint32_t)) const; + void write_program_memory(vortex::Word addr, vortex::Word value, size_t size = sizeof(uint32_t)); + + + uint32_t read_dmcontrol(); + uint32_t read_dmstatus(); + uint32_t read_abstractcs(); + uint32_t read_data0(); + uint32_t read_authdata(); + + bool write_dmcontrol(uint32_t value); + bool 
write_command(uint32_t value); + bool write_data0(uint32_t value); + bool write_authdata(uint32_t value); +}; diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 3eb62f9c76..3fe6257db2 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -27,6 +27,7 @@ #include "cluster.h" #include "processor_impl.h" #include "local_mem.h" +#include "debug_module.h" using namespace vortex; @@ -76,6 +77,7 @@ Emulator::Emulator(const Arch &arch, const DCRS &dcrs, Core* core) : arch_(arch) , dcrs_(dcrs) , core_(core) + , debug_module_(nullptr) , warps_(arch.num_warps(), arch.num_threads()) , barriers_(arch.num_barriers(), 0) , ipdom_size_(arch.num_threads()-1) @@ -126,6 +128,10 @@ void Emulator::reset() { active_warps_.set(0); warps_[0].tmask.set(0); wspawn_.valid = false; + + // Reset last inactive warp tracking + last_inactive_warp_id_ = 0; + last_inactive_warp_pc_ = 0; } void Emulator::attach_ram(RAM* ram) { @@ -150,6 +156,11 @@ uint32_t Emulator::fetch(uint32_t wid, uint64_t uuid) { } instr_trace_t* Emulator::step() { + // Check debug module flags first - if halted, don't schedule anything + if (debug_module_ != nullptr && debug_module_->is_halt_requested()) { + return nullptr; + } + int scheduled_warp = -1; // process pending wspawn @@ -175,8 +186,14 @@ instr_trace_t* Emulator::step() { } } - if (scheduled_warp == -1) + if (scheduled_warp == -1) { + // No warp is ready to execute - check if program has completed + if (debug_module_ != nullptr && !active_warps_.any()) { + vortex::Word final_pc = (last_inactive_warp_pc_ != 0) ? 
static_cast(last_inactive_warp_pc_) : warps_.at(0).PC; + debug_module_->notify_program_completed(final_pc); + } return nullptr; + } // get scheduled warp auto& warp = warps_.at(scheduled_warp); @@ -197,6 +214,21 @@ instr_trace_t* Emulator::step() { // Fetch auto instr_code = this->fetch(scheduled_warp, uuid); + // Check for software breakpoint (EBREAK instruction) + if (debug_module_ != nullptr) { + bool is_ebreak = (instr_code == 0x00100073) || ((instr_code & 0xFFFF) == 0x9002); + if (is_ebreak && debug_module_->has_breakpoint(warp.PC)) { + // Software breakpoint hit - update DPC with emulator's PC and halt + std::cout << "[EMU] Breakpoint hit at PC=0x" << std::hex << warp.PC << std::dec << std::endl; + vortex::Word emulator_pc = warp.PC; // PC is already Word type + debug_module_->direct_write_register(0x7B1, emulator_pc); // Update DPC register + debug_module_->halt_hart(1); // Cause 1 = ebreak instruction + debug_module_->set_halt_requested(true); + // No need to suspend - halt_requested_ check at start of step() will prevent execution + return nullptr; + } + } + // decode this->decode(instr_code, scheduled_warp, uuid); } else { @@ -212,6 +244,16 @@ instr_trace_t* Emulator::step() { // Execute auto trace = this->execute(*instr, scheduled_warp); + // Check for single-step mode - halt after executing one instruction + if (debug_module_ != nullptr && debug_module_->is_single_step_active()) { + // Update DPC with current PC (which points to next instruction after execution) + vortex::Word emulator_pc = warp.PC; // PC is already Word type + debug_module_->direct_write_register(0x7B1, emulator_pc); // Update DPC register + debug_module_->set_halt_requested(true); + debug_module_->set_single_step_active(false); + // No need to suspend - halt_requested_ check at start of step() will prevent execution + } + return trace; } @@ -223,6 +265,14 @@ int Emulator::get_exitcode() const { return warps_.at(0).ireg_file.at(3).at(0); } +void 
Emulator::set_debug_module(::DebugModule* dm) { + debug_module_ = dm; +} + +::DebugModule* Emulator::get_debug_module() const { + return debug_module_; +} + void Emulator::suspend(uint32_t wid) { assert(!stalled_warps_.test(wid)); stalled_warps_.set(wid); diff --git a/sim/simx/emulator.h b/sim/simx/emulator.h index 0fb1e0b74d..677b6b9239 100644 --- a/sim/simx/emulator.h +++ b/sim/simx/emulator.h @@ -27,6 +27,8 @@ #include "vec_unit.h" #endif +class DebugModule; // Forward declaration (global scope) + namespace vortex { class Arch; @@ -108,6 +110,15 @@ class Emulator { void dcache_write(const void* data, uint64_t addr, uint32_t size); + // Get warp by index (for debug module access) + warp_t& get_warp(uint32_t wid) { + return warps_.at(wid); + } + + // Debug module interface + void set_debug_module(::DebugModule* dm); + ::DebugModule* get_debug_module() const; + private: uint32_t fetch(uint32_t wid, uint64_t uuid); @@ -144,6 +155,7 @@ class Emulator { const Arch& arch_; const DCRS& dcrs_; Core* core_; + ::DebugModule* debug_module_; std::vector warps_; WarpMask active_warps_; @@ -155,6 +167,10 @@ class Emulator { Word csr_mscratch_; wspawn_t wspawn_; + // Track last warp to become inactive (for program completion detection) + uint32_t last_inactive_warp_id_ = 0; // ID of last warp that became inactive + uint32_t last_inactive_warp_pc_ = 0; // PC of last warp when it became inactive + #ifdef EXT_TCU_ENABLE TensorUnit::Ptr tensor_unit_; #endif diff --git a/sim/simx/execute.cpp b/sim/simx/execute.cpp index 38292605b5..1084363ff0 100644 --- a/sim/simx/execute.cpp +++ b/sim/simx/execute.cpp @@ -1540,6 +1540,8 @@ instr_trace_t* Emulator::execute(const Instr &instr, uint32_t wid) { DP(3, "*** New Tmask=" << next_tmask); warp.tmask = next_tmask; if (!next_tmask.any()) { + last_inactive_warp_id_ = wid; + last_inactive_warp_pc_ = static_cast(warp.PC); active_warps_.reset(wid); } } diff --git a/sim/simx/jtag_dtm.cpp b/sim/simx/jtag_dtm.cpp new file mode 100644 index 
0000000000..8077de0f32 --- /dev/null +++ b/sim/simx/jtag_dtm.cpp @@ -0,0 +1,129 @@ +#include "jtag_dtm.h" +#include + +// Constructor: Initializes the JTAG Debug Transport Module (DTM) with a reference to the Debug Module. +// Use case: Creates a DTM instance that bridges JTAG protocol to RISC-V debug module operations. +jtag_dtm_t::jtag_dtm_t(DebugModule* dm) + : dm(dm), + _tck(0), _tms(0), _tdi(0), _tdo(0), + _state(TEST_LOGIC_RESET), + ir(IR_IDCODE), dr(0), dr_length(1), + abits(7), busy_stuck(false), dmi(0) {} + +// Resets the DTM to its initial state (TEST_LOGIC_RESET). +// Use case: Called when JTAG reset is detected or when the debugger needs to reinitialize the DTM. +void jtag_dtm_t::reset() { + _state = TEST_LOGIC_RESET; + ir = IR_IDCODE; + busy_stuck = false; + dmi = 0; +} + +// Updates JTAG pin states and advances the TAP state machine based on TCK/TMS/TDI transitions. +// Use case: Called for each JTAG clock cycle to simulate the TAP controller state machine. +// The state transition table implements the standard JTAG TAP state machine where each state has +// two possible next states based on the TMS value (0 or 1). 
+void jtag_dtm_t::set_pins(bool tck, bool tms, bool tdi) {
+  static const jtag_state_t next[16][2] = {
+    {RUN_TEST_IDLE, TEST_LOGIC_RESET},
+    {RUN_TEST_IDLE, SELECT_DR_SCAN},
+    {CAPTURE_DR, SELECT_IR_SCAN},
+    {SHIFT_DR, EXIT1_DR},
+    {SHIFT_DR, EXIT1_DR},
+    {PAUSE_DR, UPDATE_DR},
+    {PAUSE_DR, EXIT2_DR},
+    {SHIFT_DR, UPDATE_DR},
+    {RUN_TEST_IDLE, SELECT_DR_SCAN},
+    {CAPTURE_IR, TEST_LOGIC_RESET},
+    {SHIFT_IR, EXIT1_IR},
+    {SHIFT_IR, EXIT1_IR},
+    {PAUSE_IR, UPDATE_IR},
+    {PAUSE_IR, EXIT2_IR},
+    {SHIFT_IR, UPDATE_IR},
+    {RUN_TEST_IDLE, SELECT_DR_SCAN}
+  };
+
+  // Rising edge of TCK: shift in the previously latched TDI, then advance the TAP state using the latched TMS
+  if (!_tck && tck) {
+    switch (_state) {
+      case SHIFT_DR: dr >>= 1; dr |= (uint64_t)_tdi << (dr_length - 1); break;
+      case SHIFT_IR: ir >>= 1; ir |= _tdi << 4; break;
+      default: break;
+    }
+    _state = next[_state][_tms];
+  }
+  // Falling edge of TCK: update TDO and trigger register operations
+  else if (_tck && !tck) {
+    switch (_state) {
+      case CAPTURE_DR: capture_dr(); break;
+      case UPDATE_DR: update_dr(); break;
+      case SHIFT_DR: _tdo = dr & 1; break;
+      case SHIFT_IR: _tdo = ir & 1; break;
+      default: break;
+    }
+  }
+
+  _tck = tck;
+  _tms = tms;
+  _tdi = tdi;
+}
+
+// Captures data from the selected register into the DR shift register based on current IR instruction.
+// Use case: Called during CAPTURE_DR state to prepare data for shifting out via TDO.
+// The captured data depends on the instruction register (IR) value:
+// - IR_IDCODE: Returns a dummy ID code
+// - IR_DTMCONTROL: Returns dtmcs with version [3:0], abits [9:4] and dmistat [11:10]
+// - IR_DBUS: Returns the result of the previous DMI operation (read data or write status)
+// - IR_BYPASS: Returns 0 for bypass mode
+void jtag_dtm_t::capture_dr() {
+  switch (ir) {
+    case IR_IDCODE: dr = 0xdeadbeef; dr_length = 32; break;
+    case IR_DTMCONTROL: {
+      uint32_t dmistat = busy_stuck ? 2 : 0;  // dtmcs.dmistat: 0 = no error, 2 = an operation failed
+      dr = (dmistat << 10) | (abits << 4) | 1;  // dmistat [11:10] | abits [9:4] | version [3:0] = 1
+      dr_length = 32;
+      break;
+    }
+    case IR_DBUS:
+      dr = dmi;
+      dr_length = abits + 34;
+      break;
+    case IR_BYPASS: dr = 0; dr_length = 1; break;
+    default: dr = 0; dr_length = 1; break;
+  }
+}
+
+// Updates the selected register with data from the DR shift register after shifting is complete.
+// Use case: Called during UPDATE_DR state to execute DMI read/write operations based on shifted data.
+// For IR_DBUS, the DR contains: [addr][data][op] where op=1 (read), op=2 (write), op=0 (nop).
+// After the operation, the result is stored in 'dmi' for the next capture_dr() call.
+void jtag_dtm_t::update_dr() {
+  if (ir == IR_DBUS) {
+    uint32_t op = dr & 0x3;
+    uint32_t data = (dr >> 2) & 0xFFFFFFFF;
+    uint32_t addr = (dr >> 34) & ((1ULL << abits) - 1);
+
+    bool success = true;
+    if (op == 1) {
+      // DMI read operation: read from debug module and store result with status bits [1:0]
+      uint32_t val = 0;
+      success = dm->dmi_read(addr, &val);
+      // Status codes (dmi.op on read-back): 0=success, 2=failed, 3=busy
+      // Unimplemented addresses return false, which is reported as "failed" (status=2)
+      uint32_t status = success ? 0 : 2;
+      dmi = ((uint64_t)val << 2) | status;
+    } else if (op == 2) {
+      // DMI write operation: write to debug module and store only status bits [1:0]
+      success = dm->dmi_write(addr, data);
+      // Status codes (dmi.op on read-back): 0=success, 2=failed, 3=busy
+      // Unimplemented addresses return false, which is reported as "failed" (status=2)
+      uint32_t status = success ? 0 : 2;
+      dmi = status;
+    } else {
+      // No-op: clear the result
+      dmi = 0;
+    }
+
+    busy_stuck = !success;
+  }
+}
diff --git a/sim/simx/jtag_dtm.h b/sim/simx/jtag_dtm.h
new file mode 100644
index 0000000000..83ff80c3ff
--- /dev/null
+++ b/sim/simx/jtag_dtm.h
@@ -0,0 +1,74 @@
+#pragma once
+#include
+#include "debug_module.h"
+
+enum jtag_state_t {
+  TEST_LOGIC_RESET,
+  RUN_TEST_IDLE,
+  SELECT_DR_SCAN,
+  CAPTURE_DR,
+  SHIFT_DR,
+  EXIT1_DR,
+  PAUSE_DR,
+  EXIT2_DR,
+  UPDATE_DR,
+  SELECT_IR_SCAN,
+  CAPTURE_IR,
+  SHIFT_IR,
+  EXIT1_IR,
+  PAUSE_IR,
+  EXIT2_IR,
+  UPDATE_IR
+};
+
+class jtag_dtm_t {
+public:
+  // Constructor: Initializes the JTAG Debug Transport Module (DTM) with a reference to the Debug Module.
+  // Use case: Creates a DTM instance that bridges JTAG protocol to RISC-V debug module operations.
+  explicit jtag_dtm_t(DebugModule* dm);
+
+  // Resets the DTM to its initial state (TEST_LOGIC_RESET).
+  // Use case: Called when JTAG reset is detected or when the debugger needs to reinitialize the DTM.
+  void reset();
+
+  // Updates JTAG pin states and advances the TAP state machine based on TCK/TMS/TDI transitions.
+  // Use case: Called for each JTAG clock cycle to simulate the TAP controller state machine.
+  void set_pins(bool tck, bool tms, bool tdi);
+
+  // Returns the current TDO (Test Data Out) pin value.
+  // Use case: Used by the remote bitbang protocol to read data being shifted out of the DTM.
+  bool tdo() const { return _tdo; }
+
+  // Returns the current JTAG TAP state machine state.
+  // Use case: Used to check if the DTM is in a specific state (e.g., RUN_TEST_IDLE) for protocol handling.
+  jtag_state_t state() const { return _state; }
+
+  // Forwards run_test_idle() call to the debug module.
+  // Use case: Called periodically when JTAG is in Run-Test-Idle state to allow debug module to process state updates.
+  void run_test_idle() { dm->run_test_idle(); }
+
+private:
+  DebugModule* dm;
+
+  bool _tck, _tms, _tdi, _tdo;
+  jtag_state_t _state;
+  uint32_t ir;
+  uint64_t dr;
+  unsigned dr_length;
+  const unsigned abits;
+  bool busy_stuck;
+  uint64_t dmi;
+
+  static constexpr uint32_t IR_IDCODE = 0x01;
+  static constexpr uint32_t IR_DTMCONTROL = 0x10;
+  static constexpr uint32_t IR_DBUS = 0x11;
+  static constexpr uint32_t IR_BYPASS = 0x1F;
+
+  // Captures data from the selected register into the DR shift register based on current IR instruction.
+  // Use case: Called during CAPTURE_DR state to prepare data for shifting out via TDO.
+  void capture_dr();
+
+  // Updates the selected register with data from the DR shift register after shifting is complete.
+  // Use case: Called during UPDATE_DR state to execute DMI read/write operations based on shifted data.
+  void update_dr();
+};
diff --git a/sim/simx/main.cpp b/sim/simx/main.cpp
index cd79dc57c4..2bbda79e59 100644
--- a/sim/simx/main.cpp
+++ b/sim/simx/main.cpp
@@ -25,11 +25,15 @@
 #include
 #include "core.h"
 #include "VX_types.h"
+#include "emulator.h"
+#include "debug_module.h"
+#include "jtag_dtm.h"
+#include "remote_bitbang.h"
 
 using namespace vortex;
 
 static void show_usage() {
-   std::cout << "Usage: [-c ] [-w ] [-t ] [-v: vector-test] [-s: stats] [-h: help] " << std::endl;
+   std::cout << "Usage: [-c ] [-w ] [-t ] [-v: vector-test] [-s: stats] [-d: debug-mode] [-p : RBB port] [-V: verbose debug logging] [-h: help] " << std::endl;
 }
 
 uint32_t num_threads = NUM_THREADS;
@@ -37,11 +41,14 @@ uint32_t num_warps = NUM_WARPS;
 uint32_t num_cores = NUM_CORES;
 bool showStats = false;
 bool vector_test = false;
+bool debug_mode = false;
+bool debug_verbose = false; // Verbose debug module logging
+uint16_t rbb_port = 9823; // Default OpenOCD remote bitbang port
 const char* program = nullptr;
 
 static void parse_args(int argc, char **argv) {
   int c;
-  while ((c = getopt(argc, argv, "t:w:c:vsh")) != -1) {
+  while ((c = getopt(argc, argv, "t:w:c:vshdp:V")) != -1) {
     switch (c) {
     case 't':
       num_threads = atoi(optarg);
@@ -58,6 +65,15 @@ static void parse_args(int argc, char **argv) {
     case 's':
       showStats = true;
       break;
+    case 'd':
+      debug_mode = true;
+      break;
+    case 'p':
+      rbb_port = static_cast<uint16_t>(atoi(optarg));
+      break;
+    case 'V':
+      debug_verbose = true;
+      break;
     case 'h':
       show_usage();
       exit(0);
@@ -70,8 +86,10 @@ static void parse_args(int argc, char **argv) {
 
   if (optind < argc) {
     program = argv[optind];
-    std::cout << "Running " << program << "..." << std::endl;
-  } else {
+    if (!debug_mode) {
+      std::cout << "Running " << program << "..." << std::endl;
+    }
+  } else if (!debug_mode) {
     show_usage();
     exit(-1);
   }
@@ -104,7 +122,7 @@ int main(int argc, char **argv) {
     processor.dcr_write(VX_DCR_BASE_MPM_CLASS, 0);
 
     // load program
-    {
+    if (program) {
       std::string program_ext(fileExtension(program));
       if (program_ext == "bin") {
         ram.loadBinImage(program, startup_addr);
@@ -118,16 +136,79 @@ int main(int argc, char **argv) {
 #ifndef NDEBUG
-    std::cout << "[VXDRV] START: program=" << program << std::endl;
+    std::cout << "[VXDRV] START: program=" << (program ? program : "(none)") << std::endl;
 #endif
-    // run simulation
-  #ifdef EXT_V_ENABLE
-    // vector test exitcode is a special case
-    if (vector_test) return (processor.run() != 1);
-  #endif
-    // else continue as normal
-    processor.run();
 
-    // read exitcode from @MPM.1
-    ram.read(&exitcode, (IO_MPM_ADDR + 8), 4);
+    if (debug_mode) {
+      // Debug mode: run RBB server in infinite loop
+      std::cout << "[DEBUG] Starting debug mode on port " << rbb_port << std::endl;
+
+      // Set verbose logging for debug module based on command-line flag
+      DebugModule::set_verbose_logging(debug_verbose);
+
+      // Get emulator from processor
+      Emulator* emulator = processor.get_first_emulator();
+
+      // Reset emulator to read startup address from DCRs and initialize PC
+      if (emulator != nullptr) {
+        std::cout << "[DEBUG] Resetting emulator to initialize PC from DCRs..." << std::endl;
+        emulator->reset();
+        auto& warp0 = emulator->get_warp(0);
+        std::cout << "[DEBUG] Emulator reset complete. PC = 0x" << std::hex << warp0.PC << std::dec << std::endl;
+      }
+
+      // Create debug module with emulator reference
+      DebugModule dm(emulator);
+
+      // Set debug module in emulator so it can check flags
+      if (emulator != nullptr) {
+        emulator->set_debug_module(&dm);
+      }
+
+      // Halt the program at startup so debugger can control execution
+      // This ensures the program doesn't run until the debugger explicitly resumes it
+      dm.set_debug_mode_enabled(true);
+      if (emulator != nullptr) {
+        // Update DPC with initial PC value before halting
+        auto& warp0 = emulator->get_warp(0);
+        vortex::Word initial_pc = warp0.PC; // PC is already Word type
+        dm.direct_write_register(0x7B1, initial_pc); // Set DPC to initial PC
+        // Note: We don't need to suspend the warp here because halt_requested_
+        // check at the start of step() will prevent execution
+      }
+      // Halt the hart (cause 0 = reserved, but we use it for initial halt)
+      // This sets halt_requested and is_halted flags, and updates DCSR
+      dm.halt_hart(0); // Cause 0 for initial halt state
+
+      // Initialize and reset simulation platform
+      SimPlatform::instance().initialize();
+      SimPlatform::instance().reset();
+
+      // Create JTAG DTM
+      jtag_dtm_t dtm(&dm);
+
+      // Create remote bitbang server
+      remote_bitbang_t rbb(rbb_port, &dtm);
+
+      std::cout << "[DEBUG] Remote bitbang server ready. Waiting for OpenOCD connection..." << std::endl;
+
+      // Debug loop: advance simulation and handle JTAG communication
+      while (true) {
+        // Advance simulation by one cycle
+        SimPlatform::instance().tick();
+        // Handle JTAG/debugger communication
+        rbb.tick();
+      }
+    } else {
+      // run simulation
+    #ifdef EXT_V_ENABLE
+      // vector test exitcode is a special case
+      if (vector_test) return (processor.run() != 1);
+    #endif
+      // else continue as normal
+      processor.run();
+
+      // read exitcode from @MPM.1
+      ram.read(&exitcode, (IO_MPM_ADDR + 8), 4);
+    }
   }
 
   return exitcode;
diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp
index d3b730dc8b..51f5a02c9c 100644
--- a/sim/simx/processor.cpp
+++ b/sim/simx/processor.cpp
@@ -13,6 +13,8 @@
 
 #include "processor.h"
 #include "processor_impl.h"
+#include "emulator.h"
+#include "core.h"
 
 using namespace vortex;
 
@@ -159,6 +161,25 @@ ProcessorImpl::PerfStats ProcessorImpl::perf_stats() const {
   return perf;
 }
 
+Emulator* ProcessorImpl::get_first_emulator() const {
+  if (clusters_.empty()) {
+    return nullptr;
+  }
+  auto& cluster = clusters_.at(0);
+  if (!cluster || cluster->sockets().empty()) {
+    return nullptr;
+  }
+  auto& socket = cluster->sockets().at(0);
+  if (!socket || socket->cores().empty()) {
+    return nullptr;
+  }
+  auto& core = socket->cores().at(0);
+  if (!core) {
+    return nullptr;
+  }
+  return &core->emulator();
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 
 Processor::Processor(const Arch& arch)
@@ -196,6 +217,10 @@ void Processor::dcr_write(uint32_t addr, uint32_t value) {
   return impl_->dcr_write(addr, value);
 }
 
+Emulator* Processor::get_first_emulator() const {
+  return impl_->get_first_emulator();
+}
+
 #ifdef VM_ENABLE
 int16_t Processor::set_satp_by_addr(uint64_t base_addr) {
   uint16_t asid = 0;
diff --git a/sim/simx/processor.h b/sim/simx/processor.h
index 741b04f57d..acb0e015b9 100644
--- a/sim/simx/processor.h
+++ b/sim/simx/processor.h
@@ -22,6 +22,7 @@ namespace vortex {
 class Arch;
 class RAM;
 class ProcessorImpl;
+class Emulator;
 #ifdef VM_ENABLE
 class SATP_t;
 #endif
@@ -36,6 +37,9 @@ class Processor {
   int run();
 
   void dcr_write(uint32_t addr, uint32_t value);
+
+  Emulator* get_first_emulator() const;
+
 #ifdef VM_ENABLE
   bool is_satp_unset();
   uint8_t get_satp_mode();
diff --git a/sim/simx/processor_impl.h b/sim/simx/processor_impl.h
index 952b28222f..29bb26b1ed 100644
--- a/sim/simx/processor_impl.h
+++ b/sim/simx/processor_impl.h
@@ -21,6 +21,8 @@
 
 namespace vortex {
 
+class Emulator;
+
 class ProcessorImpl {
 public:
   struct PerfStats {
@@ -46,6 +48,8 @@ class ProcessorImpl {
 
   PerfStats perf_stats() const;
 
+  Emulator* get_first_emulator() const;
+
 private:
 
   void reset();
diff --git a/sim/simx/remote_bitbang.cpp b/sim/simx/remote_bitbang.cpp
new file mode 100644
index 0000000000..eea2738f5a
--- /dev/null
+++ b/sim/simx/remote_bitbang.cpp
@@ -0,0 +1,193 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifndef AF_INET
+#include
+#endif
+#ifndef INADDR_ANY
+#include
+#endif
+
+#include
+#include
+
+#include "remote_bitbang.h"
+
+#if 1
+# define D(x) x
+#else
+# define D(x)
+#endif
+
+// Constructor: Creates a TCP server listening on the specified port for OpenOCD remote bitbang connections.
+// Use case: Initializes the server that will receive JTAG commands from OpenOCD and forward them to the DTM.
+remote_bitbang_t::remote_bitbang_t(uint16_t port, jtag_dtm_t *tap) :
+  tap(tap),
+  socket_fd(0),
+  client_fd(0),
+  recv_start(0),
+  recv_end(0)
+{
+  socket_fd = socket(AF_INET, SOCK_STREAM, 0);
+  if (socket_fd == -1) {
+    fprintf(stderr, "remote_bitbang failed to make socket: %s (%d)\n", strerror(errno), errno);
+    abort();
+  }
+
+  // Add O_NONBLOCK to the current status flags instead of overwriting them, and fail loudly if that is refused.
+  const int flags = fcntl(socket_fd, F_GETFL, 0);
+  if (flags == -1 || fcntl(socket_fd, F_SETFL, flags | O_NONBLOCK) == -1) {
+    fprintf(stderr, "remote_bitbang failed to set O_NONBLOCK: %s (%d)\n", strerror(errno), errno);
+    abort();
+  }
+  int reuseaddr = 1;
+  if (setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr,
+        sizeof(int)) == -1) {
+    fprintf(stderr, "remote_bitbang failed setsockopt: %s (%d)\n", strerror(errno), errno);
+    abort();
+  }
+
+  struct sockaddr_in addr;
+  memset(&addr, 0, sizeof(addr));
+  addr.sin_family = AF_INET;
+  addr.sin_addr.s_addr = INADDR_ANY;
+  addr.sin_port = htons(port);
+
+  if (bind(socket_fd, (struct sockaddr *) &addr, sizeof(addr)) == -1) {
+    fprintf(stderr, "remote_bitbang failed to bind socket: %s (%d)\n", strerror(errno), errno);
+    abort();
+  }
+
+  if (listen(socket_fd, 1) == -1) {
+    fprintf(stderr, "remote_bitbang failed to listen on socket: %s (%d)\n", strerror(errno), errno);
+    abort();
+  }
+
+  socklen_t addrlen = sizeof(addr);
+  if (getsockname(socket_fd, (struct sockaddr *) &addr, &addrlen) == -1) {
+    fprintf(stderr, "remote_bitbang getsockname failed: %s (%d)\n", strerror(errno), errno);
+    abort();
+  }
+
+  printf("Listening for remote bitbang connection on port %d.\n",
+      ntohs(addr.sin_port));
+  fflush(stdout);
+}
+
+// Accepts a new client connection if one is waiting (non-blocking).
+// Use case: Called when no client is connected to check for and accept new OpenOCD connections.
+void remote_bitbang_t::accept()
+{
+  client_fd = ::accept(socket_fd, NULL, NULL);
+  if (client_fd == -1) {
+    // No pending connection, or a transient interruption: just try again on the next tick.
+    if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR) {
+      fprintf(stderr, "failed to accept on socket: %s (%d)\n", strerror(errno), errno);
+      abort();
+    }
+  } else {
+    const int flags = fcntl(client_fd, F_GETFL, 0);
+    fcntl(client_fd, F_SETFL, (flags == -1 ? 0 : flags) | O_NONBLOCK);
+  }
+}
+
+// Processes one iteration of the server loop: accepts connections or executes pending JTAG commands.
+// Use case: Called repeatedly in the main loop to handle incoming OpenOCD connections and JTAG protocol.
+void remote_bitbang_t::tick()
+{
+  if (client_fd > 0) {
+    execute_commands();
+  } else {
+    this->accept();
+  }
+}
+
+// Executes remote bitbang protocol commands from the connected client.
+// Use case: Processes JTAG pin control commands ('0'-'7'), reads TDO ('R'), and handles protocol flow.
+// The protocol uses ASCII characters: '0'-'7' encode TCK/TMS/TDI, 'R' reads TDO, 'r' resets, 'Q' quits.
+// Stops processing when entering RUN_TEST_IDLE to allow the debug module to process state updates.
+void remote_bitbang_t::execute_commands()
+{
+  unsigned total_processed = 0;
+  bool quit = false;
+  bool in_rti = tap->state() == RUN_TEST_IDLE;
+  bool entered_rti = false;
+  while (1) {
+    if (recv_start < recv_end) {
+      unsigned send_offset = 0;
+      while (recv_start < recv_end) {
+        uint8_t command = recv_buf[recv_start];
+
+        switch (command) {
+          case 'B': case 'b': break;  // blink on/off: accepted and ignored
+          case 'r': tap->reset(); break;
+          case '0': tap->set_pins(0, 0, 0); break;
+          case '1': tap->set_pins(0, 0, 1); break;
+          case '2': tap->set_pins(0, 1, 0); break;
+          case '3': tap->set_pins(0, 1, 1); break;
+          case '4': tap->set_pins(1, 0, 0); break;
+          case '5': tap->set_pins(1, 0, 1); break;
+          case '6': tap->set_pins(1, 1, 0); break;
+          case '7': tap->set_pins(1, 1, 1); break;
+          case 'R': send_buf[send_offset++] = tap->tdo() ? '1' : '0'; break;
+          case 'Q': quit = true; break;
+          default:
+            fprintf(stderr, "remote_bitbang got unsupported command '%c'\n", command);
+        }
+        recv_start++;
+        total_processed++;
+        if (!in_rti && tap->state() == RUN_TEST_IDLE) {
+          entered_rti = true;
+          // Call run_test_idle to allow debug module to process state updates
+          // This is where the hart executes instructions when running continuously
+          tap->run_test_idle();
+          break;
+        }
+        in_rti = (tap->state() == RUN_TEST_IDLE);
+      }
+      unsigned sent = 0;
+      while (sent < send_offset) {
+        ssize_t bytes = write(client_fd, send_buf + sent, send_offset - sent);
+        if (bytes == -1) {
+          if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) continue;  // socket is non-blocking: retry
+          fprintf(stderr, "failed to write to socket: %s (%d)\n", strerror(errno), errno);
+          abort();
+        }
+        sent += bytes;
+      }
+    }
+
+    if (quit) {
+      // 'Q' ends the session: drop the client (and any unprocessed bytes) before the early exit below.
+      fprintf(stderr, "Remote Bitbang received 'Q'\n");
+      close(client_fd);
+      client_fd = 0;
+      recv_start = recv_end = 0;
+      break;
+    }
+
+    if (total_processed > buf_size || entered_rti) {
+      break;
+    }
+
+    recv_start = 0;
+    recv_end = read(client_fd, recv_buf, buf_size);
+
+    if (recv_end == -1) {
+      if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) break;  // nothing to read right now
+      fprintf(stderr, "remote_bitbang failed to read on socket: %s (%d)\n", strerror(errno), errno);
+      abort();
+    }
+
+    if (recv_end == 0) {
+      fprintf(stderr, "Received nothing. Quitting.\n");
+      close(client_fd);
+      client_fd = 0;
+      break;
+    }
+  }
+}
diff --git a/sim/simx/remote_bitbang.h b/sim/simx/remote_bitbang.h
new file mode 100644
index 0000000000..a347fa75f2
--- /dev/null
+++ b/sim/simx/remote_bitbang.h
@@ -0,0 +1,39 @@
+#ifndef REMOTE_BITBANG_H
+#define REMOTE_BITBANG_H
+
+#include
+
+#include "jtag_dtm.h"
+
+class remote_bitbang_t
+{
+public:
+  // Constructor: Creates a TCP server listening on the specified port for OpenOCD remote bitbang connections.
+  // Use case: Initializes the server that will receive JTAG commands from OpenOCD and forward them to the DTM.
+  remote_bitbang_t(uint16_t port, jtag_dtm_t *tap);
+
+  // Processes one iteration of the server loop: accepts connections or executes pending JTAG commands.
+  // Use case: Called repeatedly in the main loop to handle incoming OpenOCD connections and JTAG protocol.
+  void tick();
+
+private:
+  jtag_dtm_t *tap;
+
+  int socket_fd;
+  int client_fd;
+
+  static const ssize_t buf_size = 64 * 1024;
+  char send_buf[buf_size];
+  char recv_buf[buf_size];
+  ssize_t recv_start, recv_end;
+
+  // Accepts a new client connection if one is waiting (non-blocking).
+  // Use case: Called when no client is connected to check for and accept new OpenOCD connections.
+  void accept();
+
+  // Executes remote bitbang protocol commands from the connected client.
+  // Use case: Processes JTAG pin control commands ('0'-'7'), reads TDO ('R'), and handles protocol flow.
+  void execute_commands();
+};
+
+#endif
diff --git a/sim/simx/socket.h b/sim/simx/socket.h
index f8c266d05f..f538042e75 100644
--- a/sim/simx/socket.h
+++ b/sim/simx/socket.h
@@ -71,6 +71,10 @@ class Socket : public SimObject {
 
   PerfStats perf_stats() const;
 
+  const std::vector& cores() const {
+    return cores_;
+  }
+
 private:
   uint32_t socket_id_;
   Cluster* cluster_;
diff --git a/sim/simx/tensor_unit.cpp b/sim/simx/tensor_unit.cpp
index 1bb6da3a37..763f6c9488 100644
--- a/sim/simx/tensor_unit.cpp
+++ b/sim/simx/tensor_unit.cpp
@@ -71,6 +71,18 @@ struct FMA {
   }
 };
 
+template <>
+struct FMA {
+  static float eval(uint32_t a, uint32_t b, float c) {
+    auto xa = rv_xtof_s(a, 8, 10, 0, nullptr);
+    auto xb = rv_xtof_s(b, 8, 10, 0, nullptr);
+    auto xab= rv_fmul_s(xa, xb, 0, nullptr);
+    auto xc = bit_cast(c);
+    auto xd = rv_fadd_s(xab, xc, 0, nullptr);
+    return bit_cast(xd);
+  }
+};
+
 template <>
 struct FMA {
   static uint16_t eval(uint16_t a, uint16_t b, uint16_t c) {
@@ -145,55 +157,48 @@ struct FEDP{
 using PFN_FEDP = uint32_t (*)(const reg_data_t*, const reg_data_t*, uint32_t);
 
 static PFN_FEDP select_FEDP(uint32_t IT, uint32_t OT) {
-  switch (OT) {
-  case vt::fp32::id:
-    switch (IT) {
-    case vt::fp16::id:
-      return FEDP::eval;
-    case vt::bf16::id:
-      return FEDP::eval;
-    default:
-      std::cout << "Error: unsupported mma format: " << IT << " -> " << OT << "!" << std::endl;
-      std::abort();
-    }
-    break;
+  switch (IT) {
   case vt::fp16::id:
-    switch (IT) {
-    case vt::fp16::id:
-      return FEDP::eval;
-    default:
-      std::cout << "Error: unsupported mma format: " << IT << " -> " << OT << "!" << std::endl;
-      std::abort();
+    switch (OT) {
+    case vt::fp32::id: return FEDP::eval;
+    case vt::fp16::id: return FEDP::eval;
+    default: break;
     }
     break;
   case vt::bf16::id:
-    switch (IT) {
-    case vt::bf16::id:
-      return FEDP::eval;
-    default:
-      std::cout << "Error: unsupported mma format: " << IT << " -> " << OT << "!" << std::endl;
-      std::abort();
+    switch (OT) {
+    case vt::fp32::id: return FEDP::eval;
+    case vt::bf16::id: return FEDP::eval;
+    default: break;
     }
     break;
-  case vt::int32::id:
-    switch (IT) {
-    case vt::int8::id:
+  case vt::tf32::id:
+    if (OT == vt::fp32::id) {
+      return FEDP::eval;
+    }
+    break;
+  case vt::int8::id:
+    if (OT == vt::int32::id)
       return FEDP::eval;
-    case vt::uint8::id:
+    break;
+  case vt::uint8::id:
+    if (OT == vt::int32::id)
       return FEDP::eval;
-    case vt::int4::id:
+    break;
+  case vt::int4::id:
+    if (OT == vt::int32::id)
       return FEDP::eval;
-    case vt::uint4::id:
+    break;
+  case vt::uint4::id:
+    if (OT == vt::int32::id)
       return FEDP::eval;
-    default:
-      std::cout << "Error: unsupported mma format: " << IT << " -> " << OT << "!" << std::endl;
-      std::abort();
-    }
     break;
   default:
-    std::cout << "Error: unsupported output type: " << OT << "!" << std::endl;
-    std::abort();
+    break;
   }
+
+  std::cout << "Error: unsupported mma format: " << IT << " -> " << OT << "!" << std::endl;
+  std::abort();
 }
 
 class TensorUnit::Impl {
diff --git a/vortex.cfg b/vortex.cfg
new file mode 100644
index 0000000000..3cdd10e5ea
--- /dev/null
+++ b/vortex.cfg
@@ -0,0 +1,24 @@
+adapter driver remote_bitbang
+remote_bitbang_port 9823
+remote_bitbang_host localhost
+
+# Target setup
+transport select jtag
+adapter speed 100
+
+set _CHIPNAME vortex
+jtag newtap $_CHIPNAME cpu -irlen 5
+
+# --- BEGIN Single-Hart Configuration ---
+
+# Create a single target (Hart 0)
+target create $_CHIPNAME.riscv0 riscv -chain-position $_CHIPNAME.cpu -coreid 0 -rtos hwthread
+
+# Configure the RISC-V specific settings.
+riscv set_reset_timeout_sec 2
+riscv set_command_timeout_sec 2
+
+# --- END Single-Hart Configuration ---
+
+init
+halt
\ No newline at end of file