diff --git a/.gitignore b/.gitignore index 46f1b628..3ddd056f 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,45 @@ driver/xrt/src/m2m driver/xrt/build .vscode +<<<<<<< HEAD +======= +coyote_build* +*xcu55c-fsvh2892-2L-e* +*xcu280-fsvh2892-2L-e* +sol1/ +*_prj +*.gen +*.ip_user_files +*.cache +*.srcs +**/fpga_ips.txt + +CMakeFiles/ +CMakeCache.txt +CMakeDoxy* +*.cmake +accl_on_coyote +*.xsa +accl_log/ +*.xci +packaged_kernel/ +*.hw +*.xml +*.xpr +*/xrt/_deps/ +>>>>>>> dev + +# HLS-generated files to ignore +sol1/ +sources_1/ +packaged_kernel/ +*.app +*.xsa +*.xpr +*.lpr +*.wpc +<<<<<<< HEAD +======= +.run/ +_deps/ +>>>>>>> dev diff --git a/driver/utils/accl_network_utils/include/accl_network_utils.hpp b/driver/utils/accl_network_utils/include/accl_network_utils.hpp index eb5655bd..a253e130 100644 --- a/driver/utils/accl_network_utils/include/accl_network_utils.hpp +++ b/driver/utils/accl_network_utils/include/accl_network_utils.hpp @@ -56,7 +56,7 @@ initialize_accl(std::vector &ranks, int local_rank, xrt::device device = xrt::device(), std::filesystem::path xclbin = "", unsigned int nbufs = 16, unsigned int bufsize = 1024, unsigned int egrsize = 0, - bool rsfec = false); + bool rsfec = false, bool eagerRx_host = false); // Configure the VNX kernel, this function is called by initialize_accl void configure_vnx(vnx::CMAC &cmac, vnx::Networklayer &network_layer, diff --git a/driver/utils/accl_network_utils/src/accl_network_utils.cpp b/driver/utils/accl_network_utils/src/accl_network_utils.cpp index ad14c448..bc2467ab 100644 --- a/driver/utils/accl_network_utils/src/accl_network_utils.cpp +++ b/driver/utils/accl_network_utils/src/accl_network_utils.cpp @@ -452,7 +452,7 @@ std::unique_ptr initialize_accl(std::vector &ranks, int local_rank, bool simulator, acclDesign design, xrt::device device, fs::path xclbin, unsigned int nbufs, unsigned int bufsize, - unsigned int egrsize, bool rsfec) { + unsigned int egrsize, bool rsfec, bool eagerRx_host) { std::size_t world_size = ranks.size(); std::unique_ptr accl; @@ -519,7 +519,7 @@ initialize_accl(std::vector &ranks, int local_rank, accl = std::make_unique(device, cclo_ip, hostctrl_ip, devicemem, rxbufmem); } - accl.get()->initialize(ranks, local_rank, nbufs, bufsize, egrsize, std::min(nbufs*bufsize, (unsigned int)4*1024*1024)); + accl.get()->initialize(ranks, local_rank, nbufs, bufsize, egrsize, std::min(nbufs*bufsize, (unsigned int)4*1024*1024), eagerRx_host); return accl; } } // namespace accl_network_utils diff --git a/driver/xrt/include/accl.hpp b/driver/xrt/include/accl.hpp index a98667fd..cbe7ffe8 100644 --- a/driver/xrt/include/accl.hpp +++ b/driver/xrt/include/accl.hpp @@ -101,7 +101,7 @@ class ACCL { */ void initialize(const std::vector &ranks, int local_rank, int n_egr_rx_bufs = 16, addr_t egr_rx_buf_size = 1024, - addr_t max_egr_size = 1024, addr_t max_rndzv_size = 32*1024); + addr_t max_egr_size = 1024, addr_t max_rndzv_size = 32*1024, bool rxEager_host = false); /** * Get the return code of the last ACCL call. @@ -1101,7 +1101,7 @@ ACCLRequest *barrier(communicatorId comm_id = GLOBAL_COMM, void configure_arithmetic(); void setup_eager_rx_buffers(size_t n_egr_rx_bufs, addr_t egr_rx_buf_size, - const std::vector &devicemem); + const std::vector &devicemem, bool host=false); void setup_eager_rx_buffers(size_t n_egr_rx_bufs, addr_t egr_rx_buf_size, int devicemem) { std::vector mems = {devicemem}; return setup_eager_rx_buffers(n_egr_rx_bufs, egr_rx_buf_size, mems); diff --git a/driver/xrt/src/accl.cpp b/driver/xrt/src/accl.cpp index 9e98efb7..8e21ee01 100644 --- a/driver/xrt/src/accl.cpp +++ b/driver/xrt/src/accl.cpp @@ -128,7 +128,6 @@ ACCLRequest *ACCL::send(BaseBuffer &srcbuf, unsigned int count, if (from_fpga == false) { srcbuf.sync_to_device(); } - options.scenario = operation::send; options.comm = communicators[comm_id].communicators_addr(); options.addr_0 = &srcbuf; @@ -143,7 +142,6 @@ ACCLRequest *ACCL::send(BaseBuffer &srcbuf, unsigned int count, wait(handle); check_return_value("send", handle); } - return handle; } @@ -262,7 +260,6 @@ ACCLRequest *ACCL::recv(BaseBuffer &dstbuf, unsigned int count, } check_return_value("recv", handle); } - return handle; } @@ -302,11 +299,9 @@ ACCLRequest *ACCL::copy(BaseBuffer *srcbuf, BaseBuffer *dstbuf, unsigned int cou "sync_from_device() after waiting" << std::endl; } - if (from_fpga == false) { srcbuf->sync_to_device(); } - options.scenario = operation::copy; options.addr_0 = srcbuf; options.addr_2 = dstbuf; @@ -316,7 +311,6 @@ ACCLRequest *ACCL::copy(BaseBuffer *srcbuf, BaseBuffer *dstbuf, unsigned int cou options.stream_flags = stream_flags; options.waitfor = waitfor; ACCLRequest *handle = call_async(options); - if (!run_async) { wait(handle); if (to_fpga == false) { @@ -324,7 +318,6 @@ ACCLRequest *ACCL::copy(BaseBuffer *srcbuf, BaseBuffer *dstbuf, unsigned int cou } check_return_value("copy", handle); } - return handle; } @@ -1011,6 +1004,8 @@ std::string ACCL::dump_eager_rx_buffers(size_t n_egr_rx_bufs, bool dump_data) { address += 4; val_t addrh = cclo->read(address); address += 4; + val_t max_len = cclo->read(address); + address += 4; val_t rxtag = cclo->read(address); address += 4; val_t rxlen = cclo->read(address); @@ -1018,21 +1013,27 @@ std::string ACCL::dump_eager_rx_buffers(size_t n_egr_rx_bufs, bool dump_data) { val_t rxsrc = cclo->read(address); address += 4; val_t seq = cclo->read(address); + address += 4; + val_t hostBit = cclo->read(address); stream << "Spare RX Buffer " << i << ":\t address: 0x" << std::hex << addrh * (1UL << 32) + addrl << std::dec << " \t status: " << status << " \t occupancy: " << rxlen << "/" << maxsize << " \t MPI tag: " << std::hex << rxtag << std::dec - << " \t seq: " << seq << " \t src: " << rxsrc; + << " \t seq: " << seq << " \t src: " << rxsrc + << " \t hostBit: " << hostBit; if(dump_data) { - eager_rx_buffers[i]->sync_from_device(); + //add if else, to check if is host or not and sync accordingly + if(!(hostBit && cclo->get_device_type() == CCLO::coyote_device)){ + eager_rx_buffers[i]->sync_from_device(); + } stream << " \t data: " << std::hex << "["; for (size_t j = 0; j < eager_rx_buffers[i]->size(); ++j) { stream << "0x" - << static_cast(static_cast( - eager_rx_buffers[i]->byte_array())[j]); + << static_cast(static_cast( + eager_rx_buffers[i]->byte_array())[j]); if (j != eager_rx_buffers[i]->size() - 1) { stream << ", "; } @@ -1065,7 +1066,7 @@ void ACCL::parse_hwid(){ void ACCL::initialize(const std::vector &ranks, int local_rank, int n_egr_rx_bufs, addr_t egr_rx_buf_size, - addr_t max_egr_size, addr_t max_rndzv_size) { + addr_t max_egr_size, addr_t max_rndzv_size, bool rxEager_host) { parse_hwid(); @@ -1077,7 +1078,7 @@ void ACCL::initialize(const std::vector &ranks, int local_rank, } debug("Configuring Eager RX Buffers"); - setup_eager_rx_buffers(n_egr_rx_bufs, egr_rx_buf_size, rxbufmem); + setup_eager_rx_buffers(n_egr_rx_bufs, egr_rx_buf_size, rxbufmem, rxEager_host); debug("Configuring Rendezvous Spare Buffers"); setup_rendezvous_spare_buffers(max_rndzv_size, rxbufmem); @@ -1129,7 +1130,7 @@ addr_t ACCL::get_arithmetic_config_addr(std::pair id) { } void ACCL::setup_eager_rx_buffers(size_t n_egr_rx_bufs, addr_t egr_rx_buf_size, - const std::vector &devicemem) { + const std::vector &devicemem, bool host) { addr_t address = CCLO_ADDR::EGR_RX_BUF_SIZE_OFFSET; eager_rx_buffer_size = egr_rx_buf_size; for (size_t i = 0; i < n_egr_rx_bufs; ++i) { @@ -1137,15 +1138,32 @@ void ACCL::setup_eager_rx_buffers(size_t n_egr_rx_bufs, addr_t egr_rx_buf_size, Buffer *buf; if (sim_mode) { - buf = new SimBuffer(new int8_t[eager_rx_buffer_size](), eager_rx_buffer_size, dataType::int8, + if(host){ + buf = new SimBuffer(new int8_t[eager_rx_buffer_size](), eager_rx_buffer_size, dataType::int8, + static_cast(cclo)->get_context(), true, ACCL_SIM_DEFAULT_BANK); + }else{ + buf = new SimBuffer(new int8_t[eager_rx_buffer_size](), eager_rx_buffer_size, dataType::int8, static_cast(cclo)->get_context()); + } } else if(cclo->get_device_type() == CCLO::xrt_device ){ - buf = new XRTBuffer(eager_rx_buffer_size, dataType::int8, *(static_cast(cclo)->get_device()), devicemem[i % devicemem.size()]); + if(host){ + //TODO: how to define host buffers in XRT? + buf = new XRTBuffer(eager_rx_buffer_size, dataType::int8, *(static_cast(cclo)->get_device()), devicemem[i % devicemem.size()]); + }else{ + buf = new XRTBuffer(eager_rx_buffer_size, dataType::int8, *(static_cast(cclo)->get_device()), devicemem[i % devicemem.size()]); + } } else if(cclo->get_device_type() == CCLO::coyote_device){ - buf = new CoyoteBuffer(eager_rx_buffer_size, dataType::int8, static_cast(cclo)); + if(host){ + //buffers in coyote per default on host + buf = new CoyoteBuffer(eager_rx_buffer_size, dataType::int8, static_cast(cclo)); + }else{ + buf = new CoyoteBuffer(eager_rx_buffer_size, dataType::int8, static_cast(cclo)); + } + } + //add if else as well, test for coyote backend + eager on host + if(!(host && cclo->get_device_type() == CCLO::coyote_device)){ + buf->sync_to_device(); } - - buf->sync_to_device(); eager_rx_buffers.emplace_back(buf); // program this buffer into the accelerator address += 4; @@ -1155,10 +1173,18 @@ void ACCL::setup_eager_rx_buffers(size_t n_egr_rx_bufs, addr_t egr_rx_buf_size, address += 4; cclo->write(address, (buf->address() >> 32) & 0xffffffff); // clear remaining 4 fields - for (size_t j = 0; j < 4; ++j) { + for (size_t j = 0; j < 5; ++j) { address += 4; cclo->write(address, 0); } + //set the host flag + // NOTE: the host flag is set to true if the buffer is a host buffer + address += 4; + if(host){ + cclo->write(address, 1); // set host flag + }else{ + cclo->write(address, 0); // set host flag + } } //write buffer len @@ -1184,7 +1210,7 @@ void ACCL::setup_rendezvous_spare_buffers(addr_t rndzv_spare_buf_size, const std } else if(cclo->get_device_type() == CCLO::coyote_device){ buf = new CoyoteBuffer(max_rndzv_msg_size, dataType::int8, static_cast(cclo)); } - buf->sync_to_device(); + //buf->sync_to_device(); utility_spares.emplace_back(buf); } cclo->write(CCLO_ADDR::SPARE1_OFFSET, utility_spares.at(0)->address() & 0xffffffff); @@ -1246,7 +1272,9 @@ void ACCL::prepare_call(CCLO::Options &options) { } else { dtypes.insert(options.addr_0->type()); - if(options.addr_0->is_host_only()) options.host_flags |= hostFlags::OP0_HOST; + if(options.addr_0->is_host_only()){ + options.host_flags |= hostFlags::OP0_HOST; + } } if (options.addr_1 == nullptr) { @@ -1255,7 +1283,9 @@ void ACCL::prepare_call(CCLO::Options &options) { } else { dtypes.insert(options.addr_1->type()); - if(options.addr_1->is_host_only()) options.host_flags |= hostFlags::OP1_HOST; + if(options.addr_1->is_host_only()) { + options.host_flags |= hostFlags::OP1_HOST; + } } if (options.addr_2 == nullptr) { @@ -1264,7 +1294,9 @@ void ACCL::prepare_call(CCLO::Options &options) { } else { dtypes.insert(options.addr_2->type()); - if(options.addr_2->is_host_only()) options.host_flags |= hostFlags::RES_HOST; + if(options.addr_2->is_host_only()) { + options.host_flags |= hostFlags::RES_HOST; + } } dtypes.erase(dataType::none); @@ -1351,7 +1383,6 @@ void ACCL::prepare_call(CCLO::Options &options) { } } } - options.arithcfg_addr = arithcfg->addr(); } diff --git a/driver/xrt/src/coyotedevice.cpp b/driver/xrt/src/coyotedevice.cpp index fd96b904..8d2f9628 100644 --- a/driver/xrt/src/coyotedevice.cpp +++ b/driver/xrt/src/coyotedevice.cpp @@ -46,7 +46,6 @@ void CoyoteRequest::start() { function = static_cast(options.reduce_function); } uint32_t flags = static_cast(options.host_flags) << 8 | static_cast(options.stream_flags); - auto coyote_proc = reinterpret_cast(cclo())->get_device(); if ((coyote_proc->getCSR((OFFSET_HOSTCTRL + HOSTCTRL_ADDR::AP_CTRL)>>2) & 0x4) == 0) { // read AP_CTRL and check bit 3 (the idle bit) @@ -251,6 +250,7 @@ void CoyoteRequest::start() { } case ACCL::operation::config:{ coyote_proc->setCSR(static_cast(options.scenario), (OFFSET_HOSTCTRL + HOSTCTRL_ADDR::SCEN)>>2); + coyote_proc->setCSR(static_cast(options.count), (OFFSET_HOSTCTRL + HOSTCTRL_ADDR::LEN)>>2); coyote_proc->setCSR(static_cast(function), (OFFSET_HOSTCTRL + HOSTCTRL_ADDR::FUNCTION_R)>>2); //coyote_proc->setCSR(static_cast(flags), (OFFSET_HOSTCTRL + HOSTCTRL_ADDR::STREAM_FLAGS)>>2); //safe to delete? break; diff --git a/driver/xrt/src/simdevice.cpp b/driver/xrt/src/simdevice.cpp index b0519f0d..dbb2f245 100644 --- a/driver/xrt/src/simdevice.cpp +++ b/driver/xrt/src/simdevice.cpp @@ -21,6 +21,7 @@ #include #include #include "zmq_client.h" +#include static void finish_sim_request(ACCL::SimRequest *req) { ACCL::SimDevice *cclo = reinterpret_cast(req->cclo()); @@ -43,6 +44,8 @@ void SimRequest::start() { options.addr_0->sync_bo_to_device(); options.addr_1->sync_bo_to_device(); options.addr_2->sync_bo_to_device(); + std::cout << "SimRequest::start: addr_0: " << options.addr_0->address() << std::endl; + std::cout << "SimRequest::start: addr_2: " << options.addr_2->address() << std::endl; if (options.scenario == operation::config) { function = static_cast(options.cfg_function); @@ -51,6 +54,7 @@ void SimRequest::start() { } uint32_t flags = static_cast(options.host_flags) << 8 | static_cast(options.stream_flags); + std::cout << "host flags " << std::bitset<32>(static_cast(options.host_flags)) << " shifted: " << std::bitset<32>(static_cast(options.host_flags)<<8) << std::endl; zmq_client_startcall( reinterpret_cast(cclo_ptr)->get_context(), @@ -164,7 +168,7 @@ void SimDevice::write(addr_t offset, val_t val) { CCLO::deviceType SimDevice::get_device_type() { - std::cout<<"get_device_type: sim_device"< #include +#include static void finish_fpga_request(ACCL::FPGARequest *req) { req->wait_kernel(); @@ -44,6 +45,7 @@ void FPGARequest::start() { function = static_cast(options.reduce_function); } uint32_t flags = static_cast(options.host_flags) << 8 | static_cast(options.stream_flags); + std::cout << "host flags xrt " << std::bitset<32>(static_cast(options.host_flags)) << " shifted: " << std::bitset<32>(static_cast(options.host_flags)<<8) << std::endl; switch(options.scenario) { case ACCL::operation::copy: run.set_arg(ACCL::XRT_ARG_ID::SCENARIO_ID, static_cast(options.scenario)); diff --git a/kernels/cclo/Makefile b/kernels/cclo/Makefile index dc76095c..d38dbda4 100644 --- a/kernels/cclo/Makefile +++ b/kernels/cclo/Makefile @@ -15,13 +15,13 @@ # # *******************************************************************************/ -PLATFORM ?= xilinx_u280_xdma_201920_3 +PLATFORM ?= xilinx_u55c_gen3x16_xdma_3_202210_1 HW_DEBUG ?= none -STACK_TYPE ?= UDP +STACK_TYPE ?= RDMA MODE ?= xo EN_DMA ?= 1 EN_ARITH ?= 1 -EN_COMPRESS ?= 1 +EN_COMPRESS ?= 0 EN_EXT_KRNL ?= 1 MB_DEBUG_LEVEL ?= 0 SIM_MEM_SIZE_LOG ?= 28 @@ -32,7 +32,7 @@ FW_SOURCES = $(shell find fw -name '*.c') $(shell find fw -name '*.h') $(shell f ifeq ($(MODE), simdll) EN_DMA=1 EN_ARITH=1 - EN_COMPRESS=1 + EN_COMPRESS=0 EN_EXT_KRNL=1 MB_DEBUG_LEVEL=0 endif @@ -61,8 +61,6 @@ GEN_KERNEL_TCL := tcl/generate_kernel.tcl REBUILD_BD_TCL := tcl/rebuild_bd.tcl tcl/control_bd.tcl tcl/rx_bd.tcl tcl/tx_bd.tcl ifeq ($(MODE), simdll) - FPGAPART=xcu280-fsvh2892-2L-e - BOARD=u280 BUILD_FOLDER = $(STACK_TYPE)_sim else BUILD_FOLDER = $(STACK_TYPE)_$(EN_DMA)$(EN_ARITH)$(EN_COMPRESS)$(EN_EXT_KRNL)$(MB_DEBUG_LEVEL)_$(FPGAPART) diff --git a/kernels/cclo/fw/sw_apps/ccl_offload_control/src/ccl_offload_control.c b/kernels/cclo/fw/sw_apps/ccl_offload_control/src/ccl_offload_control.c index eff26184..6b20816e 100755 --- a/kernels/cclo/fw/sw_apps/ccl_offload_control/src/ccl_offload_control.c +++ b/kernels/cclo/fw/sw_apps/ccl_offload_control/src/ccl_offload_control.c @@ -435,6 +435,7 @@ void start_move( opcode |= op0_opcode; opcode |= op1_opcode << 3; opcode |= res_opcode << 6; + //printf("rx_src_rank: %u\n", rx_src_rank); uint32_t compression_flags = flags & 0xff; uint32_t remote_flags = (flags>>8) & 0xff; @@ -581,6 +582,7 @@ int send( unsigned int buftype ) { unsigned int host = (buftype >> 8) & 0xff; + //printf("host bit in send: %u\n", host); unsigned int stream = buftype & 0xff; //get count in bytes unsigned int bytes_count = datatype_nbytes*count; @@ -609,6 +611,8 @@ int send( 0, 0, dst_rank, dst_tag ); } else { + //printf("eager send, dst_rank: %u, src_addr: %lu, dst_tag: %u, buftype: %u\n", + // dst_rank, src_addr, dst_tag, buftype); //Eager with segmentation //if ETH_COMPRESSED is set, also set RES_COMPRESSED compression |= (compression & ETH_COMPRESSED) >> 1; @@ -674,6 +678,8 @@ int recv( } return rendezvous_get_completion(src_rank, dst_addr, is_host, count, src_tag); } else { + //printf("eager recv, src_rank: %u, dst_addr: %lu, src_tag: %u, buftype: %u\n", + //src_rank, dst_addr, src_tag, buftype); //Eager with segmentation //if ETH_COMPRESSED is set, also set OP1_COMPRESSED compression |= (compression & ETH_COMPRESSED) >> 2; @@ -2381,9 +2387,11 @@ void run() { retval = combine(count, function, op0_addr, op1_addr, res_addr, datapath_cfg, compression_flags, buftype_flags); break; case ACCL_SEND: + //printf("Sending %d bytes to %d\n", count, root_src_dst); retval = send(root_src_dst, count, op0_addr, comm, datapath_cfg, msg_tag, compression_flags, buftype_flags); break; case ACCL_RECV: + //printf("Receiving %d bytes from %d\n", count, root_src_dst); retval = recv(root_src_dst, count, res_addr, comm, datapath_cfg, msg_tag, compression_flags, buftype_flags); break; case ACCL_BCAST: diff --git a/kernels/cclo/fw/sw_apps/ccl_offload_control/src/ccl_offload_control.h b/kernels/cclo/fw/sw_apps/ccl_offload_control/src/ccl_offload_control.h index d523b4ff..d291bdb6 100644 --- a/kernels/cclo/fw/sw_apps/ccl_offload_control/src/ccl_offload_control.h +++ b/kernels/cclo/fw/sw_apps/ccl_offload_control/src/ccl_offload_control.h @@ -273,16 +273,25 @@ typedef struct { unsigned int rx_len; unsigned int rx_src; unsigned int sequence_number; + unsigned int host; } rx_buffer; #define STATUS_OFFSET 0 #define ADDRL_OFFSET 1 #define ADDRH_OFFSET 2 -#define RX_TAG_OFFSET 3 +#define MAX_LEN_OFFSET 3 +#define RX_TAG_OFFSET 4 +#define RX_LEN_OFFSET 5 +#define RX_SRC_OFFSET 6 +#define SEQUENCE_NUMBER_OFFSET 7 +#define HOST_OFFSET 8 //host address offset for the buffer +#define SPARE_BUFFER_FIELDS 9 +/*#define RX_TAG_OFFSET 3 #define RX_LEN_OFFSET 4 #define RX_SRC_OFFSET 5 -#define SEQUENCE_NUMBER_OFFSET 6 -#define SPARE_BUFFER_FIELDS 7 +#define SEQUENCE_NUMBER_OFFSET 6 +#define HOST_OFFSET 7 //host address offset for the buffer +#define SPARE_BUFFER_FIELDS 8*/ #define STATUS_IDLE 0x00 #define STATUS_ENQUEUED 0x01 diff --git a/kernels/cclo/hdl/sim_mem.v b/kernels/cclo/hdl/sim_mem.v index 9775e6c7..1ad55394 100644 --- a/kernels/cclo/hdl/sim_mem.v +++ b/kernels/cclo/hdl/sim_mem.v @@ -54,7 +54,24 @@ module sim_mem (* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_B DIN" *) input [MEM_WIDTH-1:0] din_b, (* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_B DOUT" *) - output [MEM_WIDTH-1:0] dout_b + output [MEM_WIDTH-1:0] dout_b, + +(* X_INTERFACE_PARAMETER = "MODE Slave, MASTER_TYPE BRAM_CTRL, MEM_ECC NONE, READ_WRITE_MODE READ_WRITE" *) +(* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_C CLK" *) + input clk_c, +(* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_C RST" *) + input rst_c, +(* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_C EN" *) + input en_c, +(* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_C ADDR" *) + input [MEM_DEPTH_LOG-1:0] addr_c, +(* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_C WE" *) + input [MEM_WIDTH/8-1:0] we_c, +(* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_C DIN" *) + input [MEM_WIDTH-1:0] din_c, +(* X_INTERFACE_INFO = "xilinx.com:interface:bram_rtl:1.0 MEM_PORT_C DOUT" *) + output [MEM_WIDTH-1:0] dout_c + ); @@ -71,11 +88,16 @@ generate for(byte_idx=0; byte_idx status; + ap_uint<32> host; ap_uint<64> addr; status = rx_buffers[(i * SPARE_BUFFER_FIELDS) + STATUS_OFFSET]; + host = rx_buffers[(i * SPARE_BUFFER_FIELDS) + HOST_OFFSET]; + //std::cout << "rxbuf_enqueue host bit: " << host << std::endl; + addr(31, 0) = rx_buffers[(i * SPARE_BUFFER_FIELDS) + ADDRL_OFFSET]; addr(63, 32) = rx_buffers[(i * SPARE_BUFFER_FIELDS) + ADDRH_OFFSET]; @@ -64,7 +68,8 @@ void rxbuf_enqueue( cmd.tag = tag++; cmd_word.data = cmd; cmd_word.last = 1;//unused for now - cmd_word.dest = 0;//unused for now + //set destination to host if host is set + cmd_word.dest = host;//unused for now STREAM_WRITE(dma_cmd, cmd_word); //update spare buffer status rx_buffers[(i * SPARE_BUFFER_FIELDS) + STATUS_OFFSET] = STATUS_ENQUEUED; diff --git a/kernels/cclo/hls/rxbuf_offload/rxbuf_offload.h b/kernels/cclo/hls/rxbuf_offload/rxbuf_offload.h index 4b2b443e..35c68201 100644 --- a/kernels/cclo/hls/rxbuf_offload/rxbuf_offload.h +++ b/kernels/cclo/hls/rxbuf_offload/rxbuf_offload.h @@ -20,6 +20,7 @@ typedef struct { ap_uint<32> index; ap_uint<32> len; bool valid; + bool host; } rxbuf_seek_result; typedef struct { diff --git a/kernels/cclo/hls/rxbuf_offload/rxbuf_seek.cpp b/kernels/cclo/hls/rxbuf_offload/rxbuf_seek.cpp index 78812821..66427480 100644 --- a/kernels/cclo/hls/rxbuf_offload/rxbuf_seek.cpp +++ b/kernels/cclo/hls/rxbuf_offload/rxbuf_seek.cpp @@ -58,10 +58,13 @@ void rxbuf_seek( if((pending_notif.signature.tag == seek_sig.tag || pending_notif.signature.tag == TAG_ANY) && pending_notif.signature.src == seek_sig.src && pending_notif.signature.seqn == seek_sig.seqn){ seek_res.addr(31,0) = rx_buffers[(RX_BUFFER_METADATA_OFFSET/4) + pending_notif.index * SPARE_BUFFER_FIELDS + ADDRL_OFFSET]; + //add host bit seek_res.addr(63,32) = rx_buffers[(RX_BUFFER_METADATA_OFFSET/4) + pending_notif.index * SPARE_BUFFER_FIELDS + ADDRH_OFFSET]; + seek_res.host = (rx_buffers[(RX_BUFFER_METADATA_OFFSET/4) + pending_notif.index * SPARE_BUFFER_FIELDS + HOST_OFFSET] == 1) ? true : false; seek_res.len = pending_notif.signature.len; seek_res.index = pending_notif.index; seek_res.valid = true; + //std::cout << "rxbuf_seek with host bit: " << rx_buffers[(RX_BUFFER_METADATA_OFFSET/4) + pending_notif.index * SPARE_BUFFER_FIELDS + HOST_OFFSET] << std::endl; num_pending--; break; } else{ diff --git a/kernels/cclo/hls/rxbuf_offload/rxbuf_session.cpp b/kernels/cclo/hls/rxbuf_offload/rxbuf_session.cpp index 55751558..0becffe8 100644 --- a/kernels/cclo/hls/rxbuf_offload/rxbuf_session.cpp +++ b/kernels/cclo/hls/rxbuf_offload/rxbuf_session.cpp @@ -84,7 +84,9 @@ void rxbuf_session_command( cmd.length = notif.length; cmd_word.data = cmd; cmd_word.last = 1;//always last, each command is a single word - cmd_word.dest = 0;//always write RX data to device (not host) + //TODO: keep this? + cmd_word.dest = desc.mem_index;//always write RX data to device (not host) + //std::cout << "rxbuf_session_command host bit: " << cmd_word.dest << std::endl; STREAM_WRITE(fragment_dma_cmd, cmd_word); } else { //if EOF update address in descriptor @@ -116,6 +118,7 @@ void rxbuf_session_command( desc.remaining = desc.header.count; //prime the command to status parser sts_command.first = true; + //std::cout << "rxbuf_session_command host bit else case: " << desc.mem_index << std::endl; } //store descriptor mem[notif.session_id] = desc; diff --git a/kernels/cclo/tcl/generate_sim.tcl b/kernels/cclo/tcl/generate_sim.tcl index 185cc990..3250b587 100644 --- a/kernels/cclo/tcl/generate_sim.tcl +++ b/kernels/cclo/tcl/generate_sim.tcl @@ -127,6 +127,18 @@ if { $en_dma != 0 } { connect_bd_intf_net [get_bd_intf_pins axi_bram_ctrl_2/BRAM_PORTA] [get_bd_intf_pins sim_mem_2/MEM_PORT_A] connect_bd_intf_net [get_bd_intf_pins axi_bram_ctrl_2/BRAM_PORTB] [get_bd_intf_pins sim_mem_2/MEM_PORT_B] + create_bd_cell -type ip -vlnv xilinx.com:ip:axi_bram_ctrl:4.1 axi_bram_ctrl_3 + set_property -dict [list CONFIG.SINGLE_PORT_BRAM {1} CONFIG.DATA_WIDTH {512} CONFIG.ECC_TYPE {0} CONFIG.READ_LATENCY $latency] [get_bd_cells axi_bram_ctrl_3] + connect_bd_intf_net [get_bd_intf_pins axi_bram_ctrl_3/BRAM_PORTA] [get_bd_intf_pins sim_mem_0/MEM_PORT_C] + + create_bd_cell -type ip -vlnv xilinx.com:ip:axi_bram_ctrl:4.1 axi_bram_ctrl_4 + set_property -dict [list CONFIG.SINGLE_PORT_BRAM {1} CONFIG.DATA_WIDTH {512} CONFIG.ECC_TYPE {0} CONFIG.READ_LATENCY $latency] [get_bd_cells axi_bram_ctrl_4] + connect_bd_intf_net [get_bd_intf_pins axi_bram_ctrl_4/BRAM_PORTA] [get_bd_intf_pins sim_mem_1/MEM_PORT_C] + + create_bd_cell -type ip -vlnv xilinx.com:ip:axi_bram_ctrl:4.1 axi_bram_ctrl_5 + set_property -dict [list CONFIG.SINGLE_PORT_BRAM {1} CONFIG.DATA_WIDTH {512} CONFIG.ECC_TYPE {0} CONFIG.READ_LATENCY $latency] [get_bd_cells axi_bram_ctrl_5] + connect_bd_intf_net [get_bd_intf_pins axi_bram_ctrl_5/BRAM_PORTA] [get_bd_intf_pins sim_mem_2/MEM_PORT_C] + create_bd_cell -type ip -vlnv xilinx.com:ip:axi_crossbar:2.1 axi_crossbar_0 set_property -dict [list CONFIG.NUM_SI {3} CONFIG.NUM_MI {2}] [get_bd_cells axi_crossbar_0] connect_bd_intf_net [get_bd_intf_pins axi_crossbar_0/M00_AXI] [get_bd_intf_pins axi_bram_ctrl_0/S_AXI] @@ -137,9 +149,10 @@ if { $en_dma != 0 } { connect_bd_intf_net [get_bd_intf_pins axi_crossbar_1/M00_AXI] [get_bd_intf_pins axi_bram_ctrl_2/S_AXI] create_bd_cell -type ip -vlnv xilinx.com:ip:axi_crossbar:2.1 axi_crossbar_2 - set_property -dict [list CONFIG.NUM_SI {1} CONFIG.NUM_MI {2}] [get_bd_cells axi_crossbar_2] - connect_bd_intf_net [get_bd_intf_pins axi_crossbar_2/M00_AXI] [get_bd_intf_pins axi_crossbar_0/S02_AXI] - connect_bd_intf_net [get_bd_intf_pins axi_crossbar_2/M01_AXI] [get_bd_intf_pins axi_crossbar_1/S02_AXI] + set_property -dict [list CONFIG.NUM_SI {1} CONFIG.NUM_MI {3}] [get_bd_cells axi_crossbar_2] + connect_bd_intf_net [get_bd_intf_pins axi_crossbar_2/M00_AXI] [get_bd_intf_pins axi_bram_ctrl_3/S_AXI] + connect_bd_intf_net [get_bd_intf_pins axi_crossbar_2/M01_AXI] [get_bd_intf_pins axi_bram_ctrl_4/S_AXI] + connect_bd_intf_net [get_bd_intf_pins axi_crossbar_2/M02_AXI] [get_bd_intf_pins axi_bram_ctrl_5/S_AXI] create_bd_cell -type ip -vlnv Xilinx:ACCL:external_dma_2port:1.0 external_dma_0 connect_bd_net [get_bd_ports ap_clk] [get_bd_pins external_dma_0/ap_clk] @@ -219,31 +232,27 @@ if { $en_dma != 0 } { connect_bd_net [get_bd_ports ap_rst_n] [get_bd_pins axi_bram_ctrl_1/s_axi_aresetn] connect_bd_net [get_bd_ports ap_clk] [get_bd_pins axi_bram_ctrl_2/s_axi_aclk] connect_bd_net [get_bd_ports ap_rst_n] [get_bd_pins axi_bram_ctrl_2/s_axi_aresetn] + connect_bd_net [get_bd_ports ap_clk] [get_bd_pins axi_bram_ctrl_3/s_axi_aclk] + connect_bd_net [get_bd_ports ap_rst_n] [get_bd_pins axi_bram_ctrl_3/s_axi_aresetn] + connect_bd_net [get_bd_ports ap_clk] [get_bd_pins axi_bram_ctrl_4/s_axi_aclk] + connect_bd_net [get_bd_ports ap_rst_n] [get_bd_pins axi_bram_ctrl_4/s_axi_aresetn] + connect_bd_net [get_bd_ports ap_clk] [get_bd_pins axi_bram_ctrl_5/s_axi_aclk] + connect_bd_net [get_bd_ports ap_rst_n] [get_bd_pins axi_bram_ctrl_5/s_axi_aresetn] # #assign addresses and set ranges save_bd_design - assign_bd_address - - set_property offset [expr { 0*$memsize }] [get_bd_addr_segs {s_axi_data/SEG_axi_bram_ctrl_0_Mem0}] - set_property offset [expr { 1*$memsize }] [get_bd_addr_segs {s_axi_data/SEG_axi_bram_ctrl_1_Mem0}] - set_property offset [expr { 2*$memsize }] [get_bd_addr_segs {s_axi_data/SEG_axi_bram_ctrl_2_Mem0}] - set_property range $memsize [get_bd_addr_segs {s_axi_data/SEG_axi_bram_ctrl_0_Mem0}] - set_property range $memsize [get_bd_addr_segs {s_axi_data/SEG_axi_bram_ctrl_1_Mem0}] - set_property range $memsize [get_bd_addr_segs {s_axi_data/SEG_axi_bram_ctrl_2_Mem0}] - - set_property offset [expr { 0*$memsize }] [get_bd_addr_segs {external_dma_0/m_axi_0/SEG_axi_bram_ctrl_0_Mem0}] - set_property offset [expr { 1*$memsize }] [get_bd_addr_segs {external_dma_0/m_axi_0/SEG_axi_bram_ctrl_1_Mem0}] - set_property offset [expr { 2*$memsize }] [get_bd_addr_segs {external_dma_0/m_axi_1/SEG_axi_bram_ctrl_2_Mem0}] - set_property range $memsize [get_bd_addr_segs {external_dma_0/m_axi_0/SEG_axi_bram_ctrl_0_Mem0}] - set_property range $memsize [get_bd_addr_segs {external_dma_0/m_axi_0/SEG_axi_bram_ctrl_1_Mem0}] - set_property range $memsize [get_bd_addr_segs {external_dma_0/m_axi_1/SEG_axi_bram_ctrl_2_Mem0}] - - set_property offset [expr { 0*$memsize }] [get_bd_addr_segs {external_dma_1/m_axi_0/SEG_axi_bram_ctrl_0_Mem0}] - set_property offset [expr { 1*$memsize }] [get_bd_addr_segs {external_dma_1/m_axi_0/SEG_axi_bram_ctrl_1_Mem0}] - set_property offset [expr { 2*$memsize }] [get_bd_addr_segs {external_dma_1/m_axi_1/SEG_axi_bram_ctrl_2_Mem0}] - set_property range $memsize [get_bd_addr_segs {external_dma_1/m_axi_0/SEG_axi_bram_ctrl_0_Mem0}] - set_property range $memsize [get_bd_addr_segs {external_dma_1/m_axi_0/SEG_axi_bram_ctrl_1_Mem0}] - set_property range $memsize [get_bd_addr_segs {external_dma_1/m_axi_1/SEG_axi_bram_ctrl_2_Mem0}] + + assign_bd_address -offset [expr { 0*$memsize }] -range $memsize -target_address_space s_axi_data [get_bd_addr_segs axi_bram_ctrl_3/S_AXI/Mem0] -force + assign_bd_address -offset [expr { 1*$memsize }] -range $memsize -target_address_space s_axi_data [get_bd_addr_segs axi_bram_ctrl_4/S_AXI/Mem0] -force + assign_bd_address -offset [expr { 2*$memsize }] -range $memsize -target_address_space s_axi_data [get_bd_addr_segs axi_bram_ctrl_5/S_AXI/Mem0] -force + + assign_bd_address -offset [expr { 0*$memsize }] -range $memsize -target_address_space external_dma_0/m_axi_0 [get_bd_addr_segs axi_bram_ctrl_0/S_AXI/Mem0] -force + assign_bd_address -offset [expr { 1*$memsize }] -range $memsize -target_address_space external_dma_0/m_axi_0 [get_bd_addr_segs axi_bram_ctrl_1/S_AXI/Mem0] -force + assign_bd_address -offset [expr { 0*$memsize }] -range $memsize -target_address_space external_dma_0/m_axi_1 [get_bd_addr_segs axi_bram_ctrl_2/S_AXI/Mem0] -force + + assign_bd_address -offset [expr { 0*$memsize }] -range $memsize -target_address_space external_dma_1/m_axi_0 [get_bd_addr_segs axi_bram_ctrl_0/S_AXI/Mem0] -force + assign_bd_address -offset [expr { 1*$memsize }] -range $memsize -target_address_space external_dma_1/m_axi_0 [get_bd_addr_segs axi_bram_ctrl_1/S_AXI/Mem0] -force + assign_bd_address -offset [expr { 0*$memsize }] -range $memsize -target_address_space external_dma_1/m_axi_1 [get_bd_addr_segs axi_bram_ctrl_2/S_AXI/Mem0] -force group_bd_cells external_memory [get_bd_cells axi_bram_ctrl_*] [get_bd_cells sim_mem_*] [get_bd_cells axi_crossbar_*] group_bd_cells dma [get_bd_cells external_dma_*] [get_bd_cells cyt_dma_0] [get_bd_cells cyt_dma_adapter_0] @@ -297,20 +306,17 @@ if { $stacktype == "RDMA" } { connect_bd_intf_net [get_bd_intf_pins dummy_cyt_rdma_stack/recv_data] [get_bd_intf_pins cclo/s_axis_eth_rx_data] connect_bd_intf_net [get_bd_intf_pins cclo/m_axis_eth_tx_data] [get_bd_intf_pins dummy_cyt_rdma_stack/send_data] - set_property -dict [list CONFIG.NUM_SI {2}] [get_bd_cells external_memory/axi_crossbar_2] - - create_bd_cell -type ip -vlnv xilinx.com:ip:axi_datamover:5.1 cyt_wr_dma - set_property -dict [list CONFIG.c_enable_mm2s {0} CONFIG.c_include_s2mm_dre {true} CONFIG.c_s2mm_support_indet_btt {true} ] [get_bd_cells cyt_wr_dma] - set_property -dict [list CONFIG.c_m_axi_s2mm_data_width.VALUE_SRC USER CONFIG.c_s_axis_s2mm_tdata_width.VALUE_SRC USER] [get_bd_cells cyt_wr_dma] - set_property -dict [list CONFIG.c_addr_width {64} CONFIG.c_m_axi_s2mm_data_width {512} CONFIG.c_s_axis_s2mm_tdata_width {512} ] [get_bd_cells cyt_wr_dma] - connect_bd_intf_net [get_bd_intf_pins cyt_wr_dma/S_AXIS_S2MM] [get_bd_intf_pins dummy_cyt_rdma_stack/wr_data] - connect_bd_intf_net [get_bd_intf_pins dummy_cyt_rdma_stack/wr_cmd] [get_bd_intf_pins cyt_wr_dma/S_AXIS_S2MM_CMD] - connect_bd_intf_net [get_bd_intf_pins dummy_cyt_rdma_stack/wr_sts] [get_bd_intf_pins cyt_wr_dma/M_AXIS_S2MM_STS] - connect_bd_intf_net [get_bd_intf_pins cyt_wr_dma/M_AXI_S2MM] [get_bd_intf_pins external_memory/axi_crossbar_2/S01_AXI] - connect_bd_net [get_bd_ports ap_clk] [get_bd_pins cyt_wr_dma/m_axi_s2mm_aclk] [get_bd_pins cyt_wr_dma/m_axis_s2mm_cmdsts_awclk] - connect_bd_net [get_bd_ports ap_rst_n] [get_bd_pins cyt_wr_dma/m_axi_s2mm_aresetn] [get_bd_pins cyt_wr_dma/m_axis_s2mm_cmdsts_aresetn] - - assign_bd_address -target_address_space /cyt_wr_dma/Data_S2MM [get_bd_addr_segs external_memory/axi_bram_ctrl_0/S_AXI/Mem0] -force + create_bd_cell -type ip -vlnv Xilinx:ACCL:external_dma_2port:1.0 cyt_wr_dma + connect_bd_net [get_bd_ports ap_clk] [get_bd_pins cyt_wr_dma/ap_clk] + connect_bd_net [get_bd_ports ap_rst_n] [get_bd_pins cyt_wr_dma/ap_rst_n] + connect_bd_intf_net [get_bd_intf_pins dummy_cyt_rdma_stack/wr_cmd] [get_bd_intf_pins cyt_wr_dma/s_axis_s2mm_cmd] + connect_bd_intf_net [get_bd_intf_pins dummy_cyt_rdma_stack/wr_data] [get_bd_intf_pins cyt_wr_dma/s_axis_s2mm] + connect_bd_intf_net [get_bd_intf_pins cyt_wr_dma/m_axi_0] [get_bd_intf_pins external_memory/axi_crossbar_0/S02_AXI] + connect_bd_intf_net [get_bd_intf_pins cyt_wr_dma/m_axi_1] [get_bd_intf_pins external_memory/axi_crossbar_1/S02_AXI] + + assign_bd_address -offset [expr { 0*$memsize }] -range $memsize -target_address_space /cyt_wr_dma/m_axi_0 [get_bd_addr_segs external_memory/axi_bram_ctrl_0/S_AXI/Mem0] -force + assign_bd_address -offset [expr { 1*$memsize }] -range $memsize -target_address_space /cyt_wr_dma/m_axi_0 [get_bd_addr_segs external_memory/axi_bram_ctrl_1/S_AXI/Mem0] -force + assign_bd_address -offset [expr { 0*$memsize }] -range $memsize -target_address_space /cyt_wr_dma/m_axi_1 [get_bd_addr_segs external_memory/axi_bram_ctrl_2/S_AXI/Mem0] -force } # connect arithmetic plugins diff --git a/kernels/plugins/external_dma/external_dma_1port.v b/kernels/plugins/external_dma/external_dma_1port.v new file mode 100644 index 00000000..0fd19bb5 --- /dev/null +++ b/kernels/plugins/external_dma/external_dma_1port.v @@ -0,0 +1,196 @@ + + +`timescale 1 ns / 1 ps + +module external_dma_1port +( + input ap_clk, + input ap_rst_n, + + input [15:0] s_axi_control_araddr, + output s_axi_control_arready, + input s_axi_control_arvalid, + input [15:0] s_axi_control_awaddr, + output s_axi_control_awready, + input s_axi_control_awvalid, + input s_axi_control_bready, + output [1:0] s_axi_control_bresp, + output s_axi_control_bvalid, + output [31:0] s_axi_control_rdata, + input s_axi_control_rready, + output [1:0] s_axi_control_rresp, + output s_axi_control_rvalid, + input [31:0] s_axi_control_wdata, + output s_axi_control_wready, + input [3:0] s_axi_control_wstrb, + input s_axi_control_wvalid, + + + output [63:0] m_axi_0_araddr, + output [1:0] m_axi_0_arburst, + output [3:0] m_axi_0_arcache, + output [7:0] m_axi_0_arlen, + output [2:0] m_axi_0_arprot, + input m_axi_0_arready, + output [2:0] m_axi_0_arsize, + output [3:0] m_axi_0_aruser, + output m_axi_0_arvalid, + output [63:0] m_axi_0_awaddr, + output [1:0] m_axi_0_awburst, + output [3:0] m_axi_0_awcache, + output [7:0] m_axi_0_awlen, + output [2:0] m_axi_0_awprot, + input m_axi_0_awready, + output [2:0] m_axi_0_awsize, + output [3:0] m_axi_0_awuser, + output m_axi_0_awvalid, + output m_axi_0_bready, + input [1:0] m_axi_0_bresp, + input m_axi_0_bvalid, + input [511:0] m_axi_0_rdata, + input m_axi_0_rlast, + output m_axi_0_rready, + input [1:0] m_axi_0_rresp, + input m_axi_0_rvalid, + output [511:0] m_axi_0_wdata, + output m_axi_0_wlast, + input m_axi_0_wready, + output [63:0] m_axi_0_wstrb, + output m_axi_0_wvalid, + + + + input [511:0] s_axis_s2mm_tdata, + input [63:0] s_axis_s2mm_tkeep, + input [7:0] s_axis_s2mm_tdest, + input s_axis_s2mm_tlast, + output s_axis_s2mm_tready, + input s_axis_s2mm_tvalid, + + output [511:0] m_axis_mm2s_tdata, + output [63:0] m_axis_mm2s_tkeep, + output m_axis_mm2s_tlast, + input m_axis_mm2s_tready, + output m_axis_mm2s_tvalid, + + input [103:0] s_axis_mm2s_cmd_tdata, + output s_axis_mm2s_cmd_tready, + input s_axis_mm2s_cmd_tvalid, + input [7:0] s_axis_mm2s_cmd_tdest, + + output [7:0] m_axis_mm2s_sts_tdata, + input m_axis_mm2s_sts_tready, + output m_axis_mm2s_sts_tvalid, + output [0:0] m_axis_mm2s_sts_tkeep, + output m_axis_mm2s_sts_tlast, + + input [103:0] s_axis_s2mm_cmd_tdata, + output s_axis_s2mm_cmd_tready, + input s_axis_s2mm_cmd_tvalid, + input [7:0] s_axis_s2mm_cmd_tdest, + + output [31:0] m_axis_s2mm_sts_tdata, + input m_axis_s2mm_sts_tready, + output m_axis_s2mm_sts_tvalid, + output [3:0] m_axis_s2mm_sts_tkeep, + output m_axis_s2mm_sts_tlast +); + + external_dma_bd ext_dma_bd( + + .s_axi_control_araddr(s_axi_control_araddr), + .s_axi_control_arready(s_axi_control_arready), + .s_axi_control_arvalid(s_axi_control_arvalid), + .s_axi_control_awaddr(s_axi_control_awaddr), + .s_axi_control_awready(s_axi_control_awready), + .s_axi_control_awvalid(s_axi_control_awvalid), + .s_axi_control_bready(s_axi_control_bready), + .s_axi_control_bresp(s_axi_control_bresp), + .s_axi_control_bvalid(s_axi_control_bvalid), + .s_axi_control_rdata(s_axi_control_rdata), + .s_axi_control_rready(s_axi_control_rready), + .s_axi_control_rresp(s_axi_control_rresp), + .s_axi_control_rvalid(s_axi_control_rvalid), + .s_axi_control_wdata(s_axi_control_wdata), + .s_axi_control_wready(s_axi_control_wready), + .s_axi_control_wstrb(s_axi_control_wstrb), + .s_axi_control_wvalid(s_axi_control_wvalid), + + + .m_axi_0_araddr(m_axi_0_araddr), + .m_axi_0_arburst(m_axi_0_arburst), + .m_axi_0_arcache(m_axi_0_arcache), + .m_axi_0_arlen(m_axi_0_arlen), + .m_axi_0_arprot(m_axi_0_arprot), + .m_axi_0_arready(m_axi_0_arready), + .m_axi_0_arsize(m_axi_0_arsize), + .m_axi_0_aruser(m_axi_0_aruser), + .m_axi_0_arvalid(m_axi_0_arvalid), + .m_axi_0_awaddr(m_axi_0_awaddr), + .m_axi_0_awburst(m_axi_0_awburst), + .m_axi_0_awcache(m_axi_0_awcache), + .m_axi_0_awlen(m_axi_0_awlen), + .m_axi_0_awprot(m_axi_0_awprot), + .m_axi_0_awready(m_axi_0_awready), + .m_axi_0_awsize(m_axi_0_awsize), + .m_axi_0_awuser(m_axi_0_awuser), + .m_axi_0_awvalid(m_axi_0_awvalid), + .m_axi_0_bready(m_axi_0_bready), + .m_axi_0_bresp(m_axi_0_bresp), + .m_axi_0_bvalid(m_axi_0_bvalid), + .m_axi_0_rdata(m_axi_0_rdata), + .m_axi_0_rlast(m_axi_0_rlast), + .m_axi_0_rready(m_axi_0_rready), + .m_axi_0_rresp(m_axi_0_rresp), + .m_axi_0_rvalid(m_axi_0_rvalid), + .m_axi_0_wdata(m_axi_0_wdata), + .m_axi_0_wlast(m_axi_0_wlast), + .m_axi_0_wready(m_axi_0_wready), + .m_axi_0_wstrb(m_axi_0_wstrb), + .m_axi_0_wvalid(m_axi_0_wvalid), + + + + .s_axis_s2mm_tdata(s_axis_s2mm_tdata), + .s_axis_s2mm_tkeep(s_axis_s2mm_tkeep), + + + .s_axis_s2mm_tlast(s_axis_s2mm_tlast), + .s_axis_s2mm_tready(s_axis_s2mm_tready), + .s_axis_s2mm_tvalid(s_axis_s2mm_tvalid), + + .m_axis_mm2s_tdata(m_axis_mm2s_tdata), + .m_axis_mm2s_tkeep(m_axis_mm2s_tkeep), + .m_axis_mm2s_tlast(m_axis_mm2s_tlast), + .m_axis_mm2s_tready(m_axis_mm2s_tready), + .m_axis_mm2s_tvalid(m_axis_mm2s_tvalid), + + .s_axis_mm2s_cmd_tdata(s_axis_mm2s_cmd_tdata), + .s_axis_mm2s_cmd_tready(s_axis_mm2s_cmd_tready), + .s_axis_mm2s_cmd_tvalid(s_axis_mm2s_cmd_tvalid), + + + + .m_axis_mm2s_sts_tdata(m_axis_mm2s_sts_tdata), + .m_axis_mm2s_sts_tready(m_axis_mm2s_sts_tready), + .m_axis_mm2s_sts_tvalid(m_axis_mm2s_sts_tvalid), + .m_axis_mm2s_sts_tkeep(m_axis_mm2s_sts_tkeep), + .m_axis_mm2s_sts_tlast(m_axis_mm2s_sts_tlast), + + .s_axis_s2mm_cmd_tdata(s_axis_s2mm_cmd_tdata), + .s_axis_s2mm_cmd_tready(s_axis_s2mm_cmd_tready), + .s_axis_s2mm_cmd_tvalid(s_axis_s2mm_cmd_tvalid), + + + + .m_axis_s2mm_sts_tdata(m_axis_s2mm_sts_tdata), + .m_axis_s2mm_sts_tready(m_axis_s2mm_sts_tready), + .m_axis_s2mm_sts_tvalid(m_axis_s2mm_sts_tvalid), + .m_axis_s2mm_sts_tkeep(m_axis_s2mm_sts_tkeep), + .m_axis_s2mm_sts_tlast(m_axis_s2mm_sts_tlast), + + .ap_clk(ap_clk), + .ap_rst_n(ap_rst_n) + ); + +endmodule diff --git a/kernels/plugins/external_dma/external_dma_2port.v b/kernels/plugins/external_dma/external_dma_2port.v new file mode 100644 index 00000000..9dd179d8 --- /dev/null +++ b/kernels/plugins/external_dma/external_dma_2port.v @@ -0,0 +1,262 @@ + + +`timescale 1 ns / 1 ps + +module external_dma_2port +( + input ap_clk, + input ap_rst_n, + + input [15:0] s_axi_control_araddr, + output s_axi_control_arready, + input s_axi_control_arvalid, + input [15:0] s_axi_control_awaddr, + output s_axi_control_awready, + input s_axi_control_awvalid, + input s_axi_control_bready, + output [1:0] s_axi_control_bresp, + output s_axi_control_bvalid, + output [31:0] s_axi_control_rdata, + input s_axi_control_rready, + output [1:0] s_axi_control_rresp, + output s_axi_control_rvalid, + input [31:0] s_axi_control_wdata, + output s_axi_control_wready, + input [3:0] s_axi_control_wstrb, + input s_axi_control_wvalid, + + + output [63:0] m_axi_0_araddr, + output [1:0] m_axi_0_arburst, + output [3:0] m_axi_0_arcache, + output [7:0] m_axi_0_arlen, + output [2:0] m_axi_0_arprot, + input m_axi_0_arready, + output [2:0] m_axi_0_arsize, + output [3:0] m_axi_0_aruser, + output m_axi_0_arvalid, + output [63:0] m_axi_0_awaddr, + output [1:0] m_axi_0_awburst, + output [3:0] m_axi_0_awcache, + output [7:0] m_axi_0_awlen, + output [2:0] m_axi_0_awprot, + input m_axi_0_awready, + output [2:0] m_axi_0_awsize, + output [3:0] m_axi_0_awuser, + output m_axi_0_awvalid, + output m_axi_0_bready, + input [1:0] m_axi_0_bresp, + input m_axi_0_bvalid, + input [511:0] m_axi_0_rdata, + input m_axi_0_rlast, + output m_axi_0_rready, + input [1:0] m_axi_0_rresp, + input m_axi_0_rvalid, + output [511:0] m_axi_0_wdata, + output m_axi_0_wlast, + input m_axi_0_wready, + output [63:0] m_axi_0_wstrb, + output m_axi_0_wvalid, + + + output [63:0] m_axi_1_araddr, + output [1:0] m_axi_1_arburst, + output [3:0] m_axi_1_arcache, + output [7:0] m_axi_1_arlen, + output [2:0] m_axi_1_arprot, + input m_axi_1_arready, + output [2:0] m_axi_1_arsize, + output [3:0] m_axi_1_aruser, + output m_axi_1_arvalid, + output [63:0] m_axi_1_awaddr, + output [1:0] m_axi_1_awburst, + output [3:0] m_axi_1_awcache, + output [7:0] m_axi_1_awlen, + output [2:0] m_axi_1_awprot, + input m_axi_1_awready, + output [2:0] m_axi_1_awsize, + output [3:0] m_axi_1_awuser, + output m_axi_1_awvalid, + output m_axi_1_bready, + input [1:0] m_axi_1_bresp, + input m_axi_1_bvalid, + input [511:0] m_axi_1_rdata, + input m_axi_1_rlast, + output m_axi_1_rready, + input [1:0] m_axi_1_rresp, + input m_axi_1_rvalid, + output [511:0] m_axi_1_wdata, + output m_axi_1_wlast, + input m_axi_1_wready, + output [63:0] m_axi_1_wstrb, + output m_axi_1_wvalid, + + + + input [511:0] s_axis_s2mm_tdata, + input [63:0] s_axis_s2mm_tkeep, + input [7:0] s_axis_s2mm_tdest, + input s_axis_s2mm_tlast, + output s_axis_s2mm_tready, + input s_axis_s2mm_tvalid, + + output [511:0] m_axis_mm2s_tdata, + output [63:0] m_axis_mm2s_tkeep, + output m_axis_mm2s_tlast, + input m_axis_mm2s_tready, + output m_axis_mm2s_tvalid, + + input [103:0] s_axis_mm2s_cmd_tdata, + output s_axis_mm2s_cmd_tready, + input s_axis_mm2s_cmd_tvalid, + input [7:0] s_axis_mm2s_cmd_tdest, + + output [7:0] m_axis_mm2s_sts_tdata, + input m_axis_mm2s_sts_tready, + output m_axis_mm2s_sts_tvalid, + output [0:0] m_axis_mm2s_sts_tkeep, + output m_axis_mm2s_sts_tlast, + + input [103:0] s_axis_s2mm_cmd_tdata, + output s_axis_s2mm_cmd_tready, + input s_axis_s2mm_cmd_tvalid, + input [7:0] s_axis_s2mm_cmd_tdest, + + output [31:0] m_axis_s2mm_sts_tdata, + input m_axis_s2mm_sts_tready, + output m_axis_s2mm_sts_tvalid, + output [3:0] m_axis_s2mm_sts_tkeep, + output m_axis_s2mm_sts_tlast +); + + external_dma_bd ext_dma_bd( + + .s_axi_control_araddr(s_axi_control_araddr), + .s_axi_control_arready(s_axi_control_arready), + .s_axi_control_arvalid(s_axi_control_arvalid), + .s_axi_control_awaddr(s_axi_control_awaddr), + .s_axi_control_awready(s_axi_control_awready), + .s_axi_control_awvalid(s_axi_control_awvalid), + .s_axi_control_bready(s_axi_control_bready), + .s_axi_control_bresp(s_axi_control_bresp), + .s_axi_control_bvalid(s_axi_control_bvalid), + .s_axi_control_rdata(s_axi_control_rdata), + .s_axi_control_rready(s_axi_control_rready), + .s_axi_control_rresp(s_axi_control_rresp), + .s_axi_control_rvalid(s_axi_control_rvalid), + .s_axi_control_wdata(s_axi_control_wdata), + .s_axi_control_wready(s_axi_control_wready), + .s_axi_control_wstrb(s_axi_control_wstrb), + .s_axi_control_wvalid(s_axi_control_wvalid), + + + .m_axi_0_araddr(m_axi_0_araddr), + .m_axi_0_arburst(m_axi_0_arburst), + .m_axi_0_arcache(m_axi_0_arcache), + .m_axi_0_arlen(m_axi_0_arlen), + .m_axi_0_arprot(m_axi_0_arprot), + .m_axi_0_arready(m_axi_0_arready), + .m_axi_0_arsize(m_axi_0_arsize), + .m_axi_0_aruser(m_axi_0_aruser), + .m_axi_0_arvalid(m_axi_0_arvalid), + .m_axi_0_awaddr(m_axi_0_awaddr), + .m_axi_0_awburst(m_axi_0_awburst), + .m_axi_0_awcache(m_axi_0_awcache), + .m_axi_0_awlen(m_axi_0_awlen), + .m_axi_0_awprot(m_axi_0_awprot), + .m_axi_0_awready(m_axi_0_awready), + .m_axi_0_awsize(m_axi_0_awsize), + .m_axi_0_awuser(m_axi_0_awuser), + .m_axi_0_awvalid(m_axi_0_awvalid), + .m_axi_0_bready(m_axi_0_bready), + .m_axi_0_bresp(m_axi_0_bresp), + .m_axi_0_bvalid(m_axi_0_bvalid), + .m_axi_0_rdata(m_axi_0_rdata), + .m_axi_0_rlast(m_axi_0_rlast), + .m_axi_0_rready(m_axi_0_rready), + .m_axi_0_rresp(m_axi_0_rresp), + .m_axi_0_rvalid(m_axi_0_rvalid), + .m_axi_0_wdata(m_axi_0_wdata), + .m_axi_0_wlast(m_axi_0_wlast), + .m_axi_0_wready(m_axi_0_wready), + .m_axi_0_wstrb(m_axi_0_wstrb), + .m_axi_0_wvalid(m_axi_0_wvalid), + + + .m_axi_1_araddr(m_axi_1_araddr), + .m_axi_1_arburst(m_axi_1_arburst), + .m_axi_1_arcache(m_axi_1_arcache), + .m_axi_1_arlen(m_axi_1_arlen), + .m_axi_1_arprot(m_axi_1_arprot), + .m_axi_1_arready(m_axi_1_arready), + .m_axi_1_arsize(m_axi_1_arsize), + .m_axi_1_aruser(m_axi_1_aruser), + .m_axi_1_arvalid(m_axi_1_arvalid), + .m_axi_1_awaddr(m_axi_1_awaddr), + .m_axi_1_awburst(m_axi_1_awburst), + .m_axi_1_awcache(m_axi_1_awcache), + .m_axi_1_awlen(m_axi_1_awlen), + .m_axi_1_awprot(m_axi_1_awprot), + .m_axi_1_awready(m_axi_1_awready), + .m_axi_1_awsize(m_axi_1_awsize), + .m_axi_1_awuser(m_axi_1_awuser), + .m_axi_1_awvalid(m_axi_1_awvalid), + .m_axi_1_bready(m_axi_1_bready), + .m_axi_1_bresp(m_axi_1_bresp), + .m_axi_1_bvalid(m_axi_1_bvalid), + .m_axi_1_rdata(m_axi_1_rdata), + .m_axi_1_rlast(m_axi_1_rlast), + .m_axi_1_rready(m_axi_1_rready), + .m_axi_1_rresp(m_axi_1_rresp), + .m_axi_1_rvalid(m_axi_1_rvalid), + .m_axi_1_wdata(m_axi_1_wdata), + .m_axi_1_wlast(m_axi_1_wlast), + .m_axi_1_wready(m_axi_1_wready), + .m_axi_1_wstrb(m_axi_1_wstrb), + .m_axi_1_wvalid(m_axi_1_wvalid), + + + + .s_axis_s2mm_tdata(s_axis_s2mm_tdata), + .s_axis_s2mm_tkeep(s_axis_s2mm_tkeep), + .s_axis_s2mm_tdest(s_axis_s2mm_tdest), + + .s_axis_s2mm_tlast(s_axis_s2mm_tlast), + .s_axis_s2mm_tready(s_axis_s2mm_tready), + .s_axis_s2mm_tvalid(s_axis_s2mm_tvalid), + + .m_axis_mm2s_tdata(m_axis_mm2s_tdata), + .m_axis_mm2s_tkeep(m_axis_mm2s_tkeep), + .m_axis_mm2s_tlast(m_axis_mm2s_tlast), + .m_axis_mm2s_tready(m_axis_mm2s_tready), + .m_axis_mm2s_tvalid(m_axis_mm2s_tvalid), + + .s_axis_mm2s_cmd_tdata(s_axis_mm2s_cmd_tdata), + .s_axis_mm2s_cmd_tready(s_axis_mm2s_cmd_tready), + .s_axis_mm2s_cmd_tvalid(s_axis_mm2s_cmd_tvalid), + .s_axis_mm2s_cmd_tdest(s_axis_mm2s_cmd_tdest), + + + .m_axis_mm2s_sts_tdata(m_axis_mm2s_sts_tdata), + .m_axis_mm2s_sts_tready(m_axis_mm2s_sts_tready), + .m_axis_mm2s_sts_tvalid(m_axis_mm2s_sts_tvalid), + .m_axis_mm2s_sts_tkeep(m_axis_mm2s_sts_tkeep), + .m_axis_mm2s_sts_tlast(m_axis_mm2s_sts_tlast), + + .s_axis_s2mm_cmd_tdata(s_axis_s2mm_cmd_tdata), + .s_axis_s2mm_cmd_tready(s_axis_s2mm_cmd_tready), + .s_axis_s2mm_cmd_tvalid(s_axis_s2mm_cmd_tvalid), + .s_axis_s2mm_cmd_tdest(s_axis_s2mm_cmd_tdest), + + + .m_axis_s2mm_sts_tdata(m_axis_s2mm_sts_tdata), + .m_axis_s2mm_sts_tready(m_axis_s2mm_sts_tready), + .m_axis_s2mm_sts_tvalid(m_axis_s2mm_sts_tvalid), + .m_axis_s2mm_sts_tkeep(m_axis_s2mm_sts_tkeep), + .m_axis_s2mm_sts_tlast(m_axis_s2mm_sts_tlast), + + .ap_clk(ap_clk), + .ap_rst_n(ap_rst_n) + ); + +endmodule diff --git a/kernels/plugins/external_dma/external_dma_3port.v b/kernels/plugins/external_dma/external_dma_3port.v new file mode 100644 index 00000000..3fe4642b --- /dev/null +++ b/kernels/plugins/external_dma/external_dma_3port.v @@ -0,0 +1,328 @@ + + +`timescale 1 ns / 1 ps + +module external_dma_3port +( + input ap_clk, + input ap_rst_n, + + input [15:0] s_axi_control_araddr, + output s_axi_control_arready, + input s_axi_control_arvalid, + input [15:0] s_axi_control_awaddr, + output s_axi_control_awready, + input s_axi_control_awvalid, + input s_axi_control_bready, + output [1:0] s_axi_control_bresp, + output s_axi_control_bvalid, + output [31:0] s_axi_control_rdata, + input s_axi_control_rready, + output [1:0] s_axi_control_rresp, + output s_axi_control_rvalid, + input [31:0] s_axi_control_wdata, + output s_axi_control_wready, + input [3:0] s_axi_control_wstrb, + input s_axi_control_wvalid, + + + output [63:0] m_axi_0_araddr, + output [1:0] m_axi_0_arburst, + output [3:0] m_axi_0_arcache, + output [7:0] m_axi_0_arlen, + output [2:0] m_axi_0_arprot, + input m_axi_0_arready, + output [2:0] m_axi_0_arsize, + output [3:0] m_axi_0_aruser, + output m_axi_0_arvalid, + output [63:0] m_axi_0_awaddr, + output [1:0] m_axi_0_awburst, + output [3:0] m_axi_0_awcache, + output [7:0] m_axi_0_awlen, + output [2:0] m_axi_0_awprot, + input m_axi_0_awready, + output [2:0] m_axi_0_awsize, + output [3:0] m_axi_0_awuser, + output m_axi_0_awvalid, + output m_axi_0_bready, + input [1:0] m_axi_0_bresp, + input m_axi_0_bvalid, + input [511:0] m_axi_0_rdata, + input m_axi_0_rlast, + output m_axi_0_rready, + input [1:0] m_axi_0_rresp, + input m_axi_0_rvalid, + output [511:0] m_axi_0_wdata, + output m_axi_0_wlast, + input m_axi_0_wready, + output [63:0] m_axi_0_wstrb, + output m_axi_0_wvalid, + + + output [63:0] m_axi_1_araddr, + output [1:0] m_axi_1_arburst, + output [3:0] m_axi_1_arcache, + output [7:0] m_axi_1_arlen, + output [2:0] m_axi_1_arprot, + input m_axi_1_arready, + output [2:0] m_axi_1_arsize, + output [3:0] m_axi_1_aruser, + output m_axi_1_arvalid, + output [63:0] m_axi_1_awaddr, + output [1:0] m_axi_1_awburst, + output [3:0] m_axi_1_awcache, + output [7:0] m_axi_1_awlen, + output [2:0] m_axi_1_awprot, + input m_axi_1_awready, + output [2:0] m_axi_1_awsize, + output [3:0] m_axi_1_awuser, + output m_axi_1_awvalid, + output m_axi_1_bready, + input [1:0] m_axi_1_bresp, + input m_axi_1_bvalid, + input [511:0] m_axi_1_rdata, + input m_axi_1_rlast, + output m_axi_1_rready, + input [1:0] m_axi_1_rresp, + input m_axi_1_rvalid, + output [511:0] m_axi_1_wdata, + output m_axi_1_wlast, + input m_axi_1_wready, + output [63:0] m_axi_1_wstrb, + output m_axi_1_wvalid, + + + output [63:0] m_axi_2_araddr, + output [1:0] m_axi_2_arburst, + output [3:0] m_axi_2_arcache, + output [7:0] m_axi_2_arlen, + output [2:0] m_axi_2_arprot, + input m_axi_2_arready, + output [2:0] m_axi_2_arsize, + output [3:0] m_axi_2_aruser, + output m_axi_2_arvalid, + output [63:0] m_axi_2_awaddr, + output [1:0] m_axi_2_awburst, + output [3:0] m_axi_2_awcache, + output [7:0] m_axi_2_awlen, + output [2:0] m_axi_2_awprot, + input m_axi_2_awready, + output [2:0] m_axi_2_awsize, + output [3:0] m_axi_2_awuser, + output m_axi_2_awvalid, + output m_axi_2_bready, + input [1:0] m_axi_2_bresp, + input m_axi_2_bvalid, + input [511:0] m_axi_2_rdata, + input m_axi_2_rlast, + output m_axi_2_rready, + input [1:0] m_axi_2_rresp, + input m_axi_2_rvalid, + output [511:0] m_axi_2_wdata, + output m_axi_2_wlast, + input m_axi_2_wready, + output [63:0] m_axi_2_wstrb, + output m_axi_2_wvalid, + + + + input [511:0] s_axis_s2mm_tdata, + input [63:0] s_axis_s2mm_tkeep, + input [7:0] s_axis_s2mm_tdest, + input s_axis_s2mm_tlast, + output s_axis_s2mm_tready, + input s_axis_s2mm_tvalid, + + output [511:0] m_axis_mm2s_tdata, + output [63:0] m_axis_mm2s_tkeep, + output m_axis_mm2s_tlast, + input m_axis_mm2s_tready, + output m_axis_mm2s_tvalid, + + input [103:0] s_axis_mm2s_cmd_tdata, + output s_axis_mm2s_cmd_tready, + input s_axis_mm2s_cmd_tvalid, + input [7:0] s_axis_mm2s_cmd_tdest, + + output [7:0] m_axis_mm2s_sts_tdata, + input m_axis_mm2s_sts_tready, + output m_axis_mm2s_sts_tvalid, + output [0:0] m_axis_mm2s_sts_tkeep, + output m_axis_mm2s_sts_tlast, + + input [103:0] s_axis_s2mm_cmd_tdata, + output s_axis_s2mm_cmd_tready, + input s_axis_s2mm_cmd_tvalid, + input [7:0] s_axis_s2mm_cmd_tdest, + + output [31:0] m_axis_s2mm_sts_tdata, + input m_axis_s2mm_sts_tready, + output m_axis_s2mm_sts_tvalid, + output [3:0] m_axis_s2mm_sts_tkeep, + output m_axis_s2mm_sts_tlast +); + + external_dma_bd ext_dma_bd( + + .s_axi_control_araddr(s_axi_control_araddr), + .s_axi_control_arready(s_axi_control_arready), + .s_axi_control_arvalid(s_axi_control_arvalid), + .s_axi_control_awaddr(s_axi_control_awaddr), + .s_axi_control_awready(s_axi_control_awready), + .s_axi_control_awvalid(s_axi_control_awvalid), + .s_axi_control_bready(s_axi_control_bready), + .s_axi_control_bresp(s_axi_control_bresp), + .s_axi_control_bvalid(s_axi_control_bvalid), + .s_axi_control_rdata(s_axi_control_rdata), + .s_axi_control_rready(s_axi_control_rready), + .s_axi_control_rresp(s_axi_control_rresp), + .s_axi_control_rvalid(s_axi_control_rvalid), + .s_axi_control_wdata(s_axi_control_wdata), + .s_axi_control_wready(s_axi_control_wready), + .s_axi_control_wstrb(s_axi_control_wstrb), + .s_axi_control_wvalid(s_axi_control_wvalid), + + + .m_axi_0_araddr(m_axi_0_araddr), + .m_axi_0_arburst(m_axi_0_arburst), + .m_axi_0_arcache(m_axi_0_arcache), + .m_axi_0_arlen(m_axi_0_arlen), + .m_axi_0_arprot(m_axi_0_arprot), + .m_axi_0_arready(m_axi_0_arready), + .m_axi_0_arsize(m_axi_0_arsize), + .m_axi_0_aruser(m_axi_0_aruser), + .m_axi_0_arvalid(m_axi_0_arvalid), + .m_axi_0_awaddr(m_axi_0_awaddr), + .m_axi_0_awburst(m_axi_0_awburst), + .m_axi_0_awcache(m_axi_0_awcache), + .m_axi_0_awlen(m_axi_0_awlen), + .m_axi_0_awprot(m_axi_0_awprot), + .m_axi_0_awready(m_axi_0_awready), + .m_axi_0_awsize(m_axi_0_awsize), + .m_axi_0_awuser(m_axi_0_awuser), + .m_axi_0_awvalid(m_axi_0_awvalid), + .m_axi_0_bready(m_axi_0_bready), + .m_axi_0_bresp(m_axi_0_bresp), + .m_axi_0_bvalid(m_axi_0_bvalid), + .m_axi_0_rdata(m_axi_0_rdata), + .m_axi_0_rlast(m_axi_0_rlast), + .m_axi_0_rready(m_axi_0_rready), + .m_axi_0_rresp(m_axi_0_rresp), + .m_axi_0_rvalid(m_axi_0_rvalid), + .m_axi_0_wdata(m_axi_0_wdata), + .m_axi_0_wlast(m_axi_0_wlast), + .m_axi_0_wready(m_axi_0_wready), + .m_axi_0_wstrb(m_axi_0_wstrb), + .m_axi_0_wvalid(m_axi_0_wvalid), + + + .m_axi_1_araddr(m_axi_1_araddr), + .m_axi_1_arburst(m_axi_1_arburst), + .m_axi_1_arcache(m_axi_1_arcache), + .m_axi_1_arlen(m_axi_1_arlen), + .m_axi_1_arprot(m_axi_1_arprot), + .m_axi_1_arready(m_axi_1_arready), + .m_axi_1_arsize(m_axi_1_arsize), + .m_axi_1_aruser(m_axi_1_aruser), + .m_axi_1_arvalid(m_axi_1_arvalid), + .m_axi_1_awaddr(m_axi_1_awaddr), + .m_axi_1_awburst(m_axi_1_awburst), + .m_axi_1_awcache(m_axi_1_awcache), + .m_axi_1_awlen(m_axi_1_awlen), + .m_axi_1_awprot(m_axi_1_awprot), + .m_axi_1_awready(m_axi_1_awready), + .m_axi_1_awsize(m_axi_1_awsize), + .m_axi_1_awuser(m_axi_1_awuser), + .m_axi_1_awvalid(m_axi_1_awvalid), + .m_axi_1_bready(m_axi_1_bready), + .m_axi_1_bresp(m_axi_1_bresp), + .m_axi_1_bvalid(m_axi_1_bvalid), + .m_axi_1_rdata(m_axi_1_rdata), + .m_axi_1_rlast(m_axi_1_rlast), + .m_axi_1_rready(m_axi_1_rready), + .m_axi_1_rresp(m_axi_1_rresp), + .m_axi_1_rvalid(m_axi_1_rvalid), + .m_axi_1_wdata(m_axi_1_wdata), + .m_axi_1_wlast(m_axi_1_wlast), + .m_axi_1_wready(m_axi_1_wready), + .m_axi_1_wstrb(m_axi_1_wstrb), + .m_axi_1_wvalid(m_axi_1_wvalid), + + + .m_axi_2_araddr(m_axi_2_araddr), + .m_axi_2_arburst(m_axi_2_arburst), + .m_axi_2_arcache(m_axi_2_arcache), + .m_axi_2_arlen(m_axi_2_arlen), + .m_axi_2_arprot(m_axi_2_arprot), + .m_axi_2_arready(m_axi_2_arready), + .m_axi_2_arsize(m_axi_2_arsize), + .m_axi_2_aruser(m_axi_2_aruser), + .m_axi_2_arvalid(m_axi_2_arvalid), + .m_axi_2_awaddr(m_axi_2_awaddr), + .m_axi_2_awburst(m_axi_2_awburst), + .m_axi_2_awcache(m_axi_2_awcache), + .m_axi_2_awlen(m_axi_2_awlen), + .m_axi_2_awprot(m_axi_2_awprot), + .m_axi_2_awready(m_axi_2_awready), + .m_axi_2_awsize(m_axi_2_awsize), + .m_axi_2_awuser(m_axi_2_awuser), + .m_axi_2_awvalid(m_axi_2_awvalid), + .m_axi_2_bready(m_axi_2_bready), + .m_axi_2_bresp(m_axi_2_bresp), + .m_axi_2_bvalid(m_axi_2_bvalid), + .m_axi_2_rdata(m_axi_2_rdata), + .m_axi_2_rlast(m_axi_2_rlast), + .m_axi_2_rready(m_axi_2_rready), + .m_axi_2_rresp(m_axi_2_rresp), + .m_axi_2_rvalid(m_axi_2_rvalid), + .m_axi_2_wdata(m_axi_2_wdata), + .m_axi_2_wlast(m_axi_2_wlast), + .m_axi_2_wready(m_axi_2_wready), + .m_axi_2_wstrb(m_axi_2_wstrb), + .m_axi_2_wvalid(m_axi_2_wvalid), + + + + .s_axis_s2mm_tdata(s_axis_s2mm_tdata), + .s_axis_s2mm_tkeep(s_axis_s2mm_tkeep), + .s_axis_s2mm_tdest(s_axis_s2mm_tdest), + + .s_axis_s2mm_tlast(s_axis_s2mm_tlast), + .s_axis_s2mm_tready(s_axis_s2mm_tready), + .s_axis_s2mm_tvalid(s_axis_s2mm_tvalid), + + .m_axis_mm2s_tdata(m_axis_mm2s_tdata), + .m_axis_mm2s_tkeep(m_axis_mm2s_tkeep), + .m_axis_mm2s_tlast(m_axis_mm2s_tlast), + .m_axis_mm2s_tready(m_axis_mm2s_tready), + .m_axis_mm2s_tvalid(m_axis_mm2s_tvalid), + + .s_axis_mm2s_cmd_tdata(s_axis_mm2s_cmd_tdata), + .s_axis_mm2s_cmd_tready(s_axis_mm2s_cmd_tready), + .s_axis_mm2s_cmd_tvalid(s_axis_mm2s_cmd_tvalid), + .s_axis_mm2s_cmd_tdest(s_axis_mm2s_cmd_tdest), + + + .m_axis_mm2s_sts_tdata(m_axis_mm2s_sts_tdata), + .m_axis_mm2s_sts_tready(m_axis_mm2s_sts_tready), + .m_axis_mm2s_sts_tvalid(m_axis_mm2s_sts_tvalid), + .m_axis_mm2s_sts_tkeep(m_axis_mm2s_sts_tkeep), + .m_axis_mm2s_sts_tlast(m_axis_mm2s_sts_tlast), + + .s_axis_s2mm_cmd_tdata(s_axis_s2mm_cmd_tdata), + .s_axis_s2mm_cmd_tready(s_axis_s2mm_cmd_tready), + .s_axis_s2mm_cmd_tvalid(s_axis_s2mm_cmd_tvalid), + .s_axis_s2mm_cmd_tdest(s_axis_s2mm_cmd_tdest), + + + .m_axis_s2mm_sts_tdata(m_axis_s2mm_sts_tdata), + .m_axis_s2mm_sts_tready(m_axis_s2mm_sts_tready), + .m_axis_s2mm_sts_tvalid(m_axis_s2mm_sts_tvalid), + .m_axis_s2mm_sts_tkeep(m_axis_s2mm_sts_tkeep), + .m_axis_s2mm_sts_tlast(m_axis_s2mm_sts_tlast), + + .ap_clk(ap_clk), + .ap_rst_n(ap_rst_n) + ); + +endmodule diff --git a/kernels/plugins/external_dma/external_dma_4port.v b/kernels/plugins/external_dma/external_dma_4port.v new file mode 100644 index 00000000..c7721c12 --- /dev/null +++ b/kernels/plugins/external_dma/external_dma_4port.v @@ -0,0 +1,394 @@ + + +`timescale 1 ns / 1 ps + +module external_dma_4port +( + input ap_clk, + input ap_rst_n, + + input [15:0] s_axi_control_araddr, + output s_axi_control_arready, + input s_axi_control_arvalid, + input [15:0] s_axi_control_awaddr, + output s_axi_control_awready, + input s_axi_control_awvalid, + input s_axi_control_bready, + output [1:0] s_axi_control_bresp, + output s_axi_control_bvalid, + output [31:0] s_axi_control_rdata, + input s_axi_control_rready, + output [1:0] s_axi_control_rresp, + output s_axi_control_rvalid, + input [31:0] s_axi_control_wdata, + output s_axi_control_wready, + input [3:0] s_axi_control_wstrb, + input s_axi_control_wvalid, + + + output [63:0] m_axi_0_araddr, + output [1:0] m_axi_0_arburst, + output [3:0] m_axi_0_arcache, + output [7:0] m_axi_0_arlen, + output [2:0] m_axi_0_arprot, + input m_axi_0_arready, + output [2:0] m_axi_0_arsize, + output [3:0] m_axi_0_aruser, + output m_axi_0_arvalid, + output [63:0] m_axi_0_awaddr, + output [1:0] m_axi_0_awburst, + output [3:0] m_axi_0_awcache, + output [7:0] m_axi_0_awlen, + output [2:0] m_axi_0_awprot, + input m_axi_0_awready, + output [2:0] m_axi_0_awsize, + output [3:0] m_axi_0_awuser, + output m_axi_0_awvalid, + output m_axi_0_bready, + input [1:0] m_axi_0_bresp, + input m_axi_0_bvalid, + input [511:0] m_axi_0_rdata, + input m_axi_0_rlast, + output m_axi_0_rready, + input [1:0] m_axi_0_rresp, + input m_axi_0_rvalid, + output [511:0] m_axi_0_wdata, + output m_axi_0_wlast, + input m_axi_0_wready, + output [63:0] m_axi_0_wstrb, + output m_axi_0_wvalid, + + + output [63:0] m_axi_1_araddr, + output [1:0] m_axi_1_arburst, + output [3:0] m_axi_1_arcache, + output [7:0] m_axi_1_arlen, + output [2:0] m_axi_1_arprot, + input m_axi_1_arready, + output [2:0] m_axi_1_arsize, + output [3:0] m_axi_1_aruser, + output m_axi_1_arvalid, + output [63:0] m_axi_1_awaddr, + output [1:0] m_axi_1_awburst, + output [3:0] m_axi_1_awcache, + output [7:0] m_axi_1_awlen, + output [2:0] m_axi_1_awprot, + input m_axi_1_awready, + output [2:0] m_axi_1_awsize, + output [3:0] m_axi_1_awuser, + output m_axi_1_awvalid, + output m_axi_1_bready, + input [1:0] m_axi_1_bresp, + input m_axi_1_bvalid, + input [511:0] m_axi_1_rdata, + input m_axi_1_rlast, + output m_axi_1_rready, + input [1:0] m_axi_1_rresp, + input m_axi_1_rvalid, + output [511:0] m_axi_1_wdata, + output m_axi_1_wlast, + input m_axi_1_wready, + output [63:0] m_axi_1_wstrb, + output m_axi_1_wvalid, + + + output [63:0] m_axi_2_araddr, + output [1:0] m_axi_2_arburst, + output [3:0] m_axi_2_arcache, + output [7:0] m_axi_2_arlen, + output [2:0] m_axi_2_arprot, + input m_axi_2_arready, + output [2:0] m_axi_2_arsize, + output [3:0] m_axi_2_aruser, + output m_axi_2_arvalid, + output [63:0] m_axi_2_awaddr, + output [1:0] m_axi_2_awburst, + output [3:0] m_axi_2_awcache, + output [7:0] m_axi_2_awlen, + output [2:0] m_axi_2_awprot, + input m_axi_2_awready, + output [2:0] m_axi_2_awsize, + output [3:0] m_axi_2_awuser, + output m_axi_2_awvalid, + output m_axi_2_bready, + input [1:0] m_axi_2_bresp, + input m_axi_2_bvalid, + input [511:0] m_axi_2_rdata, + input m_axi_2_rlast, + output m_axi_2_rready, + input [1:0] m_axi_2_rresp, + input m_axi_2_rvalid, + output [511:0] m_axi_2_wdata, + output m_axi_2_wlast, + input m_axi_2_wready, + output [63:0] m_axi_2_wstrb, + output m_axi_2_wvalid, + + + output [63:0] m_axi_3_araddr, + output [1:0] m_axi_3_arburst, + output [3:0] m_axi_3_arcache, + output [7:0] m_axi_3_arlen, + output [2:0] m_axi_3_arprot, + input m_axi_3_arready, + output [2:0] m_axi_3_arsize, + output [3:0] m_axi_3_aruser, + output m_axi_3_arvalid, + output [63:0] m_axi_3_awaddr, + output [1:0] m_axi_3_awburst, + output [3:0] m_axi_3_awcache, + output [7:0] m_axi_3_awlen, + output [2:0] m_axi_3_awprot, + input m_axi_3_awready, + output [2:0] m_axi_3_awsize, + output [3:0] m_axi_3_awuser, + output m_axi_3_awvalid, + output m_axi_3_bready, + input [1:0] m_axi_3_bresp, + input m_axi_3_bvalid, + input [511:0] m_axi_3_rdata, + input m_axi_3_rlast, + output m_axi_3_rready, + input [1:0] m_axi_3_rresp, + input m_axi_3_rvalid, + output [511:0] m_axi_3_wdata, + output m_axi_3_wlast, + input m_axi_3_wready, + output [63:0] m_axi_3_wstrb, + output m_axi_3_wvalid, + + + + input [511:0] s_axis_s2mm_tdata, + input [63:0] s_axis_s2mm_tkeep, + input [7:0] s_axis_s2mm_tdest, + input s_axis_s2mm_tlast, + output s_axis_s2mm_tready, + input s_axis_s2mm_tvalid, + + output [511:0] m_axis_mm2s_tdata, + output [63:0] m_axis_mm2s_tkeep, + output m_axis_mm2s_tlast, + input m_axis_mm2s_tready, + output m_axis_mm2s_tvalid, + + input [103:0] s_axis_mm2s_cmd_tdata, + output s_axis_mm2s_cmd_tready, + input s_axis_mm2s_cmd_tvalid, + input [7:0] s_axis_mm2s_cmd_tdest, + + output [7:0] m_axis_mm2s_sts_tdata, + input m_axis_mm2s_sts_tready, + output m_axis_mm2s_sts_tvalid, + output [0:0] m_axis_mm2s_sts_tkeep, + output m_axis_mm2s_sts_tlast, + + input [103:0] s_axis_s2mm_cmd_tdata, + output s_axis_s2mm_cmd_tready, + input s_axis_s2mm_cmd_tvalid, + input [7:0] s_axis_s2mm_cmd_tdest, + + output [31:0] m_axis_s2mm_sts_tdata, + input m_axis_s2mm_sts_tready, + output m_axis_s2mm_sts_tvalid, + output [3:0] m_axis_s2mm_sts_tkeep, + output m_axis_s2mm_sts_tlast +); + + external_dma_bd ext_dma_bd( + + .s_axi_control_araddr(s_axi_control_araddr), + .s_axi_control_arready(s_axi_control_arready), + .s_axi_control_arvalid(s_axi_control_arvalid), + .s_axi_control_awaddr(s_axi_control_awaddr), + .s_axi_control_awready(s_axi_control_awready), + .s_axi_control_awvalid(s_axi_control_awvalid), + .s_axi_control_bready(s_axi_control_bready), + .s_axi_control_bresp(s_axi_control_bresp), + .s_axi_control_bvalid(s_axi_control_bvalid), + .s_axi_control_rdata(s_axi_control_rdata), + .s_axi_control_rready(s_axi_control_rready), + .s_axi_control_rresp(s_axi_control_rresp), + .s_axi_control_rvalid(s_axi_control_rvalid), + .s_axi_control_wdata(s_axi_control_wdata), + .s_axi_control_wready(s_axi_control_wready), + .s_axi_control_wstrb(s_axi_control_wstrb), + .s_axi_control_wvalid(s_axi_control_wvalid), + + + .m_axi_0_araddr(m_axi_0_araddr), + .m_axi_0_arburst(m_axi_0_arburst), + .m_axi_0_arcache(m_axi_0_arcache), + .m_axi_0_arlen(m_axi_0_arlen), + .m_axi_0_arprot(m_axi_0_arprot), + .m_axi_0_arready(m_axi_0_arready), + .m_axi_0_arsize(m_axi_0_arsize), + .m_axi_0_aruser(m_axi_0_aruser), + .m_axi_0_arvalid(m_axi_0_arvalid), + .m_axi_0_awaddr(m_axi_0_awaddr), + .m_axi_0_awburst(m_axi_0_awburst), + .m_axi_0_awcache(m_axi_0_awcache), + .m_axi_0_awlen(m_axi_0_awlen), + .m_axi_0_awprot(m_axi_0_awprot), + .m_axi_0_awready(m_axi_0_awready), + .m_axi_0_awsize(m_axi_0_awsize), + .m_axi_0_awuser(m_axi_0_awuser), + .m_axi_0_awvalid(m_axi_0_awvalid), + .m_axi_0_bready(m_axi_0_bready), + .m_axi_0_bresp(m_axi_0_bresp), + .m_axi_0_bvalid(m_axi_0_bvalid), + .m_axi_0_rdata(m_axi_0_rdata), + .m_axi_0_rlast(m_axi_0_rlast), + .m_axi_0_rready(m_axi_0_rready), + .m_axi_0_rresp(m_axi_0_rresp), + .m_axi_0_rvalid(m_axi_0_rvalid), + .m_axi_0_wdata(m_axi_0_wdata), + .m_axi_0_wlast(m_axi_0_wlast), + .m_axi_0_wready(m_axi_0_wready), + .m_axi_0_wstrb(m_axi_0_wstrb), + .m_axi_0_wvalid(m_axi_0_wvalid), + + + .m_axi_1_araddr(m_axi_1_araddr), + .m_axi_1_arburst(m_axi_1_arburst), + .m_axi_1_arcache(m_axi_1_arcache), + .m_axi_1_arlen(m_axi_1_arlen), + .m_axi_1_arprot(m_axi_1_arprot), + .m_axi_1_arready(m_axi_1_arready), + .m_axi_1_arsize(m_axi_1_arsize), + .m_axi_1_aruser(m_axi_1_aruser), + .m_axi_1_arvalid(m_axi_1_arvalid), + .m_axi_1_awaddr(m_axi_1_awaddr), + .m_axi_1_awburst(m_axi_1_awburst), + .m_axi_1_awcache(m_axi_1_awcache), + .m_axi_1_awlen(m_axi_1_awlen), + .m_axi_1_awprot(m_axi_1_awprot), + .m_axi_1_awready(m_axi_1_awready), + .m_axi_1_awsize(m_axi_1_awsize), + .m_axi_1_awuser(m_axi_1_awuser), + .m_axi_1_awvalid(m_axi_1_awvalid), + .m_axi_1_bready(m_axi_1_bready), + .m_axi_1_bresp(m_axi_1_bresp), + .m_axi_1_bvalid(m_axi_1_bvalid), + .m_axi_1_rdata(m_axi_1_rdata), + .m_axi_1_rlast(m_axi_1_rlast), + .m_axi_1_rready(m_axi_1_rready), + .m_axi_1_rresp(m_axi_1_rresp), + .m_axi_1_rvalid(m_axi_1_rvalid), + .m_axi_1_wdata(m_axi_1_wdata), + .m_axi_1_wlast(m_axi_1_wlast), + .m_axi_1_wready(m_axi_1_wready), + .m_axi_1_wstrb(m_axi_1_wstrb), + .m_axi_1_wvalid(m_axi_1_wvalid), + + + .m_axi_2_araddr(m_axi_2_araddr), + .m_axi_2_arburst(m_axi_2_arburst), + .m_axi_2_arcache(m_axi_2_arcache), + .m_axi_2_arlen(m_axi_2_arlen), + .m_axi_2_arprot(m_axi_2_arprot), + .m_axi_2_arready(m_axi_2_arready), + .m_axi_2_arsize(m_axi_2_arsize), + .m_axi_2_aruser(m_axi_2_aruser), + .m_axi_2_arvalid(m_axi_2_arvalid), + .m_axi_2_awaddr(m_axi_2_awaddr), + .m_axi_2_awburst(m_axi_2_awburst), + .m_axi_2_awcache(m_axi_2_awcache), + .m_axi_2_awlen(m_axi_2_awlen), + .m_axi_2_awprot(m_axi_2_awprot), + .m_axi_2_awready(m_axi_2_awready), + .m_axi_2_awsize(m_axi_2_awsize), + .m_axi_2_awuser(m_axi_2_awuser), + .m_axi_2_awvalid(m_axi_2_awvalid), + .m_axi_2_bready(m_axi_2_bready), + .m_axi_2_bresp(m_axi_2_bresp), + .m_axi_2_bvalid(m_axi_2_bvalid), + .m_axi_2_rdata(m_axi_2_rdata), + .m_axi_2_rlast(m_axi_2_rlast), + .m_axi_2_rready(m_axi_2_rready), + .m_axi_2_rresp(m_axi_2_rresp), + .m_axi_2_rvalid(m_axi_2_rvalid), + .m_axi_2_wdata(m_axi_2_wdata), + .m_axi_2_wlast(m_axi_2_wlast), + .m_axi_2_wready(m_axi_2_wready), + .m_axi_2_wstrb(m_axi_2_wstrb), + .m_axi_2_wvalid(m_axi_2_wvalid), + + + .m_axi_3_araddr(m_axi_3_araddr), + .m_axi_3_arburst(m_axi_3_arburst), + .m_axi_3_arcache(m_axi_3_arcache), + .m_axi_3_arlen(m_axi_3_arlen), + .m_axi_3_arprot(m_axi_3_arprot), + .m_axi_3_arready(m_axi_3_arready), + .m_axi_3_arsize(m_axi_3_arsize), + .m_axi_3_aruser(m_axi_3_aruser), + .m_axi_3_arvalid(m_axi_3_arvalid), + .m_axi_3_awaddr(m_axi_3_awaddr), + .m_axi_3_awburst(m_axi_3_awburst), + .m_axi_3_awcache(m_axi_3_awcache), + .m_axi_3_awlen(m_axi_3_awlen), + .m_axi_3_awprot(m_axi_3_awprot), + .m_axi_3_awready(m_axi_3_awready), + .m_axi_3_awsize(m_axi_3_awsize), + .m_axi_3_awuser(m_axi_3_awuser), + .m_axi_3_awvalid(m_axi_3_awvalid), + .m_axi_3_bready(m_axi_3_bready), + .m_axi_3_bresp(m_axi_3_bresp), + .m_axi_3_bvalid(m_axi_3_bvalid), + .m_axi_3_rdata(m_axi_3_rdata), + .m_axi_3_rlast(m_axi_3_rlast), + .m_axi_3_rready(m_axi_3_rready), + .m_axi_3_rresp(m_axi_3_rresp), + .m_axi_3_rvalid(m_axi_3_rvalid), + .m_axi_3_wdata(m_axi_3_wdata), + .m_axi_3_wlast(m_axi_3_wlast), + .m_axi_3_wready(m_axi_3_wready), + .m_axi_3_wstrb(m_axi_3_wstrb), + .m_axi_3_wvalid(m_axi_3_wvalid), + + + + .s_axis_s2mm_tdata(s_axis_s2mm_tdata), + .s_axis_s2mm_tkeep(s_axis_s2mm_tkeep), + .s_axis_s2mm_tdest(s_axis_s2mm_tdest), + + .s_axis_s2mm_tlast(s_axis_s2mm_tlast), + .s_axis_s2mm_tready(s_axis_s2mm_tready), + .s_axis_s2mm_tvalid(s_axis_s2mm_tvalid), + + .m_axis_mm2s_tdata(m_axis_mm2s_tdata), + .m_axis_mm2s_tkeep(m_axis_mm2s_tkeep), + .m_axis_mm2s_tlast(m_axis_mm2s_tlast), + .m_axis_mm2s_tready(m_axis_mm2s_tready), + .m_axis_mm2s_tvalid(m_axis_mm2s_tvalid), + + .s_axis_mm2s_cmd_tdata(s_axis_mm2s_cmd_tdata), + .s_axis_mm2s_cmd_tready(s_axis_mm2s_cmd_tready), + .s_axis_mm2s_cmd_tvalid(s_axis_mm2s_cmd_tvalid), + .s_axis_mm2s_cmd_tdest(s_axis_mm2s_cmd_tdest), + + + .m_axis_mm2s_sts_tdata(m_axis_mm2s_sts_tdata), + .m_axis_mm2s_sts_tready(m_axis_mm2s_sts_tready), + .m_axis_mm2s_sts_tvalid(m_axis_mm2s_sts_tvalid), + .m_axis_mm2s_sts_tkeep(m_axis_mm2s_sts_tkeep), + .m_axis_mm2s_sts_tlast(m_axis_mm2s_sts_tlast), + + .s_axis_s2mm_cmd_tdata(s_axis_s2mm_cmd_tdata), + .s_axis_s2mm_cmd_tready(s_axis_s2mm_cmd_tready), + .s_axis_s2mm_cmd_tvalid(s_axis_s2mm_cmd_tvalid), + .s_axis_s2mm_cmd_tdest(s_axis_s2mm_cmd_tdest), + + + .m_axis_s2mm_sts_tdata(m_axis_s2mm_sts_tdata), + .m_axis_s2mm_sts_tready(m_axis_s2mm_sts_tready), + .m_axis_s2mm_sts_tvalid(m_axis_s2mm_sts_tvalid), + .m_axis_s2mm_sts_tkeep(m_axis_s2mm_sts_tkeep), + .m_axis_s2mm_sts_tlast(m_axis_s2mm_sts_tlast), + + .ap_clk(ap_clk), + .ap_rst_n(ap_rst_n) + ); + +endmodule diff --git a/test/host/Coyote/Makefile b/test/host/Coyote/Makefile new file mode 100644 index 00000000..dd36bbc6 --- /dev/null +++ b/test/host/Coyote/Makefile @@ -0,0 +1,192 @@ +# CMAKE generated file: DO NOT EDIT! +# Generated by "Unix Makefiles" Generator, CMake Version 3.16 + +# Default target executed when no arguments are given to make. +default_target: all + +.PHONY : default_target + +# Allow only one "make -f Makefile2" at a time, but pass parallelism. +.NOTPARALLEL: + + +#============================================================================= +# Special targets provided by cmake. + +# Disable implicit rules so canonical targets will work. +.SUFFIXES: + + +# Remove some rules from gmake that .SUFFIXES does not remove. +SUFFIXES = + +.SUFFIXES: .hpux_make_needs_suffix_list + + +# Suppress display of executed commands. +$(VERBOSE).SILENT: + + +# A target that is always out of date. +cmake_force: + +.PHONY : cmake_force + +#============================================================================= +# Set environment variables for the build. + +# The shell in which to execute make rules. +SHELL = /bin/sh + +# The CMake executable. +CMAKE_COMMAND = /usr/bin/cmake + +# The command to remove a file. +RM = /usr/bin/cmake -E remove -f + +# Escaping for special characters. +EQUALS = = + +# The top-level source directory on which CMake was run. +CMAKE_SOURCE_DIR = /mnt/scratch/jooertli/ACCL/test/host/Coyote + +# The top-level build directory on which CMake was run. +CMAKE_BINARY_DIR = /mnt/scratch/jooertli/ACCL/test/host/Coyote + +#============================================================================= +# Targets provided globally by CMake. + +# Special rule for the target rebuild_cache +rebuild_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." + /usr/bin/cmake -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) +.PHONY : rebuild_cache + +# Special rule for the target rebuild_cache +rebuild_cache/fast: rebuild_cache + +.PHONY : rebuild_cache/fast + +# Special rule for the target edit_cache +edit_cache: + @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "No interactive CMake dialog available..." + /usr/bin/cmake -E echo No\ interactive\ CMake\ dialog\ available. +.PHONY : edit_cache + +# Special rule for the target edit_cache +edit_cache/fast: edit_cache + +.PHONY : edit_cache/fast + +# The main all target +all: cmake_check_build_system + $(CMAKE_COMMAND) -E cmake_progress_start /mnt/scratch/jooertli/ACCL/test/host/Coyote/CMakeFiles /mnt/scratch/jooertli/ACCL/test/host/Coyote/CMakeFiles/progress.marks + $(MAKE) -f CMakeFiles/Makefile2 all + $(CMAKE_COMMAND) -E cmake_progress_start /mnt/scratch/jooertli/ACCL/test/host/Coyote/CMakeFiles 0 +.PHONY : all + +# The main clean target +clean: + $(MAKE) -f CMakeFiles/Makefile2 clean +.PHONY : clean + +# The main clean target +clean/fast: clean + +.PHONY : clean/fast + +# Prepare targets for installation. +preinstall: all + $(MAKE) -f CMakeFiles/Makefile2 preinstall +.PHONY : preinstall + +# Prepare targets for installation. +preinstall/fast: + $(MAKE) -f CMakeFiles/Makefile2 preinstall +.PHONY : preinstall/fast + +# clear depends +depend: + $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 +.PHONY : depend + +#============================================================================= +# Target rules for targets named accl_on_coyote + +# Build rule for target. +accl_on_coyote: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 accl_on_coyote +.PHONY : accl_on_coyote + +# fast build rule for target. +accl_on_coyote/fast: + $(MAKE) -f CMakeFiles/accl_on_coyote.dir/build.make CMakeFiles/accl_on_coyote.dir/build +.PHONY : accl_on_coyote/fast + +#============================================================================= +# Target rules for targets named accl + +# Build rule for target. +accl: cmake_check_build_system + $(MAKE) -f CMakeFiles/Makefile2 accl +.PHONY : accl + +# fast build rule for target. +accl/fast: + $(MAKE) -f CMakeFiles/xrt/CMakeFiles/accl.dir/build.make CMakeFiles/xrt/CMakeFiles/accl.dir/build +.PHONY : accl/fast + +test.o: test.cpp.o + +.PHONY : test.o + +# target to build an object file +test.cpp.o: + $(MAKE) -f CMakeFiles/accl_on_coyote.dir/build.make CMakeFiles/accl_on_coyote.dir/test.cpp.o +.PHONY : test.cpp.o + +test.i: test.cpp.i + +.PHONY : test.i + +# target to preprocess a source file +test.cpp.i: + $(MAKE) -f CMakeFiles/accl_on_coyote.dir/build.make CMakeFiles/accl_on_coyote.dir/test.cpp.i +.PHONY : test.cpp.i + +test.s: test.cpp.s + +.PHONY : test.s + +# target to generate assembly for a file +test.cpp.s: + $(MAKE) -f CMakeFiles/accl_on_coyote.dir/build.make CMakeFiles/accl_on_coyote.dir/test.cpp.s +.PHONY : test.cpp.s + +# Help Target +help: + @echo "The following are some of the valid targets for this Makefile:" + @echo "... all (the default if no target is provided)" + @echo "... clean" + @echo "... depend" + @echo "... rebuild_cache" + @echo "... edit_cache" + @echo "... accl_on_coyote" + @echo "... accl" + @echo "... test.o" + @echo "... test.i" + @echo "... test.s" +.PHONY : help + + + +#============================================================================= +# Special targets to cleanup operation of make. + +# Special rule to run CMake to check the build system integrity. +# No rule that depends on this can have commands that come from listfiles +# because they might be regenerated. +cmake_check_build_system: + $(CMAKE_COMMAND) -S$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 +.PHONY : cmake_check_build_system + diff --git a/test/host/Coyote/run_scripts/flow_u55c.sh b/test/host/Coyote/run_scripts/flow_u55c.sh index 8976bb7a..fd2357c3 100755 --- a/test/host/Coyote/run_scripts/flow_u55c.sh +++ b/test/host/Coyote/run_scripts/flow_u55c.sh @@ -2,7 +2,7 @@ # parameters SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -FPGA_BIT_PATH=$SCRIPT_DIR/../../../refdesigns/Coyote/hw/build_RDMA/lynx/lynx.runs/impl_1/cyt_top +FPGA_BIT_PATH=$SCRIPT_DIR/../../../refdesigns/coyote_build_RDMA_u55c/bitstreams/cyt_top # FPGA_BIT_PATH=$SCRIPT_DIR/../../../refdesigns/Coyote/hw/build_TCP/lynx/lynx.runs/impl_1/cyt_top DRIVER_PATH=$SCRIPT_DIR/../../../refdesigns/Coyote/driver/ @@ -85,8 +85,8 @@ if [ $HOT_RESET -eq 1 ]; then done wait echo "Driver loaded." - echo "Getting permissions for fpga..." - parallel-ssh -H "$hostlist" -x '-tt' "sudo /opt/sgrt/cli/program/fpga_chmod 0" + #echo "Getting permissions for fpga..." + #parallel-ssh -H "$hostlist" -x '-tt' "sudo /opt/sgrt/cli/program/fpga_chmod 0" echo "Done." fi diff --git a/test/host/Coyote/run_scripts/run.sh b/test/host/Coyote/run_scripts/run.sh index 59b628a7..e9079ab9 100755 --- a/test/host/Coyote/run_scripts/run.sh +++ b/test/host/Coyote/run_scripts/run.sh @@ -9,7 +9,7 @@ fi # state variables mkdir -p "$(pwd)/accl_log" BUILD_DIR=../build -EXEC=$BUILD_DIR/accl_on_coyote +EXEC=../accl_on_coyote HOST_FILE=./accl_log/host FPGA_FILE=./accl_log/fpga @@ -40,25 +40,25 @@ done #define ACCL_BARRIER 12 ARG=" -d -f -r" # debug, hardware, and tcp/rdma flags -TEST_MODE=(10) -N_ELEMENTS=(512) # 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 +TEST_MODE=(0) +N_ELEMENTS=(64) # 64 256 1024 4096 16384 65536 262144 524288 1048576 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 NRUN=(1) # number of runs HOST=(1) -PROTOC=(1) # eager=0, rendezevous=1 +PROTOC=(0) # eager=0, rendezevous=1 echo "Run command: $EXEC $ARG -y $TEST_MODE -c 1024 -l $FPGA_FILE" rm -f $(pwd)/accl_log/rank* -for NP in `seq 4 $NUM_PROCESS`; do +for NP in `seq $NUM_PROCESS $NUM_PROCESS`; do for MODE in ${TEST_MODE[@]}; do for N_ELE in ${N_ELEMENTS[@]}; do for H in ${HOST[@]}; do for P in ${PROTOC[@]}; do N=$N_ELE echo "mpirun -n $NP -f $HOST_FILE --iface ens4 $EXEC $ARG -z $H -y $MODE -c $N -l $FPGA_FILE -p $P -n $NRUN &" - mpirun -n $NP -f $HOST_FILE --iface ens4f0 -outfile-pattern "./accl_log/rank_%r_M_${MODE}_N_${N}_H_${H}_P_${P}_stdout" -errfile-pattern "./accl_log/rank_%r_M_${MODE}_N_${N}_H_${H}_P_${P}_stdout" $EXEC $ARG -z $H -y $MODE -c $N -l $FPGA_FILE -p $P -n $NRUN & - SLEEPTIME=2 + /mnt/scratch/zhe/mpich/install/bin/mpirun -n $NP -f $HOST_FILE --iface enp65s0f0np0 -outfile-pattern "./accl_log/rank_%r_M_${MODE}_N_${N}_H_${H}_P_${P}_stdout" -errfile-pattern "./accl_log/rank_%r_M_${MODE}_N_${N}_H_${H}_P_${P}_stdout" $EXEC $ARG -z $H -y $MODE -c $N -l $FPGA_FILE -p $P -n $NRUN -e & + SLEEPTIME=10 sleep $SLEEPTIME parallel-ssh -H "$HOST_LIST" "kill -9 \$(ps -aux | grep accl_on_coyote | awk '{print \$2}')" parallel-ssh -H "$HOST_LIST" "dmesg | grep "fpga_tlb_miss_isr" >$(pwd)/accl_log/tlb_miss.log" diff --git a/test/host/Coyote/test.cpp b/test/host/Coyote/test.cpp index c7c21216..251e4377 100644 --- a/test/host/Coyote/test.cpp +++ b/test/host/Coyote/test.cpp @@ -65,6 +65,7 @@ struct options_t unsigned int protoc; std::string xclbin; std::string fpgaIP; + bool eagerRx_host; }; struct timestamp_t @@ -276,6 +277,7 @@ options_t parse_options(int argc, char *argv[]) "i", "device-index", "device index of FPGA if hardware mode is used", false, 0, "positive integer"); cmd.add(device_index_arg); + TCLAP::SwitchArg eager_arg("e", "eager_host", "Eager Buffers on host", cmd, false); cmd.parse(argc, argv); if (hardware_arg.getValue()) { @@ -337,6 +339,7 @@ options_t parse_options(int argc, char *argv[]) opts.xclbin = xclbin_arg.getValue(); opts.fpgaIP = fpgaIP_arg.getValue(); opts.protoc = protoc_arg.getValue(); + opts.eagerRx_host = eager_arg.getValue(); std::cout << "count:" << opts.count << " rxbuf_size:" << opts.rxbuf_size << " seg_size:" << opts.seg_size << " num_rxbufmem:" << opts.num_rxbufmem << std::endl; return opts; @@ -479,25 +482,85 @@ void configure_cyt_tcp(std::vector &ranks, int local_rank, ACCL::CoyoteD } +void test_copy(ACCL::ACCL &accl, options_t &options){ + unsigned int count = options.count; + auto op_buf = accl.create_coyotebuffer(count, dataType::float32); + auto res_buf = accl.create_coyotebuffer(count, dataType::float32); + int errors = 0; + if (options.count*sizeof(dataType::float32) > options.rxbuf_size){ + std::cout<<"experiment size larger than buffer size, exiting..."<buffer()[i] = (float)i; + res_buf.get()->buffer()[i] = -999.0f; + } + if (options.host == 0){ op_buf->sync_to_device(); } + if (options.host == 0){ res_buf->sync_to_device(); } + // Print buffer addresses for debugging + //std::cout << "Source buffer address: " << op_buf.get()->buffer() << std::endl; + //std::cout << "Result buffer address: " << res_buf.get()->buffer() << std::endl; + // Debug print first few values before operation + + + ACCL::ACCLRequest* req; + req = accl.copy(*op_buf, *res_buf, count, true, true, false); + accl.wait(req, 1000ms); + //sync res buffer from device + if (options.host == 0){ + std::cout << "Syncing result buffer from device..." << std::endl; + res_buf->sync_from_device(); + } + // Debug print first few values after operation + std::cout << "Result buffer after copy (first 4 elements): "; + for (int i = 0; i < count; i++) { + std::cout << res_buf.get()->buffer()[i] << " "; + } + std::cout << std::endl; + //compare results + if (options.host == 0){ op_buf->sync_from_device(); } + if (options.host == 0){ res_buf->sync_from_device(); } + for (int i = 0; i < count; i++) { + if (res_buf.get()->buffer()[i] != op_buf.get()->buffer()[i]) { + std::cout << std::to_string(i + 1) + "th item is incorrect!" << res_buf.get()->buffer()[i] << " != " + << op_buf.get()->buffer()[i] << std::endl; + errors += 1; + } + } + if (errors > 0) { + std::cout << "Copy test failed with " << errors << " errors out of " << count << " elements!" << std::endl; + failed_tests++; + } else { + std::cout << "Copy test successful!" << std::endl; + } + + // Free buffers + std::cout << "Freeing buffers..." << std::endl; + op_buf->free_buffer(); + res_buf->free_buffer(); +} + + void test_sendrcv(ACCL::ACCL &accl, options_t &options) { std::cout << "Start send recv test..." << std::endl< options.rxbuf_size){ + if (options.count*sizeof(dataType::float32) > options.rxbuf_size){ std::cout<<"experiment size larger than buffer size, exiting..."<(bufsize, dataType::int32); + auto op_buf = accl.create_coyotebuffer(bufsize, dataType::float32); for (int n = 0; n < options.nruns; n++) { std::cout << "Repetition " <buffer()[i] = (mpi_rank == 0) ? i : -1; + for (int i = 0; i < bufsize; i++) op_buf.get()->buffer()[i] = (mpi_rank == 0) ? 5.0f : -1.0f; if (options.host == 0){ op_buf->sync_to_device(); } @@ -542,8 +605,8 @@ void test_sendrcv(ACCL::ACCL &accl, options_t &options) { if (mpi_rank == 1) { for (int i = 0; i < bufsize; i++) { - unsigned int res = op_buf.get()->buffer()[i]; - unsigned int ref = i; + float res = op_buf.get()->buffer()[i]; + float ref = 5.0f; if (res != ref) { std::cout << std::to_string(i + 1) + "th item is incorrect! (" + std::to_string(res) + " != " + std::to_string(ref) + ")" @@ -559,6 +622,7 @@ void test_sendrcv(ACCL::ACCL &accl, options_t &options) { } else { std::cout << "Test is successful!" << std::endl; } + debug(accl.dump_eager_rx_buffers(false)); } op_buf->free_buffer(); @@ -1106,9 +1170,13 @@ void test_accl_base(options_t options) accl = std::make_unique(device); if (options.protoc == 0){ + bool eagerBufs = false; + if(options.eagerRx_host){ + eagerBufs = true; + } std::cout<<"Eager Protocol"<initialize(ranks, mpi_rank, - mpi_size+2, options.rxbuf_size, options.seg_size, 4096*1024*2); + mpi_size+3, options.rxbuf_size, /*options.seg_size*/ 4096*1024, 4096*1024*2, eagerBufs); } else if (options.protoc == 1){ std::cout<<"Rendezvous Protocol"<initialize(ranks, mpi_rank, mpi_size, 64, 64, options.seg_size); @@ -1141,6 +1209,7 @@ void test_accl_base(options_t options) MPI_Barrier(MPI_COMM_WORLD); + test_copy(*accl, options); if(options.test_mode == ACCL_SEND || options.test_mode == 0){ debug(accl->dump_eager_rx_buffers(false)); diff --git a/test/host/hls_tcp_vadd/test.cpp b/test/host/hls_tcp_vadd/test.cpp index 4e762554..a8e2db9e 100644 --- a/test/host/hls_tcp_vadd/test.cpp +++ b/test/host/hls_tcp_vadd/test.cpp @@ -48,6 +48,7 @@ struct options_t { unsigned int count; unsigned int device_index; bool hardware; + bool eagerRx_host; bool rsfec; std::string xclbin; std::string config_file; diff --git a/test/host/xrt/include/fixture.hpp b/test/host/xrt/include/fixture.hpp index 14e539e1..7d650888 100644 --- a/test/host/xrt/include/fixture.hpp +++ b/test/host/xrt/include/fixture.hpp @@ -96,15 +96,16 @@ class TestEnvironment : public ::testing::Environment { // Clear any erroneous setting of benchmark flag options.benchmark = false; } - + std::cout << "Setting up TestEnvironment for rank " << ::rank << std::endl; if(options.hardware && (options.cyt_rdma || options.cyt_tcp)){ accl = std::make_unique(cyt_dev); accl.get()->initialize(ranks, ::rank, options.rxbuf_count, options.rxbuf_size, options.max_eager_count); } else { + std::cout << "Initializing ACCL for rank " << ::rank << std::endl; accl = initialize_accl( ranks, ::rank, !options.hardware, design, dev, options.xclbin, options.rxbuf_count, - options.rxbuf_size, options.max_eager_count, options.rsfec); + options.rxbuf_size, options.max_eager_count, options.rsfec, options.eagerRx_host); } std::cout << "Done setting up TestEnvironment" << std::endl; accl->set_timeout(1e6); diff --git a/test/host/xrt/include/utility.hpp b/test/host/xrt/include/utility.hpp index 5987d83e..bc4b5ce8 100644 --- a/test/host/xrt/include/utility.hpp +++ b/test/host/xrt/include/utility.hpp @@ -49,6 +49,7 @@ struct options_t { bool startemu; bool benchmark; std::string csvfile; + bool eagerRx_host; }; pid_t start_emulator(options_t opts, unsigned size, unsigned rank); diff --git a/test/host/xrt/src/test.cpp b/test/host/xrt/src/test.cpp index adacef88..4260dbe2 100644 --- a/test/host/xrt/src/test.cpp +++ b/test/host/xrt/src/test.cpp @@ -32,15 +32,17 @@ TEST_F(ACCLTest, test_copy){ GTEST_SKIP() << "Skipping single-node test on multi-node setup"; } unsigned int count = options.count; + std::cout << "Testing copy with count: " << count << std::endl; auto op_buf = accl->create_buffer(count, dataType::float32); auto res_buf = accl->create_buffer(count, dataType::float32); - EXPECT_FALSE(op_buf->is_host_only()); - EXPECT_FALSE(res_buf->is_host_only()); + //EXPECT_TRUE(op_buf->is_host_only()); + //EXPECT_TRUE(res_buf->is_host_only()); random_array(op_buf->buffer(), count); accl->copy(*op_buf, *res_buf, count); for (unsigned int i = 0; i < count; ++i) { + std::cout << "comapring for i " << i << std::endl; EXPECT_FLOAT_EQ((*op_buf)[i], (*res_buf)[i]); } } @@ -201,13 +203,26 @@ TEST_F(ACCLTest, test_sendrcv_basic) { unsigned int count = options.count; unsigned int count_bytes = count * dataTypeSize.at(dataType::float32) / 8; - auto op_buf = accl->create_buffer(count, dataType::float32); auto res_buf = accl->create_buffer(count, dataType::float32); - random_array(op_buf->buffer(), count); + //random_array(op_buf->buffer(), count); + op_buf->buffer()[0] = 15.5f; + res_buf->buffer()[0] = 0.0f; int next_rank = ::rank + 1; int prev_rank = ::rank - 1; + std::cout << "op_buf before send/recv: " << (*op_buf)[0] << " in rank " << ::rank << std::endl; + std::cout << "res_buf before send/recv: " << (*res_buf)[0] << " in rank " << ::rank << std::endl; + uint32_t op_bits, res_bits; + float op_val = (*op_buf)[0]; + float res_val = (*res_buf)[0]; + std::memcpy(&op_bits, &op_val, sizeof(float)); + std::memcpy(&res_bits, &res_val, sizeof(float)); + + std::cout << "op buf before send/recv: " << std::bitset<32>(op_bits) + << " res buf before send/recv: " << std::bitset<32>(res_bits) + << " in rank " << ::rank << std::endl; + //accl->dump_eager_rx_buffers(false); if(::rank % 2 == 0){ if(next_rank < ::size){ test_debug("Sending data on " + std::to_string(::rank) + " to " + @@ -220,6 +235,10 @@ TEST_F(ACCLTest, test_sendrcv_basic) { accl->recv(*res_buf, count, prev_rank, 0); } + std::cout << "op_buf after send/recv: " << (*op_buf)[0] << " in rank " << ::rank << std::endl; + std::cout << "res_buf after send/recv: " << (*res_buf)[0] << " in rank " << ::rank << std::endl; + //accl->dump_eager_rx_buffers(false); + if(::rank % 2 == 1){ test_debug("Sending data on " + std::to_string(::rank) + " to " + std::to_string(prev_rank) + "...", options); @@ -235,6 +254,16 @@ TEST_F(ACCLTest, test_sendrcv_basic) { if(next_rank < ::size){ for (unsigned int i = 0; i < count; ++i) { EXPECT_FLOAT_EQ((*res_buf)[i], (*op_buf)[i]); + float op_val = (*op_buf)[i]; + float res_val = (*res_buf)[i]; + + uint32_t op_bits, res_bits; + std::memcpy(&op_bits, &op_val, sizeof(float)); + std::memcpy(&res_bits, &res_val, sizeof(float)); + + std::cout << "op buf " << std::bitset<32>(op_bits) + << " res buf " << std::bitset<32>(res_bits) + << std::endl; } } else { SUCCEED(); @@ -1206,6 +1235,7 @@ options_t parse_options(int argc, char *argv[]) { "Maximum byte count for eager mode", false, 3*1024, "positive integer"); cmd.add(max_eager_arg); + TCLAP::SwitchArg host_arg("", "rxEager-host", "Set eager Rxbufs to host", cmd, false); try { cmd.parse(argc, argv); if (axis3_arg.getValue() + udp_arg.getValue() + tcp_arg.getValue() + @@ -1233,6 +1263,7 @@ options_t parse_options(int argc, char *argv[]) { opts.udp = udp_arg.getValue(); opts.tcp = tcp_arg.getValue(); opts.cyt_rdma = cyt_rdma_arg.getValue(); + std::cout << "options cyt_rdma: " << opts.cyt_rdma << std::endl; opts.cyt_tcp = cyt_tcp_arg.getValue(); opts.device_index = device_index_arg.getValue(); opts.xclbin = xclbin_arg.getValue(); @@ -1243,6 +1274,7 @@ options_t parse_options(int argc, char *argv[]) { opts.benchmark = bench_arg.getValue(); opts.csvfile = csvfile_arg.getValue(); opts.max_eager_count = max_eager_arg.getValue(); + opts.eagerRx_host = host_arg.getValue(); return opts; } @@ -1259,8 +1291,20 @@ int main(int argc, char *argv[]) { //gather ACCL options for the test //NOTE: this has to come before the gtest environment is initialized options = parse_options(argc, argv); + if(options.cyt_rdma){ + std::cout << "Using Coyote RDMA backend" << std::endl; + }else if(options.cyt_tcp){ + std::cout << "Using Coyote TCP backend" << std::endl; + }else if(options.axis3){ + std::cout << "Using AXI Stream 3 backend" << std::endl; + }else if(options.udp){ + std::cout << "Using UDP backend" << std::endl; + }else if(options.tcp){ + std::cout << "Using TCP backend" << std::endl; + } if(options.startemu){ + std::cout << "Starting emulator..." << std::endl; emulator_pid = start_emulator(options,::size,::rank); if(!emulator_is_running(emulator_pid)){ std::cout << "Could not start emulator" << std::endl; diff --git a/test/model/emulator/cclo_emu.cpp b/test/model/emulator/cclo_emu.cpp index 578cc482..89ac0220 100644 --- a/test/model/emulator/cclo_emu.cpp +++ b/test/model/emulator/cclo_emu.cpp @@ -61,6 +61,7 @@ void dma_read(vector &dmem, vector &hmem, Stream &dmem, vector &hmem, Stream &dmem, vector &hmem, Stream > &cmd, Stream > &sts, Stream &wdata){ ap_axiu<104,0,0,DEST_WIDTH> cmd_word = cmd.Pop(); axi::Command<64, 23> command = axi::Command<64, 23>(cmd_word.data); - bool host = (cmd_word.dest == 1); + bool host = (cmd_word.dest == 1); axi::Status status; stream_word tmp; logger << log_level::verbose << "DMA " << (host ? "host" : "device") << " write: Command popped. length: " << command.length << " offset: " << command.address << " EOF: " << command.eof << endl; + std::cout << "DMA " << (host ? "host" : "device") << " write: Command popped. length: " << command.length << " offset: " << command.address << " EOF: " << command.eof << endl; int byte_count = 0; while(byte_count &dmem, vector &hmem, Stream &dmem, vector &hmem, Stream loglevel("l", "loglevel", "Verbosity level of logging", diff --git a/test/model/simulator/CMakeLists.txt b/test/model/simulator/CMakeLists.txt index 4175d9ab..ec281543 100644 --- a/test/model/simulator/CMakeLists.txt +++ b/test/model/simulator/CMakeLists.txt @@ -52,7 +52,7 @@ target_link_libraries(cclo_sim PUBLIC zmq zmqpp pthread jsoncpp_lib dl rt) target_compile_definitions(cclo_sim PUBLIC ZMQ_CALL_VERBOSE NUM_CTRL_STREAMS=3 ACCL_SIM_MEM_SIZE_KB=${ACCL_SIM_MEM_SIZE_KB}) target_compile_options(cclo_sim PRIVATE -Wno-attributes -fdiagnostics-color=always -g -fmax-errors=3) -set(STACK_TYPE "TCP" CACHE STRING "Type of POE") +set(STACK_TYPE "RDMA" CACHE STRING "Type of POE") set_property(CACHE STACK_TYPE PROPERTY STRINGS "TCP" "UDP" "RDMA") set(BUILD_FOLDER ${STACK_TYPE}_sim) diff --git a/test/model/simulator/waveform_rank1_c.wcfg b/test/model/simulator/waveform_rank1_c.wcfg new file mode 100644 index 00000000..a6594f58 --- /dev/null +++ b/test/model/simulator/waveform_rank1_c.wcfg @@ -0,0 +1,264 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + eth_rx + label + + s_axis_eth_rx_data_tdata[511:0] + s_axis_eth_rx_data_tdata[511:0] + + + s_axis_eth_rx_data_tdest[7:0] + s_axis_eth_rx_data_tdest[7:0] + + + s_axis_eth_rx_data_tready + s_axis_eth_rx_data_tready + + + s_axis_eth_rx_data_tvalid + s_axis_eth_rx_data_tvalid + + + + dma0_s2mm_cmd + label + + m_axis_dma0_s2mm_cmd_tdata[103:0] + m_axis_dma0_s2mm_cmd_tdata[103:0] + + + m_axis_dma0_s2mm_cmd_tdest[7:0] + m_axis_dma0_s2mm_cmd_tdest[7:0] + + + m_axis_dma0_s2mm_cmd_tready + m_axis_dma0_s2mm_cmd_tready + + + m_axis_dma0_s2mm_cmd_tvalid + m_axis_dma0_s2mm_cmd_tvalid + + + + m_axis_dma0_s2mm + label + + m_axis_dma0_s2mm_tdata[511:0] + m_axis_dma0_s2mm_tdata[511:0] + + + m_axis_dma0_s2mm_tdest[7:0] + m_axis_dma0_s2mm_tdest[7:0] + + + m_axis_dma0_s2mm_tready + m_axis_dma0_s2mm_tready + + + m_axis_dma0_s2mm_tvalid + m_axis_dma0_s2mm_tvalid + + + + dma1_s2mm_cmd + label + + m_axis_dma1_s2mm_cmd_tdata[103:0] + m_axis_dma1_s2mm_cmd_tdata[103:0] + + + m_axis_dma1_s2mm_cmd_tdest[7:0] + m_axis_dma1_s2mm_cmd_tdest[7:0] + + + m_axis_dma1_s2mm_cmd_tready + m_axis_dma1_s2mm_cmd_tready + + + m_axis_dma1_s2mm_cmd_tvalid + m_axis_dma1_s2mm_cmd_tvalid + + + + m_axis_dma1_s2mm + label + + m_axis_dma1_s2mm_tdata[511:0] + m_axis_dma1_s2mm_tdata[511:0] + + + m_axis_dma1_s2mm_tdest[7:0] + m_axis_dma1_s2mm_tdest[7:0] + + + m_axis_dma1_s2mm_tready + m_axis_dma1_s2mm_tready + + + m_axis_dma1_s2mm_tvalid + m_axis_dma1_s2mm_tvalid + + + + dma0_s2mm_sts + label + + s_axis_dma0_s2mm_sts_tdata[31:0] + s_axis_dma0_s2mm_sts_tdata[31:0] + + + s_axis_dma0_s2mm_sts_tready + s_axis_dma0_s2mm_sts_tready + + + s_axis_dma0_s2mm_sts_tvalid + s_axis_dma0_s2mm_sts_tvalid + + + + dma1_s2mm_sts + label + + s_axis_dma1_s2mm_sts_tdata[31:0] + s_axis_dma1_s2mm_sts_tdata[31:0] + + + s_axis_dma1_s2mm_sts_tready + s_axis_dma1_s2mm_sts_tready + + + s_axis_dma1_s2mm_sts_tvalid + s_axis_dma1_s2mm_sts_tvalid + + + + dma0_mm2s_cmd + label + + m_axis_dma0_mm2s_cmd_tdata[103:0] + m_axis_dma0_mm2s_cmd_tdata[103:0] + + + m_axis_dma0_mm2s_cmd_tdest[7:0] + m_axis_dma0_mm2s_cmd_tdest[7:0] + + + m_axis_dma0_mm2s_cmd_tready + m_axis_dma0_mm2s_cmd_tready + + + m_axis_dma0_mm2s_cmd_tvalid + m_axis_dma0_mm2s_cmd_tvalid + + + + dma1_mm2s_cmd + label + + m_axis_dma1_mm2s_cmd_tdata[103:0] + m_axis_dma1_mm2s_cmd_tdata[103:0] + + + m_axis_dma1_mm2s_cmd_tdest[7:0] + m_axis_dma1_mm2s_cmd_tdest[7:0] + + + m_axis_dma1_mm2s_cmd_tready + m_axis_dma1_mm2s_cmd_tready + + + m_axis_dma1_mm2s_cmd_tvalid + m_axis_dma1_mm2s_cmd_tvalid + + + + dma0_mm2s_sts + label + + s_axis_dma0_mm2s_sts_tdata[31:0] + s_axis_dma0_mm2s_sts_tdata[31:0] + + + s_axis_dma0_mm2s_sts_tready + s_axis_dma0_mm2s_sts_tready + + + s_axis_dma0_mm2s_sts_tvalid + s_axis_dma0_mm2s_sts_tvalid + + + + s_axis_dm0_mm2s + label + + s_axis_dma0_mm2s_tdata[511:0] + s_axis_dma0_mm2s_tdata[511:0] + + + s_axis_dma0_mm2s_tready + s_axis_dma0_mm2s_tready + + + s_axis_dma0_mm2s_tvalid + s_axis_dma0_mm2s_tvalid + + + + dma1_mm2s_sts + label + + s_axis_dma1_mm2s_sts_tdata[31:0] + s_axis_dma1_mm2s_sts_tdata[31:0] + + + s_axis_dma1_mm2s_sts_tready + s_axis_dma1_mm2s_sts_tready + + + s_axis_dma1_mm2s_sts_tvalid + s_axis_dma1_mm2s_sts_tvalid + + + + s_axis_dma1_mm2s + label + + s_axis_dma1_mm2s_tdata[511:0] + s_axis_dma1_mm2s_tdata[511:0] + + + s_axis_dma1_mm2s_tready + s_axis_dma1_mm2s_tready + + + s_axis_dma1_mm2s_tvalid + s_axis_dma1_mm2s_tvalid + + + diff --git a/test/model/simulator/xsi_dut.cpp b/test/model/simulator/xsi_dut.cpp index 820bcbe9..3469696c 100644 --- a/test/model/simulator/xsi_dut.cpp +++ b/test/model/simulator/xsi_dut.cpp @@ -44,6 +44,12 @@ XSI_DUT::XSI_DUT(const string& design_libname, const string& simkernel_libname, memset(&info, 0, sizeof(info)); info.logFileName = NULL; info.wdbFileName = const_cast(wdbName.c_str()); + + // Work-around for an added field (ximDir) in Vivado >= 2023.2, which if left NULL, will cause C++ string exception + // However, for older versions of Vivado, this field is not present, so we need to check the size of the struct + if (sizeof(info) == 24) { + info.xsimDir = const_cast("xsim.dir"); + } xsi.open(&info); *logger << log_level::verbose << "XSI opened" << std::endl; if(trace){