diff --git a/.gitignore b/.gitignore index 46f1b628..09c459e2 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,11 @@ driver/xrt/src/m2m driver/xrt/build .vscode +coyote_build* +*xcu55c-fsvh2892-2L-e* +*_prj +*.gen +*.ip_user_files +*.cache +*.srcs +**/fpga_ips.txt \ No newline at end of file diff --git a/driver/xrt/CMakeLists.txt b/driver/xrt/CMakeLists.txt index f6ed4236..c6b62a9b 100644 --- a/driver/xrt/CMakeLists.txt +++ b/driver/xrt/CMakeLists.txt @@ -74,6 +74,7 @@ set(ACCL_DOCS_RST ) set(EN_COYOTE ON) +set(EN_AVX 1 CACHE STRING "AVX environment.") if(EN_COYOTE) message("Enable Coyote") set(ACCL_HEADERS @@ -88,6 +89,7 @@ if(EN_COYOTE) file(GLOB COYOTE_SOURCE "${COYOTE_SOURCE_PATH}/*.cpp") if(EN_AVX) + add_definitions(-DEN_AVX) set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -mavx -march=native -O3") else() set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -march=native -O1") diff --git a/driver/xrt/include/accl.hpp b/driver/xrt/include/accl.hpp index a98667fd..cbe7ffe8 100644 --- a/driver/xrt/include/accl.hpp +++ b/driver/xrt/include/accl.hpp @@ -101,7 +101,7 @@ class ACCL { */ void initialize(const std::vector &ranks, int local_rank, int n_egr_rx_bufs = 16, addr_t egr_rx_buf_size = 1024, - addr_t max_egr_size = 1024, addr_t max_rndzv_size = 32*1024); + addr_t max_egr_size = 1024, addr_t max_rndzv_size = 32*1024, bool rxEager_host = false); /** * Get the return code of the last ACCL call. @@ -1101,7 +1101,7 @@ ACCLRequest *barrier(communicatorId comm_id = GLOBAL_COMM, void configure_arithmetic(); void setup_eager_rx_buffers(size_t n_egr_rx_bufs, addr_t egr_rx_buf_size, - const std::vector &devicemem); + const std::vector &devicemem, bool host=false); void setup_eager_rx_buffers(size_t n_egr_rx_bufs, addr_t egr_rx_buf_size, int devicemem) { std::vector mems = {devicemem}; return setup_eager_rx_buffers(n_egr_rx_bufs, egr_rx_buf_size, mems); diff --git a/driver/xrt/include/accl/coyotebuffer.hpp b/driver/xrt/include/accl/coyotebuffer.hpp index c082a856..95c921ae 100644 --- a/driver/xrt/include/accl/coyotebuffer.hpp +++ b/driver/xrt/include/accl/coyotebuffer.hpp @@ -19,7 +19,7 @@ #pragma once #include "buffer.hpp" #include "common.hpp" -#include "cProcess.hpp" +#include "cThread.hpp" #include "cDefs.hpp" #include "coyotedevice.hpp" #include @@ -57,14 +57,13 @@ template class CoyoteBuffer : public Buffer { size_t page_size = 1ULL << 21; this->buffer_size = length * sizeof(dtype); this->n_pages = (buffer_size + page_size - 1) / page_size; - std::cerr << "CoyoteBuffer contructor called! page_size:"<aligned_buffer = (dtype *)this->device->coyote_proc->getMem({fpga::CoyoteAlloc::HUGE_2M, n_pages}); + this->aligned_buffer = (dtype *)this->device->coyote_proc->getMem({coyote::CoyoteAllocType::HPF, this->buffer_size, true}); this->update_buffer(this->aligned_buffer, (addr_t)this->aligned_buffer); std::cerr << "Allocation successful! Allocated buffer: "<aligned_buffer << std::setbase(10) <<", Size: " << this->_size << std::endl; + //buffers in coyote per default on host memory host_flag = true; @@ -112,22 +111,30 @@ template class CoyoteBuffer : public Buffer { */ void sync_from_device() override { - std::cerr << "calling sync: " << std::setbase(16) << (uint64_t)this->aligned_buffer << ", size: " << std::setbase(10) << this->size() << std::endl; + std::cerr << "sync_from_device at address: " << std::setbase(16) << (uint64_t)this->aligned_buffer << ", size: " << std::setbase(10) << this->size() << std::endl; - this->device->coyote_proc->invoke({fpga::CoyoteOper::SYNC, this->aligned_buffer, (uint32_t)this->_size, true, true, 0, false}); + coyote::syncSg sg; + memset(&sg, 0, sizeof(coyote::syncSg)); + sg.addr = this->aligned_buffer; + sg.len = this->size(); + this->device->coyote_proc->invoke(coyote::CoyoteOper::LOCAL_SYNC, sg); this->host_flag = true; } /** * Sync the data from the host to the device. - * + * */ void sync_to_device() override { - std::cerr << "calling offload: " << std::setbase(16) << (uint64_t)this->aligned_buffer << ", size: " << std::setbase(10) << this->size() << std::endl; + std::cerr << "sync_to_device at address: " << std::setbase(16) << (uint64_t)this->aligned_buffer << ", size: " << std::setbase(10) << this->size() << std::endl; - this->device->coyote_proc->invoke({fpga::CoyoteOper::OFFLOAD, this->aligned_buffer, (uint32_t)this->_size, true, true, 0, false}); + coyote::syncSg sg; + memset(&sg, 0, sizeof(coyote::syncSg)); + sg.addr = this->aligned_buffer; + sg.len = this->size(); + this->device->coyote_proc->invoke(coyote::CoyoteOper::LOCAL_OFFLOAD, sg); this->host_flag = false; } @@ -144,7 +151,7 @@ template class CoyoteBuffer : public Buffer { // } // } - std::cerr << "Free user buffer from cProc cPid:"<< std::setbase(10)<device->coyote_proc->getCpid()<<", buffer_size:"<aligned_buffer<device->coyote_proc->getCtid()<<", buffer_size:"<aligned_buffer<device->coyote_proc->freeMem(this->aligned_buffer); return; } diff --git a/driver/xrt/include/accl/coyotedevice.hpp b/driver/xrt/include/accl/coyotedevice.hpp index 7d3e1689..5c5795cd 100644 --- a/driver/xrt/include/accl/coyotedevice.hpp +++ b/driver/xrt/include/accl/coyotedevice.hpp @@ -20,9 +20,9 @@ #include "acclrequest.hpp" #include "cclo.hpp" #include "constants.hpp" -#include "cProcess.hpp" -#include "ibvQpConn.hpp" -#include "ibvStructs.hpp" +#include "cThread.hpp" +// #include "ibvQpConn.hpp" +// #include "ibvStructs.hpp" #include #include #include @@ -108,7 +108,7 @@ class CoyoteDevice : public CCLO { void printDebug() override; - fpga::cProcess* get_device(){ + coyote::cThread* get_device(){ return coyote_proc; } @@ -120,13 +120,13 @@ class CoyoteDevice : public CCLO { val_t get_retcode(ACCLRequest *request) override; - fpga::cProcess* coyote_proc; + coyote::cThread* coyote_proc; // RDMA related // RDMA requires multiple processes to establish queue pairs // The CCLO kernel is still managed by coyote_proc unsigned int num_qp; - std::vector coyote_qProc_vec; + std::vector coyote_qProc_vec; private: const size_t OFFSET_CCLO = 0x0; diff --git a/driver/xrt/src/accl.cpp b/driver/xrt/src/accl.cpp index 9e98efb7..60a7f4d4 100644 --- a/driver/xrt/src/accl.cpp +++ b/driver/xrt/src/accl.cpp @@ -128,12 +128,11 @@ ACCLRequest *ACCL::send(BaseBuffer &srcbuf, unsigned int count, if (from_fpga == false) { srcbuf.sync_to_device(); } - options.scenario = operation::send; options.comm = communicators[comm_id].communicators_addr(); options.addr_0 = &srcbuf; options.count = count; - options.root_src_dst = dst; + options.root_src_dst = dst; options.tag = tag; options.compress_dtype = compress_dtype; options.waitfor = waitfor; @@ -143,7 +142,6 @@ ACCLRequest *ACCL::send(BaseBuffer &srcbuf, unsigned int count, wait(handle); check_return_value("send", handle); } - return handle; } @@ -244,7 +242,6 @@ ACCLRequest *ACCL::recv(BaseBuffer &dstbuf, unsigned int count, "sync_from_device() after waiting" << std::endl; } - options.scenario = operation::recv; options.comm = communicators[comm_id].communicators_addr(); options.addr_2 = &dstbuf; @@ -262,7 +259,6 @@ ACCLRequest *ACCL::recv(BaseBuffer &dstbuf, unsigned int count, } check_return_value("recv", handle); } - return handle; } @@ -287,7 +283,6 @@ ACCLRequest *ACCL::recv(dataType dst_data_type, unsigned int count, wait(handle); check_return_value("recv", handle); } - return handle; } @@ -302,11 +297,9 @@ ACCLRequest *ACCL::copy(BaseBuffer *srcbuf, BaseBuffer *dstbuf, unsigned int cou "sync_from_device() after waiting" << std::endl; } - if (from_fpga == false) { srcbuf->sync_to_device(); } - options.scenario = operation::copy; options.addr_0 = srcbuf; options.addr_2 = dstbuf; @@ -318,13 +311,13 @@ ACCLRequest *ACCL::copy(BaseBuffer *srcbuf, BaseBuffer *dstbuf, unsigned int cou ACCLRequest *handle = call_async(options); if (!run_async) { - wait(handle); + std::chrono::milliseconds timeout(1000); + wait(handle, timeout); if (to_fpga == false) { dstbuf->sync_from_device(); } check_return_value("copy", handle); } - return handle; } @@ -1011,6 +1004,8 @@ std::string ACCL::dump_eager_rx_buffers(size_t n_egr_rx_bufs, bool dump_data) { address += 4; val_t addrh = cclo->read(address); address += 4; + val_t max_len = cclo->read(address); + address += 4; val_t rxtag = cclo->read(address); address += 4; val_t rxlen = cclo->read(address); @@ -1018,24 +1013,33 @@ std::string ACCL::dump_eager_rx_buffers(size_t n_egr_rx_bufs, bool dump_data) { val_t rxsrc = cclo->read(address); address += 4; val_t seq = cclo->read(address); + address += 4; + val_t hostBit = cclo->read(address); stream << "Spare RX Buffer " << i << ":\t address: 0x" << std::hex << addrh * (1UL << 32) + addrl << std::dec << " \t status: " << status << " \t occupancy: " << rxlen << "/" << maxsize << " \t MPI tag: " << std::hex << rxtag << std::dec - << " \t seq: " << seq << " \t src: " << rxsrc; + << " \t seq: " << seq << " \t src: " << rxsrc + << " \t hostBit: " << hostBit; if(dump_data) { - eager_rx_buffers[i]->sync_from_device(); + if(!(hostBit && cclo->get_device_type() == CCLO::coyote_device)){ + eager_rx_buffers[i]->sync_from_device(); + } stream << " \t data: " << std::hex << "["; for (size_t j = 0; j < eager_rx_buffers[i]->size(); ++j) { - stream << "0x" - << static_cast(static_cast( - eager_rx_buffers[i]->byte_array())[j]); - if (j != eager_rx_buffers[i]->size() - 1) { - stream << ", "; - } + if(static_cast(static_cast( + eager_rx_buffers[i]->byte_array())[j]) != 0){ + + stream << "0x" + << static_cast(static_cast( + eager_rx_buffers[i]->byte_array())[j]); + if (j != eager_rx_buffers[i]->size() - 1) { + stream << ", "; + } + } } stream << "]" << std::dec << std::endl; } else { @@ -1065,7 +1069,7 @@ void ACCL::parse_hwid(){ void ACCL::initialize(const std::vector &ranks, int local_rank, int n_egr_rx_bufs, addr_t egr_rx_buf_size, - addr_t max_egr_size, addr_t max_rndzv_size) { + addr_t max_egr_size, addr_t max_rndzv_size, bool rxEager_host) { parse_hwid(); @@ -1077,8 +1081,7 @@ void ACCL::initialize(const std::vector &ranks, int local_rank, } debug("Configuring Eager RX Buffers"); - setup_eager_rx_buffers(n_egr_rx_bufs, egr_rx_buf_size, rxbufmem); - + setup_eager_rx_buffers(n_egr_rx_bufs, egr_rx_buf_size, rxbufmem, rxEager_host); debug("Configuring Rendezvous Spare Buffers"); setup_rendezvous_spare_buffers(max_rndzv_size, rxbufmem); @@ -1129,7 +1132,7 @@ addr_t ACCL::get_arithmetic_config_addr(std::pair id) { } void ACCL::setup_eager_rx_buffers(size_t n_egr_rx_bufs, addr_t egr_rx_buf_size, - const std::vector &devicemem) { + const std::vector &devicemem, bool host) { addr_t address = CCLO_ADDR::EGR_RX_BUF_SIZE_OFFSET; eager_rx_buffer_size = egr_rx_buf_size; for (size_t i = 0; i < n_egr_rx_bufs; ++i) { @@ -1137,15 +1140,32 @@ void ACCL::setup_eager_rx_buffers(size_t n_egr_rx_bufs, addr_t egr_rx_buf_size, Buffer *buf; if (sim_mode) { - buf = new SimBuffer(new int8_t[eager_rx_buffer_size](), eager_rx_buffer_size, dataType::int8, + if(host){ + buf = new SimBuffer(new int8_t[eager_rx_buffer_size](), eager_rx_buffer_size, dataType::int8, + static_cast(cclo)->get_context(), true, ACCL_SIM_DEFAULT_BANK); + }else{ + buf = new SimBuffer(new int8_t[eager_rx_buffer_size](), eager_rx_buffer_size, dataType::int8, static_cast(cclo)->get_context()); + } } else if(cclo->get_device_type() == CCLO::xrt_device ){ - buf = new XRTBuffer(eager_rx_buffer_size, dataType::int8, *(static_cast(cclo)->get_device()), devicemem[i % devicemem.size()]); + if(host){ + //TODO: how to define host buffers in XRT? + buf = new XRTBuffer(eager_rx_buffer_size, dataType::int8, *(static_cast(cclo)->get_device()), devicemem[i % devicemem.size()]); + }else{ + buf = new XRTBuffer(eager_rx_buffer_size, dataType::int8, *(static_cast(cclo)->get_device()), devicemem[i % devicemem.size()]); + } } else if(cclo->get_device_type() == CCLO::coyote_device){ - buf = new CoyoteBuffer(eager_rx_buffer_size, dataType::int8, static_cast(cclo)); + if(host){ + //buffers in coyote per default on host + buf = new CoyoteBuffer(eager_rx_buffer_size, dataType::int8, static_cast(cclo)); + }else{ + buf = new CoyoteBuffer(eager_rx_buffer_size, dataType::int8, static_cast(cclo)); + } + } + //add if else as well, test for coyote backend + eager on host + if(!(host && cclo->get_device_type() == CCLO::coyote_device)){ + buf->sync_to_device(); } - - buf->sync_to_device(); eager_rx_buffers.emplace_back(buf); // program this buffer into the accelerator address += 4; @@ -1155,10 +1175,18 @@ void ACCL::setup_eager_rx_buffers(size_t n_egr_rx_bufs, addr_t egr_rx_buf_size, address += 4; cclo->write(address, (buf->address() >> 32) & 0xffffffff); // clear remaining 4 fields - for (size_t j = 0; j < 4; ++j) { + for (size_t j = 0; j < 5; ++j) { address += 4; cclo->write(address, 0); } + //set the host flag + // NOTE: the host flag is set to true if the buffer is a host buffer + address += 4; + if(host){ + cclo->write(address, 1); // set host flag + }else{ + cclo->write(address, 0); // set host flag + } } //write buffer len @@ -1168,7 +1196,6 @@ void ACCL::setup_eager_rx_buffers(size_t n_egr_rx_bufs, addr_t egr_rx_buf_size, cclo->write(CCLO_ADDR::NUM_EGR_RX_BUFS_OFFSET, n_egr_rx_bufs); current_config_address = address + 4; - } void ACCL::setup_rendezvous_spare_buffers(addr_t rndzv_spare_buf_size, const std::vector &devicemem){ @@ -1184,7 +1211,8 @@ void ACCL::setup_rendezvous_spare_buffers(addr_t rndzv_spare_buf_size, const std } else if(cclo->get_device_type() == CCLO::coyote_device){ buf = new CoyoteBuffer(max_rndzv_msg_size, dataType::int8, static_cast(cclo)); } - buf->sync_to_device(); + //TODO: make mem residience configurable as well for nocard configuration + //buf->sync_to_device(); utility_spares.emplace_back(buf); } cclo->write(CCLO_ADDR::SPARE1_OFFSET, utility_spares.at(0)->address() & 0xffffffff); @@ -1310,12 +1338,15 @@ void ACCL::prepare_call(CCLO::Options &options) { // determine which operand is compressed if (options.addr_0->type() == compressed) { options.compression_flags |= compressionFlags::OP0_COMPRESSED; + std::cout << "addr 0 is compressed " << std::endl; } if (options.addr_1->type() == compressed) { options.compression_flags |= compressionFlags::OP1_COMPRESSED; + std::cout << "addr 1 is compressed " << std::endl; } if (options.addr_2->type() == compressed) { options.compression_flags |= compressionFlags::RES_COMPRESSED; + std::cout << "addr 2 is compressed " << std::endl; } } } else { @@ -1342,12 +1373,15 @@ void ACCL::prepare_call(CCLO::Options &options) { // determine which operand is compressed if (options.addr_0->type() == options.compress_dtype) { options.compression_flags |= compressionFlags::OP0_COMPRESSED; + std::cout << "addr 0 is eth compressed " << std::endl; } if (options.addr_1->type() == options.compress_dtype) { options.compression_flags |= compressionFlags::OP1_COMPRESSED; + std::cout << "addr 1 is eth compressed " << std::endl; } if (options.addr_2->type() == options.compress_dtype) { options.compression_flags |= compressionFlags::RES_COMPRESSED; + std::cout << "addr 2 is eth compressed " << std::endl; } } } diff --git a/driver/xrt/src/coyotedevice.cpp b/driver/xrt/src/coyotedevice.cpp index fd96b904..7c761fb1 100644 --- a/driver/xrt/src/coyotedevice.cpp +++ b/driver/xrt/src/coyotedevice.cpp @@ -18,9 +18,10 @@ #include "accl/coyotedevice.hpp" #include "accl/common.hpp" -#include "cProcess.hpp" +#include "cThread.hpp" #include #include +#include static void finish_coyote_request(ACCL::CoyoteRequest *req) { req->wait_kernel(); @@ -45,8 +46,14 @@ void CoyoteRequest::start() { } else { function = static_cast(options.reduce_function); } + /*std::cout << "options.scenario " << static_cast(options.scenario) << " options.count " << options.count << " options.comm " << options.comm << + " options.root_src_dest " << options.root_src_dst << " options.cfg_function " << static_cast(options.cfg_function) << " options.reduce_function " << static_cast(options.reduce_function) << + " options.tag " << options.tag << " options.arithcfg_addr " << options.arithcfg_addr << " options.compress_dtype " << static_cast(options.compress_dtype) << + " options.compression_flags " << std::bitset<32>(static_cast(options.compression_flags)) << " options.stream_flags " << std::bitset<32>(static_cast(options.stream_flags)) << + " options.host_flags " << std::bitset<32>(static_cast(options.host_flags)) << " address0 " << options.addr_0->address() << " address1 " << options.addr_1->address() << + " address2 " << options.addr_2->address() << " options.data_type_io_0 " << static_cast(options.data_type_io_0) << " options.data_type_io_1 " << static_cast(options.data_type_io_1) << + " options.data_type_io_2 " << static_cast(options.data_type_io_2) << std::endl;*/ uint32_t flags = static_cast(options.host_flags) << 8 | static_cast(options.stream_flags); - auto coyote_proc = reinterpret_cast(cclo())->get_device(); if ((coyote_proc->getCSR((OFFSET_HOSTCTRL + HOSTCTRL_ADDR::AP_CTRL)>>2) & 0x4) == 0) { // read AP_CTRL and check bit 3 (the idle bit) @@ -251,6 +258,7 @@ void CoyoteRequest::start() { } case ACCL::operation::config:{ coyote_proc->setCSR(static_cast(options.scenario), (OFFSET_HOSTCTRL + HOSTCTRL_ADDR::SCEN)>>2); + coyote_proc->setCSR(static_cast(options.count), (OFFSET_HOSTCTRL + HOSTCTRL_ADDR::LEN)>>2); coyote_proc->setCSR(static_cast(function), (OFFSET_HOSTCTRL + HOSTCTRL_ADDR::FUNCTION_R)>>2); //coyote_proc->setCSR(static_cast(flags), (OFFSET_HOSTCTRL + HOSTCTRL_ADDR::STREAM_FLAGS)>>2); //safe to delete? break; @@ -276,28 +284,28 @@ void CoyoteRequest::wait_kernel() { } CoyoteDevice::CoyoteDevice(): num_qp(0) { - this->coyote_proc = new fpga::cProcess(targetRegion, getpid()); - std::cerr << "ACLL DEBUG: aquiring cProc: targetRegion: " << targetRegion << ", cPid: " << coyote_proc->getCpid() << std::endl; + this->coyote_proc = new coyote::cThread(targetRegion, getpid(), 0); + std::cerr << "ACLL DEBUG: aquiring cProc: targetRegion: " << targetRegion << ", cPid: " << coyote_proc->getCtid() << std::endl; } CoyoteDevice::CoyoteDevice(unsigned int num_qp): num_qp(num_qp) { for (unsigned int i=0; i<(num_qp+1); i++) { - fpga::cProcess* cproc = new fpga::cProcess(targetRegion, getpid()); + coyote::cThread* cproc = new coyote::cThread(targetRegion, getpid(), 0); coyote_qProc_vec.push_back(cproc); } for (unsigned int i=0; igetCpid() == 0){ + if(i == 0){ this->coyote_proc = coyote_qProc_vec[i]; - std::cerr << "ACLL DEBUG: aquiring cProc: targetRegion: " << targetRegion << ", cPid: " << coyote_proc->getCpid() << std::endl; + std::cerr << "ACLL DEBUG: aquiring cProc: targetRegion: " << targetRegion << ", cPid: " << coyote_proc->getCtid() << std::endl; coyote_qProc_vec.erase(coyote_qProc_vec.begin() + i); break; } } - if(coyote_proc == NULL || coyote_proc->getCpid() != 0){ + if(coyote_proc == NULL){ std::cerr << "cProc initialization error!"<getCpid() << std::endl; + std::cerr << "ACLL DEBUG: aquiring qProc: targetRegion: " << targetRegion << ", cPid: " << coyote_qProc_vec[i]->getCtid() << std::endl; } } @@ -369,7 +377,7 @@ CCLO::deviceType CoyoteDevice::get_device_type() void CoyoteDevice::printDebug(){ coyote_proc->printDebug(); - std::ifstream inputFile("/sys/kernel/coyote_cnfg/cyt_attr_nstats_q0"); + std::ifstream inputFile("/sys/kernel/coyote_sysfs_0/cyt_attr_nstats"); if (!inputFile.is_open()) { std::cerr << "Failed to open net sts file." << std::endl; diff --git a/kernels/cclo/Makefile b/kernels/cclo/Makefile index dc76095c..48180f4c 100644 --- a/kernels/cclo/Makefile +++ b/kernels/cclo/Makefile @@ -15,13 +15,13 @@ # # *******************************************************************************/ -PLATFORM ?= xilinx_u280_xdma_201920_3 +PLATFORM ?= xilinx_u55c_gen3x16_xdma_3_202210_1 HW_DEBUG ?= none -STACK_TYPE ?= UDP +STACK_TYPE ?= RDMA MODE ?= xo EN_DMA ?= 1 EN_ARITH ?= 1 -EN_COMPRESS ?= 1 +EN_COMPRESS ?= 0 EN_EXT_KRNL ?= 1 MB_DEBUG_LEVEL ?= 0 SIM_MEM_SIZE_LOG ?= 28 diff --git a/kernels/cclo/fw/sw_apps/ccl_offload_control/src/ccl_offload_control.h b/kernels/cclo/fw/sw_apps/ccl_offload_control/src/ccl_offload_control.h index d523b4ff..3bb1cee3 100644 --- a/kernels/cclo/fw/sw_apps/ccl_offload_control/src/ccl_offload_control.h +++ b/kernels/cclo/fw/sw_apps/ccl_offload_control/src/ccl_offload_control.h @@ -273,16 +273,19 @@ typedef struct { unsigned int rx_len; unsigned int rx_src; unsigned int sequence_number; + unsigned int host; //host address for the packet } rx_buffer; #define STATUS_OFFSET 0 #define ADDRL_OFFSET 1 #define ADDRH_OFFSET 2 -#define RX_TAG_OFFSET 3 -#define RX_LEN_OFFSET 4 -#define RX_SRC_OFFSET 5 -#define SEQUENCE_NUMBER_OFFSET 6 -#define SPARE_BUFFER_FIELDS 7 +#define MAX_LEN_OFFSET 3 +#define RX_TAG_OFFSET 4 +#define RX_LEN_OFFSET 5 +#define RX_SRC_OFFSET 6 +#define SEQUENCE_NUMBER_OFFSET 7 +#define HOST_OFFSET 8 //host address offset for the buffer +#define SPARE_BUFFER_FIELDS 9 #define STATUS_IDLE 0x00 #define STATUS_ENQUEUED 0x01 diff --git a/kernels/cclo/hdl/ccl_offload.v b/kernels/cclo/hdl/ccl_offload.v index 8fbea965..8b6b17e9 100644 --- a/kernels/cclo/hdl/ccl_offload.v +++ b/kernels/cclo/hdl/ccl_offload.v @@ -622,5 +622,46 @@ module ccl_offload .s_axi_control_wstrb(s_axi_control_wstrb), .s_axi_control_wvalid(s_axi_control_wvalid) ); +//ila to capture commands on cclo interf +/*ila_top inst_ila_top( + .clk(ap_clk), + .probe0(s_axis_call_req_tdata), //32 + .probe1(s_axis_call_req_tready), + .probe2(s_axis_call_req_tvalid), + .probe3(s_axis_call_req_tlast), + .probe4(s_axis_eth_rx_data_tdata), //512 + .probe5(s_axis_eth_rx_data_tdest), //8 + .probe6(s_axis_eth_rx_data_tready), + .probe7(s_axis_eth_rx_data_tvalid), + .probe8(m_axis_dma0_s2mm_tdata), //512 + .probe9(m_axis_dma0_s2mm_tdest), //8 + .probe10(m_axis_dma0_s2mm_tready), + .probe11(m_axis_dma0_s2mm_tvalid), + .probe12(s_axis_dma0_mm2s_tdata),//512 + .probe13(s_axis_dma0_mm2s_tdest), //8 + .probe14(s_axis_dma0_mm2s_tready), + .probe15(s_axis_dma0_mm2s_tvalid), + .probe16(m_axis_dma1_s2mm_tdata),//512 + .probe17(m_axis_dma1_s2mm_tdest), //8 + .probe18(m_axis_dma1_s2mm_tready), + .probe19(m_axis_dma1_s2mm_tvalid), + .probe20(s_axis_dma1_mm2s_tdata),//512 + .probe21(s_axis_dma1_mm2s_tdest), //8 + .probe22(s_axis_dma1_mm2s_tready), + .probe23(s_axis_dma1_mm2s_tvalid), + .probe24(s_axis_eth_notification_tdata), //64 + .probe25(s_axis_eth_notification_tready), + .probe26(s_axis_eth_notification_tvalid), + .probe27(m_axis_eth_tx_meta_tdata), //32 + .probe28(m_axis_eth_tx_meta_tready), + .probe29(m_axis_eth_tx_meta_tvalid), + .probe30(s_axis_eth_tx_status_tdata), //64 + .probe31(s_axis_eth_tx_status_tready), + .probe32(s_axis_eth_tx_status_tvalid), + .probe33(m_axis_eth_tx_data_tdata), //512 + .probe34(m_axis_eth_tx_data_tready), + .probe35(m_axis_eth_tx_data_tvalid), + .probe36(m_axis_eth_tx_data_tdest) //8 + );*/ endmodule diff --git a/kernels/cclo/hls/dma_mover/dma_mover.cpp b/kernels/cclo/hls/dma_mover/dma_mover.cpp index bede2f43..27cd75b1 100644 --- a/kernels/cclo/hls/dma_mover/dma_mover.cpp +++ b/kernels/cclo/hls/dma_mover/dma_mover.cpp @@ -594,6 +594,7 @@ void instruction_decode( dm1_rd.total_bytes = seek_res.len; bytes_remaining -= seek_res.len; dm1_rd.last = (bytes_remaining <= 0); + dm1_rd.mem_id = seek_res.host ? 1 : 0; //instruct to release this buffer once the DMA movement is complete STREAM_WRITE(rxbuf_release_idx, seek_res.index); ack_insn.release_count++; diff --git a/kernels/cclo/hls/rxbuf_offload/rxbuf_enqueue.cpp b/kernels/cclo/hls/rxbuf_offload/rxbuf_enqueue.cpp index 643a11b1..209c1833 100644 --- a/kernels/cclo/hls/rxbuf_offload/rxbuf_enqueue.cpp +++ b/kernels/cclo/hls/rxbuf_offload/rxbuf_enqueue.cpp @@ -50,8 +50,10 @@ void rxbuf_enqueue( //iterate until you run out of spare buffers for(int i=0; i < nbufs; i++){ ap_uint<32> status; + ap_uint<32> host; ap_uint<64> addr; status = rx_buffers[(i * SPARE_BUFFER_FIELDS) + STATUS_OFFSET]; + host = rx_buffers[(i * SPARE_BUFFER_FIELDS) + HOST_OFFSET]; addr(31, 0) = rx_buffers[(i * SPARE_BUFFER_FIELDS) + ADDRL_OFFSET]; addr(63, 32) = rx_buffers[(i * SPARE_BUFFER_FIELDS) + ADDRH_OFFSET]; @@ -64,7 +66,7 @@ void rxbuf_enqueue( cmd.tag = tag++; cmd_word.data = cmd; cmd_word.last = 1;//unused for now - cmd_word.dest = 0;//unused for now + cmd_word.dest = host;//unused for now STREAM_WRITE(dma_cmd, cmd_word); //update spare buffer status rx_buffers[(i * SPARE_BUFFER_FIELDS) + STATUS_OFFSET] = STATUS_ENQUEUED; diff --git a/kernels/cclo/hls/rxbuf_offload/rxbuf_offload.h b/kernels/cclo/hls/rxbuf_offload/rxbuf_offload.h index 4b2b443e..35c68201 100644 --- a/kernels/cclo/hls/rxbuf_offload/rxbuf_offload.h +++ b/kernels/cclo/hls/rxbuf_offload/rxbuf_offload.h @@ -20,6 +20,7 @@ typedef struct { ap_uint<32> index; ap_uint<32> len; bool valid; + bool host; } rxbuf_seek_result; typedef struct { diff --git a/kernels/cclo/hls/rxbuf_offload/rxbuf_seek.cpp b/kernels/cclo/hls/rxbuf_offload/rxbuf_seek.cpp index 78812821..d2c8fc78 100644 --- a/kernels/cclo/hls/rxbuf_offload/rxbuf_seek.cpp +++ b/kernels/cclo/hls/rxbuf_offload/rxbuf_seek.cpp @@ -59,6 +59,7 @@ void rxbuf_seek( pending_notif.signature.src == seek_sig.src && pending_notif.signature.seqn == seek_sig.seqn){ seek_res.addr(31,0) = rx_buffers[(RX_BUFFER_METADATA_OFFSET/4) + pending_notif.index * SPARE_BUFFER_FIELDS + ADDRL_OFFSET]; seek_res.addr(63,32) = rx_buffers[(RX_BUFFER_METADATA_OFFSET/4) + pending_notif.index * SPARE_BUFFER_FIELDS + ADDRH_OFFSET]; + seek_res.host = (rx_buffers[(RX_BUFFER_METADATA_OFFSET/4) + pending_notif.index * SPARE_BUFFER_FIELDS + HOST_OFFSET] == 1) ? true : false; seek_res.len = pending_notif.signature.len; seek_res.index = pending_notif.index; seek_res.valid = true; diff --git a/kernels/cclo/hls/rxbuf_offload/rxbuf_session.cpp b/kernels/cclo/hls/rxbuf_offload/rxbuf_session.cpp index 55751558..f7063e2f 100644 --- a/kernels/cclo/hls/rxbuf_offload/rxbuf_session.cpp +++ b/kernels/cclo/hls/rxbuf_offload/rxbuf_session.cpp @@ -84,7 +84,7 @@ void rxbuf_session_command( cmd.length = notif.length; cmd_word.data = cmd; cmd_word.last = 1;//always last, each command is a single word - cmd_word.dest = 0;//always write RX data to device (not host) + cmd_word.dest = desc.mem_index;//always write RX data to device (not host) STREAM_WRITE(fragment_dma_cmd, cmd_word); } else { //if EOF update address in descriptor diff --git a/kernels/cclo/tcl/generate_kernel.tcl b/kernels/cclo/tcl/generate_kernel.tcl index 0ccd5835..17402eba 100644 --- a/kernels/cclo/tcl/generate_kernel.tcl +++ b/kernels/cclo/tcl/generate_kernel.tcl @@ -70,6 +70,28 @@ generate_target all [get_files ./ccl_offload_ex/ccl_offload_ex.srcs/sources_1/b #build a .xsa file handoff write_hw_platform -fixed -force -file $xsafile + +#create ila_top for commands on cclo interf +#create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_top +#set_property -dict [ list CONFIG.C_PROBE0_WIDTH {32} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE4_WIDTH {512} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE5_WIDTH {8} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE8_WIDTH {512} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE9_WIDTH {8} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE12_WIDTH {512} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE13_WIDTH {8} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE16_WIDTH {512} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE17_WIDTH {8} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE20_WIDTH {512} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE21_WIDTH {8} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE24_WIDTH {64} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE27_WIDTH {32} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE30_WIDTH {64} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE33_WIDTH {512} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE36_WIDTH {8} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_NUM_OF_PROBES {37} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_EN_STRG_QUAL {1} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_DATA_DEPTH {1024} ] [get_ips ila_top] # close and exit close_project exit diff --git a/kernels/plugins/Makefile b/kernels/plugins/Makefile index da39ea18..c176941d 100644 --- a/kernels/plugins/Makefile +++ b/kernels/plugins/Makefile @@ -16,8 +16,8 @@ # # *******************************************************************************/ -PERIPHERAL_IPS = hostctrl loopback reduce_ops hp_compression dummy_tcp_stack client_arbiter vadd_put cyt_adapter external_dma dummy_cyt_rdma_stack dummy_cyt_dma tcp_session_handler -DEVICE=xcu280-fsvh2892-2L-e +PERIPHERAL_IPS = hostctrl loopback reduce_ops hp_compression dummy_tcp_stack client_arbiter vadd_put cyt_adapter external_dma tcp_session_handler +DEVICE=xcu55c-fsvh2892-2L-e TARGET=ip all: $(PERIPHERAL_IPS) diff --git a/kernels/plugins/cyt_adapter/Makefile b/kernels/plugins/cyt_adapter/Makefile index be1a85c2..2db4e41d 100644 --- a/kernels/plugins/cyt_adapter/Makefile +++ b/kernels/plugins/cyt_adapter/Makefile @@ -16,19 +16,24 @@ # *******************************************************************************/ TARGET=ip -DEVICE=xcu250-figd2104-2L-e -CYT_DMA_ADAPTER=cyt_dma_adapter_$(DEVICE).xo +DEVICE=xcu55c-fsvh2892-2L-e +CYT_DMA_SQ_ADAPTER=cyt_dma_sq_adapter_$(DEVICE).xo CYT_RDMA_ARBITER=cyt_rdma_arbiter_$(DEVICE).xo -CYT_RDMA_MUX=cyt_rdma_mux_$(DEVICE).xo +CCLO_SQ_ADAPTER=cclo_sq_adapter_$(DEVICE).xo +CYT_CQ_DM_STS_CONVERTER=cyt_cq_dm_sts_converter_$(DEVICE).xo -all: $(CYT_DMA_ADAPTER) $(CYT_RDMA_ARBITER) $(CYT_RDMA_MUX) -$(CYT_DMA_ADAPTER): build_cyt_dma_adapter.tcl cyt_dma_adapter.cpp +all: $(CYT_RDMA_ARBITER) $(CCLO_SQ_ADAPTER) $(CYT_DMA_SQ_ADAPTER) $(CYT_CQ_DM_STS_CONVERTER) + +$(CYT_CQ_DM_STS_CONVERTER): build_cyt_cq_dm_sts_converter.tcl cyt_cq_dm_sts_converter.cpp + vitis_hls $< -tclargs $(TARGET) $(DEVICE) + +$(CYT_DMA_SQ_ADAPTER): build_cyt_dma_sq_adapter.tcl cyt_dma_sq_adapter.cpp vitis_hls $< -tclargs $(TARGET) $(DEVICE) $(CYT_RDMA_ARBITER): build_cyt_rdma_arbiter.tcl cyt_rdma_arbiter.cpp vitis_hls $< -tclargs $(TARGET) $(DEVICE) -$(CYT_RDMA_MUX): build_cyt_rdma_mux.tcl cyt_rdma_mux.cpp +$(CCLO_SQ_ADAPTER): build_cclo_sq_adapter.tcl cclo_sq_adapter.cpp vitis_hls $< -tclargs $(TARGET) $(DEVICE) diff --git a/kernels/plugins/cyt_adapter/build_cyt_rdma_mux.tcl b/kernels/plugins/cyt_adapter/build_cclo_sq_adapter.tcl similarity index 81% rename from kernels/plugins/cyt_adapter/build_cyt_rdma_mux.tcl rename to kernels/plugins/cyt_adapter/build_cclo_sq_adapter.tcl index ac0e6dde..c4d915ea 100644 --- a/kernels/plugins/cyt_adapter/build_cyt_rdma_mux.tcl +++ b/kernels/plugins/cyt_adapter/build_cclo_sq_adapter.tcl @@ -51,16 +51,15 @@ switch $command { } -open_project build_cyt_rdma_mux.${device} +open_project build_cclo_sq_adapter.${device} +add_files cclo_sq_adapter.cpp -cflags "-std=c++14 -I. -I../../cclo/hls/eth_intf -I../../../driver/hls/ -DACCL_SYNTHESIS" -add_files cyt_rdma_mux.cpp -cflags "-std=c++14 -I. -I../../cclo/hls/eth_intf -I../../../driver/hls/ -DACCL_SYNTHESIS" - -set_top cyt_rdma_mux +set_top cclo_sq_adapter open_solution sol1 -config_export -format xo -library ACCL -output [pwd]/cyt_rdma_mux_${device}.xo +config_export -format xo -library ACCL -output [pwd]/cclo_sq_adapter_${device}.xo if {$do_sim} { csim_design -clean diff --git a/kernels/plugins/cyt_adapter/build_cyt_dma_adapter.tcl b/kernels/plugins/cyt_adapter/build_cyt_cq_dm_sts_converter.tcl similarity index 75% rename from kernels/plugins/cyt_adapter/build_cyt_dma_adapter.tcl rename to kernels/plugins/cyt_adapter/build_cyt_cq_dm_sts_converter.tcl index 342b3400..1b868a48 100644 --- a/kernels/plugins/cyt_adapter/build_cyt_dma_adapter.tcl +++ b/kernels/plugins/cyt_adapter/build_cyt_cq_dm_sts_converter.tcl @@ -17,7 +17,6 @@ set command [lindex $argv 0] set device [lindex $argv 1] -set stack [lindex $argv 2] set do_sim 0 set do_syn 0 @@ -52,19 +51,15 @@ switch $command { } -open_project build_cyt_dma_adapter.${device} +open_project build_cyt_cq_dm_sts_converter.${device} -if {$stack eq "RDMA"} { - add_files cyt_dma_adapter.cpp -cflags "-std=c++14 -I. -I../../../driver/hls/ -DACCL_SYNTHESIS -DACCL_RDMA" -} else { - add_files cyt_dma_adapter.cpp -cflags "-std=c++14 -I. -I../../../driver/hls/ -DACCL_SYNTHESIS" -} +add_files cyt_cq_dm_sts_converter.cpp -cflags "-std=c++14 -I. -I../../cclo/hls/eth_intf -I../../../driver/hls/ -DACCL_SYNTHESIS" -set_top cyt_dma_adapter +set_top cyt_cq_dm_sts_converter open_solution sol1 -config_export -format xo -library ACCL -output [pwd]/cyt_dma_adapter_$device.xo +config_export -format xo -library ACCL -output [pwd]/cyt_cq_dm_sts_converter_${device}.xo if {$do_sim} { csim_design -clean diff --git a/kernels/plugins/cyt_adapter/build_cyt_dma_sq_adapter.tcl b/kernels/plugins/cyt_adapter/build_cyt_dma_sq_adapter.tcl new file mode 100644 index 00000000..e8e851a7 --- /dev/null +++ b/kernels/plugins/cyt_adapter/build_cyt_dma_sq_adapter.tcl @@ -0,0 +1,82 @@ +# /******************************************************************************* +# Copyright (C) 2023 Advanced Micro Devices, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# *******************************************************************************/ + +set command [lindex $argv 0] +set device [lindex $argv 1] + +set do_sim 0 +set do_syn 0 +set do_export 0 +set do_cosim 0 + +switch $command { + "sim" { + set do_sim 1 + } + "syn" { + set do_syn 1 + } + "ip" { + set do_syn 1 + set do_export 1 + } + "cosim" { + set do_syn 1 + set do_cosim 1 + } + "all" { + set do_sim 1 + set do_syn 1 + set do_export 1 + set do_cosim 1 + } + default { + puts "Unrecognized command" + exit + } +} + + +open_project build_cyt_dma_sq_adapter.${device} + +add_files cyt_dma_sq_adapter.cpp -cflags "-std=c++14 -I. -I../../cclo/hls/eth_intf -I../../../driver/hls/ -DACCL_SYNTHESIS" + + +set_top cyt_dma_sq_adapter + +open_solution sol1 +config_export -format xo -library ACCL -output [pwd]/cyt_dma_sq_adapter_${device}.xo + +if {$do_sim} { + csim_design -clean +} + +if {$do_syn} { + set_part $device + create_clock -period 4 -name default + csynth_design +} + +if {$do_export} { + export_design +} + +if ${do_cosim} { + cosim_design +} + +exit diff --git a/kernels/plugins/cyt_adapter/cclo_sq_adapter.cpp b/kernels/plugins/cyt_adapter/cclo_sq_adapter.cpp new file mode 100644 index 00000000..cb15cbba --- /dev/null +++ b/kernels/plugins/cyt_adapter/cclo_sq_adapter.cpp @@ -0,0 +1,128 @@ +/******************************************************************************* +# Copyright (C) 2023 Advanced Micro Devices, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# *******************************************************************************/ + +#include "cyt.h" +#include "eth_intf.h" + +using namespace std; + +// convert the cclo sq (rdma) to cyt sq (rdma) +// currently the cclo sq only contains WRITE/SEND rdma command +// we keep the conversion to cyt_sq_rd output just for consistency of interfaces and future extension +// the m_axis_cyt data stream corresponds to rreq_send and we use the dest to indicate whether host/device +// the s_axis_cyt data stream corresponds to rreq_recv and we simply consume it +void cclo_sq_adapter( + hls::stream& cclo_sq, + hls::stream >& s_axis_cclo, + hls::stream& cyt_sq_wr, + hls::stream& cyt_sq_rd, + hls::stream >& m_axis_cyt, + hls::stream >& s_axis_cyt + ) +{ +#pragma HLS INTERFACE axis register port=cclo_sq +#pragma HLS INTERFACE axis register port=s_axis_cclo +#pragma HLS INTERFACE axis register port=cyt_sq_wr +#pragma HLS INTERFACE axis register port=cyt_sq_rd +#pragma HLS INTERFACE axis register port=m_axis_cyt +#pragma HLS INTERFACE axis register port=s_axis_cyt +#pragma HLS aggregate variable=cclo_sq compact=bit +#pragma HLS aggregate variable=cyt_sq_wr compact=bit +#pragma HLS aggregate variable=cyt_sq_rd compact=bit + +#pragma HLS INTERFACE ap_ctrl_none port=return +#pragma HLS PIPELINE II=1 + + enum fsmStateType {META, WR_STREAM, RD_STREAM}; + static fsmStateType fsmState = META; + + static rdma_req_t cclo_req; + static cyt_req_t cyt_req; + static ap_axiu<512, 0, 0, 8> currWord; + static ap_uint<32> pkt_word; + static ap_uint<32> word_cnt = 0; + + switch (fsmState) + { + case META: + if(!STREAM_IS_EMPTY(cclo_sq)){ + cclo_req = STREAM_READ(cclo_sq); + + cyt_req.rsrvd = 0; + cyt_req.offs = 0; + cyt_req.host = 0; + cyt_req.actv = 0; + cyt_req.len = cclo_req.len; + cyt_req.vaddr = cclo_req.vaddr; + cyt_req.last = 1; // always assert last + cyt_req.dest = cclo_req.host; // 0-device memory, 1-host memory; + cyt_req.pid = cclo_req.qpn(CYT_PID_BITS-1,0); //qpn lowest bits are pid + cyt_req.vfid = cclo_req.qpn(CYT_DEST_BITS+CYT_PID_BITS-1,CYT_PID_BITS); + cyt_req.remote = 0; + cyt_req.rdma = 0; + cyt_req.mode = 0; // always PARSE + cyt_req.strm = CYT_STRM_RDMA; + cyt_req.opcode = cclo_req.opcode; + + pkt_word = (cyt_req.len + 63) >> 6; + + if(cyt_req.opcode == CYT_RDMA_WRITE || cyt_req.opcode == CYT_RDMA_SEND || cyt_req.opcode == CYT_RDMA_IMMED){ + STREAM_WRITE(cyt_sq_wr, cyt_req); + fsmState = WR_STREAM; + } else if (cyt_req.opcode == CYT_RDMA_READ) { + STREAM_WRITE(cyt_sq_rd, cyt_req); + fsmState = RD_STREAM; + } + } + break; + // move s_axis_cclo to m_axis_cyt and adjust the dest field + case WR_STREAM: + if (!STREAM_IS_EMPTY(s_axis_cclo)) + { + currWord = STREAM_READ(s_axis_cclo); + ap_axiu<512, 0, 0, 8> outWord; + + outWord.data = currWord.data; + outWord.keep = currWord.keep; + outWord.last = currWord.last; + outWord.dest = cyt_req.dest; // use the dest flag to indicate whether it is to host or device + word_cnt++; + + if (word_cnt == pkt_word) + { + word_cnt = 0; + outWord.last = 1; + fsmState = META; + } + STREAM_WRITE(m_axis_cyt, outWord); + } + break; + // just consume all the data + case RD_STREAM: + if(!STREAM_IS_EMPTY(s_axis_cyt)){ + currWord = STREAM_READ(s_axis_cyt); + word_cnt++; + if (word_cnt == pkt_word) + { + word_cnt = 0; + fsmState = META; + } + } + break; + } + +} \ No newline at end of file diff --git a/kernels/plugins/cyt_adapter/cyt.h b/kernels/plugins/cyt_adapter/cyt.h index ae8d428b..39f7bcf2 100644 --- a/kernels/plugins/cyt_adapter/cyt.h +++ b/kernels/plugins/cyt_adapter/cyt.h @@ -24,70 +24,15 @@ using namespace std; +#define CYT_OFFS_BITS 6 #define CYT_VADDR_BITS 48 #define CYT_LEN_BITS 28 #define CYT_DEST_BITS 4 #define CYT_PID_BITS 6 -#define CYT_N_REGIONS_BITS 1 -#define CYT_RSRVD_BITS 96-4-CYT_N_REGIONS_BITS-CYT_VADDR_BITS-CYT_LEN_BITS-CYT_DEST_BITS-CYT_PID_BITS - -struct cyt_req_t{ - ap_uint rsrvd; - ap_uint vfid; - ap_uint pid; - ap_uint dest; - ap_uint<1> host; - ap_uint<1> ctl; - ap_uint<1> sync; - ap_uint<1> stream; - ap_uint len; - ap_uint vaddr; - - cyt_req_t() : rsrvd(0), vfid(0), pid(0), dest(0), host(0), ctl(0), sync(0), stream(0), len(0), vaddr(0) {} - - cyt_req_t(ap_uint rsrvd_arg, ap_uint vfid_arg, ap_uint pid_arg, - ap_uint dest_arg, ap_uint<1> host_arg, ap_uint<1> ctl_arg, ap_uint<1> sync_arg, - ap_uint<1> stream_arg, ap_uint len_arg, ap_uint vaddr_arg) - : rsrvd(rsrvd_arg), - vfid(vfid_arg), - pid(pid_arg), - dest(dest_arg), - host(host_arg), - ctl(ctl_arg), - sync(sync_arg), - stream(stream_arg), - len(len_arg), - vaddr(vaddr_arg) {} - - cyt_req_t(ap_uint<96> in) { - rsrvd = in(CYT_RSRVD_BITS - 1, 0); - vfid = in(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS-1, CYT_RSRVD_BITS); - pid = in(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS-1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS); - dest = in(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS-1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS); - host = in(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS); - ctl = in(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+1); - sync = in(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+2,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+2); - stream = in(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+3,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+3); - len = in(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+4+CYT_LEN_BITS-1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+4); - vaddr = in(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+4+CYT_LEN_BITS+CYT_VADDR_BITS-1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+4+CYT_LEN_BITS); - } - - operator ap_uint<96>() { - ap_uint<96> ret; - ret(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+4+CYT_LEN_BITS+CYT_VADDR_BITS-1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+4+CYT_LEN_BITS) = vaddr; //vaddr - ret(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+4+CYT_LEN_BITS-1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+4) = len; //len - ret(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+3,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+3) = stream; //stream - ret(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+2,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+2) = sync; //sync - ret(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+1) = ctl; //ctl - ret(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS) = host; //host - ret(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS-1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS) = dest; //dest - ret(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS-1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS) = pid; //pid - ret(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS-1,CYT_RSRVD_BITS) = vfid; //vfid - ret(CYT_RSRVD_BITS-1,0) = rsrvd; //rsrvd, disregard - return ret; - } -}; - +#define CYT_STRM_BITS 2 +#define CYT_OPCODE_BITS 5 +#define CYT_REQ_RSRVD_BITS (128 - CYT_OFFS_BITS - 2 - CYT_VADDR_BITS - CYT_LEN_BITS - 1 - 2 * CYT_DEST_BITS - CYT_PID_BITS - 3 - CYT_STRM_BITS - CYT_OPCODE_BITS) +#define CYT_ACK_RSRVD_BITS (32 - CYT_OPCODE_BITS - CYT_STRM_BITS - 2 - CYT_DEST_BITS - CYT_PID_BITS - CYT_DEST_BITS) // Coyote RDMA Opcode #define CYT_RDMA_READ 0 @@ -95,105 +40,158 @@ struct cyt_req_t{ #define CYT_RDMA_SEND 2 #define CYT_RDMA_IMMED 3 -// Coyote cyt_rdma_req_t structs -#define CYT_RDMA_OPCODE_BITS 5 -#define CYT_RDMA_MSG_BITS 448 -#define CYT_RDMA_OFFS_BITS 4 -#define CYT_RDMA_QPN_BITS 10 -#define CYT_RDMA_MSN_BITS 24 -#define CYT_RDMA_RSRVD_BITS 17 -#define CYT_RDMA_REQ_BITS CYT_RDMA_RSRVD_BITS+CYT_RDMA_MSG_BITS+CYT_RDMA_OFFS_BITS+CYT_RDMA_MSN_BITS+4+CYT_RDMA_QPN_BITS+CYT_RDMA_OPCODE_BITS - -#define CYT_RDMA_VADDR_BITS 64 -#define CYT_RDMA_LEN_BITS 32 -#define CYT_RDMA_PARAMS_BITS 288 - -struct cyt_rdma_req_t{ - ap_uint rsrvd; - ap_uint msg; - ap_uint offs; - ap_uint ssn; - ap_uint<1> cmplt; - ap_uint<1> last; - ap_uint<1> mode; - ap_uint<1> host; - ap_uint qpn; - ap_uint opcode; - - cyt_rdma_req_t() : rsrvd(0), msg(0), offs(0), ssn(0), cmplt(0), last(0), mode(0), host(0), qpn(0), opcode(0) {} - cyt_rdma_req_t(ap_uint in) { - rsrvd = in(CYT_RDMA_RSRVD_BITS - 1, 0); - msg = in(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS - 1, CYT_RDMA_RSRVD_BITS); - offs = in(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS - 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS); - ssn = in(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS - 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS); - cmplt = in(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS); - last = in(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 1); - mode = in(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 2, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 2); - host = in(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 3, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 3); - qpn = in(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 4 + CYT_RDMA_QPN_BITS - 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 4); - opcode = in(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 4 + CYT_RDMA_QPN_BITS + CYT_RDMA_OPCODE_BITS - 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 4 + CYT_RDMA_QPN_BITS); - } - operator ap_uint() { - ap_uint ret; - ret(CYT_RDMA_RSRVD_BITS - 1, 0) = rsrvd; - ret(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS - 1, CYT_RDMA_RSRVD_BITS) = msg; - ret(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS - 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS) = offs; - ret(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS - 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS) = ssn; - ret(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS) = cmplt; - ret(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 1) = last; - ret(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 2, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 2) = mode; - ret(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 3, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 3) = host; - ret(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 4 + CYT_RDMA_QPN_BITS - 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 4) = qpn; - ret(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 4 + CYT_RDMA_QPN_BITS + CYT_RDMA_OPCODE_BITS - 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 4 + CYT_RDMA_QPN_BITS) = opcode; - return ret; - } -} ; +#define RC_SEND_FIRST 0 +#define RC_SEND_MIDDLE 1 +#define RC_SEND_LAST 2 +#define RC_SEND_ONLY 4 +#define RC_RDMA_WRITE_FIRST 6 +#define RC_RDMA_WRITE_MIDDLE 7 +#define RC_RDMA_WRITE_LAST 8 +#define RC_RDMA_WRITE_LAST_WITH_IMD 9 +#define RC_RDMA_WRITE_ONLY 10 +#define RC_RDMA_WRITE_ONLY_WIT_IMD 11 +#define RC_RDMA_READ_REQUEST 12 +#define RC_RDMA_READ_RESP_FIRST 13 +#define RC_RDMA_READ_RESP_MIDDLE 14 +#define RC_RDMA_READ_RESP_LAST 15 +#define RC_RDMA_READ_RESP_ONLY 16 +#define RC_ACK 17 + +// Coyote STRM Opcode +#define CYT_STRM_CARD 0 +#define CYT_STRM_HOST 1 +#define CYT_STRM_RDMA 2 +#define CYT_STRM_TCP 3 +struct cyt_req_t{ + ap_uint rsrvd; // 19 bits + ap_uint offs; // 6 bits + ap_uint<1> host; // 1 bit + ap_uint<1> actv; // 1 bit + + ap_uint len; // 28 bits + ap_uint vaddr; // 48 bits + + ap_uint<1> last; // 1 bit -struct cyt_rdma_req_msg_t{ - ap_uint lvaddr; - ap_uint rvaddr; - ap_uint len; - ap_uint params; + ap_uint dest; // 4 bits + ap_uint pid; // 6 bits + ap_uint vfid; // 4 bits + + ap_uint<1> remote; // 1 bit + ap_uint<1> rdma; // 1 bit + ap_uint<1> mode; // 1 bit + ap_uint strm; // 2 bits + ap_uint opcode; // 5 bits - cyt_rdma_req_msg_t() : lvaddr(0), rvaddr(0), len(0), params(0) {} - cyt_rdma_req_msg_t(ap_uint in) { - lvaddr = in(CYT_RDMA_VADDR_BITS - 1, 0); - rvaddr = in(2 * CYT_RDMA_VADDR_BITS - 1, CYT_RDMA_VADDR_BITS); - len = in(2 * CYT_RDMA_VADDR_BITS + CYT_RDMA_LEN_BITS - 1, 2 * CYT_RDMA_VADDR_BITS); - params = in(CYT_RDMA_MSG_BITS - 1, 2 * CYT_RDMA_VADDR_BITS + CYT_RDMA_LEN_BITS); + // Default constructor + cyt_req_t() + : rsrvd(0), offs(0), host(0), actv(0), len(0), vaddr(0), last(0), + dest(0), pid(0), vfid(0), remote(0), rdma(0), mode(0), strm(0), opcode(0) {} + + // Parameterized constructor + cyt_req_t(ap_uint rsrvd_arg, ap_uint offs_arg, ap_uint<1> host_arg, ap_uint<1> actv_arg, + ap_uint len_arg, ap_uint vaddr_arg, ap_uint<1> last_arg, + ap_uint dest_arg, ap_uint pid_arg, ap_uint vfid_arg, + ap_uint<1> remote_arg, ap_uint<1> rdma_arg, ap_uint<1> mode_arg, ap_uint strm_arg, ap_uint opcode_arg) + : rsrvd(rsrvd_arg), offs(offs_arg), host(host_arg), actv(actv_arg), len(len_arg), vaddr(vaddr_arg), + last(last_arg), dest(dest_arg), pid(pid_arg), vfid(vfid_arg), remote(remote_arg), rdma(rdma_arg), + mode(mode_arg), strm(strm_arg), opcode(opcode_arg) {} + + // Constructor from a single ap_uint<128> argument + cyt_req_t(ap_uint<128> in) { + rsrvd = in(CYT_REQ_RSRVD_BITS - 1, 0); + offs = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS - 1, CYT_REQ_RSRVD_BITS); + host = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS); + actv = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 1); + len = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 1 + CYT_LEN_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2); + vaddr = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS); + last = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS); + dest = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + 1); + pid = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS); + vfid = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS); + remote = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS); + rdma = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 1); + mode = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 2, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 2); + strm = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 3 + CYT_STRM_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 3); + opcode = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 3 + CYT_STRM_BITS + CYT_OPCODE_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 3 + CYT_STRM_BITS); } - operator ap_uint() { - ap_uint ret; - ret(CYT_RDMA_VADDR_BITS - 1, 0) = lvaddr; - ret(2 * CYT_RDMA_VADDR_BITS - 1, CYT_RDMA_VADDR_BITS) = rvaddr; - ret(2 * CYT_RDMA_VADDR_BITS + CYT_RDMA_LEN_BITS - 1, 2 * CYT_RDMA_VADDR_BITS) = len; - ret(CYT_RDMA_MSG_BITS - 1, 2 * CYT_RDMA_VADDR_BITS + CYT_RDMA_LEN_BITS) = params; + + operator ap_uint<128>() { + ap_uint<128> ret; + + // Assigning fields to the appropriate bit positions in the 128-bit return value. + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 3 + CYT_STRM_BITS + CYT_OPCODE_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 3 + CYT_STRM_BITS) = opcode; // opcode + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 3 + CYT_STRM_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 3) = strm; // strm + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 2, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 2) = mode; // mode + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 1) = rdma; // rdma + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS) = remote; // remote + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS) = vfid; // vfid + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS) = pid; // pid + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + 1) = dest; // dest + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS) = last; // last + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS) = vaddr; // vaddr + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2) = len; // len + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 1) = actv; // actv + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS) = host; // host + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS - 1, CYT_REQ_RSRVD_BITS) = offs; // offs + ret(CYT_REQ_RSRVD_BITS - 1, 0) = rsrvd; // rsrvd, disregard + return ret; } }; +struct cyt_ack_t { + ap_uint rsrvd; // 9 bits + ap_uint vfid; // 4 bits + ap_uint pid; // 6 bits + ap_uint dest; // 4 bits + ap_uint<1> host; // 1 bit + ap_uint<1> remote; // 1 bit + ap_uint strm; // 2 bits + ap_uint opcode; // 5 bits + + // Default constructor + cyt_ack_t() + : rsrvd(0), vfid(0), pid(0), dest(0), host(0), remote(0), strm(0), opcode(0) {} + + // Parameterized constructor + cyt_ack_t(ap_uint rsrvd_arg, + ap_uint vfid_arg, + ap_uint pid_arg, + ap_uint dest_arg, + ap_uint<1> host_arg, + ap_uint<1> remote_arg, + ap_uint strm_arg, + ap_uint opcode_arg) + : rsrvd(rsrvd_arg), vfid(vfid_arg), pid(pid_arg), dest(dest_arg), + host(host_arg), remote(remote_arg), strm(strm_arg), opcode(opcode_arg) {} + + // Constructor from a single ap_uint<32> argument + cyt_ack_t(ap_uint<32> in) { + opcode = in(31, 31 - CYT_OPCODE_BITS + 1); + strm = in(31 - CYT_OPCODE_BITS, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS + 1); + remote = in(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 1, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 1); + host = in(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 2, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 2); + dest = in(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 3, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - CYT_DEST_BITS - 2); + pid = in(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - CYT_DEST_BITS - 3, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - CYT_DEST_BITS - CYT_PID_BITS - 2); + vfid = in(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - CYT_DEST_BITS - CYT_PID_BITS - 3, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 2 * CYT_DEST_BITS - CYT_PID_BITS - 2); + rsrvd = in(CYT_ACK_RSRVD_BITS - 1, 0); // Remaining bits for reserved + } + + // Conversion operator to ap_uint<32> + operator ap_uint<32>() { + ap_uint<32> ret; + ret(31, 31 - CYT_OPCODE_BITS + 1) = opcode; + ret(31 - CYT_OPCODE_BITS, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS + 1) = strm; + ret(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 1, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 1) = remote; + ret(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 2, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 2) = host; + ret(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 3, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - CYT_DEST_BITS - 2) = dest; + ret(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - CYT_DEST_BITS - 3, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - CYT_DEST_BITS - CYT_PID_BITS - 2) = pid; + ret(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - CYT_DEST_BITS - CYT_PID_BITS - 3, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 2 * CYT_DEST_BITS - CYT_PID_BITS - 2) = vfid; + ret(CYT_ACK_RSRVD_BITS - 1, 0) = rsrvd; -void cyt_dma_adapter( - //DM command streams - hls::stream> &dma0_s2mm_cmd, - hls::stream> &dma1_s2mm_cmd, - hls::stream> &dma0_mm2s_cmd, - hls::stream> &dma1_mm2s_cmd, - //DM status streams - hls::stream> &dma0_s2mm_sts, - hls::stream> &dma1_s2mm_sts, - hls::stream> &dma0_mm2s_sts, - hls::stream> &dma1_mm2s_sts, -#ifdef ACCL_RDMA - //RDMA rd_req and wr_req - hls::stream> & rdma_wr_req, - hls::stream> & rdma_rd_req, -#endif - //Coyote Bypass interface command and status - hls::stream> &cyt_byp_wr_cmd, - hls::stream> &cyt_byp_wr_sts, - hls::stream> &cyt_byp_rd_cmd, - hls::stream> &cyt_byp_rd_sts -); \ No newline at end of file + return ret; + } +}; diff --git a/kernels/plugins/cyt_adapter/cyt_cq_dm_sts_converter.cpp b/kernels/plugins/cyt_adapter/cyt_cq_dm_sts_converter.cpp new file mode 100644 index 00000000..758f119d --- /dev/null +++ b/kernels/plugins/cyt_adapter/cyt_cq_dm_sts_converter.cpp @@ -0,0 +1,112 @@ +/******************************************************************************* +# Copyright (C) 2023 Advanced Micro Devices, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# *******************************************************************************/ + +#include "cyt.h" + +using namespace std; + + +void cyt_cq_dm_sts_converter(hls::stream & cq_sts, + hls::stream> & dm0_sts, + hls::stream> & dm1_sts, + hls::stream>& dm0_meta, + hls::stream>& dm1_meta) +{ +#pragma HLS INTERFACE axis register port=cq_sts +#pragma HLS INTERFACE axis register port=dm0_sts +#pragma HLS INTERFACE axis register port=dm1_sts +#pragma HLS INTERFACE axis register port=dm0_meta +#pragma HLS INTERFACE axis register port=dm1_meta +#pragma HLS aggregate variable=cq_sts compact=bit + +#pragma HLS INTERFACE ap_ctrl_none port=return +#pragma HLS PIPELINE II=1 + + enum fsmStateType {CYT_STS, DM_STS_0, DM_STS_1}; + static fsmStateType fsmState = CYT_STS; + + static cyt_ack_t cq_sts_word; + + static ap_axiu<32,0,0,0> dm_sts_word; + static ap_uint<1+4+23> dm_meta_word; + + switch (fsmState) + { + // no workaround anymore + case CYT_STS: + if (!STREAM_IS_EMPTY(cq_sts)) + { + cq_sts_word = STREAM_READ(cq_sts); + + // only process status if it is local memory completion status + // only send back ack when the cq_sts stems from kernel issued bypass commands with host == 0 + // if dest == 2, this comes from wr_req/rd_req, no need to forward to data mover + if((cq_sts_word.opcode == CYT_STRM_CARD || cq_sts_word.opcode == CYT_STRM_HOST) && cq_sts_word.host == 0 && (cq_sts_word.dest == 0 || cq_sts_word.dest == 1)) + { + if (cq_sts_word.dest == 0) { + fsmState = DM_STS_0; + } else if (cq_sts_word.dest == 1) { + fsmState = DM_STS_1; + } + } + else{ + fsmState = CYT_STS; + } + } + break; + case DM_STS_0: + if(!STREAM_IS_EMPTY(dm0_meta)){ + + dm_meta_word = STREAM_READ(dm0_meta); + + dm_sts_word.data.range(3,0) = dm_meta_word(26,23); //tag + dm_sts_word.data.range(4,4) = 0; // internal error + dm_sts_word.data.range(5,5) = 0; // decode erro + dm_sts_word.data.range(6,6) = 0; // slave error + dm_sts_word.data.range(7,7) = 1; // OK + dm_sts_word.data.range(30,8) = dm_meta_word(22,0); // bytes received + dm_sts_word.data.range(31,31) = dm_meta_word(27,27); // EOP + dm_sts_word.last = 1; + + STREAM_WRITE(dm0_sts, dm_sts_word); + + fsmState = CYT_STS; // todo: add the check of eop flag + } + break; + case DM_STS_1: + if(!STREAM_IS_EMPTY(dm1_meta)){ + + dm_meta_word = STREAM_READ(dm1_meta); + + dm_sts_word.data.range(3,0) = dm_meta_word(26,23); //tag + dm_sts_word.data.range(4,4) = 0; // internal error + dm_sts_word.data.range(5,5) = 0; // decode erro + dm_sts_word.data.range(6,6) = 0; // slave error + dm_sts_word.data.range(7,7) = 1; // OK + dm_sts_word.data.range(30,8) = dm_meta_word(22,0); // bytes received + dm_sts_word.data.range(31,31) = dm_meta_word(27,27); // EOP + dm_sts_word.last = 1; + + STREAM_WRITE(dm1_sts, dm_sts_word); + + fsmState = CYT_STS; // todo: add the check of eop flag + } + break; + + } +} + diff --git a/kernels/plugins/cyt_adapter/cyt_dma_adapter.cpp b/kernels/plugins/cyt_adapter/cyt_dma_adapter.cpp deleted file mode 100644 index 300e991a..00000000 --- a/kernels/plugins/cyt_adapter/cyt_dma_adapter.cpp +++ /dev/null @@ -1,238 +0,0 @@ -/******************************************************************************* -# Copyright (C) 2023 Advanced Micro Devices, Inc -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# *******************************************************************************/ - -#include "cyt.h" - -using namespace std; - -template -void dm_byp_cmd_converter(hls::stream>& dm_cmd, - hls::stream& byp_cmd, - hls::stream>& dm_meta - ) -{ -#pragma HLS inline off -#pragma HLS pipeline II=1 - - if (!STREAM_IS_EMPTY(dm_cmd)) - { - ap_axiu<104,0,0,DEST_WIDTH> dm_cmd_with_dest = STREAM_READ(dm_cmd); - ap_uint<104> dm_cmd_word = dm_cmd_with_dest.data; - - ap_uint<23> btt = dm_cmd_word(22,0); - ap_uint<64> saddr = dm_cmd_word(95,32); - ap_uint<4> tag = dm_cmd_word(99,96); - ap_uint<1> strm = dm_cmd_with_dest.dest(2,0); // 1 if targeting host memory, 0 if targeting card memory - ap_uint<1> ctl = dm_cmd_word(30,30); // ctl field determines if a TLAST must be asserted at the end of the data stream - - cyt_req_t req(0, 0, 0, DMA_CHANNEL, 0, ctl, 0, strm, btt, saddr); - STREAM_WRITE(byp_cmd, req); - - ap_uint<1+4+23> dm_meta_word; - dm_meta_word(22,0) = btt; - dm_meta_word(26,23) = tag; - dm_meta_word(27,27) = ctl; - STREAM_WRITE(dm_meta, dm_meta_word); - } -} - -template -void rdma_req_byp_cmd_converter( - hls::stream& rdma_req, - hls::stream& byp_cmd -) -{ -#pragma HLS inline off -#pragma HLS pipeline II=1 - - if(!STREAM_IS_EMPTY(rdma_req)){ - cyt_req_t req = STREAM_READ(rdma_req); - // TODO: - // Better mechanism of buffer & proc mapping - // Currently has to set the pid to 0, corresponding to coyote_proc instead of any coyote_qproc - // Every coyote_qproc has a unique physical address in device - cyt_req_t cmd(req.rsrvd, req.vfid, 0 /*req.pid*/, DMA_CHANNEL, 0, 1, 0, req.stream, req.len, req.vaddr); - STREAM_WRITE(byp_cmd, cmd); - } - -} - -void multiplexor(hls::stream& in0, - hls::stream& in1, - hls::stream& in2, - hls::stream& out) -{ -#pragma HLS inline off -#pragma HLS pipeline II=1 - - cyt_req_t currWord; - - if (!STREAM_IS_EMPTY(in0)) - { - currWord = STREAM_READ(in0); - STREAM_WRITE(out, currWord); - } - else if(!STREAM_IS_EMPTY(in1)) - { - currWord = STREAM_READ(in1); - STREAM_WRITE(out, currWord); - } - else if(!STREAM_IS_EMPTY(in2)) - { - currWord = STREAM_READ(in2); - STREAM_WRITE(out, currWord); - } - -} - - -void byp_dm_sts_converter(hls::stream> & byp_sts, - hls::stream> & dm0_sts, - hls::stream> & dm1_sts, - hls::stream>& dm0_meta, - hls::stream>& dm1_meta) -{ -#pragma HLS inline off -#pragma HLS pipeline II=1 - - if (!STREAM_IS_EMPTY(byp_sts)) - { - ap_uint<16> byp_sts_word = STREAM_READ(byp_sts); - // PID in LSB according to Coyote dma_rsp_t: - ap_uint pid = byp_sts_word(CYT_PID_BITS-1,0); - ap_uint dest = byp_sts_word(CYT_DEST_BITS+CYT_PID_BITS-1,CYT_PID_BITS); - ap_uint<1> strm = byp_sts_word(CYT_DEST_BITS+CYT_PID_BITS,CYT_DEST_BITS+CYT_PID_BITS); - ap_uint<1> host = byp_sts_word(CYT_DEST_BITS+CYT_PID_BITS+1,CYT_DEST_BITS+CYT_PID_BITS+1); - - ap_axiu<32,0,0,0> dm_sts_word; - ap_uint<1+4+23> dm_meta_word; - - // only send back ack when the byp_sts stems from kernel issued bypass commands - // if dest == 2, this comes from wr_req/rd_req, no need to forward to data mover - if(host == 0) - { - do{ - if(dest == 0){ - dm_meta_word = STREAM_READ(dm0_meta); - } else if (dest == 1){ - dm_meta_word = STREAM_READ(dm1_meta); - } - dm_sts_word.data.range(3,0) = dm_meta_word(26,23); //tag - dm_sts_word.data.range(4,4) = 0; // internal error - dm_sts_word.data.range(5,5) = 0; // decode error - dm_sts_word.data.range(6,6) = 0; // slave error - dm_sts_word.data.range(7,7) = 1; // OK - dm_sts_word.data.range(30,8) = dm_meta_word(22,0); // bytes received - dm_sts_word.data.range(31,31) = dm_meta_word(27,27); // EOP - dm_sts_word.last = 1; - if(dest == 0){ - STREAM_WRITE(dm0_sts, dm_sts_word); - } else if (dest == 1){ - STREAM_WRITE(dm1_sts, dm_sts_word); - } - } while(dm_meta_word(27,27) == 0); - } - } - -} - -// The cyt bypass commands have 3 sources if RDMA is enabled -// 2 DMA channels from the CCLO and the rdma req interface -void cyt_dma_adapter( - //DM command streams - hls::stream> &dma0_s2mm_cmd, - hls::stream> &dma1_s2mm_cmd, - hls::stream> &dma0_mm2s_cmd, - hls::stream> &dma1_mm2s_cmd, - //DM status streams - hls::stream> &dma0_s2mm_sts, - hls::stream> &dma1_s2mm_sts, - hls::stream> &dma0_mm2s_sts, - hls::stream> &dma1_mm2s_sts, - - //RDMA rd_req and wr_req - hls::stream & rdma_wr_req, - hls::stream & rdma_rd_req, - - //Coyote Bypass interface command and status - hls::stream &cyt_byp_wr_cmd, - hls::stream> &cyt_byp_wr_sts, - hls::stream &cyt_byp_rd_cmd, - hls::stream> &cyt_byp_rd_sts -) { -#pragma HLS INTERFACE axis port=dma0_s2mm_cmd -#pragma HLS INTERFACE axis port=dma1_s2mm_cmd -#pragma HLS INTERFACE axis port=dma0_mm2s_cmd -#pragma HLS INTERFACE axis port=dma1_mm2s_cmd -#pragma HLS INTERFACE axis port=dma0_s2mm_sts -#pragma HLS INTERFACE axis port=dma1_s2mm_sts -#pragma HLS INTERFACE axis port=dma0_mm2s_sts -#pragma HLS INTERFACE axis port=dma1_mm2s_sts -#pragma HLS INTERFACE axis port=cyt_byp_rd_cmd -#pragma HLS INTERFACE axis port=cyt_byp_rd_sts -#pragma HLS INTERFACE axis port=cyt_byp_wr_cmd -#pragma HLS INTERFACE axis port=cyt_byp_wr_sts -#pragma HLS INTERFACE ap_ctrl_none port=return -#pragma HLS DATAFLOW disable_start_propagation - -#pragma HLS aggregate variable=cyt_byp_wr_cmd compact=bit -#pragma HLS aggregate variable=cyt_byp_rd_cmd compact=bit - -#pragma HLS INTERFACE axis port=rdma_wr_req -#pragma HLS INTERFACE axis port=rdma_rd_req -#pragma HLS aggregate variable=rdma_wr_req compact=bit -#pragma HLS aggregate variable=rdma_rd_req compact=bit - - static hls::stream byp_wr_cmd_0; - #pragma HLS stream variable=byp_wr_cmd_0 depth=16 - static hls::stream byp_wr_cmd_1; - #pragma HLS stream variable=byp_wr_cmd_1 depth=16 - static hls::stream byp_rd_cmd_0; - #pragma HLS stream variable=byp_rd_cmd_0 depth=16 - static hls::stream byp_rd_cmd_1; - #pragma HLS stream variable=byp_rd_cmd_1 depth=16 - - static hls::stream> dma0_mm2s_meta; - #pragma HLS stream variable=dma0_mm2s_meta depth=16 - static hls::stream> dma1_mm2s_meta; - #pragma HLS stream variable=dma1_mm2s_meta depth=16 - static hls::stream> dma0_s2mm_meta; - #pragma HLS stream variable=dma0_s2mm_meta depth=16 - static hls::stream> dma1_s2mm_meta; - #pragma HLS stream variable=dma1_s2mm_meta depth=16 - - static hls::stream byp_wr_cmd_2; - #pragma HLS stream variable=byp_wr_cmd_2 depth=16 - static hls::stream byp_rd_cmd_2; - #pragma HLS stream variable=byp_rd_cmd_2 depth=16 - - dm_byp_cmd_converter<0>(dma0_s2mm_cmd, byp_wr_cmd_0, dma0_s2mm_meta); - dm_byp_cmd_converter<1>(dma1_s2mm_cmd, byp_wr_cmd_1, dma1_s2mm_meta); - rdma_req_byp_cmd_converter<2>(rdma_wr_req, byp_wr_cmd_2); - multiplexor(byp_wr_cmd_0,byp_wr_cmd_1,byp_wr_cmd_2,cyt_byp_wr_cmd); - - - dm_byp_cmd_converter<0>(dma0_mm2s_cmd,byp_rd_cmd_0, dma0_mm2s_meta); - dm_byp_cmd_converter<1>(dma1_mm2s_cmd,byp_rd_cmd_1, dma1_mm2s_meta); - rdma_req_byp_cmd_converter<2>(rdma_rd_req, byp_rd_cmd_2); - multiplexor(byp_rd_cmd_0,byp_rd_cmd_1,byp_rd_cmd_2,cyt_byp_rd_cmd); - - byp_dm_sts_converter(cyt_byp_wr_sts, dma0_s2mm_sts, dma1_s2mm_sts, dma0_s2mm_meta, dma1_s2mm_meta); - byp_dm_sts_converter(cyt_byp_rd_sts, dma0_mm2s_sts, dma1_mm2s_sts, dma0_mm2s_meta, dma1_mm2s_meta); - - -} diff --git a/kernels/plugins/cyt_adapter/cyt_dma_sq_adapter.cpp b/kernels/plugins/cyt_adapter/cyt_dma_sq_adapter.cpp new file mode 100644 index 00000000..abb76505 --- /dev/null +++ b/kernels/plugins/cyt_adapter/cyt_dma_sq_adapter.cpp @@ -0,0 +1,210 @@ +/******************************************************************************* +# Copyright (C) 2023 Advanced Micro Devices, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# *******************************************************************************/ + +#include "cyt.h" + +using namespace std; + +// convert the data mover command (dma) to the cyt_req_t (dma) +// currently all the memory accesses initialized by the CCLO is associated with pid 0 (coyote_proc) +// also we assume a vfid 0 for single cyt region +// the dest field of the dm cmd indicates the host/card accesses +// the dest field is converted to strm flag in the cyt_sq_cmd +// DMA Channel is used to select axis streams, channel 0 and 1 are reserved + +template +void dm_sq_cmd_converter(hls::stream>& dm_cmd, + hls::stream& cyt_sq_cmd, + hls::stream>& dm_meta + ) +{ +#pragma HLS inline off +#pragma HLS pipeline II=1 + + if (!STREAM_IS_EMPTY(dm_cmd)) + { + ap_axiu<104,0,0,DEST_WIDTH> dm_cmd_with_dest = STREAM_READ(dm_cmd); + ap_uint<104> dm_cmd_word = dm_cmd_with_dest.data; + + ap_uint<23> btt = dm_cmd_word(22,0); + ap_uint<64> saddr = dm_cmd_word(95,32); + ap_uint<4> tag = dm_cmd_word(99,96); + ap_uint<1> strm = dm_cmd_with_dest.dest(2,0); // 1 if targeting host memory, 0 if targeting card memory + ap_uint<1> ctl = dm_cmd_word(30,30); // ctl field determines if a TLAST must be asserted at the end of the data stream + + cyt_req_t req(0/*rsrvd_arg*/, 0 /*offs_arg*/, 0 /*host_arg*/, 0 /*actv_arg*/, + btt/*len_arg*/, saddr /*vaddr_arg*/, ctl /*last_arg*/, + DMA_CHANNEL /*dest_arg*/, 0 /*pid_arg*/, 0 /*vfid_arg*/, + 0 /*remote_arg*/, 0 /*rdma_arg*/, 0 /*mode_arg*/, strm /*strm_arg*/, 0 /*opcode_arg*/); + + STREAM_WRITE(cyt_sq_cmd, req); + + ap_uint<1+4+23> dm_meta_word; + dm_meta_word(22,0) = btt; + dm_meta_word(26,23) = tag; + dm_meta_word(27,27) = ctl; + STREAM_WRITE(dm_meta, dm_meta_word); + } +} + +// convert the cyt_rq (rdma) to cyt_sq (dma) +// Channel 2 of the host/card axis stream is reserved for cyt_rq command +// the rq dest field is used to indicate whether this is host/device access, it should be converted to strm field here +// the sq opcode is not relevant as it is targeting dma +template +void cyt_rq_sq_cmd_converter( + hls::stream& cyt_rq_cmd, + hls::stream& cyt_sq_cmd +) +{ +#pragma HLS inline off +#pragma HLS pipeline II=1 + + if(!STREAM_IS_EMPTY(cyt_rq_cmd)){ + cyt_req_t req = STREAM_READ(cyt_rq_cmd); + + // Currently has to set the pid to 0, corresponding to coyote_proc instead of any coyote_qproc + // Because all the buffer allocation within the ACCL driver is associated with the coyote_proc + // And every coyote_qproc has a unique physical address in device which is different than the coyote_proc + // Also mark the host flag in the new output command to 0 to indicate the command is issued from the kernel instead of host + // However, the cq of this command is not processed in the cq_dm_sts_converter as the dest channel is 2 + cyt_req_t cmd(req.rsrvd/*rsrvd_arg*/, req.offs /*offs_arg*/, 0/*host_arg*/, req.actv /*actv_arg*/, + req.len/*len_arg*/, req.vaddr /*vaddr_arg*/, req.last /*last_arg*/, + DMA_CHANNEL /*dest_arg*/, 0 /*pid_arg*/, req.vfid /*vfid_arg*/, + req.remote /*remote_arg*/, req.rdma /*rdma_arg*/, req.mode /*mode_arg*/, req.dest /*strm_arg*/, req.opcode /*opcode_arg*/); + + STREAM_WRITE(cyt_sq_cmd, cmd); + } + +} + +void multiplexor(hls::stream& in0, + hls::stream& in1, + hls::stream& in2, + hls::stream& in3, + hls::stream& out) +{ +#pragma HLS inline off +#pragma HLS pipeline II=1 + + cyt_req_t currWord; + + if (!STREAM_IS_EMPTY(in0)) + { + currWord = STREAM_READ(in0); + STREAM_WRITE(out, currWord); + } + else if(!STREAM_IS_EMPTY(in1)) + { + currWord = STREAM_READ(in1); + STREAM_WRITE(out, currWord); + } + else if(!STREAM_IS_EMPTY(in2)) + { + currWord = STREAM_READ(in2); + STREAM_WRITE(out, currWord); + } + else if(!STREAM_IS_EMPTY(in3)) + { + currWord = STREAM_READ(in3); + STREAM_WRITE(out, currWord); + } + +} + + + +// The cyt sq commands have 4 sources if RDMA is enabled +// 2 DMA channels from the CCLO, CCLO sq command, and the Cyt rq interface +void cyt_dma_sq_adapter( + //DM command streams + hls::stream> &dma0_s2mm_cmd, + hls::stream> &dma1_s2mm_cmd, + hls::stream> &dma0_mm2s_cmd, + hls::stream> &dma1_mm2s_cmd, + + //Coyote rq rd_req and wr_req + hls::stream & cyt_rq_wr_cmd, + hls::stream & cyt_rq_rd_cmd, + + //CCLO sq command + hls::stream& cclo_sq_wr_cmd, + hls::stream& cclo_sq_rd_cmd, + + //Coyote sq interface command + hls::stream &cyt_sq_wr_cmd, + hls::stream &cyt_sq_rd_cmd, + + //DM command meta + hls::stream> & dma0_s2mm_meta, + hls::stream> & dma1_s2mm_meta, + hls::stream> & dma0_mm2s_meta, + hls::stream> & dma1_mm2s_meta +) { +#pragma HLS INTERFACE axis port=dma0_s2mm_cmd +#pragma HLS INTERFACE axis port=dma1_s2mm_cmd +#pragma HLS INTERFACE axis port=dma0_mm2s_cmd +#pragma HLS INTERFACE axis port=dma1_mm2s_cmd +#pragma HLS INTERFACE axis port=dma0_s2mm_meta +#pragma HLS INTERFACE axis port=dma1_s2mm_meta +#pragma HLS INTERFACE axis port=dma0_mm2s_meta +#pragma HLS INTERFACE axis port=dma1_mm2s_meta +#pragma HLS INTERFACE axis port=cyt_sq_rd_cmd +#pragma HLS INTERFACE axis port=cyt_sq_wr_cmd +#pragma HLS INTERFACE ap_ctrl_none port=return +#pragma HLS DATAFLOW disable_start_propagation + +#pragma HLS aggregate variable=cyt_sq_wr_cmd compact=bit +#pragma HLS aggregate variable=cyt_sq_rd_cmd compact=bit + +#pragma HLS INTERFACE axis port=cyt_rq_wr_cmd +#pragma HLS INTERFACE axis port=cyt_rq_rd_cmd +#pragma HLS aggregate variable=cyt_rq_wr_cmd compact=bit +#pragma HLS aggregate variable=cyt_rq_rd_cmd compact=bit + +#pragma HLS INTERFACE axis port=cclo_sq_wr_cmd +#pragma HLS aggregate variable=cclo_sq_wr_cmd compact=bit +#pragma HLS INTERFACE axis port=cclo_sq_rd_cmd +#pragma HLS aggregate variable=cclo_sq_rd_cmd compact=bit + + static hls::stream sq_wr_cmd_0; + #pragma HLS stream variable=sq_wr_cmd_0 depth=16 + static hls::stream sq_wr_cmd_1; + #pragma HLS stream variable=sq_wr_cmd_1 depth=16 + static hls::stream sq_rd_cmd_0; + #pragma HLS stream variable=sq_rd_cmd_0 depth=16 + static hls::stream sq_rd_cmd_1; + #pragma HLS stream variable=sq_rd_cmd_1 depth=16 + + static hls::stream sq_wr_cmd_2; + #pragma HLS stream variable=sq_wr_cmd_2 depth=16 + static hls::stream sq_rd_cmd_2; + #pragma HLS stream variable=sq_rd_cmd_2 depth=16 + + dm_sq_cmd_converter<0>(dma0_s2mm_cmd, sq_wr_cmd_0, dma0_s2mm_meta); + dm_sq_cmd_converter<1>(dma1_s2mm_cmd, sq_wr_cmd_1, dma1_s2mm_meta); + cyt_rq_sq_cmd_converter<2>(cyt_rq_wr_cmd, sq_wr_cmd_2); + multiplexor(cclo_sq_wr_cmd, sq_wr_cmd_0,sq_wr_cmd_1,sq_wr_cmd_2, cyt_sq_wr_cmd); + + + dm_sq_cmd_converter<0>(dma0_mm2s_cmd,sq_rd_cmd_0, dma0_mm2s_meta); + dm_sq_cmd_converter<1>(dma1_mm2s_cmd,sq_rd_cmd_1, dma1_mm2s_meta); + cyt_rq_sq_cmd_converter<2>(cyt_rq_rd_cmd, sq_rd_cmd_2); + multiplexor(cclo_sq_rd_cmd, sq_rd_cmd_0,sq_rd_cmd_1,sq_rd_cmd_2, cyt_sq_rd_cmd); + + +} diff --git a/kernels/plugins/cyt_adapter/cyt_rdma_arbiter.cpp b/kernels/plugins/cyt_adapter/cyt_rdma_arbiter.cpp index da59244c..8b478808 100644 --- a/kernels/plugins/cyt_adapter/cyt_rdma_arbiter.cpp +++ b/kernels/plugins/cyt_adapter/cyt_rdma_arbiter.cpp @@ -38,23 +38,25 @@ void cyt_rdma_arbiter_meta( if (!STREAM_IS_EMPTY(s_meta)){ reqWord = STREAM_READ(s_meta); - if (reqWord.host == 0){ + // if (reqWord.strm == CYT_STRM_RDMA && reqWord.opcode == CYT_RDMA_SEND){ + if(reqWord.opcode == RC_SEND_FIRST || reqWord.opcode == RC_SEND_MIDDLE || reqWord.opcode == RC_SEND_LAST || reqWord.opcode == RC_SEND_ONLY ){ meta_notif.type = 0; //don't care meta_notif.session_id(CYT_PID_BITS-1,0) = reqWord.pid; - meta_notif.session_id(CYT_PID_BITS+CYT_DEST_BITS-1,CYT_PID_BITS) = reqWord.dest; + meta_notif.session_id(CYT_PID_BITS+CYT_DEST_BITS-1,CYT_PID_BITS) = reqWord.vfid; //TODO: check this meta_notif.length = reqWord.len; STREAM_WRITE(m_meta_0, meta_notif); - meta_internal(15,0) = reqWord.host; - meta_internal(31,16) = reqWord.stream; + meta_internal(15,0) = reqWord.dest; + meta_internal(31,16) = CYT_RDMA_SEND; meta_internal(63,32) = reqWord.len; STREAM_WRITE(meta_int, meta_internal); - } else if (reqWord.host == 1) { + // } else if (reqWord.strm == CYT_STRM_RDMA && reqWord.opcode == CYT_RDMA_WRITE) { + } else if (reqWord.opcode == RC_RDMA_WRITE_FIRST || reqWord.opcode == RC_RDMA_WRITE_MIDDLE || reqWord.opcode == RC_RDMA_WRITE_LAST || reqWord.opcode == RC_RDMA_WRITE_LAST_WITH_IMD || reqWord.opcode == RC_RDMA_WRITE_ONLY || reqWord.opcode == RC_RDMA_WRITE_ONLY_WIT_IMD) { + // simply forward the rq_wr and the conversion from rq_wr to sq_wr is done downstream STREAM_WRITE(m_meta_1, reqWord); - - meta_internal(15,0) = reqWord.host; - meta_internal(31,16) = reqWord.stream; + meta_internal(15,0) = reqWord.dest; + meta_internal(31,16) = CYT_RDMA_WRITE; meta_internal(63,32) = reqWord.len; STREAM_WRITE(meta_int, meta_internal); } @@ -68,7 +70,8 @@ void cyt_rdma_arbiter_meta( // We also append the last signal for WRITE data stream for each packet as the cyt adapter set the ctl bits always to 1 void cyt_rdma_arbiter_data( hls::stream >& meta_int, - hls::stream >& s_axis, + hls::stream >& s_axis_0, + hls::stream >& s_axis_1, hls::stream >& m_axis_0, hls::stream >& m_axis_1 ) @@ -76,14 +79,14 @@ void cyt_rdma_arbiter_data( #pragma HLS PIPELINE II=1 #pragma HLS INLINE off - enum fsmStateType {META, SEND_STREAM, WRITE_STREAM}; + enum fsmStateType {META, SEND_STREAM, WRITE_STREAM_CARD, WRITE_STREAM_HOST}; static fsmStateType fsmState = META; static ap_axiu<512, 0, 0, 8> currWord; static ap_uint<64> meta_internal; static ap_uint<16> meta_internal_host; - static ap_uint<16> meta_internal_stream; + static ap_uint<16> meta_internal_opcode; static ap_uint<32> meta_internal_len; static ap_uint<32> pkt_word; static ap_uint<32> word_cnt = 0; @@ -95,22 +98,24 @@ void cyt_rdma_arbiter_data( { meta_internal = STREAM_READ(meta_int); meta_internal_host = meta_internal(15,0); - meta_internal_stream = meta_internal(31,16); + meta_internal_opcode = meta_internal(31,16); meta_internal_len = meta_internal(63,32); pkt_word = (meta_internal_len + 63) >> 6; - if (meta_internal_host == 0){ + if (meta_internal_opcode == CYT_RDMA_SEND){ fsmState = SEND_STREAM; - } else if (meta_internal_host == 1){ - fsmState = WRITE_STREAM; + } else if (meta_internal_opcode == CYT_RDMA_WRITE && meta_internal_host == CYT_STRM_CARD){ + fsmState = WRITE_STREAM_CARD; + } else if (meta_internal_opcode == CYT_RDMA_WRITE && meta_internal_host == CYT_STRM_HOST){ + fsmState = WRITE_STREAM_HOST; } } break; case SEND_STREAM: - if (!s_axis.empty()) + if (!s_axis_0.empty()) { - currWord = STREAM_READ(s_axis); + currWord = STREAM_READ(s_axis_0); word_cnt++; if (word_cnt == pkt_word) { @@ -121,22 +126,43 @@ void cyt_rdma_arbiter_data( STREAM_WRITE(m_axis_0, currWord); } break; - case WRITE_STREAM: - if (!s_axis.empty()) + case WRITE_STREAM_CARD: + if (!s_axis_0.empty()) { - currWord = STREAM_READ(s_axis); + currWord = STREAM_READ(s_axis_0); ap_axiu<512, 0, 0, 8> outWord; outWord.data = currWord.data; outWord.keep = currWord.keep; outWord.last = currWord.last; - outWord.dest = meta_internal_stream; + outWord.dest = meta_internal_host; // use the host flag to indicate whether it is to host or device word_cnt++; if (word_cnt == pkt_word) { word_cnt = 0; - currWord.last = 1; + outWord.last = 1; + fsmState = META; + } + STREAM_WRITE(m_axis_1, outWord); + } + break; + case WRITE_STREAM_HOST: + if (!s_axis_1.empty()) + { + currWord = STREAM_READ(s_axis_1); + ap_axiu<512, 0, 0, 8> outWord; + + outWord.data = currWord.data; + outWord.keep = currWord.keep; + outWord.last = currWord.last; + outWord.dest = meta_internal_host; // use the host flag to indicate whether it is to host or device + word_cnt++; + + if (word_cnt == pkt_word) + { + word_cnt = 0; + outWord.last = 1; fsmState = META; } STREAM_WRITE(m_axis_1, outWord); @@ -145,15 +171,17 @@ void cyt_rdma_arbiter_data( } } -// check the host bit of the s_meta, which corresponds to the wr_req -// if host bit equals 0, this is a SEND Verb, route meta to eth notification and route data stream to channel 0 -// if host bit equals 1, this is an WRITE Verb, route meta and data to channel 1 -// if data routes to channel 1, set the meta_internal field according to the stream flag in the cyt_req_t to indicate host/card +// check the command type of the s_meta, which corresponds to the rq_wr +// the input data stream can come from either s_axis_0/s_axis_1, corresponding to rrsp_recv_0/1. It should be selected based on the dest field of the s_meta +// if the dest is CYT_STRM_CARD, then consume from s_axis_0, and if dest is CYT_STRM_HOST, consume from s_axis_1 +// if strm flag is CYT_STRM_RDMA, and the opcode is CYT_RDMA_SEND, this is a SEND Verb, route meta to eth notification and route data stream to channel 0 +// if strm flag is CYT_STRM_RDMA, and the opcode is CYT_RDMA_WRITE, this is a WRITE Verb, route meta and data to channel 1 +// if data routes to channel 1 (CYT_RDMA_WRITE), set the meta_internal field according to the dest flag in the cyt_req_t to indicate host/card // compact bit pragma required for cyt_req_t as this interfaces with Coyote. - void cyt_rdma_arbiter( hls::stream& s_meta, - hls::stream >& s_axis, + hls::stream >& s_axis_0, + hls::stream >& s_axis_1, hls::stream& m_meta_0, hls::stream >& m_axis_0, hls::stream& m_meta_1, @@ -161,7 +189,8 @@ void cyt_rdma_arbiter( ) { #pragma HLS INTERFACE axis register port=s_meta -#pragma HLS INTERFACE axis register port=s_axis +#pragma HLS INTERFACE axis register port=s_axis_1 +#pragma HLS INTERFACE axis register port=s_axis_0 #pragma HLS INTERFACE axis register port=m_meta_0 #pragma HLS INTERFACE axis register port=m_axis_0 #pragma HLS INTERFACE axis register port=m_meta_1 @@ -186,7 +215,8 @@ void cyt_rdma_arbiter( cyt_rdma_arbiter_data( meta_int, - s_axis, + s_axis_0, + s_axis_1, m_axis_0, m_axis_1 ); diff --git a/kernels/plugins/cyt_adapter/cyt_rdma_mux.cpp b/kernels/plugins/cyt_adapter/cyt_rdma_mux.cpp deleted file mode 100644 index 0febd79c..00000000 --- a/kernels/plugins/cyt_adapter/cyt_rdma_mux.cpp +++ /dev/null @@ -1,179 +0,0 @@ -/******************************************************************************* -# Copyright (C) 2023 Advanced Micro Devices, Inc -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# *******************************************************************************/ - -#include "cyt.h" -#include "eth_intf.h" - -using namespace std; - - -void cyt_rdma_mux_meta( - hls::stream& s_meta_0, - hls::stream& s_meta_1, - hls::stream& m_meta_0, - hls::stream& m_meta_1, - hls::stream >& meta_int - ) -{ - -#pragma HLS PIPELINE II=1 -#pragma HLS INLINE off - - static rdma_req_t s_metaWord_0; - static cyt_rdma_req_t m_metaWord_0; - static cyt_req_t s_metaWord_1; - static cyt_rdma_req_msg_t rdma_req_msg; - static ap_uint<8> dest = 0; - - // if there is a rdma_sq cmd - // sq command comes from CCLO only has WRITE and SEND Verb - if (!STREAM_IS_EMPTY(s_meta_0)){ - s_metaWord_0 = STREAM_READ(s_meta_0); - m_metaWord_0.opcode = s_metaWord_0.opcode; - m_metaWord_0.qpn = s_metaWord_0.qpn; - m_metaWord_0.host = 0; // data always managed by CCLO - m_metaWord_0.mode = 0; // always PARSE - m_metaWord_0.last = 1; // always assert last - m_metaWord_0.cmplt = 0; // no need to ack - m_metaWord_0.ssn = 0; - m_metaWord_0.offs = 0; - m_metaWord_0.rsrvd = 0; - - rdma_req_msg.lvaddr = 0; // we don't care about local vaddr - rdma_req_msg.rvaddr(47,0) = s_metaWord_0.vaddr; - rdma_req_msg.rvaddr(52,52) = s_metaWord_0.host; - rdma_req_msg.len = s_metaWord_0.len; - rdma_req_msg.params = 0; - - m_metaWord_0.msg = (ap_uint)rdma_req_msg; - - STREAM_WRITE(m_meta_0, m_metaWord_0); - dest = 0; - STREAM_WRITE(meta_int, dest); - } - else if (!STREAM_IS_EMPTY(s_meta_1)){ - s_metaWord_1 = STREAM_READ(s_meta_1); - STREAM_WRITE(m_meta_1, s_metaWord_1); - dest = 1; - STREAM_WRITE(meta_int, dest); - } -} - -void cyt_rdma_mux_data( - hls::stream >& meta_int, - hls::stream >& s_axis_0, - hls::stream >& s_axis_1, - hls::stream >& m_axis -) -{ - #pragma HLS PIPELINE II=1 - #pragma HLS INLINE off - - enum fsmStateType {META, STREAM_0, STREAM_1}; - static fsmStateType fsmState = META; - - static ap_axiu<512, 0, 0, 8> currWord; - - switch (fsmState) - { - case META: - if (!STREAM_IS_EMPTY(meta_int)) - { - ap_uint<8> dest = STREAM_READ(meta_int); - if (dest == 0){ - fsmState = STREAM_0; - } else { - fsmState = STREAM_1; - } - } - break; - case STREAM_0: - if (!STREAM_IS_EMPTY(s_axis_0)) - { - currWord = STREAM_READ(s_axis_0); - STREAM_WRITE(m_axis, currWord); - if (currWord.last) // TODO: check by cnt instead of last - { - fsmState = META; - } - } - break; - case STREAM_1: - if (!STREAM_IS_EMPTY(s_axis_1)) - { - currWord = STREAM_READ(s_axis_1); - STREAM_WRITE(m_axis, currWord); - if (currWord.last) // TODO: check by cnt instead of last - { - fsmState = META; - } - } - break; - } -} - - -// cyt rdma mux will arbitrate the data stream according to the accepted command signal -// the command can be either rdma_sq or the rd_req -// the data stream can be data stream coming from the cclo or from the host/card data stream -// these two streams are mux into single rdma m_axis data stream - -void cyt_rdma_mux( - hls::stream& s_meta_0, - hls::stream >& s_axis_0, - hls::stream& s_meta_1, - hls::stream >& s_axis_1, - hls::stream& m_meta_0, - hls::stream& m_meta_1, - hls::stream >& m_axis - ) -{ -#pragma HLS INTERFACE axis register port=s_meta_0 -#pragma HLS INTERFACE axis register port=s_axis_0 -#pragma HLS INTERFACE axis register port=s_meta_1 -#pragma HLS INTERFACE axis register port=s_axis_1 -#pragma HLS INTERFACE axis register port=m_meta_0 -#pragma HLS INTERFACE axis register port=m_meta_1 -#pragma HLS INTERFACE axis register port=m_axis -#pragma HLS aggregate variable=s_meta_0 compact=bit -#pragma HLS aggregate variable=s_meta_1 compact=bit -#pragma HLS aggregate variable=m_meta_0 compact=bit -#pragma HLS aggregate variable=m_meta_1 compact=bit - -#pragma HLS INTERFACE ap_ctrl_none port=return - -#pragma HLS DATAFLOW disable_start_propagation - - static hls::stream > meta_int; - #pragma HLS STREAM depth=4 variable=meta_int - - cyt_rdma_mux_meta( - s_meta_0, - s_meta_1, - m_meta_0, - m_meta_1, - meta_int - ); - - cyt_rdma_mux_data( - meta_int, - s_axis_0, - s_axis_1, - m_axis - ); - -} \ No newline at end of file diff --git a/test/host/Coyote/CMakeLists.txt b/test/host/Coyote/CMakeLists.txt index 7c6b9842..78b7ca5c 100644 --- a/test/host/Coyote/CMakeLists.txt +++ b/test/host/Coyote/CMakeLists.txt @@ -16,6 +16,8 @@ set(EN_AVX 1 CACHE STRING "AVX environment.") add_subdirectory(${CMAKE_SOURCE_DIR}/../../../driver/xrt/ ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/xrt/) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) # Exec set(EXEC accl_on_coyote) diff --git a/test/host/Coyote/run_scripts/flow_u55c.sh b/test/host/Coyote/run_scripts/flow_u55c.sh index 8976bb7a..f58dfcb7 100755 --- a/test/host/Coyote/run_scripts/flow_u55c.sh +++ b/test/host/Coyote/run_scripts/flow_u55c.sh @@ -2,7 +2,7 @@ # parameters SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -FPGA_BIT_PATH=$SCRIPT_DIR/../../../refdesigns/Coyote/hw/build_RDMA/lynx/lynx.runs/impl_1/cyt_top +FPGA_BIT_PATH=$SCRIPT_DIR/../../../refdesigns/coyote_build_RDMA_u55c/bitstreams/cyt_top # FPGA_BIT_PATH=$SCRIPT_DIR/../../../refdesigns/Coyote/hw/build_TCP/lynx/lynx.runs/impl_1/cyt_top DRIVER_PATH=$SCRIPT_DIR/../../../refdesigns/Coyote/driver/ @@ -81,12 +81,12 @@ if [ $HOT_RESET -eq 1 ]; then for servid in "${SERVID[@]}"; do boardidx=$(expr $servid - 1) host="alveo-u55c-$(printf "%02d" $servid)" - ssh -q -tt $host "sudo insmod $DRIVER_PATH/coyote_drv.ko ip_addr_q0=${IPADDR[boardidx]} mac_addr_q0=${MACADDR[boardidx]}" & + ssh -q -tt $host "sudo insmod $DRIVER_PATH/coyote_drv.ko ip_addr=${IPADDR[boardidx]} mac_addr=${MACADDR[boardidx]}" & done wait echo "Driver loaded." - echo "Getting permissions for fpga..." - parallel-ssh -H "$hostlist" -x '-tt' "sudo /opt/sgrt/cli/program/fpga_chmod 0" + # echo "Getting permissions for fpga..." + # parallel-ssh -H "$hostlist" -x '-tt' "sudo /opt/sgrt/cli/program/fpga_chmod 0" echo "Done." fi diff --git a/test/host/Coyote/run_scripts/run.sh b/test/host/Coyote/run_scripts/run.sh index 59b628a7..808adbb9 100755 --- a/test/host/Coyote/run_scripts/run.sh +++ b/test/host/Coyote/run_scripts/run.sh @@ -8,7 +8,7 @@ fi # state variables mkdir -p "$(pwd)/accl_log" -BUILD_DIR=../build +BUILD_DIR=.. EXEC=$BUILD_DIR/accl_on_coyote HOST_FILE=./accl_log/host FPGA_FILE=./accl_log/fpga @@ -40,25 +40,25 @@ done #define ACCL_BARRIER 12 ARG=" -d -f -r" # debug, hardware, and tcp/rdma flags -TEST_MODE=(10) -N_ELEMENTS=(512) # 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 +TEST_MODE=(0) +N_ELEMENTS=(64) # 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 NRUN=(1) # number of runs HOST=(1) -PROTOC=(1) # eager=0, rendezevous=1 +PROTOC=(0) # eager=0, rendezevous=1 echo "Run command: $EXEC $ARG -y $TEST_MODE -c 1024 -l $FPGA_FILE" rm -f $(pwd)/accl_log/rank* -for NP in `seq 4 $NUM_PROCESS`; do +for NP in `seq $NUM_PROCESS $NUM_PROCESS`; do for MODE in ${TEST_MODE[@]}; do for N_ELE in ${N_ELEMENTS[@]}; do for H in ${HOST[@]}; do for P in ${PROTOC[@]}; do N=$N_ELE echo "mpirun -n $NP -f $HOST_FILE --iface ens4 $EXEC $ARG -z $H -y $MODE -c $N -l $FPGA_FILE -p $P -n $NRUN &" - mpirun -n $NP -f $HOST_FILE --iface ens4f0 -outfile-pattern "./accl_log/rank_%r_M_${MODE}_N_${N}_H_${H}_P_${P}_stdout" -errfile-pattern "./accl_log/rank_%r_M_${MODE}_N_${N}_H_${H}_P_${P}_stdout" $EXEC $ARG -z $H -y $MODE -c $N -l $FPGA_FILE -p $P -n $NRUN & - SLEEPTIME=2 + /mnt/scratch/zhe/mpich/install/bin/mpirun -n $NP -f $HOST_FILE --iface enp65s0f0np0 -outfile-pattern "./accl_log/rank_%r_M_${MODE}_N_${N}_H_${H}_P_${P}_stdout" -errfile-pattern "./accl_log/rank_%r_M_${MODE}_N_${N}_H_${H}_P_${P}_stdout" $EXEC $ARG -z $H -y $MODE -c $N -l $FPGA_FILE -p $P -n $NRUN -e & + SLEEPTIME=10 sleep $SLEEPTIME parallel-ssh -H "$HOST_LIST" "kill -9 \$(ps -aux | grep accl_on_coyote | awk '{print \$2}')" parallel-ssh -H "$HOST_LIST" "dmesg | grep "fpga_tlb_miss_isr" >$(pwd)/accl_log/tlb_miss.log" diff --git a/test/host/Coyote/run_scripts/run_eval.sh b/test/host/Coyote/run_scripts/run_eval.sh new file mode 100755 index 00000000..8f2f4c1a --- /dev/null +++ b/test/host/Coyote/run_scripts/run_eval.sh @@ -0,0 +1,90 @@ +#!/bin/bash + +#check working directory +if [[ $(pwd) != */test/host/Coyote/run_scripts ]]; then + echo "ERROR: this script should only be run in the /test/host/Coyote/run_scripts of the repo!" + exit 1 +fi + +# state variables +mkdir -p "$(pwd)/accl_log" +BUILD_DIR=.. +EXEC=$BUILD_DIR/accl_on_coyote +HOST_FILE=./accl_log/host +FPGA_FILE=./accl_log/fpga + +# read server ids from user +echo "Enter u55c machine ids (space separated):" +read -a SERVID + +# create ip files +rm -f $HOST_FILE $FPGA_FILE +NUM_PROCESS=0 +for ID in ${SERVID[@]}; do + echo "10.253.74.$(((ID-1) * 4 + 66))">>$HOST_FILE + echo "10.253.74.$(((ID-1) * 4 + 68))">>$FPGA_FILE + NUM_PROCESS=$((NUM_PROCESS+1)) + HOST_LIST+="alveo-u55c-$(printf "%02d" $ID) " +done + + +# Test Mode +#define ALL 0 +#define ACCL_SEND 3 +#define ACCL_BCAST 5 +#define ACCL_SCATTER 6 +#define ACCL_GATHER 7 +#define ACCL_REDUCE 8 +#define ACCL_ALLGATHER 9 +#define ACCL_ALLREDUCE 10 +#define ACCL_BARRIER 12 + +# read N_ELEMENTS as argument or use default +if [[ -z "$1" ]]; then + echo "No N_ELEMENTS passed as argument, using default value: 16384" + N_ELEMENTS=(16384) +else + echo "Using provided N_ELEMENTS: $1" + N_ELEMENTS=($1) +fi + +ARG=" -d -f -r" # debug, hardware, and tcp/rdma flags +TEST_MODE=(6) +NRUN=(1) # number of runs +HOST=(0) +PROTOC=(0) # eager=0, rendezevous=1 + +echo "Run command: $EXEC $ARG -y $TEST_MODE -c 1024 -l $FPGA_FILE" + +rm -f $(pwd)/accl_log/rank* + +for NP in `seq $NUM_PROCESS $NUM_PROCESS`; do + for MODE in ${TEST_MODE[@]}; do + for N_ELE in ${N_ELEMENTS[@]}; do + for H in ${HOST[@]}; do + for P in ${PROTOC[@]}; do + N=$N_ELE + echo "mpirun -n $NP -f $HOST_FILE --iface ens4 $EXEC $ARG -z $H -y $MODE -c $N -l $FPGA_FILE -p $P -n $NRUN &" + /mnt/scratch/zhe/mpich/install/bin/mpirun -n $NP -f $HOST_FILE --iface enp65s0f0np0 -outfile-pattern "./accl_log/rank_%r_M_${MODE}_N_${N}_H_${H}_P_${P}_stdout" -errfile-pattern "./accl_log/rank_%r_M_${MODE}_N_${N}_H_${H}_P_${P}_stdout" $EXEC $ARG -z $H -y $MODE -c $N -l $FPGA_FILE -p $P -n $NRUN -e & + SLEEPTIME=15 + sleep $SLEEPTIME + parallel-ssh -H "$HOST_LIST" "kill -9 \$(ps -aux | grep accl_on_coyote | awk '{print \$2}')" + parallel-ssh -H "$HOST_LIST" "dmesg | grep "fpga_tlb_miss_isr" >$(pwd)/accl_log/tlb_miss.log" + done + done + done + done +done + +mkdir -p "$(pwd)/accl_results" +# Loop through accl log files in the source directory and append to accl_results folder +for source_log in "$(pwd)/accl"*.log; do + # Extract the log number from the source log file name (assuming the format is acclX.log) + log_number=$(basename "${source_log}" | sed 's/accl\([0-9]*\)\.log/\1/') + # Create the destination log file path + destination_log="$(pwd)/accl_results/accl${log_number}.log" + # Append the content of the source log to the destination log + cat "${source_log}" >> "${destination_log}" + # Remove the tmp log + rm ${source_log} +done diff --git a/test/host/Coyote/run_scripts/run_the_run.sh b/test/host/Coyote/run_scripts/run_the_run.sh new file mode 100644 index 00000000..e1f6a7d9 --- /dev/null +++ b/test/host/Coyote/run_scripts/run_the_run.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# Paths (adjust these if needed) +UTIL_PATH="/mnt/scratch/jooertli/ACCL_CYT_V2_EGR_HOST/test/refdesigns/Coyote/util" +BITSTREAM_PATH="/mnt/scratch/jooertli/ACCL_CYT_V2_EGR_HOST/test/refdesigns/coyote_build_RDMA_u55c_norrsp_nocomp_opt_tlb2_eager_host/bitstreams/cyt_top.bit" +DRIVER_PATH="/mnt/scratch/jooertli/ACCL_CYT_V2_EGR_HOST/test/refdesigns/Coyote/driver/build/coyote_driver.ko" + +# Get FPGA numbers from the user for programming +read -p "Enter FPGA numbers (separated by space, e.g., '1 2'): " FPGA_INPUTS + +NRUN=10 +N_ELEMENTS=(4096 16384 65536 262144 524288) #4096 16384 65536 262144 524288 1048576 +for N_ELE in ${N_ELEMENTS[@]}; do + for (( run=1; run<=NRUN; run++ )); do + echo "Run #$run for N_ELEMENTS=$N_ELE" + # Run the programming script + echo "Programming FPGAs: $FPGA_INPUTS ..." + bash "$UTIL_PATH/program_hacc_remote.sh" "$BITSTREAM_PATH" "$DRIVER_PATH" <<< "$FPGA_INPUTS" + SLEEPTIME=6 + sleep $SLEEPTIME + # Run run_eval.sh with current N_ELE and same FPGA_INPUTS + echo "Now running run_eval.sh with N_ELEMENTS=$N_ELE (run #$run)..." + bash "$(dirname "$0")/run_eval.sh" "$N_ELE" <<< "$FPGA_INPUTS" + SLEEPTIME=3 + sleep $SLEEPTIME + done +done +echo "Done!" \ No newline at end of file diff --git a/test/host/Coyote/test.cpp b/test/host/Coyote/test.cpp index c7c21216..5c80974b 100644 --- a/test/host/Coyote/test.cpp +++ b/test/host/Coyote/test.cpp @@ -65,6 +65,7 @@ struct options_t unsigned int protoc; std::string xclbin; std::string fpgaIP; + bool eagerRx_host; }; struct timestamp_t @@ -276,6 +277,7 @@ options_t parse_options(int argc, char *argv[]) "i", "device-index", "device index of FPGA if hardware mode is used", false, 0, "positive integer"); cmd.add(device_index_arg); + TCLAP::SwitchArg eager_arg("e", "eager_host", "Eager Buffers on host", cmd, false); cmd.parse(argc, argv); if (hardware_arg.getValue()) { @@ -337,6 +339,7 @@ options_t parse_options(int argc, char *argv[]) opts.xclbin = xclbin_arg.getValue(); opts.fpgaIP = fpgaIP_arg.getValue(); opts.protoc = protoc_arg.getValue(); + opts.eagerRx_host = eager_arg.getValue(); std::cout << "count:" << opts.count << " rxbuf_size:" << opts.rxbuf_size << " seg_size:" << opts.seg_size << " num_rxbufmem:" << opts.num_rxbufmem << std::endl; return opts; @@ -354,24 +357,24 @@ options_t parse_options(int argc, char *argv[]) } -void exchange_qp(unsigned int master_rank, unsigned int slave_rank, unsigned int local_rank, std::vector &ibvQpConn_vec, std::vector &ranks) +void exchange_qp(unsigned int master_rank, unsigned int slave_rank, unsigned int local_rank, std::vector &ranks, ACCL::CoyoteDevice* device) { if (local_rank == master_rank) { std::cout<<"Local rank "<getQpairStruct()->local), sizeof(fpga::ibvQ), MPI_CHAR, slave_rank, 0, MPI_COMM_WORLD); + MPI_Send(&(device->coyote_qProc_vec[slave_rank]->getQpair()->local), sizeof(coyote::ibvQ), MPI_CHAR, slave_rank, 0, MPI_COMM_WORLD); } else if (local_rank == slave_rank) { std::cout<<"Local rank "<getQpairStruct()->remote = received_q; + device->coyote_qProc_vec[master_rank]->getQpair()->remote = received_q; } // Synchronize after the first exchange to avoid race conditions @@ -381,17 +384,17 @@ void exchange_qp(unsigned int master_rank, unsigned int slave_rank, unsigned int { std::cout<<"Local rank "<getQpairStruct()->local), sizeof(fpga::ibvQ), MPI_CHAR, master_rank, 0, MPI_COMM_WORLD); + MPI_Send(&(device->coyote_qProc_vec[master_rank]->getQpair()->local), sizeof(coyote::ibvQ), MPI_CHAR, master_rank, 0, MPI_COMM_WORLD); } else if (local_rank == master_rank) { std::cout<<"Local rank "<getQpairStruct()->remote = received_q; + device->coyote_qProc_vec[slave_rank]->getQpair()->remote = received_q; } MPI_Barrier(MPI_COMM_WORLD); @@ -399,20 +402,22 @@ void exchange_qp(unsigned int master_rank, unsigned int slave_rank, unsigned int // write established connection to hardware and perform arp lookup if (local_rank == master_rank) { - int connection = (ibvQpConn_vec[slave_rank]->getQpairStruct()->local.qpn & 0xFFFF) | ((ibvQpConn_vec[slave_rank]->getQpairStruct()->remote.qpn & 0xFFFF) << 16); - ibvQpConn_vec[slave_rank]->getQpairStruct()->print(); - ibvQpConn_vec[slave_rank]->setConnection(connection); - ibvQpConn_vec[slave_rank]->writeContext(ranks[slave_rank].port); - ibvQpConn_vec[slave_rank]->doArpLookup(); - ranks[slave_rank].session_id = ibvQpConn_vec[slave_rank]->getQpairStruct()->local.qpn; + int connection = (device->coyote_qProc_vec[slave_rank]->getQpair()->local.qpn & 0xFFFF) | ((device->coyote_qProc_vec[slave_rank]->getQpair()->remote.qpn & 0xFFFF) << 16); + device->coyote_qProc_vec[slave_rank]->getQpair()->local.print("Local "); + device->coyote_qProc_vec[slave_rank]->getQpair()->remote.print("Remote"); + //device->coyote_qProc_vec[slave_rank]->setConnection(connection); + device->coyote_qProc_vec[slave_rank]->writeQpContext(ranks[slave_rank].port); + device->coyote_qProc_vec[slave_rank]->doArpLookup(device->coyote_qProc_vec[slave_rank]->getQpair()->remote.ip_addr); + ranks[slave_rank].session_id = device->coyote_qProc_vec[slave_rank]->getQpair()->local.qpn; } else if (local_rank == slave_rank) { - int connection = (ibvQpConn_vec[master_rank]->getQpairStruct()->local.qpn & 0xFFFF) | ((ibvQpConn_vec[master_rank]->getQpairStruct()->remote.qpn & 0xFFFF) << 16); - ibvQpConn_vec[master_rank]->getQpairStruct()->print(); - ibvQpConn_vec[master_rank]->setConnection(connection); - ibvQpConn_vec[master_rank]->writeContext(ranks[master_rank].port); - ibvQpConn_vec[master_rank]->doArpLookup(); - ranks[master_rank].session_id = ibvQpConn_vec[master_rank]->getQpairStruct()->local.qpn; + int connection = (device->coyote_qProc_vec[master_rank]->getQpair()->local.qpn & 0xFFFF) | ((device->coyote_qProc_vec[master_rank]->getQpair()->remote.qpn & 0xFFFF) << 16); + device->coyote_qProc_vec[master_rank]->getQpair()->local.print("Local "); + device->coyote_qProc_vec[master_rank]->getQpair()->remote.print("Remote"); + //device->coyote_qProc_vec[master_rank]->setConnection(connection); + device->coyote_qProc_vec[master_rank]->writeQpContext(ranks[master_rank].port); + device->coyote_qProc_vec[master_rank]->doArpLookup(device->coyote_qProc_vec[slave_rank]->getQpair()->remote.ip_addr); + ranks[master_rank].session_id = device->coyote_qProc_vec[master_rank]->getQpair()->local.qpn; } MPI_Barrier(MPI_COMM_WORLD); @@ -422,63 +427,129 @@ void exchange_qp(unsigned int master_rank, unsigned int slave_rank, unsigned int void configure_cyt_rdma(std::vector &ranks, int local_rank, ACCL::CoyoteDevice* device) { - std::cout<<"Initializing QP connections..."< ibvQpConn_vec; - // create single page dummy memory space for each qp - uint32_t n_pages = 1; - for(int i=0; icoyote_qProc_vec[i], ranks[local_rank].ip, n_pages); - ibvQpConn_vec.push_back(qpConn); - // qpConn->getQpairStruct()->print(); - } + // std::cout<<"Initializing QP connections..."< ibvQp_vec; + // // create single page dummy memory space for each qp + // uint32_t n_pages = 1; + // for(int i=0; icoyote_qProc_vec[i], ranks[local_rank].ip, n_pages); + // ibvQp_vec.push_back(qpConn); + // // qpConn->getQpair()->print(); + // } std::cout<<"Exchanging QP..."< &ranks, int local_rank, ACCL::CoyoteDevice* device) -{ - std::cout<<"Configuring Coyote TCP..."<get_device()->doArpLookup(_ip_encode(ranks[i].ip)); - } - } - - //open port - for (int i=0; iget_device()->tcpOpenPort(dstPort); - } - - std::this_thread::sleep_for(10ms); - - //open con - for (int i=0; iget_device()->tcpOpenCon(dstIp, dstPort, &session); - ranks[i].session_id = session; - } - } +// void configure_cyt_tcp(std::vector &ranks, int local_rank, ACCL::CoyoteDevice* device) +// { +// std::cout<<"Configuring Coyote TCP..."<get_device()->doArpLookup(_ip_encode(ranks[i].ip)); +// } +// } + +// //open port +// for (int i=0; iget_device()->tcpOpenPort(dstPort); +// } + +// std::this_thread::sleep_for(10ms); + +// //open con +// for (int i=0; iget_device()->tcpOpenCon(dstIp, dstPort, &session); +// ranks[i].session_id = session; +// } +// } + +// } + +void test_copy(ACCL::ACCL &accl, options_t &options){ + std::cout << "Start copy test..." << std::endl<(count, dataType::float32); + auto res_buf = accl.create_coyotebuffer(count, dataType::float32); + int errors = 0; -} + if (options.count*sizeof(dataType::float32) > options.rxbuf_size){ + std::cout<<"experiment size larger than buffer size, exiting..."<buffer()[i] = (float)i; + res_buf.get()->buffer()[i] = -999.0f; + } + op_buf.get()->buffer()[0] = (float) 5; + if (options.host == 0){ op_buf->sync_to_device(); } + if (options.host == 0){ res_buf->sync_to_device(); } + // Print buffer addresses for debugging + //std::cout << "Source buffer address: " << op_buf.get()->buffer() << std::endl; + //std::cout << "Result buffer address: " << res_buf.get()->buffer() << std::endl; + for (int n = 0; n < options.nruns; n++) + { + //copy + MPI_Barrier(MPI_COMM_WORLD); + + double durationUs = 0.0; + double tput = 0.0; + auto start = std::chrono::high_resolution_clock::now(); + + ACCL::ACCLRequest* req; + req = accl.copy(*op_buf, *res_buf, count, true, true, false); + accl.wait(req, 1000ms); + + auto end = std::chrono::high_resolution_clock::now(); + durationUs = (std::chrono::duration_cast(end-start).count() / 1000.0); + tput = (options.count*sizeof(dataType::int32)*8.0)/(durationUs*1000.0); + durationUs = (double)accl.get_duration(req)/1000.0; + tput = (options.count*sizeof(dataType::int32)*8.0)/(durationUs*1000.0); + if(durationUs > 1.0){ + accl_log(mpi_rank, format_log("copy", options, durationUs, tput)); + } + std::cout << std::endl; + //compare results + if (options.host == 0){ op_buf->sync_from_device(); } + if (options.host == 0){ res_buf->sync_from_device(); } + for (int i = 0; i < count; i++) { + if (res_buf.get()->buffer()[i] != op_buf.get()->buffer()[i]) { + std::cout << std::to_string(i + 1) + "th item is incorrect!" << std::endl; + errors += 1; + } + } + if (errors > 0) { + std::cout << "Copy test failed with " << errors << " errors out of " << count << " elements!" << std::endl; + failed_tests++; + } else { + std::cout << "Copy test successful!" << std::endl; + } + } + // Free buffers + std::cout << "Freeing buffers..." << std::endl; + op_buf->free_buffer(); + res_buf->free_buffer(); +} void test_sendrcv(ACCL::ACCL &accl, options_t &options) { std::cout << "Start send recv test..." << std::endl<(bufsize, dataType::int32); + auto op_buf = accl.create_coyotebuffer(bufsize, dataType::int32); + //std::cout << "Buffer address: " << op_buf.get()->buffer() << std::endl; for (int n = 0; n < options.nruns; n++) { std::cout << "Repetition " <buffer()[i] = (mpi_rank == 0) ? i : -1; + op_buf.get()->buffer()[0] = (int) 5; if (options.host == 0){ op_buf->sync_to_device(); } @@ -538,12 +610,14 @@ void test_sendrcv(ACCL::ACCL &accl, options_t &options) { int errors = 0; if (options.host == 0){ op_buf->sync_from_device(); } - if (mpi_rank == 1) { for (int i = 0; i < bufsize; i++) { - unsigned int res = op_buf.get()->buffer()[i]; - unsigned int ref = i; + float res = op_buf.get()->buffer()[i]; + float ref = i + n; + if(i == 0){ + ref = 5 + n; + } if (res != ref) { std::cout << std::to_string(i + 1) + "th item is incorrect! (" + std::to_string(res) + " != " + std::to_string(ref) + ")" @@ -559,6 +633,7 @@ void test_sendrcv(ACCL::ACCL &accl, options_t &options) { } else { std::cout << "Test is successful!" << std::endl; } + debug(accl.dump_eager_rx_buffers(false)); } op_buf->free_buffer(); @@ -901,8 +976,8 @@ void test_reduce(ACCL::ACCL &accl, options_t &options, int root, for (int i = 0; i < count; i++) op_buf.get()->buffer()[i] = i; for (int i = 0; i < count; i++) res_buf.get()->buffer()[i] = 0; - if (options.host == 0){ op_buf->sync_to_device(); } - if (options.host == 0){ res_buf->sync_to_device(); } + if (options.host == 1){ op_buf->sync_to_device(); } + if (options.host == 1){ res_buf->sync_to_device(); } test_debug("Reduce data to " + std::to_string(root) + "...", options); @@ -1081,8 +1156,10 @@ void test_accl_base(options_t options) MPI_Barrier(MPI_COMM_WORLD); if (options.tcp){ - device = new ACCL::CoyoteDevice(); - configure_cyt_tcp(ranks, local_rank, device); + std::cout<<"ACCL with Coyote V2 TCP not supported"<(device); if (options.protoc == 0){ + bool eagerBufs = false; + if(options.eagerRx_host){ + eagerBufs = true; + } std::cout<<"Eager Protocol"<initialize(ranks, mpi_rank, - mpi_size+2, options.rxbuf_size, options.seg_size, 4096*1024*2); + mpi_size+3, options.rxbuf_size, 4096*1024, 4096*1024*2, eagerBufs); } else if (options.protoc == 1){ std::cout<<"Rendezvous Protocol"<initialize(ranks, mpi_rank, mpi_size, 64, 64, options.seg_size); @@ -1141,7 +1222,8 @@ void test_accl_base(options_t options) MPI_Barrier(MPI_COMM_WORLD); - + test_copy(*accl, options); + if(options.test_mode == ACCL_SEND || options.test_mode == 0){ debug(accl->dump_eager_rx_buffers(false)); MPI_Barrier(MPI_COMM_WORLD); @@ -1177,7 +1259,8 @@ void test_accl_base(options_t options) debug(accl->dump_communicator()); debug(accl->dump_eager_rx_buffers(false)); } - if(options.test_mode == ACCL_REDUCE || options.test_mode == 0){ + //comment out non working tests + /*if(options.test_mode == ACCL_REDUCE || options.test_mode == 0){ debug(accl->dump_eager_rx_buffers(false)); MPI_Barrier(MPI_COMM_WORLD); int root = 0; @@ -1191,7 +1274,7 @@ void test_accl_base(options_t options) test_allreduce(*accl, options, reduceFunction::SUM); debug(accl->dump_communicator()); debug(accl->dump_eager_rx_buffers(false)); - } + }*/ if(options.test_mode == ACCL_BARRIER){ std::cout << "Start barrier test..."<< std::endl; for (int n = 0; n < options.nruns; n++) @@ -1208,7 +1291,7 @@ void test_accl_base(options_t options) } } - + MPI_Barrier(MPI_COMM_WORLD); if (failed_tests == 0){ std::cout << "\nACCL base functionality test completed successfully!\n" << std::endl; } diff --git a/test/refdesigns/Coyote b/test/refdesigns/Coyote index ef4853fc..4f5b7d5b 160000 --- a/test/refdesigns/Coyote +++ b/test/refdesigns/Coyote @@ -1 +1 @@ -Subproject commit ef4853fc4eefc2768213179abe1b9278834bd2d2 +Subproject commit 4f5b7d5b2a19be7dda3e454d10e642c19538b5c3 diff --git a/test/refdesigns/Makefile b/test/refdesigns/Makefile index a5ff4ed6..4534bbb8 100644 --- a/test/refdesigns/Makefile +++ b/test/refdesigns/Makefile @@ -77,10 +77,11 @@ COMPRESSION_XO=../../kernels/plugins/hp_compression/hp_compression_$(FPGAPART).x LOOPBACK_XO=../../kernels/plugins/loopback/loopback_$(FPGAPART).xo TCP_SESS_XO=../../kernels/plugins/tcp_session_handler/tcp_session_handler_$(FPGAPART).xo VADD_XO=../../kernels/plugins/vadd_put/vadd_put_$(FPGAPART).xo -CYT_DMA_ADAPTER_XO=../../kernels/plugins/cyt_adapter/cyt_dma_adapter_$(FPGAPART).xo +CYT_DMA_SQ_ADAPTER_XO=../../kernels/plugins/cyt_adapter/cyt_dma_sq_adapter_$(FPGAPART).xo +CYT_CQ_DM_STS_CONVERTER_XO=../../kernels/plugins/cyt_adapter/cyt_cq_dm_sts_converter_$(FPGAPART).xo CYT_RDMA_ARBITER_XO=../../kernels/plugins/cyt_adapter/cyt_rdma_arbiter_$(FPGAPART).xo -CYT_RDMA_MUX_XO=../../kernels/plugins/cyt_adapter/cyt_rdma_mux_$(FPGAPART).xo -CYT_ADAPTER_XO = $(CYT_DMA_ADAPTER_XO) $(CYT_RDMA_ARBITER_XO) $(CYT_RDMA_MUX_XO) +CCLO_SQ_ADAPTER_XO=../../kernels/plugins/cyt_adapter/cclo_sq_adapter_$(FPGAPART).xo +CYT_ADAPTER_XO = $(CYT_DMA_SQ_ADAPTER_XO) $(CYT_RDMA_ARBITER_XO) $(CCLO_SQ_ADAPTER_XO) $(CYT_CQ_DM_STS_CONVERTER_XO) HWEMU_MST_XO=$$XILINX_VITIS/data/emulation/XO/sim_ipc_axis_master_512.xo HWEMU_SLV_XO=$$XILINX_VITIS/data/emulation/XO/sim_ipc_axis_slave_512.xo @@ -193,7 +194,7 @@ else ifeq (coyote_rdma, $(MODE)) N_DDR_CHAN = 2 endif OTHER_XO += $(CYT_ADAPTER_XO) - COYOTE_CONFIG = -DFDEV_NAME=$(BOARD) -DEN_MEM=1 -DEN_STRM=1 -DEN_BPSS=1 -DEN_RDMA_0=1 -DEN_RPC=1 -DN_STRM_AXI=3 -DN_CARD_AXI=3 -DEN_HLS=0 -DACLK_F=250 -DTLBL_A=12 -DN_DDR_CHAN=$(N_DDR_CHAN) + COYOTE_CONFIG = -DFDEV_NAME=$(BOARD) -DEXAMPLE=rdma -DSHELL_PROBE=8 -DN_REGIONS=1 -DEN_MEM=0 -DEN_STRM=1 -DEN_RDMA=1 -DN_RDMA_AXI=2 -DN_STRM_AXI=3 -DACLK_F=250 CCLO_STACK_TYPE = RDMA OUTPUT_PRODUCT = $(CYT_BIT) USE_HOSTMEM = 1 @@ -201,7 +202,7 @@ else $(error Unsupported MODE) endif -CCLO_XO = ../../kernels/cclo/$(CCLO_STACK_TYPE)_1111$(CCLO_MB_DEBUG_LEVEL)_$(FPGAPART)/ccl_offload.xo +CCLO_XO = ../../kernels/cclo/$(CCLO_STACK_TYPE)_1101$(CCLO_MB_DEBUG_LEVEL)_$(FPGAPART)/ccl_offload.xo OTHER_XO += $(CCLO_XO) ifeq (1, $(USE_HOSTMEM)) @@ -232,19 +233,19 @@ $(VNX)/NetLayers/_x.%/networklayer.xo: $(MAKE) -C xup_vitis_network_example/NetLayers DEVICE=$* all .PHONY: coyote_shell -coyote_shell: $(CYT_BUILD_DIR)/lynx/lynx.xpr +coyote_shell: $(CYT_BUILD_DIR)/test_config_0/user_c0_0/test.xpr -$(CYT_BUILD_DIR)/lynx/lynx.xpr: - mkdir $(CYT_BUILD_DIR) && cd $(CYT_BUILD_DIR) && cmake ../Coyote/hw $(COYOTE_CONFIG) - $(MAKE) -C $(CYT_BUILD_DIR) shell +$(CYT_BUILD_DIR)/test_config_0/user_c0_0/test.xpr: + mkdir $(CYT_BUILD_DIR) && cd $(CYT_BUILD_DIR) && cmake ../hdl/ $(COYOTE_CONFIG) + $(MAKE) -C $(CYT_BUILD_DIR) project $(CYT_BIT): coyote_shell $(MAKE) -C ../../kernels/cclo PLATFORM=$(PLATFORM) STACK_TYPE=$(CCLO_STACK_TYPE) MB_DEBUG_LEVEL=$(CCLO_MB_DEBUG_LEVEL) $(MAKE) -C ../../kernels/plugins DEVICE=$(FPGAPART) cp -rf $(OTHER_XO) $(CYT_BUILD_DIR)/iprepo && cd $(CYT_BUILD_DIR)/iprepo && unzip -n '*.xo' vivado -mode tcl -source tcl/coyote.tcl -tclargs $(CCLO_STACK_TYPE) $(CYT_BUILD_DIR) - cp hdl/$(MODE)_top.sv $(CYT_BUILD_DIR)/lynx/hdl/config_0/user_logic_c0_0.sv - $(MAKE) -C $(CYT_BUILD_DIR) compile + cp hdl/$(MODE)_top.sv $(CYT_BUILD_DIR)/test_config_0/user_c0_0/hdl/wrappers/user_logic_c0_0.sv + $(MAKE) -C $(CYT_BUILD_DIR) bitgen .PHONY: tcp_stack tcp_stack: $(TCP_XO) $(CMAC_TCP_XO) diff --git a/test/refdesigns/hdl/CMakeLists.txt b/test/refdesigns/hdl/CMakeLists.txt new file mode 100644 index 00000000..3fa95946 --- /dev/null +++ b/test/refdesigns/hdl/CMakeLists.txt @@ -0,0 +1,31 @@ +cmake_minimum_required(VERSION 3.0) +project(test) + +set(CYT_DIR ${CMAKE_SOURCE_DIR}/../Coyote) +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CYT_DIR}/cmake) + +find_package(CoyoteHW REQUIRED) + +# +# ACCL Test +# +if(EXAMPLE STREQUAL "rdma") + message("** ACCL-RDMA Test") + + set(N_REGIONS 1) + set(EN_STRM 1) + set(EN_RDMA 1) + set(EN_MEM 0) + set(N_STRM_AXI 3) + set(N_RDMA_AXI 2) + set(TLBL_A 2) + set(BUILD_OPT 1) + + validation_checks_hw() + + # load_apps ( + # VFPGA_C0_0 "coyote_rdma_top.sv" + # ) + + create_hw() +endif() diff --git a/test/refdesigns/hdl/coyote_rdma_top.sv b/test/refdesigns/hdl/coyote_rdma_top.sv index 6b333a53..c586d7ab 100644 --- a/test/refdesigns/hdl/coyote_rdma_top.sv +++ b/test/refdesigns/hdl/coyote_rdma_top.sv @@ -30,39 +30,28 @@ module design_user_logic_c0_0 ( // AXI4L CONTROL AXI4L.s axi_ctrl, - // DESCRIPTOR BYPASS - metaIntf.m bpss_rd_req, - metaIntf.m bpss_wr_req, - metaIntf.s bpss_rd_done, - metaIntf.s bpss_wr_done, - - // AXI4S HOST STREAMS - AXI4SR.s axis_host_0_sink, - AXI4SR.m axis_host_0_src, - AXI4SR.s axis_host_1_sink, - AXI4SR.m axis_host_1_src, - AXI4SR.s axis_host_2_sink, - AXI4SR.m axis_host_2_src, - - // AXI4S CARD STREAMS - AXI4SR.s axis_card_0_sink, - AXI4SR.m axis_card_0_src, - AXI4SR.s axis_card_1_sink, - AXI4SR.m axis_card_1_src, - AXI4SR.s axis_card_2_sink, - AXI4SR.m axis_card_2_src, - - // RDMA QSFP0 CMD - metaIntf.s rdma_0_rd_req, - metaIntf.s rdma_0_wr_req, + // NOTIFY + metaIntf.m notify, + + // DESCRIPTORS + metaIntf.m sq_rd, + metaIntf.m sq_wr, + metaIntf.s cq_rd, + metaIntf.s cq_wr, + metaIntf.s rq_rd, + metaIntf.s rq_wr, + + // HOST DATA STREAMS + AXI4SR.s axis_host_recv [N_STRM_AXI], + AXI4SR.m axis_host_send [N_STRM_AXI], - // AXI4S RDMA QSFP0 STREAMS - AXI4SR.s axis_rdma_0_sink, - AXI4SR.m axis_rdma_0_src, + // RDMA DATA STREAMS REQUESTER + AXI4SR.s axis_rreq_recv [N_RDMA_AXI], + AXI4SR.m axis_rreq_send [N_RDMA_AXI], - // RDMA QSFP0 SQ and RQ - metaIntf.m rdma_0_sq, - metaIntf.s rdma_0_ack, + // RDMA DATA STREAMS RESPONDER + AXI4SR.s axis_rrsp_recv [N_RDMA_AXI], + AXI4SR.m axis_rrsp_send [N_RDMA_AXI], // Clock and reset input wire aclk, @@ -70,14 +59,10 @@ module design_user_logic_c0_0 ( ); /* -- Tie-off unused interfaces and signals ----------------------------- */ -// always_comb axis_host_0_sink.tie_off_s(); -// always_comb axis_host_0_src_s.tie_off_m(); -// always_comb axis_card_0_sink.tie_off_s(); -// always_comb axis_card_0_src_s.tie_off_m(); -// always_comb axis_host_1_sink.tie_off_s(); -// always_comb axis_host_1_src.tie_off_m(); -// always_comb axis_card_1_sink.tie_off_s(); -// always_comb axis_card_1_src_s.tie_off_m(); +always_comb notify.tie_off_m(); +always_comb axis_rrsp_send[0].tie_off_m(); +always_comb axis_rrsp_send[1].tie_off_m(); +always_comb axis_host_recv[2].tie_off_s(); /* -- USER LOGIC -------------------------------------------------------- */ @@ -85,39 +70,6 @@ module design_user_logic_c0_0 ( localparam integer COYOTE_AXIL_ADDR_LSB = $clog2(AXIL_DATA_BITS/8); localparam integer COYOTE_AXIL_ADDR_MSB = 16; -// Master Data Stream -AXI4SR axis_host_0_src_s (); -AXI4SR axis_host_1_src_s (); -AXI4SR axis_host_2_src_s (); -AXI4SR axis_card_0_src_s (); -AXI4SR axis_card_1_src_s (); -AXI4SR axis_card_2_src_s (); - -// register slices -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_host_0_src_s), .m_axis(axis_host_0_src)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_host_1_src_s), .m_axis(axis_host_1_src)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_host_2_src_s), .m_axis(axis_host_2_src)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_card_0_src_s), .m_axis(axis_card_0_src)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_card_1_src_s), .m_axis(axis_card_1_src)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_card_2_src_s), .m_axis(axis_card_2_src)); - -// Slave Data Stream -AXI4SR axis_host_0_sink_s (); -AXI4SR axis_host_1_sink_s (); -AXI4SR axis_host_2_sink_s (); -AXI4SR axis_card_0_sink_s (); -AXI4SR axis_card_1_sink_s (); -AXI4SR axis_card_2_sink_s (); - -// register slices -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_host_0_sink), .m_axis(axis_host_0_sink_s)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_host_1_sink), .m_axis(axis_host_1_sink_s)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_host_2_sink), .m_axis(axis_host_2_sink_s)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_card_0_sink), .m_axis(axis_card_0_sink_s)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_card_1_sink), .m_axis(axis_card_1_sink_s)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_card_2_sink), .m_axis(axis_card_2_sink_s)); - - // ACCL Block Design accl_bd_wrapper accl_system( .ap_clk_0(aclk), @@ -143,138 +95,163 @@ accl_bd_wrapper accl_system( .S00_AXI_0_wstrb(axi_ctrl.wstrb), .S00_AXI_0_wvalid(axi_ctrl.wvalid), - .cyt_byp_rd_cmd_0_tdata(bpss_rd_req.data), - .cyt_byp_rd_cmd_0_tready(bpss_rd_req.ready), - .cyt_byp_rd_cmd_0_tvalid(bpss_rd_req.valid), + .cyt_sq_rd_cmd_tdata(sq_rd.data), + .cyt_sq_rd_cmd_tready(sq_rd.ready), + .cyt_sq_rd_cmd_tvalid(sq_rd.valid), + + .cyt_cq_rd_sts_0_tdata(cq_rd.data), + .cyt_cq_rd_sts_0_tready(cq_rd.ready), + .cyt_cq_rd_sts_0_tvalid(cq_rd.valid), - .cyt_byp_rd_sts_0_tdata(bpss_rd_done.data), - .cyt_byp_rd_sts_0_tready(bpss_rd_done.ready), - .cyt_byp_rd_sts_0_tvalid(bpss_rd_done.valid), + .cyt_sq_wr_cmd_tdata(sq_wr.data), + .cyt_sq_wr_cmd_tready(sq_wr.ready), + .cyt_sq_wr_cmd_tvalid(sq_wr.valid), - .cyt_byp_wr_cmd_0_tdata(bpss_wr_req.data), - .cyt_byp_wr_cmd_0_tready(bpss_wr_req.ready), - .cyt_byp_wr_cmd_0_tvalid(bpss_wr_req.valid), + .cyt_cq_wr_sts_0_tdata(cq_wr.data), + .cyt_cq_wr_sts_0_tready(cq_wr.ready), + .cyt_cq_wr_sts_0_tvalid(cq_wr.valid), - .cyt_byp_wr_sts_0_tdata(bpss_wr_done.data), - .cyt_byp_wr_sts_0_tready(bpss_wr_done.ready), - .cyt_byp_wr_sts_0_tvalid(bpss_wr_done.valid), + .cyt_rq_rd_tdata(rq_rd.data), + .cyt_rq_rd_tready(rq_rd.ready), + .cyt_rq_rd_tvalid(rq_rd.valid), - .m_axis_host_0_tdata(axis_host_0_src_s.tdata), - .m_axis_host_0_tkeep(axis_host_0_src_s.tkeep), - .m_axis_host_0_tlast(axis_host_0_src_s.tlast), - .m_axis_host_0_tready(axis_host_0_src_s.tready), - .m_axis_host_0_tvalid(axis_host_0_src_s.tvalid), + .cyt_rq_wr_tdata(rq_wr.data), + .cyt_rq_wr_tready(rq_wr.ready), + .cyt_rq_wr_tvalid(rq_wr.valid), + + .m_axis_host_0_tdata(axis_host_send[0].tdata), + .m_axis_host_0_tkeep(axis_host_send[0].tkeep), + .m_axis_host_0_tlast(axis_host_send[0].tlast), + .m_axis_host_0_tready(axis_host_send[0].tready), + .m_axis_host_0_tvalid(axis_host_send[0].tvalid), .m_axis_host_0_tdest(), - .m_axis_host_1_tdata(axis_host_1_src_s.tdata), - .m_axis_host_1_tkeep(axis_host_1_src_s.tkeep), - .m_axis_host_1_tlast(axis_host_1_src_s.tlast), - .m_axis_host_1_tready(axis_host_1_src_s.tready), - .m_axis_host_1_tvalid(axis_host_1_src_s.tvalid), + .m_axis_host_1_tdata(axis_host_send[1].tdata), + .m_axis_host_1_tkeep(axis_host_send[1].tkeep), + .m_axis_host_1_tlast(axis_host_send[1].tlast), + .m_axis_host_1_tready(axis_host_send[1].tready), + .m_axis_host_1_tvalid(axis_host_send[1].tvalid), .m_axis_host_1_tdest(), - .m_axis_host_2_tdata(axis_host_2_src_s.tdata), - .m_axis_host_2_tkeep(axis_host_2_src_s.tkeep), - .m_axis_host_2_tlast(axis_host_2_src_s.tlast), - .m_axis_host_2_tready(axis_host_2_src_s.tready), - .m_axis_host_2_tvalid(axis_host_2_src_s.tvalid), + .m_axis_host_2_tdata(axis_host_send[2].tdata), + .m_axis_host_2_tkeep(axis_host_send[2].tkeep), + .m_axis_host_2_tlast(axis_host_send[2].tlast), + .m_axis_host_2_tready(axis_host_send[2].tready), + .m_axis_host_2_tvalid(axis_host_send[2].tvalid), .m_axis_host_2_tdest(), - .m_axis_card_0_tdata(axis_card_0_src_s.tdata), - .m_axis_card_0_tkeep(axis_card_0_src_s.tkeep), - .m_axis_card_0_tlast(axis_card_0_src_s.tlast), - .m_axis_card_0_tready(axis_card_0_src_s.tready), - .m_axis_card_0_tvalid(axis_card_0_src_s.tvalid), - .m_axis_card_0_tdest(), - - .m_axis_card_1_tdata(axis_card_1_src_s.tdata), - .m_axis_card_1_tkeep(axis_card_1_src_s.tkeep), - .m_axis_card_1_tlast(axis_card_1_src_s.tlast), - .m_axis_card_1_tready(axis_card_1_src_s.tready), - .m_axis_card_1_tvalid(axis_card_1_src_s.tvalid), - .m_axis_card_1_tdest(), - - .m_axis_card_2_tdata(axis_card_2_src_s.tdata), - .m_axis_card_2_tkeep(axis_card_2_src_s.tkeep), - .m_axis_card_2_tlast(axis_card_2_src_s.tlast), - .m_axis_card_2_tready(axis_card_2_src_s.tready), - .m_axis_card_2_tvalid(axis_card_2_src_s.tvalid), - .m_axis_card_2_tdest(), - - .s_axis_host_0_tdata(axis_host_0_sink_s.tdata), - .s_axis_host_0_tkeep(axis_host_0_sink_s.tkeep), - .s_axis_host_0_tlast(axis_host_0_sink_s.tlast), - .s_axis_host_0_tready(axis_host_0_sink_s.tready), - .s_axis_host_0_tvalid(axis_host_0_sink_s.tvalid), - - .s_axis_host_1_tdata(axis_host_1_sink_s.tdata), - .s_axis_host_1_tkeep(axis_host_1_sink_s.tkeep), - .s_axis_host_1_tlast(axis_host_1_sink_s.tlast), - .s_axis_host_1_tready(axis_host_1_sink_s.tready), - .s_axis_host_1_tvalid(axis_host_1_sink_s.tvalid), - - .s_axis_host_2_tdata(axis_host_2_sink_s.tdata), - .s_axis_host_2_tkeep(axis_host_2_sink_s.tkeep), - .s_axis_host_2_tlast(axis_host_2_sink_s.tlast), - .s_axis_host_2_tready(axis_host_2_sink_s.tready), - .s_axis_host_2_tvalid(axis_host_2_sink_s.tvalid), - - .s_axis_card_0_tdata(axis_card_0_sink_s.tdata), - .s_axis_card_0_tkeep(axis_card_0_sink_s.tkeep), - .s_axis_card_0_tlast(axis_card_0_sink_s.tlast), - .s_axis_card_0_tready(axis_card_0_sink_s.tready), - .s_axis_card_0_tvalid(axis_card_0_sink_s.tvalid), - - .s_axis_card_1_tdata(axis_card_1_sink_s.tdata), - .s_axis_card_1_tkeep(axis_card_1_sink_s.tkeep), - .s_axis_card_1_tlast(axis_card_1_sink_s.tlast), - .s_axis_card_1_tready(axis_card_1_sink_s.tready), - .s_axis_card_1_tvalid(axis_card_1_sink_s.tvalid), - - .s_axis_card_2_tdata(axis_card_2_sink_s.tdata), - .s_axis_card_2_tkeep(axis_card_2_sink_s.tkeep), - .s_axis_card_2_tlast(axis_card_2_sink_s.tlast), - .s_axis_card_2_tready(axis_card_2_sink_s.tready), - .s_axis_card_2_tvalid(axis_card_2_sink_s.tvalid), - - .s_axis_eth_rx_data_tdata(axis_rdma_0_sink.tdata), - .s_axis_eth_rx_data_tdest(axis_rdma_0_sink.tid), - .s_axis_eth_rx_data_tkeep(axis_rdma_0_sink.tkeep), - .s_axis_eth_rx_data_tlast(axis_rdma_0_sink.tlast), - .s_axis_eth_rx_data_tready(axis_rdma_0_sink.tready), - .s_axis_eth_rx_data_tvalid(axis_rdma_0_sink.tvalid), - - .m_axis_eth_tx_data_tdata(axis_rdma_0_src.tdata), - .m_axis_eth_tx_data_tdest(axis_rdma_0_src.tid), // not driven, default 0 - .m_axis_eth_tx_data_tkeep(axis_rdma_0_src.tkeep), - .m_axis_eth_tx_data_tlast(axis_rdma_0_src.tlast), - .m_axis_eth_tx_data_tready(axis_rdma_0_src.tready), - .m_axis_eth_tx_data_tvalid(axis_rdma_0_src.tvalid), - - .s_axis_rdma_wr_req_tdata(rdma_0_wr_req.data), - .s_axis_rdma_wr_req_tvalid(rdma_0_wr_req.valid), - .s_axis_rdma_wr_req_tready(rdma_0_wr_req.ready), - - .s_axis_rdma_rd_req_tdata(rdma_0_rd_req.data), - .s_axis_rdma_rd_req_tvalid(rdma_0_rd_req.valid), - .s_axis_rdma_rd_req_tready(rdma_0_rd_req.ready), - - .m_axis_rdma_sq_tdata(rdma_0_sq.data), - .m_axis_rdma_sq_tvalid(rdma_0_sq.valid), - .m_axis_rdma_sq_tready(rdma_0_sq.ready) + .s_axis_host_0_tdata(axis_host_recv[0].tdata), + .s_axis_host_0_tkeep(axis_host_recv[0].tkeep), + .s_axis_host_0_tlast(axis_host_recv[0].tlast), + .s_axis_host_0_tready(axis_host_recv[0].tready), + .s_axis_host_0_tvalid(axis_host_recv[0].tvalid), + + .s_axis_host_1_tdata(axis_host_recv[1].tdata), + .s_axis_host_1_tkeep(axis_host_recv[1].tkeep), + .s_axis_host_1_tlast(axis_host_recv[1].tlast), + .s_axis_host_1_tready(axis_host_recv[1].tready), + .s_axis_host_1_tvalid(axis_host_recv[1].tvalid), + + .cyt_rreq_recv_0_tdata(axis_rreq_recv[0].tdata), + .cyt_rreq_recv_0_tkeep(axis_rreq_recv[0].tkeep), + .cyt_rreq_recv_0_tlast(axis_rreq_recv[0].tlast), + .cyt_rreq_recv_0_tready(axis_rreq_recv[0].tready), + .cyt_rreq_recv_0_tvalid(axis_rreq_recv[0].tvalid), + + .cyt_rreq_recv_1_tdata(axis_rreq_recv[1].tdata), + .cyt_rreq_recv_1_tkeep(axis_rreq_recv[1].tkeep), + .cyt_rreq_recv_1_tlast(axis_rreq_recv[1].tlast), + .cyt_rreq_recv_1_tready(axis_rreq_recv[1].tready), + .cyt_rreq_recv_1_tvalid(axis_rreq_recv[1].tvalid), + + .cyt_rreq_send_0_tdata(axis_rreq_send[0].tdata), + .cyt_rreq_send_0_tdest(), + .cyt_rreq_send_0_tkeep(axis_rreq_send[0].tkeep), + .cyt_rreq_send_0_tlast(axis_rreq_send[0].tlast), + .cyt_rreq_send_0_tready(axis_rreq_send[0].tready), + .cyt_rreq_send_0_tstrb(), + .cyt_rreq_send_0_tvalid(axis_rreq_send[0].tvalid), + + .cyt_rreq_send_1_tdata(axis_rreq_send[1].tdata), + .cyt_rreq_send_1_tdest(), + .cyt_rreq_send_1_tkeep(axis_rreq_send[1].tkeep), + .cyt_rreq_send_1_tlast(axis_rreq_send[1].tlast), + .cyt_rreq_send_1_tready(axis_rreq_send[1].tready), + .cyt_rreq_send_1_tstrb(), + .cyt_rreq_send_1_tvalid(axis_rreq_send[1].tvalid), + + .cyt_rrsp_recv_0_tdata(axis_rrsp_recv[0].tdata), + .cyt_rrsp_recv_0_tkeep(axis_rrsp_recv[0].tkeep), + .cyt_rrsp_recv_0_tlast(axis_rrsp_recv[0].tlast), + .cyt_rrsp_recv_0_tready(axis_rrsp_recv[0].tready), + .cyt_rrsp_recv_0_tvalid(axis_rrsp_recv[0].tvalid), + + .cyt_rrsp_recv_1_tdata(axis_rrsp_recv[1].tdata), + .cyt_rrsp_recv_1_tkeep(axis_rrsp_recv[1].tkeep), + .cyt_rrsp_recv_1_tlast(axis_rrsp_recv[1].tlast), + .cyt_rrsp_recv_1_tready(axis_rrsp_recv[1].tready), + .cyt_rrsp_recv_1_tvalid(axis_rrsp_recv[1].tvalid) ); -assign axis_host_0_src_s.tid = 0; -assign axis_host_1_src_s.tid = 0; -assign axis_host_2_src_s.tid = 0; - -assign axis_card_0_src_s.tid = 0; -assign axis_card_1_src_s.tid = 0; -assign axis_card_2_src_s.tid = 0; - -assign rdma_0_ack.ready = 1'b1; - +/*ila_top inst_ila_top( + .clk(aclk), + .probe0(sq_wr.valid), //1 + .probe1(sq_wr.ready), //1 + .probe2(sq_wr.data), //128 + .probe3(sq_rd.valid), //1 + .probe4(sq_rd.ready), //1 + .probe5(sq_rd.data), //128 + .probe6(rq_wr.valid), //1 + .probe7(rq_wr.ready), //1 + .probe8(rq_wr.data), //128 + .probe9(rq_rd.valid), //1 + .probe10(rq_rd.ready), //1 + .probe11(rq_rd.data), //128 + .probe12(axis_rreq_send[0].tvalid), + .probe13(axis_rreq_send[0].tready), + .probe14(axis_rreq_send[1].tvalid), + .probe15(axis_rreq_send[1].tready), + .probe16(axis_rrsp_recv[0].tvalid), + .probe17(axis_rrsp_recv[0].tready), + .probe18(axis_rrsp_recv[1].tvalid), + .probe19(axis_rrsp_recv[1].tready), + .probe20(axis_rreq_send[0].tlast), + .probe21(axis_rreq_send[1].tlast), + .probe22(axis_rrsp_recv[0].tlast), + .probe23(axis_rrsp_recv[1].tlast), + .probe24(cq_rd.data), //32 + .probe25(cq_rd.valid), + .probe26(cq_rd.ready), + .probe27(cq_wr.data), //32 + .probe28(cq_wr.valid), + .probe29(cq_wr.ready), + .probe30(axis_host_send[0].tvalid), + .probe31(axis_host_send[0].tready), + .probe32(axis_host_send[0].tdata), + .probe33(axis_host_send[1].tvalid), + .probe34(axis_host_send[1].tready), + .probe35(axis_host_send[1].tdata), + .probe36(axis_host_send[2].tvalid), + .probe37(axis_host_send[2].tready), + .probe38(axis_host_send[2].tdata), + .probe39(axis_host_recv[0].tvalid), + .probe40(axis_host_recv[0].tready), + .probe41(axis_host_recv[0].tdata), + .probe42(axis_host_recv[1].tvalid), + .probe43(axis_host_recv[1].tready), + .probe44(axis_host_recv[1].tdata) + );*/ + + + + +assign axis_host_send[0].tid = 0; +assign axis_host_send[1].tid = 0; +assign axis_host_send[2].tid = 0; + +assign axis_rreq_send[0].tid = 0; +assign axis_rreq_send[1].tid = 0; endmodule \ No newline at end of file diff --git a/test/refdesigns/tcl/coyote.tcl b/test/refdesigns/tcl/coyote.tcl index 5225362d..00352f79 100644 --- a/test/refdesigns/tcl/coyote.tcl +++ b/test/refdesigns/tcl/coyote.tcl @@ -1,250 +1,1499 @@ -# /******************************************************************************* -# Copyright (C) 2023 Advanced Micro Devices, Inc -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. + +################################################################ +# This is a generated script based on design: accl_bd # -# *******************************************************************************/ +# Though there are limitations about the generated script, +# the main purpose of this utility is to make learning +# IP Integrator Tcl commands easier. +################################################################ + +namespace eval _tcl { + proc get_script_folder {} { + set script_path [file normalize [info script]] + set script_folder [file dirname $script_path] + return $script_folder + } +} +variable script_folder +set script_folder [_tcl::get_script_folder] + +################################################################ +# Check if script is running in correct Vivado version. +################################################################ +set scripts_vivado_version 2024.2 +set current_vivado_version [version -short] + +if { [string first $scripts_vivado_version $current_vivado_version] == -1 } { + puts "" + if { [string compare $scripts_vivado_version $current_vivado_version] > 0 } { + catch {common::send_gid_msg -ssname BD::TCL -id 2042 -severity "ERROR" " This script was generated using Vivado <$scripts_vivado_version> and is being run in <$current_vivado_version> of Vivado. Sourcing the script failed since it was created with a future version of Vivado."} + + } else { + catch {common::send_gid_msg -ssname BD::TCL -id 2041 -severity "ERROR" "This script was generated using Vivado <$scripts_vivado_version> and is being run in <$current_vivado_version> of Vivado. Please run the script in Vivado <$scripts_vivado_version> then open the design in Vivado <$current_vivado_version>. Upgrade the design by running \"Tools => Report => Report IP Status...\", then run write_bd_tcl to create an updated script."} + + } + + return 1 +} + +################################################################ +# START +################################################################ set nettype [lindex $::argv 0] set build_dir [lindex $::argv 1] -open_project "$build_dir/lynx/lynx.xpr" +open_project "$build_dir/test_config_0/user_c0_0/test.xpr" update_compile_order -fileset sources_1 create_bd_design "accl_bd" update_compile_order -fileset sources_1 update_ip_catalog -create_bd_cell -type ip -vlnv Xilinx:ACCL:ccl_offload:1.0 ccl_offload_0 -create_bd_cell -type ip -vlnv xilinx.com:ACCL:cyt_dma_adapter:1.0 cyt_dma_adapter_0 - -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_dma0_mm2s_cmd] [get_bd_intf_pins cyt_dma_adapter_0/dma0_mm2s_cmd] -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_dma1_mm2s_cmd] [get_bd_intf_pins cyt_dma_adapter_0/dma1_mm2s_cmd] -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_dma1_s2mm_cmd] [get_bd_intf_pins cyt_dma_adapter_0/dma1_s2mm_cmd] -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_dma0_s2mm_cmd] [get_bd_intf_pins cyt_dma_adapter_0/dma0_s2mm_cmd] -connect_bd_intf_net [get_bd_intf_pins cyt_dma_adapter_0/dma0_s2mm_sts] [get_bd_intf_pins ccl_offload_0/s_axis_dma0_s2mm_sts] -connect_bd_intf_net [get_bd_intf_pins cyt_dma_adapter_0/dma1_s2mm_sts] [get_bd_intf_pins ccl_offload_0/s_axis_dma1_s2mm_sts] -connect_bd_intf_net [get_bd_intf_pins cyt_dma_adapter_0/dma0_mm2s_sts] [get_bd_intf_pins ccl_offload_0/s_axis_dma0_mm2s_sts] -connect_bd_intf_net [get_bd_intf_pins cyt_dma_adapter_0/dma1_mm2s_sts] [get_bd_intf_pins ccl_offload_0/s_axis_dma1_mm2s_sts] -make_bd_pins_external [get_bd_pins ccl_offload_0/ap_clk] -make_bd_pins_external [get_bd_pins ccl_offload_0/ap_rst_n] -connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins cyt_dma_adapter_0/ap_clk] -connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins cyt_dma_adapter_0/ap_rst_n] -make_bd_intf_pins_external [get_bd_intf_pins cyt_dma_adapter_0/cyt_byp_wr_sts] -make_bd_intf_pins_external [get_bd_intf_pins cyt_dma_adapter_0/cyt_byp_rd_sts] -make_bd_intf_pins_external [get_bd_intf_pins cyt_dma_adapter_0/cyt_byp_wr_cmd] -make_bd_intf_pins_external [get_bd_intf_pins cyt_dma_adapter_0/cyt_byp_rd_cmd] - -create_bd_cell -type ip -vlnv xilinx.com:ACCL:reduce_ops:1.0 reduce_ops_0 -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_arith_op0] [get_bd_intf_pins reduce_ops_0/in0] -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_arith_op1] [get_bd_intf_pins reduce_ops_0/in1] -connect_bd_intf_net [get_bd_intf_pins reduce_ops_0/out_r] [get_bd_intf_pins ccl_offload_0/s_axis_arith_res] -connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins reduce_ops_0/ap_clk] -connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins reduce_ops_0/ap_rst_n] - -create_bd_cell -type ip -vlnv xilinx.com:ACCL:hostctrl:1.0 hostctrl_0 -connect_bd_intf_net [get_bd_intf_pins hostctrl_0/cmd] [get_bd_intf_pins ccl_offload_0/s_axis_call_req] -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_call_ack] [get_bd_intf_pins hostctrl_0/sts] -connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins hostctrl_0/ap_clk] -connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins hostctrl_0/ap_rst_n] - -# direct loopback for compression and kernel streams -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_krnl] [get_bd_intf_pins ccl_offload_0/s_axis_krnl] -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_compression0] [get_bd_intf_pins ccl_offload_0/s_axis_compression0] -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_compression1] [get_bd_intf_pins ccl_offload_0/s_axis_compression1] -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_compression2] [get_bd_intf_pins ccl_offload_0/s_axis_compression2] - -# create axis switch -create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_2_to_1_inst_0 -set_property -dict [list CONFIG.NUM_SI {2} CONFIG.TDATA_NUM_BYTES {64} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.ARB_ON_TLAST {1} CONFIG.NUM_MI {1} CONFIG.DECODER_REG {0} CONFIG.ARB_ON_MAX_XFERS {0} CONFIG.Component_Name {axis_switch_2_to_1_inst_0}] [get_bd_cells axis_switch_2_to_1_inst_0] - -create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_2_to_1_inst_1 -set_property -dict [list CONFIG.NUM_SI {2} CONFIG.TDATA_NUM_BYTES {64} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.ARB_ON_TLAST {1} CONFIG.NUM_MI {1} CONFIG.DECODER_REG {0} CONFIG.ARB_ON_MAX_XFERS {0} CONFIG.Component_Name {axis_switch_2_to_1_inst_1}] [get_bd_cells axis_switch_2_to_1_inst_1] - -create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_1_to_2_inst_0 -set_property -dict [list CONFIG.NUM_SI {1} CONFIG.NUM_MI {2} CONFIG.TDATA_NUM_BYTES {64} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.TDEST_WIDTH {8} CONFIG.DECODER_REG {1} CONFIG.Component_Name {axis_switch_1_to_2_inst_0}] [get_bd_cells axis_switch_1_to_2_inst_0] - -create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_1_to_2_inst_1 -set_property -dict [list CONFIG.NUM_SI {1} CONFIG.NUM_MI {2} CONFIG.TDATA_NUM_BYTES {64} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.TDEST_WIDTH {8} CONFIG.DECODER_REG {1} CONFIG.Component_Name {axis_switch_1_to_2_inst_1}] [get_bd_cells axis_switch_1_to_2_inst_1] - -switch $nettype { - "TCP" { - # externalize TCP streams - make_bd_intf_pins_external [get_bd_intf_pins ccl_offload_0/s_axis_eth_rx_data] - make_bd_intf_pins_external [get_bd_intf_pins ccl_offload_0/m_axis_eth_tx_data] - make_bd_intf_pins_external [get_bd_intf_pins ccl_offload_0/s_axis_eth_tx_status] - make_bd_intf_pins_external [get_bd_intf_pins ccl_offload_0/m_axis_eth_read_pkg] - make_bd_intf_pins_external [get_bd_intf_pins ccl_offload_0/s_axis_eth_rx_meta] - make_bd_intf_pins_external [get_bd_intf_pins ccl_offload_0/s_axis_eth_notification] - make_bd_intf_pins_external [get_bd_intf_pins ccl_offload_0/m_axis_eth_tx_meta] - } - "RDMA" { - # externalize RDMA streams - # data streams - set m_axis_eth_tx_data [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_eth_tx_data ] - set_property -dict [ list CONFIG.FREQ_HZ {250000000} ] $m_axis_eth_tx_data - set s_axis_eth_rx_data [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_eth_rx_data ] - set_property -dict [ list CONFIG.FREQ_HZ {250000000} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.HAS_TREADY {1} CONFIG.HAS_TSTRB {1} CONFIG.LAYERED_METADATA {undef} CONFIG.TDATA_NUM_BYTES {64} CONFIG.TDEST_WIDTH {8} CONFIG.TID_WIDTH {0} CONFIG.TUSER_WIDTH {0} ] $s_axis_eth_rx_data - - # RDMA sq and rq - set m_axis_rdma_sq [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_rdma_sq ] - set_property -dict [ list CONFIG.FREQ_HZ {250000000} ] $m_axis_rdma_sq - - # RDMA extra pair of host/card streams - set m_axis_host_2 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_host_2 ] - set_property -dict [ list CONFIG.FREQ_HZ {250000000} ] $m_axis_host_2 - set m_axis_card_2 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_card_2 ] - set_property -dict [ list CONFIG.FREQ_HZ {250000000} ] $m_axis_card_2 - set s_axis_host_2 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_host_2 ] - set_property -dict [ list CONFIG.FREQ_HZ {250000000} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.HAS_TREADY {1} CONFIG.HAS_TSTRB {0} CONFIG.LAYERED_METADATA {undef} CONFIG.TDATA_NUM_BYTES {64} CONFIG.TDEST_WIDTH {0} CONFIG.TID_WIDTH {0} CONFIG.TUSER_WIDTH {0} ] $s_axis_host_2 - set s_axis_card_2 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_card_2 ] - set_property -dict [ list CONFIG.FREQ_HZ {250000000} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.HAS_TREADY {1} CONFIG.HAS_TSTRB {0} CONFIG.LAYERED_METADATA {undef} CONFIG.TDATA_NUM_BYTES {64} CONFIG.TDEST_WIDTH {0} CONFIG.TID_WIDTH {0} CONFIG.TUSER_WIDTH {0} ] $s_axis_card_2 - - # RDMA wr_req and rd_req - set s_axis_rdma_wr_req [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_rdma_wr_req ] - set_property -dict [ list CONFIG.FREQ_HZ {250000000} CONFIG.HAS_TKEEP {0} CONFIG.HAS_TLAST {0} CONFIG.HAS_TREADY {1} CONFIG.HAS_TSTRB {0} CONFIG.LAYERED_METADATA {undef} CONFIG.TDATA_NUM_BYTES {12} CONFIG.TDEST_WIDTH {0} CONFIG.TID_WIDTH {0} CONFIG.TUSER_WIDTH {0} ] $s_axis_rdma_wr_req - set s_axis_rdma_rd_req [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_rdma_rd_req ] - set_property -dict [ list CONFIG.FREQ_HZ {250000000} CONFIG.HAS_TKEEP {0} CONFIG.HAS_TLAST {0} CONFIG.HAS_TREADY {1} CONFIG.HAS_TSTRB {0} CONFIG.LAYERED_METADATA {undef} CONFIG.TDATA_NUM_BYTES {12} CONFIG.TDEST_WIDTH {0} CONFIG.TID_WIDTH {0} CONFIG.TUSER_WIDTH {0} ] $s_axis_rdma_rd_req - - # connections for rdma_arbiter and the axi 1-to-2 switch - create_bd_cell -type ip -vlnv xilinx.com:ACCL:cyt_rdma_arbiter:1.0 cyt_rdma_arbiter_0 - create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_1_to_2_inst_2 - set_property -dict [list CONFIG.NUM_SI {1} CONFIG.NUM_MI {2} CONFIG.TDATA_NUM_BYTES {64} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.TDEST_WIDTH {8} CONFIG.DECODER_REG {1} CONFIG.Component_Name {axis_switch_1_to_2_inst_2}] [get_bd_cells axis_switch_1_to_2_inst_2] - connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins cyt_rdma_arbiter_0/ap_clk] - connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins cyt_rdma_arbiter_0/ap_rst_n] - connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins axis_switch_1_to_2_inst_2/aclk] - connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins axis_switch_1_to_2_inst_2/aresetn] - - connect_bd_intf_net [get_bd_intf_ports s_axis_eth_rx_data] [get_bd_intf_pins cyt_rdma_arbiter_0/s_axis] - connect_bd_intf_net [get_bd_intf_ports s_axis_rdma_wr_req] [get_bd_intf_pins cyt_rdma_arbiter_0/s_meta] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_arbiter_0/m_meta_0] [get_bd_intf_pins ccl_offload_0/s_axis_eth_notification] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_arbiter_0/m_axis_0] [get_bd_intf_pins ccl_offload_0/s_axis_eth_rx_data] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_arbiter_0/m_meta_1] [get_bd_intf_pins cyt_dma_adapter_0/rdma_wr_req] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_arbiter_0/m_axis_1] [get_bd_intf_pins axis_switch_1_to_2_inst_2/S00_AXIS] - connect_bd_intf_net [get_bd_intf_pins axis_switch_1_to_2_inst_2/M00_AXIS] [get_bd_intf_ports m_axis_card_2] - connect_bd_intf_net [get_bd_intf_pins axis_switch_1_to_2_inst_2/M01_AXIS] [get_bd_intf_ports m_axis_host_2] - - # connections for rdma_mux and the axi 2-to-1 switch - create_bd_cell -type ip -vlnv xilinx.com:ACCL:cyt_rdma_mux:1.0 cyt_rdma_mux_0 - create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_2_to_1_inst_2 - set_property -dict [list CONFIG.NUM_SI {2} CONFIG.TDATA_NUM_BYTES {64} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.ARB_ON_TLAST {1} CONFIG.NUM_MI {1} CONFIG.DECODER_REG {0} CONFIG.ARB_ON_MAX_XFERS {0} CONFIG.Component_Name {axis_switch_2_to_1_inst_2}] [get_bd_cells axis_switch_2_to_1_inst_2] - connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins cyt_rdma_mux_0/ap_clk] - connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins cyt_rdma_mux_0/ap_rst_n] - connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins axis_switch_2_to_1_inst_2/aclk] - connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins axis_switch_2_to_1_inst_2/aresetn] - create_bd_cell -type ip -vlnv xilinx.com:ip:xlconstant:1.1 xlconstant_2 - set_property -dict [list CONFIG.CONST_WIDTH {2}] [get_bd_cells xlconstant_2] - set_property -dict [list CONFIG.CONST_VAL {0}] [get_bd_cells xlconstant_2] - connect_bd_net [get_bd_pins xlconstant_2/dout] [get_bd_pins axis_switch_2_to_1_inst_2/s_req_suppress] - - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_mux_0/s_meta_0] [get_bd_intf_pins ccl_offload_0/m_axis_rdma_sq] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_mux_0/s_axis_0] [get_bd_intf_pins ccl_offload_0/m_axis_eth_tx_data] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_mux_0/s_meta_1] [get_bd_intf_ports s_axis_rdma_rd_req] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_mux_0/s_axis_1] [get_bd_intf_pins axis_switch_2_to_1_inst_2/M00_AXIS] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_mux_0/m_meta_0] [get_bd_intf_ports m_axis_rdma_sq] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_mux_0/m_meta_1] [get_bd_intf_pins cyt_dma_adapter_0/rdma_rd_req] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_mux_0/m_axis] [get_bd_intf_ports m_axis_eth_tx_data] - connect_bd_intf_net [get_bd_intf_ports s_axis_host_2] [get_bd_intf_pins axis_switch_2_to_1_inst_2/S00_AXIS] - connect_bd_intf_net [get_bd_intf_ports s_axis_card_2] [get_bd_intf_pins axis_switch_2_to_1_inst_2/S01_AXIS] - - - } - default { - puts "Unrecognized network backend" - exit - } +set bCheckIPsPassed 1 +################################################################## +# CHECK IPs +################################################################## +set bCheckIPs 1 +if { $bCheckIPs == 1 } { + set list_check_ips "\ +xilinx.com:ip:axis_register_slice:1.1\ +xilinx.com:ip:proc_sys_reset:5.0\ +xilinx.com:ip:system_ila:1.1\ +xilinx.com:ACCL:hostctrl:1.0\ +xilinx.com:ip:smartconnect:1.0\ +xilinx.com:ACCL:reduce_ops:1.0\ +Xilinx:ACCL:ccl_offload:1.0\ +xilinx.com:ACCL:cyt_rdma_arbiter:1.0\ +xilinx.com:ip:axis_data_fifo:2.0\ +xilinx.com:ACCL:cyt_cq_dm_sts_converter:1.0\ +xilinx.com:ACCL:cyt_dma_sq_adapter:1.0\ +xilinx.com:ip:axis_switch:1.1\ +xilinx.com:ACCL:cclo_sq_adapter:1.0\ +xilinx.com:ip:xlconstant:1.1\ +" + + set list_ips_missing "" + common::send_gid_msg -ssname BD::TCL -id 2011 -severity "INFO" "Checking if the following IPs exist in the project's IP catalog: $list_check_ips ." + + foreach ip_vlnv $list_check_ips { + set ip_obj [get_ipdefs -all $ip_vlnv] + if { $ip_obj eq "" } { + lappend list_ips_missing $ip_vlnv + } + } + + if { $list_ips_missing ne "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2012 -severity "ERROR" "The following IPs are not found in the IP Catalog:\n $list_ips_missing\n\nResolution: Please add the repository containing the IP(s) to the project." } + set bCheckIPsPassed 0 + } + +} + +if { $bCheckIPsPassed != 1 } { + common::send_gid_msg -ssname BD::TCL -id 2023 -severity "WARNING" "Will not continue with creation of design due to the error(s) above." + return 3 +} + +################################################################## +# DESIGN PROCs +################################################################## + + +# Hierarchical cell: local_dma_output_demuxing +proc create_hier_cell_local_dma_output_demuxing { parentCell nameHier } { + + variable script_folder + + if { $parentCell eq "" || $nameHier eq "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2092 -severity "ERROR" "create_hier_cell_local_dma_output_demuxing() - Empty argument(s)!"} + return + } + + # Get object for parentCell + set parentObj [get_bd_cells $parentCell] + if { $parentObj == "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2090 -severity "ERROR" "Unable to find parent cell <$parentCell>!"} + return + } + + # Make sure parentObj is hier blk + set parentType [get_property TYPE $parentObj] + if { $parentType ne "hier" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2091 -severity "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be ."} + return + } + + # Save current instance; Restore later + set oldCurInst [current_bd_instance .] + + # Set parent object as current + current_bd_instance $parentObj + + # Create cell and set as current instance + set hier_obj [create_bd_cell -type hier $nameHier] + current_bd_instance $hier_obj + + # Create interface pins + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_host_0 + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 S_AXIS + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 S_AXIS1 + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_host_1 + + + # Create pins + create_bd_pin -dir I -type clk ap_clk_0 + create_bd_pin -dir I -type rst ap_rst_n_0 + + # Create instance: axis_register_slice_12, and set properties + set axis_register_slice_12 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_12 ] + set_property CONFIG.REG_CONFIG {16} $axis_register_slice_12 + + + # Create instance: axis_register_slice_13, and set properties + set axis_register_slice_13 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_13 ] + set_property CONFIG.REG_CONFIG {16} $axis_register_slice_13 + + + # Create interface connections + connect_bd_intf_net -intf_net axis_register_slice_12_M_AXIS [get_bd_intf_pins m_axis_host_0] [get_bd_intf_pins axis_register_slice_12/M_AXIS] + connect_bd_intf_net -intf_net axis_register_slice_13_M_AXIS [get_bd_intf_pins m_axis_host_1] [get_bd_intf_pins axis_register_slice_13/M_AXIS] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma0_s2mm [get_bd_intf_pins S_AXIS] [get_bd_intf_pins axis_register_slice_12/S_AXIS] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma1_s2mm [get_bd_intf_pins S_AXIS1] [get_bd_intf_pins axis_register_slice_13/S_AXIS] + + # Create port connections + connect_bd_net -net ap_clk_0_1 [get_bd_pins ap_clk_0] \ + [get_bd_pins axis_register_slice_12/aclk] \ + [get_bd_pins axis_register_slice_13/aclk] + connect_bd_net -net ap_rst_n_0_1 [get_bd_pins ap_rst_n_0] \ + [get_bd_pins axis_register_slice_12/aresetn] \ + [get_bd_pins axis_register_slice_13/aresetn] + + # Restore current instance + current_bd_instance $oldCurInst +} + +# Hierarchical cell: local_dma_input_muxing +proc create_hier_cell_local_dma_input_muxing { parentCell nameHier } { + + variable script_folder + + if { $parentCell eq "" || $nameHier eq "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2092 -severity "ERROR" "create_hier_cell_local_dma_input_muxing() - Empty argument(s)!"} + return + } + + # Get object for parentCell + set parentObj [get_bd_cells $parentCell] + if { $parentObj == "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2090 -severity "ERROR" "Unable to find parent cell <$parentCell>!"} + return + } + + # Make sure parentObj is hier blk + set parentType [get_property TYPE $parentObj] + if { $parentType ne "hier" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2091 -severity "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be ."} + return + } + + # Save current instance; Restore later + set oldCurInst [current_bd_instance .] + + # Set parent object as current + current_bd_instance $parentObj + + # Create cell and set as current instance + set hier_obj [create_bd_cell -type hier $nameHier] + current_bd_instance $hier_obj + + # Create interface pins + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_host_0 + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_host_1 + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 M_AXIS + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 M_AXIS1 + + + # Create pins + create_bd_pin -dir I -type clk ap_clk_0 + create_bd_pin -dir I -type rst ap_rst_n_0 + + # Create instance: axis_register_slice_7, and set properties + set axis_register_slice_7 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_7 ] + set_property CONFIG.REG_CONFIG {16} $axis_register_slice_7 + + + # Create instance: axis_register_slice_8, and set properties + set axis_register_slice_8 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_8 ] + set_property CONFIG.REG_CONFIG {16} $axis_register_slice_8 + + + # Create interface connections + connect_bd_intf_net -intf_net axis_register_slice_7_M_AXIS [get_bd_intf_pins M_AXIS] [get_bd_intf_pins axis_register_slice_7/M_AXIS] + connect_bd_intf_net -intf_net axis_register_slice_8_M_AXIS [get_bd_intf_pins M_AXIS1] [get_bd_intf_pins axis_register_slice_8/M_AXIS] + connect_bd_intf_net -intf_net s_axis_host_0_1 [get_bd_intf_pins s_axis_host_0] [get_bd_intf_pins axis_register_slice_8/S_AXIS] + connect_bd_intf_net -intf_net s_axis_host_1_1 [get_bd_intf_pins s_axis_host_1] [get_bd_intf_pins axis_register_slice_7/S_AXIS] + + # Create port connections + connect_bd_net -net ap_clk_0_1 [get_bd_pins ap_clk_0] \ + [get_bd_pins axis_register_slice_7/aclk] \ + [get_bd_pins axis_register_slice_8/aclk] + connect_bd_net -net ap_rst_n_0_1 [get_bd_pins ap_rst_n_0] \ + [get_bd_pins axis_register_slice_7/aresetn] \ + [get_bd_pins axis_register_slice_8/aresetn] + + # Restore current instance + current_bd_instance $oldCurInst +} + +# Hierarchical cell: sq_conversion +proc create_hier_cell_sq_conversion { parentCell nameHier } { + + variable script_folder + + if { $parentCell eq "" || $nameHier eq "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2092 -severity "ERROR" "create_hier_cell_sq_conversion() - Empty argument(s)!"} + return + } + + # Get object for parentCell + set parentObj [get_bd_cells $parentCell] + if { $parentObj == "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2090 -severity "ERROR" "Unable to find parent cell <$parentCell>!"} + return + } + + # Make sure parentObj is hier blk + set parentType [get_property TYPE $parentObj] + if { $parentType ne "hier" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2091 -severity "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be ."} + return + } + + # Save current instance; Restore later + set oldCurInst [current_bd_instance .] + + # Set parent object as current + current_bd_instance $parentObj + + # Create cell and set as current instance + set hier_obj [create_bd_cell -type hier $nameHier] + current_bd_instance $hier_obj + + # Create interface pins + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rreq_recv_0 + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rreq_recv_1 + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rreq_send_0 + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rreq_send_1 + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cclo_sq + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_cclo + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_sq_wr + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_sq_rd + + + # Create pins + create_bd_pin -dir I -type clk ap_clk_0 + create_bd_pin -dir I -type rst aresetn + create_bd_pin -dir I -type rst ap_rst_n_0 + + # Create instance: axis_switch_2_to_1_inst_2, and set properties + set axis_switch_2_to_1_inst_2 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_2_to_1_inst_2 ] + set_property -dict [list \ + CONFIG.ARB_ON_MAX_XFERS {0} \ + CONFIG.ARB_ON_TLAST {1} \ + CONFIG.DECODER_REG {0} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.NUM_MI {1} \ + CONFIG.NUM_SI {2} \ + CONFIG.TDATA_NUM_BYTES {64} \ + ] $axis_switch_2_to_1_inst_2 + + + # Create instance: axis_switch_1_to_2_inst_3, and set properties + set axis_switch_1_to_2_inst_3 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_1_to_2_inst_3 ] + set_property -dict [list \ + CONFIG.DECODER_REG {1} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.NUM_MI {2} \ + CONFIG.NUM_SI {1} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {8} \ + ] $axis_switch_1_to_2_inst_3 + + + # Create instance: axis_register_slice_11, and set properties + set axis_register_slice_11 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_11 ] + set_property CONFIG.REG_CONFIG {16} $axis_register_slice_11 + + + # Create instance: axis_register_slice_6, and set properties + set axis_register_slice_6 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_6 ] + set_property CONFIG.REG_CONFIG {16} $axis_register_slice_6 + + + # Create instance: cclo_sq_adapter_0, and set properties + set cclo_sq_adapter_0 [ create_bd_cell -type ip -vlnv xilinx.com:ACCL:cclo_sq_adapter:1.0 cclo_sq_adapter_0 ] + + # Create instance: xlconstant_2, and set properties + set xlconstant_2 [ create_bd_cell -type ip -vlnv xilinx.com:ip:xlconstant:1.1 xlconstant_2 ] + set_property -dict [list \ + CONFIG.CONST_VAL {0} \ + CONFIG.CONST_WIDTH {2} \ + ] $xlconstant_2 + + + # Create interface connections + connect_bd_intf_net -intf_net axis_register_slice_11_M_AXIS [get_bd_intf_pins axis_register_slice_11/M_AXIS] [get_bd_intf_pins axis_switch_1_to_2_inst_3/S00_AXIS] + connect_bd_intf_net -intf_net axis_register_slice_6_M_AXIS [get_bd_intf_pins axis_register_slice_6/M_AXIS] [get_bd_intf_pins cclo_sq_adapter_0/s_axis_cyt] + connect_bd_intf_net -intf_net axis_switch_1_to_2_inst_3_M00_AXIS [get_bd_intf_pins cyt_rreq_send_0] [get_bd_intf_pins axis_switch_1_to_2_inst_3/M00_AXIS] + connect_bd_intf_net -intf_net axis_switch_1_to_2_inst_3_M01_AXIS [get_bd_intf_pins cyt_rreq_send_1] [get_bd_intf_pins axis_switch_1_to_2_inst_3/M01_AXIS] + connect_bd_intf_net -intf_net axis_switch_2_to_1_inst_2_M00_AXIS [get_bd_intf_pins axis_register_slice_6/S_AXIS] [get_bd_intf_pins axis_switch_2_to_1_inst_2/M00_AXIS] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_eth_tx_data [get_bd_intf_pins s_axis_cclo] [get_bd_intf_pins cclo_sq_adapter_0/s_axis_cclo] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_rdma_sq [get_bd_intf_pins cclo_sq] [get_bd_intf_pins cclo_sq_adapter_0/cclo_sq] + connect_bd_intf_net -intf_net cclo_sq_adapter_0_cyt_sq_rd [get_bd_intf_pins cyt_sq_rd] [get_bd_intf_pins cclo_sq_adapter_0/cyt_sq_rd] + connect_bd_intf_net -intf_net cclo_sq_adapter_0_cyt_sq_wr [get_bd_intf_pins cyt_sq_wr] [get_bd_intf_pins cclo_sq_adapter_0/cyt_sq_wr] + connect_bd_intf_net -intf_net cclo_sq_adapter_0_m_axis_cyt [get_bd_intf_pins axis_register_slice_11/S_AXIS] [get_bd_intf_pins cclo_sq_adapter_0/m_axis_cyt] + connect_bd_intf_net -intf_net cyt_rreq_recv_0_1 [get_bd_intf_pins cyt_rreq_recv_0] [get_bd_intf_pins axis_switch_2_to_1_inst_2/S00_AXIS] + connect_bd_intf_net -intf_net cyt_rreq_recv_1_1 [get_bd_intf_pins cyt_rreq_recv_1] [get_bd_intf_pins axis_switch_2_to_1_inst_2/S01_AXIS] + + # Create port connections + connect_bd_net -net ap_clk_0_1 [get_bd_pins ap_clk_0] \ + [get_bd_pins axis_register_slice_11/aclk] \ + [get_bd_pins axis_register_slice_6/aclk] \ + [get_bd_pins axis_switch_1_to_2_inst_3/aclk] \ + [get_bd_pins cclo_sq_adapter_0/ap_clk] \ + [get_bd_pins axis_switch_2_to_1_inst_2/aclk] + connect_bd_net -net ap_rst_n_0_1 [get_bd_pins ap_rst_n_0] \ + [get_bd_pins axis_register_slice_11/aresetn] \ + [get_bd_pins cclo_sq_adapter_0/ap_rst_n] \ + [get_bd_pins axis_switch_1_to_2_inst_3/aresetn] + connect_bd_net -net rst_ap_clk_0_250M_peripheral_aresetn [get_bd_pins aresetn] \ + [get_bd_pins axis_register_slice_6/aresetn] \ + [get_bd_pins axis_switch_2_to_1_inst_2/aresetn] + connect_bd_net -net xlconstant_2_dout [get_bd_pins xlconstant_2/dout] \ + [get_bd_pins axis_switch_2_to_1_inst_2/s_req_suppress] + + # Restore current instance + current_bd_instance $oldCurInst +} + +# Hierarchical cell: request_conversion +proc create_hier_cell_request_conversion { parentCell nameHier } { + + variable script_folder + + if { $parentCell eq "" || $nameHier eq "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2092 -severity "ERROR" "create_hier_cell_request_conversion() - Empty argument(s)!"} + return + } + + # Get object for parentCell + set parentObj [get_bd_cells $parentCell] + if { $parentObj == "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2090 -severity "ERROR" "Unable to find parent cell <$parentCell>!"} + return + } + + # Make sure parentObj is hier blk + set parentType [get_property TYPE $parentObj] + if { $parentType ne "hier" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2091 -severity "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be ."} + return + } + + # Save current instance; Restore later + set oldCurInst [current_bd_instance .] + + # Set parent object as current + current_bd_instance $parentObj + + # Create cell and set as current instance + set hier_obj [create_bd_cell -type hier $nameHier] + current_bd_instance $hier_obj + + # Create interface pins + create_bd_intf_pin -mode Monitor -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_sq_wr_cmd + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_sq_wr_cmd1 + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 S_AXIS + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 S_AXIS1 + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 S_AXIS2 + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 S_AXIS3 + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 dma0_s2mm_cmd + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 dma1_s2mm_cmd + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 dma0_mm2s_cmd + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 dma1_mm2s_cmd + + create_bd_intf_pin -mode Monitor -vlnv xilinx.com:interface:axis_rtl:1.0 S_AXIS4 + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 dma0_s2mm_meta + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 dma1_s2mm_meta + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 dma0_mm2s_meta + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 dma1_mm2s_meta + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_sq_rd_cmd + + + # Create pins + create_bd_pin -dir I -type clk ap_clk_0 + create_bd_pin -dir I -type rst ap_rst_n_0 + create_bd_pin -dir I -type rst s_axis_aresetn + + # Create instance: axis_register_slice_9, and set properties + set axis_register_slice_9 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_9 ] + set_property CONFIG.REG_CONFIG {16} $axis_register_slice_9 + + + # Create instance: axis_data_fifo_2, and set properties + set axis_data_fifo_2 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_data_fifo:2.0 axis_data_fifo_2 ] + set_property CONFIG.FIFO_DEPTH {16} $axis_data_fifo_2 + + + # Create instance: axis_data_fifo_3, and set properties + set axis_data_fifo_3 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_data_fifo:2.0 axis_data_fifo_3 ] + set_property CONFIG.FIFO_DEPTH {16} $axis_data_fifo_3 + + + # Create instance: axis_data_fifo_4, and set properties + set axis_data_fifo_4 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_data_fifo:2.0 axis_data_fifo_4 ] + set_property CONFIG.FIFO_DEPTH {16} $axis_data_fifo_4 + + + # Create instance: axis_data_fifo_5, and set properties + set axis_data_fifo_5 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_data_fifo:2.0 axis_data_fifo_5 ] + set_property CONFIG.FIFO_DEPTH {16} $axis_data_fifo_5 + + + # Create instance: cyt_dma_sq_adapter_0, and set properties + set cyt_dma_sq_adapter_0 [ create_bd_cell -type ip -vlnv xilinx.com:ACCL:cyt_dma_sq_adapter:1.0 cyt_dma_sq_adapter_0 ] + + # Create instance: axis_register_slice_10, and set properties + set axis_register_slice_10 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_10 ] + set_property CONFIG.REG_CONFIG {16} $axis_register_slice_10 + + + # Create interface connections + connect_bd_intf_net -intf_net axis_data_fifo_2_M_AXIS [get_bd_intf_pins axis_data_fifo_2/M_AXIS] [get_bd_intf_pins cyt_dma_sq_adapter_0/cyt_rq_wr_cmd] + connect_bd_intf_net -intf_net axis_data_fifo_3_M_AXIS [get_bd_intf_pins axis_data_fifo_3/M_AXIS] [get_bd_intf_pins cyt_dma_sq_adapter_0/cyt_rq_rd_cmd] + connect_bd_intf_net -intf_net axis_data_fifo_4_M_AXIS [get_bd_intf_pins axis_data_fifo_4/M_AXIS] [get_bd_intf_pins cyt_dma_sq_adapter_0/cclo_sq_wr_cmd] + connect_bd_intf_net -intf_net axis_data_fifo_5_M_AXIS [get_bd_intf_pins axis_data_fifo_5/M_AXIS] [get_bd_intf_pins cyt_dma_sq_adapter_0/cclo_sq_rd_cmd] + connect_bd_intf_net -intf_net axis_register_slice_10_M_AXIS [get_bd_intf_pins cyt_sq_rd_cmd] [get_bd_intf_pins axis_register_slice_10/M_AXIS] + connect_bd_intf_net -intf_net axis_register_slice_5_M_AXIS [get_bd_intf_pins S_AXIS1] [get_bd_intf_pins axis_data_fifo_3/S_AXIS] + connect_bd_intf_net -intf_net axis_register_slice_9_M_AXIS [get_bd_intf_pins cyt_sq_wr_cmd1] [get_bd_intf_pins axis_register_slice_9/M_AXIS] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma0_mm2s_cmd [get_bd_intf_pins dma0_mm2s_cmd] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma0_mm2s_cmd] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma0_s2mm_cmd [get_bd_intf_pins dma0_s2mm_cmd] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma0_s2mm_cmd] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma1_mm2s_cmd [get_bd_intf_pins dma1_mm2s_cmd] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma1_mm2s_cmd] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma1_s2mm_cmd [get_bd_intf_pins dma1_s2mm_cmd] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma1_s2mm_cmd] + connect_bd_intf_net -intf_net cclo_sq_adapter_0_cyt_sq_rd [get_bd_intf_pins S_AXIS3] [get_bd_intf_pins axis_data_fifo_5/S_AXIS] + connect_bd_intf_net -intf_net cclo_sq_adapter_0_cyt_sq_wr [get_bd_intf_pins S_AXIS2] [get_bd_intf_pins axis_data_fifo_4/S_AXIS] + connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_cyt_sq_rd_cmd [get_bd_intf_pins axis_register_slice_10/S_AXIS] [get_bd_intf_pins cyt_dma_sq_adapter_0/cyt_sq_rd_cmd] + connect_bd_intf_net -intf_net [get_bd_intf_nets cyt_dma_sq_adapter_0_cyt_sq_rd_cmd] [get_bd_intf_pins axis_register_slice_10/S_AXIS] [get_bd_intf_pins S_AXIS4] + connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_cyt_sq_wr_cmd [get_bd_intf_pins cyt_dma_sq_adapter_0/cyt_sq_wr_cmd] [get_bd_intf_pins axis_register_slice_9/S_AXIS] + connect_bd_intf_net -intf_net [get_bd_intf_nets cyt_dma_sq_adapter_0_cyt_sq_wr_cmd] [get_bd_intf_pins cyt_dma_sq_adapter_0/cyt_sq_wr_cmd] [get_bd_intf_pins cyt_sq_wr_cmd] + connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_dma0_mm2s_meta [get_bd_intf_pins dma0_mm2s_meta] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma0_mm2s_meta] + connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_dma0_s2mm_meta [get_bd_intf_pins dma0_s2mm_meta] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma0_s2mm_meta] + connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_dma1_mm2s_meta [get_bd_intf_pins dma1_mm2s_meta] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma1_mm2s_meta] + connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_dma1_s2mm_meta [get_bd_intf_pins dma1_s2mm_meta] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma1_s2mm_meta] + connect_bd_intf_net -intf_net cyt_rdma_arbiter_0_m_meta_1 [get_bd_intf_pins S_AXIS] [get_bd_intf_pins axis_data_fifo_2/S_AXIS] + + # Create port connections + connect_bd_net -net ap_clk_0_1 [get_bd_pins ap_clk_0] \ + [get_bd_pins axis_data_fifo_2/s_axis_aclk] \ + [get_bd_pins axis_data_fifo_3/s_axis_aclk] \ + [get_bd_pins axis_data_fifo_4/s_axis_aclk] \ + [get_bd_pins axis_data_fifo_5/s_axis_aclk] \ + [get_bd_pins axis_register_slice_10/aclk] \ + [get_bd_pins cyt_dma_sq_adapter_0/ap_clk] \ + [get_bd_pins axis_register_slice_9/aclk] + connect_bd_net -net ap_rst_n_0_1 [get_bd_pins ap_rst_n_0] \ + [get_bd_pins axis_register_slice_10/aresetn] \ + [get_bd_pins cyt_dma_sq_adapter_0/ap_rst_n] \ + [get_bd_pins axis_register_slice_9/aresetn] + connect_bd_net -net rst_ap_clk_0_250M_peripheral_aresetn [get_bd_pins s_axis_aresetn] \ + [get_bd_pins axis_data_fifo_3/s_axis_aresetn] \ + [get_bd_pins axis_data_fifo_4/s_axis_aresetn] \ + [get_bd_pins axis_data_fifo_5/s_axis_aresetn] \ + [get_bd_pins axis_data_fifo_2/s_axis_aresetn] + + # Restore current instance + current_bd_instance $oldCurInst +} + +# Hierarchical cell: completion_conversion +proc create_hier_cell_completion_conversion { parentCell nameHier } { + + variable script_folder + + if { $parentCell eq "" || $nameHier eq "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2092 -severity "ERROR" "create_hier_cell_completion_conversion() - Empty argument(s)!"} + return + } + + # Get object for parentCell + set parentObj [get_bd_cells $parentCell] + if { $parentObj == "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2090 -severity "ERROR" "Unable to find parent cell <$parentCell>!"} + return + } + + # Make sure parentObj is hier blk + set parentType [get_property TYPE $parentObj] + if { $parentType ne "hier" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2091 -severity "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be ."} + return + } + + # Save current instance; Restore later + set oldCurInst [current_bd_instance .] + + # Set parent object as current + current_bd_instance $parentObj + + # Create cell and set as current instance + set hier_obj [create_bd_cell -type hier $nameHier] + current_bd_instance $hier_obj + + # Create interface pins + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_cq_wr_sts_0 + + create_bd_intf_pin -mode Monitor -vlnv xilinx.com:interface:axis_rtl:1.0 cq_sts + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 S_AXIS + + create_bd_intf_pin -mode Monitor -vlnv xilinx.com:interface:axis_rtl:1.0 dm1_meta + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 S_AXIS1 + + create_bd_intf_pin -mode Monitor -vlnv xilinx.com:interface:axis_rtl:1.0 dm0_meta + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_cq_rd_sts_0 + + create_bd_intf_pin -mode Monitor -vlnv xilinx.com:interface:axis_rtl:1.0 cq_sts1 + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 S_AXIS2 + + create_bd_intf_pin -mode Monitor -vlnv xilinx.com:interface:axis_rtl:1.0 dm1_meta1 + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 dm0_sts + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 dm1_sts + + create_bd_intf_pin -mode Monitor -vlnv xilinx.com:interface:axis_rtl:1.0 M_AXIS + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 S_AXIS3 + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 dm0_sts1 + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 dm1_sts1 + + + # Create pins + create_bd_pin -dir I -type clk ap_clk_0 + create_bd_pin -dir I -type rst aresetn + create_bd_pin -dir I -type rst ap_rst_n_0 + + # Create instance: axis_register_slice_3, and set properties + set axis_register_slice_3 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_3 ] + set_property CONFIG.REG_CONFIG {16} $axis_register_slice_3 + + + # Create instance: axis_data_fifo_6, and set properties + set axis_data_fifo_6 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_data_fifo:2.0 axis_data_fifo_6 ] + set_property CONFIG.FIFO_DEPTH {16} $axis_data_fifo_6 + + + # Create instance: axis_data_fifo_7, and set properties + set axis_data_fifo_7 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_data_fifo:2.0 axis_data_fifo_7 ] + set_property CONFIG.FIFO_DEPTH {16} $axis_data_fifo_7 + + + # Create instance: axis_register_slice_4, and set properties + set axis_register_slice_4 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_4 ] + set_property CONFIG.REG_CONFIG {16} $axis_register_slice_4 + + + # Create instance: axis_data_fifo_8, and set properties + set axis_data_fifo_8 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_data_fifo:2.0 axis_data_fifo_8 ] + set_property CONFIG.FIFO_DEPTH {16} $axis_data_fifo_8 + + + # Create instance: cyt_cq_dm_sts_conver_0, and set properties + set cyt_cq_dm_sts_conver_0 [ create_bd_cell -type ip -vlnv xilinx.com:ACCL:cyt_cq_dm_sts_converter:1.0 cyt_cq_dm_sts_conver_0 ] + + # Create instance: axis_data_fifo_9, and set properties + set axis_data_fifo_9 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_data_fifo:2.0 axis_data_fifo_9 ] + set_property CONFIG.FIFO_DEPTH {16} $axis_data_fifo_9 + + + # Create instance: cyt_cq_dm_sts_conver_1, and set properties + set cyt_cq_dm_sts_conver_1 [ create_bd_cell -type ip -vlnv xilinx.com:ACCL:cyt_cq_dm_sts_converter:1.0 cyt_cq_dm_sts_conver_1 ] + + # Create interface connections + connect_bd_intf_net -intf_net axis_data_fifo_6_M_AXIS [get_bd_intf_pins cyt_cq_dm_sts_conver_1/dm1_meta] [get_bd_intf_pins axis_data_fifo_6/M_AXIS] + connect_bd_intf_net -intf_net [get_bd_intf_nets axis_data_fifo_6_M_AXIS] [get_bd_intf_pins cyt_cq_dm_sts_conver_1/dm1_meta] [get_bd_intf_pins dm1_meta] + connect_bd_intf_net -intf_net axis_data_fifo_7_M_AXIS [get_bd_intf_pins cyt_cq_dm_sts_conver_1/dm0_meta] [get_bd_intf_pins axis_data_fifo_7/M_AXIS] + connect_bd_intf_net -intf_net [get_bd_intf_nets axis_data_fifo_7_M_AXIS] [get_bd_intf_pins cyt_cq_dm_sts_conver_1/dm0_meta] [get_bd_intf_pins dm0_meta] + connect_bd_intf_net -intf_net axis_data_fifo_8_M_AXIS [get_bd_intf_pins cyt_cq_dm_sts_conver_0/dm1_meta] [get_bd_intf_pins axis_data_fifo_8/M_AXIS] + connect_bd_intf_net -intf_net [get_bd_intf_nets axis_data_fifo_8_M_AXIS] [get_bd_intf_pins cyt_cq_dm_sts_conver_0/dm1_meta] [get_bd_intf_pins dm1_meta1] + connect_bd_intf_net -intf_net axis_data_fifo_9_M_AXIS [get_bd_intf_pins axis_data_fifo_9/M_AXIS] [get_bd_intf_pins cyt_cq_dm_sts_conver_0/dm0_meta] + connect_bd_intf_net -intf_net [get_bd_intf_nets axis_data_fifo_9_M_AXIS] [get_bd_intf_pins axis_data_fifo_9/M_AXIS] [get_bd_intf_pins M_AXIS] + connect_bd_intf_net -intf_net axis_register_slice_3_M_AXIS [get_bd_intf_pins cyt_cq_dm_sts_conver_0/cq_sts] [get_bd_intf_pins axis_register_slice_3/M_AXIS] + connect_bd_intf_net -intf_net [get_bd_intf_nets axis_register_slice_3_M_AXIS] [get_bd_intf_pins cyt_cq_dm_sts_conver_0/cq_sts] [get_bd_intf_pins cq_sts] + connect_bd_intf_net -intf_net axis_register_slice_4_M_AXIS [get_bd_intf_pins cyt_cq_dm_sts_conver_1/cq_sts] [get_bd_intf_pins axis_register_slice_4/M_AXIS] + connect_bd_intf_net -intf_net [get_bd_intf_nets axis_register_slice_4_M_AXIS] [get_bd_intf_pins cyt_cq_dm_sts_conver_1/cq_sts] [get_bd_intf_pins cq_sts1] + connect_bd_intf_net -intf_net cyt_cq_dm_sts_conver_0_dm0_sts [get_bd_intf_pins dm0_sts] [get_bd_intf_pins cyt_cq_dm_sts_conver_0/dm0_sts] + connect_bd_intf_net -intf_net cyt_cq_dm_sts_conver_0_dm1_sts [get_bd_intf_pins dm1_sts] [get_bd_intf_pins cyt_cq_dm_sts_conver_0/dm1_sts] + connect_bd_intf_net -intf_net cyt_cq_dm_sts_conver_1_dm0_sts [get_bd_intf_pins dm0_sts1] [get_bd_intf_pins cyt_cq_dm_sts_conver_1/dm0_sts] + connect_bd_intf_net -intf_net cyt_cq_dm_sts_conver_1_dm1_sts [get_bd_intf_pins dm1_sts1] [get_bd_intf_pins cyt_cq_dm_sts_conver_1/dm1_sts] + connect_bd_intf_net -intf_net cyt_cq_rd_sts_0_1 [get_bd_intf_pins cyt_cq_rd_sts_0] [get_bd_intf_pins axis_register_slice_4/S_AXIS] + connect_bd_intf_net -intf_net cyt_cq_wr_sts_0_1 [get_bd_intf_pins cyt_cq_wr_sts_0] [get_bd_intf_pins axis_register_slice_3/S_AXIS] + connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_dma0_mm2s_meta [get_bd_intf_pins S_AXIS1] [get_bd_intf_pins axis_data_fifo_7/S_AXIS] + connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_dma0_s2mm_meta [get_bd_intf_pins S_AXIS3] [get_bd_intf_pins axis_data_fifo_9/S_AXIS] + connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_dma1_mm2s_meta [get_bd_intf_pins S_AXIS] [get_bd_intf_pins axis_data_fifo_6/S_AXIS] + connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_dma1_s2mm_meta [get_bd_intf_pins S_AXIS2] [get_bd_intf_pins axis_data_fifo_8/S_AXIS] + + # Create port connections + connect_bd_net -net ap_clk_0_1 [get_bd_pins ap_clk_0] \ + [get_bd_pins axis_data_fifo_6/s_axis_aclk] \ + [get_bd_pins axis_data_fifo_7/s_axis_aclk] \ + [get_bd_pins axis_data_fifo_8/s_axis_aclk] \ + [get_bd_pins axis_data_fifo_9/s_axis_aclk] \ + [get_bd_pins axis_register_slice_4/aclk] \ + [get_bd_pins cyt_cq_dm_sts_conver_0/ap_clk] \ + [get_bd_pins cyt_cq_dm_sts_conver_1/ap_clk] \ + [get_bd_pins axis_register_slice_3/aclk] + connect_bd_net -net ap_rst_n_0_1 [get_bd_pins ap_rst_n_0] \ + [get_bd_pins axis_data_fifo_7/s_axis_aresetn] \ + [get_bd_pins axis_data_fifo_8/s_axis_aresetn] \ + [get_bd_pins axis_data_fifo_9/s_axis_aresetn] \ + [get_bd_pins cyt_cq_dm_sts_conver_0/ap_rst_n] \ + [get_bd_pins cyt_cq_dm_sts_conver_1/ap_rst_n] \ + [get_bd_pins axis_data_fifo_6/s_axis_aresetn] + connect_bd_net -net rst_ap_clk_0_250M_peripheral_aresetn [get_bd_pins aresetn] \ + [get_bd_pins axis_register_slice_4/aresetn] \ + [get_bd_pins axis_register_slice_3/aresetn] + + # Restore current instance + current_bd_instance $oldCurInst +} + +# Hierarchical cell: rrsp_arbitration +proc create_hier_cell_rrsp_arbitration { parentCell nameHier } { + + variable script_folder + + if { $parentCell eq "" || $nameHier eq "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2092 -severity "ERROR" "create_hier_cell_rrsp_arbitration() - Empty argument(s)!"} + return + } + + # Get object for parentCell + set parentObj [get_bd_cells $parentCell] + if { $parentObj == "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2090 -severity "ERROR" "Unable to find parent cell <$parentCell>!"} + return + } + + # Make sure parentObj is hier blk + set parentType [get_property TYPE $parentObj] + if { $parentType ne "hier" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2091 -severity "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be ."} + return + } + + # Save current instance; Restore later + set oldCurInst [current_bd_instance .] + + # Set parent object as current + current_bd_instance $parentObj + + # Create cell and set as current instance + set hier_obj [create_bd_cell -type hier $nameHier] + current_bd_instance $hier_obj + + # Create interface pins + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_host_2 + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rrsp_recv_0 + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rrsp_recv_1 + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_meta_0 + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_0 + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_meta_1 + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rq_wr + + + # Create pins + create_bd_pin -dir I -type clk ap_clk_0 + create_bd_pin -dir I -type rst ap_rst_n_0 + create_bd_pin -dir I -type rst aresetn + + # Create instance: axis_register_slice_0, and set properties + set axis_register_slice_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_0 ] + set_property CONFIG.REG_CONFIG {16} $axis_register_slice_0 + + + # Create instance: axis_register_slice_1, and set properties + set axis_register_slice_1 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_1 ] + set_property CONFIG.REG_CONFIG {16} $axis_register_slice_1 + + + # Create instance: cyt_rdma_arbiter_0, and set properties + set cyt_rdma_arbiter_0 [ create_bd_cell -type ip -vlnv xilinx.com:ACCL:cyt_rdma_arbiter:1.0 cyt_rdma_arbiter_0 ] + + # Create instance: axis_register_slice_14, and set properties + set axis_register_slice_14 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_14 ] + set_property CONFIG.REG_CONFIG {16} $axis_register_slice_14 + + + # Create instance: axis_register_slice_2, and set properties + set axis_register_slice_2 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_2 ] + set_property CONFIG.REG_CONFIG {16} $axis_register_slice_2 + + + # Create interface connections + connect_bd_intf_net -intf_net axis_register_slice_0_M_AXIS [get_bd_intf_pins axis_register_slice_0/M_AXIS] [get_bd_intf_pins cyt_rdma_arbiter_0/s_axis_0] + connect_bd_intf_net -intf_net axis_register_slice_14_M_AXIS [get_bd_intf_pins m_axis_host_2] [get_bd_intf_pins axis_register_slice_14/M_AXIS] + connect_bd_intf_net -intf_net axis_register_slice_1_M_AXIS [get_bd_intf_pins axis_register_slice_1/M_AXIS] [get_bd_intf_pins cyt_rdma_arbiter_0/s_axis_1] + connect_bd_intf_net -intf_net axis_register_slice_2_M_AXIS [get_bd_intf_pins axis_register_slice_2/M_AXIS] [get_bd_intf_pins cyt_rdma_arbiter_0/s_meta] + connect_bd_intf_net -intf_net cyt_rdma_arbiter_0_m_axis_0 [get_bd_intf_pins m_axis_0] [get_bd_intf_pins cyt_rdma_arbiter_0/m_axis_0] + connect_bd_intf_net -intf_net cyt_rdma_arbiter_0_m_axis_1 [get_bd_intf_pins axis_register_slice_14/S_AXIS] [get_bd_intf_pins cyt_rdma_arbiter_0/m_axis_1] + connect_bd_intf_net -intf_net cyt_rdma_arbiter_0_m_meta_0 [get_bd_intf_pins m_meta_0] [get_bd_intf_pins cyt_rdma_arbiter_0/m_meta_0] + connect_bd_intf_net -intf_net cyt_rdma_arbiter_0_m_meta_1 [get_bd_intf_pins m_meta_1] [get_bd_intf_pins cyt_rdma_arbiter_0/m_meta_1] + connect_bd_intf_net -intf_net cyt_rq_wr_1 [get_bd_intf_pins cyt_rq_wr] [get_bd_intf_pins axis_register_slice_2/S_AXIS] + connect_bd_intf_net -intf_net cyt_rrsp_recv_0_1 [get_bd_intf_pins cyt_rrsp_recv_0] [get_bd_intf_pins axis_register_slice_0/S_AXIS] + connect_bd_intf_net -intf_net cyt_rrsp_recv_1_1 [get_bd_intf_pins cyt_rrsp_recv_1] [get_bd_intf_pins axis_register_slice_1/S_AXIS] + + # Create port connections + connect_bd_net -net ap_clk_0_1 [get_bd_pins ap_clk_0] \ + [get_bd_pins axis_register_slice_0/aclk] \ + [get_bd_pins axis_register_slice_1/aclk] \ + [get_bd_pins axis_register_slice_14/aclk] \ + [get_bd_pins axis_register_slice_2/aclk] \ + [get_bd_pins cyt_rdma_arbiter_0/ap_clk] + connect_bd_net -net ap_rst_n_0_1 [get_bd_pins ap_rst_n_0] \ + [get_bd_pins axis_register_slice_0/aresetn] \ + [get_bd_pins axis_register_slice_1/aresetn] \ + [get_bd_pins axis_register_slice_14/aresetn] \ + [get_bd_pins cyt_rdma_arbiter_0/ap_rst_n] + connect_bd_net -net rst_ap_clk_0_250M_peripheral_aresetn [get_bd_pins aresetn] \ + [get_bd_pins axis_register_slice_2/aresetn] + + # Restore current instance + current_bd_instance $oldCurInst +} + +# Hierarchical cell: cclo +proc create_hier_cell_cclo { parentCell nameHier } { + + variable script_folder + + if { $parentCell eq "" || $nameHier eq "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2092 -severity "ERROR" "create_hier_cell_cclo() - Empty argument(s)!"} + return + } + + # Get object for parentCell + set parentObj [get_bd_cells $parentCell] + if { $parentObj == "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2090 -severity "ERROR" "Unable to find parent cell <$parentCell>!"} + return + } + + # Make sure parentObj is hier blk + set parentType [get_property TYPE $parentObj] + if { $parentType ne "hier" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2091 -severity "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be ."} + return + } + + # Save current instance; Restore later + set oldCurInst [current_bd_instance .] + + # Set parent object as current + current_bd_instance $parentObj + + # Create cell and set as current instance + set hier_obj [create_bd_cell -type hier $nameHier] + current_bd_instance $hier_obj + + # Create interface pins + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:aximm_rtl:1.0 S00_AXI_0 + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_dma0_mm2s_cmd + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_dma0_s2mm + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_dma0_s2mm_cmd + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_dma1_mm2s_cmd + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_dma1_s2mm + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_dma1_s2mm_cmd + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_eth_tx_data + + create_bd_intf_pin -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_rdma_sq + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_dma0_mm2s + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_dma0_mm2s_sts + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_dma0_s2mm_sts + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_dma1_mm2s + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_dma1_mm2s_sts + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_dma1_s2mm_sts + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_eth_notification + + create_bd_intf_pin -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_eth_rx_data + + + # Create pins + create_bd_pin -dir I -type clk ap_clk_0 + create_bd_pin -dir I -type rst ap_rst_n_0 + + # Create instance: hostctrl_0, and set properties + set hostctrl_0 [ create_bd_cell -type ip -vlnv xilinx.com:ACCL:hostctrl:1.0 hostctrl_0 ] + + # Create instance: smartconnect_0, and set properties + set smartconnect_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:smartconnect:1.0 smartconnect_0 ] + set_property -dict [list \ + CONFIG.NUM_MI {2} \ + CONFIG.NUM_SI {1} \ + ] $smartconnect_0 + + + # Create instance: reduce_ops_0, and set properties + set reduce_ops_0 [ create_bd_cell -type ip -vlnv xilinx.com:ACCL:reduce_ops:1.0 reduce_ops_0 ] + + # Create instance: ccl_offload_0, and set properties + set ccl_offload_0 [ create_bd_cell -type ip -vlnv Xilinx:ACCL:ccl_offload:1.0 ccl_offload_0 ] + + # Create interface connections + connect_bd_intf_net -intf_net S00_AXI_0_1 [get_bd_intf_pins S00_AXI_0] [get_bd_intf_pins smartconnect_0/S00_AXI] + connect_bd_intf_net -intf_net axis_register_slice_7_M_AXIS [get_bd_intf_pins s_axis_dma0_mm2s] [get_bd_intf_pins ccl_offload_0/s_axis_dma0_mm2s] + connect_bd_intf_net -intf_net axis_register_slice_8_M_AXIS [get_bd_intf_pins s_axis_dma1_mm2s] [get_bd_intf_pins ccl_offload_0/s_axis_dma1_mm2s] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_arith_op0 [get_bd_intf_pins ccl_offload_0/m_axis_arith_op0] [get_bd_intf_pins reduce_ops_0/in0] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_arith_op1 [get_bd_intf_pins ccl_offload_0/m_axis_arith_op1] [get_bd_intf_pins reduce_ops_0/in1] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_call_ack [get_bd_intf_pins ccl_offload_0/m_axis_call_ack] [get_bd_intf_pins hostctrl_0/sts] + # connect_bd_intf_net -intf_net ccl_offload_0_m_axis_compression0 [get_bd_intf_pins ccl_offload_0/m_axis_compression0] [get_bd_intf_pins ccl_offload_0/s_axis_compression0] + # connect_bd_intf_net -intf_net ccl_offload_0_m_axis_compression1 [get_bd_intf_pins ccl_offload_0/m_axis_compression1] [get_bd_intf_pins ccl_offload_0/s_axis_compression1] + # connect_bd_intf_net -intf_net ccl_offload_0_m_axis_compression2 [get_bd_intf_pins ccl_offload_0/m_axis_compression2] [get_bd_intf_pins ccl_offload_0/s_axis_compression2] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma0_mm2s_cmd [get_bd_intf_pins m_axis_dma0_mm2s_cmd] [get_bd_intf_pins ccl_offload_0/m_axis_dma0_mm2s_cmd] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma0_s2mm [get_bd_intf_pins m_axis_dma0_s2mm] [get_bd_intf_pins ccl_offload_0/m_axis_dma0_s2mm] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma0_s2mm_cmd [get_bd_intf_pins m_axis_dma0_s2mm_cmd] [get_bd_intf_pins ccl_offload_0/m_axis_dma0_s2mm_cmd] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma1_mm2s_cmd [get_bd_intf_pins m_axis_dma1_mm2s_cmd] [get_bd_intf_pins ccl_offload_0/m_axis_dma1_mm2s_cmd] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma1_s2mm [get_bd_intf_pins m_axis_dma1_s2mm] [get_bd_intf_pins ccl_offload_0/m_axis_dma1_s2mm] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma1_s2mm_cmd [get_bd_intf_pins m_axis_dma1_s2mm_cmd] [get_bd_intf_pins ccl_offload_0/m_axis_dma1_s2mm_cmd] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_eth_tx_data [get_bd_intf_pins m_axis_eth_tx_data] [get_bd_intf_pins ccl_offload_0/m_axis_eth_tx_data] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_krnl [get_bd_intf_pins ccl_offload_0/m_axis_krnl] [get_bd_intf_pins ccl_offload_0/s_axis_krnl] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_rdma_sq [get_bd_intf_pins m_axis_rdma_sq] [get_bd_intf_pins ccl_offload_0/m_axis_rdma_sq] + connect_bd_intf_net -intf_net cyt_cq_dm_sts_conver_0_dm0_sts [get_bd_intf_pins s_axis_dma0_s2mm_sts] [get_bd_intf_pins ccl_offload_0/s_axis_dma0_s2mm_sts] + connect_bd_intf_net -intf_net cyt_cq_dm_sts_conver_0_dm1_sts [get_bd_intf_pins s_axis_dma1_s2mm_sts] [get_bd_intf_pins ccl_offload_0/s_axis_dma1_s2mm_sts] + connect_bd_intf_net -intf_net cyt_cq_dm_sts_conver_1_dm0_sts [get_bd_intf_pins s_axis_dma0_mm2s_sts] [get_bd_intf_pins ccl_offload_0/s_axis_dma0_mm2s_sts] + connect_bd_intf_net -intf_net cyt_cq_dm_sts_conver_1_dm1_sts [get_bd_intf_pins s_axis_dma1_mm2s_sts] [get_bd_intf_pins ccl_offload_0/s_axis_dma1_mm2s_sts] + connect_bd_intf_net -intf_net cyt_rdma_arbiter_0_m_axis_0 [get_bd_intf_pins s_axis_eth_rx_data] [get_bd_intf_pins ccl_offload_0/s_axis_eth_rx_data] + connect_bd_intf_net -intf_net cyt_rdma_arbiter_0_m_meta_0 [get_bd_intf_pins s_axis_eth_notification] [get_bd_intf_pins ccl_offload_0/s_axis_eth_notification] + connect_bd_intf_net -intf_net hostctrl_0_cmd [get_bd_intf_pins ccl_offload_0/s_axis_call_req] [get_bd_intf_pins hostctrl_0/cmd] + connect_bd_intf_net -intf_net reduce_ops_0_out_r [get_bd_intf_pins ccl_offload_0/s_axis_arith_res] [get_bd_intf_pins reduce_ops_0/out_r] + connect_bd_intf_net -intf_net smartconnect_0_M00_AXI [get_bd_intf_pins hostctrl_0/s_axi_control] [get_bd_intf_pins smartconnect_0/M00_AXI] + connect_bd_intf_net -intf_net smartconnect_0_M01_AXI [get_bd_intf_pins ccl_offload_0/s_axi_control] [get_bd_intf_pins smartconnect_0/M01_AXI] + + # Create port connections + connect_bd_net -net ap_clk_0_1 [get_bd_pins ap_clk_0] \ + [get_bd_pins ccl_offload_0/ap_clk] \ + [get_bd_pins reduce_ops_0/ap_clk] \ + [get_bd_pins smartconnect_0/aclk] \ + [get_bd_pins hostctrl_0/ap_clk] + connect_bd_net -net ap_rst_n_0_1 [get_bd_pins ap_rst_n_0] \ + [get_bd_pins ccl_offload_0/ap_rst_n] \ + [get_bd_pins reduce_ops_0/ap_rst_n] \ + [get_bd_pins smartconnect_0/aresetn] \ + [get_bd_pins hostctrl_0/ap_rst_n] + + # Restore current instance + current_bd_instance $oldCurInst +} + +### MAIN ### + +variable parentCell +set parentCell [get_bd_cells /] + +# Get object for parentCell +set parentObj [get_bd_cells $parentCell] +if { $parentObj == "" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2090 -severity "ERROR" "Unable to find parent cell <$parentCell>!"} + return } +# Make sure parentObj is hier blk +set parentType [get_property TYPE $parentObj] +if { $parentType ne "hier" } { + catch {common::send_gid_msg -ssname BD::TCL -id 2091 -severity "ERROR" "Parent <$parentObj> has TYPE = <$parentType>. Expected to be ."} + return +} + +# Save current instance; Restore later +set oldCurInst [current_bd_instance .] + +# Set parent object as current +current_bd_instance $parentObj + -# externalize DMA data streams +# Create interface ports +set S00_AXI_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:aximm_rtl:1.0 S00_AXI_0 ] +set_property -dict [ list \ + CONFIG.ADDR_WIDTH {14} \ + CONFIG.ARUSER_WIDTH {0} \ + CONFIG.AWUSER_WIDTH {0} \ + CONFIG.BUSER_WIDTH {0} \ + CONFIG.DATA_WIDTH {32} \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_BRESP {1} \ + CONFIG.HAS_BURST {0} \ + CONFIG.HAS_CACHE {0} \ + CONFIG.HAS_LOCK {0} \ + CONFIG.HAS_PROT {1} \ + CONFIG.HAS_QOS {0} \ + CONFIG.HAS_REGION {0} \ + CONFIG.HAS_RRESP {1} \ + CONFIG.HAS_WSTRB {1} \ + CONFIG.ID_WIDTH {0} \ + CONFIG.NUM_READ_OUTSTANDING {1} \ + CONFIG.NUM_READ_THREADS {1} \ + CONFIG.NUM_WRITE_OUTSTANDING {1} \ + CONFIG.NUM_WRITE_THREADS {1} \ + CONFIG.PROTOCOL {AXI4LITE} \ + CONFIG.READ_WRITE_MODE {READ_WRITE} \ + CONFIG.RUSER_BITS_PER_BYTE {0} \ + CONFIG.RUSER_WIDTH {0} \ + CONFIG.WUSER_BITS_PER_BYTE {0} \ + CONFIG.WUSER_WIDTH {0} \ + ] $S00_AXI_0 + +set cyt_cq_rd_sts_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_cq_rd_sts_0 ] +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {0} \ + CONFIG.HAS_TLAST {0} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {4} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $cyt_cq_rd_sts_0 + +set cyt_cq_wr_sts_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_cq_wr_sts_0 ] +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {0} \ + CONFIG.HAS_TLAST {0} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {4} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $cyt_cq_wr_sts_0 + +set cyt_rq_rd [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rq_rd ] +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {0} \ + CONFIG.HAS_TLAST {0} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {16} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $cyt_rq_rd + +set cyt_rq_wr [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rq_wr ] +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {0} \ + CONFIG.HAS_TLAST {0} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {16} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $cyt_rq_wr + +set cyt_rreq_recv_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rreq_recv_0 ] +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.HAS_TREADY {0} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $cyt_rreq_recv_0 + +set cyt_rreq_recv_1 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rreq_recv_1 ] +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $cyt_rreq_recv_1 + +set cyt_rreq_send_0 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rreq_send_0 ] +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $cyt_rreq_send_0 + +set cyt_rreq_send_1 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rreq_send_1 ] +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $cyt_rreq_send_1 + +set cyt_rrsp_recv_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rrsp_recv_0 ] +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $cyt_rrsp_recv_0 + +set cyt_rrsp_recv_1 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rrsp_recv_1 ] +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $cyt_rrsp_recv_1 + +set cyt_rrsp_send_0 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rrsp_send_0 ] +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $cyt_rrsp_send_0 + +set cyt_rrsp_send_1 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rrsp_send_1 ] +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $cyt_rrsp_send_1 + +set cyt_sq_rd_cmd [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_sq_rd_cmd ] +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $cyt_sq_rd_cmd + +set cyt_sq_wr_cmd [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_sq_wr_cmd ] +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $cyt_sq_wr_cmd set m_axis_host_0 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_host_0 ] -set_property -dict [ list CONFIG.FREQ_HZ {250000000} ] $m_axis_host_0 +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $m_axis_host_0 + set m_axis_host_1 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_host_1 ] -set_property -dict [ list CONFIG.FREQ_HZ {250000000} ] $m_axis_host_1 -set m_axis_card_0 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_card_0 ] -set_property -dict [ list CONFIG.FREQ_HZ {250000000} ] $m_axis_card_0 -set m_axis_card_1 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_card_1 ] -set_property -dict [ list CONFIG.FREQ_HZ {250000000} ] $m_axis_card_1 +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $m_axis_host_1 + +set m_axis_host_2 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_host_2 ] +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $m_axis_host_2 set s_axis_host_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_host_0 ] -set_property -dict [ list CONFIG.FREQ_HZ {250000000} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.HAS_TREADY {1} CONFIG.HAS_TSTRB {0} CONFIG.LAYERED_METADATA {undef} CONFIG.TDATA_NUM_BYTES {64} CONFIG.TDEST_WIDTH {0} CONFIG.TID_WIDTH {0} CONFIG.TUSER_WIDTH {0} ] $s_axis_host_0 +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $s_axis_host_0 + set s_axis_host_1 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_host_1 ] -set_property -dict [ list CONFIG.FREQ_HZ {250000000} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.HAS_TREADY {1} CONFIG.HAS_TSTRB {0} CONFIG.LAYERED_METADATA {undef} CONFIG.TDATA_NUM_BYTES {64} CONFIG.TDEST_WIDTH {0} CONFIG.TID_WIDTH {0} CONFIG.TUSER_WIDTH {0} ] $s_axis_host_1 -set s_axis_card_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_card_0 ] -set_property -dict [ list CONFIG.FREQ_HZ {250000000} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.HAS_TREADY {1} CONFIG.HAS_TSTRB {0} CONFIG.LAYERED_METADATA {undef} CONFIG.TDATA_NUM_BYTES {64} CONFIG.TDEST_WIDTH {0} CONFIG.TID_WIDTH {0} CONFIG.TUSER_WIDTH {0} ] $s_axis_card_0 -set s_axis_card_1 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_card_1 ] -set_property -dict [ list CONFIG.FREQ_HZ {250000000} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.HAS_TREADY {1} CONFIG.HAS_TSTRB {0} CONFIG.LAYERED_METADATA {undef} CONFIG.TDATA_NUM_BYTES {64} CONFIG.TDEST_WIDTH {0} CONFIG.TID_WIDTH {0} CONFIG.TUSER_WIDTH {0} ] $s_axis_card_1 - - -# s_axis_host_0 and s_axis_card_0 multiplexed to single s_axis_dma0_mm2s stream, round-robin by tlast -connect_bd_intf_net [get_bd_intf_ports s_axis_host_0] [get_bd_intf_pins axis_switch_2_to_1_inst_0/S00_AXIS] -connect_bd_intf_net [get_bd_intf_ports s_axis_card_0] [get_bd_intf_pins axis_switch_2_to_1_inst_0/S01_AXIS] -connect_bd_intf_net [get_bd_intf_pins axis_switch_2_to_1_inst_0/M00_AXIS] [get_bd_intf_pins ccl_offload_0/s_axis_dma0_mm2s] -connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins axis_switch_2_to_1_inst_0/aclk] -connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins axis_switch_2_to_1_inst_0/aresetn] - -create_bd_cell -type ip -vlnv xilinx.com:ip:xlconstant:1.1 xlconstant_0 -set_property -dict [list CONFIG.CONST_WIDTH {2}] [get_bd_cells xlconstant_0] -set_property -dict [list CONFIG.CONST_VAL {0}] [get_bd_cells xlconstant_0] -connect_bd_net [get_bd_pins xlconstant_0/dout] [get_bd_pins axis_switch_2_to_1_inst_0/s_req_suppress] - -# s_axis_host_1 and s_axis_card_1 multiplexed to single s_axis_dma1_mm2s stream, round-robin by tlast -connect_bd_intf_net [get_bd_intf_ports s_axis_host_1] [get_bd_intf_pins axis_switch_2_to_1_inst_1/S00_AXIS] -connect_bd_intf_net [get_bd_intf_ports s_axis_card_1] [get_bd_intf_pins axis_switch_2_to_1_inst_1/S01_AXIS] -connect_bd_intf_net [get_bd_intf_pins axis_switch_2_to_1_inst_1/M00_AXIS] [get_bd_intf_pins ccl_offload_0/s_axis_dma1_mm2s] -connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins axis_switch_2_to_1_inst_1/aclk] -connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins axis_switch_2_to_1_inst_1/aresetn] - -create_bd_cell -type ip -vlnv xilinx.com:ip:xlconstant:1.1 xlconstant_1 -set_property -dict [list CONFIG.CONST_WIDTH {2}] [get_bd_cells xlconstant_1] -set_property -dict [list CONFIG.CONST_VAL {0}] [get_bd_cells xlconstant_1] -connect_bd_net [get_bd_pins xlconstant_1/dout] [get_bd_pins axis_switch_2_to_1_inst_1/s_req_suppress] - -# m_axis_dma0_s2mm multiplex to m_axis_host_0 and m_axis_card_0 according to the strm flag encoded in m_axis_dma0_s2mm tdest -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_dma0_s2mm] [get_bd_intf_pins axis_switch_1_to_2_inst_0/S00_AXIS] -connect_bd_intf_net [get_bd_intf_ports m_axis_card_0] [get_bd_intf_pins axis_switch_1_to_2_inst_0/M00_AXIS] -connect_bd_intf_net [get_bd_intf_ports m_axis_host_0] [get_bd_intf_pins axis_switch_1_to_2_inst_0/M01_AXIS] -connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins axis_switch_1_to_2_inst_0/aclk] -connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins axis_switch_1_to_2_inst_0/aresetn] - -# m_axis_dma1_s2mm multiplex to m_axis_host_1 and m_axis_card_1 according to the strm flag encoded in m_axis_dma1_s2mm tdest -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_dma1_s2mm] [get_bd_intf_pins axis_switch_1_to_2_inst_1/S00_AXIS] -connect_bd_intf_net [get_bd_intf_ports m_axis_card_1] [get_bd_intf_pins axis_switch_1_to_2_inst_1/M00_AXIS] -connect_bd_intf_net [get_bd_intf_ports m_axis_host_1] [get_bd_intf_pins axis_switch_1_to_2_inst_1/M01_AXIS] -connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins axis_switch_1_to_2_inst_1/aclk] -connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins axis_switch_1_to_2_inst_1/aresetn] - - - -# connect up AXI lite -create_bd_cell -type ip -vlnv xilinx.com:ip:smartconnect:1.0 smartconnect_0 -set_property -dict [list CONFIG.NUM_MI {2} CONFIG.NUM_SI {1}] [get_bd_cells smartconnect_0] -connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins smartconnect_0/aclk] -connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins smartconnect_0/aresetn] -connect_bd_intf_net [get_bd_intf_pins hostctrl_0/s_axi_control] [get_bd_intf_pins smartconnect_0/M00_AXI] -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/s_axi_control] [get_bd_intf_pins smartconnect_0/M01_AXI] -make_bd_intf_pins_external [get_bd_intf_pins smartconnect_0/S00_AXI] -set_property -dict [list CONFIG.ADDR_WIDTH {16}] [get_bd_intf_ports S00_AXI_0] +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $s_axis_host_1 + +set s_axis_host_2 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_host_2 ] +set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $s_axis_host_2 + + +# Create ports +set ap_clk_0 [ create_bd_port -dir I -type clk -freq_hz 250000000 ap_clk_0 ] +set_property -dict [ list \ + CONFIG.ASSOCIATED_BUSIF {cyt_cq_wr_sts_0:cyt_cq_rd_sts_0:cyt_sq_wr_cmd:cyt_sq_rd_cmd:m_axis_host_2:s_axis_host_2:cyt_rq_wr:m_axis_host_0:m_axis_host_1:s_axis_host_0:s_axis_host_1:S00_AXI_0:cyt_rreq_send_0:cyt_rreq_send_1:cyt_rrsp_recv_0:cyt_rrsp_recv_1:cyt_rrsp_send_0:cyt_rrsp_send_1:cyt_rq_rd:cyt_rreq_recv_0:cyt_rreq_recv_1} \ +] $ap_clk_0 +set ap_rst_n_0 [ create_bd_port -dir I -type rst ap_rst_n_0 ] + +# Create instance: axis_register_slice_5, and set properties +set axis_register_slice_5 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_5 ] +set_property CONFIG.REG_CONFIG {16} $axis_register_slice_5 + + +# Create instance: rst_ap_clk_0_250M, and set properties +set rst_ap_clk_0_250M [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 rst_ap_clk_0_250M ] + +# Create instance: system_ila_0, and set properties +set system_ila_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:system_ila:1.1 system_ila_0 ] +set_property -dict [list \ + CONFIG.C_INPUT_PIPE_STAGES {2} \ + CONFIG.C_MON_TYPE {INTERFACE} \ + CONFIG.C_NUM_MONITOR_SLOTS {16} \ + CONFIG.C_SLOT_0_APC_EN {0} \ + CONFIG.C_SLOT_0_AXI_AR_SEL_DATA {1} \ + CONFIG.C_SLOT_0_AXI_AR_SEL_TRIG {1} \ + CONFIG.C_SLOT_0_AXI_AW_SEL_DATA {1} \ + CONFIG.C_SLOT_0_AXI_AW_SEL_TRIG {1} \ + CONFIG.C_SLOT_0_AXI_B_SEL_DATA {1} \ + CONFIG.C_SLOT_0_AXI_B_SEL_TRIG {1} \ + CONFIG.C_SLOT_0_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_0_AXI_R_SEL_DATA {1} \ + CONFIG.C_SLOT_0_AXI_R_SEL_TRIG {1} \ + CONFIG.C_SLOT_0_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_0_AXI_W_SEL_DATA {1} \ + CONFIG.C_SLOT_0_AXI_W_SEL_TRIG {1} \ + CONFIG.C_SLOT_0_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_0_TYPE {0} \ + CONFIG.C_SLOT_10_APC_EN {0} \ + CONFIG.C_SLOT_10_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_10_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_10_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_10_TYPE {0} \ + CONFIG.C_SLOT_11_APC_EN {0} \ + CONFIG.C_SLOT_11_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_11_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_11_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_11_TYPE {0} \ + CONFIG.C_SLOT_12_APC_EN {0} \ + CONFIG.C_SLOT_12_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_12_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_12_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_12_TYPE {0} \ + CONFIG.C_SLOT_13_APC_EN {0} \ + CONFIG.C_SLOT_13_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_13_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_13_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_13_TYPE {0} \ + CONFIG.C_SLOT_14_APC_EN {0} \ + CONFIG.C_SLOT_14_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_14_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_14_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_14_TYPE {0} \ + CONFIG.C_SLOT_15_APC_EN {0} \ + CONFIG.C_SLOT_15_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_15_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_15_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_1_APC_EN {0} \ + CONFIG.C_SLOT_1_AXI_AR_SEL_DATA {1} \ + CONFIG.C_SLOT_1_AXI_AR_SEL_TRIG {1} \ + CONFIG.C_SLOT_1_AXI_AW_SEL_DATA {1} \ + CONFIG.C_SLOT_1_AXI_AW_SEL_TRIG {1} \ + CONFIG.C_SLOT_1_AXI_B_SEL_DATA {1} \ + CONFIG.C_SLOT_1_AXI_B_SEL_TRIG {1} \ + CONFIG.C_SLOT_1_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_1_AXI_R_SEL_DATA {1} \ + CONFIG.C_SLOT_1_AXI_R_SEL_TRIG {1} \ + CONFIG.C_SLOT_1_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_1_AXI_W_SEL_DATA {1} \ + CONFIG.C_SLOT_1_AXI_W_SEL_TRIG {1} \ + CONFIG.C_SLOT_1_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_1_TYPE {0} \ + CONFIG.C_SLOT_2_APC_EN {0} \ + CONFIG.C_SLOT_2_AXI_AR_SEL_DATA {1} \ + CONFIG.C_SLOT_2_AXI_AR_SEL_TRIG {1} \ + CONFIG.C_SLOT_2_AXI_AW_SEL_DATA {1} \ + CONFIG.C_SLOT_2_AXI_AW_SEL_TRIG {1} \ + CONFIG.C_SLOT_2_AXI_B_SEL_DATA {1} \ + CONFIG.C_SLOT_2_AXI_B_SEL_TRIG {1} \ + CONFIG.C_SLOT_2_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_2_AXI_R_SEL_DATA {1} \ + CONFIG.C_SLOT_2_AXI_R_SEL_TRIG {1} \ + CONFIG.C_SLOT_2_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_2_AXI_W_SEL_DATA {1} \ + CONFIG.C_SLOT_2_AXI_W_SEL_TRIG {1} \ + CONFIG.C_SLOT_2_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_2_TYPE {0} \ + CONFIG.C_SLOT_3_APC_EN {0} \ + CONFIG.C_SLOT_3_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_3_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_3_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_3_TYPE {0} \ + CONFIG.C_SLOT_4_APC_EN {0} \ + CONFIG.C_SLOT_4_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_4_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_4_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_4_TYPE {0} \ + CONFIG.C_SLOT_5_APC_EN {0} \ + CONFIG.C_SLOT_5_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_5_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_5_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_5_TYPE {0} \ + CONFIG.C_SLOT_6_APC_EN {0} \ + CONFIG.C_SLOT_6_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_6_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_6_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_6_TYPE {0} \ + CONFIG.C_SLOT_7_APC_EN {0} \ + CONFIG.C_SLOT_7_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_7_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_7_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_7_TYPE {0} \ + CONFIG.C_SLOT_8_APC_EN {0} \ + CONFIG.C_SLOT_8_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_8_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_8_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_8_TYPE {0} \ + CONFIG.C_SLOT_9_APC_EN {0} \ + CONFIG.C_SLOT_9_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_9_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_9_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_9_TYPE {0} \ +] $system_ila_0 + + +# Create instance: cclo +create_hier_cell_cclo [current_bd_instance .] cclo + +# Create instance: rrsp_arbitration +create_hier_cell_rrsp_arbitration [current_bd_instance .] rrsp_arbitration + +# Create instance: completion_conversion +create_hier_cell_completion_conversion [current_bd_instance .] completion_conversion + +# Create instance: request_conversion +create_hier_cell_request_conversion [current_bd_instance .] request_conversion + +# Create instance: sq_conversion +create_hier_cell_sq_conversion [current_bd_instance .] sq_conversion + +# Create instance: local_dma_input_muxing +create_hier_cell_local_dma_input_muxing [current_bd_instance .] local_dma_input_muxing + +# Create instance: local_dma_output_demuxing +create_hier_cell_local_dma_output_demuxing [current_bd_instance .] local_dma_output_demuxing + +# Create interface connections +connect_bd_intf_net -intf_net S00_AXI_0_1 [get_bd_intf_ports S00_AXI_0] [get_bd_intf_pins cclo/S00_AXI_0] +connect_bd_intf_net -intf_net axis_data_fifo_6_M_AXIS [get_bd_intf_pins system_ila_0/SLOT_0_AXIS] [get_bd_intf_pins completion_conversion/dm1_meta] +set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets axis_data_fifo_6_M_AXIS] +connect_bd_intf_net -intf_net axis_data_fifo_7_M_AXIS [get_bd_intf_pins system_ila_0/SLOT_1_AXIS] [get_bd_intf_pins completion_conversion/dm0_meta] +set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets axis_data_fifo_7_M_AXIS] +connect_bd_intf_net -intf_net axis_data_fifo_8_M_AXIS [get_bd_intf_pins system_ila_0/SLOT_6_AXIS] [get_bd_intf_pins completion_conversion/dm1_meta1] +set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets axis_data_fifo_8_M_AXIS] +connect_bd_intf_net -intf_net axis_data_fifo_9_M_AXIS [get_bd_intf_pins system_ila_0/SLOT_7_AXIS] [get_bd_intf_pins completion_conversion/M_AXIS] +set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets axis_data_fifo_9_M_AXIS] +connect_bd_intf_net -intf_net axis_register_slice_10_M_AXIS [get_bd_intf_ports cyt_sq_rd_cmd] [get_bd_intf_pins request_conversion/cyt_sq_rd_cmd] +connect_bd_intf_net -intf_net axis_register_slice_3_M_AXIS [get_bd_intf_pins system_ila_0/SLOT_8_AXIS] [get_bd_intf_pins completion_conversion/cq_sts] +set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets axis_register_slice_3_M_AXIS] +connect_bd_intf_net -intf_net axis_register_slice_4_M_AXIS [get_bd_intf_pins system_ila_0/SLOT_9_AXIS] [get_bd_intf_pins completion_conversion/cq_sts1] +set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets axis_register_slice_4_M_AXIS] +connect_bd_intf_net -intf_net axis_register_slice_5_M_AXIS [get_bd_intf_pins request_conversion/S_AXIS1] [get_bd_intf_pins axis_register_slice_5/M_AXIS] +connect_bd_intf_net -intf_net axis_register_slice_7_M_AXIS [get_bd_intf_pins local_dma_input_muxing/M_AXIS] [get_bd_intf_pins cclo/s_axis_dma0_mm2s] +connect_bd_intf_net -intf_net axis_register_slice_8_M_AXIS [get_bd_intf_pins local_dma_input_muxing/M_AXIS1] [get_bd_intf_pins cclo/s_axis_dma1_mm2s] +connect_bd_intf_net -intf_net axis_register_slice_9_M_AXIS [get_bd_intf_ports cyt_sq_wr_cmd] [get_bd_intf_pins request_conversion/cyt_sq_wr_cmd1] +connect_bd_intf_net -intf_net axis_switch_1_to_2_inst_0_M01_AXIS [get_bd_intf_ports m_axis_host_0] [get_bd_intf_pins local_dma_output_demuxing/m_axis_host_0] +connect_bd_intf_net -intf_net axis_switch_1_to_2_inst_1_M01_AXIS [get_bd_intf_ports m_axis_host_1] [get_bd_intf_pins local_dma_output_demuxing/m_axis_host_1] +connect_bd_intf_net -intf_net axis_switch_1_to_2_inst_2_M01_AXIS [get_bd_intf_ports m_axis_host_2] [get_bd_intf_pins rrsp_arbitration/m_axis_host_2] +connect_bd_intf_net -intf_net axis_switch_1_to_2_inst_3_M00_AXIS [get_bd_intf_ports cyt_rreq_send_0] [get_bd_intf_pins sq_conversion/cyt_rreq_send_0] +connect_bd_intf_net -intf_net axis_switch_1_to_2_inst_3_M01_AXIS [get_bd_intf_ports cyt_rreq_send_1] [get_bd_intf_pins sq_conversion/cyt_rreq_send_1] +connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma0_mm2s_cmd [get_bd_intf_pins cclo/m_axis_dma0_mm2s_cmd] [get_bd_intf_pins request_conversion/dma0_mm2s_cmd] +connect_bd_intf_net -intf_net [get_bd_intf_nets ccl_offload_0_m_axis_dma0_mm2s_cmd] [get_bd_intf_pins cclo/m_axis_dma0_mm2s_cmd] [get_bd_intf_pins system_ila_0/SLOT_2_AXIS] +set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets ccl_offload_0_m_axis_dma0_mm2s_cmd] +connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma0_s2mm [get_bd_intf_pins local_dma_output_demuxing/S_AXIS] [get_bd_intf_pins cclo/m_axis_dma0_s2mm] +connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma0_s2mm_cmd [get_bd_intf_pins cclo/m_axis_dma0_s2mm_cmd] [get_bd_intf_pins request_conversion/dma0_s2mm_cmd] +connect_bd_intf_net -intf_net [get_bd_intf_nets ccl_offload_0_m_axis_dma0_s2mm_cmd] [get_bd_intf_pins cclo/m_axis_dma0_s2mm_cmd] [get_bd_intf_pins system_ila_0/SLOT_3_AXIS] +set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets ccl_offload_0_m_axis_dma0_s2mm_cmd] +connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma1_mm2s_cmd [get_bd_intf_pins cclo/m_axis_dma1_mm2s_cmd] [get_bd_intf_pins request_conversion/dma1_mm2s_cmd] +connect_bd_intf_net -intf_net [get_bd_intf_nets ccl_offload_0_m_axis_dma1_mm2s_cmd] [get_bd_intf_pins cclo/m_axis_dma1_mm2s_cmd] [get_bd_intf_pins system_ila_0/SLOT_4_AXIS] +set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets ccl_offload_0_m_axis_dma1_mm2s_cmd] +connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma1_s2mm [get_bd_intf_pins local_dma_output_demuxing/S_AXIS1] [get_bd_intf_pins cclo/m_axis_dma1_s2mm] +connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma1_s2mm_cmd [get_bd_intf_pins cclo/m_axis_dma1_s2mm_cmd] [get_bd_intf_pins request_conversion/dma1_s2mm_cmd] +connect_bd_intf_net -intf_net [get_bd_intf_nets ccl_offload_0_m_axis_dma1_s2mm_cmd] [get_bd_intf_pins cclo/m_axis_dma1_s2mm_cmd] [get_bd_intf_pins system_ila_0/SLOT_5_AXIS] +set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets ccl_offload_0_m_axis_dma1_s2mm_cmd] +connect_bd_intf_net -intf_net ccl_offload_0_m_axis_eth_tx_data [get_bd_intf_pins cclo/m_axis_eth_tx_data] [get_bd_intf_pins sq_conversion/s_axis_cclo] +connect_bd_intf_net -intf_net ccl_offload_0_m_axis_rdma_sq [get_bd_intf_pins cclo/m_axis_rdma_sq] [get_bd_intf_pins sq_conversion/cclo_sq] +connect_bd_intf_net -intf_net cclo_sq_adapter_0_cyt_sq_rd [get_bd_intf_pins request_conversion/S_AXIS3] [get_bd_intf_pins sq_conversion/cyt_sq_rd] +connect_bd_intf_net -intf_net cclo_sq_adapter_0_cyt_sq_wr [get_bd_intf_pins request_conversion/S_AXIS2] [get_bd_intf_pins sq_conversion/cyt_sq_wr] +connect_bd_intf_net -intf_net cyt_cq_dm_sts_conver_0_dm0_sts [get_bd_intf_pins cclo/s_axis_dma0_s2mm_sts] [get_bd_intf_pins completion_conversion/dm0_sts] +connect_bd_intf_net -intf_net [get_bd_intf_nets cyt_cq_dm_sts_conver_0_dm0_sts] [get_bd_intf_pins cclo/s_axis_dma0_s2mm_sts] [get_bd_intf_pins system_ila_0/SLOT_12_AXIS] +set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets cyt_cq_dm_sts_conver_0_dm0_sts] +connect_bd_intf_net -intf_net cyt_cq_dm_sts_conver_0_dm1_sts [get_bd_intf_pins cclo/s_axis_dma1_s2mm_sts] [get_bd_intf_pins completion_conversion/dm1_sts] +connect_bd_intf_net -intf_net [get_bd_intf_nets cyt_cq_dm_sts_conver_0_dm1_sts] [get_bd_intf_pins cclo/s_axis_dma1_s2mm_sts] [get_bd_intf_pins system_ila_0/SLOT_13_AXIS] +set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets cyt_cq_dm_sts_conver_0_dm1_sts] +connect_bd_intf_net -intf_net cyt_cq_dm_sts_conver_1_dm0_sts [get_bd_intf_pins cclo/s_axis_dma0_mm2s_sts] [get_bd_intf_pins completion_conversion/dm0_sts1] +connect_bd_intf_net -intf_net [get_bd_intf_nets cyt_cq_dm_sts_conver_1_dm0_sts] [get_bd_intf_pins cclo/s_axis_dma0_mm2s_sts] [get_bd_intf_pins system_ila_0/SLOT_14_AXIS] +set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets cyt_cq_dm_sts_conver_1_dm0_sts] +connect_bd_intf_net -intf_net cyt_cq_dm_sts_conver_1_dm1_sts [get_bd_intf_pins cclo/s_axis_dma1_mm2s_sts] [get_bd_intf_pins completion_conversion/dm1_sts1] +connect_bd_intf_net -intf_net [get_bd_intf_nets cyt_cq_dm_sts_conver_1_dm1_sts] [get_bd_intf_pins cclo/s_axis_dma1_mm2s_sts] [get_bd_intf_pins system_ila_0/SLOT_15_AXIS] +set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets cyt_cq_dm_sts_conver_1_dm1_sts] +connect_bd_intf_net -intf_net cyt_cq_rd_sts_0_1 [get_bd_intf_ports cyt_cq_rd_sts_0] [get_bd_intf_pins completion_conversion/cyt_cq_rd_sts_0] +connect_bd_intf_net -intf_net cyt_cq_wr_sts_0_1 [get_bd_intf_ports cyt_cq_wr_sts_0] [get_bd_intf_pins completion_conversion/cyt_cq_wr_sts_0] +connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_cyt_sq_rd_cmd [get_bd_intf_pins system_ila_0/SLOT_10_AXIS] [get_bd_intf_pins request_conversion/S_AXIS4] +set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets cyt_dma_sq_adapter_0_cyt_sq_rd_cmd] +connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_cyt_sq_wr_cmd [get_bd_intf_pins system_ila_0/SLOT_11_AXIS] [get_bd_intf_pins request_conversion/cyt_sq_wr_cmd] +set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets cyt_dma_sq_adapter_0_cyt_sq_wr_cmd] +connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_dma0_mm2s_meta [get_bd_intf_pins completion_conversion/S_AXIS1] [get_bd_intf_pins request_conversion/dma0_mm2s_meta] +connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_dma0_s2mm_meta [get_bd_intf_pins completion_conversion/S_AXIS3] [get_bd_intf_pins request_conversion/dma0_s2mm_meta] +connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_dma1_mm2s_meta [get_bd_intf_pins completion_conversion/S_AXIS] [get_bd_intf_pins request_conversion/dma1_mm2s_meta] +connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_dma1_s2mm_meta [get_bd_intf_pins completion_conversion/S_AXIS2] [get_bd_intf_pins request_conversion/dma1_s2mm_meta] +connect_bd_intf_net -intf_net cyt_rdma_arbiter_0_m_axis_0 [get_bd_intf_pins cclo/s_axis_eth_rx_data] [get_bd_intf_pins rrsp_arbitration/m_axis_0] +connect_bd_intf_net -intf_net cyt_rdma_arbiter_0_m_meta_0 [get_bd_intf_pins cclo/s_axis_eth_notification] [get_bd_intf_pins rrsp_arbitration/m_meta_0] +connect_bd_intf_net -intf_net cyt_rdma_arbiter_0_m_meta_1 [get_bd_intf_pins request_conversion/S_AXIS] [get_bd_intf_pins rrsp_arbitration/m_meta_1] +connect_bd_intf_net -intf_net cyt_rq_rd_1 [get_bd_intf_ports cyt_rq_rd] [get_bd_intf_pins axis_register_slice_5/S_AXIS] +connect_bd_intf_net -intf_net cyt_rq_wr_1 [get_bd_intf_ports cyt_rq_wr] [get_bd_intf_pins rrsp_arbitration/cyt_rq_wr] +connect_bd_intf_net -intf_net cyt_rreq_recv_0_1 [get_bd_intf_ports cyt_rreq_recv_0] [get_bd_intf_pins sq_conversion/cyt_rreq_recv_0] +connect_bd_intf_net -intf_net cyt_rreq_recv_1_1 [get_bd_intf_ports cyt_rreq_recv_1] [get_bd_intf_pins sq_conversion/cyt_rreq_recv_1] +connect_bd_intf_net -intf_net cyt_rrsp_recv_0_1 [get_bd_intf_ports cyt_rrsp_recv_0] [get_bd_intf_pins rrsp_arbitration/cyt_rrsp_recv_0] +connect_bd_intf_net -intf_net cyt_rrsp_recv_1_1 [get_bd_intf_ports cyt_rrsp_recv_1] [get_bd_intf_pins rrsp_arbitration/cyt_rrsp_recv_1] +connect_bd_intf_net -intf_net s_axis_host_0_1 [get_bd_intf_ports s_axis_host_0] [get_bd_intf_pins local_dma_input_muxing/s_axis_host_0] +connect_bd_intf_net -intf_net s_axis_host_1_1 [get_bd_intf_ports s_axis_host_1] [get_bd_intf_pins local_dma_input_muxing/s_axis_host_1] + +# Create port connections +connect_bd_net -net ap_clk_0_1 [get_bd_ports ap_clk_0] \ +[get_bd_pins axis_register_slice_5/aclk] \ +[get_bd_pins rst_ap_clk_0_250M/slowest_sync_clk] \ +[get_bd_pins system_ila_0/clk] \ +[get_bd_pins cclo/ap_clk_0] \ +[get_bd_pins rrsp_arbitration/ap_clk_0] \ +[get_bd_pins completion_conversion/ap_clk_0] \ +[get_bd_pins request_conversion/ap_clk_0] \ +[get_bd_pins sq_conversion/ap_clk_0] \ +[get_bd_pins local_dma_input_muxing/ap_clk_0] \ +[get_bd_pins local_dma_output_demuxing/ap_clk_0] +connect_bd_net -net ap_rst_n_0_1 [get_bd_ports ap_rst_n_0] \ +[get_bd_pins rst_ap_clk_0_250M/ext_reset_in] \ +[get_bd_pins system_ila_0/resetn] \ +[get_bd_pins cclo/ap_rst_n_0] \ +[get_bd_pins rrsp_arbitration/ap_rst_n_0] \ +[get_bd_pins completion_conversion/ap_rst_n_0] \ +[get_bd_pins request_conversion/ap_rst_n_0] \ +[get_bd_pins sq_conversion/ap_rst_n_0] \ +[get_bd_pins local_dma_input_muxing/ap_rst_n_0] \ +[get_bd_pins local_dma_output_demuxing/ap_rst_n_0] +connect_bd_net -net rst_ap_clk_0_250M_peripheral_aresetn [get_bd_pins rst_ap_clk_0_250M/peripheral_aresetn] \ +[get_bd_pins axis_register_slice_5/aresetn] \ +[get_bd_pins rrsp_arbitration/aresetn] \ +[get_bd_pins completion_conversion/aresetn] \ +[get_bd_pins request_conversion/s_axis_aresetn] \ +[get_bd_pins sq_conversion/aresetn] # Create address segments -assign_bd_address -offset 0x00000000 -range 0x00002000 -target_address_space [get_bd_addr_spaces S00_AXI_0] [get_bd_addr_segs ccl_offload_0/s_axi_control/reg0] -force -assign_bd_address -offset 0x00002000 -range 0x00002000 -target_address_space [get_bd_addr_spaces S00_AXI_0] [get_bd_addr_segs hostctrl_0/s_axi_control/Reg] -force +assign_bd_address -offset 0x00000000 -range 0x00002000 -target_address_space [get_bd_addr_spaces S00_AXI_0] [get_bd_addr_segs cclo/ccl_offload_0/s_axi_control/reg0] -force +assign_bd_address -offset 0x00002000 -range 0x00002000 -target_address_space [get_bd_addr_spaces S00_AXI_0] [get_bd_addr_segs cclo/hostctrl_0/s_axi_control/Reg] -force + + +# Restore current instance +current_bd_instance $oldCurInst + +#create ila_top +#create_ip -name ila -vendor xilinx.com -library ip -version 6.2 -module_name ila_top +#set_property -dict [ list CONFIG.C_PROBE2_WIDTH {128} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE5_WIDTH {128} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE8_WIDTH {128} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE11_WIDTH {128} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE24_WIDTH {32} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE27_WIDTH {32} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE32_WIDTH {512} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE35_WIDTH {512} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE38_WIDTH {512} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE41_WIDTH {512} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_PROBE44_WIDTH {512} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_NUM_OF_PROBES {45} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_EN_STRG_QUAL {1} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_INPUT_PIPE_STAGES {2} ] [get_ips ila_top] +#set_property -dict [ list CONFIG.C_DATA_DEPTH {1024} ] [get_ips ila_top] -set_property CONFIG.PROTOCOL AXI4LITE [get_bd_intf_ports /S00_AXI_0] -set_property -dict [list CONFIG.HAS_BURST {0} CONFIG.HAS_CACHE {0} CONFIG.HAS_LOCK {0} CONFIG.HAS_QOS {0} CONFIG.HAS_REGION {0}] [get_bd_intf_ports S00_AXI_0] validate_bd_design save_bd_design -make_wrapper -files [get_files "$build_dir/lynx/lynx.srcs/sources_1/bd/accl_bd/accl_bd.bd"] -top -add_files -norecurse "$build_dir/lynx/lynx.gen/sources_1/bd/accl_bd/hdl/accl_bd_wrapper.v" +make_wrapper -files [get_files "$build_dir/test_config_0/user_c0_0/test.srcs/sources_1/bd/accl_bd/accl_bd.bd"] -top +add_files -norecurse "$build_dir/test_config_0/user_c0_0/test.srcs/sources_1/bd/accl_bd/hdl/accl_bd_wrapper.v" update_compile_order -fileset sources_1 exit