diff --git a/.gitignore b/.gitignore index 46f1b628..2ccd22ad 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,10 @@ driver/xrt/src/m2m driver/xrt/build .vscode +coyote_build* +*xcu55c-fsvh2892-2L-e* +*_prj +*.gen +*.ip_user_files +*.cache +*.srcs \ No newline at end of file diff --git a/driver/xrt/CMakeLists.txt b/driver/xrt/CMakeLists.txt index f6ed4236..c6b62a9b 100644 --- a/driver/xrt/CMakeLists.txt +++ b/driver/xrt/CMakeLists.txt @@ -74,6 +74,7 @@ set(ACCL_DOCS_RST ) set(EN_COYOTE ON) +set(EN_AVX 1 CACHE STRING "AVX environment.") if(EN_COYOTE) message("Enable Coyote") set(ACCL_HEADERS @@ -88,6 +89,7 @@ if(EN_COYOTE) file(GLOB COYOTE_SOURCE "${COYOTE_SOURCE_PATH}/*.cpp") if(EN_AVX) + add_definitions(-DEN_AVX) set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -mavx -march=native -O3") else() set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -march=native -O1") diff --git a/driver/xrt/include/accl/coyotebuffer.hpp b/driver/xrt/include/accl/coyotebuffer.hpp index c082a856..4ba10933 100644 --- a/driver/xrt/include/accl/coyotebuffer.hpp +++ b/driver/xrt/include/accl/coyotebuffer.hpp @@ -19,7 +19,7 @@ #pragma once #include "buffer.hpp" #include "common.hpp" -#include "cProcess.hpp" +#include "cThread.hpp" #include "cDefs.hpp" #include "coyotedevice.hpp" #include @@ -59,7 +59,7 @@ template class CoyoteBuffer : public Buffer { this->n_pages = (buffer_size + page_size - 1) / page_size; std::cerr << "CoyoteBuffer contructor called! page_size:"<aligned_buffer = (dtype *)this->device->coyote_proc->getMem({fpga::CoyoteAlloc::HUGE_2M, n_pages}); + this->aligned_buffer = (dtype *)this->device->coyote_proc->getMem({fpga::CoyoteAlloc::HPF, n_pages}); this->update_buffer(this->aligned_buffer, (addr_t)this->aligned_buffer); @@ -114,20 +114,28 @@ template class CoyoteBuffer : public Buffer { { std::cerr << "calling sync: " << std::setbase(16) << (uint64_t)this->aligned_buffer << ", size: " << std::setbase(10) << this->size() << std::endl; - this->device->coyote_proc->invoke({fpga::CoyoteOper::SYNC, this->aligned_buffer, (uint32_t)this->_size, true, true, 0, false}); + fpga::sgEntry sg; + memset(&sg, 0, sizeof(fpga::syncSg)); + sg.sync.addr = this->aligned_buffer; + + this->device->coyote_proc->invoke(fpga::CoyoteOper::LOCAL_SYNC, &sg, {true, true, true}); this->host_flag = true; } /** * Sync the data from the host to the device. - * + * */ void sync_to_device() override { std::cerr << "calling offload: " << std::setbase(16) << (uint64_t)this->aligned_buffer << ", size: " << std::setbase(10) << this->size() << std::endl; - this->device->coyote_proc->invoke({fpga::CoyoteOper::OFFLOAD, this->aligned_buffer, (uint32_t)this->_size, true, true, 0, false}); + fpga::sgEntry sg; + memset(&sg, 0, sizeof(fpga::syncSg)); + sg.sync.addr = this->aligned_buffer; + + this->device->coyote_proc->invoke(fpga::CoyoteOper::LOCAL_OFFLOAD, &sg, {true, true, true}); this->host_flag = false; } @@ -144,7 +152,7 @@ template class CoyoteBuffer : public Buffer { // } // } - std::cerr << "Free user buffer from cProc cPid:"<< std::setbase(10)<device->coyote_proc->getCpid()<<", buffer_size:"<aligned_buffer<device->coyote_proc->getCtid()<<", buffer_size:"<aligned_buffer<device->coyote_proc->freeMem(this->aligned_buffer); return; } diff --git a/driver/xrt/include/accl/coyotedevice.hpp b/driver/xrt/include/accl/coyotedevice.hpp index 7d3e1689..a33d69d8 100644 --- a/driver/xrt/include/accl/coyotedevice.hpp +++ b/driver/xrt/include/accl/coyotedevice.hpp @@ -20,9 +20,9 @@ #include "acclrequest.hpp" #include "cclo.hpp" #include "constants.hpp" -#include "cProcess.hpp" -#include "ibvQpConn.hpp" -#include "ibvStructs.hpp" +#include "cThread.hpp" +// #include "ibvQpConn.hpp" +// #include "ibvStructs.hpp" #include #include #include @@ -108,7 +108,7 @@ class CoyoteDevice : public CCLO { void printDebug() override; - fpga::cProcess* get_device(){ + fpga::cThread* get_device(){ return coyote_proc; } @@ -120,13 +120,13 @@ class CoyoteDevice : public CCLO { val_t get_retcode(ACCLRequest *request) override; - fpga::cProcess* coyote_proc; + fpga::cThread* coyote_proc; // RDMA related // RDMA requires multiple processes to establish queue pairs // The CCLO kernel is still managed by coyote_proc unsigned int num_qp; - std::vector coyote_qProc_vec; + std::vector*> coyote_qProc_vec; private: const size_t OFFSET_CCLO = 0x0; diff --git a/driver/xrt/src/coyotedevice.cpp b/driver/xrt/src/coyotedevice.cpp index fd96b904..5c495c84 100644 --- a/driver/xrt/src/coyotedevice.cpp +++ b/driver/xrt/src/coyotedevice.cpp @@ -18,7 +18,7 @@ #include "accl/coyotedevice.hpp" #include "accl/common.hpp" -#include "cProcess.hpp" +#include "cThread.hpp" #include #include @@ -276,28 +276,28 @@ void CoyoteRequest::wait_kernel() { } CoyoteDevice::CoyoteDevice(): num_qp(0) { - this->coyote_proc = new fpga::cProcess(targetRegion, getpid()); - std::cerr << "ACLL DEBUG: aquiring cProc: targetRegion: " << targetRegion << ", cPid: " << coyote_proc->getCpid() << std::endl; + this->coyote_proc = new fpga::cThread(targetRegion, getpid(), 0); + std::cerr << "ACLL DEBUG: aquiring cProc: targetRegion: " << targetRegion << ", cPid: " << coyote_proc->getCtid() << std::endl; } CoyoteDevice::CoyoteDevice(unsigned int num_qp): num_qp(num_qp) { for (unsigned int i=0; i<(num_qp+1); i++) { - fpga::cProcess* cproc = new fpga::cProcess(targetRegion, getpid()); + fpga::cThread* cproc = new fpga::cThread(targetRegion, getpid(), 0); coyote_qProc_vec.push_back(cproc); } for (unsigned int i=0; igetCpid() == 0){ + if(coyote_qProc_vec[i]->getCtid() == 0){ this->coyote_proc = coyote_qProc_vec[i]; - std::cerr << "ACLL DEBUG: aquiring cProc: targetRegion: " << targetRegion << ", cPid: " << coyote_proc->getCpid() << std::endl; + std::cerr << "ACLL DEBUG: aquiring cProc: targetRegion: " << targetRegion << ", cPid: " << coyote_proc->getCtid() << std::endl; coyote_qProc_vec.erase(coyote_qProc_vec.begin() + i); break; } } - if(coyote_proc == NULL || coyote_proc->getCpid() != 0){ + if(coyote_proc == NULL || coyote_proc->getCtid() != 0){ std::cerr << "cProc initialization error!"<getCpid() << std::endl; + std::cerr << "ACLL DEBUG: aquiring qProc: targetRegion: " << targetRegion << ", cPid: " << coyote_qProc_vec[i]->getCtid() << std::endl; } } @@ -369,7 +369,7 @@ CCLO::deviceType CoyoteDevice::get_device_type() void CoyoteDevice::printDebug(){ coyote_proc->printDebug(); - std::ifstream inputFile("/sys/kernel/coyote_cnfg/cyt_attr_nstats_q0"); + std::ifstream inputFile("/sys/kernel/coyote_sysfs_0/cyt_attr_nstats"); if (!inputFile.is_open()) { std::cerr << "Failed to open net sts file." << std::endl; diff --git a/kernels/plugins/Makefile b/kernels/plugins/Makefile index da39ea18..c176941d 100644 --- a/kernels/plugins/Makefile +++ b/kernels/plugins/Makefile @@ -16,8 +16,8 @@ # # *******************************************************************************/ -PERIPHERAL_IPS = hostctrl loopback reduce_ops hp_compression dummy_tcp_stack client_arbiter vadd_put cyt_adapter external_dma dummy_cyt_rdma_stack dummy_cyt_dma tcp_session_handler -DEVICE=xcu280-fsvh2892-2L-e +PERIPHERAL_IPS = hostctrl loopback reduce_ops hp_compression dummy_tcp_stack client_arbiter vadd_put cyt_adapter external_dma tcp_session_handler +DEVICE=xcu55c-fsvh2892-2L-e TARGET=ip all: $(PERIPHERAL_IPS) diff --git a/kernels/plugins/cyt_adapter/Makefile b/kernels/plugins/cyt_adapter/Makefile index be1a85c2..2db4e41d 100644 --- a/kernels/plugins/cyt_adapter/Makefile +++ b/kernels/plugins/cyt_adapter/Makefile @@ -16,19 +16,24 @@ # *******************************************************************************/ TARGET=ip -DEVICE=xcu250-figd2104-2L-e -CYT_DMA_ADAPTER=cyt_dma_adapter_$(DEVICE).xo +DEVICE=xcu55c-fsvh2892-2L-e +CYT_DMA_SQ_ADAPTER=cyt_dma_sq_adapter_$(DEVICE).xo CYT_RDMA_ARBITER=cyt_rdma_arbiter_$(DEVICE).xo -CYT_RDMA_MUX=cyt_rdma_mux_$(DEVICE).xo +CCLO_SQ_ADAPTER=cclo_sq_adapter_$(DEVICE).xo +CYT_CQ_DM_STS_CONVERTER=cyt_cq_dm_sts_converter_$(DEVICE).xo -all: $(CYT_DMA_ADAPTER) $(CYT_RDMA_ARBITER) $(CYT_RDMA_MUX) -$(CYT_DMA_ADAPTER): build_cyt_dma_adapter.tcl cyt_dma_adapter.cpp +all: $(CYT_RDMA_ARBITER) $(CCLO_SQ_ADAPTER) $(CYT_DMA_SQ_ADAPTER) $(CYT_CQ_DM_STS_CONVERTER) + +$(CYT_CQ_DM_STS_CONVERTER): build_cyt_cq_dm_sts_converter.tcl cyt_cq_dm_sts_converter.cpp + vitis_hls $< -tclargs $(TARGET) $(DEVICE) + +$(CYT_DMA_SQ_ADAPTER): build_cyt_dma_sq_adapter.tcl cyt_dma_sq_adapter.cpp vitis_hls $< -tclargs $(TARGET) $(DEVICE) $(CYT_RDMA_ARBITER): build_cyt_rdma_arbiter.tcl cyt_rdma_arbiter.cpp vitis_hls $< -tclargs $(TARGET) $(DEVICE) -$(CYT_RDMA_MUX): build_cyt_rdma_mux.tcl cyt_rdma_mux.cpp +$(CCLO_SQ_ADAPTER): build_cclo_sq_adapter.tcl cclo_sq_adapter.cpp vitis_hls $< -tclargs $(TARGET) $(DEVICE) diff --git a/kernels/plugins/cyt_adapter/build_cyt_rdma_mux.tcl b/kernels/plugins/cyt_adapter/build_cclo_sq_adapter.tcl similarity index 81% rename from kernels/plugins/cyt_adapter/build_cyt_rdma_mux.tcl rename to kernels/plugins/cyt_adapter/build_cclo_sq_adapter.tcl index ac0e6dde..c4d915ea 100644 --- a/kernels/plugins/cyt_adapter/build_cyt_rdma_mux.tcl +++ b/kernels/plugins/cyt_adapter/build_cclo_sq_adapter.tcl @@ -51,16 +51,15 @@ switch $command { } -open_project build_cyt_rdma_mux.${device} +open_project build_cclo_sq_adapter.${device} +add_files cclo_sq_adapter.cpp -cflags "-std=c++14 -I. -I../../cclo/hls/eth_intf -I../../../driver/hls/ -DACCL_SYNTHESIS" -add_files cyt_rdma_mux.cpp -cflags "-std=c++14 -I. -I../../cclo/hls/eth_intf -I../../../driver/hls/ -DACCL_SYNTHESIS" - -set_top cyt_rdma_mux +set_top cclo_sq_adapter open_solution sol1 -config_export -format xo -library ACCL -output [pwd]/cyt_rdma_mux_${device}.xo +config_export -format xo -library ACCL -output [pwd]/cclo_sq_adapter_${device}.xo if {$do_sim} { csim_design -clean diff --git a/kernels/plugins/cyt_adapter/build_cyt_dma_adapter.tcl b/kernels/plugins/cyt_adapter/build_cyt_cq_dm_sts_converter.tcl similarity index 75% rename from kernels/plugins/cyt_adapter/build_cyt_dma_adapter.tcl rename to kernels/plugins/cyt_adapter/build_cyt_cq_dm_sts_converter.tcl index 342b3400..1b868a48 100644 --- a/kernels/plugins/cyt_adapter/build_cyt_dma_adapter.tcl +++ b/kernels/plugins/cyt_adapter/build_cyt_cq_dm_sts_converter.tcl @@ -17,7 +17,6 @@ set command [lindex $argv 0] set device [lindex $argv 1] -set stack [lindex $argv 2] set do_sim 0 set do_syn 0 @@ -52,19 +51,15 @@ switch $command { } -open_project build_cyt_dma_adapter.${device} +open_project build_cyt_cq_dm_sts_converter.${device} -if {$stack eq "RDMA"} { - add_files cyt_dma_adapter.cpp -cflags "-std=c++14 -I. -I../../../driver/hls/ -DACCL_SYNTHESIS -DACCL_RDMA" -} else { - add_files cyt_dma_adapter.cpp -cflags "-std=c++14 -I. -I../../../driver/hls/ -DACCL_SYNTHESIS" -} +add_files cyt_cq_dm_sts_converter.cpp -cflags "-std=c++14 -I. -I../../cclo/hls/eth_intf -I../../../driver/hls/ -DACCL_SYNTHESIS" -set_top cyt_dma_adapter +set_top cyt_cq_dm_sts_converter open_solution sol1 -config_export -format xo -library ACCL -output [pwd]/cyt_dma_adapter_$device.xo +config_export -format xo -library ACCL -output [pwd]/cyt_cq_dm_sts_converter_${device}.xo if {$do_sim} { csim_design -clean diff --git a/kernels/plugins/cyt_adapter/build_cyt_dma_sq_adapter.tcl b/kernels/plugins/cyt_adapter/build_cyt_dma_sq_adapter.tcl new file mode 100644 index 00000000..e8e851a7 --- /dev/null +++ b/kernels/plugins/cyt_adapter/build_cyt_dma_sq_adapter.tcl @@ -0,0 +1,82 @@ +# /******************************************************************************* +# Copyright (C) 2023 Advanced Micro Devices, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# *******************************************************************************/ + +set command [lindex $argv 0] +set device [lindex $argv 1] + +set do_sim 0 +set do_syn 0 +set do_export 0 +set do_cosim 0 + +switch $command { + "sim" { + set do_sim 1 + } + "syn" { + set do_syn 1 + } + "ip" { + set do_syn 1 + set do_export 1 + } + "cosim" { + set do_syn 1 + set do_cosim 1 + } + "all" { + set do_sim 1 + set do_syn 1 + set do_export 1 + set do_cosim 1 + } + default { + puts "Unrecognized command" + exit + } +} + + +open_project build_cyt_dma_sq_adapter.${device} + +add_files cyt_dma_sq_adapter.cpp -cflags "-std=c++14 -I. -I../../cclo/hls/eth_intf -I../../../driver/hls/ -DACCL_SYNTHESIS" + + +set_top cyt_dma_sq_adapter + +open_solution sol1 +config_export -format xo -library ACCL -output [pwd]/cyt_dma_sq_adapter_${device}.xo + +if {$do_sim} { + csim_design -clean +} + +if {$do_syn} { + set_part $device + create_clock -period 4 -name default + csynth_design +} + +if {$do_export} { + export_design +} + +if ${do_cosim} { + cosim_design +} + +exit diff --git a/kernels/plugins/cyt_adapter/cclo_sq_adapter.cpp b/kernels/plugins/cyt_adapter/cclo_sq_adapter.cpp new file mode 100644 index 00000000..cb15cbba --- /dev/null +++ b/kernels/plugins/cyt_adapter/cclo_sq_adapter.cpp @@ -0,0 +1,128 @@ +/******************************************************************************* +# Copyright (C) 2023 Advanced Micro Devices, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# *******************************************************************************/ + +#include "cyt.h" +#include "eth_intf.h" + +using namespace std; + +// convert the cclo sq (rdma) to cyt sq (rdma) +// currently the cclo sq only contains WRITE/SEND rdma command +// we keep the conversion to cyt_sq_rd output just for consistency of interfaces and future extension +// the m_axis_cyt data stream corresponds to rreq_send and we use the dest to indicate whether host/device +// the s_axis_cyt data stream corresponds to rreq_recv and we simply consume it +void cclo_sq_adapter( + hls::stream& cclo_sq, + hls::stream >& s_axis_cclo, + hls::stream& cyt_sq_wr, + hls::stream& cyt_sq_rd, + hls::stream >& m_axis_cyt, + hls::stream >& s_axis_cyt + ) +{ +#pragma HLS INTERFACE axis register port=cclo_sq +#pragma HLS INTERFACE axis register port=s_axis_cclo +#pragma HLS INTERFACE axis register port=cyt_sq_wr +#pragma HLS INTERFACE axis register port=cyt_sq_rd +#pragma HLS INTERFACE axis register port=m_axis_cyt +#pragma HLS INTERFACE axis register port=s_axis_cyt +#pragma HLS aggregate variable=cclo_sq compact=bit +#pragma HLS aggregate variable=cyt_sq_wr compact=bit +#pragma HLS aggregate variable=cyt_sq_rd compact=bit + +#pragma HLS INTERFACE ap_ctrl_none port=return +#pragma HLS PIPELINE II=1 + + enum fsmStateType {META, WR_STREAM, RD_STREAM}; + static fsmStateType fsmState = META; + + static rdma_req_t cclo_req; + static cyt_req_t cyt_req; + static ap_axiu<512, 0, 0, 8> currWord; + static ap_uint<32> pkt_word; + static ap_uint<32> word_cnt = 0; + + switch (fsmState) + { + case META: + if(!STREAM_IS_EMPTY(cclo_sq)){ + cclo_req = STREAM_READ(cclo_sq); + + cyt_req.rsrvd = 0; + cyt_req.offs = 0; + cyt_req.host = 0; + cyt_req.actv = 0; + cyt_req.len = cclo_req.len; + cyt_req.vaddr = cclo_req.vaddr; + cyt_req.last = 1; // always assert last + cyt_req.dest = cclo_req.host; // 0-device memory, 1-host memory; + cyt_req.pid = cclo_req.qpn(CYT_PID_BITS-1,0); //qpn lowest bits are pid + cyt_req.vfid = cclo_req.qpn(CYT_DEST_BITS+CYT_PID_BITS-1,CYT_PID_BITS); + cyt_req.remote = 0; + cyt_req.rdma = 0; + cyt_req.mode = 0; // always PARSE + cyt_req.strm = CYT_STRM_RDMA; + cyt_req.opcode = cclo_req.opcode; + + pkt_word = (cyt_req.len + 63) >> 6; + + if(cyt_req.opcode == CYT_RDMA_WRITE || cyt_req.opcode == CYT_RDMA_SEND || cyt_req.opcode == CYT_RDMA_IMMED){ + STREAM_WRITE(cyt_sq_wr, cyt_req); + fsmState = WR_STREAM; + } else if (cyt_req.opcode == CYT_RDMA_READ) { + STREAM_WRITE(cyt_sq_rd, cyt_req); + fsmState = RD_STREAM; + } + } + break; + // move s_axis_cclo to m_axis_cyt and adjust the dest field + case WR_STREAM: + if (!STREAM_IS_EMPTY(s_axis_cclo)) + { + currWord = STREAM_READ(s_axis_cclo); + ap_axiu<512, 0, 0, 8> outWord; + + outWord.data = currWord.data; + outWord.keep = currWord.keep; + outWord.last = currWord.last; + outWord.dest = cyt_req.dest; // use the dest flag to indicate whether it is to host or device + word_cnt++; + + if (word_cnt == pkt_word) + { + word_cnt = 0; + outWord.last = 1; + fsmState = META; + } + STREAM_WRITE(m_axis_cyt, outWord); + } + break; + // just consume all the data + case RD_STREAM: + if(!STREAM_IS_EMPTY(s_axis_cyt)){ + currWord = STREAM_READ(s_axis_cyt); + word_cnt++; + if (word_cnt == pkt_word) + { + word_cnt = 0; + fsmState = META; + } + } + break; + } + +} \ No newline at end of file diff --git a/kernels/plugins/cyt_adapter/cyt.h b/kernels/plugins/cyt_adapter/cyt.h index ae8d428b..39f7bcf2 100644 --- a/kernels/plugins/cyt_adapter/cyt.h +++ b/kernels/plugins/cyt_adapter/cyt.h @@ -24,70 +24,15 @@ using namespace std; +#define CYT_OFFS_BITS 6 #define CYT_VADDR_BITS 48 #define CYT_LEN_BITS 28 #define CYT_DEST_BITS 4 #define CYT_PID_BITS 6 -#define CYT_N_REGIONS_BITS 1 -#define CYT_RSRVD_BITS 96-4-CYT_N_REGIONS_BITS-CYT_VADDR_BITS-CYT_LEN_BITS-CYT_DEST_BITS-CYT_PID_BITS - -struct cyt_req_t{ - ap_uint rsrvd; - ap_uint vfid; - ap_uint pid; - ap_uint dest; - ap_uint<1> host; - ap_uint<1> ctl; - ap_uint<1> sync; - ap_uint<1> stream; - ap_uint len; - ap_uint vaddr; - - cyt_req_t() : rsrvd(0), vfid(0), pid(0), dest(0), host(0), ctl(0), sync(0), stream(0), len(0), vaddr(0) {} - - cyt_req_t(ap_uint rsrvd_arg, ap_uint vfid_arg, ap_uint pid_arg, - ap_uint dest_arg, ap_uint<1> host_arg, ap_uint<1> ctl_arg, ap_uint<1> sync_arg, - ap_uint<1> stream_arg, ap_uint len_arg, ap_uint vaddr_arg) - : rsrvd(rsrvd_arg), - vfid(vfid_arg), - pid(pid_arg), - dest(dest_arg), - host(host_arg), - ctl(ctl_arg), - sync(sync_arg), - stream(stream_arg), - len(len_arg), - vaddr(vaddr_arg) {} - - cyt_req_t(ap_uint<96> in) { - rsrvd = in(CYT_RSRVD_BITS - 1, 0); - vfid = in(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS-1, CYT_RSRVD_BITS); - pid = in(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS-1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS); - dest = in(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS-1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS); - host = in(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS); - ctl = in(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+1); - sync = in(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+2,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+2); - stream = in(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+3,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+3); - len = in(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+4+CYT_LEN_BITS-1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+4); - vaddr = in(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+4+CYT_LEN_BITS+CYT_VADDR_BITS-1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+4+CYT_LEN_BITS); - } - - operator ap_uint<96>() { - ap_uint<96> ret; - ret(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+4+CYT_LEN_BITS+CYT_VADDR_BITS-1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+4+CYT_LEN_BITS) = vaddr; //vaddr - ret(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+4+CYT_LEN_BITS-1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+4) = len; //len - ret(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+3,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+3) = stream; //stream - ret(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+2,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+2) = sync; //sync - ret(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS+1) = ctl; //ctl - ret(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS) = host; //host - ret(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS+CYT_DEST_BITS-1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS) = dest; //dest - ret(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS+CYT_PID_BITS-1,CYT_RSRVD_BITS+CYT_N_REGIONS_BITS) = pid; //pid - ret(CYT_RSRVD_BITS+CYT_N_REGIONS_BITS-1,CYT_RSRVD_BITS) = vfid; //vfid - ret(CYT_RSRVD_BITS-1,0) = rsrvd; //rsrvd, disregard - return ret; - } -}; - +#define CYT_STRM_BITS 2 +#define CYT_OPCODE_BITS 5 +#define CYT_REQ_RSRVD_BITS (128 - CYT_OFFS_BITS - 2 - CYT_VADDR_BITS - CYT_LEN_BITS - 1 - 2 * CYT_DEST_BITS - CYT_PID_BITS - 3 - CYT_STRM_BITS - CYT_OPCODE_BITS) +#define CYT_ACK_RSRVD_BITS (32 - CYT_OPCODE_BITS - CYT_STRM_BITS - 2 - CYT_DEST_BITS - CYT_PID_BITS - CYT_DEST_BITS) // Coyote RDMA Opcode #define CYT_RDMA_READ 0 @@ -95,105 +40,158 @@ struct cyt_req_t{ #define CYT_RDMA_SEND 2 #define CYT_RDMA_IMMED 3 -// Coyote cyt_rdma_req_t structs -#define CYT_RDMA_OPCODE_BITS 5 -#define CYT_RDMA_MSG_BITS 448 -#define CYT_RDMA_OFFS_BITS 4 -#define CYT_RDMA_QPN_BITS 10 -#define CYT_RDMA_MSN_BITS 24 -#define CYT_RDMA_RSRVD_BITS 17 -#define CYT_RDMA_REQ_BITS CYT_RDMA_RSRVD_BITS+CYT_RDMA_MSG_BITS+CYT_RDMA_OFFS_BITS+CYT_RDMA_MSN_BITS+4+CYT_RDMA_QPN_BITS+CYT_RDMA_OPCODE_BITS - -#define CYT_RDMA_VADDR_BITS 64 -#define CYT_RDMA_LEN_BITS 32 -#define CYT_RDMA_PARAMS_BITS 288 - -struct cyt_rdma_req_t{ - ap_uint rsrvd; - ap_uint msg; - ap_uint offs; - ap_uint ssn; - ap_uint<1> cmplt; - ap_uint<1> last; - ap_uint<1> mode; - ap_uint<1> host; - ap_uint qpn; - ap_uint opcode; - - cyt_rdma_req_t() : rsrvd(0), msg(0), offs(0), ssn(0), cmplt(0), last(0), mode(0), host(0), qpn(0), opcode(0) {} - cyt_rdma_req_t(ap_uint in) { - rsrvd = in(CYT_RDMA_RSRVD_BITS - 1, 0); - msg = in(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS - 1, CYT_RDMA_RSRVD_BITS); - offs = in(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS - 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS); - ssn = in(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS - 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS); - cmplt = in(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS); - last = in(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 1); - mode = in(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 2, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 2); - host = in(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 3, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 3); - qpn = in(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 4 + CYT_RDMA_QPN_BITS - 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 4); - opcode = in(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 4 + CYT_RDMA_QPN_BITS + CYT_RDMA_OPCODE_BITS - 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 4 + CYT_RDMA_QPN_BITS); - } - operator ap_uint() { - ap_uint ret; - ret(CYT_RDMA_RSRVD_BITS - 1, 0) = rsrvd; - ret(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS - 1, CYT_RDMA_RSRVD_BITS) = msg; - ret(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS - 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS) = offs; - ret(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS - 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS) = ssn; - ret(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS) = cmplt; - ret(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 1) = last; - ret(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 2, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 2) = mode; - ret(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 3, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 3) = host; - ret(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 4 + CYT_RDMA_QPN_BITS - 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 4) = qpn; - ret(CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 4 + CYT_RDMA_QPN_BITS + CYT_RDMA_OPCODE_BITS - 1, CYT_RDMA_RSRVD_BITS + CYT_RDMA_MSG_BITS + CYT_RDMA_OFFS_BITS + CYT_RDMA_MSN_BITS + 4 + CYT_RDMA_QPN_BITS) = opcode; - return ret; - } -} ; +#define RC_SEND_FIRST 0 +#define RC_SEND_MIDDLE 1 +#define RC_SEND_LAST 2 +#define RC_SEND_ONLY 4 +#define RC_RDMA_WRITE_FIRST 6 +#define RC_RDMA_WRITE_MIDDLE 7 +#define RC_RDMA_WRITE_LAST 8 +#define RC_RDMA_WRITE_LAST_WITH_IMD 9 +#define RC_RDMA_WRITE_ONLY 10 +#define RC_RDMA_WRITE_ONLY_WIT_IMD 11 +#define RC_RDMA_READ_REQUEST 12 +#define RC_RDMA_READ_RESP_FIRST 13 +#define RC_RDMA_READ_RESP_MIDDLE 14 +#define RC_RDMA_READ_RESP_LAST 15 +#define RC_RDMA_READ_RESP_ONLY 16 +#define RC_ACK 17 + +// Coyote STRM Opcode +#define CYT_STRM_CARD 0 +#define CYT_STRM_HOST 1 +#define CYT_STRM_RDMA 2 +#define CYT_STRM_TCP 3 +struct cyt_req_t{ + ap_uint rsrvd; // 19 bits + ap_uint offs; // 6 bits + ap_uint<1> host; // 1 bit + ap_uint<1> actv; // 1 bit + + ap_uint len; // 28 bits + ap_uint vaddr; // 48 bits + + ap_uint<1> last; // 1 bit -struct cyt_rdma_req_msg_t{ - ap_uint lvaddr; - ap_uint rvaddr; - ap_uint len; - ap_uint params; + ap_uint dest; // 4 bits + ap_uint pid; // 6 bits + ap_uint vfid; // 4 bits + + ap_uint<1> remote; // 1 bit + ap_uint<1> rdma; // 1 bit + ap_uint<1> mode; // 1 bit + ap_uint strm; // 2 bits + ap_uint opcode; // 5 bits - cyt_rdma_req_msg_t() : lvaddr(0), rvaddr(0), len(0), params(0) {} - cyt_rdma_req_msg_t(ap_uint in) { - lvaddr = in(CYT_RDMA_VADDR_BITS - 1, 0); - rvaddr = in(2 * CYT_RDMA_VADDR_BITS - 1, CYT_RDMA_VADDR_BITS); - len = in(2 * CYT_RDMA_VADDR_BITS + CYT_RDMA_LEN_BITS - 1, 2 * CYT_RDMA_VADDR_BITS); - params = in(CYT_RDMA_MSG_BITS - 1, 2 * CYT_RDMA_VADDR_BITS + CYT_RDMA_LEN_BITS); + // Default constructor + cyt_req_t() + : rsrvd(0), offs(0), host(0), actv(0), len(0), vaddr(0), last(0), + dest(0), pid(0), vfid(0), remote(0), rdma(0), mode(0), strm(0), opcode(0) {} + + // Parameterized constructor + cyt_req_t(ap_uint rsrvd_arg, ap_uint offs_arg, ap_uint<1> host_arg, ap_uint<1> actv_arg, + ap_uint len_arg, ap_uint vaddr_arg, ap_uint<1> last_arg, + ap_uint dest_arg, ap_uint pid_arg, ap_uint vfid_arg, + ap_uint<1> remote_arg, ap_uint<1> rdma_arg, ap_uint<1> mode_arg, ap_uint strm_arg, ap_uint opcode_arg) + : rsrvd(rsrvd_arg), offs(offs_arg), host(host_arg), actv(actv_arg), len(len_arg), vaddr(vaddr_arg), + last(last_arg), dest(dest_arg), pid(pid_arg), vfid(vfid_arg), remote(remote_arg), rdma(rdma_arg), + mode(mode_arg), strm(strm_arg), opcode(opcode_arg) {} + + // Constructor from a single ap_uint<128> argument + cyt_req_t(ap_uint<128> in) { + rsrvd = in(CYT_REQ_RSRVD_BITS - 1, 0); + offs = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS - 1, CYT_REQ_RSRVD_BITS); + host = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS); + actv = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 1); + len = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 1 + CYT_LEN_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2); + vaddr = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS); + last = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS); + dest = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + 1); + pid = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS); + vfid = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS); + remote = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS); + rdma = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 1); + mode = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 2, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 2); + strm = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 3 + CYT_STRM_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 3); + opcode = in(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 3 + CYT_STRM_BITS + CYT_OPCODE_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 3 + CYT_STRM_BITS); } - operator ap_uint() { - ap_uint ret; - ret(CYT_RDMA_VADDR_BITS - 1, 0) = lvaddr; - ret(2 * CYT_RDMA_VADDR_BITS - 1, CYT_RDMA_VADDR_BITS) = rvaddr; - ret(2 * CYT_RDMA_VADDR_BITS + CYT_RDMA_LEN_BITS - 1, 2 * CYT_RDMA_VADDR_BITS) = len; - ret(CYT_RDMA_MSG_BITS - 1, 2 * CYT_RDMA_VADDR_BITS + CYT_RDMA_LEN_BITS) = params; + + operator ap_uint<128>() { + ap_uint<128> ret; + + // Assigning fields to the appropriate bit positions in the 128-bit return value. + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 3 + CYT_STRM_BITS + CYT_OPCODE_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 3 + CYT_STRM_BITS) = opcode; // opcode + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 3 + CYT_STRM_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 3) = strm; // strm + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 2, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 2) = mode; // mode + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS + 1) = rdma; // rdma + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS) = remote; // remote + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS + CYT_DEST_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS) = vfid; // vfid + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS + CYT_PID_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS) = pid; // pid + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + CYT_DEST_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS + 1) = dest; // dest + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS) = last; // last + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS + CYT_VADDR_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS) = vaddr; // vaddr + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2 + CYT_LEN_BITS - 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 2) = len; // len + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 1, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS + 1) = actv; // actv + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS, CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS) = host; // host + ret(CYT_REQ_RSRVD_BITS + CYT_OFFS_BITS - 1, CYT_REQ_RSRVD_BITS) = offs; // offs + ret(CYT_REQ_RSRVD_BITS - 1, 0) = rsrvd; // rsrvd, disregard + return ret; } }; +struct cyt_ack_t { + ap_uint rsrvd; // 9 bits + ap_uint vfid; // 4 bits + ap_uint pid; // 6 bits + ap_uint dest; // 4 bits + ap_uint<1> host; // 1 bit + ap_uint<1> remote; // 1 bit + ap_uint strm; // 2 bits + ap_uint opcode; // 5 bits + + // Default constructor + cyt_ack_t() + : rsrvd(0), vfid(0), pid(0), dest(0), host(0), remote(0), strm(0), opcode(0) {} + + // Parameterized constructor + cyt_ack_t(ap_uint rsrvd_arg, + ap_uint vfid_arg, + ap_uint pid_arg, + ap_uint dest_arg, + ap_uint<1> host_arg, + ap_uint<1> remote_arg, + ap_uint strm_arg, + ap_uint opcode_arg) + : rsrvd(rsrvd_arg), vfid(vfid_arg), pid(pid_arg), dest(dest_arg), + host(host_arg), remote(remote_arg), strm(strm_arg), opcode(opcode_arg) {} + + // Constructor from a single ap_uint<32> argument + cyt_ack_t(ap_uint<32> in) { + opcode = in(31, 31 - CYT_OPCODE_BITS + 1); + strm = in(31 - CYT_OPCODE_BITS, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS + 1); + remote = in(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 1, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 1); + host = in(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 2, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 2); + dest = in(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 3, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - CYT_DEST_BITS - 2); + pid = in(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - CYT_DEST_BITS - 3, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - CYT_DEST_BITS - CYT_PID_BITS - 2); + vfid = in(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - CYT_DEST_BITS - CYT_PID_BITS - 3, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 2 * CYT_DEST_BITS - CYT_PID_BITS - 2); + rsrvd = in(CYT_ACK_RSRVD_BITS - 1, 0); // Remaining bits for reserved + } + + // Conversion operator to ap_uint<32> + operator ap_uint<32>() { + ap_uint<32> ret; + ret(31, 31 - CYT_OPCODE_BITS + 1) = opcode; + ret(31 - CYT_OPCODE_BITS, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS + 1) = strm; + ret(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 1, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 1) = remote; + ret(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 2, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 2) = host; + ret(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 3, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - CYT_DEST_BITS - 2) = dest; + ret(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - CYT_DEST_BITS - 3, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - CYT_DEST_BITS - CYT_PID_BITS - 2) = pid; + ret(31 - CYT_OPCODE_BITS - CYT_STRM_BITS - CYT_DEST_BITS - CYT_PID_BITS - 3, 31 - CYT_OPCODE_BITS - CYT_STRM_BITS - 2 * CYT_DEST_BITS - CYT_PID_BITS - 2) = vfid; + ret(CYT_ACK_RSRVD_BITS - 1, 0) = rsrvd; -void cyt_dma_adapter( - //DM command streams - hls::stream> &dma0_s2mm_cmd, - hls::stream> &dma1_s2mm_cmd, - hls::stream> &dma0_mm2s_cmd, - hls::stream> &dma1_mm2s_cmd, - //DM status streams - hls::stream> &dma0_s2mm_sts, - hls::stream> &dma1_s2mm_sts, - hls::stream> &dma0_mm2s_sts, - hls::stream> &dma1_mm2s_sts, -#ifdef ACCL_RDMA - //RDMA rd_req and wr_req - hls::stream> & rdma_wr_req, - hls::stream> & rdma_rd_req, -#endif - //Coyote Bypass interface command and status - hls::stream> &cyt_byp_wr_cmd, - hls::stream> &cyt_byp_wr_sts, - hls::stream> &cyt_byp_rd_cmd, - hls::stream> &cyt_byp_rd_sts -); \ No newline at end of file + return ret; + } +}; diff --git a/kernels/plugins/cyt_adapter/cyt_cq_dm_sts_converter.cpp b/kernels/plugins/cyt_adapter/cyt_cq_dm_sts_converter.cpp new file mode 100644 index 00000000..6306434a --- /dev/null +++ b/kernels/plugins/cyt_adapter/cyt_cq_dm_sts_converter.cpp @@ -0,0 +1,119 @@ +/******************************************************************************* +# Copyright (C) 2023 Advanced Micro Devices, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# *******************************************************************************/ + +#include "cyt.h" + +using namespace std; + + +void cyt_cq_dm_sts_converter(hls::stream & cq_sts, + hls::stream> & dm0_sts, + hls::stream> & dm1_sts, + hls::stream>& dm0_meta, + hls::stream>& dm1_meta) +{ +#pragma HLS INTERFACE axis register port=cq_sts +#pragma HLS INTERFACE axis register port=dm0_sts +#pragma HLS INTERFACE axis register port=dm1_sts +#pragma HLS INTERFACE axis register port=dm0_meta +#pragma HLS INTERFACE axis register port=dm1_meta +#pragma HLS aggregate variable=cq_sts compact=bit + +#pragma HLS INTERFACE ap_ctrl_none port=return +#pragma HLS PIPELINE II=1 + + enum fsmStateType {CYT_STS_0, CYT_STS_1, DM_STS_0, DM_STS_1}; + static fsmStateType fsmState = CYT_STS_0; + + static cyt_ack_t cq_sts_word; + + static ap_axiu<32,0,0,0> dm_sts_word; + static ap_uint<1+4+23> dm_meta_word; + + switch (fsmState) + { + // the first state reads a cq_sts as a workaround to handle the 2-cycle burst of cq_sts signal + case CYT_STS_0: + if (!STREAM_IS_EMPTY(cq_sts)) + { + STREAM_READ(cq_sts); + fsmState = CYT_STS_1; + } + break; + case CYT_STS_1: + if (!STREAM_IS_EMPTY(cq_sts)) + { + cq_sts_word = STREAM_READ(cq_sts); + + // only process status if it is local memory completion status + // only send back ack when the cq_sts stems from kernel issued bypass commands with host == 0 + // if dest == 2, this comes from wr_req/rd_req, no need to forward to data mover + if((cq_sts_word.opcode == CYT_STRM_CARD || cq_sts_word.opcode == CYT_STRM_HOST) && cq_sts_word.host == 0 && (cq_sts_word.dest == 0 || cq_sts_word.dest == 1)) + { + if (cq_sts_word.dest == 0) { + fsmState = DM_STS_0; + } else if (cq_sts_word.dest == 1) { + fsmState = DM_STS_1; + } + } + else{ + fsmState = CYT_STS_0; + } + } + break; + case DM_STS_0: + if(!STREAM_IS_EMPTY(dm0_meta)){ + + dm_meta_word = STREAM_READ(dm0_meta); + + dm_sts_word.data.range(3,0) = dm_meta_word(26,23); //tag + dm_sts_word.data.range(4,4) = 0; // internal error + dm_sts_word.data.range(5,5) = 0; // decode erro + dm_sts_word.data.range(6,6) = 0; // slave error + dm_sts_word.data.range(7,7) = 1; // OK + dm_sts_word.data.range(30,8) = dm_meta_word(22,0); // bytes received + dm_sts_word.data.range(31,31) = dm_meta_word(27,27); // EOP + dm_sts_word.last = 1; + + STREAM_WRITE(dm0_sts, dm_sts_word); + + fsmState = CYT_STS_0; // todo: add the check of eop flag + } + break; + case DM_STS_1: + if(!STREAM_IS_EMPTY(dm1_meta)){ + + dm_meta_word = STREAM_READ(dm1_meta); + + dm_sts_word.data.range(3,0) = dm_meta_word(26,23); //tag + dm_sts_word.data.range(4,4) = 0; // internal error + dm_sts_word.data.range(5,5) = 0; // decode erro + dm_sts_word.data.range(6,6) = 0; // slave error + dm_sts_word.data.range(7,7) = 1; // OK + dm_sts_word.data.range(30,8) = dm_meta_word(22,0); // bytes received + dm_sts_word.data.range(31,31) = dm_meta_word(27,27); // EOP + dm_sts_word.last = 1; + + STREAM_WRITE(dm1_sts, dm_sts_word); + + fsmState = CYT_STS_0; // todo: add the check of eop flag + } + break; + + } +} + diff --git a/kernels/plugins/cyt_adapter/cyt_dma_adapter.cpp b/kernels/plugins/cyt_adapter/cyt_dma_adapter.cpp deleted file mode 100644 index 300e991a..00000000 --- a/kernels/plugins/cyt_adapter/cyt_dma_adapter.cpp +++ /dev/null @@ -1,238 +0,0 @@ -/******************************************************************************* -# Copyright (C) 2023 Advanced Micro Devices, Inc -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# *******************************************************************************/ - -#include "cyt.h" - -using namespace std; - -template -void dm_byp_cmd_converter(hls::stream>& dm_cmd, - hls::stream& byp_cmd, - hls::stream>& dm_meta - ) -{ -#pragma HLS inline off -#pragma HLS pipeline II=1 - - if (!STREAM_IS_EMPTY(dm_cmd)) - { - ap_axiu<104,0,0,DEST_WIDTH> dm_cmd_with_dest = STREAM_READ(dm_cmd); - ap_uint<104> dm_cmd_word = dm_cmd_with_dest.data; - - ap_uint<23> btt = dm_cmd_word(22,0); - ap_uint<64> saddr = dm_cmd_word(95,32); - ap_uint<4> tag = dm_cmd_word(99,96); - ap_uint<1> strm = dm_cmd_with_dest.dest(2,0); // 1 if targeting host memory, 0 if targeting card memory - ap_uint<1> ctl = dm_cmd_word(30,30); // ctl field determines if a TLAST must be asserted at the end of the data stream - - cyt_req_t req(0, 0, 0, DMA_CHANNEL, 0, ctl, 0, strm, btt, saddr); - STREAM_WRITE(byp_cmd, req); - - ap_uint<1+4+23> dm_meta_word; - dm_meta_word(22,0) = btt; - dm_meta_word(26,23) = tag; - dm_meta_word(27,27) = ctl; - STREAM_WRITE(dm_meta, dm_meta_word); - } -} - -template -void rdma_req_byp_cmd_converter( - hls::stream& rdma_req, - hls::stream& byp_cmd -) -{ -#pragma HLS inline off -#pragma HLS pipeline II=1 - - if(!STREAM_IS_EMPTY(rdma_req)){ - cyt_req_t req = STREAM_READ(rdma_req); - // TODO: - // Better mechanism of buffer & proc mapping - // Currently has to set the pid to 0, corresponding to coyote_proc instead of any coyote_qproc - // Every coyote_qproc has a unique physical address in device - cyt_req_t cmd(req.rsrvd, req.vfid, 0 /*req.pid*/, DMA_CHANNEL, 0, 1, 0, req.stream, req.len, req.vaddr); - STREAM_WRITE(byp_cmd, cmd); - } - -} - -void multiplexor(hls::stream& in0, - hls::stream& in1, - hls::stream& in2, - hls::stream& out) -{ -#pragma HLS inline off -#pragma HLS pipeline II=1 - - cyt_req_t currWord; - - if (!STREAM_IS_EMPTY(in0)) - { - currWord = STREAM_READ(in0); - STREAM_WRITE(out, currWord); - } - else if(!STREAM_IS_EMPTY(in1)) - { - currWord = STREAM_READ(in1); - STREAM_WRITE(out, currWord); - } - else if(!STREAM_IS_EMPTY(in2)) - { - currWord = STREAM_READ(in2); - STREAM_WRITE(out, currWord); - } - -} - - -void byp_dm_sts_converter(hls::stream> & byp_sts, - hls::stream> & dm0_sts, - hls::stream> & dm1_sts, - hls::stream>& dm0_meta, - hls::stream>& dm1_meta) -{ -#pragma HLS inline off -#pragma HLS pipeline II=1 - - if (!STREAM_IS_EMPTY(byp_sts)) - { - ap_uint<16> byp_sts_word = STREAM_READ(byp_sts); - // PID in LSB according to Coyote dma_rsp_t: - ap_uint pid = byp_sts_word(CYT_PID_BITS-1,0); - ap_uint dest = byp_sts_word(CYT_DEST_BITS+CYT_PID_BITS-1,CYT_PID_BITS); - ap_uint<1> strm = byp_sts_word(CYT_DEST_BITS+CYT_PID_BITS,CYT_DEST_BITS+CYT_PID_BITS); - ap_uint<1> host = byp_sts_word(CYT_DEST_BITS+CYT_PID_BITS+1,CYT_DEST_BITS+CYT_PID_BITS+1); - - ap_axiu<32,0,0,0> dm_sts_word; - ap_uint<1+4+23> dm_meta_word; - - // only send back ack when the byp_sts stems from kernel issued bypass commands - // if dest == 2, this comes from wr_req/rd_req, no need to forward to data mover - if(host == 0) - { - do{ - if(dest == 0){ - dm_meta_word = STREAM_READ(dm0_meta); - } else if (dest == 1){ - dm_meta_word = STREAM_READ(dm1_meta); - } - dm_sts_word.data.range(3,0) = dm_meta_word(26,23); //tag - dm_sts_word.data.range(4,4) = 0; // internal error - dm_sts_word.data.range(5,5) = 0; // decode error - dm_sts_word.data.range(6,6) = 0; // slave error - dm_sts_word.data.range(7,7) = 1; // OK - dm_sts_word.data.range(30,8) = dm_meta_word(22,0); // bytes received - dm_sts_word.data.range(31,31) = dm_meta_word(27,27); // EOP - dm_sts_word.last = 1; - if(dest == 0){ - STREAM_WRITE(dm0_sts, dm_sts_word); - } else if (dest == 1){ - STREAM_WRITE(dm1_sts, dm_sts_word); - } - } while(dm_meta_word(27,27) == 0); - } - } - -} - -// The cyt bypass commands have 3 sources if RDMA is enabled -// 2 DMA channels from the CCLO and the rdma req interface -void cyt_dma_adapter( - //DM command streams - hls::stream> &dma0_s2mm_cmd, - hls::stream> &dma1_s2mm_cmd, - hls::stream> &dma0_mm2s_cmd, - hls::stream> &dma1_mm2s_cmd, - //DM status streams - hls::stream> &dma0_s2mm_sts, - hls::stream> &dma1_s2mm_sts, - hls::stream> &dma0_mm2s_sts, - hls::stream> &dma1_mm2s_sts, - - //RDMA rd_req and wr_req - hls::stream & rdma_wr_req, - hls::stream & rdma_rd_req, - - //Coyote Bypass interface command and status - hls::stream &cyt_byp_wr_cmd, - hls::stream> &cyt_byp_wr_sts, - hls::stream &cyt_byp_rd_cmd, - hls::stream> &cyt_byp_rd_sts -) { -#pragma HLS INTERFACE axis port=dma0_s2mm_cmd -#pragma HLS INTERFACE axis port=dma1_s2mm_cmd -#pragma HLS INTERFACE axis port=dma0_mm2s_cmd -#pragma HLS INTERFACE axis port=dma1_mm2s_cmd -#pragma HLS INTERFACE axis port=dma0_s2mm_sts -#pragma HLS INTERFACE axis port=dma1_s2mm_sts -#pragma HLS INTERFACE axis port=dma0_mm2s_sts -#pragma HLS INTERFACE axis port=dma1_mm2s_sts -#pragma HLS INTERFACE axis port=cyt_byp_rd_cmd -#pragma HLS INTERFACE axis port=cyt_byp_rd_sts -#pragma HLS INTERFACE axis port=cyt_byp_wr_cmd -#pragma HLS INTERFACE axis port=cyt_byp_wr_sts -#pragma HLS INTERFACE ap_ctrl_none port=return -#pragma HLS DATAFLOW disable_start_propagation - -#pragma HLS aggregate variable=cyt_byp_wr_cmd compact=bit -#pragma HLS aggregate variable=cyt_byp_rd_cmd compact=bit - -#pragma HLS INTERFACE axis port=rdma_wr_req -#pragma HLS INTERFACE axis port=rdma_rd_req -#pragma HLS aggregate variable=rdma_wr_req compact=bit -#pragma HLS aggregate variable=rdma_rd_req compact=bit - - static hls::stream byp_wr_cmd_0; - #pragma HLS stream variable=byp_wr_cmd_0 depth=16 - static hls::stream byp_wr_cmd_1; - #pragma HLS stream variable=byp_wr_cmd_1 depth=16 - static hls::stream byp_rd_cmd_0; - #pragma HLS stream variable=byp_rd_cmd_0 depth=16 - static hls::stream byp_rd_cmd_1; - #pragma HLS stream variable=byp_rd_cmd_1 depth=16 - - static hls::stream> dma0_mm2s_meta; - #pragma HLS stream variable=dma0_mm2s_meta depth=16 - static hls::stream> dma1_mm2s_meta; - #pragma HLS stream variable=dma1_mm2s_meta depth=16 - static hls::stream> dma0_s2mm_meta; - #pragma HLS stream variable=dma0_s2mm_meta depth=16 - static hls::stream> dma1_s2mm_meta; - #pragma HLS stream variable=dma1_s2mm_meta depth=16 - - static hls::stream byp_wr_cmd_2; - #pragma HLS stream variable=byp_wr_cmd_2 depth=16 - static hls::stream byp_rd_cmd_2; - #pragma HLS stream variable=byp_rd_cmd_2 depth=16 - - dm_byp_cmd_converter<0>(dma0_s2mm_cmd, byp_wr_cmd_0, dma0_s2mm_meta); - dm_byp_cmd_converter<1>(dma1_s2mm_cmd, byp_wr_cmd_1, dma1_s2mm_meta); - rdma_req_byp_cmd_converter<2>(rdma_wr_req, byp_wr_cmd_2); - multiplexor(byp_wr_cmd_0,byp_wr_cmd_1,byp_wr_cmd_2,cyt_byp_wr_cmd); - - - dm_byp_cmd_converter<0>(dma0_mm2s_cmd,byp_rd_cmd_0, dma0_mm2s_meta); - dm_byp_cmd_converter<1>(dma1_mm2s_cmd,byp_rd_cmd_1, dma1_mm2s_meta); - rdma_req_byp_cmd_converter<2>(rdma_rd_req, byp_rd_cmd_2); - multiplexor(byp_rd_cmd_0,byp_rd_cmd_1,byp_rd_cmd_2,cyt_byp_rd_cmd); - - byp_dm_sts_converter(cyt_byp_wr_sts, dma0_s2mm_sts, dma1_s2mm_sts, dma0_s2mm_meta, dma1_s2mm_meta); - byp_dm_sts_converter(cyt_byp_rd_sts, dma0_mm2s_sts, dma1_mm2s_sts, dma0_mm2s_meta, dma1_mm2s_meta); - - -} diff --git a/kernels/plugins/cyt_adapter/cyt_dma_sq_adapter.cpp b/kernels/plugins/cyt_adapter/cyt_dma_sq_adapter.cpp new file mode 100644 index 00000000..abb76505 --- /dev/null +++ b/kernels/plugins/cyt_adapter/cyt_dma_sq_adapter.cpp @@ -0,0 +1,210 @@ +/******************************************************************************* +# Copyright (C) 2023 Advanced Micro Devices, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# *******************************************************************************/ + +#include "cyt.h" + +using namespace std; + +// convert the data mover command (dma) to the cyt_req_t (dma) +// currently all the memory accesses initialized by the CCLO is associated with pid 0 (coyote_proc) +// also we assume a vfid 0 for single cyt region +// the dest field of the dm cmd indicates the host/card accesses +// the dest field is converted to strm flag in the cyt_sq_cmd +// DMA Channel is used to select axis streams, channel 0 and 1 are reserved + +template +void dm_sq_cmd_converter(hls::stream>& dm_cmd, + hls::stream& cyt_sq_cmd, + hls::stream>& dm_meta + ) +{ +#pragma HLS inline off +#pragma HLS pipeline II=1 + + if (!STREAM_IS_EMPTY(dm_cmd)) + { + ap_axiu<104,0,0,DEST_WIDTH> dm_cmd_with_dest = STREAM_READ(dm_cmd); + ap_uint<104> dm_cmd_word = dm_cmd_with_dest.data; + + ap_uint<23> btt = dm_cmd_word(22,0); + ap_uint<64> saddr = dm_cmd_word(95,32); + ap_uint<4> tag = dm_cmd_word(99,96); + ap_uint<1> strm = dm_cmd_with_dest.dest(2,0); // 1 if targeting host memory, 0 if targeting card memory + ap_uint<1> ctl = dm_cmd_word(30,30); // ctl field determines if a TLAST must be asserted at the end of the data stream + + cyt_req_t req(0/*rsrvd_arg*/, 0 /*offs_arg*/, 0 /*host_arg*/, 0 /*actv_arg*/, + btt/*len_arg*/, saddr /*vaddr_arg*/, ctl /*last_arg*/, + DMA_CHANNEL /*dest_arg*/, 0 /*pid_arg*/, 0 /*vfid_arg*/, + 0 /*remote_arg*/, 0 /*rdma_arg*/, 0 /*mode_arg*/, strm /*strm_arg*/, 0 /*opcode_arg*/); + + STREAM_WRITE(cyt_sq_cmd, req); + + ap_uint<1+4+23> dm_meta_word; + dm_meta_word(22,0) = btt; + dm_meta_word(26,23) = tag; + dm_meta_word(27,27) = ctl; + STREAM_WRITE(dm_meta, dm_meta_word); + } +} + +// convert the cyt_rq (rdma) to cyt_sq (dma) +// Channel 2 of the host/card axis stream is reserved for cyt_rq command +// the rq dest field is used to indicate whether this is host/device access, it should be converted to strm field here +// the sq opcode is not relevant as it is targeting dma +template +void cyt_rq_sq_cmd_converter( + hls::stream& cyt_rq_cmd, + hls::stream& cyt_sq_cmd +) +{ +#pragma HLS inline off +#pragma HLS pipeline II=1 + + if(!STREAM_IS_EMPTY(cyt_rq_cmd)){ + cyt_req_t req = STREAM_READ(cyt_rq_cmd); + + // Currently has to set the pid to 0, corresponding to coyote_proc instead of any coyote_qproc + // Because all the buffer allocation within the ACCL driver is associated with the coyote_proc + // And every coyote_qproc has a unique physical address in device which is different than the coyote_proc + // Also mark the host flag in the new output command to 0 to indicate the command is issued from the kernel instead of host + // However, the cq of this command is not processed in the cq_dm_sts_converter as the dest channel is 2 + cyt_req_t cmd(req.rsrvd/*rsrvd_arg*/, req.offs /*offs_arg*/, 0/*host_arg*/, req.actv /*actv_arg*/, + req.len/*len_arg*/, req.vaddr /*vaddr_arg*/, req.last /*last_arg*/, + DMA_CHANNEL /*dest_arg*/, 0 /*pid_arg*/, req.vfid /*vfid_arg*/, + req.remote /*remote_arg*/, req.rdma /*rdma_arg*/, req.mode /*mode_arg*/, req.dest /*strm_arg*/, req.opcode /*opcode_arg*/); + + STREAM_WRITE(cyt_sq_cmd, cmd); + } + +} + +void multiplexor(hls::stream& in0, + hls::stream& in1, + hls::stream& in2, + hls::stream& in3, + hls::stream& out) +{ +#pragma HLS inline off +#pragma HLS pipeline II=1 + + cyt_req_t currWord; + + if (!STREAM_IS_EMPTY(in0)) + { + currWord = STREAM_READ(in0); + STREAM_WRITE(out, currWord); + } + else if(!STREAM_IS_EMPTY(in1)) + { + currWord = STREAM_READ(in1); + STREAM_WRITE(out, currWord); + } + else if(!STREAM_IS_EMPTY(in2)) + { + currWord = STREAM_READ(in2); + STREAM_WRITE(out, currWord); + } + else if(!STREAM_IS_EMPTY(in3)) + { + currWord = STREAM_READ(in3); + STREAM_WRITE(out, currWord); + } + +} + + + +// The cyt sq commands have 4 sources if RDMA is enabled +// 2 DMA channels from the CCLO, CCLO sq command, and the Cyt rq interface +void cyt_dma_sq_adapter( + //DM command streams + hls::stream> &dma0_s2mm_cmd, + hls::stream> &dma1_s2mm_cmd, + hls::stream> &dma0_mm2s_cmd, + hls::stream> &dma1_mm2s_cmd, + + //Coyote rq rd_req and wr_req + hls::stream & cyt_rq_wr_cmd, + hls::stream & cyt_rq_rd_cmd, + + //CCLO sq command + hls::stream& cclo_sq_wr_cmd, + hls::stream& cclo_sq_rd_cmd, + + //Coyote sq interface command + hls::stream &cyt_sq_wr_cmd, + hls::stream &cyt_sq_rd_cmd, + + //DM command meta + hls::stream> & dma0_s2mm_meta, + hls::stream> & dma1_s2mm_meta, + hls::stream> & dma0_mm2s_meta, + hls::stream> & dma1_mm2s_meta +) { +#pragma HLS INTERFACE axis port=dma0_s2mm_cmd +#pragma HLS INTERFACE axis port=dma1_s2mm_cmd +#pragma HLS INTERFACE axis port=dma0_mm2s_cmd +#pragma HLS INTERFACE axis port=dma1_mm2s_cmd +#pragma HLS INTERFACE axis port=dma0_s2mm_meta +#pragma HLS INTERFACE axis port=dma1_s2mm_meta +#pragma HLS INTERFACE axis port=dma0_mm2s_meta +#pragma HLS INTERFACE axis port=dma1_mm2s_meta +#pragma HLS INTERFACE axis port=cyt_sq_rd_cmd +#pragma HLS INTERFACE axis port=cyt_sq_wr_cmd +#pragma HLS INTERFACE ap_ctrl_none port=return +#pragma HLS DATAFLOW disable_start_propagation + +#pragma HLS aggregate variable=cyt_sq_wr_cmd compact=bit +#pragma HLS aggregate variable=cyt_sq_rd_cmd compact=bit + +#pragma HLS INTERFACE axis port=cyt_rq_wr_cmd +#pragma HLS INTERFACE axis port=cyt_rq_rd_cmd +#pragma HLS aggregate variable=cyt_rq_wr_cmd compact=bit +#pragma HLS aggregate variable=cyt_rq_rd_cmd compact=bit + +#pragma HLS INTERFACE axis port=cclo_sq_wr_cmd +#pragma HLS aggregate variable=cclo_sq_wr_cmd compact=bit +#pragma HLS INTERFACE axis port=cclo_sq_rd_cmd +#pragma HLS aggregate variable=cclo_sq_rd_cmd compact=bit + + static hls::stream sq_wr_cmd_0; + #pragma HLS stream variable=sq_wr_cmd_0 depth=16 + static hls::stream sq_wr_cmd_1; + #pragma HLS stream variable=sq_wr_cmd_1 depth=16 + static hls::stream sq_rd_cmd_0; + #pragma HLS stream variable=sq_rd_cmd_0 depth=16 + static hls::stream sq_rd_cmd_1; + #pragma HLS stream variable=sq_rd_cmd_1 depth=16 + + static hls::stream sq_wr_cmd_2; + #pragma HLS stream variable=sq_wr_cmd_2 depth=16 + static hls::stream sq_rd_cmd_2; + #pragma HLS stream variable=sq_rd_cmd_2 depth=16 + + dm_sq_cmd_converter<0>(dma0_s2mm_cmd, sq_wr_cmd_0, dma0_s2mm_meta); + dm_sq_cmd_converter<1>(dma1_s2mm_cmd, sq_wr_cmd_1, dma1_s2mm_meta); + cyt_rq_sq_cmd_converter<2>(cyt_rq_wr_cmd, sq_wr_cmd_2); + multiplexor(cclo_sq_wr_cmd, sq_wr_cmd_0,sq_wr_cmd_1,sq_wr_cmd_2, cyt_sq_wr_cmd); + + + dm_sq_cmd_converter<0>(dma0_mm2s_cmd,sq_rd_cmd_0, dma0_mm2s_meta); + dm_sq_cmd_converter<1>(dma1_mm2s_cmd,sq_rd_cmd_1, dma1_mm2s_meta); + cyt_rq_sq_cmd_converter<2>(cyt_rq_rd_cmd, sq_rd_cmd_2); + multiplexor(cclo_sq_rd_cmd, sq_rd_cmd_0,sq_rd_cmd_1,sq_rd_cmd_2, cyt_sq_rd_cmd); + + +} diff --git a/kernels/plugins/cyt_adapter/cyt_rdma_arbiter.cpp b/kernels/plugins/cyt_adapter/cyt_rdma_arbiter.cpp index da59244c..8b478808 100644 --- a/kernels/plugins/cyt_adapter/cyt_rdma_arbiter.cpp +++ b/kernels/plugins/cyt_adapter/cyt_rdma_arbiter.cpp @@ -38,23 +38,25 @@ void cyt_rdma_arbiter_meta( if (!STREAM_IS_EMPTY(s_meta)){ reqWord = STREAM_READ(s_meta); - if (reqWord.host == 0){ + // if (reqWord.strm == CYT_STRM_RDMA && reqWord.opcode == CYT_RDMA_SEND){ + if(reqWord.opcode == RC_SEND_FIRST || reqWord.opcode == RC_SEND_MIDDLE || reqWord.opcode == RC_SEND_LAST || reqWord.opcode == RC_SEND_ONLY ){ meta_notif.type = 0; //don't care meta_notif.session_id(CYT_PID_BITS-1,0) = reqWord.pid; - meta_notif.session_id(CYT_PID_BITS+CYT_DEST_BITS-1,CYT_PID_BITS) = reqWord.dest; + meta_notif.session_id(CYT_PID_BITS+CYT_DEST_BITS-1,CYT_PID_BITS) = reqWord.vfid; //TODO: check this meta_notif.length = reqWord.len; STREAM_WRITE(m_meta_0, meta_notif); - meta_internal(15,0) = reqWord.host; - meta_internal(31,16) = reqWord.stream; + meta_internal(15,0) = reqWord.dest; + meta_internal(31,16) = CYT_RDMA_SEND; meta_internal(63,32) = reqWord.len; STREAM_WRITE(meta_int, meta_internal); - } else if (reqWord.host == 1) { + // } else if (reqWord.strm == CYT_STRM_RDMA && reqWord.opcode == CYT_RDMA_WRITE) { + } else if (reqWord.opcode == RC_RDMA_WRITE_FIRST || reqWord.opcode == RC_RDMA_WRITE_MIDDLE || reqWord.opcode == RC_RDMA_WRITE_LAST || reqWord.opcode == RC_RDMA_WRITE_LAST_WITH_IMD || reqWord.opcode == RC_RDMA_WRITE_ONLY || reqWord.opcode == RC_RDMA_WRITE_ONLY_WIT_IMD) { + // simply forward the rq_wr and the conversion from rq_wr to sq_wr is done downstream STREAM_WRITE(m_meta_1, reqWord); - - meta_internal(15,0) = reqWord.host; - meta_internal(31,16) = reqWord.stream; + meta_internal(15,0) = reqWord.dest; + meta_internal(31,16) = CYT_RDMA_WRITE; meta_internal(63,32) = reqWord.len; STREAM_WRITE(meta_int, meta_internal); } @@ -68,7 +70,8 @@ void cyt_rdma_arbiter_meta( // We also append the last signal for WRITE data stream for each packet as the cyt adapter set the ctl bits always to 1 void cyt_rdma_arbiter_data( hls::stream >& meta_int, - hls::stream >& s_axis, + hls::stream >& s_axis_0, + hls::stream >& s_axis_1, hls::stream >& m_axis_0, hls::stream >& m_axis_1 ) @@ -76,14 +79,14 @@ void cyt_rdma_arbiter_data( #pragma HLS PIPELINE II=1 #pragma HLS INLINE off - enum fsmStateType {META, SEND_STREAM, WRITE_STREAM}; + enum fsmStateType {META, SEND_STREAM, WRITE_STREAM_CARD, WRITE_STREAM_HOST}; static fsmStateType fsmState = META; static ap_axiu<512, 0, 0, 8> currWord; static ap_uint<64> meta_internal; static ap_uint<16> meta_internal_host; - static ap_uint<16> meta_internal_stream; + static ap_uint<16> meta_internal_opcode; static ap_uint<32> meta_internal_len; static ap_uint<32> pkt_word; static ap_uint<32> word_cnt = 0; @@ -95,22 +98,24 @@ void cyt_rdma_arbiter_data( { meta_internal = STREAM_READ(meta_int); meta_internal_host = meta_internal(15,0); - meta_internal_stream = meta_internal(31,16); + meta_internal_opcode = meta_internal(31,16); meta_internal_len = meta_internal(63,32); pkt_word = (meta_internal_len + 63) >> 6; - if (meta_internal_host == 0){ + if (meta_internal_opcode == CYT_RDMA_SEND){ fsmState = SEND_STREAM; - } else if (meta_internal_host == 1){ - fsmState = WRITE_STREAM; + } else if (meta_internal_opcode == CYT_RDMA_WRITE && meta_internal_host == CYT_STRM_CARD){ + fsmState = WRITE_STREAM_CARD; + } else if (meta_internal_opcode == CYT_RDMA_WRITE && meta_internal_host == CYT_STRM_HOST){ + fsmState = WRITE_STREAM_HOST; } } break; case SEND_STREAM: - if (!s_axis.empty()) + if (!s_axis_0.empty()) { - currWord = STREAM_READ(s_axis); + currWord = STREAM_READ(s_axis_0); word_cnt++; if (word_cnt == pkt_word) { @@ -121,22 +126,43 @@ void cyt_rdma_arbiter_data( STREAM_WRITE(m_axis_0, currWord); } break; - case WRITE_STREAM: - if (!s_axis.empty()) + case WRITE_STREAM_CARD: + if (!s_axis_0.empty()) { - currWord = STREAM_READ(s_axis); + currWord = STREAM_READ(s_axis_0); ap_axiu<512, 0, 0, 8> outWord; outWord.data = currWord.data; outWord.keep = currWord.keep; outWord.last = currWord.last; - outWord.dest = meta_internal_stream; + outWord.dest = meta_internal_host; // use the host flag to indicate whether it is to host or device word_cnt++; if (word_cnt == pkt_word) { word_cnt = 0; - currWord.last = 1; + outWord.last = 1; + fsmState = META; + } + STREAM_WRITE(m_axis_1, outWord); + } + break; + case WRITE_STREAM_HOST: + if (!s_axis_1.empty()) + { + currWord = STREAM_READ(s_axis_1); + ap_axiu<512, 0, 0, 8> outWord; + + outWord.data = currWord.data; + outWord.keep = currWord.keep; + outWord.last = currWord.last; + outWord.dest = meta_internal_host; // use the host flag to indicate whether it is to host or device + word_cnt++; + + if (word_cnt == pkt_word) + { + word_cnt = 0; + outWord.last = 1; fsmState = META; } STREAM_WRITE(m_axis_1, outWord); @@ -145,15 +171,17 @@ void cyt_rdma_arbiter_data( } } -// check the host bit of the s_meta, which corresponds to the wr_req -// if host bit equals 0, this is a SEND Verb, route meta to eth notification and route data stream to channel 0 -// if host bit equals 1, this is an WRITE Verb, route meta and data to channel 1 -// if data routes to channel 1, set the meta_internal field according to the stream flag in the cyt_req_t to indicate host/card +// check the command type of the s_meta, which corresponds to the rq_wr +// the input data stream can come from either s_axis_0/s_axis_1, corresponding to rrsp_recv_0/1. It should be selected based on the dest field of the s_meta +// if the dest is CYT_STRM_CARD, then consume from s_axis_0, and if dest is CYT_STRM_HOST, consume from s_axis_1 +// if strm flag is CYT_STRM_RDMA, and the opcode is CYT_RDMA_SEND, this is a SEND Verb, route meta to eth notification and route data stream to channel 0 +// if strm flag is CYT_STRM_RDMA, and the opcode is CYT_RDMA_WRITE, this is a WRITE Verb, route meta and data to channel 1 +// if data routes to channel 1 (CYT_RDMA_WRITE), set the meta_internal field according to the dest flag in the cyt_req_t to indicate host/card // compact bit pragma required for cyt_req_t as this interfaces with Coyote. - void cyt_rdma_arbiter( hls::stream& s_meta, - hls::stream >& s_axis, + hls::stream >& s_axis_0, + hls::stream >& s_axis_1, hls::stream& m_meta_0, hls::stream >& m_axis_0, hls::stream& m_meta_1, @@ -161,7 +189,8 @@ void cyt_rdma_arbiter( ) { #pragma HLS INTERFACE axis register port=s_meta -#pragma HLS INTERFACE axis register port=s_axis +#pragma HLS INTERFACE axis register port=s_axis_1 +#pragma HLS INTERFACE axis register port=s_axis_0 #pragma HLS INTERFACE axis register port=m_meta_0 #pragma HLS INTERFACE axis register port=m_axis_0 #pragma HLS INTERFACE axis register port=m_meta_1 @@ -186,7 +215,8 @@ void cyt_rdma_arbiter( cyt_rdma_arbiter_data( meta_int, - s_axis, + s_axis_0, + s_axis_1, m_axis_0, m_axis_1 ); diff --git a/kernels/plugins/cyt_adapter/cyt_rdma_mux.cpp b/kernels/plugins/cyt_adapter/cyt_rdma_mux.cpp deleted file mode 100644 index 0febd79c..00000000 --- a/kernels/plugins/cyt_adapter/cyt_rdma_mux.cpp +++ /dev/null @@ -1,179 +0,0 @@ -/******************************************************************************* -# Copyright (C) 2023 Advanced Micro Devices, Inc -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# *******************************************************************************/ - -#include "cyt.h" -#include "eth_intf.h" - -using namespace std; - - -void cyt_rdma_mux_meta( - hls::stream& s_meta_0, - hls::stream& s_meta_1, - hls::stream& m_meta_0, - hls::stream& m_meta_1, - hls::stream >& meta_int - ) -{ - -#pragma HLS PIPELINE II=1 -#pragma HLS INLINE off - - static rdma_req_t s_metaWord_0; - static cyt_rdma_req_t m_metaWord_0; - static cyt_req_t s_metaWord_1; - static cyt_rdma_req_msg_t rdma_req_msg; - static ap_uint<8> dest = 0; - - // if there is a rdma_sq cmd - // sq command comes from CCLO only has WRITE and SEND Verb - if (!STREAM_IS_EMPTY(s_meta_0)){ - s_metaWord_0 = STREAM_READ(s_meta_0); - m_metaWord_0.opcode = s_metaWord_0.opcode; - m_metaWord_0.qpn = s_metaWord_0.qpn; - m_metaWord_0.host = 0; // data always managed by CCLO - m_metaWord_0.mode = 0; // always PARSE - m_metaWord_0.last = 1; // always assert last - m_metaWord_0.cmplt = 0; // no need to ack - m_metaWord_0.ssn = 0; - m_metaWord_0.offs = 0; - m_metaWord_0.rsrvd = 0; - - rdma_req_msg.lvaddr = 0; // we don't care about local vaddr - rdma_req_msg.rvaddr(47,0) = s_metaWord_0.vaddr; - rdma_req_msg.rvaddr(52,52) = s_metaWord_0.host; - rdma_req_msg.len = s_metaWord_0.len; - rdma_req_msg.params = 0; - - m_metaWord_0.msg = (ap_uint)rdma_req_msg; - - STREAM_WRITE(m_meta_0, m_metaWord_0); - dest = 0; - STREAM_WRITE(meta_int, dest); - } - else if (!STREAM_IS_EMPTY(s_meta_1)){ - s_metaWord_1 = STREAM_READ(s_meta_1); - STREAM_WRITE(m_meta_1, s_metaWord_1); - dest = 1; - STREAM_WRITE(meta_int, dest); - } -} - -void cyt_rdma_mux_data( - hls::stream >& meta_int, - hls::stream >& s_axis_0, - hls::stream >& s_axis_1, - hls::stream >& m_axis -) -{ - #pragma HLS PIPELINE II=1 - #pragma HLS INLINE off - - enum fsmStateType {META, STREAM_0, STREAM_1}; - static fsmStateType fsmState = META; - - static ap_axiu<512, 0, 0, 8> currWord; - - switch (fsmState) - { - case META: - if (!STREAM_IS_EMPTY(meta_int)) - { - ap_uint<8> dest = STREAM_READ(meta_int); - if (dest == 0){ - fsmState = STREAM_0; - } else { - fsmState = STREAM_1; - } - } - break; - case STREAM_0: - if (!STREAM_IS_EMPTY(s_axis_0)) - { - currWord = STREAM_READ(s_axis_0); - STREAM_WRITE(m_axis, currWord); - if (currWord.last) // TODO: check by cnt instead of last - { - fsmState = META; - } - } - break; - case STREAM_1: - if (!STREAM_IS_EMPTY(s_axis_1)) - { - currWord = STREAM_READ(s_axis_1); - STREAM_WRITE(m_axis, currWord); - if (currWord.last) // TODO: check by cnt instead of last - { - fsmState = META; - } - } - break; - } -} - - -// cyt rdma mux will arbitrate the data stream according to the accepted command signal -// the command can be either rdma_sq or the rd_req -// the data stream can be data stream coming from the cclo or from the host/card data stream -// these two streams are mux into single rdma m_axis data stream - -void cyt_rdma_mux( - hls::stream& s_meta_0, - hls::stream >& s_axis_0, - hls::stream& s_meta_1, - hls::stream >& s_axis_1, - hls::stream& m_meta_0, - hls::stream& m_meta_1, - hls::stream >& m_axis - ) -{ -#pragma HLS INTERFACE axis register port=s_meta_0 -#pragma HLS INTERFACE axis register port=s_axis_0 -#pragma HLS INTERFACE axis register port=s_meta_1 -#pragma HLS INTERFACE axis register port=s_axis_1 -#pragma HLS INTERFACE axis register port=m_meta_0 -#pragma HLS INTERFACE axis register port=m_meta_1 -#pragma HLS INTERFACE axis register port=m_axis -#pragma HLS aggregate variable=s_meta_0 compact=bit -#pragma HLS aggregate variable=s_meta_1 compact=bit -#pragma HLS aggregate variable=m_meta_0 compact=bit -#pragma HLS aggregate variable=m_meta_1 compact=bit - -#pragma HLS INTERFACE ap_ctrl_none port=return - -#pragma HLS DATAFLOW disable_start_propagation - - static hls::stream > meta_int; - #pragma HLS STREAM depth=4 variable=meta_int - - cyt_rdma_mux_meta( - s_meta_0, - s_meta_1, - m_meta_0, - m_meta_1, - meta_int - ); - - cyt_rdma_mux_data( - meta_int, - s_axis_0, - s_axis_1, - m_axis - ); - -} \ No newline at end of file diff --git a/test/host/Coyote/CMakeLists.txt b/test/host/Coyote/CMakeLists.txt index 7c6b9842..78b7ca5c 100644 --- a/test/host/Coyote/CMakeLists.txt +++ b/test/host/Coyote/CMakeLists.txt @@ -16,6 +16,8 @@ set(EN_AVX 1 CACHE STRING "AVX environment.") add_subdirectory(${CMAKE_SOURCE_DIR}/../../../driver/xrt/ ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/xrt/) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) # Exec set(EXEC accl_on_coyote) diff --git a/test/host/Coyote/run_scripts/flow_u55c.sh b/test/host/Coyote/run_scripts/flow_u55c.sh index 8976bb7a..f58dfcb7 100755 --- a/test/host/Coyote/run_scripts/flow_u55c.sh +++ b/test/host/Coyote/run_scripts/flow_u55c.sh @@ -2,7 +2,7 @@ # parameters SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -FPGA_BIT_PATH=$SCRIPT_DIR/../../../refdesigns/Coyote/hw/build_RDMA/lynx/lynx.runs/impl_1/cyt_top +FPGA_BIT_PATH=$SCRIPT_DIR/../../../refdesigns/coyote_build_RDMA_u55c/bitstreams/cyt_top # FPGA_BIT_PATH=$SCRIPT_DIR/../../../refdesigns/Coyote/hw/build_TCP/lynx/lynx.runs/impl_1/cyt_top DRIVER_PATH=$SCRIPT_DIR/../../../refdesigns/Coyote/driver/ @@ -81,12 +81,12 @@ if [ $HOT_RESET -eq 1 ]; then for servid in "${SERVID[@]}"; do boardidx=$(expr $servid - 1) host="alveo-u55c-$(printf "%02d" $servid)" - ssh -q -tt $host "sudo insmod $DRIVER_PATH/coyote_drv.ko ip_addr_q0=${IPADDR[boardidx]} mac_addr_q0=${MACADDR[boardidx]}" & + ssh -q -tt $host "sudo insmod $DRIVER_PATH/coyote_drv.ko ip_addr=${IPADDR[boardidx]} mac_addr=${MACADDR[boardidx]}" & done wait echo "Driver loaded." - echo "Getting permissions for fpga..." - parallel-ssh -H "$hostlist" -x '-tt' "sudo /opt/sgrt/cli/program/fpga_chmod 0" + # echo "Getting permissions for fpga..." + # parallel-ssh -H "$hostlist" -x '-tt' "sudo /opt/sgrt/cli/program/fpga_chmod 0" echo "Done." fi diff --git a/test/host/Coyote/run_scripts/run.sh b/test/host/Coyote/run_scripts/run.sh index 59b628a7..83bc3577 100755 --- a/test/host/Coyote/run_scripts/run.sh +++ b/test/host/Coyote/run_scripts/run.sh @@ -40,17 +40,17 @@ done #define ACCL_BARRIER 12 ARG=" -d -f -r" # debug, hardware, and tcp/rdma flags -TEST_MODE=(10) -N_ELEMENTS=(512) # 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 +TEST_MODE=(3) +N_ELEMENTS=(256) # 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 NRUN=(1) # number of runs HOST=(1) -PROTOC=(1) # eager=0, rendezevous=1 +PROTOC=(0) # eager=0, rendezevous=1 echo "Run command: $EXEC $ARG -y $TEST_MODE -c 1024 -l $FPGA_FILE" rm -f $(pwd)/accl_log/rank* -for NP in `seq 4 $NUM_PROCESS`; do +for NP in `seq $NUM_PROCESS $NUM_PROCESS`; do for MODE in ${TEST_MODE[@]}; do for N_ELE in ${N_ELEMENTS[@]}; do for H in ${HOST[@]}; do diff --git a/test/host/Coyote/test.cpp b/test/host/Coyote/test.cpp index c7c21216..8dac8b6b 100644 --- a/test/host/Coyote/test.cpp +++ b/test/host/Coyote/test.cpp @@ -354,14 +354,14 @@ options_t parse_options(int argc, char *argv[]) } -void exchange_qp(unsigned int master_rank, unsigned int slave_rank, unsigned int local_rank, std::vector &ibvQpConn_vec, std::vector &ranks) +void exchange_qp(unsigned int master_rank, unsigned int slave_rank, unsigned int local_rank, std::vector &ranks, ACCL::CoyoteDevice* device) { if (local_rank == master_rank) { std::cout<<"Local rank "<getQpairStruct()->local), sizeof(fpga::ibvQ), MPI_CHAR, slave_rank, 0, MPI_COMM_WORLD); + MPI_Send(&(device->coyote_qProc_vec[slave_rank]->getQpair()->local), sizeof(fpga::ibvQ), MPI_CHAR, slave_rank, 0, MPI_COMM_WORLD); } else if (local_rank == slave_rank) { @@ -371,7 +371,7 @@ void exchange_qp(unsigned int master_rank, unsigned int slave_rank, unsigned int MPI_Recv(&received_q, sizeof(fpga::ibvQ), MPI_CHAR, master_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); // Copy the received data to the remote queue pair - ibvQpConn_vec[master_rank]->getQpairStruct()->remote = received_q; + device->coyote_qProc_vec[master_rank]->getQpair()->remote = received_q; } // Synchronize after the first exchange to avoid race conditions @@ -381,7 +381,7 @@ void exchange_qp(unsigned int master_rank, unsigned int slave_rank, unsigned int { std::cout<<"Local rank "<getQpairStruct()->local), sizeof(fpga::ibvQ), MPI_CHAR, master_rank, 0, MPI_COMM_WORLD); + MPI_Send(&(device->coyote_qProc_vec[master_rank]->getQpair()->local), sizeof(fpga::ibvQ), MPI_CHAR, master_rank, 0, MPI_COMM_WORLD); } else if (local_rank == master_rank) { @@ -391,7 +391,7 @@ void exchange_qp(unsigned int master_rank, unsigned int slave_rank, unsigned int MPI_Recv(&received_q, sizeof(fpga::ibvQ), MPI_CHAR, slave_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); // Copy the received data to the remote queue pair - ibvQpConn_vec[slave_rank]->getQpairStruct()->remote = received_q; + device->coyote_qProc_vec[slave_rank]->getQpair()->remote = received_q; } MPI_Barrier(MPI_COMM_WORLD); @@ -399,20 +399,22 @@ void exchange_qp(unsigned int master_rank, unsigned int slave_rank, unsigned int // write established connection to hardware and perform arp lookup if (local_rank == master_rank) { - int connection = (ibvQpConn_vec[slave_rank]->getQpairStruct()->local.qpn & 0xFFFF) | ((ibvQpConn_vec[slave_rank]->getQpairStruct()->remote.qpn & 0xFFFF) << 16); - ibvQpConn_vec[slave_rank]->getQpairStruct()->print(); - ibvQpConn_vec[slave_rank]->setConnection(connection); - ibvQpConn_vec[slave_rank]->writeContext(ranks[slave_rank].port); - ibvQpConn_vec[slave_rank]->doArpLookup(); - ranks[slave_rank].session_id = ibvQpConn_vec[slave_rank]->getQpairStruct()->local.qpn; + int connection = (device->coyote_qProc_vec[slave_rank]->getQpair()->local.qpn & 0xFFFF) | ((device->coyote_qProc_vec[slave_rank]->getQpair()->remote.qpn & 0xFFFF) << 16); + device->coyote_qProc_vec[slave_rank]->getQpair()->local.print("Local "); + device->coyote_qProc_vec[slave_rank]->getQpair()->remote.print("Remote"); + device->coyote_qProc_vec[slave_rank]->setConnection(connection); + device->coyote_qProc_vec[slave_rank]->writeQpContext(ranks[slave_rank].port); + device->coyote_qProc_vec[slave_rank]->doArpLookup(device->coyote_qProc_vec[slave_rank]->getQpair()->remote.ip_addr); + ranks[slave_rank].session_id = device->coyote_qProc_vec[slave_rank]->getQpair()->local.qpn; } else if (local_rank == slave_rank) { - int connection = (ibvQpConn_vec[master_rank]->getQpairStruct()->local.qpn & 0xFFFF) | ((ibvQpConn_vec[master_rank]->getQpairStruct()->remote.qpn & 0xFFFF) << 16); - ibvQpConn_vec[master_rank]->getQpairStruct()->print(); - ibvQpConn_vec[master_rank]->setConnection(connection); - ibvQpConn_vec[master_rank]->writeContext(ranks[master_rank].port); - ibvQpConn_vec[master_rank]->doArpLookup(); - ranks[master_rank].session_id = ibvQpConn_vec[master_rank]->getQpairStruct()->local.qpn; + int connection = (device->coyote_qProc_vec[master_rank]->getQpair()->local.qpn & 0xFFFF) | ((device->coyote_qProc_vec[master_rank]->getQpair()->remote.qpn & 0xFFFF) << 16); + device->coyote_qProc_vec[master_rank]->getQpair()->local.print("Local "); + device->coyote_qProc_vec[master_rank]->getQpair()->remote.print("Remote"); + device->coyote_qProc_vec[master_rank]->setConnection(connection); + device->coyote_qProc_vec[master_rank]->writeQpContext(ranks[master_rank].port); + device->coyote_qProc_vec[master_rank]->doArpLookup(device->coyote_qProc_vec[slave_rank]->getQpair()->remote.ip_addr); + ranks[master_rank].session_id = device->coyote_qProc_vec[master_rank]->getQpair()->local.qpn; } MPI_Barrier(MPI_COMM_WORLD); @@ -422,64 +424,80 @@ void exchange_qp(unsigned int master_rank, unsigned int slave_rank, unsigned int void configure_cyt_rdma(std::vector &ranks, int local_rank, ACCL::CoyoteDevice* device) { - std::cout<<"Initializing QP connections..."< ibvQpConn_vec; - // create single page dummy memory space for each qp - uint32_t n_pages = 1; - for(int i=0; icoyote_qProc_vec[i], ranks[local_rank].ip, n_pages); - ibvQpConn_vec.push_back(qpConn); - // qpConn->getQpairStruct()->print(); - } + // std::cout<<"Initializing QP connections..."< ibvQp_vec; + // // create single page dummy memory space for each qp + // uint32_t n_pages = 1; + // for(int i=0; icoyote_qProc_vec[i], ranks[local_rank].ip, n_pages); + // ibvQp_vec.push_back(qpConn); + // // qpConn->getQpair()->print(); + // } std::cout<<"Exchanging QP..."< &ranks, int local_rank, ACCL::CoyoteDevice* device) -{ - std::cout<<"Configuring Coyote TCP..."<get_device()->doArpLookup(_ip_encode(ranks[i].ip)); - } - } - - //open port - for (int i=0; iget_device()->tcpOpenPort(dstPort); - } - - std::this_thread::sleep_for(10ms); - - //open con - for (int i=0; iget_device()->tcpOpenCon(dstIp, dstPort, &session); - ranks[i].session_id = session; - } - } - +// void configure_cyt_tcp(std::vector &ranks, int local_rank, ACCL::CoyoteDevice* device) +// { +// std::cout<<"Configuring Coyote TCP..."<get_device()->doArpLookup(_ip_encode(ranks[i].ip)); +// } +// } + +// //open port +// for (int i=0; iget_device()->tcpOpenPort(dstPort); +// } + +// std::this_thread::sleep_for(10ms); + +// //open con +// for (int i=0; iget_device()->tcpOpenCon(dstIp, dstPort, &session); +// ranks[i].session_id = session; +// } +// } + +// } + +void test_copy(ACCL::ACCL &accl, options_t &options){ + unsigned int count = options.count; + auto op_buf = accl.create_coyotebuffer(count, dataType::float32); + auto res_buf = accl.create_coyotebuffer(count, dataType::float32); + for (int i = 0; i < count; i++) op_buf.get()->buffer()[i] = (float)i; + if (options.host == 0){ op_buf->sync_to_device(); } + ACCL::ACCLRequest* req; + req = accl.copy(*op_buf, *res_buf, count); + accl.wait(req, 1000ms); + for (int i = 0; i < count; i++) { + if (res_buf.get()->buffer()[i] != op_buf.get()->buffer()[i]) { + std::cout << std::to_string(i + 1) + "th item is incorrect!" << std::endl; + errors += 1; + } + } } - void test_sendrcv(ACCL::ACCL &accl, options_t &options) { std::cout << "Start send recv test..." << std::endl<dump_eager_rx_buffers(false)); MPI_Barrier(MPI_COMM_WORLD); @@ -1208,7 +1229,7 @@ void test_accl_base(options_t options) } } - + MPI_Barrier(MPI_COMM_WORLD); if (failed_tests == 0){ std::cout << "\nACCL base functionality test completed successfully!\n" << std::endl; } diff --git a/test/refdesigns/Coyote b/test/refdesigns/Coyote index ef4853fc..8d73404d 160000 --- a/test/refdesigns/Coyote +++ b/test/refdesigns/Coyote @@ -1 +1 @@ -Subproject commit ef4853fc4eefc2768213179abe1b9278834bd2d2 +Subproject commit 8d73404de24cd752b6aa16e0752bb4a3caf712ab diff --git a/test/refdesigns/Makefile b/test/refdesigns/Makefile index a5ff4ed6..4c2e4e5c 100644 --- a/test/refdesigns/Makefile +++ b/test/refdesigns/Makefile @@ -77,10 +77,11 @@ COMPRESSION_XO=../../kernels/plugins/hp_compression/hp_compression_$(FPGAPART).x LOOPBACK_XO=../../kernels/plugins/loopback/loopback_$(FPGAPART).xo TCP_SESS_XO=../../kernels/plugins/tcp_session_handler/tcp_session_handler_$(FPGAPART).xo VADD_XO=../../kernels/plugins/vadd_put/vadd_put_$(FPGAPART).xo -CYT_DMA_ADAPTER_XO=../../kernels/plugins/cyt_adapter/cyt_dma_adapter_$(FPGAPART).xo +CYT_DMA_SQ_ADAPTER_XO=../../kernels/plugins/cyt_adapter/cyt_dma_sq_adapter_$(FPGAPART).xo +CYT_CQ_DM_STS_CONVERTER_XO=../../kernels/plugins/cyt_adapter/cyt_cq_dm_sts_converter_$(FPGAPART).xo CYT_RDMA_ARBITER_XO=../../kernels/plugins/cyt_adapter/cyt_rdma_arbiter_$(FPGAPART).xo -CYT_RDMA_MUX_XO=../../kernels/plugins/cyt_adapter/cyt_rdma_mux_$(FPGAPART).xo -CYT_ADAPTER_XO = $(CYT_DMA_ADAPTER_XO) $(CYT_RDMA_ARBITER_XO) $(CYT_RDMA_MUX_XO) +CCLO_SQ_ADAPTER_XO=../../kernels/plugins/cyt_adapter/cclo_sq_adapter_$(FPGAPART).xo +CYT_ADAPTER_XO = $(CYT_DMA_SQ_ADAPTER_XO) $(CYT_RDMA_ARBITER_XO) $(CCLO_SQ_ADAPTER_XO) $(CYT_CQ_DM_STS_CONVERTER_XO) HWEMU_MST_XO=$$XILINX_VITIS/data/emulation/XO/sim_ipc_axis_master_512.xo HWEMU_SLV_XO=$$XILINX_VITIS/data/emulation/XO/sim_ipc_axis_slave_512.xo @@ -193,7 +194,7 @@ else ifeq (coyote_rdma, $(MODE)) N_DDR_CHAN = 2 endif OTHER_XO += $(CYT_ADAPTER_XO) - COYOTE_CONFIG = -DFDEV_NAME=$(BOARD) -DEN_MEM=1 -DEN_STRM=1 -DEN_BPSS=1 -DEN_RDMA_0=1 -DEN_RPC=1 -DN_STRM_AXI=3 -DN_CARD_AXI=3 -DEN_HLS=0 -DACLK_F=250 -DTLBL_A=12 -DN_DDR_CHAN=$(N_DDR_CHAN) + COYOTE_CONFIG = -DFDEV_NAME=$(BOARD) -DEXAMPLE=rdma -DSHELL_PROBE=8 -DN_REGIONS=1 -DEN_MEM=1 -DEN_STRM=1 -DEN_RDMA=1 -DN_RDMA_AXI=2 -DN_STRM_AXI=3 -DN_CARD_AXI=3 -DACLK_F=250 -DTLBL_A=12 -DN_DDR_CHAN=$(N_DDR_CHAN) CCLO_STACK_TYPE = RDMA OUTPUT_PRODUCT = $(CYT_BIT) USE_HOSTMEM = 1 @@ -232,19 +233,19 @@ $(VNX)/NetLayers/_x.%/networklayer.xo: $(MAKE) -C xup_vitis_network_example/NetLayers DEVICE=$* all .PHONY: coyote_shell -coyote_shell: $(CYT_BUILD_DIR)/lynx/lynx.xpr +coyote_shell: $(CYT_BUILD_DIR)/test_config_0/user_c0_0/test.xpr -$(CYT_BUILD_DIR)/lynx/lynx.xpr: - mkdir $(CYT_BUILD_DIR) && cd $(CYT_BUILD_DIR) && cmake ../Coyote/hw $(COYOTE_CONFIG) - $(MAKE) -C $(CYT_BUILD_DIR) shell +$(CYT_BUILD_DIR)/test_config_0/user_c0_0/test.xpr: + mkdir $(CYT_BUILD_DIR) && cd $(CYT_BUILD_DIR) && cmake ../hdl/ $(COYOTE_CONFIG) + $(MAKE) -C $(CYT_BUILD_DIR) project $(CYT_BIT): coyote_shell $(MAKE) -C ../../kernels/cclo PLATFORM=$(PLATFORM) STACK_TYPE=$(CCLO_STACK_TYPE) MB_DEBUG_LEVEL=$(CCLO_MB_DEBUG_LEVEL) $(MAKE) -C ../../kernels/plugins DEVICE=$(FPGAPART) cp -rf $(OTHER_XO) $(CYT_BUILD_DIR)/iprepo && cd $(CYT_BUILD_DIR)/iprepo && unzip -n '*.xo' vivado -mode tcl -source tcl/coyote.tcl -tclargs $(CCLO_STACK_TYPE) $(CYT_BUILD_DIR) - cp hdl/$(MODE)_top.sv $(CYT_BUILD_DIR)/lynx/hdl/config_0/user_logic_c0_0.sv - $(MAKE) -C $(CYT_BUILD_DIR) compile + cp hdl/$(MODE)_top.sv $(CYT_BUILD_DIR)/test_config_0/user_c0_0/hdl/wrappers/user_logic_c0_0.sv + $(MAKE) -C $(CYT_BUILD_DIR) bitgen .PHONY: tcp_stack tcp_stack: $(TCP_XO) $(CMAC_TCP_XO) diff --git a/test/refdesigns/hdl/CMakeLists.txt b/test/refdesigns/hdl/CMakeLists.txt new file mode 100644 index 00000000..cc8be38f --- /dev/null +++ b/test/refdesigns/hdl/CMakeLists.txt @@ -0,0 +1,32 @@ +cmake_minimum_required(VERSION 3.0) +project(test) + +set(CYT_DIR ${CMAKE_SOURCE_DIR}/../Coyote) +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CYT_DIR}/cmake) + +find_package(CoyoteHW REQUIRED) + +# +# ACCL Test +# +if(EXAMPLE STREQUAL "rdma") + message("** ACCL-RDMA Test") + set(SHELL_PROBE 7) + set(N_REGIONS 1) + set(EN_STRM 1) + set(EN_RDMA 1) + set(N_STRM_AXI 3) + set(N_CARD_AXI 3) + set(N_RDMA_AXI 2) + set(ACLK_F 250) + set(TLBL_A 12) + set(EN_MEM 1) + + validation_checks_hw() + + # load_apps ( + # VFPGA_C0_0 "coyote_rdma_top.sv" + # ) + + create_hw() +endif() diff --git a/test/refdesigns/hdl/coyote_rdma_top.sv b/test/refdesigns/hdl/coyote_rdma_top.sv index 6b333a53..62a2164c 100644 --- a/test/refdesigns/hdl/coyote_rdma_top.sv +++ b/test/refdesigns/hdl/coyote_rdma_top.sv @@ -30,39 +30,32 @@ module design_user_logic_c0_0 ( // AXI4L CONTROL AXI4L.s axi_ctrl, - // DESCRIPTOR BYPASS - metaIntf.m bpss_rd_req, - metaIntf.m bpss_wr_req, - metaIntf.s bpss_rd_done, - metaIntf.s bpss_wr_done, - - // AXI4S HOST STREAMS - AXI4SR.s axis_host_0_sink, - AXI4SR.m axis_host_0_src, - AXI4SR.s axis_host_1_sink, - AXI4SR.m axis_host_1_src, - AXI4SR.s axis_host_2_sink, - AXI4SR.m axis_host_2_src, - - // AXI4S CARD STREAMS - AXI4SR.s axis_card_0_sink, - AXI4SR.m axis_card_0_src, - AXI4SR.s axis_card_1_sink, - AXI4SR.m axis_card_1_src, - AXI4SR.s axis_card_2_sink, - AXI4SR.m axis_card_2_src, - - // RDMA QSFP0 CMD - metaIntf.s rdma_0_rd_req, - metaIntf.s rdma_0_wr_req, - - // AXI4S RDMA QSFP0 STREAMS - AXI4SR.s axis_rdma_0_sink, - AXI4SR.m axis_rdma_0_src, - - // RDMA QSFP0 SQ and RQ - metaIntf.m rdma_0_sq, - metaIntf.s rdma_0_ack, + // NOTIFY + metaIntf.m notify, + + // DESCRIPTORS + metaIntf.m sq_rd, + metaIntf.m sq_wr, + metaIntf.s cq_rd, + metaIntf.s cq_wr, + metaIntf.s rq_rd, + metaIntf.s rq_wr, + + // HOST DATA STREAMS + AXI4SR.s axis_host_recv [N_STRM_AXI], + AXI4SR.m axis_host_send [N_STRM_AXI], + + // CARD DATA STREAMS + AXI4SR.s axis_card_recv [N_CARD_AXI], + AXI4SR.m axis_card_send [N_CARD_AXI], + + // RDMA DATA STREAMS REQUESTER + AXI4SR.s axis_rreq_recv [N_RDMA_AXI], + AXI4SR.m axis_rreq_send [N_RDMA_AXI], + + // RDMA DATA STREAMS RESPONDER + AXI4SR.s axis_rrsp_recv [N_RDMA_AXI], + AXI4SR.m axis_rrsp_send [N_RDMA_AXI], // Clock and reset input wire aclk, @@ -70,14 +63,8 @@ module design_user_logic_c0_0 ( ); /* -- Tie-off unused interfaces and signals ----------------------------- */ -// always_comb axis_host_0_sink.tie_off_s(); -// always_comb axis_host_0_src_s.tie_off_m(); -// always_comb axis_card_0_sink.tie_off_s(); -// always_comb axis_card_0_src_s.tie_off_m(); -// always_comb axis_host_1_sink.tie_off_s(); -// always_comb axis_host_1_src.tie_off_m(); -// always_comb axis_card_1_sink.tie_off_s(); -// always_comb axis_card_1_src_s.tie_off_m(); +always_comb notify.tie_off_m(); + /* -- USER LOGIC -------------------------------------------------------- */ @@ -85,39 +72,6 @@ module design_user_logic_c0_0 ( localparam integer COYOTE_AXIL_ADDR_LSB = $clog2(AXIL_DATA_BITS/8); localparam integer COYOTE_AXIL_ADDR_MSB = 16; -// Master Data Stream -AXI4SR axis_host_0_src_s (); -AXI4SR axis_host_1_src_s (); -AXI4SR axis_host_2_src_s (); -AXI4SR axis_card_0_src_s (); -AXI4SR axis_card_1_src_s (); -AXI4SR axis_card_2_src_s (); - -// register slices -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_host_0_src_s), .m_axis(axis_host_0_src)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_host_1_src_s), .m_axis(axis_host_1_src)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_host_2_src_s), .m_axis(axis_host_2_src)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_card_0_src_s), .m_axis(axis_card_0_src)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_card_1_src_s), .m_axis(axis_card_1_src)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_card_2_src_s), .m_axis(axis_card_2_src)); - -// Slave Data Stream -AXI4SR axis_host_0_sink_s (); -AXI4SR axis_host_1_sink_s (); -AXI4SR axis_host_2_sink_s (); -AXI4SR axis_card_0_sink_s (); -AXI4SR axis_card_1_sink_s (); -AXI4SR axis_card_2_sink_s (); - -// register slices -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_host_0_sink), .m_axis(axis_host_0_sink_s)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_host_1_sink), .m_axis(axis_host_1_sink_s)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_host_2_sink), .m_axis(axis_host_2_sink_s)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_card_0_sink), .m_axis(axis_card_0_sink_s)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_card_1_sink), .m_axis(axis_card_1_sink_s)); -axisr_reg_array #(.N_STAGES(4)) (.aclk(aclk), .aresetn(aresetn), .s_axis(axis_card_2_sink), .m_axis(axis_card_2_sink_s)); - - // ACCL Block Design accl_bd_wrapper accl_system( .ap_clk_0(aclk), @@ -143,138 +97,236 @@ accl_bd_wrapper accl_system( .S00_AXI_0_wstrb(axi_ctrl.wstrb), .S00_AXI_0_wvalid(axi_ctrl.wvalid), - .cyt_byp_rd_cmd_0_tdata(bpss_rd_req.data), - .cyt_byp_rd_cmd_0_tready(bpss_rd_req.ready), - .cyt_byp_rd_cmd_0_tvalid(bpss_rd_req.valid), + .cyt_sq_rd_cmd_tdata(sq_rd.data), + .cyt_sq_rd_cmd_tready(sq_rd.ready), + .cyt_sq_rd_cmd_tvalid(sq_rd.valid), + + .cyt_cq_rd_sts_0_tdata(cq_rd.data), + .cyt_cq_rd_sts_0_tready(cq_rd.ready), + .cyt_cq_rd_sts_0_tvalid(cq_rd.valid), + + .cyt_sq_wr_cmd_tdata(sq_wr.data), + .cyt_sq_wr_cmd_tready(sq_wr.ready), + .cyt_sq_wr_cmd_tvalid(sq_wr.valid), - .cyt_byp_rd_sts_0_tdata(bpss_rd_done.data), - .cyt_byp_rd_sts_0_tready(bpss_rd_done.ready), - .cyt_byp_rd_sts_0_tvalid(bpss_rd_done.valid), + .cyt_cq_wr_sts_0_tdata(cq_wr.data), + .cyt_cq_wr_sts_0_tready(cq_wr.ready), + .cyt_cq_wr_sts_0_tvalid(cq_wr.valid), - .cyt_byp_wr_cmd_0_tdata(bpss_wr_req.data), - .cyt_byp_wr_cmd_0_tready(bpss_wr_req.ready), - .cyt_byp_wr_cmd_0_tvalid(bpss_wr_req.valid), + .cyt_rq_rd_tdata(rq_rd.data), + .cyt_rq_rd_tready(rq_rd.ready), + .cyt_rq_rd_tvalid(rq_rd.valid), - .cyt_byp_wr_sts_0_tdata(bpss_wr_done.data), - .cyt_byp_wr_sts_0_tready(bpss_wr_done.ready), - .cyt_byp_wr_sts_0_tvalid(bpss_wr_done.valid), + .cyt_rq_wr_tdata(rq_wr.data), + .cyt_rq_wr_tready(rq_wr.ready), + .cyt_rq_wr_tvalid(rq_wr.valid), - .m_axis_host_0_tdata(axis_host_0_src_s.tdata), - .m_axis_host_0_tkeep(axis_host_0_src_s.tkeep), - .m_axis_host_0_tlast(axis_host_0_src_s.tlast), - .m_axis_host_0_tready(axis_host_0_src_s.tready), - .m_axis_host_0_tvalid(axis_host_0_src_s.tvalid), + .m_axis_host_0_tdata(axis_host_send[0].tdata), + .m_axis_host_0_tkeep(axis_host_send[0].tkeep), + .m_axis_host_0_tlast(axis_host_send[0].tlast), + .m_axis_host_0_tready(axis_host_send[0].tready), + .m_axis_host_0_tvalid(axis_host_send[0].tvalid), .m_axis_host_0_tdest(), - .m_axis_host_1_tdata(axis_host_1_src_s.tdata), - .m_axis_host_1_tkeep(axis_host_1_src_s.tkeep), - .m_axis_host_1_tlast(axis_host_1_src_s.tlast), - .m_axis_host_1_tready(axis_host_1_src_s.tready), - .m_axis_host_1_tvalid(axis_host_1_src_s.tvalid), + .m_axis_host_1_tdata(axis_host_send[1].tdata), + .m_axis_host_1_tkeep(axis_host_send[1].tkeep), + .m_axis_host_1_tlast(axis_host_send[1].tlast), + .m_axis_host_1_tready(axis_host_send[1].tready), + .m_axis_host_1_tvalid(axis_host_send[1].tvalid), .m_axis_host_1_tdest(), - .m_axis_host_2_tdata(axis_host_2_src_s.tdata), - .m_axis_host_2_tkeep(axis_host_2_src_s.tkeep), - .m_axis_host_2_tlast(axis_host_2_src_s.tlast), - .m_axis_host_2_tready(axis_host_2_src_s.tready), - .m_axis_host_2_tvalid(axis_host_2_src_s.tvalid), + .m_axis_host_2_tdata(axis_host_send[2].tdata), + .m_axis_host_2_tkeep(axis_host_send[2].tkeep), + .m_axis_host_2_tlast(axis_host_send[2].tlast), + .m_axis_host_2_tready(axis_host_send[2].tready), + .m_axis_host_2_tvalid(axis_host_send[2].tvalid), .m_axis_host_2_tdest(), - .m_axis_card_0_tdata(axis_card_0_src_s.tdata), - .m_axis_card_0_tkeep(axis_card_0_src_s.tkeep), - .m_axis_card_0_tlast(axis_card_0_src_s.tlast), - .m_axis_card_0_tready(axis_card_0_src_s.tready), - .m_axis_card_0_tvalid(axis_card_0_src_s.tvalid), + .m_axis_card_0_tdata(axis_card_send[0].tdata), + .m_axis_card_0_tkeep(axis_card_send[0].tkeep), + .m_axis_card_0_tlast(axis_card_send[0].tlast), + .m_axis_card_0_tready(axis_card_send[0].tready), + .m_axis_card_0_tvalid(axis_card_send[0].tvalid), .m_axis_card_0_tdest(), - .m_axis_card_1_tdata(axis_card_1_src_s.tdata), - .m_axis_card_1_tkeep(axis_card_1_src_s.tkeep), - .m_axis_card_1_tlast(axis_card_1_src_s.tlast), - .m_axis_card_1_tready(axis_card_1_src_s.tready), - .m_axis_card_1_tvalid(axis_card_1_src_s.tvalid), + .m_axis_card_1_tdata(axis_card_send[1].tdata), + .m_axis_card_1_tkeep(axis_card_send[1].tkeep), + .m_axis_card_1_tlast(axis_card_send[1].tlast), + .m_axis_card_1_tready(axis_card_send[1].tready), + .m_axis_card_1_tvalid(axis_card_send[1].tvalid), .m_axis_card_1_tdest(), - .m_axis_card_2_tdata(axis_card_2_src_s.tdata), - .m_axis_card_2_tkeep(axis_card_2_src_s.tkeep), - .m_axis_card_2_tlast(axis_card_2_src_s.tlast), - .m_axis_card_2_tready(axis_card_2_src_s.tready), - .m_axis_card_2_tvalid(axis_card_2_src_s.tvalid), + .m_axis_card_2_tdata(axis_card_send[2].tdata), + .m_axis_card_2_tkeep(axis_card_send[2].tkeep), + .m_axis_card_2_tlast(axis_card_send[2].tlast), + .m_axis_card_2_tready(axis_card_send[2].tready), + .m_axis_card_2_tvalid(axis_card_send[2].tvalid), .m_axis_card_2_tdest(), - .s_axis_host_0_tdata(axis_host_0_sink_s.tdata), - .s_axis_host_0_tkeep(axis_host_0_sink_s.tkeep), - .s_axis_host_0_tlast(axis_host_0_sink_s.tlast), - .s_axis_host_0_tready(axis_host_0_sink_s.tready), - .s_axis_host_0_tvalid(axis_host_0_sink_s.tvalid), - - .s_axis_host_1_tdata(axis_host_1_sink_s.tdata), - .s_axis_host_1_tkeep(axis_host_1_sink_s.tkeep), - .s_axis_host_1_tlast(axis_host_1_sink_s.tlast), - .s_axis_host_1_tready(axis_host_1_sink_s.tready), - .s_axis_host_1_tvalid(axis_host_1_sink_s.tvalid), - - .s_axis_host_2_tdata(axis_host_2_sink_s.tdata), - .s_axis_host_2_tkeep(axis_host_2_sink_s.tkeep), - .s_axis_host_2_tlast(axis_host_2_sink_s.tlast), - .s_axis_host_2_tready(axis_host_2_sink_s.tready), - .s_axis_host_2_tvalid(axis_host_2_sink_s.tvalid), - - .s_axis_card_0_tdata(axis_card_0_sink_s.tdata), - .s_axis_card_0_tkeep(axis_card_0_sink_s.tkeep), - .s_axis_card_0_tlast(axis_card_0_sink_s.tlast), - .s_axis_card_0_tready(axis_card_0_sink_s.tready), - .s_axis_card_0_tvalid(axis_card_0_sink_s.tvalid), - - .s_axis_card_1_tdata(axis_card_1_sink_s.tdata), - .s_axis_card_1_tkeep(axis_card_1_sink_s.tkeep), - .s_axis_card_1_tlast(axis_card_1_sink_s.tlast), - .s_axis_card_1_tready(axis_card_1_sink_s.tready), - .s_axis_card_1_tvalid(axis_card_1_sink_s.tvalid), - - .s_axis_card_2_tdata(axis_card_2_sink_s.tdata), - .s_axis_card_2_tkeep(axis_card_2_sink_s.tkeep), - .s_axis_card_2_tlast(axis_card_2_sink_s.tlast), - .s_axis_card_2_tready(axis_card_2_sink_s.tready), - .s_axis_card_2_tvalid(axis_card_2_sink_s.tvalid), - - .s_axis_eth_rx_data_tdata(axis_rdma_0_sink.tdata), - .s_axis_eth_rx_data_tdest(axis_rdma_0_sink.tid), - .s_axis_eth_rx_data_tkeep(axis_rdma_0_sink.tkeep), - .s_axis_eth_rx_data_tlast(axis_rdma_0_sink.tlast), - .s_axis_eth_rx_data_tready(axis_rdma_0_sink.tready), - .s_axis_eth_rx_data_tvalid(axis_rdma_0_sink.tvalid), - - .m_axis_eth_tx_data_tdata(axis_rdma_0_src.tdata), - .m_axis_eth_tx_data_tdest(axis_rdma_0_src.tid), // not driven, default 0 - .m_axis_eth_tx_data_tkeep(axis_rdma_0_src.tkeep), - .m_axis_eth_tx_data_tlast(axis_rdma_0_src.tlast), - .m_axis_eth_tx_data_tready(axis_rdma_0_src.tready), - .m_axis_eth_tx_data_tvalid(axis_rdma_0_src.tvalid), - - .s_axis_rdma_wr_req_tdata(rdma_0_wr_req.data), - .s_axis_rdma_wr_req_tvalid(rdma_0_wr_req.valid), - .s_axis_rdma_wr_req_tready(rdma_0_wr_req.ready), - - .s_axis_rdma_rd_req_tdata(rdma_0_rd_req.data), - .s_axis_rdma_rd_req_tvalid(rdma_0_rd_req.valid), - .s_axis_rdma_rd_req_tready(rdma_0_rd_req.ready), - - .m_axis_rdma_sq_tdata(rdma_0_sq.data), - .m_axis_rdma_sq_tvalid(rdma_0_sq.valid), - .m_axis_rdma_sq_tready(rdma_0_sq.ready) + .s_axis_host_0_tdata(axis_host_recv[0].tdata), + .s_axis_host_0_tkeep(axis_host_recv[0].tkeep), + .s_axis_host_0_tlast(axis_host_recv[0].tlast), + .s_axis_host_0_tready(axis_host_recv[0].tready), + .s_axis_host_0_tvalid(axis_host_recv[0].tvalid), + + .s_axis_host_1_tdata(axis_host_recv[1].tdata), + .s_axis_host_1_tkeep(axis_host_recv[1].tkeep), + .s_axis_host_1_tlast(axis_host_recv[1].tlast), + .s_axis_host_1_tready(axis_host_recv[1].tready), + .s_axis_host_1_tvalid(axis_host_recv[1].tvalid), + + .s_axis_host_2_tdata(axis_host_recv[2].tdata), + .s_axis_host_2_tkeep(axis_host_recv[2].tkeep), + .s_axis_host_2_tlast(axis_host_recv[2].tlast), + .s_axis_host_2_tready(axis_host_recv[2].tready), + .s_axis_host_2_tvalid(axis_host_recv[2].tvalid), + + .s_axis_card_0_tdata(axis_card_recv[0].tdata), + .s_axis_card_0_tkeep(axis_card_recv[0].tkeep), + .s_axis_card_0_tlast(axis_card_recv[0].tlast), + .s_axis_card_0_tready(axis_card_recv[0].tready), + .s_axis_card_0_tvalid(axis_card_recv[0].tvalid), + + .s_axis_card_1_tdata(axis_card_recv[1].tdata), + .s_axis_card_1_tkeep(axis_card_recv[1].tkeep), + .s_axis_card_1_tlast(axis_card_recv[1].tlast), + .s_axis_card_1_tready(axis_card_recv[1].tready), + .s_axis_card_1_tvalid(axis_card_recv[1].tvalid), + + .s_axis_card_2_tdata(axis_card_recv[2].tdata), + .s_axis_card_2_tkeep(axis_card_recv[2].tkeep), + .s_axis_card_2_tlast(axis_card_recv[2].tlast), + .s_axis_card_2_tready(axis_card_recv[2].tready), + .s_axis_card_2_tvalid(axis_card_recv[2].tvalid), + + .cyt_rreq_recv_0_tdata(axis_rreq_recv[0].tdata), + .cyt_rreq_recv_0_tkeep(axis_rreq_recv[0].tkeep), + .cyt_rreq_recv_0_tlast(axis_rreq_recv[0].tlast), + .cyt_rreq_recv_0_tready(axis_rreq_recv[0].tready), + .cyt_rreq_recv_0_tvalid(axis_rreq_recv[0].tvalid), + + .cyt_rreq_recv_1_tdata(axis_rreq_recv[1].tdata), + .cyt_rreq_recv_1_tkeep(axis_rreq_recv[1].tkeep), + .cyt_rreq_recv_1_tlast(axis_rreq_recv[1].tlast), + .cyt_rreq_recv_1_tready(axis_rreq_recv[1].tready), + .cyt_rreq_recv_1_tvalid(axis_rreq_recv[1].tvalid), + + .cyt_rreq_send_0_tdata(axis_rreq_send[0].tdata), + .cyt_rreq_send_0_tdest(), + .cyt_rreq_send_0_tkeep(axis_rreq_send[0].tkeep), + .cyt_rreq_send_0_tlast(axis_rreq_send[0].tlast), + .cyt_rreq_send_0_tready(axis_rreq_send[0].tready), + .cyt_rreq_send_0_tstrb(), + .cyt_rreq_send_0_tvalid(axis_rreq_send[0].tvalid), + + .cyt_rreq_send_1_tdata(axis_rreq_send[1].tdata), + .cyt_rreq_send_1_tdest(), + .cyt_rreq_send_1_tkeep(axis_rreq_send[1].tkeep), + .cyt_rreq_send_1_tlast(axis_rreq_send[1].tlast), + .cyt_rreq_send_1_tready(axis_rreq_send[1].tready), + .cyt_rreq_send_1_tstrb(), + .cyt_rreq_send_1_tvalid(axis_rreq_send[1].tvalid), + + .cyt_rrsp_recv_0_tdata(axis_rrsp_recv[0].tdata), + .cyt_rrsp_recv_0_tkeep(axis_rrsp_recv[0].tkeep), + .cyt_rrsp_recv_0_tlast(axis_rrsp_recv[0].tlast), + .cyt_rrsp_recv_0_tready(axis_rrsp_recv[0].tready), + .cyt_rrsp_recv_0_tvalid(axis_rrsp_recv[0].tvalid), + + .cyt_rrsp_recv_1_tdata(axis_rrsp_recv[1].tdata), + .cyt_rrsp_recv_1_tkeep(axis_rrsp_recv[1].tkeep), + .cyt_rrsp_recv_1_tlast(axis_rrsp_recv[1].tlast), + .cyt_rrsp_recv_1_tready(axis_rrsp_recv[1].tready), + .cyt_rrsp_recv_1_tvalid(axis_rrsp_recv[1].tvalid), + + .cyt_rrsp_send_0_tdata(axis_rrsp_send[0].tdata), + .cyt_rrsp_send_0_tkeep(axis_rrsp_send[0].tkeep), + .cyt_rrsp_send_0_tlast(axis_rrsp_send[0].tlast), + .cyt_rrsp_send_0_tready(axis_rrsp_send[0].tready), + .cyt_rrsp_send_0_tvalid(axis_rrsp_send[0].tvalid), + + .cyt_rrsp_send_1_tdata(axis_rrsp_send[1].tdata), + .cyt_rrsp_send_1_tkeep(axis_rrsp_send[1].tkeep), + .cyt_rrsp_send_1_tlast(axis_rrsp_send[1].tlast), + .cyt_rrsp_send_1_tready(axis_rrsp_send[1].tready), + .cyt_rrsp_send_1_tvalid(axis_rrsp_send[1].tvalid) ); -assign axis_host_0_src_s.tid = 0; -assign axis_host_1_src_s.tid = 0; -assign axis_host_2_src_s.tid = 0; - -assign axis_card_0_src_s.tid = 0; -assign axis_card_1_src_s.tid = 0; -assign axis_card_2_src_s.tid = 0; - -assign rdma_0_ack.ready = 1'b1; - +// ila_top ila_top( +// .clk(aclk), +// .probe0(sq_wr.valid), //1 +// .probe1(sq_wr.ready), //1 +// .probe2(sq_wr.data), //128 +// .probe3(sq_rd.valid), //1 +// .probe4(sq_rd.ready), //1 +// .probe5(sq_rd.data), //128 +// .probe6(rq_wr.valid), //1 +// .probe7(rq_wr.ready), //1 +// .probe8(rq_wr.data), //128 +// .probe9(rq_rd.valid), //1 +// .probe10(rq_rd.ready), //1 +// .probe11(rq_rd.data), //128 +// .probe12(axis_rreq_send[0].tvalid), +// .probe13(axis_rreq_send[0].tready), +// .probe14(axis_rreq_send[1].tvalid), +// .probe15(axis_rreq_send[1].tready), +// .probe16(axis_rrsp_recv[0].tvalid), +// .probe17(axis_rrsp_recv[0].tready), +// .probe18(axis_rrsp_recv[1].tvalid), +// .probe19(axis_rrsp_recv[1].tready), +// .probe20(axis_rreq_send[0].tlast), +// .probe21(axis_rreq_send[1].tlast), +// .probe22(axis_rrsp_recv[0].tlast), +// .probe23(axis_rrsp_recv[1].tlast), +// .probe24(cq_rd.data), //32 +// .probe25(cq_rd.valid), +// .probe26(cq_rd.ready), +// .probe27(cq_wr.data), //32 +// .probe28(cq_wr.valid), +// .probe29(cq_wr.ready), +// .probe30(axis_host_send[0].tvalid), +// .probe31(axis_host_send[0].tready), +// .probe32(axis_host_send[1].tvalid), +// .probe33(axis_host_send[1].tready), +// .probe34(axis_host_send[2].tvalid), +// .probe35(axis_host_send[2].tready), +// .probe36(axis_card_send[0].tvalid), +// .probe37(axis_card_send[0].tready), +// .probe38(axis_card_send[1].tvalid), +// .probe39(axis_card_send[1].tready), +// .probe40(axis_card_send[2].tvalid), +// .probe41(axis_card_send[2].tready), +// .probe42(axis_host_recv[0].tvalid), +// .probe43(axis_host_recv[0].tready), +// .probe44(axis_host_recv[1].tvalid), +// .probe45(axis_host_recv[1].tready), +// .probe46(axis_host_recv[2].tvalid), +// .probe47(axis_host_recv[2].tready), +// .probe48(axis_card_recv[0].tvalid), +// .probe49(axis_card_recv[0].tready), +// .probe50(axis_card_recv[1].tvalid), +// .probe51(axis_card_recv[1].tready), +// .probe52(axis_card_recv[2].tvalid), +// .probe53(axis_card_recv[2].tready) +// ); + + + + +assign axis_host_send[0].tid = 0; +assign axis_host_send[1].tid = 0; +assign axis_host_send[2].tid = 0; + +assign axis_card_send[0].tid = 0; +assign axis_card_send[1].tid = 0; +assign axis_card_send[2].tid = 0; + +assign axis_rreq_send[0].tid = 0; +assign axis_rreq_send[1].tid = 0; + +assign axis_rrsp_send[0].tid = 0; +assign axis_rrsp_send[1].tid = 0; endmodule \ No newline at end of file diff --git a/test/refdesigns/tcl/coyote.tcl b/test/refdesigns/tcl/coyote.tcl index 5225362d..832b8594 100644 --- a/test/refdesigns/tcl/coyote.tcl +++ b/test/refdesigns/tcl/coyote.tcl @@ -1,3 +1,4 @@ + # /******************************************************************************* # Copyright (C) 2023 Advanced Micro Devices, Inc # @@ -17,234 +18,884 @@ set nettype [lindex $::argv 0] set build_dir [lindex $::argv 1] -open_project "$build_dir/lynx/lynx.xpr" +open_project "$build_dir/test_config_0/user_c0_0/test.xpr" update_compile_order -fileset sources_1 create_bd_design "accl_bd" update_compile_order -fileset sources_1 update_ip_catalog -create_bd_cell -type ip -vlnv Xilinx:ACCL:ccl_offload:1.0 ccl_offload_0 -create_bd_cell -type ip -vlnv xilinx.com:ACCL:cyt_dma_adapter:1.0 cyt_dma_adapter_0 - -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_dma0_mm2s_cmd] [get_bd_intf_pins cyt_dma_adapter_0/dma0_mm2s_cmd] -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_dma1_mm2s_cmd] [get_bd_intf_pins cyt_dma_adapter_0/dma1_mm2s_cmd] -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_dma1_s2mm_cmd] [get_bd_intf_pins cyt_dma_adapter_0/dma1_s2mm_cmd] -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_dma0_s2mm_cmd] [get_bd_intf_pins cyt_dma_adapter_0/dma0_s2mm_cmd] -connect_bd_intf_net [get_bd_intf_pins cyt_dma_adapter_0/dma0_s2mm_sts] [get_bd_intf_pins ccl_offload_0/s_axis_dma0_s2mm_sts] -connect_bd_intf_net [get_bd_intf_pins cyt_dma_adapter_0/dma1_s2mm_sts] [get_bd_intf_pins ccl_offload_0/s_axis_dma1_s2mm_sts] -connect_bd_intf_net [get_bd_intf_pins cyt_dma_adapter_0/dma0_mm2s_sts] [get_bd_intf_pins ccl_offload_0/s_axis_dma0_mm2s_sts] -connect_bd_intf_net [get_bd_intf_pins cyt_dma_adapter_0/dma1_mm2s_sts] [get_bd_intf_pins ccl_offload_0/s_axis_dma1_mm2s_sts] -make_bd_pins_external [get_bd_pins ccl_offload_0/ap_clk] -make_bd_pins_external [get_bd_pins ccl_offload_0/ap_rst_n] -connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins cyt_dma_adapter_0/ap_clk] -connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins cyt_dma_adapter_0/ap_rst_n] -make_bd_intf_pins_external [get_bd_intf_pins cyt_dma_adapter_0/cyt_byp_wr_sts] -make_bd_intf_pins_external [get_bd_intf_pins cyt_dma_adapter_0/cyt_byp_rd_sts] -make_bd_intf_pins_external [get_bd_intf_pins cyt_dma_adapter_0/cyt_byp_wr_cmd] -make_bd_intf_pins_external [get_bd_intf_pins cyt_dma_adapter_0/cyt_byp_rd_cmd] - -create_bd_cell -type ip -vlnv xilinx.com:ACCL:reduce_ops:1.0 reduce_ops_0 -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_arith_op0] [get_bd_intf_pins reduce_ops_0/in0] -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_arith_op1] [get_bd_intf_pins reduce_ops_0/in1] -connect_bd_intf_net [get_bd_intf_pins reduce_ops_0/out_r] [get_bd_intf_pins ccl_offload_0/s_axis_arith_res] -connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins reduce_ops_0/ap_clk] -connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins reduce_ops_0/ap_rst_n] - -create_bd_cell -type ip -vlnv xilinx.com:ACCL:hostctrl:1.0 hostctrl_0 -connect_bd_intf_net [get_bd_intf_pins hostctrl_0/cmd] [get_bd_intf_pins ccl_offload_0/s_axis_call_req] -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_call_ack] [get_bd_intf_pins hostctrl_0/sts] -connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins hostctrl_0/ap_clk] -connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins hostctrl_0/ap_rst_n] - -# direct loopback for compression and kernel streams -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_krnl] [get_bd_intf_pins ccl_offload_0/s_axis_krnl] -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_compression0] [get_bd_intf_pins ccl_offload_0/s_axis_compression0] -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_compression1] [get_bd_intf_pins ccl_offload_0/s_axis_compression1] -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_compression2] [get_bd_intf_pins ccl_offload_0/s_axis_compression2] - -# create axis switch -create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_2_to_1_inst_0 -set_property -dict [list CONFIG.NUM_SI {2} CONFIG.TDATA_NUM_BYTES {64} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.ARB_ON_TLAST {1} CONFIG.NUM_MI {1} CONFIG.DECODER_REG {0} CONFIG.ARB_ON_MAX_XFERS {0} CONFIG.Component_Name {axis_switch_2_to_1_inst_0}] [get_bd_cells axis_switch_2_to_1_inst_0] - -create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_2_to_1_inst_1 -set_property -dict [list CONFIG.NUM_SI {2} CONFIG.TDATA_NUM_BYTES {64} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.ARB_ON_TLAST {1} CONFIG.NUM_MI {1} CONFIG.DECODER_REG {0} CONFIG.ARB_ON_MAX_XFERS {0} CONFIG.Component_Name {axis_switch_2_to_1_inst_1}] [get_bd_cells axis_switch_2_to_1_inst_1] - -create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_1_to_2_inst_0 -set_property -dict [list CONFIG.NUM_SI {1} CONFIG.NUM_MI {2} CONFIG.TDATA_NUM_BYTES {64} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.TDEST_WIDTH {8} CONFIG.DECODER_REG {1} CONFIG.Component_Name {axis_switch_1_to_2_inst_0}] [get_bd_cells axis_switch_1_to_2_inst_0] - -create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_1_to_2_inst_1 -set_property -dict [list CONFIG.NUM_SI {1} CONFIG.NUM_MI {2} CONFIG.TDATA_NUM_BYTES {64} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.TDEST_WIDTH {8} CONFIG.DECODER_REG {1} CONFIG.Component_Name {axis_switch_1_to_2_inst_1}] [get_bd_cells axis_switch_1_to_2_inst_1] - -switch $nettype { - "TCP" { - # externalize TCP streams - make_bd_intf_pins_external [get_bd_intf_pins ccl_offload_0/s_axis_eth_rx_data] - make_bd_intf_pins_external [get_bd_intf_pins ccl_offload_0/m_axis_eth_tx_data] - make_bd_intf_pins_external [get_bd_intf_pins ccl_offload_0/s_axis_eth_tx_status] - make_bd_intf_pins_external [get_bd_intf_pins ccl_offload_0/m_axis_eth_read_pkg] - make_bd_intf_pins_external [get_bd_intf_pins ccl_offload_0/s_axis_eth_rx_meta] - make_bd_intf_pins_external [get_bd_intf_pins ccl_offload_0/s_axis_eth_notification] - make_bd_intf_pins_external [get_bd_intf_pins ccl_offload_0/m_axis_eth_tx_meta] - } - "RDMA" { - # externalize RDMA streams - # data streams - set m_axis_eth_tx_data [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_eth_tx_data ] - set_property -dict [ list CONFIG.FREQ_HZ {250000000} ] $m_axis_eth_tx_data - set s_axis_eth_rx_data [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_eth_rx_data ] - set_property -dict [ list CONFIG.FREQ_HZ {250000000} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.HAS_TREADY {1} CONFIG.HAS_TSTRB {1} CONFIG.LAYERED_METADATA {undef} CONFIG.TDATA_NUM_BYTES {64} CONFIG.TDEST_WIDTH {8} CONFIG.TID_WIDTH {0} CONFIG.TUSER_WIDTH {0} ] $s_axis_eth_rx_data - - # RDMA sq and rq - set m_axis_rdma_sq [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_rdma_sq ] - set_property -dict [ list CONFIG.FREQ_HZ {250000000} ] $m_axis_rdma_sq - - # RDMA extra pair of host/card streams - set m_axis_host_2 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_host_2 ] - set_property -dict [ list CONFIG.FREQ_HZ {250000000} ] $m_axis_host_2 - set m_axis_card_2 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_card_2 ] - set_property -dict [ list CONFIG.FREQ_HZ {250000000} ] $m_axis_card_2 - set s_axis_host_2 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_host_2 ] - set_property -dict [ list CONFIG.FREQ_HZ {250000000} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.HAS_TREADY {1} CONFIG.HAS_TSTRB {0} CONFIG.LAYERED_METADATA {undef} CONFIG.TDATA_NUM_BYTES {64} CONFIG.TDEST_WIDTH {0} CONFIG.TID_WIDTH {0} CONFIG.TUSER_WIDTH {0} ] $s_axis_host_2 - set s_axis_card_2 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_card_2 ] - set_property -dict [ list CONFIG.FREQ_HZ {250000000} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.HAS_TREADY {1} CONFIG.HAS_TSTRB {0} CONFIG.LAYERED_METADATA {undef} CONFIG.TDATA_NUM_BYTES {64} CONFIG.TDEST_WIDTH {0} CONFIG.TID_WIDTH {0} CONFIG.TUSER_WIDTH {0} ] $s_axis_card_2 - - # RDMA wr_req and rd_req - set s_axis_rdma_wr_req [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_rdma_wr_req ] - set_property -dict [ list CONFIG.FREQ_HZ {250000000} CONFIG.HAS_TKEEP {0} CONFIG.HAS_TLAST {0} CONFIG.HAS_TREADY {1} CONFIG.HAS_TSTRB {0} CONFIG.LAYERED_METADATA {undef} CONFIG.TDATA_NUM_BYTES {12} CONFIG.TDEST_WIDTH {0} CONFIG.TID_WIDTH {0} CONFIG.TUSER_WIDTH {0} ] $s_axis_rdma_wr_req - set s_axis_rdma_rd_req [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_rdma_rd_req ] - set_property -dict [ list CONFIG.FREQ_HZ {250000000} CONFIG.HAS_TKEEP {0} CONFIG.HAS_TLAST {0} CONFIG.HAS_TREADY {1} CONFIG.HAS_TSTRB {0} CONFIG.LAYERED_METADATA {undef} CONFIG.TDATA_NUM_BYTES {12} CONFIG.TDEST_WIDTH {0} CONFIG.TID_WIDTH {0} CONFIG.TUSER_WIDTH {0} ] $s_axis_rdma_rd_req - - # connections for rdma_arbiter and the axi 1-to-2 switch - create_bd_cell -type ip -vlnv xilinx.com:ACCL:cyt_rdma_arbiter:1.0 cyt_rdma_arbiter_0 - create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_1_to_2_inst_2 - set_property -dict [list CONFIG.NUM_SI {1} CONFIG.NUM_MI {2} CONFIG.TDATA_NUM_BYTES {64} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.TDEST_WIDTH {8} CONFIG.DECODER_REG {1} CONFIG.Component_Name {axis_switch_1_to_2_inst_2}] [get_bd_cells axis_switch_1_to_2_inst_2] - connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins cyt_rdma_arbiter_0/ap_clk] - connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins cyt_rdma_arbiter_0/ap_rst_n] - connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins axis_switch_1_to_2_inst_2/aclk] - connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins axis_switch_1_to_2_inst_2/aresetn] - - connect_bd_intf_net [get_bd_intf_ports s_axis_eth_rx_data] [get_bd_intf_pins cyt_rdma_arbiter_0/s_axis] - connect_bd_intf_net [get_bd_intf_ports s_axis_rdma_wr_req] [get_bd_intf_pins cyt_rdma_arbiter_0/s_meta] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_arbiter_0/m_meta_0] [get_bd_intf_pins ccl_offload_0/s_axis_eth_notification] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_arbiter_0/m_axis_0] [get_bd_intf_pins ccl_offload_0/s_axis_eth_rx_data] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_arbiter_0/m_meta_1] [get_bd_intf_pins cyt_dma_adapter_0/rdma_wr_req] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_arbiter_0/m_axis_1] [get_bd_intf_pins axis_switch_1_to_2_inst_2/S00_AXIS] - connect_bd_intf_net [get_bd_intf_pins axis_switch_1_to_2_inst_2/M00_AXIS] [get_bd_intf_ports m_axis_card_2] - connect_bd_intf_net [get_bd_intf_pins axis_switch_1_to_2_inst_2/M01_AXIS] [get_bd_intf_ports m_axis_host_2] - - # connections for rdma_mux and the axi 2-to-1 switch - create_bd_cell -type ip -vlnv xilinx.com:ACCL:cyt_rdma_mux:1.0 cyt_rdma_mux_0 - create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_2_to_1_inst_2 - set_property -dict [list CONFIG.NUM_SI {2} CONFIG.TDATA_NUM_BYTES {64} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.ARB_ON_TLAST {1} CONFIG.NUM_MI {1} CONFIG.DECODER_REG {0} CONFIG.ARB_ON_MAX_XFERS {0} CONFIG.Component_Name {axis_switch_2_to_1_inst_2}] [get_bd_cells axis_switch_2_to_1_inst_2] - connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins cyt_rdma_mux_0/ap_clk] - connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins cyt_rdma_mux_0/ap_rst_n] - connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins axis_switch_2_to_1_inst_2/aclk] - connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins axis_switch_2_to_1_inst_2/aresetn] - create_bd_cell -type ip -vlnv xilinx.com:ip:xlconstant:1.1 xlconstant_2 - set_property -dict [list CONFIG.CONST_WIDTH {2}] [get_bd_cells xlconstant_2] - set_property -dict [list CONFIG.CONST_VAL {0}] [get_bd_cells xlconstant_2] - connect_bd_net [get_bd_pins xlconstant_2/dout] [get_bd_pins axis_switch_2_to_1_inst_2/s_req_suppress] - - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_mux_0/s_meta_0] [get_bd_intf_pins ccl_offload_0/m_axis_rdma_sq] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_mux_0/s_axis_0] [get_bd_intf_pins ccl_offload_0/m_axis_eth_tx_data] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_mux_0/s_meta_1] [get_bd_intf_ports s_axis_rdma_rd_req] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_mux_0/s_axis_1] [get_bd_intf_pins axis_switch_2_to_1_inst_2/M00_AXIS] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_mux_0/m_meta_0] [get_bd_intf_ports m_axis_rdma_sq] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_mux_0/m_meta_1] [get_bd_intf_pins cyt_dma_adapter_0/rdma_rd_req] - connect_bd_intf_net [get_bd_intf_pins cyt_rdma_mux_0/m_axis] [get_bd_intf_ports m_axis_eth_tx_data] - connect_bd_intf_net [get_bd_intf_ports s_axis_host_2] [get_bd_intf_pins axis_switch_2_to_1_inst_2/S00_AXIS] - connect_bd_intf_net [get_bd_intf_ports s_axis_card_2] [get_bd_intf_pins axis_switch_2_to_1_inst_2/S01_AXIS] - - - } - default { - puts "Unrecognized network backend" - exit - } -} - - -# externalize DMA data streams - -set m_axis_host_0 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_host_0 ] -set_property -dict [ list CONFIG.FREQ_HZ {250000000} ] $m_axis_host_0 -set m_axis_host_1 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_host_1 ] -set_property -dict [ list CONFIG.FREQ_HZ {250000000} ] $m_axis_host_1 -set m_axis_card_0 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_card_0 ] -set_property -dict [ list CONFIG.FREQ_HZ {250000000} ] $m_axis_card_0 -set m_axis_card_1 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_card_1 ] -set_property -dict [ list CONFIG.FREQ_HZ {250000000} ] $m_axis_card_1 - -set s_axis_host_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_host_0 ] -set_property -dict [ list CONFIG.FREQ_HZ {250000000} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.HAS_TREADY {1} CONFIG.HAS_TSTRB {0} CONFIG.LAYERED_METADATA {undef} CONFIG.TDATA_NUM_BYTES {64} CONFIG.TDEST_WIDTH {0} CONFIG.TID_WIDTH {0} CONFIG.TUSER_WIDTH {0} ] $s_axis_host_0 -set s_axis_host_1 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_host_1 ] -set_property -dict [ list CONFIG.FREQ_HZ {250000000} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.HAS_TREADY {1} CONFIG.HAS_TSTRB {0} CONFIG.LAYERED_METADATA {undef} CONFIG.TDATA_NUM_BYTES {64} CONFIG.TDEST_WIDTH {0} CONFIG.TID_WIDTH {0} CONFIG.TUSER_WIDTH {0} ] $s_axis_host_1 -set s_axis_card_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_card_0 ] -set_property -dict [ list CONFIG.FREQ_HZ {250000000} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.HAS_TREADY {1} CONFIG.HAS_TSTRB {0} CONFIG.LAYERED_METADATA {undef} CONFIG.TDATA_NUM_BYTES {64} CONFIG.TDEST_WIDTH {0} CONFIG.TID_WIDTH {0} CONFIG.TUSER_WIDTH {0} ] $s_axis_card_0 -set s_axis_card_1 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_card_1 ] -set_property -dict [ list CONFIG.FREQ_HZ {250000000} CONFIG.HAS_TKEEP {1} CONFIG.HAS_TLAST {1} CONFIG.HAS_TREADY {1} CONFIG.HAS_TSTRB {0} CONFIG.LAYERED_METADATA {undef} CONFIG.TDATA_NUM_BYTES {64} CONFIG.TDEST_WIDTH {0} CONFIG.TID_WIDTH {0} CONFIG.TUSER_WIDTH {0} ] $s_axis_card_1 - - -# s_axis_host_0 and s_axis_card_0 multiplexed to single s_axis_dma0_mm2s stream, round-robin by tlast -connect_bd_intf_net [get_bd_intf_ports s_axis_host_0] [get_bd_intf_pins axis_switch_2_to_1_inst_0/S00_AXIS] -connect_bd_intf_net [get_bd_intf_ports s_axis_card_0] [get_bd_intf_pins axis_switch_2_to_1_inst_0/S01_AXIS] -connect_bd_intf_net [get_bd_intf_pins axis_switch_2_to_1_inst_0/M00_AXIS] [get_bd_intf_pins ccl_offload_0/s_axis_dma0_mm2s] -connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins axis_switch_2_to_1_inst_0/aclk] -connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins axis_switch_2_to_1_inst_0/aresetn] - -create_bd_cell -type ip -vlnv xilinx.com:ip:xlconstant:1.1 xlconstant_0 -set_property -dict [list CONFIG.CONST_WIDTH {2}] [get_bd_cells xlconstant_0] -set_property -dict [list CONFIG.CONST_VAL {0}] [get_bd_cells xlconstant_0] -connect_bd_net [get_bd_pins xlconstant_0/dout] [get_bd_pins axis_switch_2_to_1_inst_0/s_req_suppress] - -# s_axis_host_1 and s_axis_card_1 multiplexed to single s_axis_dma1_mm2s stream, round-robin by tlast -connect_bd_intf_net [get_bd_intf_ports s_axis_host_1] [get_bd_intf_pins axis_switch_2_to_1_inst_1/S00_AXIS] -connect_bd_intf_net [get_bd_intf_ports s_axis_card_1] [get_bd_intf_pins axis_switch_2_to_1_inst_1/S01_AXIS] -connect_bd_intf_net [get_bd_intf_pins axis_switch_2_to_1_inst_1/M00_AXIS] [get_bd_intf_pins ccl_offload_0/s_axis_dma1_mm2s] -connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins axis_switch_2_to_1_inst_1/aclk] -connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins axis_switch_2_to_1_inst_1/aresetn] - -create_bd_cell -type ip -vlnv xilinx.com:ip:xlconstant:1.1 xlconstant_1 -set_property -dict [list CONFIG.CONST_WIDTH {2}] [get_bd_cells xlconstant_1] -set_property -dict [list CONFIG.CONST_VAL {0}] [get_bd_cells xlconstant_1] -connect_bd_net [get_bd_pins xlconstant_1/dout] [get_bd_pins axis_switch_2_to_1_inst_1/s_req_suppress] - -# m_axis_dma0_s2mm multiplex to m_axis_host_0 and m_axis_card_0 according to the strm flag encoded in m_axis_dma0_s2mm tdest -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_dma0_s2mm] [get_bd_intf_pins axis_switch_1_to_2_inst_0/S00_AXIS] -connect_bd_intf_net [get_bd_intf_ports m_axis_card_0] [get_bd_intf_pins axis_switch_1_to_2_inst_0/M00_AXIS] -connect_bd_intf_net [get_bd_intf_ports m_axis_host_0] [get_bd_intf_pins axis_switch_1_to_2_inst_0/M01_AXIS] -connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins axis_switch_1_to_2_inst_0/aclk] -connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins axis_switch_1_to_2_inst_0/aresetn] - -# m_axis_dma1_s2mm multiplex to m_axis_host_1 and m_axis_card_1 according to the strm flag encoded in m_axis_dma1_s2mm tdest -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/m_axis_dma1_s2mm] [get_bd_intf_pins axis_switch_1_to_2_inst_1/S00_AXIS] -connect_bd_intf_net [get_bd_intf_ports m_axis_card_1] [get_bd_intf_pins axis_switch_1_to_2_inst_1/M00_AXIS] -connect_bd_intf_net [get_bd_intf_ports m_axis_host_1] [get_bd_intf_pins axis_switch_1_to_2_inst_1/M01_AXIS] -connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins axis_switch_1_to_2_inst_1/aclk] -connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins axis_switch_1_to_2_inst_1/aresetn] - - - -# connect up AXI lite -create_bd_cell -type ip -vlnv xilinx.com:ip:smartconnect:1.0 smartconnect_0 -set_property -dict [list CONFIG.NUM_MI {2} CONFIG.NUM_SI {1}] [get_bd_cells smartconnect_0] -connect_bd_net [get_bd_ports ap_clk_0] [get_bd_pins smartconnect_0/aclk] -connect_bd_net [get_bd_ports ap_rst_n_0] [get_bd_pins smartconnect_0/aresetn] -connect_bd_intf_net [get_bd_intf_pins hostctrl_0/s_axi_control] [get_bd_intf_pins smartconnect_0/M00_AXI] -connect_bd_intf_net [get_bd_intf_pins ccl_offload_0/s_axi_control] [get_bd_intf_pins smartconnect_0/M01_AXI] -make_bd_intf_pins_external [get_bd_intf_pins smartconnect_0/S00_AXI] -set_property -dict [list CONFIG.ADDR_WIDTH {16}] [get_bd_intf_ports S00_AXI_0] - -# Create address segments -assign_bd_address -offset 0x00000000 -range 0x00002000 -target_address_space [get_bd_addr_spaces S00_AXI_0] [get_bd_addr_segs ccl_offload_0/s_axi_control/reg0] -force -assign_bd_address -offset 0x00002000 -range 0x00002000 -target_address_space [get_bd_addr_spaces S00_AXI_0] [get_bd_addr_segs hostctrl_0/s_axi_control/Reg] -force - -set_property CONFIG.PROTOCOL AXI4LITE [get_bd_intf_ports /S00_AXI_0] -set_property -dict [list CONFIG.HAS_BURST {0} CONFIG.HAS_CACHE {0} CONFIG.HAS_LOCK {0} CONFIG.HAS_QOS {0} CONFIG.HAS_REGION {0}] [get_bd_intf_ports S00_AXI_0] + + + # Create interface ports + set S00_AXI_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:aximm_rtl:1.0 S00_AXI_0 ] + set_property -dict [ list \ + CONFIG.ADDR_WIDTH {14} \ + CONFIG.ARUSER_WIDTH {0} \ + CONFIG.AWUSER_WIDTH {0} \ + CONFIG.BUSER_WIDTH {0} \ + CONFIG.DATA_WIDTH {32} \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_BRESP {1} \ + CONFIG.HAS_BURST {0} \ + CONFIG.HAS_CACHE {0} \ + CONFIG.HAS_LOCK {0} \ + CONFIG.HAS_PROT {1} \ + CONFIG.HAS_QOS {0} \ + CONFIG.HAS_REGION {0} \ + CONFIG.HAS_RRESP {1} \ + CONFIG.HAS_WSTRB {1} \ + CONFIG.ID_WIDTH {0} \ + CONFIG.MAX_BURST_LENGTH {1} \ + CONFIG.NUM_READ_OUTSTANDING {1} \ + CONFIG.NUM_READ_THREADS {1} \ + CONFIG.NUM_WRITE_OUTSTANDING {1} \ + CONFIG.NUM_WRITE_THREADS {1} \ + CONFIG.PROTOCOL {AXI4LITE} \ + CONFIG.READ_WRITE_MODE {READ_WRITE} \ + CONFIG.RUSER_BITS_PER_BYTE {0} \ + CONFIG.RUSER_WIDTH {0} \ + CONFIG.SUPPORTS_NARROW_BURST {0} \ + CONFIG.WUSER_BITS_PER_BYTE {0} \ + CONFIG.WUSER_WIDTH {0} \ + ] $S00_AXI_0 + + set cyt_cq_rd_sts_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_cq_rd_sts_0 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {0} \ + CONFIG.HAS_TLAST {0} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {4} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $cyt_cq_rd_sts_0 + + set cyt_cq_wr_sts_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_cq_wr_sts_0 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {0} \ + CONFIG.HAS_TLAST {0} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {4} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $cyt_cq_wr_sts_0 + + set cyt_rq_rd [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rq_rd ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {0} \ + CONFIG.HAS_TLAST {0} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {16} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $cyt_rq_rd + + set cyt_rq_wr [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rq_wr ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {0} \ + CONFIG.HAS_TLAST {0} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {16} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $cyt_rq_wr + + set cyt_rreq_recv_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rreq_recv_0 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.HAS_TREADY {0} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $cyt_rreq_recv_0 + + set cyt_rreq_recv_1 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rreq_recv_1 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $cyt_rreq_recv_1 + + set cyt_rreq_send_0 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rreq_send_0 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $cyt_rreq_send_0 + + set cyt_rreq_send_1 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rreq_send_1 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $cyt_rreq_send_1 + + set cyt_rrsp_recv_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rrsp_recv_0 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $cyt_rrsp_recv_0 + + set cyt_rrsp_recv_1 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rrsp_recv_1 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $cyt_rrsp_recv_1 + + set cyt_rrsp_send_0 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rrsp_send_0 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $cyt_rrsp_send_0 + + set cyt_rrsp_send_1 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_rrsp_send_1 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $cyt_rrsp_send_1 + + set cyt_sq_rd_cmd [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_sq_rd_cmd ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $cyt_sq_rd_cmd + + set cyt_sq_wr_cmd [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 cyt_sq_wr_cmd ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $cyt_sq_wr_cmd + + set m_axis_card_0 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_card_0 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $m_axis_card_0 + + set m_axis_card_1 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_card_1 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $m_axis_card_1 + + set m_axis_card_2 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_card_2 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $m_axis_card_2 + + set m_axis_host_0 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_host_0 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $m_axis_host_0 + + set m_axis_host_1 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_host_1 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $m_axis_host_1 + + set m_axis_host_2 [ create_bd_intf_port -mode Master -vlnv xilinx.com:interface:axis_rtl:1.0 m_axis_host_2 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + ] $m_axis_host_2 + + set s_axis_card_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_card_0 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $s_axis_card_0 + + set s_axis_card_1 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_card_1 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $s_axis_card_1 + + set s_axis_card_2 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_card_2 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $s_axis_card_2 + + set s_axis_host_0 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_host_0 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $s_axis_host_0 + + set s_axis_host_1 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_host_1 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $s_axis_host_1 + + set s_axis_host_2 [ create_bd_intf_port -mode Slave -vlnv xilinx.com:interface:axis_rtl:1.0 s_axis_host_2 ] + set_property -dict [ list \ + CONFIG.FREQ_HZ {250000000} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.HAS_TREADY {1} \ + CONFIG.HAS_TSTRB {0} \ + CONFIG.LAYERED_METADATA {undef} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {0} \ + CONFIG.TID_WIDTH {0} \ + CONFIG.TUSER_WIDTH {0} \ + ] $s_axis_host_2 + + + # Create ports + set ap_clk_0 [ create_bd_port -dir I -type clk -freq_hz 250000000 ap_clk_0 ] + set_property -dict [ list \ + CONFIG.ASSOCIATED_BUSIF {cyt_cq_wr_sts_0:cyt_cq_rd_sts_0:cyt_sq_wr_cmd:cyt_sq_rd_cmd:m_axis_host_2:m_axis_card_2:s_axis_host_2:s_axis_card_2:cyt_rq_wr:m_axis_host_0:m_axis_host_1:m_axis_card_0:m_axis_card_1:s_axis_host_0:s_axis_host_1:s_axis_card_0:s_axis_card_1:S00_AXI_0:cyt_rreq_send_0:cyt_rreq_send_1:cyt_rrsp_recv_0:cyt_rrsp_recv_1:cyt_rrsp_send_0:cyt_rrsp_send_1:cyt_rq_rd:cyt_rreq_recv_0:cyt_rreq_recv_1} \ + ] $ap_clk_0 + set ap_rst_n_0 [ create_bd_port -dir I -type rst ap_rst_n_0 ] + + # Create instance: axis_data_fifo_0, and set properties + set axis_data_fifo_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_data_fifo:2.0 axis_data_fifo_0 ] + set_property -dict [ list \ + CONFIG.FIFO_DEPTH {16} \ + ] $axis_data_fifo_0 + + # Create instance: axis_data_fifo_1, and set properties + set axis_data_fifo_1 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_data_fifo:2.0 axis_data_fifo_1 ] + set_property -dict [ list \ + CONFIG.FIFO_DEPTH {16} \ + ] $axis_data_fifo_1 + + # Create instance: axis_data_fifo_2, and set properties + set axis_data_fifo_2 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_data_fifo:2.0 axis_data_fifo_2 ] + set_property -dict [ list \ + CONFIG.FIFO_DEPTH {16} \ + ] $axis_data_fifo_2 + + # Create instance: axis_data_fifo_3, and set properties + set axis_data_fifo_3 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_data_fifo:2.0 axis_data_fifo_3 ] + set_property -dict [ list \ + CONFIG.FIFO_DEPTH {16} \ + ] $axis_data_fifo_3 + + # Create instance: axis_data_fifo_4, and set properties + set axis_data_fifo_4 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_data_fifo:2.0 axis_data_fifo_4 ] + set_property -dict [ list \ + CONFIG.FIFO_DEPTH {16} \ + ] $axis_data_fifo_4 + + # Create instance: axis_data_fifo_5, and set properties + set axis_data_fifo_5 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_data_fifo:2.0 axis_data_fifo_5 ] + set_property -dict [ list \ + CONFIG.FIFO_DEPTH {16} \ + ] $axis_data_fifo_5 + + # Create instance: axis_data_fifo_6, and set properties + set axis_data_fifo_6 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_data_fifo:2.0 axis_data_fifo_6 ] + set_property -dict [ list \ + CONFIG.FIFO_DEPTH {16} \ + ] $axis_data_fifo_6 + + # Create instance: axis_data_fifo_7, and set properties + set axis_data_fifo_7 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_data_fifo:2.0 axis_data_fifo_7 ] + set_property -dict [ list \ + CONFIG.FIFO_DEPTH {16} \ + ] $axis_data_fifo_7 + + # Create instance: axis_data_fifo_8, and set properties + set axis_data_fifo_8 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_data_fifo:2.0 axis_data_fifo_8 ] + set_property -dict [ list \ + CONFIG.FIFO_DEPTH {16} \ + ] $axis_data_fifo_8 + + # Create instance: axis_data_fifo_9, and set properties + set axis_data_fifo_9 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_data_fifo:2.0 axis_data_fifo_9 ] + set_property -dict [ list \ + CONFIG.FIFO_DEPTH {16} \ + ] $axis_data_fifo_9 + + # Create instance: axis_register_slice_0, and set properties + set axis_register_slice_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_0 ] + set_property -dict [ list \ + CONFIG.NUM_SLR_CROSSINGS {0} \ + CONFIG.REG_CONFIG {16} \ + ] $axis_register_slice_0 + + # Create instance: axis_register_slice_1, and set properties + set axis_register_slice_1 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_1 ] + set_property -dict [ list \ + CONFIG.NUM_SLR_CROSSINGS {0} \ + CONFIG.REG_CONFIG {16} \ + ] $axis_register_slice_1 + + # Create instance: axis_register_slice_2, and set properties + set axis_register_slice_2 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_2 ] + set_property -dict [ list \ + CONFIG.NUM_SLR_CROSSINGS {0} \ + CONFIG.REG_CONFIG {16} \ + ] $axis_register_slice_2 + + # Create instance: axis_register_slice_3, and set properties + set axis_register_slice_3 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_3 ] + set_property -dict [ list \ + CONFIG.NUM_SLR_CROSSINGS {0} \ + CONFIG.REG_CONFIG {16} \ + ] $axis_register_slice_3 + + # Create instance: axis_register_slice_4, and set properties + set axis_register_slice_4 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_4 ] + set_property -dict [ list \ + CONFIG.NUM_SLR_CROSSINGS {0} \ + CONFIG.REG_CONFIG {16} \ + ] $axis_register_slice_4 + + # Create instance: axis_register_slice_5, and set properties + set axis_register_slice_5 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_5 ] + set_property -dict [ list \ + CONFIG.NUM_SLR_CROSSINGS {0} \ + CONFIG.REG_CONFIG {16} \ + ] $axis_register_slice_5 + + # Create instance: axis_register_slice_6, and set properties + set axis_register_slice_6 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_6 ] + set_property -dict [ list \ + CONFIG.NUM_SLR_CROSSINGS {0} \ + CONFIG.REG_CONFIG {16} \ + ] $axis_register_slice_6 + + # Create instance: axis_register_slice_7, and set properties + set axis_register_slice_7 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_7 ] + set_property -dict [ list \ + CONFIG.NUM_SLR_CROSSINGS {0} \ + CONFIG.REG_CONFIG {16} \ + ] $axis_register_slice_7 + + # Create instance: axis_register_slice_8, and set properties + set axis_register_slice_8 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_8 ] + set_property -dict [ list \ + CONFIG.NUM_SLR_CROSSINGS {0} \ + CONFIG.REG_CONFIG {16} \ + ] $axis_register_slice_8 + + # Create instance: axis_register_slice_9, and set properties + set axis_register_slice_9 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_9 ] + set_property -dict [ list \ + CONFIG.NUM_SLR_CROSSINGS {0} \ + CONFIG.REG_CONFIG {16} \ + ] $axis_register_slice_9 + + # Create instance: axis_register_slice_10, and set properties + set axis_register_slice_10 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_10 ] + set_property -dict [ list \ + CONFIG.NUM_SLR_CROSSINGS {0} \ + CONFIG.REG_CONFIG {16} \ + ] $axis_register_slice_10 + + # Create instance: axis_register_slice_11, and set properties + set axis_register_slice_11 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_11 ] + set_property -dict [ list \ + CONFIG.NUM_SLR_CROSSINGS {0} \ + CONFIG.REG_CONFIG {16} \ + ] $axis_register_slice_11 + + # Create instance: axis_register_slice_12, and set properties + set axis_register_slice_12 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_12 ] + set_property -dict [ list \ + CONFIG.NUM_SLR_CROSSINGS {0} \ + CONFIG.REG_CONFIG {16} \ + ] $axis_register_slice_12 + + # Create instance: axis_register_slice_13, and set properties + set axis_register_slice_13 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_13 ] + set_property -dict [ list \ + CONFIG.NUM_SLR_CROSSINGS {0} \ + CONFIG.REG_CONFIG {16} \ + ] $axis_register_slice_13 + + # Create instance: axis_register_slice_14, and set properties + set axis_register_slice_14 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_register_slice:1.1 axis_register_slice_14 ] + set_property -dict [ list \ + CONFIG.NUM_SLR_CROSSINGS {0} \ + CONFIG.REG_CONFIG {16} \ + ] $axis_register_slice_14 + + # Create instance: axis_switch_1_to_2_inst_0, and set properties + set axis_switch_1_to_2_inst_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_1_to_2_inst_0 ] + set_property -dict [ list \ + CONFIG.DECODER_REG {1} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.NUM_MI {2} \ + CONFIG.NUM_SI {1} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {8} \ + ] $axis_switch_1_to_2_inst_0 + + # Create instance: axis_switch_1_to_2_inst_1, and set properties + set axis_switch_1_to_2_inst_1 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_1_to_2_inst_1 ] + set_property -dict [ list \ + CONFIG.DECODER_REG {1} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.NUM_MI {2} \ + CONFIG.NUM_SI {1} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {8} \ + ] $axis_switch_1_to_2_inst_1 + + # Create instance: axis_switch_1_to_2_inst_2, and set properties + set axis_switch_1_to_2_inst_2 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_1_to_2_inst_2 ] + set_property -dict [ list \ + CONFIG.DECODER_REG {1} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.NUM_MI {2} \ + CONFIG.NUM_SI {1} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {8} \ + ] $axis_switch_1_to_2_inst_2 + + # Create instance: axis_switch_1_to_2_inst_3, and set properties + set axis_switch_1_to_2_inst_3 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_1_to_2_inst_3 ] + set_property -dict [ list \ + CONFIG.DECODER_REG {1} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.NUM_MI {2} \ + CONFIG.NUM_SI {1} \ + CONFIG.TDATA_NUM_BYTES {64} \ + CONFIG.TDEST_WIDTH {8} \ + ] $axis_switch_1_to_2_inst_3 + + # Create instance: axis_switch_2_to_1_inst_0, and set properties + set axis_switch_2_to_1_inst_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_2_to_1_inst_0 ] + set_property -dict [ list \ + CONFIG.ARB_ON_MAX_XFERS {0} \ + CONFIG.ARB_ON_TLAST {1} \ + CONFIG.DECODER_REG {0} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.NUM_MI {1} \ + CONFIG.NUM_SI {2} \ + CONFIG.TDATA_NUM_BYTES {64} \ + ] $axis_switch_2_to_1_inst_0 + + # Create instance: axis_switch_2_to_1_inst_1, and set properties + set axis_switch_2_to_1_inst_1 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_2_to_1_inst_1 ] + set_property -dict [ list \ + CONFIG.ARB_ON_MAX_XFERS {0} \ + CONFIG.ARB_ON_TLAST {1} \ + CONFIG.DECODER_REG {0} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.NUM_MI {1} \ + CONFIG.NUM_SI {2} \ + CONFIG.TDATA_NUM_BYTES {64} \ + ] $axis_switch_2_to_1_inst_1 + + # Create instance: axis_switch_2_to_1_inst_2, and set properties + set axis_switch_2_to_1_inst_2 [ create_bd_cell -type ip -vlnv xilinx.com:ip:axis_switch:1.1 axis_switch_2_to_1_inst_2 ] + set_property -dict [ list \ + CONFIG.ARB_ON_MAX_XFERS {0} \ + CONFIG.ARB_ON_TLAST {1} \ + CONFIG.DECODER_REG {0} \ + CONFIG.HAS_TKEEP {1} \ + CONFIG.HAS_TLAST {1} \ + CONFIG.NUM_MI {1} \ + CONFIG.NUM_SI {2} \ + CONFIG.TDATA_NUM_BYTES {64} \ + ] $axis_switch_2_to_1_inst_2 + + # Create instance: ccl_offload_0, and set properties + set ccl_offload_0 [ create_bd_cell -type ip -vlnv Xilinx:ACCL:ccl_offload:1.0 ccl_offload_0 ] + + # Create instance: cclo_sq_adapter_0, and set properties + set cclo_sq_adapter_0 [ create_bd_cell -type ip -vlnv xilinx.com:ACCL:cclo_sq_adapter:1.0 cclo_sq_adapter_0 ] + + # Create instance: cyt_cq_dm_sts_conver_0, and set properties + set cyt_cq_dm_sts_conver_0 [ create_bd_cell -type ip -vlnv xilinx.com:ACCL:cyt_cq_dm_sts_converter:1.0 cyt_cq_dm_sts_conver_0 ] + + # Create instance: cyt_cq_dm_sts_conver_1, and set properties + set cyt_cq_dm_sts_conver_1 [ create_bd_cell -type ip -vlnv xilinx.com:ACCL:cyt_cq_dm_sts_converter:1.0 cyt_cq_dm_sts_conver_1 ] + + # Create instance: cyt_dma_sq_adapter_0, and set properties + set cyt_dma_sq_adapter_0 [ create_bd_cell -type ip -vlnv xilinx.com:ACCL:cyt_dma_sq_adapter:1.0 cyt_dma_sq_adapter_0 ] + + # Create instance: cyt_rdma_arbiter_0, and set properties + set cyt_rdma_arbiter_0 [ create_bd_cell -type ip -vlnv xilinx.com:ACCL:cyt_rdma_arbiter:1.0 cyt_rdma_arbiter_0 ] + + # Create instance: hostctrl_0, and set properties + set hostctrl_0 [ create_bd_cell -type ip -vlnv xilinx.com:ACCL:hostctrl:1.0 hostctrl_0 ] + + # Create instance: reduce_ops_0, and set properties + set reduce_ops_0 [ create_bd_cell -type ip -vlnv xilinx.com:ACCL:reduce_ops:1.0 reduce_ops_0 ] + + # Create instance: rst_ap_clk_0_250M, and set properties + set rst_ap_clk_0_250M [ create_bd_cell -type ip -vlnv xilinx.com:ip:proc_sys_reset:5.0 rst_ap_clk_0_250M ] + + # Create instance: smartconnect_0, and set properties + set smartconnect_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:smartconnect:1.0 smartconnect_0 ] + set_property -dict [ list \ + CONFIG.NUM_MI {2} \ + CONFIG.NUM_SI {1} \ + ] $smartconnect_0 + + # Create instance: system_ila_0, and set properties + set system_ila_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:system_ila:1.1 system_ila_0 ] + set_property -dict [ list \ + CONFIG.C_INPUT_PIPE_STAGES {2} \ + CONFIG.C_MON_TYPE {INTERFACE} \ + CONFIG.C_NUM_MONITOR_SLOTS {16} \ + CONFIG.C_SLOT_0_APC_EN {0} \ + CONFIG.C_SLOT_0_AXI_AR_SEL_DATA {1} \ + CONFIG.C_SLOT_0_AXI_AR_SEL_TRIG {1} \ + CONFIG.C_SLOT_0_AXI_AW_SEL_DATA {1} \ + CONFIG.C_SLOT_0_AXI_AW_SEL_TRIG {1} \ + CONFIG.C_SLOT_0_AXI_B_SEL_DATA {1} \ + CONFIG.C_SLOT_0_AXI_B_SEL_TRIG {1} \ + CONFIG.C_SLOT_0_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_0_AXI_R_SEL_DATA {1} \ + CONFIG.C_SLOT_0_AXI_R_SEL_TRIG {1} \ + CONFIG.C_SLOT_0_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_0_AXI_W_SEL_DATA {1} \ + CONFIG.C_SLOT_0_AXI_W_SEL_TRIG {1} \ + CONFIG.C_SLOT_0_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_0_TYPE {0} \ + CONFIG.C_SLOT_10_APC_EN {0} \ + CONFIG.C_SLOT_10_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_10_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_10_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_10_TYPE {0} \ + CONFIG.C_SLOT_11_APC_EN {0} \ + CONFIG.C_SLOT_11_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_11_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_11_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_11_TYPE {0} \ + CONFIG.C_SLOT_12_APC_EN {0} \ + CONFIG.C_SLOT_12_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_12_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_12_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_12_TYPE {0} \ + CONFIG.C_SLOT_13_APC_EN {0} \ + CONFIG.C_SLOT_13_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_13_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_13_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_13_TYPE {0} \ + CONFIG.C_SLOT_14_APC_EN {0} \ + CONFIG.C_SLOT_14_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_14_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_14_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_14_TYPE {0} \ + CONFIG.C_SLOT_15_APC_EN {0} \ + CONFIG.C_SLOT_15_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_15_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_15_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_1_APC_EN {0} \ + CONFIG.C_SLOT_1_AXI_AR_SEL_DATA {1} \ + CONFIG.C_SLOT_1_AXI_AR_SEL_TRIG {1} \ + CONFIG.C_SLOT_1_AXI_AW_SEL_DATA {1} \ + CONFIG.C_SLOT_1_AXI_AW_SEL_TRIG {1} \ + CONFIG.C_SLOT_1_AXI_B_SEL_DATA {1} \ + CONFIG.C_SLOT_1_AXI_B_SEL_TRIG {1} \ + CONFIG.C_SLOT_1_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_1_AXI_R_SEL_DATA {1} \ + CONFIG.C_SLOT_1_AXI_R_SEL_TRIG {1} \ + CONFIG.C_SLOT_1_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_1_AXI_W_SEL_DATA {1} \ + CONFIG.C_SLOT_1_AXI_W_SEL_TRIG {1} \ + CONFIG.C_SLOT_1_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_1_TYPE {0} \ + CONFIG.C_SLOT_2_APC_EN {0} \ + CONFIG.C_SLOT_2_AXI_AR_SEL_DATA {1} \ + CONFIG.C_SLOT_2_AXI_AR_SEL_TRIG {1} \ + CONFIG.C_SLOT_2_AXI_AW_SEL_DATA {1} \ + CONFIG.C_SLOT_2_AXI_AW_SEL_TRIG {1} \ + CONFIG.C_SLOT_2_AXI_B_SEL_DATA {1} \ + CONFIG.C_SLOT_2_AXI_B_SEL_TRIG {1} \ + CONFIG.C_SLOT_2_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_2_AXI_R_SEL_DATA {1} \ + CONFIG.C_SLOT_2_AXI_R_SEL_TRIG {1} \ + CONFIG.C_SLOT_2_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_2_AXI_W_SEL_DATA {1} \ + CONFIG.C_SLOT_2_AXI_W_SEL_TRIG {1} \ + CONFIG.C_SLOT_2_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_2_TYPE {0} \ + CONFIG.C_SLOT_3_APC_EN {0} \ + CONFIG.C_SLOT_3_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_3_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_3_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_3_TYPE {0} \ + CONFIG.C_SLOT_4_APC_EN {0} \ + CONFIG.C_SLOT_4_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_4_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_4_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_4_TYPE {0} \ + CONFIG.C_SLOT_5_APC_EN {0} \ + CONFIG.C_SLOT_5_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_5_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_5_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_5_TYPE {0} \ + CONFIG.C_SLOT_6_APC_EN {0} \ + CONFIG.C_SLOT_6_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_6_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_6_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_6_TYPE {0} \ + CONFIG.C_SLOT_7_APC_EN {0} \ + CONFIG.C_SLOT_7_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_7_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_7_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_7_TYPE {0} \ + CONFIG.C_SLOT_8_APC_EN {0} \ + CONFIG.C_SLOT_8_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_8_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_8_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_8_TYPE {0} \ + CONFIG.C_SLOT_9_APC_EN {0} \ + CONFIG.C_SLOT_9_AXI_DATA_SEL {1} \ + CONFIG.C_SLOT_9_AXI_TRIG_SEL {1} \ + CONFIG.C_SLOT_9_INTF_TYPE {xilinx.com:interface:axis_rtl:1.0} \ + CONFIG.C_SLOT_9_TYPE {0} \ + ] $system_ila_0 + + # Create instance: xlconstant_0, and set properties + set xlconstant_0 [ create_bd_cell -type ip -vlnv xilinx.com:ip:xlconstant:1.1 xlconstant_0 ] + set_property -dict [ list \ + CONFIG.CONST_VAL {0} \ + CONFIG.CONST_WIDTH {2} \ + ] $xlconstant_0 + + # Create instance: xlconstant_1, and set properties + set xlconstant_1 [ create_bd_cell -type ip -vlnv xilinx.com:ip:xlconstant:1.1 xlconstant_1 ] + set_property -dict [ list \ + CONFIG.CONST_VAL {0} \ + CONFIG.CONST_WIDTH {2} \ + ] $xlconstant_1 + + # Create instance: xlconstant_2, and set properties + set xlconstant_2 [ create_bd_cell -type ip -vlnv xilinx.com:ip:xlconstant:1.1 xlconstant_2 ] + set_property -dict [ list \ + CONFIG.CONST_VAL {0} \ + CONFIG.CONST_WIDTH {2} \ + ] $xlconstant_2 + + # Create interface connections + connect_bd_intf_net -intf_net S00_AXI_0_1 [get_bd_intf_ports S00_AXI_0] [get_bd_intf_pins smartconnect_0/S00_AXI] + connect_bd_intf_net -intf_net axis_data_fifo_0_M_AXIS [get_bd_intf_ports cyt_rrsp_send_0] [get_bd_intf_pins axis_data_fifo_0/M_AXIS] + connect_bd_intf_net -intf_net axis_data_fifo_1_M_AXIS [get_bd_intf_ports cyt_rrsp_send_1] [get_bd_intf_pins axis_data_fifo_1/M_AXIS] + connect_bd_intf_net -intf_net axis_data_fifo_2_M_AXIS [get_bd_intf_pins axis_data_fifo_2/M_AXIS] [get_bd_intf_pins cyt_dma_sq_adapter_0/cyt_rq_wr_cmd] + connect_bd_intf_net -intf_net axis_data_fifo_3_M_AXIS [get_bd_intf_pins axis_data_fifo_3/M_AXIS] [get_bd_intf_pins cyt_dma_sq_adapter_0/cyt_rq_rd_cmd] + connect_bd_intf_net -intf_net axis_data_fifo_4_M_AXIS [get_bd_intf_pins axis_data_fifo_4/M_AXIS] [get_bd_intf_pins cyt_dma_sq_adapter_0/cclo_sq_wr_cmd] + connect_bd_intf_net -intf_net axis_data_fifo_5_M_AXIS [get_bd_intf_pins axis_data_fifo_5/M_AXIS] [get_bd_intf_pins cyt_dma_sq_adapter_0/cclo_sq_rd_cmd] + connect_bd_intf_net -intf_net axis_data_fifo_6_M_AXIS [get_bd_intf_pins axis_data_fifo_6/M_AXIS] [get_bd_intf_pins cyt_cq_dm_sts_conver_1/dm1_meta] +connect_bd_intf_net -intf_net [get_bd_intf_nets axis_data_fifo_6_M_AXIS] [get_bd_intf_pins axis_data_fifo_6/M_AXIS] [get_bd_intf_pins system_ila_0/SLOT_0_AXIS] + set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets axis_data_fifo_6_M_AXIS] + connect_bd_intf_net -intf_net axis_data_fifo_7_M_AXIS [get_bd_intf_pins axis_data_fifo_7/M_AXIS] [get_bd_intf_pins cyt_cq_dm_sts_conver_1/dm0_meta] +connect_bd_intf_net -intf_net [get_bd_intf_nets axis_data_fifo_7_M_AXIS] [get_bd_intf_pins axis_data_fifo_7/M_AXIS] [get_bd_intf_pins system_ila_0/SLOT_1_AXIS] + set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets axis_data_fifo_7_M_AXIS] + connect_bd_intf_net -intf_net axis_data_fifo_8_M_AXIS [get_bd_intf_pins axis_data_fifo_8/M_AXIS] [get_bd_intf_pins cyt_cq_dm_sts_conver_0/dm1_meta] +connect_bd_intf_net -intf_net [get_bd_intf_nets axis_data_fifo_8_M_AXIS] [get_bd_intf_pins axis_data_fifo_8/M_AXIS] [get_bd_intf_pins system_ila_0/SLOT_6_AXIS] + set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets axis_data_fifo_8_M_AXIS] + connect_bd_intf_net -intf_net axis_data_fifo_9_M_AXIS [get_bd_intf_pins axis_data_fifo_9/M_AXIS] [get_bd_intf_pins cyt_cq_dm_sts_conver_0/dm0_meta] +connect_bd_intf_net -intf_net [get_bd_intf_nets axis_data_fifo_9_M_AXIS] [get_bd_intf_pins axis_data_fifo_9/M_AXIS] [get_bd_intf_pins system_ila_0/SLOT_7_AXIS] + set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets axis_data_fifo_9_M_AXIS] + connect_bd_intf_net -intf_net axis_register_slice_0_M_AXIS [get_bd_intf_pins axis_register_slice_0/M_AXIS] [get_bd_intf_pins cyt_rdma_arbiter_0/s_axis_0] + connect_bd_intf_net -intf_net axis_register_slice_10_M_AXIS [get_bd_intf_ports cyt_sq_rd_cmd] [get_bd_intf_pins axis_register_slice_10/M_AXIS] + connect_bd_intf_net -intf_net axis_register_slice_11_M_AXIS [get_bd_intf_pins axis_register_slice_11/M_AXIS] [get_bd_intf_pins axis_switch_1_to_2_inst_3/S00_AXIS] + connect_bd_intf_net -intf_net axis_register_slice_12_M_AXIS [get_bd_intf_pins axis_register_slice_12/M_AXIS] [get_bd_intf_pins axis_switch_1_to_2_inst_0/S00_AXIS] + connect_bd_intf_net -intf_net axis_register_slice_13_M_AXIS [get_bd_intf_pins axis_register_slice_13/M_AXIS] [get_bd_intf_pins axis_switch_1_to_2_inst_1/S00_AXIS] + connect_bd_intf_net -intf_net axis_register_slice_14_M_AXIS [get_bd_intf_pins axis_register_slice_14/M_AXIS] [get_bd_intf_pins axis_switch_1_to_2_inst_2/S00_AXIS] + connect_bd_intf_net -intf_net axis_register_slice_1_M_AXIS [get_bd_intf_pins axis_register_slice_1/M_AXIS] [get_bd_intf_pins cyt_rdma_arbiter_0/s_axis_1] + connect_bd_intf_net -intf_net axis_register_slice_2_M_AXIS [get_bd_intf_pins axis_register_slice_2/M_AXIS] [get_bd_intf_pins cyt_rdma_arbiter_0/s_meta] + connect_bd_intf_net -intf_net axis_register_slice_3_M_AXIS [get_bd_intf_pins axis_register_slice_3/M_AXIS] [get_bd_intf_pins cyt_cq_dm_sts_conver_0/cq_sts] +connect_bd_intf_net -intf_net [get_bd_intf_nets axis_register_slice_3_M_AXIS] [get_bd_intf_pins axis_register_slice_3/M_AXIS] [get_bd_intf_pins system_ila_0/SLOT_8_AXIS] + set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets axis_register_slice_3_M_AXIS] + connect_bd_intf_net -intf_net axis_register_slice_4_M_AXIS [get_bd_intf_pins axis_register_slice_4/M_AXIS] [get_bd_intf_pins cyt_cq_dm_sts_conver_1/cq_sts] +connect_bd_intf_net -intf_net [get_bd_intf_nets axis_register_slice_4_M_AXIS] [get_bd_intf_pins axis_register_slice_4/M_AXIS] [get_bd_intf_pins system_ila_0/SLOT_9_AXIS] + set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets axis_register_slice_4_M_AXIS] + connect_bd_intf_net -intf_net axis_register_slice_5_M_AXIS [get_bd_intf_pins axis_data_fifo_3/S_AXIS] [get_bd_intf_pins axis_register_slice_5/M_AXIS] + connect_bd_intf_net -intf_net axis_register_slice_6_M_AXIS [get_bd_intf_pins axis_register_slice_6/M_AXIS] [get_bd_intf_pins cclo_sq_adapter_0/s_axis_cyt] + connect_bd_intf_net -intf_net axis_register_slice_7_M_AXIS [get_bd_intf_pins axis_register_slice_7/M_AXIS] [get_bd_intf_pins ccl_offload_0/s_axis_dma0_mm2s] + connect_bd_intf_net -intf_net axis_register_slice_8_M_AXIS [get_bd_intf_pins axis_register_slice_8/M_AXIS] [get_bd_intf_pins ccl_offload_0/s_axis_dma1_mm2s] + connect_bd_intf_net -intf_net axis_register_slice_9_M_AXIS [get_bd_intf_ports cyt_sq_wr_cmd] [get_bd_intf_pins axis_register_slice_9/M_AXIS] + connect_bd_intf_net -intf_net axis_switch_1_to_2_inst_0_M00_AXIS [get_bd_intf_ports m_axis_card_0] [get_bd_intf_pins axis_switch_1_to_2_inst_0/M00_AXIS] + connect_bd_intf_net -intf_net axis_switch_1_to_2_inst_0_M01_AXIS [get_bd_intf_ports m_axis_host_0] [get_bd_intf_pins axis_switch_1_to_2_inst_0/M01_AXIS] + connect_bd_intf_net -intf_net axis_switch_1_to_2_inst_1_M00_AXIS [get_bd_intf_ports m_axis_card_1] [get_bd_intf_pins axis_switch_1_to_2_inst_1/M00_AXIS] + connect_bd_intf_net -intf_net axis_switch_1_to_2_inst_1_M01_AXIS [get_bd_intf_ports m_axis_host_1] [get_bd_intf_pins axis_switch_1_to_2_inst_1/M01_AXIS] + connect_bd_intf_net -intf_net axis_switch_1_to_2_inst_2_M00_AXIS [get_bd_intf_ports m_axis_card_2] [get_bd_intf_pins axis_switch_1_to_2_inst_2/M00_AXIS] + connect_bd_intf_net -intf_net axis_switch_1_to_2_inst_2_M01_AXIS [get_bd_intf_ports m_axis_host_2] [get_bd_intf_pins axis_switch_1_to_2_inst_2/M01_AXIS] + connect_bd_intf_net -intf_net axis_switch_1_to_2_inst_3_M00_AXIS [get_bd_intf_ports cyt_rreq_send_0] [get_bd_intf_pins axis_switch_1_to_2_inst_3/M00_AXIS] + connect_bd_intf_net -intf_net axis_switch_1_to_2_inst_3_M01_AXIS [get_bd_intf_ports cyt_rreq_send_1] [get_bd_intf_pins axis_switch_1_to_2_inst_3/M01_AXIS] + connect_bd_intf_net -intf_net axis_switch_2_to_1_inst_0_M00_AXIS [get_bd_intf_pins axis_register_slice_7/S_AXIS] [get_bd_intf_pins axis_switch_2_to_1_inst_0/M00_AXIS] + connect_bd_intf_net -intf_net axis_switch_2_to_1_inst_1_M00_AXIS [get_bd_intf_pins axis_register_slice_8/S_AXIS] [get_bd_intf_pins axis_switch_2_to_1_inst_1/M00_AXIS] + connect_bd_intf_net -intf_net axis_switch_2_to_1_inst_2_M00_AXIS [get_bd_intf_pins axis_register_slice_6/S_AXIS] [get_bd_intf_pins axis_switch_2_to_1_inst_2/M00_AXIS] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_arith_op0 [get_bd_intf_pins ccl_offload_0/m_axis_arith_op0] [get_bd_intf_pins reduce_ops_0/in0] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_arith_op1 [get_bd_intf_pins ccl_offload_0/m_axis_arith_op1] [get_bd_intf_pins reduce_ops_0/in1] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_call_ack [get_bd_intf_pins ccl_offload_0/m_axis_call_ack] [get_bd_intf_pins hostctrl_0/sts] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_compression0 [get_bd_intf_pins ccl_offload_0/m_axis_compression0] [get_bd_intf_pins ccl_offload_0/s_axis_compression0] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_compression1 [get_bd_intf_pins ccl_offload_0/m_axis_compression1] [get_bd_intf_pins ccl_offload_0/s_axis_compression1] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_compression2 [get_bd_intf_pins ccl_offload_0/m_axis_compression2] [get_bd_intf_pins ccl_offload_0/s_axis_compression2] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma0_mm2s_cmd [get_bd_intf_pins ccl_offload_0/m_axis_dma0_mm2s_cmd] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma0_mm2s_cmd] +connect_bd_intf_net -intf_net [get_bd_intf_nets ccl_offload_0_m_axis_dma0_mm2s_cmd] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma0_mm2s_cmd] [get_bd_intf_pins system_ila_0/SLOT_2_AXIS] + set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets ccl_offload_0_m_axis_dma0_mm2s_cmd] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma0_s2mm [get_bd_intf_pins axis_register_slice_12/S_AXIS] [get_bd_intf_pins ccl_offload_0/m_axis_dma0_s2mm] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma0_s2mm_cmd [get_bd_intf_pins ccl_offload_0/m_axis_dma0_s2mm_cmd] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma0_s2mm_cmd] +connect_bd_intf_net -intf_net [get_bd_intf_nets ccl_offload_0_m_axis_dma0_s2mm_cmd] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma0_s2mm_cmd] [get_bd_intf_pins system_ila_0/SLOT_3_AXIS] + set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets ccl_offload_0_m_axis_dma0_s2mm_cmd] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma1_mm2s_cmd [get_bd_intf_pins ccl_offload_0/m_axis_dma1_mm2s_cmd] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma1_mm2s_cmd] +connect_bd_intf_net -intf_net [get_bd_intf_nets ccl_offload_0_m_axis_dma1_mm2s_cmd] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma1_mm2s_cmd] [get_bd_intf_pins system_ila_0/SLOT_4_AXIS] + set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets ccl_offload_0_m_axis_dma1_mm2s_cmd] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma1_s2mm [get_bd_intf_pins axis_register_slice_13/S_AXIS] [get_bd_intf_pins ccl_offload_0/m_axis_dma1_s2mm] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_dma1_s2mm_cmd [get_bd_intf_pins ccl_offload_0/m_axis_dma1_s2mm_cmd] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma1_s2mm_cmd] +connect_bd_intf_net -intf_net [get_bd_intf_nets ccl_offload_0_m_axis_dma1_s2mm_cmd] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma1_s2mm_cmd] [get_bd_intf_pins system_ila_0/SLOT_5_AXIS] + set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets ccl_offload_0_m_axis_dma1_s2mm_cmd] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_eth_tx_data [get_bd_intf_pins ccl_offload_0/m_axis_eth_tx_data] [get_bd_intf_pins cclo_sq_adapter_0/s_axis_cclo] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_krnl [get_bd_intf_pins ccl_offload_0/m_axis_krnl] [get_bd_intf_pins ccl_offload_0/s_axis_krnl] + connect_bd_intf_net -intf_net ccl_offload_0_m_axis_rdma_sq [get_bd_intf_pins ccl_offload_0/m_axis_rdma_sq] [get_bd_intf_pins cclo_sq_adapter_0/cclo_sq] + connect_bd_intf_net -intf_net cclo_sq_adapter_0_cyt_sq_rd [get_bd_intf_pins axis_data_fifo_5/S_AXIS] [get_bd_intf_pins cclo_sq_adapter_0/cyt_sq_rd] + connect_bd_intf_net -intf_net cclo_sq_adapter_0_cyt_sq_wr [get_bd_intf_pins axis_data_fifo_4/S_AXIS] [get_bd_intf_pins cclo_sq_adapter_0/cyt_sq_wr] + connect_bd_intf_net -intf_net cclo_sq_adapter_0_m_axis_cyt [get_bd_intf_pins axis_register_slice_11/S_AXIS] [get_bd_intf_pins cclo_sq_adapter_0/m_axis_cyt] + connect_bd_intf_net -intf_net cyt_cq_dm_sts_conver_0_dm0_sts [get_bd_intf_pins ccl_offload_0/s_axis_dma0_s2mm_sts] [get_bd_intf_pins cyt_cq_dm_sts_conver_0/dm0_sts] +connect_bd_intf_net -intf_net [get_bd_intf_nets cyt_cq_dm_sts_conver_0_dm0_sts] [get_bd_intf_pins cyt_cq_dm_sts_conver_0/dm0_sts] [get_bd_intf_pins system_ila_0/SLOT_12_AXIS] + set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets cyt_cq_dm_sts_conver_0_dm0_sts] + connect_bd_intf_net -intf_net cyt_cq_dm_sts_conver_0_dm1_sts [get_bd_intf_pins ccl_offload_0/s_axis_dma1_s2mm_sts] [get_bd_intf_pins cyt_cq_dm_sts_conver_0/dm1_sts] +connect_bd_intf_net -intf_net [get_bd_intf_nets cyt_cq_dm_sts_conver_0_dm1_sts] [get_bd_intf_pins cyt_cq_dm_sts_conver_0/dm1_sts] [get_bd_intf_pins system_ila_0/SLOT_13_AXIS] + set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets cyt_cq_dm_sts_conver_0_dm1_sts] + connect_bd_intf_net -intf_net cyt_cq_dm_sts_conver_1_dm0_sts [get_bd_intf_pins ccl_offload_0/s_axis_dma0_mm2s_sts] [get_bd_intf_pins cyt_cq_dm_sts_conver_1/dm0_sts] +connect_bd_intf_net -intf_net [get_bd_intf_nets cyt_cq_dm_sts_conver_1_dm0_sts] [get_bd_intf_pins cyt_cq_dm_sts_conver_1/dm0_sts] [get_bd_intf_pins system_ila_0/SLOT_14_AXIS] + set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets cyt_cq_dm_sts_conver_1_dm0_sts] + connect_bd_intf_net -intf_net cyt_cq_dm_sts_conver_1_dm1_sts [get_bd_intf_pins ccl_offload_0/s_axis_dma1_mm2s_sts] [get_bd_intf_pins cyt_cq_dm_sts_conver_1/dm1_sts] +connect_bd_intf_net -intf_net [get_bd_intf_nets cyt_cq_dm_sts_conver_1_dm1_sts] [get_bd_intf_pins cyt_cq_dm_sts_conver_1/dm1_sts] [get_bd_intf_pins system_ila_0/SLOT_15_AXIS] + set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets cyt_cq_dm_sts_conver_1_dm1_sts] + connect_bd_intf_net -intf_net cyt_cq_rd_sts_0_1 [get_bd_intf_ports cyt_cq_rd_sts_0] [get_bd_intf_pins axis_register_slice_4/S_AXIS] + connect_bd_intf_net -intf_net cyt_cq_wr_sts_0_1 [get_bd_intf_ports cyt_cq_wr_sts_0] [get_bd_intf_pins axis_register_slice_3/S_AXIS] + connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_cyt_sq_rd_cmd [get_bd_intf_pins axis_register_slice_10/S_AXIS] [get_bd_intf_pins cyt_dma_sq_adapter_0/cyt_sq_rd_cmd] +connect_bd_intf_net -intf_net [get_bd_intf_nets cyt_dma_sq_adapter_0_cyt_sq_rd_cmd] [get_bd_intf_pins axis_register_slice_10/S_AXIS] [get_bd_intf_pins system_ila_0/SLOT_10_AXIS] + set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets cyt_dma_sq_adapter_0_cyt_sq_rd_cmd] + connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_cyt_sq_wr_cmd [get_bd_intf_pins axis_register_slice_9/S_AXIS] [get_bd_intf_pins cyt_dma_sq_adapter_0/cyt_sq_wr_cmd] +connect_bd_intf_net -intf_net [get_bd_intf_nets cyt_dma_sq_adapter_0_cyt_sq_wr_cmd] [get_bd_intf_pins axis_register_slice_9/S_AXIS] [get_bd_intf_pins system_ila_0/SLOT_11_AXIS] + set_property HDL_ATTRIBUTE.DEBUG {true} [get_bd_intf_nets cyt_dma_sq_adapter_0_cyt_sq_wr_cmd] + connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_dma0_mm2s_meta [get_bd_intf_pins axis_data_fifo_7/S_AXIS] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma0_mm2s_meta] + connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_dma0_s2mm_meta [get_bd_intf_pins axis_data_fifo_9/S_AXIS] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma0_s2mm_meta] + connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_dma1_mm2s_meta [get_bd_intf_pins axis_data_fifo_6/S_AXIS] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma1_mm2s_meta] + connect_bd_intf_net -intf_net cyt_dma_sq_adapter_0_dma1_s2mm_meta [get_bd_intf_pins axis_data_fifo_8/S_AXIS] [get_bd_intf_pins cyt_dma_sq_adapter_0/dma1_s2mm_meta] + connect_bd_intf_net -intf_net cyt_rdma_arbiter_0_m_axis_0 [get_bd_intf_pins ccl_offload_0/s_axis_eth_rx_data] [get_bd_intf_pins cyt_rdma_arbiter_0/m_axis_0] + connect_bd_intf_net -intf_net cyt_rdma_arbiter_0_m_axis_1 [get_bd_intf_pins axis_register_slice_14/S_AXIS] [get_bd_intf_pins cyt_rdma_arbiter_0/m_axis_1] + connect_bd_intf_net -intf_net cyt_rdma_arbiter_0_m_meta_0 [get_bd_intf_pins ccl_offload_0/s_axis_eth_notification] [get_bd_intf_pins cyt_rdma_arbiter_0/m_meta_0] + connect_bd_intf_net -intf_net cyt_rdma_arbiter_0_m_meta_1 [get_bd_intf_pins axis_data_fifo_2/S_AXIS] [get_bd_intf_pins cyt_rdma_arbiter_0/m_meta_1] + connect_bd_intf_net -intf_net cyt_rq_rd_1 [get_bd_intf_ports cyt_rq_rd] [get_bd_intf_pins axis_register_slice_5/S_AXIS] + connect_bd_intf_net -intf_net cyt_rq_wr_1 [get_bd_intf_ports cyt_rq_wr] [get_bd_intf_pins axis_register_slice_2/S_AXIS] + connect_bd_intf_net -intf_net cyt_rreq_recv_0_1 [get_bd_intf_ports cyt_rreq_recv_0] [get_bd_intf_pins axis_switch_2_to_1_inst_2/S00_AXIS] + connect_bd_intf_net -intf_net cyt_rreq_recv_1_1 [get_bd_intf_ports cyt_rreq_recv_1] [get_bd_intf_pins axis_switch_2_to_1_inst_2/S01_AXIS] + connect_bd_intf_net -intf_net cyt_rrsp_recv_0_1 [get_bd_intf_ports cyt_rrsp_recv_0] [get_bd_intf_pins axis_register_slice_0/S_AXIS] + connect_bd_intf_net -intf_net cyt_rrsp_recv_1_1 [get_bd_intf_ports cyt_rrsp_recv_1] [get_bd_intf_pins axis_register_slice_1/S_AXIS] + connect_bd_intf_net -intf_net hostctrl_0_cmd [get_bd_intf_pins ccl_offload_0/s_axis_call_req] [get_bd_intf_pins hostctrl_0/cmd] + connect_bd_intf_net -intf_net reduce_ops_0_out_r [get_bd_intf_pins ccl_offload_0/s_axis_arith_res] [get_bd_intf_pins reduce_ops_0/out_r] + connect_bd_intf_net -intf_net s_axis_card_0_1 [get_bd_intf_ports s_axis_card_0] [get_bd_intf_pins axis_switch_2_to_1_inst_0/S01_AXIS] + connect_bd_intf_net -intf_net s_axis_card_1_1 [get_bd_intf_ports s_axis_card_1] [get_bd_intf_pins axis_switch_2_to_1_inst_1/S01_AXIS] + connect_bd_intf_net -intf_net s_axis_card_2_1 [get_bd_intf_ports s_axis_card_2] [get_bd_intf_pins axis_data_fifo_0/S_AXIS] + connect_bd_intf_net -intf_net s_axis_host_0_1 [get_bd_intf_ports s_axis_host_0] [get_bd_intf_pins axis_switch_2_to_1_inst_0/S00_AXIS] + connect_bd_intf_net -intf_net s_axis_host_1_1 [get_bd_intf_ports s_axis_host_1] [get_bd_intf_pins axis_switch_2_to_1_inst_1/S00_AXIS] + connect_bd_intf_net -intf_net s_axis_host_2_1 [get_bd_intf_ports s_axis_host_2] [get_bd_intf_pins axis_data_fifo_1/S_AXIS] + connect_bd_intf_net -intf_net smartconnect_0_M00_AXI [get_bd_intf_pins hostctrl_0/s_axi_control] [get_bd_intf_pins smartconnect_0/M00_AXI] + connect_bd_intf_net -intf_net smartconnect_0_M01_AXI [get_bd_intf_pins ccl_offload_0/s_axi_control] [get_bd_intf_pins smartconnect_0/M01_AXI] + + # Create port connections + connect_bd_net -net ap_clk_0_1 [get_bd_ports ap_clk_0] [get_bd_pins axis_data_fifo_0/s_axis_aclk] [get_bd_pins axis_data_fifo_1/s_axis_aclk] [get_bd_pins axis_data_fifo_2/s_axis_aclk] [get_bd_pins axis_data_fifo_3/s_axis_aclk] [get_bd_pins axis_data_fifo_4/s_axis_aclk] [get_bd_pins axis_data_fifo_5/s_axis_aclk] [get_bd_pins axis_data_fifo_6/s_axis_aclk] [get_bd_pins axis_data_fifo_7/s_axis_aclk] [get_bd_pins axis_data_fifo_8/s_axis_aclk] [get_bd_pins axis_data_fifo_9/s_axis_aclk] [get_bd_pins axis_register_slice_0/aclk] [get_bd_pins axis_register_slice_1/aclk] [get_bd_pins axis_register_slice_10/aclk] [get_bd_pins axis_register_slice_11/aclk] [get_bd_pins axis_register_slice_12/aclk] [get_bd_pins axis_register_slice_13/aclk] [get_bd_pins axis_register_slice_14/aclk] [get_bd_pins axis_register_slice_2/aclk] [get_bd_pins axis_register_slice_3/aclk] [get_bd_pins axis_register_slice_4/aclk] [get_bd_pins axis_register_slice_5/aclk] [get_bd_pins axis_register_slice_6/aclk] [get_bd_pins axis_register_slice_7/aclk] [get_bd_pins axis_register_slice_8/aclk] [get_bd_pins axis_register_slice_9/aclk] [get_bd_pins axis_switch_1_to_2_inst_0/aclk] [get_bd_pins axis_switch_1_to_2_inst_1/aclk] [get_bd_pins axis_switch_1_to_2_inst_2/aclk] [get_bd_pins axis_switch_1_to_2_inst_3/aclk] [get_bd_pins axis_switch_2_to_1_inst_0/aclk] [get_bd_pins axis_switch_2_to_1_inst_1/aclk] [get_bd_pins axis_switch_2_to_1_inst_2/aclk] [get_bd_pins ccl_offload_0/ap_clk] [get_bd_pins cclo_sq_adapter_0/ap_clk] [get_bd_pins cyt_cq_dm_sts_conver_0/ap_clk] [get_bd_pins cyt_cq_dm_sts_conver_1/ap_clk] [get_bd_pins cyt_dma_sq_adapter_0/ap_clk] [get_bd_pins cyt_rdma_arbiter_0/ap_clk] [get_bd_pins hostctrl_0/ap_clk] [get_bd_pins reduce_ops_0/ap_clk] [get_bd_pins rst_ap_clk_0_250M/slowest_sync_clk] [get_bd_pins smartconnect_0/aclk] [get_bd_pins system_ila_0/clk] + connect_bd_net -net ap_rst_n_0_1 [get_bd_ports ap_rst_n_0] [get_bd_pins axis_data_fifo_6/s_axis_aresetn] [get_bd_pins axis_data_fifo_7/s_axis_aresetn] [get_bd_pins axis_data_fifo_8/s_axis_aresetn] [get_bd_pins axis_data_fifo_9/s_axis_aresetn] [get_bd_pins axis_register_slice_0/aresetn] [get_bd_pins axis_register_slice_1/aresetn] [get_bd_pins axis_register_slice_10/aresetn] [get_bd_pins axis_register_slice_11/aresetn] [get_bd_pins axis_register_slice_12/aresetn] [get_bd_pins axis_register_slice_13/aresetn] [get_bd_pins axis_register_slice_14/aresetn] [get_bd_pins axis_register_slice_7/aresetn] [get_bd_pins axis_register_slice_8/aresetn] [get_bd_pins axis_register_slice_9/aresetn] [get_bd_pins axis_switch_1_to_2_inst_0/aresetn] [get_bd_pins axis_switch_1_to_2_inst_1/aresetn] [get_bd_pins axis_switch_1_to_2_inst_2/aresetn] [get_bd_pins axis_switch_1_to_2_inst_3/aresetn] [get_bd_pins axis_switch_2_to_1_inst_0/aresetn] [get_bd_pins axis_switch_2_to_1_inst_1/aresetn] [get_bd_pins ccl_offload_0/ap_rst_n] [get_bd_pins cclo_sq_adapter_0/ap_rst_n] [get_bd_pins cyt_cq_dm_sts_conver_0/ap_rst_n] [get_bd_pins cyt_cq_dm_sts_conver_1/ap_rst_n] [get_bd_pins cyt_dma_sq_adapter_0/ap_rst_n] [get_bd_pins cyt_rdma_arbiter_0/ap_rst_n] [get_bd_pins hostctrl_0/ap_rst_n] [get_bd_pins reduce_ops_0/ap_rst_n] [get_bd_pins rst_ap_clk_0_250M/ext_reset_in] [get_bd_pins smartconnect_0/aresetn] [get_bd_pins system_ila_0/resetn] + connect_bd_net -net rst_ap_clk_0_250M_peripheral_aresetn [get_bd_pins axis_data_fifo_0/s_axis_aresetn] [get_bd_pins axis_data_fifo_1/s_axis_aresetn] [get_bd_pins axis_data_fifo_2/s_axis_aresetn] [get_bd_pins axis_data_fifo_3/s_axis_aresetn] [get_bd_pins axis_data_fifo_4/s_axis_aresetn] [get_bd_pins axis_data_fifo_5/s_axis_aresetn] [get_bd_pins axis_register_slice_2/aresetn] [get_bd_pins axis_register_slice_3/aresetn] [get_bd_pins axis_register_slice_4/aresetn] [get_bd_pins axis_register_slice_5/aresetn] [get_bd_pins axis_register_slice_6/aresetn] [get_bd_pins axis_switch_2_to_1_inst_2/aresetn] [get_bd_pins rst_ap_clk_0_250M/peripheral_aresetn] + connect_bd_net -net xlconstant_0_dout [get_bd_pins axis_switch_2_to_1_inst_0/s_req_suppress] [get_bd_pins xlconstant_0/dout] + connect_bd_net -net xlconstant_1_dout [get_bd_pins axis_switch_2_to_1_inst_1/s_req_suppress] [get_bd_pins xlconstant_1/dout] + connect_bd_net -net xlconstant_2_dout [get_bd_pins axis_switch_2_to_1_inst_2/s_req_suppress] [get_bd_pins xlconstant_2/dout] + + # Create address segments + assign_bd_address -offset 0x00000000 -range 0x00002000 -target_address_space [get_bd_addr_spaces S00_AXI_0] [get_bd_addr_segs ccl_offload_0/s_axi_control/reg0] -force + assign_bd_address -offset 0x00002000 -range 0x00002000 -target_address_space [get_bd_addr_spaces S00_AXI_0] [get_bd_addr_segs hostctrl_0/s_axi_control/Reg] -force + +# create some hierarchies +group_bd_cells cclo [get_bd_cells hostctrl_0] [get_bd_cells smartconnect_0] [get_bd_cells reduce_ops_0] [get_bd_cells ccl_offload_0] +group_bd_cells rrsp_bypass [get_bd_cells axis_data_fifo_1] [get_bd_cells axis_data_fifo_0] +group_bd_cells rrsp_arbitration [get_bd_cells axis_switch_1_to_2_inst_2] [get_bd_cells axis_register_slice_0] [get_bd_cells axis_register_slice_1] [get_bd_cells cyt_rdma_arbiter_0] [get_bd_cells axis_register_slice_14] [get_bd_cells axis_register_slice_2] +group_bd_cells completion_conversion [get_bd_cells axis_register_slice_3] [get_bd_cells axis_data_fifo_6] [get_bd_cells axis_data_fifo_7] [get_bd_cells axis_register_slice_4] [get_bd_cells axis_data_fifo_8] [get_bd_cells cyt_cq_dm_sts_conver_0] [get_bd_cells axis_data_fifo_9] [get_bd_cells cyt_cq_dm_sts_conver_1] +group_bd_cells request_conversion [get_bd_cells axis_register_slice_9] [get_bd_cells axis_data_fifo_2] [get_bd_cells axis_data_fifo_3] [get_bd_cells axis_data_fifo_4] [get_bd_cells axis_data_fifo_5] [get_bd_cells cyt_dma_sq_adapter_0] [get_bd_cells axis_register_slice_10] +group_bd_cells sq_conversion [get_bd_cells axis_switch_2_to_1_inst_2] [get_bd_cells axis_switch_1_to_2_inst_3] [get_bd_cells axis_register_slice_11] [get_bd_cells axis_register_slice_6] [get_bd_cells cclo_sq_adapter_0] [get_bd_cells xlconstant_2] +group_bd_cells local_dma_input_muxing [get_bd_cells axis_switch_2_to_1_inst_0] [get_bd_cells axis_switch_2_to_1_inst_1] [get_bd_cells xlconstant_0] [get_bd_cells axis_register_slice_7] [get_bd_cells axis_register_slice_8] [get_bd_cells xlconstant_1] +group_bd_cells local_dma_output_demuxing [get_bd_cells axis_switch_1_to_2_inst_0] [get_bd_cells axis_register_slice_12] [get_bd_cells axis_register_slice_13] [get_bd_cells axis_switch_1_to_2_inst_1] + validate_bd_design save_bd_design -make_wrapper -files [get_files "$build_dir/lynx/lynx.srcs/sources_1/bd/accl_bd/accl_bd.bd"] -top -add_files -norecurse "$build_dir/lynx/lynx.gen/sources_1/bd/accl_bd/hdl/accl_bd_wrapper.v" +make_wrapper -files [get_files "$build_dir/test_config_0/user_c0_0/test.srcs/sources_1/bd/accl_bd/accl_bd.bd"] -top +add_files -norecurse "$build_dir/test_config_0/user_c0_0/test.srcs/sources_1/bd/accl_bd/hdl/accl_bd_wrapper.v" update_compile_order -fileset sources_1 exit + +