diff --git a/.gitignore b/.gitignore index 331d63fd..ef6bb1de 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,5 @@ *.swp *.Po build -test/hsa test/MatrixTranspose/MatrixTranspose test/MatrixTranspose_test/MatrixTranspose diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ca35708..07a622ce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -195,6 +195,7 @@ else() endif() message ( "Using CPACK_DEBIAN_PACKAGE_RELEASE ${CPACK_DEBIAN_PACKAGE_RELEASE}" ) set ( CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT" ) +set ( CPACK_DEBIAN_PACKAGE_DEPENDS "rocm-core" ) ## Process the Debian install/remove scripts to update the CPACK variables configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst.in DEBIAN/postinst @ONLY ) configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm.in DEBIAN/prerm @ONLY ) @@ -221,7 +222,14 @@ if ( PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "" ) string ( APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}" ) endif() set ( CPACK_RPM_FILE_NAME "RPM-DEFAULT" ) +set ( CPACK_RPM_PACKAGE_REQUIRES "rocm-core" ) message("CPACK_RPM_PACKAGE_RELEASE: ${CPACK_RPM_PACKAGE_RELEASE}") + +if(NOT ROCM_DEP_ROCMCORE) + string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_PACKAGE_REQUIRES ${CPACK_RPM_PACKAGE_REQUIRES}) + string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_PACKAGE_DEPENDS ${CPACK_DEBIAN_PACKAGE_DEPENDS}) +endif() + ## Process the Rpm install/remove scripts to update the CPACK variables configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/post.in" RPM/post @ONLY ) configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/postun.in" RPM/postun @ONLY ) diff --git a/README.md b/README.md index e700ee40..28838c19 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,10 @@ rocTX API: - To build roctracer library: export CMAKE_BUILD_TYPE= # release by default + cd /roctracer && mkdir build && cd build && cmake -DCMAKE_INSTALL_PREFIX=/opt/rocm .. && make -j + or + ./build.sh - To build and run test: make mytest diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index c9d55da9..4c699182 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -760,7 +760,7 @@ static inline uint32_t get_op_end(uint32_t domain) { case ACTIVITY_DOMAIN_HSA_API: return HSA_API_ID_NUMBER; case ACTIVITY_DOMAIN_HSA_EVT: return HSA_EVT_ID_NUMBER; case ACTIVITY_DOMAIN_HCC_OPS: return HIP_OP_ID_NUMBER; - case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_LAST + 1;; + case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_LAST + 1; case ACTIVITY_DOMAIN_EXT_API: return 0; case ACTIVITY_DOMAIN_ROCTX: return ROCTX_API_ID_NUMBER; default: diff --git a/src/roctx/roctx.cpp b/src/roctx/roctx.cpp index 819cc287..32600138 100644 --- a/src/roctx/roctx.cpp +++ b/src/roctx/roctx.cpp @@ -140,7 +140,7 @@ PUBLIC_API int roctxRangePushA(const char* message) { void* api_callback_arg = NULL; roctx::cb_table.get(ROCTX_API_ID_roctxRangePushA, &api_callback_fun, &api_callback_arg); if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, &api_data, api_callback_arg); - roctx::message_stack->push(strdup(message)); + roctx::message_stack->emplace(message); return roctx::message_stack->size() - 1; API_METHOD_CATCH(-1); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a14ee2ac..17a54c80 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -44,7 +44,6 @@ endif () ## Path to HSA test set ( HSA_TEST_DIR "${TEST_DIR}/hsa/test" ) -set ( HSA_REV "f8b3870" ) ## test run script set ( RUN_SCRIPT "${TEST_DIR}/run.sh" ) @@ -70,8 +69,6 @@ add_custom_target( mytest ) ## Build HSA test -execute_process ( COMMAND sh -xc "if [ ! -e ${TEST_DIR}/hsa ] ; then git clone https://github.com/ROCmSoftwarePlatform/hsa-class.git ${TEST_DIR}/hsa; fi" ) -execute_process ( COMMAND sh -xc "if [ -e ${TEST_DIR}/hsa ] ; then cd ${TEST_DIR}/hsa && git fetch origin && git checkout ${HSA_REV}; fi" ) set ( TMP ${TEST_DIR} ) set ( TEST_DIR ${HSA_TEST_DIR} ) add_subdirectory ( ${HSA_TEST_DIR} ${PROJECT_BINARY_DIR}/test/hsa ) diff --git a/test/hsa/LICENSE b/test/hsa/LICENSE new file mode 100644 index 00000000..597d1b16 --- /dev/null +++ b/test/hsa/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +[MITx11 license] + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/test/hsa/README.md b/test/hsa/README.md new file mode 100644 index 00000000..20e09157 --- /dev/null +++ b/test/hsa/README.md @@ -0,0 +1,4 @@ +# HSA-class +``` +HSA high level C++ API +``` diff --git a/test/hsa/script/build_kernel.sh b/test/hsa/script/build_kernel.sh new file mode 100755 index 00000000..2007e7ab --- /dev/null +++ b/test/hsa/script/build_kernel.sh @@ -0,0 +1,80 @@ +#!/bin/sh -x +SO_EXT="hsaco" + +TEST_NAME=$1 +DST_DIR=$2 +ROCM_DIR=$3 +TGT_LIST=$4 + +if [ -z "$TEST_NAME" ] ; then + echo "Usage: $0 " + echo " Will look for .cl and will build .$SO_EXT dynamic code object library" + exit 1 +fi +OBJ_NAME=$(echo "_$(basename $TEST_NAME)" | sed -e 's/_./\U&\E/g' -e 's/_//g') + +if [ -z "$DST_DIR" ] ; then + DST_DIR=$(dirname TEST_NAME) +fi + +if [ -z "$ROCM_DIR" ] ; then + ROCM_DIR=/opt/rocm +fi + +if [ -z "$TGT_LIST" ] ; then + TGT_LIST=`$ROCM_DIR/bin/rocminfo | grep "amdgcn-amd-amdhsa--" | head -n 1 | sed -n "s/^.*amdgcn-amd-amdhsa--\(\w*\).*$/\1/p"` +fi + +if [ -z "$TGT_LIST" ] ; then + echo "Error: GPU targets not found" + exit 1 +fi + +OCL_VER="2.0" + +if [ -e $ROCM_DIR/llvm ] ; then + LLVM_DIR=$ROCM_DIR/llvm + LIB_DIR=$ROCM_DIR/lib +else + LLVM_DIR=$ROCM_DIR/hcc + LIB_DIR=$LLVM_DIR/lib +fi + +# Determine whether using new or old device-libs layout +if [ -e $LIB_DIR/bitcode/opencl.amdgcn.bc ]; then + BC_DIR=$LIB_DIR/bitcode +elif [ -e $LIB_DIR/opencl.amdgcn.bc ]; then + BC_DIR=$LIB_DIR +elif [ -e $ROCM_DIR/amdgcn/bitcode/opencl.bc ]; then + BC_DIR=$ROCM_DIR/amdgcn/bitcode +else + echo "Error: Cannot find amdgcn bitcode directory" + exit 1 +fi + +CLANG_ROOT=$LLVM_DIR/lib/clang +CLANG_DIR=`ls -d $CLANG_ROOT/* | head -n 1` +if [ "$CLANG_DIR" = "" ] ; then + echo "Error: LLVM clang library was not found" + exit 1 +fi + +BIN_DIR=$LLVM_DIR/bin +INC_DIR=$CLANG_DIR/include +if [ -e $BC_DIR/opencl.amdgcn.bc ]; then + BITCODE_OPTS="-nogpulib \ + -Xclang -mlink-bitcode-file -Xclang $BC_DIR/opencl.amdgcn.bc \ + -Xclang -mlink-bitcode-file -Xclang $BC_DIR/ockl.amdgcn.bc \ + -Xclang -mlink-bitcode-file -Xclang $BC_DIR/ocml.amdgcn.bc" +else + BITCODE_OPTS="--hip-device-lib-path=$BC_DIR" +fi + +for GFXIP in $TGT_LIST ; do + OBJ_PREF=$GFXIP + OBJ_FILE="${OBJ_PREF}_${OBJ_NAME}.$SO_EXT" + $BIN_DIR/clang -cl-std=CL$OCL_VER -include $INC_DIR/opencl-c.h $BITCODE_OPTS -target amdgcn-amd-amdhsa -mcpu=$GFXIP $TEST_NAME.cl -o $DST_DIR/$OBJ_FILE + echo "'$OBJ_FILE' generated" +done + +exit 0 diff --git a/test/hsa/src/hsa_rsrc_factory.cpp b/test/hsa/src/hsa_rsrc_factory.cpp new file mode 100644 index 00000000..d2d8e79e --- /dev/null +++ b/test/hsa/src/hsa_rsrc_factory.cpp @@ -0,0 +1,761 @@ +/********************************************************************** +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted +provided that the following conditions are met: + +<95> Redistributions of source code must retain the above copyright notice, this list of +conditions and the following disclaimer. +<95> Redistributions in binary form must reproduce the above copyright notice, this list of +conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +********************************************************************/ + +#include "util/hsa_rsrc_factory.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +// Callback function to get available in the system agents +hsa_status_t HsaRsrcFactory::GetHsaAgentsCallback(hsa_agent_t agent, void* data) { + hsa_status_t status = HSA_STATUS_ERROR; + HsaRsrcFactory* hsa_rsrc = reinterpret_cast(data); + const AgentInfo* agent_info = hsa_rsrc->AddAgentInfo(agent); + if (agent_info != NULL) status = HSA_STATUS_SUCCESS; + return status; +} + +// This function checks to see if the provided +// pool has the HSA_AMD_SEGMENT_GLOBAL property. If the kern_arg flag is true, +// the function adds an additional requirement that the pool have the +// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT property. If kern_arg is false, +// pools must NOT have this property. +// Upon finding a pool that meets these conditions, HSA_STATUS_INFO_BREAK is +// returned. HSA_STATUS_SUCCESS is returned if no errors were encountered, but +// no pool was found meeting the requirements. If an error is encountered, we +// return that error. +static hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool kern_arg) { + hsa_status_t err; + hsa_amd_segment_t segment; + uint32_t flag; + + if (nullptr == data) { + return HSA_STATUS_ERROR_INVALID_ARGUMENT; + } + + err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment); + CHECK_STATUS("hsa_amd_memory_pool_get_info", err); + if (HSA_AMD_SEGMENT_GLOBAL != segment) { + return HSA_STATUS_SUCCESS; + } + + err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag); + CHECK_STATUS("hsa_amd_memory_pool_get_info", err); + + uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT; + + if ((karg_st == 0 && kern_arg) || (karg_st != 0 && !kern_arg)) { + return HSA_STATUS_SUCCESS; + } + + *(reinterpret_cast(data)) = pool; + return HSA_STATUS_INFO_BREAK; +} + +// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that +// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that is NOT +// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT +hsa_status_t FindStandardPool(hsa_amd_memory_pool_t pool, void* data) { + return FindGlobalPool(pool, data, false); +} + +// This is the call-back function for hsa_amd_agent_iterate_memory_pools() that +// finds a pool with the properties of HSA_AMD_SEGMENT_GLOBAL and that IS +// HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT +hsa_status_t FindKernArgPool(hsa_amd_memory_pool_t pool, void* data) { + return FindGlobalPool(pool, data, true); +} + +// Constructor of the class +HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize_hsa) { + hsa_status_t status; + + cpu_pool_ = NULL; + kern_arg_pool_ = NULL; + + InitHsaApiTable(NULL); + + // Initialize the Hsa Runtime + if (initialize_hsa_) { + status = hsa_api_.hsa_init(); + CHECK_STATUS("Error in hsa_init", status); + } + + // Discover the set of Gpu devices available on the platform + status = hsa_api_.hsa_iterate_agents(GetHsaAgentsCallback, this); + CHECK_STATUS("Error Calling hsa_iterate_agents", status); + if (cpu_pool_ == NULL) CHECK_STATUS("CPU memory pool is not found", HSA_STATUS_ERROR); + if (kern_arg_pool_ == NULL) CHECK_STATUS("Kern-arg memory pool is not found", HSA_STATUS_ERROR); + + // Get AqlProfile API table + aqlprofile_api_ = {0}; +#ifdef ROCP_LD_AQLPROFILE + status = LoadAqlProfileLib(&aqlprofile_api_); +#else + status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, hsa_ven_amd_aqlprofile_VERSION_MAJOR, sizeof(aqlprofile_api_), &aqlprofile_api_); +#endif + CHECK_STATUS("aqlprofile API table load failed", status); + + // Get Loader API table + loader_api_ = {0}; + status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, sizeof(loader_api_), &loader_api_); + CHECK_STATUS("loader API table query failed", status); + + // Instantiate HSA timer + timer_ = new HsaTimer(&hsa_api_); + CHECK_STATUS("HSA timer allocation failed", + (timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS); + + // Time correlation + const uint32_t corr_iters = 1000; + CorrelateTime(HsaTimer::TIME_ID_CLOCK_REALTIME, corr_iters); + CorrelateTime(HsaTimer::TIME_ID_CLOCK_MONOTONIC, corr_iters); + + // System timeout + timeout_ = (timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_); +} + +// Destructor of the class +HsaRsrcFactory::~HsaRsrcFactory() { + delete timer_; + for (auto p : cpu_list_) delete p; + for (auto p : gpu_list_) delete p; + if (initialize_hsa_) { + hsa_status_t status = hsa_api_.hsa_shut_down(); + CHECK_STATUS("Error in hsa_shut_down", status); + } +} + +void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { + std::lock_guard lck(mutex_); + + if (hsa_api_.hsa_init == NULL) { + if (table != NULL) { + hsa_api_.hsa_init = table->core_->hsa_init_fn; + hsa_api_.hsa_shut_down = table->core_->hsa_shut_down_fn; + hsa_api_.hsa_agent_get_info = table->core_->hsa_agent_get_info_fn; + hsa_api_.hsa_iterate_agents = table->core_->hsa_iterate_agents_fn; + + hsa_api_.hsa_queue_create = table->core_->hsa_queue_create_fn; + hsa_api_.hsa_queue_destroy = table->core_->hsa_queue_destroy_fn; + hsa_api_.hsa_queue_load_write_index_relaxed = table->core_->hsa_queue_load_write_index_relaxed_fn; + hsa_api_.hsa_queue_store_write_index_relaxed = table->core_->hsa_queue_store_write_index_relaxed_fn; + hsa_api_.hsa_queue_load_read_index_relaxed = table->core_->hsa_queue_load_read_index_relaxed_fn; + + hsa_api_.hsa_signal_create = table->core_->hsa_signal_create_fn; + hsa_api_.hsa_signal_destroy = table->core_->hsa_signal_destroy_fn; + hsa_api_.hsa_signal_load_relaxed = table->core_->hsa_signal_load_relaxed_fn; + hsa_api_.hsa_signal_store_relaxed = table->core_->hsa_signal_store_relaxed_fn; + hsa_api_.hsa_signal_wait_scacquire = table->core_->hsa_signal_wait_scacquire_fn; + hsa_api_.hsa_signal_store_screlease = table->core_->hsa_signal_store_screlease_fn; + + hsa_api_.hsa_code_object_reader_create_from_file = table->core_->hsa_code_object_reader_create_from_file_fn; + hsa_api_.hsa_executable_create_alt = table->core_->hsa_executable_create_alt_fn; + hsa_api_.hsa_executable_load_agent_code_object = table->core_->hsa_executable_load_agent_code_object_fn; + hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn; + hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn; + hsa_api_.hsa_executable_symbol_get_info = table->core_->hsa_executable_symbol_get_info_fn; + hsa_api_.hsa_executable_iterate_symbols = table->core_->hsa_executable_iterate_symbols_fn; + + hsa_api_.hsa_system_get_info = table->core_->hsa_system_get_info_fn; + hsa_api_.hsa_system_get_major_extension_table = table->core_->hsa_system_get_major_extension_table_fn; + + hsa_api_.hsa_amd_agent_iterate_memory_pools = table->amd_ext_->hsa_amd_agent_iterate_memory_pools_fn; + hsa_api_.hsa_amd_memory_pool_get_info = table->amd_ext_->hsa_amd_memory_pool_get_info_fn; + hsa_api_.hsa_amd_memory_pool_allocate = table->amd_ext_->hsa_amd_memory_pool_allocate_fn; + hsa_api_.hsa_amd_agents_allow_access = table->amd_ext_->hsa_amd_agents_allow_access_fn; + hsa_api_.hsa_amd_memory_async_copy = table->amd_ext_->hsa_amd_memory_async_copy_fn; + + hsa_api_.hsa_amd_signal_async_handler = table->amd_ext_->hsa_amd_signal_async_handler_fn; + hsa_api_.hsa_amd_profiling_set_profiler_enabled = table->amd_ext_->hsa_amd_profiling_set_profiler_enabled_fn; + hsa_api_.hsa_amd_profiling_get_async_copy_time = table->amd_ext_->hsa_amd_profiling_get_async_copy_time_fn; + hsa_api_.hsa_amd_profiling_get_dispatch_time = table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn; + } else { + hsa_api_.hsa_init = hsa_init; + hsa_api_.hsa_shut_down = hsa_shut_down; + hsa_api_.hsa_agent_get_info = hsa_agent_get_info; + hsa_api_.hsa_iterate_agents = hsa_iterate_agents; + + hsa_api_.hsa_queue_create = hsa_queue_create; + hsa_api_.hsa_queue_destroy = hsa_queue_destroy; + hsa_api_.hsa_queue_load_write_index_relaxed = hsa_queue_load_write_index_relaxed; + hsa_api_.hsa_queue_store_write_index_relaxed = hsa_queue_store_write_index_relaxed; + hsa_api_.hsa_queue_load_read_index_relaxed = hsa_queue_load_read_index_relaxed; + + hsa_api_.hsa_signal_create = hsa_signal_create; + hsa_api_.hsa_signal_destroy = hsa_signal_destroy; + hsa_api_.hsa_signal_load_relaxed = hsa_signal_load_relaxed; + hsa_api_.hsa_signal_store_relaxed = hsa_signal_store_relaxed; + hsa_api_.hsa_signal_wait_scacquire = hsa_signal_wait_scacquire; + hsa_api_.hsa_signal_store_screlease = hsa_signal_store_screlease; + + hsa_api_.hsa_code_object_reader_create_from_file = hsa_code_object_reader_create_from_file; + hsa_api_.hsa_executable_create_alt = hsa_executable_create_alt; + hsa_api_.hsa_executable_load_agent_code_object = hsa_executable_load_agent_code_object; + hsa_api_.hsa_executable_freeze = hsa_executable_freeze; + hsa_api_.hsa_executable_get_symbol = hsa_executable_get_symbol; + hsa_api_.hsa_executable_symbol_get_info = hsa_executable_symbol_get_info; + hsa_api_.hsa_executable_iterate_symbols = hsa_executable_iterate_symbols; + + hsa_api_.hsa_system_get_info = hsa_system_get_info; + hsa_api_.hsa_system_get_major_extension_table = hsa_system_get_major_extension_table; + + hsa_api_.hsa_amd_agent_iterate_memory_pools = hsa_amd_agent_iterate_memory_pools; + hsa_api_.hsa_amd_memory_pool_get_info = hsa_amd_memory_pool_get_info; + hsa_api_.hsa_amd_memory_pool_allocate = hsa_amd_memory_pool_allocate; + hsa_api_.hsa_amd_agents_allow_access = hsa_amd_agents_allow_access; + hsa_api_.hsa_amd_memory_async_copy = hsa_amd_memory_async_copy; + + hsa_api_.hsa_amd_signal_async_handler = hsa_amd_signal_async_handler; + hsa_api_.hsa_amd_profiling_set_profiler_enabled = hsa_amd_profiling_set_profiler_enabled; + hsa_api_.hsa_amd_profiling_get_async_copy_time = hsa_amd_profiling_get_async_copy_time; + hsa_api_.hsa_amd_profiling_get_dispatch_time = hsa_amd_profiling_get_dispatch_time; + } + } +} + +hsa_status_t HsaRsrcFactory::LoadAqlProfileLib(aqlprofile_pfn_t* api) { + void* handle = dlopen(kAqlProfileLib, RTLD_NOW); + if (handle == NULL) { + fprintf(stderr, "Loading '%s' failed, %s\n", kAqlProfileLib, dlerror()); + return HSA_STATUS_ERROR; + } + dlerror(); /* Clear any existing error */ + + api->hsa_ven_amd_aqlprofile_error_string = + (decltype(::hsa_ven_amd_aqlprofile_error_string)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_error_string"); + api->hsa_ven_amd_aqlprofile_validate_event = + (decltype(::hsa_ven_amd_aqlprofile_validate_event)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_validate_event"); + api->hsa_ven_amd_aqlprofile_start = + (decltype(::hsa_ven_amd_aqlprofile_start)*)dlsym(handle, "hsa_ven_amd_aqlprofile_start"); + api->hsa_ven_amd_aqlprofile_stop = + (decltype(::hsa_ven_amd_aqlprofile_stop)*)dlsym(handle, "hsa_ven_amd_aqlprofile_stop"); +#ifdef AQLPROF_NEW_API + api->hsa_ven_amd_aqlprofile_read = + (decltype(::hsa_ven_amd_aqlprofile_read)*)dlsym(handle, "hsa_ven_amd_aqlprofile_read"); +#endif + api->hsa_ven_amd_aqlprofile_legacy_get_pm4 = + (decltype(::hsa_ven_amd_aqlprofile_legacy_get_pm4)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_legacy_get_pm4"); + api->hsa_ven_amd_aqlprofile_get_info = (decltype(::hsa_ven_amd_aqlprofile_get_info)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_get_info"); + api->hsa_ven_amd_aqlprofile_iterate_data = + (decltype(::hsa_ven_amd_aqlprofile_iterate_data)*)dlsym( + handle, "hsa_ven_amd_aqlprofile_iterate_data"); + + return HSA_STATUS_SUCCESS; +} + +// Add system agent info +const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) { + // Determine if device is a Gpu agent + hsa_status_t status; + AgentInfo* agent_info = NULL; + + hsa_device_type_t type; + status = hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type); + CHECK_STATUS("Error Calling hsa_agent_get_info", status); + + if (type == HSA_DEVICE_TYPE_CPU) { + agent_info = new AgentInfo{}; + agent_info->dev_id = agent; + agent_info->dev_type = HSA_DEVICE_TYPE_CPU; + agent_info->dev_index = cpu_list_.size(); + + status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool); + if ((status == HSA_STATUS_INFO_BREAK) && (cpu_pool_ == NULL)) cpu_pool_ = &agent_info->cpu_pool; + status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool, &agent_info->kern_arg_pool); + if ((status == HSA_STATUS_INFO_BREAK) && (kern_arg_pool_ == NULL)) kern_arg_pool_ = &agent_info->kern_arg_pool; + agent_info->gpu_pool = {}; + + cpu_list_.push_back(agent_info); + cpu_agents_.push_back(agent); + } + + if (type == HSA_DEVICE_TYPE_GPU) { + agent_info = new AgentInfo{}; + agent_info->dev_id = agent; + agent_info->dev_type = HSA_DEVICE_TYPE_GPU; + hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name); + strncpy(agent_info->gfxip, agent_info->name, 4); + agent_info->gfxip[4] = '\0'; + hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size); + hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size); + hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile); + agent_info->is_apu = (agent_info->profile == HSA_PROFILE_FULL) ? true : false; + hsa_api_.hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), + &agent_info->cu_num); + hsa_api_.hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU), + &agent_info->waves_per_cu); + hsa_api_.hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), + &agent_info->simds_per_cu); + hsa_api_.hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), + &agent_info->se_num); + hsa_api_.hsa_agent_get_info(agent, + static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE), + &agent_info->shader_arrays_per_se); + + agent_info->cpu_pool = {}; + agent_info->kern_arg_pool = {}; + status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool); + CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status); + + // GFX8 and GFX9 SGPR/VGPR block sizes + agent_info->sgpr_block_dflt = (strcmp(agent_info->gfxip, "gfx8") == 0) ? 1 : 2; + agent_info->sgpr_block_size = 8; + agent_info->vgpr_block_size = 4; + + // Set GPU index + agent_info->dev_index = gpu_list_.size(); + gpu_list_.push_back(agent_info); + gpu_agents_.push_back(agent); + } + + if (agent_info) agent_map_[agent.handle] = agent_info; + + return agent_info; +} + +// Return systen agent info +const AgentInfo* HsaRsrcFactory::GetAgentInfo(const hsa_agent_t agent) { + const AgentInfo* agent_info = NULL; + auto it = agent_map_.find(agent.handle); + if (it != agent_map_.end()) { + agent_info = it->second; + } + return agent_info; +} + +// Get the count of Hsa Gpu Agents available on the platform +// +// @return uint32_t Number of Gpu agents on platform +// +uint32_t HsaRsrcFactory::GetCountOfGpuAgents() { return uint32_t(gpu_list_.size()); } + +// Get the count of Hsa Cpu Agents available on the platform +// +// @return uint32_t Number of Cpu agents on platform +// +uint32_t HsaRsrcFactory::GetCountOfCpuAgents() { return uint32_t(cpu_list_.size()); } + +// Get the AgentInfo handle of a Gpu device +// +// @param idx Gpu Agent at specified index +// +// @param agent_info Output parameter updated with AgentInfo +// +// @return bool true if successful, false otherwise +// +bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) { + // Determine if request is valid + uint32_t size = uint32_t(gpu_list_.size()); + if (idx >= size) { + return false; + } + + // Copy AgentInfo from specified index + *agent_info = gpu_list_[idx]; + + return true; +} + +// Get the AgentInfo handle of a Cpu device +// +// @param idx Cpu Agent at specified index +// +// @param agent_info Output parameter updated with AgentInfo +// +// @return bool true if successful, false otherwise +// +bool HsaRsrcFactory::GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) { + // Determine if request is valid + uint32_t size = uint32_t(cpu_list_.size()); + if (idx >= size) { + return false; + } + + // Copy AgentInfo from specified index + *agent_info = cpu_list_[idx]; + return true; +} + +// Create a Queue object and return its handle. The queue object is expected +// to support user requested number of Aql dispatch packets. +// +// @param agent_info Gpu Agent on which to create a queue object +// +// @param num_Pkts Number of packets to be held by queue +// +// @param queue Output parameter updated with handle of queue object +// +// @return bool true if successful, false otherwise +// +bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, + hsa_queue_t** queue) { + hsa_status_t status; + status = hsa_api_.hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL, + UINT32_MAX, UINT32_MAX, queue); + return (status == HSA_STATUS_SUCCESS); +} + +// Create a Signal object and return its handle. +// @param value Initial value of signal object +// @param signal Output parameter updated with handle of signal object +// @return bool true if successful, false otherwise +bool HsaRsrcFactory::CreateSignal(uint32_t value, hsa_signal_t* signal) { + hsa_status_t status; + status = hsa_api_.hsa_signal_create(value, 0, NULL, signal); + return (status == HSA_STATUS_SUCCESS); +} + +// Allocate memory for use by a kernel of specified size in specified +// agent's memory region. +// @param agent_info Agent from whose memory region to allocate +// @param size Size of memory in terms of bytes +// @return uint8_t* Pointer to buffer, null if allocation fails. +uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t size) { + hsa_status_t status = HSA_STATUS_ERROR; + uint8_t* buffer = NULL; + size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; + status = hsa_api_.hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, reinterpret_cast(&buffer)); + uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL; + return ptr; +} + +// Allocate memory to pass kernel parameters. +// Memory is alocated accessible for all CPU agents and for GPU given by AgentInfo parameter. +// @param agent_info Agent from whose memory region to allocate +// @param size Size of memory in terms of bytes +// @return uint8_t* Pointer to buffer, null if allocation fails. +uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size_t size) { + hsa_status_t status = HSA_STATUS_ERROR; + uint8_t* buffer = NULL; + if (!cpu_agents_.empty()) { + size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; + status = hsa_api_.hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0, reinterpret_cast(&buffer)); + // Both the CPU and GPU can access the kernel arguments + if (status == HSA_STATUS_SUCCESS) { + hsa_agent_t ag_list[1] = {agent_info->dev_id}; + status = hsa_api_.hsa_amd_agents_allow_access(1, ag_list, NULL, buffer); + } + } + uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL; + return ptr; +} + +// Allocate system memory accessible by both CPU and GPU +// @param agent_info Agent from whose memory region to allocate +// @param size Size of memory in terms of bytes +// @return uint8_t* Pointer to buffer, null if allocation fails. +uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t size) { + hsa_status_t status = HSA_STATUS_ERROR; + uint8_t* buffer = NULL; + size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; + if (!cpu_agents_.empty()) { + status = hsa_api_.hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0, reinterpret_cast(&buffer)); + // Both the CPU and GPU can access the memory + if (status == HSA_STATUS_SUCCESS) { + hsa_agent_t ag_list[1] = {agent_info->dev_id}; + status = hsa_api_.hsa_amd_agents_allow_access(1, ag_list, NULL, buffer); + } + } + uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL; + return ptr; +} + +// Allocate memory for command buffer. +// @param agent_info Agent from whose memory region to allocate +// @param size Size of memory in terms of bytes +// @return uint8_t* Pointer to buffer, null if allocation fails. +uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t size) { + size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; + uint8_t* ptr = (agent_info->is_apu && CMD_MEMORY_MMAP) + ? reinterpret_cast( + mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANONYMOUS, 0, 0)) + : AllocateSysMemory(agent_info, size); + return ptr; +} + +// Wait signal +hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const { + const hsa_signal_value_t exp_value = signal_value - 1; + hsa_signal_value_t ret_value = signal_value; + while (1) { + ret_value = + hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, signal_value, timeout_, HSA_WAIT_STATE_BLOCKED); + if (ret_value == exp_value) break; + if (ret_value != signal_value) { + std::cerr << "Error: HsaRsrcFactory::SignalWait: signal_value(" << signal_value + << "), ret_value(" << ret_value << ")" << std::endl << std::flush; + abort(); + } + } + return ret_value; +} + +// Wait signal with signal value restore +void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const { + SignalWait(signal, signal_value); + hsa_api_.hsa_signal_store_relaxed(const_cast(signal), signal_value); +} + +// Copy data from GPU to host memory +bool HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size) { + hsa_status_t status = HSA_STATUS_ERROR; + if (!cpu_agents_.empty()) { + hsa_signal_t s = {}; + status = hsa_api_.hsa_signal_create(1, 0, NULL, &s); + CHECK_STATUS("hsa_signal_create()", status); + status = hsa_api_.hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s); + CHECK_STATUS("hsa_amd_memory_async_copy()", status); + SignalWait(s, 1); + status = hsa_api_.hsa_signal_destroy(s); + CHECK_STATUS("hsa_signal_destroy()", status); + } + return (status == HSA_STATUS_SUCCESS); +} +bool HsaRsrcFactory::Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size) { + return Memcpy(agent_info->dev_id, dst, src, size); +} + +// Memory free method +bool HsaRsrcFactory::FreeMemory(void* ptr) { + const hsa_status_t status = hsa_memory_free(ptr); + CHECK_STATUS("hsa_memory_free", status); + return (status == HSA_STATUS_SUCCESS); +} + +// Loads an Assembled Brig file and Finalizes it into Device Isa +// @param agent_info Gpu device for which to finalize +// @param brig_path File path of the Assembled Brig file +// @param kernel_name Name of the kernel to finalize +// @param code_desc Handle of finalized Code Descriptor that could +// be used to submit for execution +// @return bool true if successful, false otherwise +bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, + const char* kernel_name, hsa_executable_t* executable, + hsa_executable_symbol_t* code_desc) { + hsa_status_t status = HSA_STATUS_ERROR; + + // Build the code object filename + std::string filename(brig_path); + std::clog << "Code object filename: " << filename << std::endl; + + // Open the file containing code object + hsa_file_t file_handle = open(filename.c_str(), O_RDONLY); + if (file_handle == -1) { + std::cerr << "Error: failed to load '" << filename << "'" << std::endl; + assert(false); + return false; + } + + // Create code object reader + hsa_code_object_reader_t code_obj_rdr = {0}; + status = hsa_api_.hsa_code_object_reader_create_from_file(file_handle, &code_obj_rdr); + if (status != HSA_STATUS_SUCCESS) { + std::cerr << "Failed to create code object reader '" << filename << "'" << std::endl; + return false; + } + + // Create executable. + status = hsa_api_.hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, + NULL, executable); + CHECK_STATUS("Error in creating executable object", status); + + // Load code object. + status = hsa_api_.hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, code_obj_rdr, + NULL, NULL); + CHECK_STATUS("Error in loading executable object", status); + + // Freeze executable. + status = hsa_api_.hsa_executable_freeze(*executable, ""); + CHECK_STATUS("Error in freezing executable object", status); + + // Get symbol handle. + hsa_executable_symbol_t kernelSymbol; + status = hsa_api_.hsa_executable_get_symbol(*executable, NULL, kernel_name, agent_info->dev_id, 0, + &kernelSymbol); + CHECK_STATUS("Error in looking up kernel symbol", status); + + // Update output parameter + *code_desc = kernelSymbol; + return true; +} + +// Print the various fields of Hsa Gpu Agents +bool HsaRsrcFactory::PrintGpuAgents(const std::string& header) { + std::cout << std::flush; + std::clog << header << " :" << std::endl; + + const AgentInfo* agent_info; + int size = uint32_t(gpu_list_.size()); + for (int idx = 0; idx < size; idx++) { + agent_info = gpu_list_[idx]; + + std::clog << "> agent[" << idx << "] :" << std::endl; + std::clog << ">> Name : " << agent_info->name << std::endl; + std::clog << ">> APU : " << agent_info->is_apu << std::endl; + std::clog << ">> HSAIL profile : " << agent_info->profile << std::endl; + std::clog << ">> Max Wave Size : " << agent_info->max_wave_size << std::endl; + std::clog << ">> Max Queue Size : " << agent_info->max_queue_size << std::endl; + std::clog << ">> CU number : " << agent_info->cu_num << std::endl; + std::clog << ">> Waves per CU : " << agent_info->waves_per_cu << std::endl; + std::clog << ">> SIMDs per CU : " << agent_info->simds_per_cu << std::endl; + std::clog << ">> SE number : " << agent_info->se_num << std::endl; + std::clog << ">> Shader Arrays per SE : " << agent_info->shader_arrays_per_se << std::endl; + } + return true; +} + +uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) { + const uint32_t slot_size_b = CMD_SLOT_SIZE_B; + + // adevance command queue + const uint64_t write_idx = hsa_api_.hsa_queue_load_write_index_relaxed(queue); + hsa_api_.hsa_queue_store_write_index_relaxed(queue, write_idx + 1); + while ((write_idx - hsa_api_.hsa_queue_load_read_index_relaxed(queue)) >= queue->size) { + sched_yield(); + } + + uint32_t slot_idx = (uint32_t)(write_idx % queue->size); + uint32_t* queue_slot = reinterpret_cast((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b)); + const uint32_t* slot_data = reinterpret_cast(packet); + + // Copy buffered commands into the queue slot. + // Overwrite the AQL invalid header (first dword) last. + // This prevents the slot from being read until it's fully written. + memcpy(&queue_slot[1], &slot_data[1], slot_size_b - sizeof(uint32_t)); + std::atomic* header_atomic_ptr = + reinterpret_cast*>(&queue_slot[0]); + header_atomic_ptr->store(slot_data[0], std::memory_order_release); + + // ringdoor bell + hsa_api_.hsa_signal_store_relaxed(queue->doorbell_signal, write_idx); + + return write_idx; +} + +uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes) { + const uint32_t slot_size_b = CMD_SLOT_SIZE_B; + if ((size_bytes & (slot_size_b - 1)) != 0) { + fprintf(stderr, "HsaRsrcFactory::Submit: Bad packet size %zx\n", size_bytes); + abort(); + } + + const char* begin = reinterpret_cast(packet); + const char* end = begin + size_bytes; + uint64_t write_idx = 0; + for (const char* ptr = begin; ptr < end; ptr += slot_size_b) { + write_idx = Submit(queue, ptr); + } + + return write_idx; +} + +const char* HsaRsrcFactory::GetKernelName(uint64_t addr) { + std::lock_guard lck(mutex_); + const auto it = symbols_map_->find(addr); + if (it == symbols_map_->end()) { + fprintf(stderr, "HsaRsrcFactory::kernel addr (0x%lx) is not found\n", addr); + abort(); + } + return strdup(it->second); +} + +void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) { + std::lock_guard lck(mutex_); + executable_tracking_on_ = true; + table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor; +} + +hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data) { + hsa_symbol_kind_t value = (hsa_symbol_kind_t)0; + hsa_status_t status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value); + CHECK_STATUS("Error in getting symbol info", status); + if (value == HSA_SYMBOL_KIND_KERNEL) { + uint64_t addr = 0; + uint32_t len = 0; + status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr); + CHECK_STATUS("Error in getting kernel object", status); + status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len); + CHECK_STATUS("Error in getting name len", status); + char *name = new char[len + 1]; + status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, name); + CHECK_STATUS("Error in getting kernel name", status); + name[len] = 0; + auto ret = symbols_map_->insert({addr, name}); + if (ret.second == false) { + delete[] ret.first->second; + ret.first->second = name; + } + } + return HSA_STATUS_SUCCESS; +} + +hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options) { + std::lock_guard lck(mutex_); + if (symbols_map_ == NULL) symbols_map_ = new symbols_map_t; + hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL); + CHECK_STATUS("Error in iterating executable symbols", status); + return hsa_api_.hsa_executable_freeze(executable, options);; +} + +void HsaRsrcFactory::DumpHandles(FILE* file) { + auto beg = agent_map_.begin(); + auto end = agent_map_.end(); + for (auto it = beg; it != end; ++it) { + const AgentInfo* agent_info = it->second; + fprintf(file, "0x%lx agent %s\n", agent_info->dev_id.handle, (agent_info->dev_type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu"); + if (agent_info->cpu_pool.handle != 0) fprintf(file, "0x%lx pool cpu\n", agent_info->cpu_pool.handle); + if (agent_info->kern_arg_pool.handle != 0) fprintf(file, "0x%lx pool cpu kernarg\n", agent_info->kern_arg_pool.handle); + if (agent_info->gpu_pool.handle != 0) fprintf(file, "0x%lx pool gpu\n", agent_info->gpu_pool.handle); + } + fflush(file); +} + +std::atomic HsaRsrcFactory::instance_{}; +HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_; +HsaRsrcFactory::timestamp_t HsaRsrcFactory::timeout_ns_ = HsaTimer::TIMESTAMP_MAX; +hsa_pfn_t HsaRsrcFactory::hsa_api_{}; +bool HsaRsrcFactory::executable_tracking_on_ = false; +HsaRsrcFactory::symbols_map_t* HsaRsrcFactory::symbols_map_ = NULL; diff --git a/test/hsa/src/hsa_rsrc_factory.h b/test/hsa/src/hsa_rsrc_factory.h new file mode 100644 index 00000000..8383aa66 --- /dev/null +++ b/test/hsa/src/hsa_rsrc_factory.h @@ -0,0 +1,516 @@ +/********************************************************************** +Copyright ©2013 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted +provided that the following conditions are met: + +<95> Redistributions of source code must retain the above copyright notice, this list of +conditions and the following disclaimer. +<95> Redistributions in binary form must reproduce the above copyright notice, this list of +conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +********************************************************************/ + +#ifndef _HSA_RSRC_FACTORY_H_ +#define _HSA_RSRC_FACTORY_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#define HSA_ARGUMENT_ALIGN_BYTES 16 +#define HSA_QUEUE_ALIGN_BYTES 64 +#define HSA_PACKET_ALIGN_BYTES 64 + +#define CHECK_STATUS(msg, status) do { \ + if ((status) != HSA_STATUS_SUCCESS) { \ + const char* emsg = 0; \ + hsa_status_string(status, &emsg); \ + printf("%s: %s\n", msg, emsg ? emsg : ""); \ + abort(); \ + } \ +} while (0) + +#define CHECK_ITER_STATUS(msg, status) do { \ + if ((status) != HSA_STATUS_INFO_BREAK) { \ + const char* emsg = 0; \ + hsa_status_string(status, &emsg); \ + printf("%s: %s\n", msg, emsg ? emsg : ""); \ + abort(); \ + } \ +} while (0) + +static const size_t MEM_PAGE_BYTES = 0x1000; +static const size_t MEM_PAGE_MASK = MEM_PAGE_BYTES - 1; +typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t; + +struct hsa_pfn_t { + decltype(hsa_init)* hsa_init; + decltype(hsa_shut_down)* hsa_shut_down; + decltype(hsa_agent_get_info)* hsa_agent_get_info; + decltype(hsa_iterate_agents)* hsa_iterate_agents; + + decltype(hsa_queue_create)* hsa_queue_create; + decltype(hsa_queue_destroy)* hsa_queue_destroy; + decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed; + decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed; + decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed; + + decltype(hsa_signal_create)* hsa_signal_create; + decltype(hsa_signal_destroy)* hsa_signal_destroy; + decltype(hsa_signal_load_relaxed)* hsa_signal_load_relaxed; + decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed; + decltype(hsa_signal_wait_scacquire)* hsa_signal_wait_scacquire; + decltype(hsa_signal_store_screlease)* hsa_signal_store_screlease; + + decltype(hsa_code_object_reader_create_from_file)* hsa_code_object_reader_create_from_file; + decltype(hsa_executable_create_alt)* hsa_executable_create_alt; + decltype(hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object; + decltype(hsa_executable_freeze)* hsa_executable_freeze; + decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol; + decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info; + decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols; + + decltype(hsa_system_get_info)* hsa_system_get_info; + decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table; + + decltype(hsa_amd_agent_iterate_memory_pools)* hsa_amd_agent_iterate_memory_pools; + decltype(hsa_amd_memory_pool_get_info)* hsa_amd_memory_pool_get_info; + decltype(hsa_amd_memory_pool_allocate)* hsa_amd_memory_pool_allocate; + decltype(hsa_amd_agents_allow_access)* hsa_amd_agents_allow_access; + decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy; + + decltype(hsa_amd_signal_async_handler)* hsa_amd_signal_async_handler; + decltype(hsa_amd_profiling_set_profiler_enabled)* hsa_amd_profiling_set_profiler_enabled; + decltype(hsa_amd_profiling_get_async_copy_time)* hsa_amd_profiling_get_async_copy_time; + decltype(hsa_amd_profiling_get_dispatch_time)* hsa_amd_profiling_get_dispatch_time; +}; + +// Encapsulates information about a Hsa Agent such as its +// handle, name, max queue size, max wavefront size, etc. +struct AgentInfo { + // Handle of Agent + hsa_agent_t dev_id; + + // Agent type - Cpu = 0, Gpu = 1 or Dsp = 2 + uint32_t dev_type; + + // APU flag + bool is_apu; + + // Agent system index + uint32_t dev_index; + + // GFXIP name + char gfxip[64]; + + // Name of Agent whose length is less than 64 + char name[64]; + + // Max size of Wavefront size + uint32_t max_wave_size; + + // Max size of Queue buffer + uint32_t max_queue_size; + + // Hsail profile supported by agent + hsa_profile_t profile; + + // CPU/GPU/kern-arg memory pools + hsa_amd_memory_pool_t cpu_pool; + hsa_amd_memory_pool_t gpu_pool; + hsa_amd_memory_pool_t kern_arg_pool; + + // The number of compute unit available in the agent. + uint32_t cu_num; + + // Maximum number of waves possible in a Compute Unit. + uint32_t waves_per_cu; + + // Number of SIMD's per compute unit CU + uint32_t simds_per_cu; + + // Number of Shader Engines (SE) in Gpu + uint32_t se_num; + + // Number of Shader Arrays Per Shader Engines in Gpu + uint32_t shader_arrays_per_se; + + // SGPR/VGPR block sizes + uint32_t sgpr_block_dflt; + uint32_t sgpr_block_size; + uint32_t vgpr_block_size; +}; + +// HSA timer class +// Provides current HSA timestampa and system-clock/ns conversion API +class HsaTimer { + public: + typedef uint64_t timestamp_t; + static const timestamp_t TIMESTAMP_MAX = UINT64_MAX; + typedef long double freq_t; + + enum time_id_t { + TIME_ID_CLOCK_REALTIME = 0, + TIME_ID_CLOCK_MONOTONIC = 1, + TIME_ID_NUMBER + }; + + HsaTimer(const hsa_pfn_t* hsa_api) : hsa_api_(hsa_api) { + timestamp_t sysclock_hz = 0; + hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); + CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY)", status); + sysclock_factor_ = (freq_t)1000000000 / (freq_t)sysclock_hz; + } + + // Methods for system-clock/ns conversion + timestamp_t sysclock_to_ns(const timestamp_t& sysclock) const { + return timestamp_t((freq_t)sysclock * sysclock_factor_); + } + timestamp_t ns_to_sysclock(const timestamp_t& time) const { + return timestamp_t((freq_t)time / sysclock_factor_); + } + + // Method for timespec/ns conversion + static timestamp_t timespec_to_ns(const timespec& time) { + return ((timestamp_t)time.tv_sec * 1000000000) + time.tv_nsec; + } + + // Return timestamp in 'ns' + timestamp_t timestamp_ns() const { + timestamp_t sysclock; + hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock); + CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)", status); + return sysclock_to_ns(sysclock); + } + + // Return time in 'ns' + static timestamp_t clocktime_ns(clockid_t clock_id) { + timespec time; + clock_gettime(clock_id, &time); + return timespec_to_ns(time); + } + + // Return pair of correlated values of profiling timestamp and time with + // correlation error for a given time ID and number of iterations + void correlated_pair_ns(time_id_t time_id, uint32_t iters, + timestamp_t* timestamp_v, timestamp_t* time_v, timestamp_t* error_v) const { + clockid_t clock_id = 0; + switch (clock_id) { + case TIME_ID_CLOCK_REALTIME: + clock_id = CLOCK_REALTIME; + break; + case TIME_ID_CLOCK_MONOTONIC: + clock_id = CLOCK_MONOTONIC; + break; + default: + CHECK_STATUS("internal error: invalid time_id", HSA_STATUS_ERROR); + } + + std::vector ts_vec(iters); + std::vector tm_vec(iters); + const uint32_t steps = iters - 1; + + for (uint32_t i = 0; i < iters; ++i) { + hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &ts_vec[i]); + clock_gettime(clock_id, &tm_vec[i]); + } + + const timestamp_t ts_base = sysclock_to_ns(ts_vec.front()); + const timestamp_t tm_base = timespec_to_ns(tm_vec.front()); + const timestamp_t error = (ts_vec.back() - ts_vec.front()) / (2 * steps); + + timestamp_t ts_accum = 0; + timestamp_t tm_accum = 0; + for (uint32_t i = 0; i < iters; ++i) { + ts_accum += (ts_vec[i] - ts_base); + tm_accum += (timespec_to_ns(tm_vec[i]) - tm_base); + } + + *timestamp_v = (ts_accum / iters) + ts_base + error; + *time_v = (tm_accum / iters) + tm_base; + *error_v = error; + } + + private: + // Timestamp frequency factor + freq_t sysclock_factor_; + // HSA API table + const hsa_pfn_t* const hsa_api_; +}; + +class HsaRsrcFactory { + public: + static const size_t CMD_SLOT_SIZE_B = 0x40; + typedef std::recursive_mutex mutex_t; + typedef HsaTimer::timestamp_t timestamp_t; + + static HsaRsrcFactory* Create(bool initialize_hsa = true) { + std::lock_guard lck(mutex_); + HsaRsrcFactory* obj = instance_.load(std::memory_order_relaxed); + if (obj == NULL) { + obj = new HsaRsrcFactory(initialize_hsa); + instance_.store(obj, std::memory_order_release); + } + return obj; + } + + static HsaRsrcFactory& Instance() { + HsaRsrcFactory* obj = instance_.load(std::memory_order_acquire); + if (obj == NULL) obj = Create(false); + hsa_status_t status = (obj != NULL) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; + CHECK_STATUS("HsaRsrcFactory::Instance() failed", status); + return *obj; + } + + static void Destroy() { + std::lock_guard lck(mutex_); + if (instance_) delete instance_.load(); + instance_ = NULL; + } + + // Return system agent info + const AgentInfo* GetAgentInfo(const hsa_agent_t agent); + + // Get the count of Hsa Gpu Agents available on the platform + // @return uint32_t Number of Gpu agents on platform + uint32_t GetCountOfGpuAgents(); + + // Get the count of Hsa Cpu Agents available on the platform + // @return uint32_t Number of Cpu agents on platform + uint32_t GetCountOfCpuAgents(); + + // Get the AgentInfo handle of a Gpu device + // @param idx Gpu Agent at specified index + // @param agent_info Output parameter updated with AgentInfo + // @return bool true if successful, false otherwise + bool GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info); + + // Get the AgentInfo handle of a Cpu device + // @param idx Cpu Agent at specified index + // @param agent_info Output parameter updated with AgentInfo + // @return bool true if successful, false otherwise + bool GetCpuAgentInfo(uint32_t idx, const AgentInfo** agent_info); + + // Create a Queue object and return its handle. The queue object is expected + // to support user requested number of Aql dispatch packets. + // @param agent_info Gpu Agent on which to create a queue object + // @param num_Pkts Number of packets to be held by queue + // @param queue Output parameter updated with handle of queue object + // @return bool true if successful, false otherwise + bool CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue); + + // Create a Signal object and return its handle. + // @param value Initial value of signal object + // @param signal Output parameter updated with handle of signal object + // @return bool true if successful, false otherwise + bool CreateSignal(uint32_t value, hsa_signal_t* signal); + + // Allocate local GPU memory + // @param agent_info Agent from whose memory region to allocate + // @param size Size of memory in terms of bytes + // @return uint8_t* Pointer to buffer, null if allocation fails. + uint8_t* AllocateLocalMemory(const AgentInfo* agent_info, size_t size); + + // Allocate memory tp pass kernel parameters + // Memory is alocated accessible for all CPU agents and for GPU given by AgentInfo parameter. + // @param agent_info Agent from whose memory region to allocate + // @param size Size of memory in terms of bytes + // @return uint8_t* Pointer to buffer, null if allocation fails. + uint8_t* AllocateKernArgMemory(const AgentInfo* agent_info, size_t size); + + // Allocate system memory accessible from both CPU and GPU + // Memory is alocated accessible to all CPU agents and AgentInfo parameter is ignored. + // @param agent_info Agent from whose memory region to allocate + // @param size Size of memory in terms of bytes + // @return uint8_t* Pointer to buffer, null if allocation fails. + uint8_t* AllocateSysMemory(const AgentInfo* agent_info, size_t size); + + // Allocate memory for command buffer. + // @param agent_info Agent from whose memory region to allocate + // @param size Size of memory in terms of bytes + // @return uint8_t* Pointer to buffer, null if allocation fails. + uint8_t* AllocateCmdMemory(const AgentInfo* agent_info, size_t size); + + // Wait signal + hsa_signal_value_t SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const; + + // Wait signal with signal value restore + void SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const; + + // Copy data from GPU to host memory + bool Memcpy(const hsa_agent_t& agent, void* dst, const void* src, size_t size); + bool Memcpy(const AgentInfo* agent_info, void* dst, const void* src, size_t size); + + // Memory free method + static bool FreeMemory(void* ptr); + + // Loads an Assembled Brig file and Finalizes it into Device Isa + // @param agent_info Gpu device for which to finalize + // @param brig_path File path of the Assembled Brig file + // @param kernel_name Name of the kernel to finalize + // @param code_desc Handle of finalized Code Descriptor that could + // be used to submit for execution + // @return true if successful, false otherwise + bool LoadAndFinalize(const AgentInfo* agent_info, const char* brig_path, const char* kernel_name, + hsa_executable_t* hsa_exec, hsa_executable_symbol_t* code_desc); + + // Print the various fields of Hsa Gpu Agents + bool PrintGpuAgents(const std::string& header); + + // Submit AQL packet to given queue + static uint64_t Submit(hsa_queue_t* queue, const void* packet); + static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes); + + // Enable executables loading tracking + static bool IsExecutableTracking() { return executable_tracking_on_; } + static void EnableExecutableTracking(HsaApiTable* table); + static const char* GetKernelName(uint64_t addr); + + // Initialize HSA API table + void static InitHsaApiTable(HsaApiTable* table); + static const hsa_pfn_t* HsaApi() { return &hsa_api_; } + + // Return AqlProfile API table + typedef hsa_ven_amd_aqlprofile_pfn_t aqlprofile_pfn_t; + const aqlprofile_pfn_t* AqlProfileApi() const { return &aqlprofile_api_; } + + // Return Loader API table + const hsa_ven_amd_loader_1_00_pfn_t* LoaderApi() const { return &loader_api_; } + + // Methods for system-clock/ns conversion and timestamp in 'ns' + timestamp_t SysclockToNs(const timestamp_t& sysclock) const { return timer_->sysclock_to_ns(sysclock); } + timestamp_t NsToSysclock(const timestamp_t& time) const { return timer_->ns_to_sysclock(time); } + timestamp_t TimestampNs() const { return timer_->timestamp_ns(); } + + timestamp_t GetSysTimeout() const { return timeout_; } + static timestamp_t GetTimeoutNs() { return timeout_ns_; } + static void SetTimeoutNs(const timestamp_t& time) { + std::lock_guard lck(mutex_); + timeout_ns_ = time; + if (instance_ != NULL) Instance().timeout_ = Instance().timer_->ns_to_sysclock(time); + } + + void CorrelateTime(HsaTimer::time_id_t time_id, uint32_t iters) { + timestamp_t timestamp_v = 0; + timestamp_t time_v = 0; + timestamp_t error_v = 0; + timer_->correlated_pair_ns(time_id, iters, ×tamp_v, &time_v, &error_v); + time_shift_[time_id] = time_v - timestamp_v; + time_error_[time_id] = error_v; + } + + hsa_status_t GetTime(uint32_t time_id, timestamp_t value, uint64_t* time) { + if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR; + *time = value + time_shift_[time_id]; + return HSA_STATUS_SUCCESS; + } + + hsa_status_t GetTimestamp(uint32_t time_id, uint64_t value, timestamp_t* timestamp) { + if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR; + *timestamp = value - time_shift_[time_id]; + return HSA_STATUS_SUCCESS; + } + + void DumpHandles(FILE* output_file); + + private: + // System agents iterating callback + static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data); + + // Callback function to find and bind kernarg region of an agent + static hsa_status_t FindMemRegionsCallback(hsa_region_t region, void* data); + + // Load AQL profile HSA extension library directly + static hsa_status_t LoadAqlProfileLib(aqlprofile_pfn_t* api); + + // Constructor of the class. Will initialize the Hsa Runtime and + // query the system topology to get the list of Cpu and Gpu devices + explicit HsaRsrcFactory(bool initialize_hsa); + + // Destructor of the class + ~HsaRsrcFactory(); + + // Add an instance of AgentInfo representing a Hsa Gpu agent + const AgentInfo* AddAgentInfo(const hsa_agent_t agent); + + // To mmap command buffer memory + static const bool CMD_MEMORY_MMAP = false; + + // HSA was initialized + const bool initialize_hsa_; + + static std::atomic instance_; + static mutex_t mutex_; + + // Used to maintain a list of Hsa Gpu Agent Info + std::vector gpu_list_; + std::vector gpu_agents_; + + // Used to maintain a list of Hsa Cpu Agent Info + std::vector cpu_list_; + std::vector cpu_agents_; + + // System agents map + std::map agent_map_; + + // Executables loading tracking + typedef std::map symbols_map_t; + static symbols_map_t* symbols_map_; + static bool executable_tracking_on_; + static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options); + static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data); + + // HSA runtime API table + static hsa_pfn_t hsa_api_; + + // AqlProfile API table + aqlprofile_pfn_t aqlprofile_api_; + + // Loader API table + hsa_ven_amd_loader_1_00_pfn_t loader_api_; + + // System timeout, ns + static timestamp_t timeout_ns_; + // System timeout, sysclock + timestamp_t timeout_; + + // HSA timer + HsaTimer* timer_; + + // Time shift array to support time conversion + timestamp_t time_shift_[HsaTimer::TIME_ID_NUMBER]; + timestamp_t time_error_[HsaTimer::TIME_ID_NUMBER]; + + // CPU/kern-arg memory pools + hsa_amd_memory_pool_t *cpu_pool_; + hsa_amd_memory_pool_t *kern_arg_pool_; +}; + +#endif // _HSA_RSRC_FACTORY_H_ diff --git a/test/hsa/test/CMakeLists.txt b/test/hsa/test/CMakeLists.txt new file mode 100644 index 00000000..77727b23 --- /dev/null +++ b/test/hsa/test/CMakeLists.txt @@ -0,0 +1,64 @@ +################################################################################ +# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +################################################################################ + +cmake_minimum_required ( VERSION 2.8.12 ) +set ( CMAKE_VERBOSE_MAKEFILE TRUE CACHE BOOL "Verbose Output" FORCE ) + +set ( EXE_NAME "ctrl" ) + +if ( NOT DEFINED TEST_DIR ) + set ( TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR} ) + project ( ${EXE_NAME} ) + ## Set build environment + include ( env ) +endif () + +if ( NOT DEFINED ROCM_ROOT_DIR ) + set ( ROCM_ROOT_DIR "" ) +endif () +if ( NOT DEFINED GPU_TARGETS ) + set ( GPU_TARGETS "" ) +endif () + +## Util sources +file( GLOB UTIL_SRC "${TEST_DIR}/util/*.cpp" ) + +## Test control sources +set ( CTRL_SRC + ${TEST_DIR}/app/test.cpp + ${TEST_DIR}/ctrl/test_hsa.cpp +) + +## Dummy kernel +set ( DUMMY_NAME dummy_kernel ) +execute_process ( COMMAND sh -xc "${TEST_DIR}/../script/build_kernel.sh '${TEST_DIR}/${DUMMY_NAME}/${DUMMY_NAME}' '${PROJECT_BINARY_DIR}' '${ROCM_ROOT_DIR}' '${GPU_TARGETS}'" ) + +## Test kernel +set ( TEST_NAME simple_convolution ) +set ( KERN_SRC ${TEST_DIR}/${TEST_NAME}/${TEST_NAME}.cpp ) +execute_process ( COMMAND sh -xc "${TEST_DIR}/../script/build_kernel.sh '${TEST_DIR}/${TEST_NAME}/${TEST_NAME}' '${PROJECT_BINARY_DIR}' '${ROCM_ROOT_DIR}' '${GPU_TARGETS}'" ) + +## Building ctrl test executable +add_executable ( ${EXE_NAME} ${CTRL_SRC} ${UTIL_SRC} ${KERN_SRC} ) +target_include_directories ( ${EXE_NAME} PRIVATE ${TEST_DIR} ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH} ) +target_link_libraries( ${EXE_NAME} ${HSA_RUNTIME_LIB} ${HSA_KMT_LIB} c stdc++ dl pthread rt ) +execute_process ( COMMAND sh -xc "cp ${TEST_DIR}/run.sh ${PROJECT_BINARY_DIR}" ) diff --git a/test/hsa/test/app/test.cpp b/test/hsa/test/app/test.cpp new file mode 100644 index 00000000..23d39273 --- /dev/null +++ b/test/hsa/test/app/test.cpp @@ -0,0 +1,86 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#include +#include +#include +#include +#include + +#include "ctrl/run_kernel.h" +#include "ctrl/test_aql.h" +#include "dummy_kernel/dummy_kernel.h" +#include "simple_convolution/simple_convolution.h" + +void thread_fun(const int kiter, const int diter, const uint32_t agents_number) { + const AgentInfo* agent_info[agents_number]; + hsa_queue_t* queue[agents_number]; + HsaRsrcFactory* rsrc = &HsaRsrcFactory::Instance(); + + for (uint32_t n = 0; n < agents_number; ++n) { + uint32_t agent_id = n % rsrc->GetCountOfGpuAgents(); + if (rsrc->GetGpuAgentInfo(agent_id, &agent_info[n]) == false) { + fprintf(stderr, "AgentInfo failed\n"); + abort(); + } + if (rsrc->CreateQueue(agent_info[n], 128, &queue[n]) == false) { + fprintf(stderr, "CreateQueue failed\n"); + abort(); + } + } + + for (int i = 0; i < kiter; ++i) { + for (uint32_t n = 0; n < agents_number; ++n) { + RunKernel(0, NULL, agent_info[n], queue[n], diter); + RunKernel(0, NULL, agent_info[n], queue[n], diter); + } + } + + for (uint32_t n = 0; n < agents_number; ++n) { + hsa_queue_destroy(queue[n]); + } +} + +int main(int argc, char** argv) { + const char* kiter_s = getenv("ROCP_KITER"); + const char* diter_s = getenv("ROCP_DITER"); + const char* agents_s = getenv("ROCP_AGENTS"); + const char* thrs_s = getenv("ROCP_THRS"); + + const int kiter = (kiter_s != NULL) ? atol(kiter_s) : 1; + const int diter = (diter_s != NULL) ? atol(diter_s) : 1; + const uint32_t agents_number = (agents_s != NULL) ? (uint32_t)atol(agents_s) : 1; + const int thrs = (thrs_s != NULL) ? atol(thrs_s) : 1; + + TestHsa::HsaInstantiate(); + + std::vector t(thrs); + for (int n = 0; n < thrs; ++n) { + t[n] = std::thread(thread_fun, kiter, diter, agents_number); + } + for (int n = 0; n < thrs; ++n) { + t[n].join(); + } + + TestHsa::HsaShutdown(); + return 0; +} diff --git a/test/hsa/test/ctrl/run_kernel.h b/test/hsa/test/ctrl/run_kernel.h new file mode 100644 index 00000000..846e0b68 --- /dev/null +++ b/test/hsa/test/ctrl/run_kernel.h @@ -0,0 +1,90 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef TEST_CTRL_RUN_KERNEL_H_ +#define TEST_CTRL_RUN_KERNEL_H_ + +#include "ctrl/test_hsa.h" +#include "util/test_assert.h" + +template bool RunKernel(int argc = 0, char* argv[] = NULL, const AgentInfo* agent_info = NULL, hsa_queue_t* queue = NULL, int count = 1) { + bool ret_val = false; + + if (getenv("ROC_TEST_TRACE") == NULL) std::clog.rdbuf(NULL); + + + // Create test kernel object + Kernel test_kernel; + + TestHsa* test_hsa = new TestHsa(&test_kernel); + test_hsa->SetAgentInfo(agent_info); + test_hsa->SetQueue(queue); + + TestAql* test_aql = new Test(test_hsa); + TEST_ASSERT(test_aql != NULL); + if (test_aql == NULL) return 1; + + // Initialization of Hsa Runtime + ret_val = test_aql->Initialize(argc, argv); + if (ret_val == false) { + std::cerr << "Error in the test initialization" << std::endl; + // TEST_ASSERT(ret_val); + return false; + } + + // Setup Hsa resources needed for execution + ret_val = test_aql->Setup(); + if (ret_val == false) { + std::cerr << "Error in creating hsa resources" << std::endl; + TEST_ASSERT(ret_val); + return false; + } + + // Kernel dspatch iterations + for (int i = 0; i < count; ++i) { + // Run test kernel + ret_val = test_aql->Run(); + if (ret_val == false) { + std::cerr << "Error in running the test kernel" << std::endl; + TEST_ASSERT(ret_val); + return false; + } + + // Verify the results of the execution + ret_val = test_aql->VerifyResults(); + if (ret_val) { + std::clog << "Test : Passed" << std::endl; + } else { + std::clog << "Test : Failed" << std::endl; + } + } + + // Print time taken by sample + test_aql->PrintTime(); + + test_aql->Cleanup(); + delete test_aql; + + return ret_val; +} + +#endif // TEST_CTRL_RUN_KERNEL_H_ diff --git a/test/hsa/test/ctrl/test_aql.h b/test/hsa/test/ctrl/test_aql.h new file mode 100644 index 00000000..d77363ee --- /dev/null +++ b/test/hsa/test/ctrl/test_aql.h @@ -0,0 +1,77 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef TEST_CTRL_TEST_AQL_H_ +#define TEST_CTRL_TEST_AQL_H_ + +#include +#include + +#include "util/hsa_rsrc_factory.h" + +// Test AQL interface +class TestAql { + public: + explicit TestAql(TestAql* t = 0) : test_(t) {} + virtual ~TestAql() { + if (test_) delete test_; + } + + TestAql* Test() { return test_; } + virtual const AgentInfo* GetAgentInfo() { return (test_) ? test_->GetAgentInfo() : 0; } + virtual hsa_queue_t* GetQueue() { return (test_) ? test_->GetQueue() : 0; } + virtual HsaRsrcFactory* GetRsrcFactory() { return (test_) ? test_->GetRsrcFactory() : 0; } + + // Initialize application environment including setting + // up of various configuration parameters based on + // command line arguments + // @return bool true on success and false on failure + virtual bool Initialize(int argc, char** argv) { + return (test_) ? test_->Initialize(argc, argv) : true; + } + + // Setup application parameters for exectuion + // @return bool true on success and false on failure + virtual bool Setup() { return (test_) ? test_->Setup() : true; } + + // Run the kernel + // @return bool true on success and false on failure + virtual bool Run() { return (test_) ? test_->Run() : true; } + + // Verify results + // @return bool true on success and false on failure + virtual bool VerifyResults() { return (test_) ? test_->VerifyResults() : true; } + + // Print to console the time taken to execute kernel + virtual void PrintTime() { + if (test_) test_->PrintTime(); + } + + // Release resources e.g. memory allocations + // @return bool true on success and false on failure + virtual bool Cleanup() { return (test_) ? test_->Cleanup() : true; } + + private: + TestAql* const test_; +}; + +#endif // TEST_CTRL_TEST_AQL_H_ diff --git a/test/hsa/test/ctrl/test_hsa.cpp b/test/hsa/test/ctrl/test_hsa.cpp new file mode 100644 index 00000000..638f7b1a --- /dev/null +++ b/test/hsa/test/ctrl/test_hsa.cpp @@ -0,0 +1,279 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#include "ctrl/test_hsa.h" + +#include + +#include "util/test_assert.h" +#include "util/helper_funcs.h" +#include "util/hsa_rsrc_factory.h" + +HsaRsrcFactory* TestHsa::hsa_rsrc_ = NULL; + +HsaRsrcFactory* TestHsa::HsaInstantiate() { + // Instantiate an instance of Hsa Resources Factory + if (hsa_rsrc_ == NULL) { + hsa_rsrc_ = HsaRsrcFactory::Create(); + // Print properties of the agents + hsa_rsrc_->PrintGpuAgents("> GPU agents"); + } + return hsa_rsrc_; +} + +void TestHsa::HsaShutdown() { + if (hsa_rsrc_) hsa_rsrc_->Destroy(); +} + +bool TestHsa::Initialize(int /*arg_cnt*/, char** /*arg_list*/) { + std::clog << "TestHsa::Initialize :" << std::endl; + + // Instantiate a Timer object + setup_timer_idx_ = hsa_timer_.CreateTimer(); + dispatch_timer_idx_ = hsa_timer_.CreateTimer(); + + if (hsa_rsrc_ == NULL) { + TEST_ASSERT(false); + return false; + } + + // Create an instance of Gpu agent + if (agent_info_ == NULL) { + const uint32_t agent_id = 0; + if (!hsa_rsrc_->GetGpuAgentInfo(agent_id, &agent_info_)) { + agent_info_ = NULL; + std::cerr << "> error: agent[" << agent_id << "] is not found" << std::endl; + return false; + } + } + std::clog << "> Using agent[" << agent_info_->dev_index << "] : " << agent_info_->name << std::endl; + + // Create an instance of Aql Queue + if (hsa_queue_ == NULL) { + const uint32_t num_pkts = 128; + if (hsa_rsrc_->CreateQueue(agent_info_, num_pkts, &hsa_queue_) == false) { + hsa_queue_ = NULL; + TEST_ASSERT(false); + } + my_queue_ = true; + } + + // Obtain handle of signal + hsa_rsrc_->CreateSignal(1, &hsa_signal_); + + // Obtain the code object file name + std::string agentName(agent_info_->name); + brig_path_obj_.append(agentName); + brig_path_obj_.append("_" + name_ + ".hsaco"); + + return true; +} + +bool TestHsa::Setup() { + std::clog << "TestHsa::setup :" << std::endl; + + // Start the timer object + hsa_timer_.StartTimer(setup_timer_idx_); + + // Load and Finalize Kernel Code Descriptor + const char* brig_path = brig_path_obj_.c_str(); + bool suc = hsa_rsrc_->LoadAndFinalize(agent_info_, brig_path, symb_.c_str(), &hsa_exec_, + &kernel_code_desc_); + if (suc == false) { + std::cerr << "Error in loading and finalizing Kernel" << std::endl; + return false; + } + + mem_map_t& mem_map = test_->GetMemMap(); + for (mem_it_t it = mem_map.begin(); it != mem_map.end(); ++it) { + mem_descr_t& des = it->second; + if (des.size == 0) continue; + + switch (des.id) { + case TestKernel::LOCAL_DES_ID: + des.ptr = hsa_rsrc_->AllocateLocalMemory(agent_info_, des.size); + break; + case TestKernel::KERNARG_DES_ID: { + // Check the kernel args size + const size_t kernarg_size = des.size; + size_t size_info = 0; + const hsa_status_t status = hsa_executable_symbol_get_info( + kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &size_info); + TEST_ASSERT(status == HSA_STATUS_SUCCESS); + size_info = kernarg_size; + const bool kernarg_missmatch = (kernarg_size > size_info); + if (kernarg_missmatch) { + std::cout << "kernarg_size = " << kernarg_size << ", size_info = " << size_info + << std::flush << std::endl; + TEST_ASSERT(!kernarg_missmatch); + break; + } + // ALlocate kernarg memory + des.size = size_info; + des.ptr = hsa_rsrc_->AllocateKernArgMemory(agent_info_, size_info); + if (des.ptr) memset(des.ptr, 0, size_info); + break; + } + case TestKernel::SYS_DES_ID: + des.ptr = hsa_rsrc_->AllocateSysMemory(agent_info_, des.size); + if (des.ptr) memset(des.ptr, 0, des.size); + break; + case TestKernel::NULL_DES_ID: + des.ptr = NULL; + break; + default: + break; + } + TEST_ASSERT(des.ptr != NULL); + if (des.ptr == NULL) return false; + } + test_->Init(); + + // Stop the timer object + hsa_timer_.StopTimer(setup_timer_idx_); + setup_time_taken_ = hsa_timer_.ReadTimer(setup_timer_idx_); + total_time_taken_ = setup_time_taken_; + + return true; +} + +bool TestHsa::Run() { + std::clog << "TestHsa::run :" << std::endl; + + const uint32_t work_group_size = 64; + const uint32_t work_grid_size = test_->GetGridSize(); + uint32_t group_segment_size = 0; + uint32_t private_segment_size = 0; + uint64_t code_handle = 0; + + // Retrieve the amount of group memory needed + hsa_executable_symbol_get_info( + kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, &group_segment_size); + + // Retrieve the amount of private memory needed + hsa_executable_symbol_get_info(kernel_code_desc_, + HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, + &private_segment_size); + + + // Retrieve handle of the code block + hsa_executable_symbol_get_info(kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, + &code_handle); + + // Initialize the dispatch packet. + hsa_kernel_dispatch_packet_t aql; + memset(&aql, 0, sizeof(aql)); + // Set the packet's type, barrier bit, acquire and release fences + aql.header = HSA_PACKET_TYPE_KERNEL_DISPATCH; + aql.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCACQUIRE_FENCE_SCOPE; + aql.header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_SCRELEASE_FENCE_SCOPE; + // Populate Aql packet with default values + aql.setup = 1; + aql.grid_size_x = work_grid_size; + aql.grid_size_y = 1; + aql.grid_size_z = 1; + aql.workgroup_size_x = work_group_size; + aql.workgroup_size_y = 1; + aql.workgroup_size_z = 1; + // Bind the kernel code descriptor and arguments + aql.kernel_object = code_handle; + aql.kernarg_address = test_->GetKernargPtr(); + aql.group_segment_size = group_segment_size; + aql.private_segment_size = private_segment_size; + // Initialize Aql packet with handle of signal + hsa_signal_store_relaxed(hsa_signal_, 1); + aql.completion_signal = hsa_signal_; + + std::clog << "> Executing kernel: \"" << name_ << "\"" << std::endl; + + // Start the timer object + hsa_timer_.StartTimer(dispatch_timer_idx_); + + // Submit AQL packet to the queue + const uint64_t que_idx = hsa_rsrc_->Submit(hsa_queue_, &aql); + + std::clog << "> Waiting on kernel dispatch signal, que_idx=" << que_idx << std::endl << std::flush; + + // Wait on the dispatch signal until the kernel is finished. + // Update wait condition to HSA_WAIT_STATE_ACTIVE for Polling + if (hsa_signal_wait_scacquire(hsa_signal_, HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, + HSA_WAIT_STATE_BLOCKED) != 0) { + TEST_ASSERT("signal_wait failed"); + } + + std::clog << "> DONE, que_idx=" << que_idx << std::endl; + + // Stop the timer object + hsa_timer_.StopTimer(dispatch_timer_idx_); + dispatch_time_taken_ = hsa_timer_.ReadTimer(dispatch_timer_idx_); + total_time_taken_ += dispatch_time_taken_; + + return true; +} + +bool TestHsa::VerifyResults() { + bool cmp = false; + void* output = NULL; + const uint32_t size = test_->GetOutputSize(); + bool suc = false; + + if (size == 0) return true; + + // Copy local kernel output buffers from local memory into host memory + if (test_->IsOutputLocal()) { + output = hsa_rsrc_->AllocateSysMemory(agent_info_, size); + suc = hsa_rsrc_->Memcpy(agent_info_, output, test_->GetOutputPtr(), size); + if (!suc) std::clog << "> VerifyResults: Memcpy failed" << std::endl << std::flush; + } else { + output = test_->GetOutputPtr(); + suc = true; + } + + if ((output != NULL) && suc) { + // Print the test output + test_->PrintOutput(output); + // Compare the results and see if they match + cmp = (memcmp(output, test_->GetRefOut(), size) == 0); + } + + if (test_->IsOutputLocal() && (output != NULL)) hsa_rsrc_->FreeMemory(output); + + return cmp; +} + +void TestHsa::PrintTime() { + std::clog << "Time taken for Setup by " << this->name_ << " : " << this->setup_time_taken_ + << std::endl; + std::clog << "Time taken for Dispatch by " << this->name_ << " : " << this->dispatch_time_taken_ + << std::endl; + std::clog << "Time taken in Total by " << this->name_ << " : " << this->total_time_taken_ + << std::endl; +} + +bool TestHsa::Cleanup() { + hsa_executable_destroy(hsa_exec_); + hsa_signal_destroy(hsa_signal_); + if (my_queue_) hsa_queue_destroy(hsa_queue_); + hsa_queue_ = NULL; + agent_info_ = NULL; + return true; +} diff --git a/test/hsa/test/ctrl/test_hsa.h b/test/hsa/test/ctrl/test_hsa.h new file mode 100644 index 00000000..bb54c600 --- /dev/null +++ b/test/hsa/test/ctrl/test_hsa.h @@ -0,0 +1,129 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef TEST_CTRL_TEST_HSA_H_ +#define TEST_CTRL_TEST_HSA_H_ + +#include "ctrl/test_aql.h" +#include "ctrl/test_kernel.h" +#include "util/hsa_rsrc_factory.h" +#include "util/perf_timer.h" + +// Class implements HSA test +class TestHsa : public TestAql { + public: + // Instantiate HSA resources + static HsaRsrcFactory* HsaInstantiate(); + static void HsaShutdown(); + + // Constructor + explicit TestHsa(TestKernel* test) : test_(test), name_(test->Name()), symb_(test->SymbName()) { + total_time_taken_ = 0; + setup_time_taken_ = 0; + dispatch_time_taken_ = 0; + agent_info_ = NULL; + hsa_queue_ = NULL; + my_queue_ = false; + hsa_exec_ = {}; + } + + // Get methods for Agent Info, HAS queue, HSA Resourcse Manager + HsaRsrcFactory* GetRsrcFactory() { return hsa_rsrc_; } + hsa_agent_t HsaAgent() { return agent_info_->dev_id; } + const AgentInfo* GetAgentInfo() { return agent_info_; } + void SetAgentInfo(const AgentInfo* agent_info) { agent_info_ = agent_info; } + hsa_queue_t* GetQueue() { return hsa_queue_; } + void SetQueue(hsa_queue_t* queue) { hsa_queue_ = queue; } + + // Initialize application environment including setting + // up of various configuration parameters based on + // command line arguments + // @return bool true on success and false on failure + bool Initialize(int argc, char** argv); + + // Setup application parameters for exectuion + // @return bool true on success and false on failure + bool Setup(); + + // Run the BinarySearch kernel + // @return bool true on success and false on failure + bool Run(); + + // Verify against reference implementation + // @return bool true on success and false on failure + bool VerifyResults(); + + // Print to console the time taken to execute kernel + void PrintTime(); + + // Release resources e.g. memory allocations + // @return bool true on success and false on failure + bool Cleanup(); + + private: + typedef TestKernel::mem_descr_t mem_descr_t; + typedef TestKernel::mem_map_t mem_map_t; + typedef TestKernel::mem_it_t mem_it_t; + + // Test object + TestKernel* test_; + + // Path of Brig file + std::string brig_path_obj_; + + // Used to track time taken to run the sample + double total_time_taken_; + double setup_time_taken_; + double dispatch_time_taken_; + + // Handle of signal + hsa_signal_t hsa_signal_; + + // Handle of Kernel Code Descriptor + hsa_executable_symbol_t kernel_code_desc_; + + // Instance of timer object + uint32_t setup_timer_idx_; + uint32_t dispatch_timer_idx_; + PerfTimer hsa_timer_; + + // Instance of Hsa Resources Factory + static HsaRsrcFactory* hsa_rsrc_; + + // Handle to an Hsa Gpu Agent + const AgentInfo* agent_info_; + + // Handle to an Hsa Queue + hsa_queue_t* hsa_queue_; + bool my_queue_; + + // Test kernel name + std::string name_; + + // Test kernel name + std::string symb_; + + // Kernel executable + hsa_executable_t hsa_exec_; +}; + +#endif // TEST_CTRL_TEST_HSA_H_ diff --git a/test/hsa/test/ctrl/test_kernel.h b/test/hsa/test/ctrl/test_kernel.h new file mode 100644 index 00000000..0ca89200 --- /dev/null +++ b/test/hsa/test/ctrl/test_kernel.h @@ -0,0 +1,138 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef TEST_CTRL_TEST_KERNEL_H_ +#define TEST_CTRL_TEST_KERNEL_H_ + +#include +#include +#include +#include + +// Class implements kernel test +class TestKernel { + public: + // Exported buffers IDs + enum buf_id_t { KERNARG_EXP_ID, OUTPUT_EXP_ID, REFOUT_EXP_ID }; + // Memory descriptors IDs + enum des_id_t { NULL_DES_ID, LOCAL_DES_ID, KERNARG_DES_ID, SYS_DES_ID, REFOUT_DES_ID }; + + // Memory descriptors vector declaration + struct mem_descr_t { + des_id_t id; + void* ptr; + uint32_t size; + }; + + // Memory map declaration + typedef std::map mem_map_t; + typedef mem_map_t::iterator mem_it_t; + typedef mem_map_t::const_iterator mem_const_it_t; + + virtual ~TestKernel() {} + + // Initialize method + virtual void Init() = 0; + + // Return kernel memory map + mem_map_t& GetMemMap() { return mem_map_; } + + // Return NULL descriptor + static mem_descr_t NullDescriptor() { return {NULL_DES_ID, NULL, 0}; } + + // Check if decripter is local + bool IsLocal(const mem_descr_t& descr) const { return (descr.id == LOCAL_DES_ID); } + + // Methods to get the kernel attributes + const mem_descr_t& GetKernargDescr() { return *test_map_[KERNARG_EXP_ID]; } + const mem_descr_t& GetOutputDescr() { return *test_map_[OUTPUT_EXP_ID]; } + void* GetKernargPtr() { return GetKernargDescr().ptr; } + uint32_t GetKernargSize() { return GetKernargDescr().size; } + void* GetOutputPtr() { return GetOutputDescr().ptr; } + uint32_t GetOutputSize() { return GetOutputDescr().size; } + bool IsOutputLocal() { return IsLocal(GetOutputDescr()); } + virtual uint32_t GetGridSize() const = 0; + + // Return reference output + void* GetRefOut() { return test_map_[REFOUT_EXP_ID]->ptr; } + + // Print output + virtual void PrintOutput(const void* ptr) const = 0; + + // Return name + virtual std::string Name() const = 0; + + // Return name + virtual std::string SymbName() { return Name() + ".kd"; } + + protected: + // Set buffer descriptor + bool SetInDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) { + bool suc = SetMemDescr(buf_id, des_id, size); + if (des_id == KERNARG_DES_ID) { + test_map_[KERNARG_EXP_ID] = &mem_map_[buf_id]; + } + return suc; + } + + // Set results descriptor + bool SetOutDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) { + bool suc = SetMemDescr(buf_id, des_id, size); + test_map_[OUTPUT_EXP_ID] = &mem_map_[buf_id]; + return suc; + } + + // Set host descriptor + bool SetHostDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) { + bool suc = SetMemDescr(buf_id, des_id, size); + if (suc) { + mem_descr_t& descr = mem_map_[buf_id]; + descr.ptr = malloc(size); + if (des_id == REFOUT_DES_ID) { + test_map_[REFOUT_EXP_ID] = &descr; + } + if (descr.ptr == NULL) suc = false; + } + return suc; + } + + // Get memory descriptor + mem_descr_t GetDescr(const uint32_t& buf_id) const { + mem_const_it_t it = mem_map_.find(buf_id); + return (it != mem_map_.end()) ? it->second : NullDescriptor(); + } + + private: + // Set memory descriptor + bool SetMemDescr(const uint32_t& buf_id, const des_id_t& des_id, const uint32_t& size) { + const mem_descr_t des = {des_id, NULL, size}; + auto ret = mem_map_.insert(mem_map_t::value_type(buf_id, des)); + return ret.second; + } + + // Kernel memory map object + mem_map_t mem_map_; + // Test memory map object + std::map test_map_; +}; + +#endif // TEST_CTRL_TEST_KERNEL_H_ diff --git a/test/hsa/test/dummy_kernel/dummy_kernel.cl b/test/hsa/test/dummy_kernel/dummy_kernel.cl new file mode 100644 index 00000000..4ab159c8 --- /dev/null +++ b/test/hsa/test/dummy_kernel/dummy_kernel.cl @@ -0,0 +1,28 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +/** + dummy kernel + */ +__kernel void DummyKernel() { + uint tid = get_global_id(0); +} diff --git a/test/hsa/test/dummy_kernel/dummy_kernel.h b/test/hsa/test/dummy_kernel/dummy_kernel.h new file mode 100644 index 00000000..1b8ce430 --- /dev/null +++ b/test/hsa/test/dummy_kernel/dummy_kernel.h @@ -0,0 +1,71 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef TEST_DUMMY_KERNEL_DUMMY_KERNEL_H_ +#define TEST_DUMMY_KERNEL_DUMMY_KERNEL_H_ + +#include +#include + +#include "ctrl/test_kernel.h" + +// Class implements DummyKernel kernel parameters +class DummyKernel : public TestKernel { + public: + // Kernel buffers IDs + enum { KERNARG_BUF_ID, LOCAL_BUF_ID }; + + // Constructor + DummyKernel() : + width_(64), + height_(64) + { + SetInDescr(KERNARG_BUF_ID, KERNARG_DES_ID, 0); + SetOutDescr(LOCAL_BUF_ID, LOCAL_DES_ID, 0); + } + + // Initialize method + void Init() {} + + // Return compute grid size + uint32_t GetGridSize() const { return width_ * height_; } + + // Print output + void PrintOutput(const void* ptr) const {} + + // Return name + std::string Name() const { return std::string("DummyKernel"); } + + private: + // Reference CPU implementation + bool ReferenceImplementation(uint32_t* output, const uint32_t* input, const float* mask, + const uint32_t width, const uint32_t height, + const uint32_t maskWidth, const uint32_t maskHeight) { return true; } + + // Width of the Input array + const uint32_t width_; + + // Height of the Input array + const uint32_t height_; +}; + +#endif // TEST_DUMMY_KERNEL_DUMMY_KERNEL_H_ diff --git a/test/hsa/test/run.sh b/test/hsa/test/run.sh new file mode 100755 index 00000000..32848317 --- /dev/null +++ b/test/hsa/test/run.sh @@ -0,0 +1,45 @@ +#!/bin/sh + +################################################################################ +# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. +################################################################################ + +# test trace +export ROC_TEST_TRACE=1 + +# kernels loading iterations +export ROCP_KITER=50 +# kernels dispatching iterations per kernel load +# dispatching to the same queue +export ROCP_DITER=50 +# GPU agents number +export ROCP_AGENTS=2 +# host threads number +# each thread creates a queue pre GPU agent +export ROCP_THRS=3 + +eval ./test/ctrl + +#valgrind --leak-check=full $tbin +#valgrind --tool=massif $tbin +#ms_print massif.out. + +exit 0 diff --git a/test/hsa/test/simple_convolution/simple_convolution.cl b/test/hsa/test/simple_convolution/simple_convolution.cl new file mode 100644 index 00000000..3f8115a6 --- /dev/null +++ b/test/hsa/test/simple_convolution/simple_convolution.cl @@ -0,0 +1,76 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +/** + * SimpleConvolution is where each pixel of the output image + * is the weighted sum of the neighborhood pixels of the input image + * The neighborhood is defined by the dimensions of the mask and + * weight of each neighbor is defined by the mask itself. + * @param output Output matrix after performing convolution + * @param input Input matrix on which convolution is to be performed + * @param mask mask matrix using which convolution was to be performed + * @param inputDimensions dimensions of the input matrix + * @param maskDimensions dimensions of the mask matrix + */ +__kernel void SimpleConvolution(__global uint * output, + __global uint * input, + __global float * mask, + const uint2 inputDimensions, + const uint2 maskDimensions) { + + uint tid = get_global_id(0); + + uint width = inputDimensions.x; + uint height = inputDimensions.y; + + uint x = tid%width; + uint y = tid/width; + + uint maskWidth = maskDimensions.x; + uint maskHeight = maskDimensions.y; + + uint vstep = (maskWidth -1)/2; + uint hstep = (maskHeight -1)/2; + + // find the left, right, top and bottom indices such that + // the indices do not go beyond image boundaires + uint left = (x < vstep) ? 0 : (x - vstep); + uint right = ((x + vstep) >= width) ? width - 1 : (x + vstep); + uint top = (y < hstep) ? 0 : (y - hstep); + uint bottom = ((y + hstep) >= height)? height - 1: (y + hstep); + + // initializing wighted sum value + float sumFX = 0; + + for(uint i = left; i <= right; ++i) { + for(uint j = top; j <= bottom; ++j) { + // performing wighted sum within the mask boundaries + uint maskIndex = (j - (y - hstep)) * maskWidth + (i - (x - vstep)); + uint index = j * width + i; + sumFX += ((float)input[index] * mask[maskIndex]); + } + } + + // To round to the nearest integer + sumFX += 0.5f; + output[tid] = (uint)sumFX; +} diff --git a/test/hsa/test/simple_convolution/simple_convolution.cpp b/test/hsa/test/simple_convolution/simple_convolution.cpp new file mode 100644 index 00000000..546f9a6a --- /dev/null +++ b/test/hsa/test/simple_convolution/simple_convolution.cpp @@ -0,0 +1,388 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#include "simple_convolution/simple_convolution.h" + +#include +#include +#include + +#include "util/helper_funcs.h" +#include "util/test_assert.h" + +const uint32_t SimpleConvolution::input_data_[]{ + 15, 201, 51, 89, 92, 34, 96, 66, 11, 225, 161, 96, 81, 211, 108, 124, 202, 244, 182, + 90, 215, 92, 98, 20, 44, 225, 55, 247, 202, 0, 45, 218, 202, 97, 51, 39, 131, 147, + 105, 143, 116, 11, 239, 198, 222, 92, 67, 169, 81, 250, 3, 40, 86, 101, 60, 131, 70, + 116, 123, 17, 117, 168, 236, 64, 10, 31, 103, 142, 179, 209, 29, 40, 220, 13, 239, 187, + 105, 50, 100, 186, 44, 104, 227, 131, 205, 32, 6, 20, 149, 130, 38, 10, 43, 18, 75, + 53, 50, 178, 195, 230, 132, 225, 14, 96, 238, 253, 27, 88, 48, 128, 18, 92, 232, 246, + 224, 182, 23, 231, 203, 172, 105, 241, 183, 148, 4, 2, 202, 55, 181, 142, 29, 57, 111, + 43, 153, 93, 41, 181, 181, 89, 54, 200, 182, 31, 190, 150, 213, 213, 126, 160, 130, 232, + 146, 57, 125, 151, 59, 71, 206, 240, 213, 236, 42, 68, 24, 195, 162, 65, 121, 87, 155, + 175, 31, 81, 207, 222, 232, 164, 180, 102, 69, 55, 79, 216, 112, 204, 112, 171, 19, 63, + 156, 233, 43, 198, 46, 67, 138, 208, 132, 4, 39, 32, 180, 71, 113, 131, 38, 90, 40, + 219, 193, 109, 18, 16, 70, 131, 220, 182, 46, 240, 245, 203, 217, 32, 146, 7, 100, 28, + 216, 233, 32, 255, 9, 213, 71, 123, 88, 110, 213, 128, 74, 150, 238, 93, 166, 52, 224, + 131, 234, 15, 115, 224, 218, 76, 1, 108, 84, 101, 137, 44, 79, 170, 44, 88, 127, 116, + 211, 216, 226, 168, 88, 45, 63, 70, 138, 230, 123, 107, 105, 101, 122, 220, 70, 84, 41, + 71, 193, 125, 173, 75, 169, 252, 245, 213, 84, 117, 73, 40, 77, 44, 209, 166, 90, 16, + 237, 229, 246, 104, 80, 95, 206, 202, 60, 20, 31, 101, 92, 225, 226, 9, 44, 140, 5, + 34, 97, 89, 151, 171, 129, 229, 216, 82, 139, 51, 99, 120, 24, 89, 225, 104, 185, 175, + 50, 246, 196, 82, 91, 32, 51, 62, 42, 96, 202, 47, 130, 44, 137, 26, 215, 10, 255, + 176, 93, 138, 227, 193, 3, 251, 27, 229, 100, 212, 149, 151, 202, 89, 233, 38, 122, 29, + 100, 164, 125, 46, 212, 0, 90, 93, 26, 50, 103, 25, 226, 197, 164, 198, 135, 168, 194, + 162, 141, 38, 119, 34, 190, 66, 124, 167, 104, 247, 197, 204, 156, 67, 251, 112, 67, 85, + 205, 93, 135, 53, 119, 106, 251, 28, 49, 130, 196, 243, 36, 82, 26, 155, 117, 216, 221, + 241, 128, 70, 233, 70, 18, 133, 137, 14, 245, 204, 99, 195, 42, 235, 248, 161, 86, 243, + 190, 135, 118, 130, 123, 154, 213, 150, 54, 74, 111, 20, 60, 240, 90, 37, 54, 109, 171, + 191, 123, 161, 140, 222, 100, 182, 202, 93, 88, 32, 80, 23, 168, 198, 153, 36, 97, 111, + 187, 151, 185, 43, 172, 245, 27, 6, 27, 82, 115, 199, 18, 239, 104, 158, 206, 205, 85, + 152, 42, 174, 185, 123, 197, 98, 65, 95, 135, 163, 206, 66, 59, 136, 109, 231, 125, 137, + 237, 153, 219, 97, 96, 237, 81, 201, 140, 31, 150, 226, 183, 192, 144, 113, 59, 86, 212, + 125, 182, 91, 33, 132, 158, 92, 12, 12, 68, 138, 149, 50, 36, 113, 147, 133, 95, 229, + 78, 235, 4, 228, 206, 188, 165, 95, 45, 225, 181, 1, 94, 107, 93, 128, 240, 251, 220, + 252, 7, 32, 135, 156, 83, 171, 14, 230, 48, 109, 203, 126, 89, 208, 99, 39, 140, 9, + 134, 185, 234, 60, 187, 73, 167, 24, 201, 152, 20, 166, 148, 27, 199, 28, 184, 26, 199, + 198, 0, 248, 52, 204, 119, 141, 157, 218, 181, 41, 227, 59, 227, 206, 119, 159, 23, 31, + 184, 224, 183, 204, 134, 76, 231, 77, 105, 160, 103, 48, 103, 104, 41, 155, 53, 160, 41, + 210, 123, 222, 252, 95, 26, 223, 45, 146, 126, 68, 177, 54, 37, 105, 3, 171, 182, 235, + 249, 31, 139, 97, 80, 243, 202, 121, 143, 0, 26, 184, 210, 149, 151, 207, 244, 177, 174, + 34, 67, 45, 102, 245, 100, 140, 95, 104, 55, 21, 83, 49, 53, 223, 147, 134, 210, 93, + 0, 97, 93, 26, 26, 48, 175, 178, 255, 164, 99, 174, 198, 167, 220, 45, 156, 64, 185, + 252, 168, 241, 18, 252, 35, 71, 219, 182, 205, 173, 19, 206, 15, 113, 232, 42, 161, 152, + 220, 160, 60, 64, 79, 3, 231, 43, 49, 132, 108, 235, 128, 21, 220, 146, 17, 255, 218, + 236, 182, 168, 154, 201, 118, 170, 58, 94, 212, 220, 246, 177, 125, 51, 241, 204, 55, 216, + 248, 104, 92, 100, 83, 221, 121, 48, 111, 138, 47, 73, 119, 230, 241, 17, 175, 103, 187, + 234, 198, 144, 199, 188, 65, 68, 240, 51, 17, 39, 11, 9, 143, 104, 109, 227, 70, 231, + 19, 181, 113, 66, 255, 233, 41, 241, 250, 217, 89, 182, 196, 31, 71, 139, 220, 137, 208, + 204, 188, 225, 243, 200, 234, 131, 48, 88, 102, 119, 63, 121, 44, 177, 188, 44, 154, 229, + 29, 149, 190, 118, 76, 130, 150, 147, 14, 114, 28, 222, 62, 217, 191, 50, 161, 170, 181, + 210, 2, 28, 73, 66, 149, 117, 243, 81, 162, 141, 55, 191, 35, 245, 54, 111, 120, 204, + 2, 134, 62, 31, 100, 125, 248, 36, 175, 153, 206, 101, 107, 209, 129, 181, 19, 22, 43, + 7, 104, 205, 149, 159, 140, 184, 149, 195, 39, 14, 143, 42, 148, 205, 73, 249, 74, 66, + 30, 250, 219, 237, 96, 71, 190, 225, 253, 210, 248, 40, 218, 96, 245, 111, 0, 130, 39, + 150, 69, 79, 165, 212, 122, 57, 162, 195, 51, 237, 6, 82, 231, 225, 63, 71, 41, 253, + 41, 38, 208, 33, 78, 170, 130, 68, 26, 131, 198, 66, 26, 12, 145, 191, 224, 11, 249, + 130, 207, 44, 112, 213, 126, 88, 183, 190, 160, 225, 187, 201, 8, 140, 235, 87, 55, 109, + 155, 81, 241, 98, 147, 11, 110, 37, 202, 79, 49, 195, 210, 0, 240, 66, 214, 110, 154, + 142, 44, 58, 111, 232, 4, 119, 117, 239, 207, 172, 93, 106, 254, 78, 205, 145, 89, 59, + 183, 35, 138, 232, 230, 92, 233, 214, 159, 191, 69, 58, 78, 114, 116, 189, 91, 121, 53, + 208, 104, 4, 125, 198, 111, 123, 20, 60, 13, 109, 120, 196, 145, 3, 172, 119, 95, 150, + 78, 255, 85, 147, 57, 163, 6, 174, 97, 97, 39, 151, 50, 144, 155, 175, 86, 11, 43, + 107, 71, 56, 216, 191, 253, 105, 194, 170, 225, 34, 64, 47, 34, 150, 195, 91, 58, 201, + 10, 155, 43, 49, 50, 93, 194, 206, 13, 25, 217, 56, 132, 33, 112, 92, 225, 109, 198, + 164, 23, 167, 199, 88, 215, 234, 238, 155, 69, 40, 100, 80, 196, 144, 129, 246, 237, 68, + 197, 250, 93, 159, 51, 225, 193, 163, 62, 163, 17, 4, 71, 41, 172, 15, 130, 132, 249, + 112, 31, 63, 152, 132, 143, 92, 20, 17, 83, 1, 86, 25, 252, 179, 185, 47, 149, 122, + 211, 211, 29, 229, 216, 101, 15, 133, 117, 145, 9, 111, 1, 40, 175, 154, 173, 62, 247, + 193, 80, 75, 194, 166, 100, 191, 90, 29, 239, 239, 152, 194, 195, 182, 168, 156, 27, 183, + 33, 145, 73, 43, 0, 75, 83, 175, 229, 0, 238, 221, 194, 63, 40, 133, 230, 140, 68, + 64, 170, 51, 48, 66, 246, 243, 248, 159, 144, 20, 87, 177, 165, 160, 220, 166, 235, 48, + 86, 209, 49, 68, 174, 243, 132, 214, 120, 106, 99, 189, 170, 13, 241, 219, 80, 232, 207, + 72, 135, 95, 92, 223, 16, 2, 127, 237, 169, 107, 29, 255, 61, 79, 68, 236, 67, 200, + 194, 188, 50, 38, 121, 221, 52, 107, 184, 132, 84, 136, 204, 219, 231, 41, 186, 248, 44, + 58, 229, 213, 166, 3, 212, 227, 82, 25, 207, 150, 225, 146, 82, 20, 185, 204, 242, 237, + 55, 170, 113, 139, 50, 62, 103, 26, 103, 34, 18, 148, 93, 247, 105, 3, 251, 62, 231, + 77, 87, 182, 227, 57, 73, 54, 77, 2, 2, 63, 239, 57, 234, 97, 197, 29, 159, 44, + 55, 7, 79, 74, 155, 172, 66, 5, 175, 61, 67, 150, 139, 155, 77, 111, 212, 151, 165, + 34, 153, 167, 98, 137, 225, 77, 234, 166, 107, 138, 211, 163, 145, 34, 237, 45, 206, 47, + 50, 126, 108, 117, 21, 248, 17, 98, 103, 230, 249, 12, 9, 147, 179, 107, 29, 149, 185, + 7, 59, 37, 146, 14, 200, 35, 49, 182, 80, 0, 230, 130, 126, 83, 248, 148, 75, 9, + 247, 178, 240, 240, 190, 249, 132, 114, 101, 161, 7, 30, 169, 67, 68, 59, 82, 12, 95, + 131, 195, 176, 131, 169, 51, 2, 252, 44, 150, 72, 54, 141, 250, 38, 126, 185, 31, 3, + 44, 132, 165, 52, 163, 78, 120, 231, 138, 202, 244, 234, 77, 183, 155, 209, 97, 207, 212, + 94, 251, 107, 166, 49, 249, 161, 88, 120, 91, 120, 123, 135, 253, 33, 188, 160, 112, 52, + 136, 250, 254, 125, 229, 76, 53, 128, 30, 150, 79, 243, 244, 75, 95, 155, 125, 88, 60, + 213, 209, 152, 78, 77, 32, 75, 110, 220, 236, 222, 17, 117, 217, 15, 242, 190, 92, 39, + 63, 123, 190, 143, 111, 178, 219, 206, 78, 88, 38, 138, 46, 247, 34, 124, 69, 66, 199, + 179, 31, 179, 145, 48, 41, 106, 64, 27, 41, 157, 67, 105, 24, 1, 249, 135, 179, 212, + 86, 1, 44, 124, 140, 91, 116, 175, 215, 185, 242, 159, 108, 17, 83, 254, 66, 124, 105, + 131, 151, 146, 32, 218, 252, 57, 219, 245, 193, 143, 201, 23, 145, 246, 148, 30, 82, 8, + 206, 41, 194, 192, 201, 47, 210, 28, 46, 20, 152, 151, 151, 48, 42, 184, 11, 38, 241, + 231, 28, 179, 119, 230, 202, 8, 220, 94, 39, 46, 103, 245, 88, 42, 181, 33, 90, 136, + 62, 136, 156, 214, 31, 52, 7, 74, 237, 19, 113, 223, 250, 141, 146, 113, 115, 92, 122, + 80, 187, 161, 126, 35, 150, 215, 78, 76, 249, 168, 212, 55, 48, 113, 14, 80, 166, 21, + 154, 147, 40, 12, 114, 35, 153, 5, 148, 12, 98, 15, 92, 29, 176, 219, 65, 71, 179, + 143, 147, 172, 56, 104, 227, 104, 218, 241, 185, 128, 7, 84, 20, 47, 96, 135, 82, 249, + 140, 231, 6, 238, 246, 99, 12, 167, 63, 77, 238, 242, 221, 130, 158, 21, 235, 129, 126, + 197, 114, 56, 69, 121, 140, 90, 169, 237, 225, 252, 231, 109, 228, 237, 91, 219, 81, 104, + 130, 144, 181, 113, 130, 147, 244, 32, 169, 223, 162, 39, 164, 21, 95, 234, 143, 236, 68, + 57, 217, 37, 53, 192, 147, 25, 174, 239, 245, 0, 87, 119, 144, 13, 232, 19, 160, 220, + 51, 73, 188, 214, 113, 96, 235, 209, 75, 122, 190, 144, 179, 151, 181, 233, 88, 73, 3, + 7, 56, 248, 7, 143, 112, 152, 156, 89, 171, 61, 53, 223, 135, 242, 181, 248, 83, 161, + 202, 158, 28, 136, 46, 208, 32, 228, 186, 121, 45, 189, 128, 102, 182, 136, 246, 38, 32, + 147, 127, 204, 208, 181, 171, 87, 167, 97, 80, 250, 2, 26, 153, 31, 163, 200, 239, 195, + 172, 169, 60, 218, 103, 188, 65, 30, 69, 55, 68, 102, 202, 196, 50, 154, 121, 221, 242, + 33, 63, 67, 28, 66, 93, 181, 97, 0, 126, 81, 196, 43, 251, 0, 5, 98, 189, 70, + 128, 3, 126, 197, 105, 72, 137, 155, 227, 3, 121, 214, 36, 184, 25, 65, 250, 118, 247, + 91, 119, 117, 173, 60, 160, 168, 60, 166, 10, 250, 237, 139, 253, 107, 80, 102, 180, 217, + 2, 151, 221, 123, 109, 1, 52, 134, 66, 46, 253, 57, 138, 117, 175, 55, 178, 79, 223, + 239, 245, 234, 233, 226, 117, 231, 78, 198, 78, 2, 159, 80, 154, 124, 204, 7, 126, 0, + 142, 193, 47, 140, 251, 185, 2, 170, 241, 180, 249, 208, 163, 239, 186, 141, 210, 48, 116, + 32, 246, 195, 34, 150, 19, 188, 19, 224, 196, 146, 224, 83, 83, 15, 224, 78, 201, 226, + 249, 186, 151, 243, 139, 58, 226, 70, 199, 181, 118, 60, 213, 109, 255, 248, 3, 19, 181, + 23, 243, 122, 169, 212, 205, 252, 228, 173, 75, 173, 144, 68, 104, 39, 55, 243, 98, 26, + 57, 41, 207, 175, 102, 165, 29, 102, 158, 32, 121, 83, 56, 109, 205, 225, 66, 155, 222, + 38, 73, 42, 212, 218, 110, 60, 1, 166, 48, 99, 193, 105, 141, 145, 25, 244, 54, 54, + 90, 213, 87, 212, 40, 143, 66, 246, 112, 132, 146, 79, 171, 220, 121, 128, 182, 232, 189, + 184, 143, 237, 27, 80, 86, 169, 226, 112, 158, 25, 166, 248, 238, 253, 204, 23, 141, 15, + 13, 254, 147, 160, 77, 63, 124, 199, 191, 50, 175, 124, 234, 62, 105, 6, 143, 192, 176, + 113, 48, 78, 139, 215, 71, 121, 213, 20, 144, 98, 35, 158, 96, 183, 62, 174, 246, 187, + 117, 182, 237, 37, 50, 216, 99, 156, 223, 243, 93, 143, 101, 142, 222, 240, 101, 37, 106, + 58, 57, 250, 157, 93, 153, 254, 20, 216, 172, 10, 147, 34, 192, 129, 71, 243, 90, 171, + 144, 57, 159, 238, 201, 4, 124, 167, 244, 225, 205, 95, 28, 7, 89, 185, 100, 243, 184, + 121, 203, 100, 131, 95, 135, 68, 224, 207, 56, 58, 122, 201, 115, 25, 183, 61, 30, 51, + 229, 18, 21, 178, 113, 49, 186, 203, 235, 31, 191, 163, 152, 138, 8, 28, 233, 143, 97, + 202, 95, 153, 4, 217, 98, 120, 243, 26, 182, 17, 77, 155, 36, 99, 78, 150, 149, 8, + 98, 128, 39, 33, 36, 192, 172, 45, 220, 149, 189, 61, 96, 28, 215, 100, 246, 58, 221, + 233, 84, 147, 251, 162, 47, 31, 5, 125, 181, 154, 134, 23, 27, 174, 57, 64, 110, 229, + 109, 75, 123, 43, 136, 219, 71, 95, 64, 61, 154, 29, 39, 238, 177, 34, 145, 225, 65, + 150, 94, 247, 49, 229, 15, 77, 147, 72, 141, 2, 45, 251, 77, 169, 38, 213, 132, 110, + 53, 196, 172, 207, 226, 212, 190, 148, 246, 79, 117, 56, 230, 212, 48, 23, 185, 63, 100, + 76, 136, 242, 78, 181, 237, 156, 95, 20, 113, 227, 131, 167, 168, 47, 119, 139, 3, 53, + 31, 250, 133, 149, 50, 107, 105, 99, 130, 34, 162, 231, 111, 42, 217, 190, 224, 199, 90, + 63, 220, 204, 35, 95, 115, 203, 143, 234, 86, 147, 32, 118, 141, 165, 11, 192, 16, 117, + 35, 147, 152, 198, 123, 7, 240, 84, 198, 209, 28, 33, 17, 248, 237, 52, 88, 97, 255, + 231, 76, 86, 122, 109, 204, 8, 18, 216, 201, 35, 77, 237, 183, 229, 179, 50, 237, 164, + 135, 179, 118, 164, 213, 135, 157, 195, 187, 245, 36, 187, 220, 113, 18, 87, 222, 222, 96, + 241, 183, 42, 21, 4, 23, 205, 233, 203, 0, 214, 112, 136, 138, 230, 44, 95, 110, 201, + 34, 41, 191, 71, 229, 155, 185, 247, 243, 151, 214, 84, 137, 141, 126, 159, 146, 149, 108, + 124, 97, 109, 82, 209, 245, 221, 183, 34, 60, 37, 236, 95, 79, 171, 167, 53, 71, 96, + 45, 58, 248, 3, 142, 129, 145, 12, 33, 36, 162, 142, 160, 3, 251, 243, 213, 240, 208, + 141, 19, 13, 178, 255, 109, 2, 170, 20, 55, 241, 116, 101, 44, 108, 105, 186, 238, 251, + 199, 15, 31, 106, 157, 191, 110, 152, 178, 67, 137, 131, 208, 156, 144, 131, 155, 253, 134, + 70, 18, 190, 55, 134, 35, 99, 243, 140, 30, 225, 135, 230, 240, 166, 81, 142, 102, 191, + 39, 25, 3, 177, 156, 211, 77, 45, 87, 233, 43, 221, 48, 61, 155, 103, 195, 191, 203, + 182, 75, 233, 152, 211, 208, 136, 121, 33, 23, 224, 224, 62, 249, 227, 239, 149, 183, 61, + 195, 15, 39, 238, 236, 87, 43, 136, 191, 239, 71, 138, 166, 147, 116, 62, 102, 68, 199, + 224, 101, 223, 193, 70, 29, 186, 42, 13, 80, 225, 75, 19, 241, 115, 1, 221, 202, 45, + 102, 137, 29, 174, 20, 195, 66, 136, 2, 168, 205, 201, 137, 50, 168, 74, 121, 198, 4, + 163, 212, 85, 133, 31, 105, 118, 146, 106, 84, 93, 152, 187, 231, 181, 105, 251, 121, 171, + 132, 123, 84, 81, 69, 221, 132, 238, 40, 253, 181, 45, 161, 137, 130, 39, 169, 235, 158, + 59, 86, 242, 153, 239, 173, 128, 165, 23, 123, 30, 195, 0, 154, 23, 81, 224, 245, 214, + 206, 30, 212, 131, 75, 117, 12, 206, 157, 181, 186, 59, 241, 17, 45, 138, 0, 219, 11, + 165, 243, 135, 196, 182, 135, 95, 205, 217, 63, 195, 175, 14, 225, 131, 145, 45, 249, 158, + 251, 150, 84, 182, 209, 70, 199, 255, 209, 199, 219, 220, 109, 206, 99, 50, 132, 234, 146, + 82, 195, 209, 22, 114, 223, 247, 246, 113, 37, 239, 16, 33, 134, 100, 215, 88, 170, 158, + 87, 123, 102, 50, 88, 211, 1, 187, 6, 134, 165, 152, 216, 105, 106, 239, 220, 74, 231, + 210, 187, 12, 194, 204, 45, 72, 49, 4, 160, 219, 162, 248, 87, 8, 43, 176, 220, 44, + 107, 227, 178, 17, 124, 139, 122, 230, 122, 87, 48, 97, 42, 236, 110, 236, 185, 155, 53, + 234, 159, 214, 198, 66, 206, 30, 75, 249, 206, 40, 38, 57, 11, 217, 74, 136, 100, 197, + 110, 223, 29, 159, 65, 71, 140, 175, 51, 69, 74, 105, 48, 234, 63, 246, 45, 13, 20, + 121, 7, 226, 161, 46, 28, 173, 7, 103, 53, 108, 45, 164, 76, 74, 68, 141, 145, 208, + 61, 197, 22, 136, 46, 70, 115, 110, 60, 161, 124, 81, 26, 132, 51, 188, 178, 79, 106, + 186, 183, 160, 39, 228, 68, 115, 46, 136, 1, 192, 89, 62, 133, 112, 198, 180, 182, 58, + 34, 243, 219, 158, 69, 245, 34, 120, 178, 213, 200, 28, 143, 128, 188, 182, 100, 1, 41, + 146, 137, 43, 82, 227, 105, 216, 83, 48, 140, 10, 106, 175, 254, 70, 77, 67, 59, 112, + 188, 237, 69, 133, 10, 212, 5, 198, 138, 105, 199, 180, 252, 81, 223, 79, 53, 73, 39, + 137, 121, 180, 148, 228, 99, 146, 42, 177, 214, 102, 33, 147, 84, 102, 25, 94, 59, 31, + 37, 197, 137, 237, 122, 133, 63, 90, 213, 116, 163, 253, 253, 29, 177, 145, 2, 21, 36, + 45, 198, 251, 147, 231, 143, 232, 78, 168, 71, 137, 199, 108, 79, 80, 90, 201, 214, 153, + 35, 172, 13, 199, 169, 11, 228, 91, 157, 231, 112, 193, 20, 54, 189, 167, 30, 77, 144, + 108, 245, 215, 246, 189, 68, 69, 14, 158, 14, 228, 55, 50, 145, 69, 249, 58, 80, 222, + 149, 237, 198, 5, 175, 218, 60, 109, 130, 91, 186, 18, 200, 175, 234, 190, 109, 46, 3, + 123, 204, 18, 96, 4, 68, 241, 73, 62, 44, 154, 29, 193, 136, 227, 199, 55, 189, 4, + 164, 64, 95, 95, 82, 39, 15, 60, 230, 124, 107, 233, 248, 55, 251, 89, 60, 63, 75, + 134, 126, 119, 32, 156, 57, 168, 127, 0, 224, 61, 5, 133, 125, 100, 228, 208, 140, 243, + 12, 114, 111, 119, 92, 104, 175, 87, 193, 236, 151, 13, 114, 21, 132, 146, 177, 189, 59, + 49, 190, 27, 110, 195, 160, 236, 40, 132, 188, 181, 120, 201, 40, 232, 65, 132, 80, 241, + 220, 18, 221, 115, 31, 79, 137, 164, 226, 58, 98, 29, 108, 32, 57, 219, 228, 218, 199, + 13, 95, 132, 195, 215, 77, 235, 191, 143, 112, 16, 128, 76, 35, 93, 191, 66, 173, 73, + 231, 143, 132, 73, 173, 240, 106, 231, 203, 78, 193, 147, 92, 33, 23, 31, 248, 100, 11, + 184, 243, 123, 201, 115, 200, 236, 209, 135, 47, 126, 209, 22, 14, 85, 95, 188, 69, 202, + 163, 17, 24, 101, 164, 117, 134, 187, 148, 127, 31, 159, 55, 19, 27, 1, 135, 227, 237, + 89, 107, 28, 216, 60, 51, 230, 145, 147, 163, 215, 93, 70, 232, 118, 172, 140, 235, 50, + 71, 128, 177, 103, 32, 233, 123, 60, 234, 2, 31, 216, 91, 139, 244, 52, 200, 40, 26, + 90, 188, 189, 49, 25, 4, 25, 144, 176, 166, 124, 227, 237, 252, 148, 85, 29, 125, 208, + 89, 104, 210, 121, 64, 46, 4, 53, 99, 204, 93, 125, 38, 25, 59, 88, 51, 64, 113, + 195, 241, 23, 64, 212, 5, 60, 104, 90, 90, 230, 42, 179, 78, 253, 44, 143, 44, 49, + 196, 143, 254, 34, 13, 36, 60, 73, 125, 112, 137, 239, 52, 122, 7, 116, 79, 12, 177, + 183, 103, 11, 158, 146, 190, 237, 143, 235, 124, 188, 28, 65, 76, 26, 100, 89, 63, 160, + 163, 188, 17, 44, 172, 69, 167, 179, 185, 246, 191, 107, 174, 38, 118, 76, 184, 53, 58, + 72, 32, 182, 5, 61, 248, 81, 88, 92, 170, 152, 253, 77, 84, 14, 122, 1, 83, 34, + 180, 13, 25, 115, 120, 199, 154, 238, 20, 83, 36, 79, 155, 68, 5, 160, 130, 254, 242, + 218, 90, 156, 114, 87, 234, 199, 101, 101, 200, 185, 135, 124, 198, 160, 240, 62, 104, 138, + 45, 125, 222, 81, 204, 122, 150, 210, 26, 24, 208, 12, 242, 42, 169, 101, 130, 148, 44, + 232, 249, 245, 161, 128, 113, 103, 33, 98, 166, 137, 236, 212, 7, 202, 38, 211, 69, 188, + 165, 95, 212, 118, 108, 199, 161, 22, 45, 35, 170, 90, 11, 163, 79, 173, 36, 193, 20, + 69, 35, 187, 207, 16, 144, 214, 219, 182, 170, 32, 114, 79, 128, 71, 198, 237, 15, 103, + 4, 60, 139, 175, 150, 151, 82, 230, 68, 119, 168, 89, 188, 204, 20, 140, 220, 165, 98, + 184, 91, 12, 217, 205, 92, 90, 20, 35, 71, 36, 138, 76, 96, 22, 251, 247, 173, 78, + 222, 241, 197, 134, 75, 130, 83, 96, 14, 47, 5, 113, 232, 96, 126, 193, 45, 218, 28, + 66, 253, 99, 103, 136, 176, 200, 158, 171, 191, 76, 249, 158, 62, 190, 37, 137, 65, 120, + 233, 80, 168, 238, 193, 145, 79, 63, 82, 125, 26, 111, 191, 24, 210, 39, 161, 131, 239, + 64, 46, 175, 140, 39, 77, 202, 230, 115, 84, 40, 235, 62, 120, 148, 45, 57, 37, 124, + 121, 120, 249, 148, 231, 185, 172, 186, 224, 77, 61, 207, 141, 107, 126, 26, 147, 204, 229, + 121, 63, 58, 161, 43, 120, 25, 191, 165, 83, 228, 34, 205, 92, 27, 97, 67, 213, 13, + 253, 182, 91, 59, 133, 233, 166, 4, 4, 57, 209, 233, 179, 16, 35, 85, 59, 155, 111, + 250, 65, 194, 223, 99, 144, 59, 127, 241, 127, 85, 255, 125, 11, 90, 184, 145, 68, 95, + 150, 72, 153, 103, 49, 76, 120, 85, 161, 179, 241, 16, 174, 51, 211, 142, 150, 99, 201, + 22, 85, 73, 108, 84, 199, 120, 175, 128, 9, 243, 223, 160, 59, 120, 8, 109, 197, 128, + 194, 103, 52, 180, 119, 227, 231, 75, 113, 126, 175, 59, 148, 4, 132, 1, 89, 75, 121, + 8, 204, 131, 251, 171, 36, 55, 36, 44, 165, 233, 172, 103, 80, 224, 28, 200, 195, 3, + 20, 53, 129, 195, 112, 22, 200, 244, 23, 34, 64, 145, 42, 12, 20, 38, 184, 56, 94, + 220, 101, 3, 198, 17, 107, 22, 242, 135, 222, 182, 138, 243, 235, 11, 182, 91, 34, 127, + 80, 58, 161, 145, 203, 204, 158, 224, 242, 86, 24, 81, 51, 126, 84, 249, 143, 191, 15, + 130, 70, 238, 57, 209, 225, 36, 221, 152, 128, 255, 24, 208, 57, 186, 97, 4, 134, 255, + 229, 121, 86, 254, 202, 137, 124, 31, 130, 12, 222, 146, 142, 37, 129, 199, 247, 98, 236, + 212, 251, 108, 211, 20, 60, 13, 206, 158, 18, 84}; + +SimpleConvolution::SimpleConvolution() { + width_ = 64; + height_ = 64; + mask_width_ = 3; + mask_height_ = mask_width_; + randomize_seed_ = 0; + + if (!IsPowerOf2(width_)) { + width_ = RoundToPowerOf2(width_); + } + + if (!IsPowerOf2(height_)) { + height_ = RoundToPowerOf2(height_); + } + + if (!(mask_width_ % 2)) { + mask_width_++; + } + + if (!(mask_height_ % 2)) { + mask_height_++; + } + + if (width_ * height_ < 256) { + width_ = 64; + height_ = 64; + } + + const uint32_t input_size_bytes = width_ * height_ * sizeof(uint32_t); + const uint32_t mask_size_bytes = mask_width_ * mask_height_ * sizeof(float); + + SetInDescr(KERNARG_BUF_ID, KERNARG_DES_ID, sizeof(kernel_args_t)); + SetInDescr(INPUT_BUF_ID, SYS_DES_ID, input_size_bytes); + SetInDescr(MASK_BUF_ID, SYS_DES_ID, mask_size_bytes); + SetOutDescr(LOCAL_BUF_ID, LOCAL_DES_ID, input_size_bytes); + SetHostDescr(REFOUT_BUF_ID, REFOUT_DES_ID, input_size_bytes); + + if (!randomize_seed_) TEST_ASSERT(sizeof(input_data_) <= input_size_bytes); +} + +void SimpleConvolution::Init() { + std::clog << "SimpleConvolution::init :" << std::endl; + + mem_descr_t kernarg_des = GetDescr(KERNARG_BUF_ID); + mem_descr_t input_des = GetDescr(INPUT_BUF_ID); + mem_descr_t mask_des = GetDescr(MASK_BUF_ID); + mem_descr_t output_des = GetDescr(LOCAL_BUF_ID); +#if 0 + printf("kernarg_des %p 0x%x\n", kernarg_des.ptr, kernarg_des.size); + printf("input_des %p 0x%x\n", input_des.ptr, input_des.size); + printf("mask_des %p 0x%x\n", mask_des.ptr, mask_des.size); + printf("output_des %p 0x%x\n", output_des.ptr, output_des.size); +#endif + uint32_t* input = reinterpret_cast(input_des.ptr); + uint32_t* output_local = reinterpret_cast(output_des.ptr); + float* mask = reinterpret_cast(mask_des.ptr); + kernel_args_t* kernel_args = reinterpret_cast(kernarg_des.ptr); + + if (randomize_seed_) { + // random initialisation of input + FillRandom(input, width_, height_, 0, 255, randomize_seed_); + } else { + // initialization with preset values + memcpy(input, input_data_, width_ * height_ * sizeof(uint32_t)); + } + + // Fill a blurr filter or some other filter of your choice + const float val = 1.0f / (mask_width_ * 2.0f - 1.0f); + for (uint32_t i = 0; i < (mask_width_ * mask_height_); i++) { + mask[i] = 0; + } + for (uint32_t i = 0; i < mask_width_; i++) { + uint32_t y = mask_height_ / 2; + mask[y * mask_width_ + i] = val; + } + for (uint32_t i = 0; i < mask_height_; i++) { + uint32_t x = mask_width_ / 2; + mask[i * mask_width_ + x] = val; + } + + // Print the INPUT array. + std::clog << std::dec; + PrintArray("> Input[0]", input, width_, 1); + PrintArray("> Mask", mask, mask_width_, mask_height_); + + // Fill the kernel args + kernel_args->arg1 = output_local; + kernel_args->arg2 = input; + kernel_args->arg3 = mask; + kernel_args->arg4 = width_; + kernel_args->arg41 = height_; + kernel_args->arg5 = mask_width_; + kernel_args->arg51 = mask_height_; + + // Calculate the reference output + ReferenceImplementation(reinterpret_cast(GetRefOut()), input, mask, width_, height_, + mask_width_, mask_height_); +} + +void SimpleConvolution::PrintOutput(const void* ptr) const { + PrintArray("> Output[0]", reinterpret_cast(ptr), width_, 1); +} + +bool SimpleConvolution::ReferenceImplementation(uint32_t* output, const uint32_t* input, + const float* mask, const uint32_t width, + const uint32_t height, const uint32_t mask_width, + const uint32_t mask_height) { + const uint32_t vstep = (mask_width - 1) / 2; + const uint32_t hstep = (mask_height - 1) / 2; + + // for each pixel in the input + for (uint32_t x = 0; x < width; x++) { + for (uint32_t y = 0; y < height; y++) { + // find the left, right, top and bottom indices such that + // the indices do not go beyond image boundaires + const uint32_t left = (x < vstep) ? 0 : (x - vstep); + const uint32_t right = ((x + vstep) >= width) ? width - 1 : (x + vstep); + const uint32_t top = (y < hstep) ? 0 : (y - hstep); + const uint32_t bottom = ((y + hstep) >= height) ? height - 1 : (y + hstep); + + // initializing wighted sum value + float sum_fx = 0; + for (uint32_t i = left; i <= right; ++i) { + for (uint32_t j = top; j <= bottom; ++j) { + // performing wighted sum within the mask boundaries + uint32_t mask_idx = (j - (y - hstep)) * mask_width + (i - (x - vstep)); + uint32_t index = j * width + i; + + // to round to the nearest integer + sum_fx += ((float)input[index] * mask[mask_idx]); + } + } + sum_fx += 0.5f; + output[y * width + x] = uint32_t(sum_fx); + } + } + + return true; +} diff --git a/test/hsa/test/simple_convolution/simple_convolution.h b/test/hsa/test/simple_convolution/simple_convolution.h new file mode 100644 index 00000000..550d1320 --- /dev/null +++ b/test/hsa/test/simple_convolution/simple_convolution.h @@ -0,0 +1,94 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef TEST_SIMPLE_CONVOLUTION_SIMPLE_CONVOLUTION_H_ +#define TEST_SIMPLE_CONVOLUTION_SIMPLE_CONVOLUTION_H_ + +#include +#include + +#include "ctrl/test_kernel.h" + +// Class implements SimpleConvolution kernel parameters +class SimpleConvolution : public TestKernel { + public: + // Kernel buffers IDs + enum { INPUT_BUF_ID, LOCAL_BUF_ID, MASK_BUF_ID, KERNARG_BUF_ID, REFOUT_BUF_ID }; + + // Constructor + SimpleConvolution(); + + // Initialize method + void Init(); + + // Return compute grid size + uint32_t GetGridSize() const { return width_ * height_; } + + // Print output + void PrintOutput(const void* ptr) const; + + // Return name + std::string Name() const { return std::string("SimpleConvolution"); } + + private: + // Local kernel arguments declaration + struct kernel_args_t { + void* arg1; + void* arg2; + void* arg3; + uint32_t arg4; + uint32_t arg41; + uint32_t arg5; + uint32_t arg51; + }; + + // Reference CPU implementation of Simple Convolution + // @param output Output matrix after performing convolution + // @param input Input matrix on which convolution is to be performed + // @param mask mask matrix using which convolution was to be performed + // @param input_dimensions dimensions of the input matrix + // @param mask_dimensions dimensions of the mask matrix + // @return bool true on success and false on failure + bool ReferenceImplementation(uint32_t* output, const uint32_t* input, const float* mask, + const uint32_t width, const uint32_t height, + const uint32_t maskWidth, const uint32_t maskHeight); + + // Width of the Input array + uint32_t width_; + + // Height of the Input array + uint32_t height_; + + // Mask dimensions + uint32_t mask_width_; + + // Mask dimensions + uint32_t mask_height_; + + // Randomize input data + unsigned randomize_seed_; + + // Input data + static const uint32_t input_data_[]; +}; + +#endif // TEST_SIMPLE_CONVOLUTION_SIMPLE_CONVOLUTION_H_ diff --git a/test/hsa/test/util/evt_stats.h b/test/hsa/test/util/evt_stats.h new file mode 100644 index 00000000..01bc1317 --- /dev/null +++ b/test/hsa/test/util/evt_stats.h @@ -0,0 +1,98 @@ +#ifndef EVT_STATS_H_ +#define EVT_STATS_H_ + +#include + +#include +#include +#include +#include + +template +class EvtStatsT { + public: + typedef std::mutex mutex_t; + typedef uint64_t evt_count_t; + typedef double evt_avr_t; + struct evt_record_t { + uint64_t count; + evt_avr_t avr; + evt_record_t() : count(0), avr(0) {} + }; + typedef typename std::map map_t; + typedef typename std::map labels_t; + + // Comparison function + struct cmpfun { + template bool operator()(const T& a, const T& b) const { + return (a.second.avr != b.second.avr) ? a.second.avr < b.second.avr : a.first < b.first; + } + }; + + inline void add_event(evt_id_t id, evt_weight_t weight) { + std::lock_guard lck(mutex_); + //printf("EvtStats %p ::add_event %u %lu\n", this, id, weight); fflush(stdout); + + evt_record_t& rec = map_[id]; + const evt_count_t prev_count = rec.count; + const evt_count_t new_count = prev_count + 1; + const evt_avr_t prev_avr = rec.avr; + const evt_avr_t new_avr = ((prev_avr * prev_count) + weight) / new_count; + + rec.count = new_count; + rec.avr = new_avr; + } + + void dump() { + std::lock_guard lck(mutex_); + fprintf(stdout, "Dumping %s\n", path_); fflush(stdout); + + typedef typename std::set, cmpfun> set_t; + set_t s_(map_.begin(), map_.end()); + + uint64_t index = 0; + for (auto& e : s_) { + const evt_id_t id = e.first; + const char* label = get_label(id); + std::ostringstream oss; + oss << index << ",\"" << label << "\"," << e.second.count << "," << (uint64_t)(e.second.avr) << "," << (uint64_t)(e.second.count * e.second.avr); + fprintf(fdes_, "%s\n", oss.str().c_str()); + index += 1; + } + + fclose(fdes_); + } + + const char* get_label(const uint32_t& id) { + auto ret = labels_.insert({id, NULL}); + const char* label = ret.first->second; + return label; + } + const char* get_label(const char* id) { + return id; + } + const char* get_label(const std::string& id) { + return id.c_str(); + } + + void set_label(evt_id_t id, const char* label) { + //printf("EvtStats %p ::set_label %u %s\n", this, id, label); fflush(stdout); + labels_[id] = label; + } + + EvtStatsT(FILE* f, const char* path) : fdes_(f), path_(path) { + //printf("EvtStats %p ::EvtStatsT()\n", this); fflush(stdout); + fprintf(fdes_, "Index,Name,Count,Avr,Total\n"); + } + + private: + mutex_t mutex_; + map_t map_; + labels_t labels_; + FILE* fdes_; + const char* path_; +}; + +typedef EvtStatsT EvtStats; + +#endif // EVT_STATS_H_ diff --git a/test/hsa/test/util/helper_funcs.h b/test/hsa/test/util/helper_funcs.h new file mode 100644 index 00000000..c76854ba --- /dev/null +++ b/test/hsa/test/util/helper_funcs.h @@ -0,0 +1,86 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef TEST_UTIL_HELPER_FUNCS_H_ +#define TEST_UTIL_HELPER_FUNCS_H_ + +#include +#include +#include +#include +#include + +static inline void Error(std::string error_msg) { + std::cerr << "Error: " << error_msg << std::endl; +} + +template +void PrintArray(const std::string header, const T* data, const int width, const int height) { + std::clog << header << " :\n"; + for (int i = 0; i < height; i++) { + std::clog << "> "; + for (int j = 0; j < width; j++) { + std::clog << data[i * width + j] << " "; + } + std::clog << "\n"; + } +} + +template +bool FillRandom(T* array_ptr, const int width, const int height, const T range_min, + const T range_max, unsigned int seed = 123) { + if (!array_ptr) { + Error("Cannot fill array. NULL pointer."); + return false; + } + + if (!seed) seed = (unsigned int)time(NULL); + + srand(seed); + double range = double(range_max - range_min) + 1.0; + + /* random initialisation of input */ + for (int i = 0; i < height; i++) + for (int j = 0; j < width; j++) { + int index = i * width + j; + array_ptr[index] = range_min + T(range * rand() / (RAND_MAX + 1.0)); + } + + return true; +} + +template T RoundToPowerOf2(T val) { + int bytes = sizeof(T); + + val--; + for (int i = 0; i < bytes; i++) val |= val >> (1 << i); + val++; + + return val; +} + +template bool IsPowerOf2(T val) { + long long long_val = val; + return (((long_val & (-long_val)) - long_val == 0) && (long_val != 0)); +} + +#endif // TEST_UTIL_HELPER_FUNCS_H_ diff --git a/test/hsa/test/util/hsa_rsrc_factory.cpp b/test/hsa/test/util/hsa_rsrc_factory.cpp new file mode 120000 index 00000000..f3726ccf --- /dev/null +++ b/test/hsa/test/util/hsa_rsrc_factory.cpp @@ -0,0 +1 @@ +../../src/hsa_rsrc_factory.cpp \ No newline at end of file diff --git a/test/hsa/test/util/hsa_rsrc_factory.h b/test/hsa/test/util/hsa_rsrc_factory.h new file mode 120000 index 00000000..64af96f1 --- /dev/null +++ b/test/hsa/test/util/hsa_rsrc_factory.h @@ -0,0 +1 @@ +../../src/hsa_rsrc_factory.h \ No newline at end of file diff --git a/test/hsa/test/util/perf_timer.cpp b/test/hsa/test/util/perf_timer.cpp new file mode 100644 index 00000000..85c490b6 --- /dev/null +++ b/test/hsa/test/util/perf_timer.cpp @@ -0,0 +1,179 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#include "util/perf_timer.h" + +PerfTimer::PerfTimer() { freq_in_100mhz_ = MeasureTSCFreqHz(); } + +PerfTimer::~PerfTimer() { + while (!timers_.empty()) { + Timer* temp = timers_.back(); + timers_.pop_back(); + delete temp; + } +} + +// New cretaed timer instantance index will be returned +int PerfTimer::CreateTimer() { + Timer* newTimer = new Timer; + newTimer->start = 0; + newTimer->clocks = 0; + +#ifdef _WIN32 + QueryPerformanceFrequency((LARGE_INTEGER*)&newTimer->freq); +#else + newTimer->freq = (long long)1.0E3; +#endif + + /* Push back the address of new Timer instance created */ + timers_.push_back(newTimer); + return (int)(timers_.size() - 1); +} + +int PerfTimer::StartTimer(int index) { + if (index >= (int)timers_.size()) { + Error("Cannot reset timer. Invalid handle."); + return FAILURE; + } + +#ifdef _WIN32 +// General Windows timing method +#ifndef _AMD + long long tmpStart; + QueryPerformanceCounter((LARGE_INTEGER*)&(tmpStart)); + timers_[index]->start = (double)tmpStart; +#else +// AMD Windows timing method +#endif +#else +// General Linux timing method +#ifndef _AMD + struct timeval s; + gettimeofday(&s, 0); + timers_[index]->start = s.tv_sec * 1.0E3 + ((double)(s.tv_usec / 1.0E3)); +#else + // AMD timing method + unsigned int unused; + timers_[index]->start = __rdtscp(&unused); +#endif +#endif + + return SUCCESS; +} + + +int PerfTimer::StopTimer(int index) { + double n = 0; + if (index >= (int)timers_.size()) { + Error("Cannot reset timer. Invalid handle."); + return FAILURE; + } +#ifdef _WIN32 +#ifndef _AMD + long long n1; + QueryPerformanceCounter((LARGE_INTEGER*)&(n1)); + n = (double)n1; +#else +// AMD Window Timing +#endif + +#else +// General Linux timing method +#ifndef _AMD + struct timeval s; + gettimeofday(&s, 0); + n = s.tv_sec * 1.0E3 + (double)(s.tv_usec / 1.0E3); +#else + // AMD Linux timing + unsigned int unused; + n = __rdtscp(&unused); +#endif +#endif + + n -= timers_[index]->start; + timers_[index]->start = 0; + +#ifndef _AMD + timers_[index]->clocks += n; +#else + // timers_[index]->clocks += 10 * n / freq_in_100mhz_; // unit is ns + timers_[index]->clocks += 1.0E-6 * 10 * n / freq_in_100mhz_; // convert to ms +#endif + + return SUCCESS; +} + +void PerfTimer::Error(std::string str) { std::cout << str << std::endl; } + + +double PerfTimer::ReadTimer(int index) { + if (index >= (int)timers_.size()) { + Error("Cannot read timer. Invalid handle."); + return FAILURE; + } + + double reading = double(timers_[index]->clocks); + + reading = double(reading / timers_[index]->freq); + + return reading; +} + + +uint64_t PerfTimer::CoarseTimestampUs() { +#ifdef _WIN32 + uint64_t freqHz, ticks; + QueryPerformanceFrequency((LARGE_INTEGER*)&freqHz); + QueryPerformanceCounter((LARGE_INTEGER*)&ticks); + + // Scale numerator and divisor until (ticks * 1000000) fits in uint64_t. + while (ticks > (1ULL << 44)) { + ticks /= 16; + freqHz /= 16; + } + + return (ticks * 1000000) / freqHz; +#else + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + return uint64_t(ts.tv_sec) * 1000000 + ts.tv_nsec / 1000; +#endif +} + +uint64_t PerfTimer::MeasureTSCFreqHz() { + // Make a coarse interval measurement of TSC ticks for 1 gigacycles. + unsigned int unused; + uint64_t tscTicksEnd; + + uint64_t coarseBeginUs = CoarseTimestampUs(); + uint64_t tscTicksBegin = __rdtscp(&unused); + do { + tscTicksEnd = __rdtscp(&unused); + } while (tscTicksEnd - tscTicksBegin < 1000000000); + + uint64_t coarseEndUs = CoarseTimestampUs(); + + // Compute the TSC frequency and round to nearest 100MHz. + uint64_t coarseIntervalNs = (coarseEndUs - coarseBeginUs) * 1000; + uint64_t tscIntervalTicks = tscTicksEnd - tscTicksBegin; + return (tscIntervalTicks * 10 + (coarseIntervalNs / 2)) / coarseIntervalNs; +} diff --git a/test/hsa/test/util/perf_timer.h b/test/hsa/test/util/perf_timer.h new file mode 100644 index 00000000..bfd55324 --- /dev/null +++ b/test/hsa/test/util/perf_timer.h @@ -0,0 +1,83 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef TEST_UTIL_PERF_TIMER_H_ +#define TEST_UTIL_PERF_TIMER_H_ + +// Will use AMD timer or general Linux timer based on compilation flag +// Need to consider platform is Windows or Linux + +#include +#include +#include + +#if defined(_MSC_VER) +#include +#include +#include +#else +#if defined(__GNUC__) +#include +#include +#endif // __GNUC__ +#endif // _MSC_VER + +#include +#include +#include + +class PerfTimer { + public: + enum { SUCCESS = 0, FAILURE = 1 }; + + PerfTimer(); + ~PerfTimer(); + + // General Linux timing method + int CreateTimer(); + int StartTimer(int index); + int StopTimer(int index); + + // retrieve time + double ReadTimer(int index); + // write into a file + double WriteTimer(int index); + + private: + struct Timer { + std::string name; /* name of time object */ + long long freq; /* frequency */ + double clocks; /* number of ticks at end */ + double start; /* start point ticks */ + }; + + std::vector timers_; /* vector to Timer objects */ + double freq_in_100mhz_; + + // AMD timing method + uint64_t CoarseTimestampUs(); + uint64_t MeasureTSCFreqHz(); + + void Error(std::string str); +}; + +#endif // TEST_UTIL_PERF_TIMER_H_ diff --git a/test/hsa/test/util/test_assert.h b/test/hsa/test/util/test_assert.h new file mode 100644 index 00000000..7803865d --- /dev/null +++ b/test/hsa/test/util/test_assert.h @@ -0,0 +1,35 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef TEST_UTIL_TEST_ASSERT_H_ +#define TEST_UTIL_TEST_ASSERT_H_ + +#define TEST_ASSERT(cond) \ + { \ + if (!(cond)) { \ + std::cerr << "Assert failed(" << #cond << ") at " << __FILE__ << ", line " << __LINE__ \ + << std::endl; \ + exit(-1); \ + } \ + } + +#endif // TEST_UTIL_TEST_ASSERT_H_ diff --git a/test/hsa/test/util/xml.h b/test/hsa/test/util/xml.h new file mode 100644 index 00000000..eb2f5074 --- /dev/null +++ b/test/hsa/test/util/xml.h @@ -0,0 +1,457 @@ +/****************************************************************************** +Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*******************************************************************************/ + +#ifndef TEST_UTIL_XML_H_ +#define TEST_UTIL_XML_H_ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace xml { + +class Xml { + public: + typedef std::vector token_t; + + struct level_t; + typedef std::vector nodes_t; + typedef std::map opts_t; + struct level_t { + std::string tag; + nodes_t nodes; + opts_t opts; + }; + typedef std::vector nodes_vec_t; + typedef std::map map_t; + + enum { DECL_STATE, BODY_STATE }; + + static Xml* Create(const std::string& file_name, const Xml* obj = NULL) { + Xml* xml = new Xml(file_name, obj); + if (xml != NULL) { + if (xml->Init() == false) { + delete xml; + xml = NULL; + } else { + const std::size_t pos = file_name.rfind('/'); + const std::string path = (pos != std::string::npos) ? file_name.substr(0, pos + 1) : ""; + + xml->PreProcess(); + nodes_t incl_nodes; + for (auto* node : xml->GetNodes("top.include")) { + if (node->opts.find("touch") == node->opts.end()) { + node->opts["touch"] = ""; + incl_nodes.push_back(node); + } + } + for (auto* incl : incl_nodes) { + const std::string& incl_name = path + incl->opts["file"]; + Xml* ixml = Create(incl_name, xml); + if (ixml == NULL) { + delete xml; + xml = NULL; + break; + } else { + delete ixml; + } + } + if (xml) { + xml->Process(); + } + } + } + + return xml; + } + + static void Destroy(Xml* xml) { delete xml; } + + std::string GetName() { return file_name_; } + + void AddExpr(const std::string& full_tag, const std::string& name, const std::string& expr) { + const std::size_t pos = full_tag.rfind('.'); + const std::size_t pos1 = (pos == std::string::npos) ? 0 : pos + 1; + const std::string level_tag = full_tag.substr(pos1); + level_t* level = new level_t; + (*map_)[full_tag].push_back(level); + level->tag = level_tag; + level->opts["name"] = name; + level->opts["expr"] = expr; + } + + void AddConst(const std::string& full_tag, const std::string& name, const uint64_t& val) { + std::ostringstream oss; + oss << val; + AddExpr(full_tag, name, oss.str()); + } + + nodes_t GetNodes(const std::string& global_tag) { return (*map_)[global_tag]; } + + template F ForEach(const F& f_i) { + F f = f_i; + if (map_) { + for (auto& entry : *map_) { + for (auto node : entry.second) { + if (f.fun(entry.first, node) == false) break; + } + } + } + return f; + } + + template F ForEach(const F& f_i) const { + F f = f_i; + if (map_) { + for (auto& entry : *map_) { + for (auto node : entry.second) { + if (f.fun(entry.first, node) == false) break; + } + } + } + return f; + } + + struct print_func { + bool fun(const std::string& global_tag, level_t* node) { + for (auto& opt : node->opts) { + std::cout << global_tag << "." << opt.first << " = " << opt.second << std::endl; + } + return true; + } + }; + + void Print() const { + std::cout << "XML file '" << file_name_ << "':" << std::endl; + ForEach(print_func()); + } + + private: + Xml(const std::string& file_name, const Xml* obj) + : file_name_(file_name), + file_line_(0), + data_size_(0), + index_(0), + state_(BODY_STATE), + comment_(false), + included_(false), + level_(NULL), + map_(NULL) { + if (obj != NULL) { + map_ = obj->map_; + level_ = obj->level_; + included_ = true; + } + } + + struct delete_func { + bool fun(const std::string&, level_t* node) { + delete node; + return true; + } + }; + + ~Xml() { + if (included_ == false) { + ForEach(delete_func()); + delete map_; + } + } + + bool Init() { + fd_ = open(file_name_.c_str(), O_RDONLY); + if (fd_ == -1) { + // perror((std::string("open XML file ") + file_name_).c_str()); + return false; + } + + if (map_ == NULL) { + map_ = new map_t; + if (map_ == NULL) return false; + AddLevel("top"); + } + + return true; + } + + void PreProcess() { + uint32_t ind = 0; + char buf[kBufSize]; + bool error = false; + + while (1) { + const uint32_t pos = lseek(fd_, 0, SEEK_CUR); + uint32_t size = read(fd_, buf, kBufSize); + if (size <= 0) break; + buf[size - 1] = '\0'; + + if (strncmp(buf, "#include \"", 10) == 0) { + for (ind = 0; (ind < size) && (buf[ind] != '\n'); ++ind) {} + if (ind == size) { + fprintf(stderr, "XML PreProcess failed, line size limit %zu\n", kBufSize); + error = true; + break; + } + buf[ind] = '\0'; + size = ind; + lseek(fd_, pos + ind + 1, SEEK_SET); + + for (ind = 10; (ind < size) && (buf[ind] != '"'); ++ind) {} + if (ind == size) { + error = true; + break; + } + buf[ind] = '\0'; + + AddLevel("include"); + AddOption("file", &buf[10]); + UpLevel(); + } + } + + if (error) { + fprintf(stderr, "XML PreProcess failed, line '%s'\n", buf); + exit(1); + } + + lseek(fd_, 0, SEEK_SET); + } + + void Process() { + token_t remainder; + + while (1) { + token_t token = (remainder.size()) ? remainder : NextToken(); + remainder.clear(); + + // token_t token1 = token; + // token1.push_back('\0'); + // std::cout << "> " << &token1[0] << std::endl; + + // End of file + if (token.size() == 0) break; + + switch (state_) { + case BODY_STATE: + if (token[0] == '<') { + bool node_begin = true; + unsigned ind = 1; + if (token[1] == '/') { + node_begin = false; + ++ind; + } + + unsigned i = ind; + while (i < token.size()) { + if (token[i] == '>') break; + ++i; + } + for (unsigned j = i + 1; j < token.size(); ++j) remainder.push_back(token[j]); + + if (i == token.size()) { + if (node_begin) + state_ = DECL_STATE; + else + BadFormat(token); + token.push_back('\0'); + } else { + token[i] = '\0'; + } + + const char* tag = &token[ind]; + if (node_begin) { + AddLevel(tag); + } else { + if (strncmp(CurrentLevel().c_str(), tag, strlen(tag)) != 0) { + token.back() = '>'; + BadFormat(token); + } + UpLevel(); + } + } else { + BadFormat(token); + } + break; + case DECL_STATE: + if (token[0] == '>') { + state_ = BODY_STATE; + for (unsigned j = 1; j < token.size(); ++j) remainder.push_back(token[j]); + continue; + } else { + token.push_back('\0'); + unsigned j = 0; + for (j = 0; j < token.size(); ++j) + if (token[j] == '=') break; + if (j == token.size()) BadFormat(token); + token[j] = '\0'; + const char* key = &token[0]; + const char* value = &token[j + 1]; + AddOption(key, value); + } + break; + default: + std::cout << "XML parser error: wrong state: " << state_ << std::endl; + exit(1); + } + } + } + + bool SpaceCheck() const { + bool cond = ((buffer_[index_] == ' ') || (buffer_[index_] == '\t')); + return cond; + } + + bool LineEndCheck() { + bool found = false; + if (buffer_[index_] == '\n') { + buffer_[index_] = ' '; + ++file_line_; + found = true; + comment_ = false; + } else if (comment_ || (buffer_[index_] == '#')) { + found = true; + comment_ = true; + } + return found; + } + + token_t NextToken() { + token_t token; + bool in_string = false; + bool special_symb = false; + + while (1) { + if (data_size_ == 0) { + data_size_ = read(fd_, buffer_, kBufSize); + if (data_size_ <= 0) break; + } + + if (token.empty()) { + while ((index_ < data_size_) && (SpaceCheck() || LineEndCheck())) { + ++index_; + } + } + while ((index_ < data_size_) && (in_string || !(SpaceCheck() || LineEndCheck()))) { + const char symb = buffer_[index_]; + bool skip_symb = false; + + switch (symb) { + case '\\': + if (special_symb) { + special_symb = false; + } else { + special_symb = true; + skip_symb = true; + } + break; + case '"': + if (special_symb) { + special_symb = false; + } else { + in_string = !in_string; + if (!in_string) { + buffer_[index_] = ' '; + --index_; + } + skip_symb = true; + } + break; + } + + if (!skip_symb) token.push_back(symb); + ++index_; + } + + if (index_ == data_size_) { + index_ = 0; + data_size_ = 0; + } else { + if (special_symb || in_string) BadFormat(token); + break; + } + } + + return token; + } + + void BadFormat(token_t token) { + token.push_back('\0'); + std::cout << "Error: " << file_name_ << ", line " << file_line_ << ", bad XML token '" + << &token[0] << "'" << std::endl; + exit(1); + } + + void AddLevel(const std::string& tag) { + level_t* level = new level_t; + level->tag = tag; + if (level_) { + level_->nodes.push_back(level); + stack_.push_back(level_); + } + level_ = level; + + std::string global_tag; + for (level_t* level : stack_) { + global_tag += level->tag + "."; + } + global_tag += tag; + (*map_)[global_tag].push_back(level_); + } + + void UpLevel() { + level_ = stack_.back(); + stack_.pop_back(); + } + + std::string CurrentLevel() const { return level_->tag; } + + void AddOption(const std::string& key, const std::string& value) { level_->opts[key] = value; } + + const std::string file_name_; + unsigned file_line_; + int fd_; + + static const size_t kBufSize = 256; + char buffer_[kBufSize]; + + unsigned data_size_; + unsigned index_; + unsigned state_; + bool comment_; + std::vector stack_; + bool included_; + level_t* level_; + map_t* map_; +}; + +} // namespace xml + +#endif // TEST_UTIL_XML_H_