diff --git a/.azuredevops/rocm-ci.yml b/.azuredevops/rocm-ci.yml new file mode 100644 index 0000000..06d6e24 --- /dev/null +++ b/.azuredevops/rocm-ci.yml @@ -0,0 +1,45 @@ +resources: + repositories: + - repository: pipelines_repo + type: github + endpoint: ROCm + name: ROCm/ROCm + pipelines: + - pipeline: rocr-runtime_pipeline + source: \ROCR-Runtime + trigger: + branches: + include: + - master + +variables: +- group: common +- template: /.azuredevops/variables-global.yml@pipelines_repo + +trigger: + batch: true + branches: + include: + - amd-staging + - amd-master + paths: + exclude: + - .github + - License.txt + - README.md + +pr: + autoCancel: true + branches: + include: + - amd-staging + - amd-master + paths: + exclude: + - .github + - License.txt + - README.md + drafts: false + +jobs: + - template: ${{ variables.CI_COMPONENT_PATH }}/rocminfo.yml@pipelines_repo diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..17a6867 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,6 @@ +* @dayatsin-amd @shwetagkhatri +# Documentation files +docs/* @ROCm/rocm-documentation @dayatsin-amd @shwetagkhatri +*.md @ROCm/rocm-documentation @dayatsin-amd @shwetagkhatri +*.rst @ROCm/rocm-documentation @dayatsin-amd @shwetagkhatri +.readthedocs.yaml @ROCm/rocm-documentation @dayatsin-amd @shwetagkhatri diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..0086358 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1 @@ +blank_issues_enabled: true diff --git a/.github/ISSUE_TEMPLATE/issue_report.yml b/.github/ISSUE_TEMPLATE/issue_report.yml new file mode 100644 index 0000000..f55a286 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/issue_report.yml @@ -0,0 +1,179 @@ +name: Issue Report +description: File a report for ROCm related issues on Linux and Windows. For issues pertaining to documentation or non-bug related, please open a blank issue located below. +title: "[Issue]: " + +body: +- type: markdown + attributes: + value: | + Thank you for taking the time to fill out this report! + + You can acquire your OS, CPU, GPU (for filling out this report) with the following commands: + + Linux: + echo "OS:" && cat /etc/os-release | grep -E "^(NAME=|VERSION=)"; + echo "CPU: " && cat /proc/cpuinfo | grep "model name" | sort --unique; + echo "GPU:" && /opt/rocm/bin/rocminfo | grep -E "^\s*(Name|Marketing Name)"; + + Windows: + (Get-WmiObject Win32_OperatingSystem).Version + (Get-WmiObject win32_Processor).Name + (Get-WmiObject win32_VideoController).Name +- type: textarea + attributes: + label: Problem Description + description: Describe the issue you encountered. + validations: + required: true +- type: input + attributes: + label: Operating System + description: What is the name and version number of the OS? + placeholder: "e.g. Ubuntu 22.04.3 LTS (Jammy Jellyfish)" + validations: + required: true +- type: input + attributes: + label: CPU + description: What CPU did you encounter the issue on? + placeholder: "e.g. AMD Ryzen 9 5900HX with Radeon Graphics" + validations: + required: true +- type: dropdown + attributes: + label: GPU + description: What GPU(s) did you encounter the issue on (you can select multiple GPUs from the list) + multiple: true + options: + - AMD Instinct MI300 + - AMD Instinct MI300A + - AMD Instinct MI300X + - AMD Instinct MI250X + - AMD Instinct MI250 + - AMD Instinct MI210 + - AMD Instinct MI100 + - AMD Instinct MI50 + - AMD Instinct MI25 + - AMD Radeon Pro V620 + - AMD Radeon Pro VII + - AMD Radeon RX 7900 XTX + - AMD Radeon VII + - AMD Radeon Pro W7900 + - AMD Radeon Pro W7800 + - AMD Radeon Pro W6800 + - AMD Radeon Pro W6600 + - AMD Radeon Pro W5500 + - AMD Radeon RX 7900 XT + - AMD Radeon RX 7600 + - AMD Radeon RX 6950 XT + - AMD Radeon RX 6900 XT + - AMD Radeon RX 6800 XT + - AMD Radeon RX 6800 + - AMD Radeon RX 6750 + - AMD Radeon RX 6700 XT + - AMD Radeon RX 6700 + - AMD Radeon RX 6650 XT + - AMD Radeon RX 6600 XT + - AMD Radeon RX 6600 + - Other + validations: + required: true +- type: input + attributes: + label: Other + description: If you selected Other, please specify +- type: dropdown + attributes: + label: ROCm Version + description: What version(s) of ROCm did you encounter the issue on? + multiple: true + options: + - ROCm 6.0.0 + - ROCm 5.7.1 + - ROCm 5.7.0 + - ROCm 5.6.0 + - ROCm 5.5.1 + - ROCm 5.5.0 + validations: + required: true +- type: dropdown + attributes: + label: ROCm Component + description: (Optional) If this issue relates to a specific ROCm component, it can be mentioned here. + options: + - Other + - AMDMIGraphX + - amdsmi + - aomp + - aomp-extras + - clang-ocl + - clr + - composable_kernel + - flang + - half + - HIP + - hipBLAS + - HIPCC + - hipCUB + - HIP-Examples + - hipFFT + - hipfort + - HIPIFY + - hipSOLVER + - hipSPARSE + - hipTensor + - llvm-project + - MIOpen + - MIVisionX + - rccl + - rdc + - rocALUTION + - rocBLAS + - ROCdbgapi + - rocFFT + - ROCgdb + - ROCK-Kernel-Driver + - ROCm + - rocm_bandwidth_test + - rocm_smi_lib + - rocm-cmake + - ROCm-CompilerSupport + - rocm-core + - ROCm-Device-Libs + - rocminfo + - rocMLIR + - ROCmValidationSuite + - rocPRIM + - rocprofiler + - rocr_debug_agent + - rocRAND + - ROCR-Runtime + - rocSOLVER + - rocSPARSE + - rocThrust + - roctracer + - ROCT-Thunk-Interface + - rocWMMA + - rpp + - Tensile + default: 39 +- type: textarea + attributes: + label: Steps to Reproduce + description: (Optional) Detailed steps to reproduce the issue. + validations: + required: false + +- type: textarea + attributes: + label: (Optional for Linux users) Output of /opt/rocm/bin/rocminfo --support + description: The output of rocminfo --support could help to better address the problem. + validations: + required: false + +- type: textarea + attributes: + label: Additional Information + description: (Optional) Any additional information that is relevant, e.g. relevant environment variables, dockerfiles, log files, dmesg output (on Linux), etc. + validations: + required: false diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..d24c82f --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,17 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "pip" # See documentation for possible values + directory: "/docs/sphinx" # Location of package manifests + open-pull-requests-limit: 10 + schedule: + interval: "daily" + labels: + - "documentation" + - "dependencies" + reviewers: + - "samjwu" diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..fbbc470 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,18 @@ +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +version: 2 + +sphinx: + configuration: docs/conf.py + +formats: [htmlzip, pdf, epub] + +python: + install: + - requirements: docs/sphinx/requirements.txt + +build: + os: ubuntu-22.04 + tools: + python: "3.10" diff --git a/CMakeLists.txt b/CMakeLists.txt index cbcf9d0..426657d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,6 +46,9 @@ if(WIN32) return() endif() +# Generate static package, when BUILD_SHARED_LIBS is set to OFF. +# Default to ON +option(BUILD_SHARED_LIBS "Build using shared libraries" ON) ## Set default module path if not already set if(NOT DEFINED CMAKE_MODULE_PATH) set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules/") @@ -123,7 +126,6 @@ if(${BUILD_TYPE} STREQUAL "Debug") add_definitions(-DDEBUG) endif() -add_definitions(-D__linux__) add_definitions(-DLITTLEENDIAN_CPU=1) # @@ -138,13 +140,12 @@ set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -fmerge-all-constants) set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -fms-extensions) set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -Werror) set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -Wall) -set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -m64) # # Extend the compiler flags for 64-bit builds # if((${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "x86_64") OR (${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "AMD64")) - set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -msse -msse2) + set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -m64 -msse -msse2) endif() # @@ -174,18 +175,21 @@ target_compile_options(${ROCMINFO_EXE} PRIVATE ${ROCMINFO_CXX_FLAGS}) # Install directives ########################### install ( - FILES ${CMAKE_CURRENT_BINARY_DIR}/${ROCMINFO_EXE} - PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE + TARGETS ${ROCMINFO_EXE} DESTINATION ${CMAKE_INSTALL_BINDIR} ) install ( - FILES ${CMAKE_CURRENT_BINARY_DIR}/rocm_agent_enumerator - PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE + PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/rocm_agent_enumerator DESTINATION ${CMAKE_INSTALL_BINDIR} ) ########################### # Packaging directives ########################### -set(CPACK_PACKAGE_NAME "${PROJECT_NAME}") +if(BUILD_SHARED_LIBS) + set(CPACK_PACKAGE_NAME "${PROJECT_NAME}") +else() + set(CPACK_RPM_PACKAGE_NAME "${PROJECT_NAME}-static-devel") + set(CPACK_DEBIAN_PACKAGE_NAME "${PROJECT_NAME}-static-dev") +endif() set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.") set(CPACK_PACKAGE_VERSION_MAJOR "${PKG_VERSION_MAJOR}") set(CPACK_PACKAGE_VERSION_MINOR "${PKG_VERSION_MINOR}") @@ -206,7 +210,7 @@ if(DEFINED ENV{ROCM_LIBPATCH_VERSION}) endif() #Debian package specific variables -set(CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr, kmod, pciutils") +set(CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr, kmod, pciutils, python3, libc6, libgcc-s1, libstdc++6") set(CPACK_DEBIAN_PACKAGE_HOMEPAGE ${CPACK_DEBIAN_PACKAGE_HOMEPAGE} CACHE STRING "https://github.com/RadeonOpenCompute/ROCm") if (DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE}) set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE}) @@ -218,7 +222,20 @@ if ( ROCM_DEP_ROCMCORE ) endif() #RPM package specific variables -set(CPACK_RPM_PACKAGE_REQUIRES "hsa-rocr kmod pciutils") +execute_process(COMMAND rpm --eval %{?dist} + RESULT_VARIABLE PROC_RESULT + OUTPUT_VARIABLE EVAL_RESULT + OUTPUT_STRIP_TRAILING_WHITESPACE) +message("RESULT_VARIABLE ${PROC_RESULT} OUTPUT_VARIABLE: ${EVAL_RESULT}") + +if(PROC_RESULT EQUAL "0" AND "${EVAL_RESULT}" STREQUAL ".el7") + # In Centos using parentheses is causing cpack errors. + # Set the dependencies specifically for centos + set(CPACK_RPM_PACKAGE_REQUIRES "hsa-rocr, kmod, pciutils, python3, glibc, libgcc, libstdc++") +else() + set(CPACK_RPM_PACKAGE_REQUIRES "hsa-rocr, kmod, pciutils, python3, glibc, (libgcc or libgcc_s1), (libstdc++ or libstdc++6)") +endif() # End EVAL_RESULT + if(DEFINED CPACK_PACKAGING_INSTALL_PREFIX) set ( CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX} ${CPACK_PACKAGING_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}" ) endif() @@ -230,6 +247,9 @@ endif() if ( ROCM_DEP_ROCMCORE ) string ( APPEND CPACK_RPM_PACKAGE_REQUIRES " rocm-core" ) endif() +# Cpack converts !/usr/bin/env python3 to /usr/libexec/platform-python in RHEL8. +# prevent the BRP(buildroot policy) script from checking and modifying interpreter directives +set(CPACK_RPM_SPEC_MORE_DEFINE "%undefine __brp_mangle_shebangs") #Set rpm distro if(CPACK_RPM_PACKAGE_RELEASE) @@ -243,4 +263,8 @@ set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSIO set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT") set(CPACK_RPM_FILE_NAME "RPM-DEFAULT") +if(NOT BUILD_SHARED_LIBS) + string(REPLACE "hsa-rocr" "hsa-rocr-static-dev" CPACK_DEBIAN_PACKAGE_DEPENDS ${CPACK_DEBIAN_PACKAGE_DEPENDS}) + string(REPLACE "hsa-rocr" "hsa-rocr-static-devel" CPACK_RPM_PACKAGE_REQUIRES ${CPACK_RPM_PACKAGE_REQUIRES}) +endif() include ( CPack ) diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 0000000..69fa449 --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1 @@ +_build/ diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..274742d --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,29 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +import re + + +html_theme = "rocm_docs_theme" +html_theme_options = {"flavor": "rocm"} + +extensions = ["rocm_docs"] +external_toc_path = "./sphinx/_toc.yml" + +with open('../CMakeLists.txt', encoding='utf-8') as f: + match = re.search(r'get_package_version_number\(\"?([0-9.]+)[^0-9.]+', f.read()) + if not match: + raise ValueError("VERSION not found!") + version_number = match[1] + +version = version_number +release = version_number +html_title = f"rocminfo {version} Documentation" +project = "rocminfo" +author = "Advanced Micro Devices, Inc." +copyright = ( + "Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved." +) diff --git a/docs/how-to/use-rocm-agent-enumerator.rst b/docs/how-to/use-rocm-agent-enumerator.rst new file mode 100644 index 0000000..2ce8495 --- /dev/null +++ b/docs/how-to/use-rocm-agent-enumerator.rst @@ -0,0 +1,22 @@ +.. meta:: + :description: agent, enumerator ROCmInfo + :keywords: install, rocminfo, AMD, ROCm, ROCmInfo + + +Using ROCm agent enumerator +----------------------------- + +The rocm_agent_enumerator tool prints the list of available AMD GCN ISA or acthitecture names. With the option ‘-name’, it prints out available architecture names that can be used by third-party scripts to determine which ISAs are needed to execute code on all GPUs in the system. + +See the following example output of the rocm_agent_enumerator command on a system with an MI-300X installation, + +.. code-block:: + + gfx000 + gfx941 + + +.. Note:: + +The gfx000 represents the CPU agent. + diff --git a/docs/how-to/use-rocminfo.rst b/docs/how-to/use-rocminfo.rst new file mode 100644 index 0000000..102b05b --- /dev/null +++ b/docs/how-to/use-rocminfo.rst @@ -0,0 +1,190 @@ +.. meta:: + :description: Using ROCmInfo + :keywords: rocminfo, enumerator, info, AMD, ROCm, HSA, hsa + + +================ +Using ROCmInfo +================ + +The ROCmInfo command provides information about the Heterogenous System Architecture (HSA) system attributes and agents. Each agent represents a device and a device can be a CPU or a GPU. + +The output has the following two sections: + +* HSA System Attributes - List of general information of the system. + +* HSA agents - List of devices in the system. + +See the following example output of the ROCmInfo command on a system with MI300X: + +.. code-block:: + + HSA System Attributes + ===================== + Runtime Version: 1.1 + Runtime Ext Version: 1.6 + System Timestamp Freq.: 1000.000000MHz + Sig. Max Wait Duration: 18446744073709551615 (0xFFFFFFFFFFFFFFFF) (timestamp count) + Machine Model: LARGE + System Endianness: LITTLE + Mwaitx: DISABLED + DMAbuf Support: YES + ========== + HSA Agents + ========== + ******* + Agent 1 + ******* + Name: AMD Ryzen 9 7950X 16-Core Processor + Uuid: CPU-XX + Marketing Name: AMD Ryzen 9 7950X 16-Core Processor\ + Vendor Name: CPU\ + Feature: None specified + Profile: FULL_PROFILE + Float Round Mode: NEAR + Max Queue Number: 0(0x0) + Queue Min Size: 0(0x0)\ + Queue Max Size: 0(0x0) + Queue Type: MULTI + Node: 0 + Device Type: CPU + Cache Info: + L1: 32768(0x8000) KB + Chip ID: 0(0x0) + ASIC Revision: 0(0x0) + Cacheline Size: 64(0x40) + Max Clock Freq. (MHz): 4500 + BDFID: 0 + Internal Node ID: 0 + Compute Unit: 32 + SIMDs per CU: 0 + Shader Engines: 0 + Shader Arrs. per Eng.: 0 + WatchPts on Addr. Ranges:1 + Memory Properties: + Features: None + Pool Info: + Pool 1 + Segment: GLOBAL; FLAGS: FINE GRAINED + Size: 65111316(0x3e18514) KB + Allocatable: TRUE + Alloc Granule: 4KB + Alloc Recommended Granule:4KB + Alloc Alignment: 4KB + Accessible by all: TRUE + Pool 2 + Segment: GLOBAL; FLAGS: KERNARG, FINE GRAINED + Size: 65111316(0x3e18514) KB + Allocatable: TRUE + Alloc Granule: 4KB + Alloc Recommended Granule:4KB + Alloc Alignment: 4KB + Accessible by all: TRUE + Pool 3 + Segment: GLOBAL; FLAGS: COARSE GRAINED + Size: 65111316(0x3e18514) KB + Allocatable: TRUE + Alloc Granule: 4KB + Alloc Recommended Granule:4KB + Alloc Alignment: 4KB + Accessible by all: TRUE + ISA Info: + ******* + Agent 2 + ******* + Name: gfx941 + Uuid: GPU-a8673551b40c6374 + Marketing Name: AMD Instinct MI300X + Vendor Name: AMD + Feature: KERNEL_DISPATCH + Profile: BASE_PROFILE + Float Round Mode: NEAR + Max Queue Number: 128(0x80) + Queue Min Size: 64(0x40) + Queue Max Size: 131072(0x20000) + Queue Type: MULTI + Node: 1 + Device Type: GPU + Cache Info: + L1: 32(0x20) KB + L2: 4096(0x1000) KB + L3: 262144(0x40000) KB + Chip ID: 29857(0x74a1) + ASIC Revision: 0(0x0) + Cacheline Size: 64(0x40) + Max Clock Freq. (MHz): 1800 + BDFID: 768 + Internal Node ID: 1 + Compute Unit: 304 + SIMDs per CU: 4 + Shader Engines: 32 + Shader Arrs. per Eng.: 1 + WatchPts on Addr. Ranges:4 + Coherent Host Access: FALSE + Memory Properties: + Features: KERNEL_DISPATCH + Fast F16 Operation: TRUE + Wavefront Size: 64(0x40) + Workgroup Max Size: 1024(0x400) + Workgroup Max Size per Dimension: + x 1024(0x400) + y 1024(0x400) + z 1024(0x400) + Max Waves Per CU: 32(0x20) + Max Work-item Per CU: 2048(0x800) + Grid Max Size: 4294967295(0xffffffff) + Grid Max Size per Dimension: + x 4294967295(0xffffffff) + y 4294967295(0xffffffff) + z 4294967295(0xffffffff) + Max fbarriers/Workgrp: 32 + Packet Processor uCode:: 141 + SDMA engine uCode:: 19 + IOMMU Support:: None + Pool Info: + Pool 1 + Segment: GLOBAL; FLAGS: COARSE GRAINED + Size: 134201344(0x7ffc000) KB + Allocatable: TRUE + Alloc Granule: 4KB + Alloc Recommended Granule:2048KB + Alloc Alignment: 4KB + Accessible by all: FALSE + Pool 2 + Segment: GLOBAL; FLAGS: EXTENDED FINE GRAINED + Size: 134201344(0x7ffc000) KB + Allocatable: TRUE + Alloc Granule: 4KB + Alloc Recommended Granule:2048KB + Alloc Alignment: 4KB + Accessible by all: FALSE + Pool 3 + Segment: GROUP + Size: 64(0x40) KB + Allocatable: FALSE + Alloc Granule: 0KB + Alloc Recommended Granule:0KB + Alloc Alignment: 0KB + Accessible by all: FALSE + ISA Info: + ISA 1 + Name: amdgcn-amd-amdhsa--gfx941:sramecc+:xnack- + Machine Models: HSA_MACHINE_MODEL_LARGE + Profiles: HSA_PROFILE_BASE + Default Rounding Mode: NEAR + Default Rounding Mode: NEAR + Fast f16: TRUE + Workgroup Max Size: 1024(0x400 + + Workgroup Max Size per Dimension: + x 1024(0x400) + y 1024(0x400) + z 1024(0x400) + Grid Max Size: 4294967295(0xffffffff) + Grid Max Size per Dimension: + x 4294967295(0xffffffff) + y 4294967295(0xffffffff) + z 4294967295(0xffffffff) + + *** Done *** + diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..5aaa420 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,33 @@ +.. meta:: + :description: Install ROCmInfo + :keywords: install, rocminfo, AMD, ROCm + +ROCmInfo documentation +************************* + +ROCmInfo is a ROCm application for reporting system information. It is a tool shipped to enumerate GPU agents available on a working ROCm stack. + + +You can access ROCmInfo code at `GitHub. `_ + +.. grid:: 2 + :gutter: 3 + + .. grid-item-card:: Build + + * :doc:`ROCmInfo installation <./install/build>` + + + .. grid-item-card:: How to + + * :doc:`Use ROCmInfo ` + * :doc:`Use ROCm agent enumerator ` + + + +To contribute to the documentation, refer to +`Contributing to ROCm `_. + +You can find licensing information on the +`Licensing `_ page. + diff --git a/docs/install/build.rst b/docs/install/build.rst new file mode 100644 index 0000000..e0bf8d1 --- /dev/null +++ b/docs/install/build.rst @@ -0,0 +1,49 @@ +.. meta:: + :description: Install ROCmInfo + :keywords: install, rocminfo, AMD, ROCm + + +Building ROCmInfo +***************** + +Use the standard cmake build procedure to build ROCmInfo. The location of ROCm root (parent directory containing ROCM headers and libraries) must be provided +as a CMake argument using the standard CMAKE_PREFIX_PATH CMake variable. + +After cloning the ROCmInfo git repo, you must perform a `git-fetch --tags` to get the tags residing on the repo. These tags are used for versioning. + +For example, + +.. code-block:: + + $ git fetch --tags origin + + Building from the CMakeLists.txt directory might look like this: + + mkdir -p build + + cd build + + cmake -DCMAKE_PREFIX_PATH=/opt/rocm .. + + make + + cd .. + +Upon a successful build, the binary, ROCmInfo, and the Python script, rocm_agent_enumerator, will be in the `build` folder. + +ROCmInfo execution +------------------- + +"rocminfo" gives information about the HSA system attributes and agents. + +"rocm_agent_enumerator" prints the list of available AMD GCN ISA or architecture names. With the option '-name', it prints out available architectures names obtained from ROCmInfo. Otherwise, it generates ISA in one of five different ways: + +1. ROCM_TARGET_LST : a user defined environment variable, set to the path and filename where to find the "target.lst" file. This can be used in an install environment with sandbox, where execution of "rocminfo" is not possible. + +2. target.lst : user-supplied text file, in the same folder as "rocm_agent_enumerator". This is used in a container setting where ROCm stack may usually not available. + +3. HSA topology : gathers the information from the HSA node topology in /sys/class/kfd/kfd/topology/nodes/ + +4. lspci : enumerate PCI bus and locate supported devices from a hard-coded lookup table. + +5. ROCmInfo : a tool shipped with this script to enumerate GPU agents available on a working ROCm stack. diff --git a/docs/license.rst b/docs/license.rst new file mode 100644 index 0000000..6082363 --- /dev/null +++ b/docs/license.rst @@ -0,0 +1,5 @@ +======= +License +======= + +.. include:: ../License.txt diff --git a/docs/sphinx/.gitignore b/docs/sphinx/.gitignore new file mode 100644 index 0000000..732e971 --- /dev/null +++ b/docs/sphinx/.gitignore @@ -0,0 +1 @@ +_toc.yml diff --git a/docs/sphinx/_toc.yml.in b/docs/sphinx/_toc.yml.in new file mode 100644 index 0000000..2cde262 --- /dev/null +++ b/docs/sphinx/_toc.yml.in @@ -0,0 +1,19 @@ +defaults: + numbered: False +root: index +subtrees: +- caption: Install + entries: + - file: install/build.rst + title: ROCmInfo installation + +- caption: How to + entries: + - file: how-to/use-rocminfo.rst + title: Use ROCmInfo + - file: how-to/use-rocm-agent-enumerator.rst + title: Use ROCm agent enumerator + +- caption: About + entries: + - file: license.md diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in new file mode 100644 index 0000000..c7df173 --- /dev/null +++ b/docs/sphinx/requirements.in @@ -0,0 +1 @@ +rocm-docs-core==1.5.0 diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt new file mode 100644 index 0000000..a21583b --- /dev/null +++ b/docs/sphinx/requirements.txt @@ -0,0 +1,147 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile requirements.in +# +accessible-pygments==0.0.4 + # via pydata-sphinx-theme +alabaster==0.7.16 + # via sphinx +babel==2.15.0 + # via + # pydata-sphinx-theme + # sphinx +beautifulsoup4==4.12.3 + # via pydata-sphinx-theme +breathe==4.35.0 + # via rocm-docs-core +certifi==2024.2.2 + # via requests +cffi==1.16.0 + # via + # cryptography + # pynacl +charset-normalizer==3.3.2 + # via requests +click==8.1.7 + # via sphinx-external-toc +cryptography==42.0.7 + # via pyjwt +deprecated==1.2.14 + # via pygithub +docutils==0.21.2 + # via + # breathe + # myst-parser + # pydata-sphinx-theme + # sphinx +fastjsonschema==2.19.1 + # via rocm-docs-core +gitdb==4.0.11 + # via gitpython +gitpython==3.1.43 + # via rocm-docs-core +idna==3.7 + # via requests +imagesize==1.4.1 + # via sphinx +jinja2==3.1.4 + # via + # myst-parser + # sphinx +markdown-it-py==3.0.0 + # via + # mdit-py-plugins + # myst-parser +markupsafe==2.1.5 + # via jinja2 +mdit-py-plugins==0.4.0 + # via myst-parser +mdurl==0.1.2 + # via markdown-it-py +myst-parser==3.0.1 + # via rocm-docs-core +packaging==24.0 + # via + # pydata-sphinx-theme + # sphinx +pycparser==2.22 + # via cffi +pydata-sphinx-theme==0.15.2 + # via + # rocm-docs-core + # sphinx-book-theme +pygithub==2.3.0 + # via rocm-docs-core +pygments==2.18.0 + # via + # accessible-pygments + # pydata-sphinx-theme + # sphinx +pyjwt[crypto]==2.8.0 + # via pygithub +pynacl==1.5.0 + # via pygithub +pyyaml==6.0.1 + # via + # myst-parser + # rocm-docs-core + # sphinx-external-toc +requests==2.31.0 + # via + # pygithub + # sphinx +rocm-docs-core==1.5.0 + # via -r requirements.in +smmap==5.0.1 + # via gitdb +snowballstemmer==2.2.0 + # via sphinx +soupsieve==2.5 + # via beautifulsoup4 +sphinx==7.3.7 + # via + # breathe + # myst-parser + # pydata-sphinx-theme + # rocm-docs-core + # sphinx-book-theme + # sphinx-copybutton + # sphinx-design + # sphinx-external-toc + # sphinx-notfound-page +sphinx-book-theme==1.1.2 + # via rocm-docs-core +sphinx-copybutton==0.5.2 + # via rocm-docs-core +sphinx-design==0.5.0 + # via rocm-docs-core +sphinx-external-toc==1.0.1 + # via rocm-docs-core +sphinx-notfound-page==1.0.0 + # via rocm-docs-core +sphinxcontrib-applehelp==1.0.8 + # via sphinx +sphinxcontrib-devhelp==1.0.6 + # via sphinx +sphinxcontrib-htmlhelp==2.0.5 + # via sphinx +sphinxcontrib-jsmath==1.0.1 + # via sphinx +sphinxcontrib-qthelp==1.0.7 + # via sphinx +sphinxcontrib-serializinghtml==1.1.10 + # via sphinx +tomli==2.0.1 + # via sphinx +typing-extensions==4.11.0 + # via + # pydata-sphinx-theme + # pygithub +urllib3==2.2.1 + # via + # pygithub + # requests +wrapt==1.16.0 + # via deprecated diff --git a/rocm_agent_enumerator b/rocm_agent_enumerator index ceb9e11..43cdd03 100755 --- a/rocm_agent_enumerator +++ b/rocm_agent_enumerator @@ -81,7 +81,7 @@ def staticVars(**kwargs): return func return deco -@staticVars(search_term=re.compile("gfx[0-9a-fA-F]+")) +@staticVars(search_term=re.compile(r"gfx[0-9a-fA-F]+")) def getGCNISA(line, match_from_beginning = False): if match_from_beginning is True: result = getGCNISA.search_term.match(line) @@ -92,7 +92,7 @@ def getGCNISA(line, match_from_beginning = False): return result.group(0) return None -@staticVars(search_name=re.compile("gfx[0-9a-fA-F]+:[-+:\w]+")) +@staticVars(search_name=re.compile(r"gfx[0-9a-fA-F]+(:[-+:\w]+)?")) def getGCNArchName(line): result = getGCNArchName.search_name.search(line) @@ -135,8 +135,8 @@ def readFromROCMINFO(search_arch_name = False): break # run rocminfo rocminfo_output = subprocess.Popen(rocminfo_executable, stdout=subprocess.PIPE).communicate()[0].decode("utf-8").split('\n') - term1 = re.compile("Cannot allocate memory") - term2 = re.compile("HSA_STATUS_ERROR_OUT_OF_RESOURCES") + term1 = re.compile(r"Cannot allocate memory") + term2 = re.compile(r"HSA_STATUS_ERROR_OUT_OF_RESOURCES") done = 1 for line in rocminfo_output: if term1.search(line) is not None or term2.search(line) is not None: @@ -149,9 +149,9 @@ def readFromROCMINFO(search_arch_name = False): # search AMDGCN gfx ISA if search_arch_name is True: - line_search_term = re.compile("\A\s+Name:\s+(amdgcn-amd-amdhsa--gfx\d+)") + line_search_term = re.compile(r"\A\s+Name:\s+(amdgcn-amd-amdhsa--gfx\d+)") else: - line_search_term = re.compile("\A\s+Name:\s+(gfx\d+)") + line_search_term = re.compile(r"\A\s+Name:\s+(gfx\d+)") for line in rocminfo_output: if line_search_term.match(line) is not None: if search_arch_name is True: @@ -172,7 +172,7 @@ def readFromLSPCI(): except: lspci_output = [] - target_search_term = re.compile("1002:\w+") + target_search_term = re.compile(r"1002:\w+") for line in lspci_output: search_result = target_search_term.search(line) if search_result is not None: @@ -196,7 +196,7 @@ def readFromKFD(): if os.path.isdir(node_path): prop_path = node_path + '/properties' if os.path.isfile(prop_path) and os.access(prop_path, os.R_OK): - target_search_term = re.compile("gfx_target_version.+") + target_search_term = re.compile(r"gfx_target_version.+") with open(prop_path) as f: try: line = f.readline() @@ -209,9 +209,28 @@ def readFromKFD(): if search_result is not None: device_id = int(search_result.group(0).split(' ')[1], 10) if device_id != 0: - major_ver = int((device_id / 10000) % 100) - minor_ver = int((device_id / 100) % 100) - stepping_ver = int(device_id % 100) + gfx_override = os.environ.get("HSA_OVERRIDE_GFX_VERSION") + if gfx_override is not None: + try: + override_tokens = gfx_override.split('.') + major_ver=int(override_tokens[0]) + minor_ver=int(override_tokens[1]) + stepping_ver=int(override_tokens[2]) + if major_ver > 63 or minor_ver > 255 or stepping_ver > 255: + print('Invalid HSA_OVERRIDE_GFX_VERSION value') + major_ver = 0 + minor_ver = 0 + stepping_ver = 0 + except Exception as e: + print('Invalid HSA_OVERRIDE_GFX_VERSION format expected \"1.2.3\"') + major_ver = 0 + minor_ver = 0 + stepping_ver = 0 + else: + major_ver = int((device_id / 10000) % 100) + minor_ver = int((device_id / 100) % 100) + stepping_ver = int(device_id % 100) + target_list.append("gfx" + format(major_ver, 'd') + format(minor_ver, 'x') + format(stepping_ver, 'x')) line = f.readline() diff --git a/rocminfo.cc b/rocminfo.cc index 8ed9111..4d99f30 100755 --- a/rocminfo.cc +++ b/rocminfo.cc @@ -56,9 +56,14 @@ #include #include +#include +#include + #include "hsa/hsa.h" #include "hsa/hsa_ext_amd.h" +using namespace std; + #define COL_BLU "\x1B[34m" #define COL_KCYN "\x1B[36m" #define COL_GRN "\x1B[32m" @@ -90,10 +95,13 @@ // calls, and is later used for reference when displaying the information. struct system_info_t { uint16_t major, minor; + uint16_t ext_major, ext_minor; uint64_t timestamp_frequency = 0; uint64_t max_wait = 0; hsa_endianness_t endianness; hsa_machine_model_t machine_model; + bool mwaitx_enabled; + bool dmabuf_support; }; // This structure holds agent information acquired through hsa info related @@ -135,6 +143,11 @@ struct agent_info_t { uint16_t workgroup_max_dim[3]; uint16_t bdf_id; bool fast_f16; + bool coherent_host_access; + uint32_t pkt_processor_ucode_ver; + uint32_t sdma_ucode_ver; + hsa_amd_iommu_version_t iommu_support; + uint8_t memory_properties[8]; }; // This structure holds memory pool information acquired through hsa info @@ -145,6 +158,7 @@ typedef struct { size_t pool_size; bool alloc_allowed; size_t alloc_granule; + size_t alloc_rec_granule; size_t pool_alloc_alignment; bool pl_access; uint32_t global_flag; @@ -180,6 +194,8 @@ static const uint32_t kLabelFieldSize = 25; static const uint32_t kValueFieldSize = 35; static const uint32_t kIndentSize = 2; +static bool wsl_env = false; + enum rocmi_int_format { ROCMI_INT_FORMAT_DEC = 1, ROCMI_INT_FORMAT_HEX = 2, @@ -215,6 +231,35 @@ std::string int_to_string(uint32_t i, return sd.str(); } +pair exec(const char* cmd) { + array buffer; + string result; + int return_code = -1; + auto pclose_wrapper = [&return_code](FILE* cmd){ return_code = pclose(cmd); }; + { // scope is important, have to make sure the ptr goes out of scope first + const unique_ptr pipe(popen(cmd, "r"), pclose_wrapper); + if (pipe) { + while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { + result += buffer.data(); + } + } + } + return make_pair(result, return_code); +} + +static void DetectWSLEnvironment() { + auto process_ret = exec("which wslinfo > /dev/null 2>&1"); + if (process_ret.second) + return; + + process_ret = exec("wslinfo --msal-proxy-path"); + if (process_ret.second == 0 && + strcasestr(process_ret.first.c_str(), "msal.wsl.proxy.exe") != nullptr) { + printf("WSL environment detected.\n"); + wsl_env = true; + } +} + static void printLabelInt(char const *l, int d, uint32_t indent_lvl = 0) { std::string ind(kIndentSize * indent_lvl, ' '); @@ -255,6 +300,14 @@ static hsa_status_t AcquireSystemInfo(system_info_t *sys_info) { err = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &sys_info->minor); RET_IF_HSA_ERR(err); + // Get HSA Ext Interface version + err = hsa_system_get_info(HSA_AMD_SYSTEM_INFO_EXT_VERSION_MAJOR, + &sys_info->ext_major); + RET_IF_HSA_ERR(err); + err = hsa_system_get_info(HSA_AMD_SYSTEM_INFO_EXT_VERSION_MINOR, + &sys_info->ext_minor); + RET_IF_HSA_ERR(err); + // Get timestamp frequency err = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sys_info->timestamp_frequency); @@ -273,12 +326,24 @@ static hsa_status_t AcquireSystemInfo(system_info_t *sys_info) { err = hsa_system_get_info(HSA_SYSTEM_INFO_MACHINE_MODEL, &sys_info->machine_model); RET_IF_HSA_ERR(err); + + // Get mwaitx mode + err = hsa_system_get_info(HSA_AMD_SYSTEM_INFO_MWAITX_ENABLED, + &sys_info->mwaitx_enabled); + RET_IF_HSA_ERR(err); + // Get DMABuf support + err = hsa_system_get_info(HSA_AMD_SYSTEM_INFO_DMABUF_SUPPORTED, + &sys_info->dmabuf_support); + RET_IF_HSA_ERR(err); + return err; } static void DisplaySystemInfo(system_info_t const *sys_info) { printLabel("Runtime Version:"); printf("%d.%d\n", sys_info->major, sys_info->minor); + printLabel("Runtime Ext Version:"); + printf("%d.%d\n", sys_info->ext_major, sys_info->ext_minor); printLabel("System Timestamp Freq.:"); printf("%fMHz\n", sys_info->timestamp_frequency / 1e6); printLabel("Sig. Max Wait Duration:"); @@ -298,6 +363,13 @@ static void DisplaySystemInfo(system_info_t const *sys_info) { } else if (HSA_ENDIANNESS_BIG == sys_info->endianness) { printValueStr("BIG"); } + + printLabel("Mwaitx:"); + printf("%s\n", sys_info->mwaitx_enabled ? "ENABLED" : "DISABLED"); + + printLabel("DMAbuf Support:"); + printf("%s\n", sys_info->dmabuf_support ? "YES" : "NO"); + printf("\n"); } @@ -451,6 +523,18 @@ AcquireAgentInfo(hsa_agent_t agent, agent_info_t *agent_i) { &agent_i->compute_unit); RET_IF_HSA_ERR(err); + // Get coherent Host access + err = hsa_agent_get_info(agent, + (hsa_agent_info_t) HSA_AMD_AGENT_INFO_SVM_DIRECT_HOST_ACCESS, + &agent_i->coherent_host_access); + RET_IF_HSA_ERR(err); + + // Get memory properties + err = hsa_agent_get_info(agent, + (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MEMORY_PROPERTIES, + agent_i->memory_properties); + RET_IF_HSA_ERR(err); + // Check if the agent is kernel agent if (agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) { // Get flaf of fast_f16 operation @@ -492,13 +576,27 @@ AcquireAgentInfo(hsa_agent_t agent, agent_info_t *agent_i) { (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU, &agent_i->max_waves_per_cu); RET_IF_HSA_ERR(err); + + err = hsa_agent_get_info(agent, + (hsa_agent_info_t)HSA_AMD_AGENT_INFO_UCODE_VERSION, + &agent_i->pkt_processor_ucode_ver); + RET_IF_HSA_ERR(err); + err = hsa_agent_get_info(agent, + (hsa_agent_info_t)HSA_AMD_AGENT_INFO_SDMA_UCODE_VERSION, + &agent_i->sdma_ucode_ver); + RET_IF_HSA_ERR(err); + err = hsa_agent_get_info(agent, + (hsa_agent_info_t)HSA_AMD_AGENT_INFO_IOMMU_SUPPORT, + &agent_i->iommu_support); + RET_IF_HSA_ERR(err); } return err; } static void DisplayAgentInfo(agent_info_t *agent_i) { printLabelStr("Name:", agent_i->name, 1); - printLabelStr("Uuid:", agent_i->uuid, 1); + if (!wsl_env || HSA_DEVICE_TYPE_CPU == agent_i->device_type) + printLabelStr("Uuid:", agent_i->uuid, 1); printLabelStr("Marketing Name:", agent_i->device_mkt_name, 1); printLabelStr("Vendor Name:", agent_i->vendor_name, 1); @@ -575,16 +673,28 @@ static void DisplayAgentInfo(agent_info_t *agent_i) { } printLabelStr("Chip ID:", int_to_string(agent_i->chip_id), 1); - printLabelStr("ASIC Revision:", int_to_string(agent_i->asic_revision), 1); + if (!wsl_env) + printLabelStr("ASIC Revision:", int_to_string(agent_i->asic_revision), 1); printLabelStr("Cacheline Size:", int_to_string(agent_i->cacheline_size), 1); - printLabelInt("Max Clock Freq. (MHz):", agent_i->max_clock_freq, 1); - printLabelInt("BDFID:", agent_i->bdf_id, 1); + if (!wsl_env || HSA_DEVICE_TYPE_GPU == agent_i->device_type) + printLabelInt("Max Clock Freq. (MHz):", agent_i->max_clock_freq, 1); + if (!wsl_env) + printLabelInt("BDFID:", agent_i->bdf_id, 1); printLabelInt("Internal Node ID:", agent_i->internal_node_id, 1); printLabelInt("Compute Unit:", agent_i->compute_unit, 1); printLabelInt("SIMDs per CU:", agent_i->simds_per_cu, 1); printLabelInt("Shader Engines:", agent_i->shader_engs, 1); printLabelInt("Shader Arrs. per Eng.:", agent_i->shader_arrs_per_sh_eng, 1); - printLabelInt("WatchPts on Addr. Ranges:", agent_i->max_addr_watch_pts, 1); + if (!wsl_env) + printLabelInt("WatchPts on Addr. Ranges:", agent_i->max_addr_watch_pts, 1); + + if (agent_i->device_type == HSA_DEVICE_TYPE_GPU) + printLabelStr("Coherent Host Access:", agent_i->coherent_host_access ? "TRUE":"FALSE", 1); + + printLabel("Memory Properties:", false, 1); + if (hsa_flag_isset64(agent_i->memory_properties, HSA_AMD_MEMORY_PROPERTY_AGENT_IS_APU)) + printf("%s", "APU"); + printf("\n"); printLabel("Features:", false, 1); if (agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) { @@ -627,6 +737,11 @@ static void DisplayAgentInfo(agent_info_t *agent_i) { printLabelStr("z", int_to_string(agent_i->grid_max_dim.z), 2); printLabelInt("Max fbarriers/Workgrp:", agent_i->fbarrier_max_size, 1); + + printLabelInt("Packet Processor uCode::", agent_i->pkt_processor_ucode_ver, 1); + printLabelInt("SDMA engine uCode::", agent_i->sdma_ucode_ver, 1); + printLabelStr("IOMMU Support::", + agent_i->iommu_support == HSA_IOMMU_SUPPORT_V2 ? "V2" : "None", 1); } } @@ -657,6 +772,11 @@ static hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool, &pool_i->alloc_granule); RET_IF_HSA_ERR(err); + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_REC_GRANULE, + &pool_i->alloc_rec_granule); + RET_IF_HSA_ERR(err); + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, &pool_i->pool_alloc_alignment); @@ -687,6 +807,11 @@ static void MakeGlobalFlagsString(uint32_t global_flag, std::string* out_str) { flags.push_back("COARSE GRAINED"); } + if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_EXTENDED_SCOPE_FINE_GRAINED & global_flag) + { + flags.push_back("EXTENDED FINE GRAINED"); + } + if (flags.size() > 0) { *out_str += flags[0]; } @@ -737,6 +862,9 @@ static void DisplayPoolInfo(pool_info_t *pool_i, uint32_t indent) { std::string gr_str = std::to_string(pool_i->alloc_granule/1024)+"KB"; printLabelStr("Alloc Granule:", gr_str.c_str(), indent); + std::string rgr_str = std::to_string(pool_i->alloc_rec_granule / 1024) + "KB"; + printLabelStr("Alloc Recommended Granule:", rgr_str.c_str(), indent); + std::string al_str = std::to_string(pool_i->pool_alloc_alignment/1024)+"KB"; printLabelStr("Alloc Alignment:", al_str.c_str(), indent); @@ -1056,16 +1184,31 @@ int CheckInitialState(void) { } } if (is_live){ - printf("%sROCk module is loaded%s\n", COL_WHT, COL_RESET); + std::ifstream amdgpu_version("/sys/module/amdgpu/version"); + if (amdgpu_version){ + std::stringstream buffer; + buffer << amdgpu_version.rdbuf(); + std::string vers; + std::getline(buffer, vers); + amdgpu_version.close(); + printf("%sROCk module version %s is loaded%s\n", COL_WHT, vers.c_str(), COL_RESET); + } else { + printf("%sROCk module is loaded%s\n", COL_WHT, COL_RESET); + } } else { printf("%sROCk module is NOT live, possibly no GPU devices%s\n", COL_RED, COL_RESET); return -1; } } else { - printf("%sROCk module is NOT loaded, possibly no GPU devices%s\n", + int module_dir; + module_dir = open("/sys/module/amdgpu", O_DIRECTORY); + if (module_dir < 0) { + printf("%sROCk module is NOT loaded, possibly no GPU devices%s\n", COL_RED, COL_RESET); - return -1; + return -1; + } + close(module_dir); } // Check if user belongs to the group for /dev/kfd (e.g. "video" or @@ -1159,7 +1302,9 @@ int CheckInitialState(void) { int main(int argc, char* argv[]) { hsa_status_t err; - if (CheckInitialState()) { + DetectWSLEnvironment(); + + if (!wsl_env && CheckInitialState()) { return 1; } err = hsa_init();