No implicit init, and fix docs build

mdboom · mdboom · commit dc56154b0b64 · 2026-01-05T15:42:28.000-05:00
diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml
@@ -202,10 +202,6 @@ jobs:
       - name: Build all docs
         if: ${{ inputs.component == 'all' }}
         run: |
-          pushd cuda_core/docs/
-          python -c "import cuda.core"
-          popd
-
           pushd cuda_python/docs/
           if [[ "${{ inputs.is-release }}" == "false" ]]; then
             ./build_all_docs.sh latest-only
diff --git a/cuda_core/cuda/core/system/__init__.py b/cuda_core/cuda/core/system/__init__.py
@@ -22,10 +22,9 @@
     from .device import Device, DeviceArchitecture
     from .exceptions import *
 
-    initialize()
-
     __all__.extend(
         [
+            "initialize",
             "Device",
             "DeviceArchitecture",
             "UninitializedError",
diff --git a/cuda_core/cuda/core/system/_nvml_context.pyx b/cuda_core/cuda/core/system/_nvml_context.pyx
@@ -29,7 +29,7 @@ _lock = threading.Lock()
 
 
 def initialize() -> None:
-    """Idempotent (per-process) initialization of NVUtil's NVML
+    """Idempotent (per-process) initialization of the NVIDIA Management Library (NVML).
 
     Notes
     -----
@@ -89,5 +89,7 @@ def validate() -> None:
     """
     if _NVML_STATE == _NVMLState.DISABLED_LIBRARY_NOT_FOUND:
         raise exceptions.LibraryNotFoundError("The underlying NVML library was not found")
+    elif not is_initialized():
+        raise exceptions.UninitializedError("NVML library is not initialized")
     elif nvml.device_get_count_v2() == 0:
         raise exceptions.GpuNotFoundError("No GPUs available")
diff --git a/cuda_core/cuda/core/system/device.pyx b/cuda_core/cuda/core/system/device.pyx
@@ -10,6 +10,7 @@ from typing import Iterable
 
 from cuda.bindings import _nvml as nvml
 
+from ._nvml_context import validate
 from .utils import unpack_bitmask
 
 
@@ -189,6 +190,8 @@ cdef class Device:
     cdef intptr_t _handle
 
     def __init__(self, index: int | None = None, uuid: bytes | str | None = None):
+        validate()
+
         if index is not None and uuid is not None:
             raise ValueError("Handle requires only one of either device `index` or `uuid`.")
         if index is None and uuid is None:
diff --git a/cuda_core/cuda/core/system/system.pyx b/cuda_core/cuda/core/system/system.pyx
@@ -15,6 +15,7 @@ HAS_WORKING_NVML = _BINDINGS_VERSION >= (13, 1, 2) or (_BINDINGS_VERSION[0] == 1
 
 if HAS_WORKING_NVML:
     from cuda.bindings import _nvml as nvml
+    from ._nvml_context import validate
 else:
     from cuda.core._utils.cuda_utils import driver, handle_return, runtime
 
@@ -36,6 +37,7 @@ def get_driver_version_full() -> tuple[int, int, int]:
     """
     cdef int v
     if HAS_WORKING_NVML:
+        validate()
         v = nvml.system_get_cuda_driver_version()
     else:
         v = handle_return(driver.cuDriverGetVersion())
@@ -48,6 +50,7 @@ def get_gpu_driver_version() -> tuple[int, ...]:
     """
     if not HAS_WORKING_NVML:
         raise RuntimeError("NVML library is not available")
+    validate()
     return tuple(int(v) for v in nvml.system_get_driver_version().split("."))
 
 
@@ -65,6 +68,7 @@ def get_num_devices() -> int:
     Return the number of devices in the system.
     """
     if HAS_WORKING_NVML:
+        validate()
         return nvml.device_get_count_v2()
     else:
         return handle_return(runtime.cudaGetDeviceCount())
@@ -84,6 +88,7 @@ def get_process_name(pid: int) -> str:
     name: str
         The process name.
     """
+    validate()
     return nvml.system_get_process_name(pid)
 
 
diff --git a/cuda_core/docs/build_docs.sh b/cuda_core/docs/build_docs.sh
@@ -22,7 +22,7 @@ if [[ -z "${SPHINX_CUDA_CORE_VER}" ]]; then
 fi
 
 # build the docs (in parallel)
-SPHINXOPTS="-T -j 4 -d build/.doctrees" make html
+SPHINXOPTS="-j 4 -d build/.doctrees" make html
 
 # for debugging/developing (conf.py), please comment out the above line and
 # use the line below instead, as we must build in serial to avoid getting
diff --git a/cuda_core/docs/source/api.rst b/cuda_core/docs/source/api.rst
@@ -64,11 +64,21 @@ CUDA compilation toolchain
    LinkerOptions
 
 
-CUDA system information
------------------------
+CUDA system information and NVIDIA Management Library (NVML)
+------------------------------------------------------------
 
-.. automethod:: cuda.core._system.System.get_driver_version
-.. automethod:: cuda.core._system.System.get_num_devices
+.. autosummary::
+   :toctree: generated/
+
+   system.initialize
+   system.Device
+   system.DeviceArchitecture
+   system.get_driver_version
+   system.get_driver_version_full
+   system.get_gpu_driver_version
+   system.get_num_devices
+   system.get_nvml_version
+   system.get_process_name
 
 
 .. module:: cuda.core.utils
diff --git a/cuda_core/tests/system/conftest.py b/cuda_core/tests/system/conftest.py
@@ -9,3 +9,11 @@
 skip_if_nvml_unsupported = pytest.mark.skipif(
     not system.HAS_WORKING_NVML, reason="NVML support requires cuda.bindings version 12.9.6+ or 13.1.2+"
 )
+
+
+@pytest.fixture(autouse=True, scope="session")
+def initialize_nvml():
+    if system.HAS_WORKING_NVML:
+        from cuda.core.system._nvml_context import initialize
+
+        initialize()
diff --git a/cuda_core/tests/system/test_nvml_context.py b/cuda_core/tests/system/test_nvml_context.py
@@ -25,19 +25,20 @@ def _run_process(target):
     assert not p.exitcode
 
 
-def _test_initialized():
+def _test_uninitialized():
     from cuda.core.system import _nvml_context
 
-    assert _nvml_context._NVML_STATE == INITIALIZED
+    assert _nvml_context._NVML_STATE == UNINITIALIZED
 
 
-def test_initialized():
-    _run_process(_test_initialized)
+def test_uninitialized():
+    _run_process(_test_uninitialized)
 
 
 def _test_is_initialized():
     from cuda.core.system import _nvml_context
 
+    _nvml_context.initialize()
     assert _nvml_context._NVML_STATE == INITIALIZED
     assert _nvml_context.is_initialized() is True
 
@@ -46,20 +47,11 @@ def test_is_initialized():
     _run_process(_test_is_initialized)
 
 
-def _test_uninitialized():
-    from cuda.core.system import _nvml_context
-
-    _nvml_context._NVML_STATE = UNINITIALIZED
-    assert _nvml_context.is_initialized() is False
-
-
-def test_uninitialized():
-    _run_process(_test_uninitialized)
-
-
 def _test_wrong_owner():
     from cuda.core.system import _nvml_context
 
+    _nvml_context.initialize()
+
     _nvml_context._NVML_OWNER_PID = 0
     assert _nvml_context.is_initialized() is False
 
@@ -81,6 +73,8 @@ def test_wsl():
 def _test_validate():
     from cuda.core.system import _nvml_context
 
+    _nvml_context.initialize()
+
     assert _nvml_context.validate() is None
 
 
diff --git a/cuda_core/tests/system/test_system_device.py b/cuda_core/tests/system/test_system_device.py
@@ -19,7 +19,10 @@
 if system.HAS_WORKING_NVML:
     from cuda.bindings import _nvml as nvml
 
-    if system.get_num_devices() == 0:
+
+@pytest.fixture(autouse=True, scope="module")
+def check_gpu_available(initialize_nvml):
+    if not system.HAS_WORKING_NVML or system.get_num_devices() == 0:
         pytest.skip("No GPUs available to run device tests", allow_module_level=True)