From f4b6147a60a84013e5d67cff1d1cd0b3d1075a7c Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Wed, 18 Feb 2026 16:00:30 +0000 Subject: [PATCH] Add Helm override files to match Compose --- helm/overrides/values-a100-40gb.yaml | 85 +++++++++++++++++++ helm/overrides/values-a10g.yaml | 85 +++++++++++++++++++ helm/overrides/values-l40s.yaml | 85 +++++++++++++++++++ tools/harness/README.md | 15 ++-- tools/harness/plans/SERVICE_MANAGER.md | 37 ++++++-- .../harness/src/nv_ingest_harness/cli/run.py | 4 +- .../service_manager/__init__.py | 5 +- .../nv_ingest_harness/service_manager/helm.py | 13 ++- 8 files changed, 305 insertions(+), 24 deletions(-) create mode 100644 helm/overrides/values-a100-40gb.yaml create mode 100644 helm/overrides/values-a10g.yaml create mode 100644 helm/overrides/values-l40s.yaml diff --git a/helm/overrides/values-a100-40gb.yaml b/helm/overrides/values-a100-40gb.yaml new file mode 100644 index 000000000..003c234ba --- /dev/null +++ b/helm/overrides/values-a100-40gb.yaml @@ -0,0 +1,85 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# GPU-specific overrides for A100 40GB (loaded by harness when --deployment-type helm --sku a100-40gb). +# Sets NIM_TRITON_MAX_BATCH_SIZE=1 per NIM to match docker-compose.a100-40gb.yaml. 
+ +nimOperator: + page_elements: + env: + - name: NIM_HTTP_API_PORT + value: "8000" + - name: NIM_TRITON_LOG_VERBOSE + value: "1" + - name: NIM_TRITON_MAX_BATCH_SIZE + value: "1" + - name: NIM_TRITON_CPU_THREADS_PRE_PROCESSOR + value: "2" + - name: OMP_NUM_THREADS + value: "2" + - name: NIM_TRITON_CPU_THREADS_POST_PROCESSOR + value: "1" + - name: NIM_ENABLE_OTEL + value: "true" + - name: NIM_OTEL_SERVICE_NAME + value: "page-elements" + - name: NIM_OTEL_TRACES_EXPORTER + value: "otlp" + - name: NIM_OTEL_METRICS_EXPORTER + value: "console" + - name: NIM_OTEL_EXPORTER_OTLP_ENDPOINT + value: "http://otel-collector:4318" + - name: TRITON_OTEL_URL + value: "http://otel-collector:4318/v1/traces" + - name: TRITON_OTEL_RATE + value: "1" + + graphic_elements: + env: + - name: NIM_HTTP_API_PORT + value: "8000" + - name: NIM_TRITON_LOG_VERBOSE + value: "1" + - name: NIM_TRITON_RATE_LIMIT + value: "3" + - name: NIM_TRITON_MAX_BATCH_SIZE + value: "1" + - name: NIM_TRITON_CUDA_MEMORY_POOL_MB + value: "2048" + - name: OMP_NUM_THREADS + value: "1" + + table_structure: + env: + - name: NIM_HTTP_API_PORT + value: "8000" + - name: NIM_TRITON_LOG_VERBOSE + value: "1" + - name: NIM_TRITON_RATE_LIMIT + value: "3" + - name: NIM_TRITON_MAX_BATCH_SIZE + value: "1" + - name: NIM_TRITON_CUDA_MEMORY_POOL_MB + value: "2048" + - name: OMP_NUM_THREADS + value: "1" + + nemoretriever_ocr_v1: + env: + - name: OMP_NUM_THREADS + value: "8" + - name: NIM_HTTP_API_PORT + value: "8000" + - name: NIM_TRITON_LOG_VERBOSE + value: "1" + - name: NIM_TRITON_MAX_BATCH_SIZE + value: "1" + + llama_3_2_nv_rerankqa_1b_v2: + env: + - name: NIM_HTTP_API_PORT + value: "8000" + - name: NIM_TRITON_LOG_VERBOSE + value: "1" + - name: NIM_TRITON_MAX_BATCH_SIZE + value: "1" diff --git a/helm/overrides/values-a10g.yaml b/helm/overrides/values-a10g.yaml new file mode 100644 index 000000000..2c78459f2 --- /dev/null +++ b/helm/overrides/values-a10g.yaml @@ -0,0 +1,85 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA 
CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# GPU-specific overrides for A10G (loaded by harness when --deployment-type helm --sku a10g). +# Sets NIM_TRITON_MAX_BATCH_SIZE=1 per NIM to match docker-compose.a10g.yaml. + +nimOperator: + page_elements: + env: + - name: NIM_HTTP_API_PORT + value: "8000" + - name: NIM_TRITON_LOG_VERBOSE + value: "1" + - name: NIM_TRITON_MAX_BATCH_SIZE + value: "1" + - name: NIM_TRITON_CPU_THREADS_PRE_PROCESSOR + value: "2" + - name: OMP_NUM_THREADS + value: "2" + - name: NIM_TRITON_CPU_THREADS_POST_PROCESSOR + value: "1" + - name: NIM_ENABLE_OTEL + value: "true" + - name: NIM_OTEL_SERVICE_NAME + value: "page-elements" + - name: NIM_OTEL_TRACES_EXPORTER + value: "otlp" + - name: NIM_OTEL_METRICS_EXPORTER + value: "console" + - name: NIM_OTEL_EXPORTER_OTLP_ENDPOINT + value: "http://otel-collector:4318" + - name: TRITON_OTEL_URL + value: "http://otel-collector:4318/v1/traces" + - name: TRITON_OTEL_RATE + value: "1" + + graphic_elements: + env: + - name: NIM_HTTP_API_PORT + value: "8000" + - name: NIM_TRITON_LOG_VERBOSE + value: "1" + - name: NIM_TRITON_RATE_LIMIT + value: "3" + - name: NIM_TRITON_MAX_BATCH_SIZE + value: "1" + - name: NIM_TRITON_CUDA_MEMORY_POOL_MB + value: "2048" + - name: OMP_NUM_THREADS + value: "1" + + table_structure: + env: + - name: NIM_HTTP_API_PORT + value: "8000" + - name: NIM_TRITON_LOG_VERBOSE + value: "1" + - name: NIM_TRITON_RATE_LIMIT + value: "3" + - name: NIM_TRITON_MAX_BATCH_SIZE + value: "1" + - name: NIM_TRITON_CUDA_MEMORY_POOL_MB + value: "2048" + - name: OMP_NUM_THREADS + value: "1" + + nemoretriever_ocr_v1: + env: + - name: OMP_NUM_THREADS + value: "8" + - name: NIM_HTTP_API_PORT + value: "8000" + - name: NIM_TRITON_LOG_VERBOSE + value: "1" + - name: NIM_TRITON_MAX_BATCH_SIZE + value: "1" + + llama_3_2_nv_rerankqa_1b_v2: + env: + - name: NIM_HTTP_API_PORT + value: "8000" + - name: NIM_TRITON_LOG_VERBOSE + value: "1" + - name: NIM_TRITON_MAX_BATCH_SIZE + 
value: "1" diff --git a/helm/overrides/values-l40s.yaml b/helm/overrides/values-l40s.yaml new file mode 100644 index 000000000..dc17c93aa --- /dev/null +++ b/helm/overrides/values-l40s.yaml @@ -0,0 +1,85 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# GPU-specific overrides for L40S (loaded by harness when --deployment-type helm --sku l40s). +# Sets NIM_TRITON_MAX_BATCH_SIZE=1 per NIM to match docker-compose.l40s.yaml. + +nimOperator: + page_elements: + env: + - name: NIM_HTTP_API_PORT + value: "8000" + - name: NIM_TRITON_LOG_VERBOSE + value: "1" + - name: NIM_TRITON_MAX_BATCH_SIZE + value: "1" + - name: NIM_TRITON_CPU_THREADS_PRE_PROCESSOR + value: "2" + - name: OMP_NUM_THREADS + value: "2" + - name: NIM_TRITON_CPU_THREADS_POST_PROCESSOR + value: "1" + - name: NIM_ENABLE_OTEL + value: "true" + - name: NIM_OTEL_SERVICE_NAME + value: "page-elements" + - name: NIM_OTEL_TRACES_EXPORTER + value: "otlp" + - name: NIM_OTEL_METRICS_EXPORTER + value: "console" + - name: NIM_OTEL_EXPORTER_OTLP_ENDPOINT + value: "http://otel-collector:4318" + - name: TRITON_OTEL_URL + value: "http://otel-collector:4318/v1/traces" + - name: TRITON_OTEL_RATE + value: "1" + + graphic_elements: + env: + - name: NIM_HTTP_API_PORT + value: "8000" + - name: NIM_TRITON_LOG_VERBOSE + value: "1" + - name: NIM_TRITON_RATE_LIMIT + value: "3" + - name: NIM_TRITON_MAX_BATCH_SIZE + value: "1" + - name: NIM_TRITON_CUDA_MEMORY_POOL_MB + value: "2048" + - name: OMP_NUM_THREADS + value: "1" + + table_structure: + env: + - name: NIM_HTTP_API_PORT + value: "8000" + - name: NIM_TRITON_LOG_VERBOSE + value: "1" + - name: NIM_TRITON_RATE_LIMIT + value: "3" + - name: NIM_TRITON_MAX_BATCH_SIZE + value: "1" + - name: NIM_TRITON_CUDA_MEMORY_POOL_MB + value: "2048" + - name: OMP_NUM_THREADS + value: "1" + + nemoretriever_ocr_v1: + env: + - name: OMP_NUM_THREADS + value: "8" + - name: NIM_HTTP_API_PORT + value: "8000" + - 
name: NIM_TRITON_LOG_VERBOSE + value: "1" + - name: NIM_TRITON_MAX_BATCH_SIZE + value: "1" + + llama_3_2_nv_rerankqa_1b_v2: + env: + - name: NIM_HTTP_API_PORT + value: "8000" + - name: NIM_TRITON_LOG_VERBOSE + value: "1" + - name: NIM_TRITON_MAX_BATCH_SIZE + value: "1" diff --git a/tools/harness/README.md b/tools/harness/README.md index 6178125f9..004506cc4 100644 --- a/tools/harness/README.md +++ b/tools/harness/README.md @@ -782,7 +782,7 @@ uv run nv-ingest-harness-run --case=e2e --dataset=bo767 --managed --no-dump-logs ### GPU-Specific Configuration (SKU Override) -The harness supports GPU-specific configuration overrides for Docker Compose deployments via the `--sku` option: +The harness supports GPU-specific configuration overrides via the `--sku` option for both Docker Compose and Helm deployments: ```bash # A10G GPU settings @@ -796,15 +796,10 @@ uv run nv-ingest-harness-run --case=e2e --dataset=bo767 --managed --sku=a100-40g ``` **How it works:** -- Loads GPU-specific override file: `docker-compose.<sku>.yaml` -- Merges with base `docker-compose.yaml` configuration -- Override settings take precedence (typically batch sizes, memory limits, etc.) -- Only applies to Docker Compose deployments (ignored for Helm) - -**Available SKUs:** -- `a10g` - NVIDIA A10G GPU settings -- `l40s` - NVIDIA L40S GPU settings -- `a100-40gb` - NVIDIA A100 40GB GPU settings +- **Compose:** Loads `docker-compose.<sku>.yaml` and merges with base `docker-compose.yaml` (override takes precedence). +- **Helm:** Loads `helm/overrides/values-<sku>.yaml` via `helm upgrade -f`; merged with chart defaults (and any `helm_values_file` / `helm_values` still override). + +**Available SKUs:** `a10g` (NVIDIA A10G), `l40s` (NVIDIA L40S), `a100-40gb` (NVIDIA A100 40GB). 
## Nightly Benchmarks diff --git a/tools/harness/plans/SERVICE_MANAGER.md b/tools/harness/plans/SERVICE_MANAGER.md index 067b2b48c..38ba9c59a 100644 --- a/tools/harness/plans/SERVICE_MANAGER.md +++ b/tools/harness/plans/SERVICE_MANAGER.md @@ -45,6 +45,7 @@ tools/harness/src/nv_ingest_harness/service_manager/ - Installs/upgrades Helm releases with version support - Supports both remote charts (from Helm repos) and local `./helm` chart - Supports custom values files and inline values + - **SKU override**: Optional `helm/overrides/values-<sku>.yaml` via `-f` when `--sku` is set - **Port-forwarding**: Automatically sets up resilient `kubectl port-forward` for services - **Wildcard matching**: Supports dynamic service name patterns (e.g., `*embed*`) - **Auto-restart**: Port-forwards automatically restart on pod restarts/failures @@ -58,12 +59,11 @@ tools/harness/src/nv_ingest_harness/service_manager/ - Includes timestamps and handles multi-container pods 4. **create_service_manager()**: Factory function that creates the appropriate manager based on `deployment_type` config - - Accepts `sku` parameter for Docker Compose GPU-specific overrides - - Passes `sku` to `DockerComposeManager` constructor + - Accepts `sku` parameter; passes it to `DockerComposeManager` or `HelmManager` for GPU-specific overrides ## SKU Override Support -The service manager supports GPU-specific configuration overrides for Docker Compose deployments via the `--sku` CLI option. +The service manager supports GPU-specific configuration overrides via the `--sku` CLI option for both Docker Compose and Helm deployments. 
### Implementation Details @@ -74,8 +74,7 @@ The service manager supports GPU-specific configuration overrides for Docker Com #### Service Manager Factory (`service_manager/__init__.py`) - `create_service_manager()` accepts `sku` parameter -- Passes `sku` to `DockerComposeManager` constructor -- Ignored for Helm deployments (only applies to Docker Compose) +- Passes `sku` to `DockerComposeManager` (Compose) or `HelmManager` (Helm) #### Docker Compose Manager (`service_manager/docker_compose.py`) - Accepts `sku` parameter in `__init__()` @@ -84,8 +83,14 @@ The service manager supports GPU-specific configuration overrides for Docker Com - Uses `_build_compose_cmd()` helper to construct commands - All docker compose commands include override file via multiple `-f` flags +#### Helm Manager (`service_manager/helm.py`) +- Accepts `sku` parameter in `__init__()` +- In `start()`, if `sku` is set and `helm/overrides/values-<sku>.yaml` exists, adds `-f <path>` to the `helm upgrade --install` command +- Prints warning if override file is specified but not found + ### SKU Parameter Flow +**Compose:** ``` CLI (run.py/nightly.py) └─> --sku=a10g └─> create_service_manager(config, repo_root, sku) └─> DockerComposeManager(config, repo_root, sku) └─> Used by: start(), stop(), dump_logs() ``` +**Helm:** +``` +CLI (run.py/nightly.py) + └─> --sku=a10g + └─> create_service_manager(config, repo_root, sku) + └─> HelmManager(config, repo_root, sku) + └─> start(): if helm/overrides/values-a10g.yaml exists, add -f to helm upgrade --install +``` + ### Available SKU Override Files -- `docker-compose.a10g.yaml` - NVIDIA A10G GPU settings -- `docker-compose.a100-40gb.yaml` - NVIDIA A100 40GB GPU settings -- `docker-compose.l40s.yaml` - NVIDIA L40S GPU settings +- **Compose:** `docker-compose.a10g.yaml`, `docker-compose.l40s.yaml`, `docker-compose.a100-40gb.yaml` (repo root) +- **Helm:** `helm/overrides/values-a10g.yaml`, `helm/overrides/values-l40s.yaml`, `helm/overrides/values-a100-40gb.yaml` ### Usage Examples @@ -117,6 +130,12 @@ python -m 
nv_ingest_harness.cli.nightly \ --deployment-type=compose \ --managed \ --sku=l40s + +# Helm with A10G GPU override (loads helm/overrides/values-a10g.yaml) +python -m nv_ingest_harness.cli.run \ + --dataset=bo767 --case=e2e --managed \ + --deployment-type=helm \ + --sku=a10g ``` ### Override File Structure @@ -330,7 +349,7 @@ The service manager is controlled via CLI flags: - `--deployment-type=`: Set deployment type (`compose` or `helm`) - Overrides `deployment_type` in YAML config - Defaults to `compose` if not specified in either place -- `--sku=<sku>`: GPU-specific override file for Docker Compose (e.g., `a10g`, `l40s`, `a100-40gb`) +- `--sku=<sku>`: GPU-specific override (Compose: `docker-compose.<sku>.yaml`; Helm: `helm/overrides/values-<sku>.yaml`). e.g. `a10g`, `l40s`, `a100-40gb` - `--no-build`: Skip building Docker images (Docker Compose only) - `--keep-up`: Keep services running after test completes (does not apply to port-forwards) - `--doc-analysis`: Show per-document element breakdown in results diff --git a/tools/harness/src/nv_ingest_harness/cli/run.py b/tools/harness/src/nv_ingest_harness/cli/run.py index 6dc4b155d..6c13e55b0 100644 --- a/tools/harness/src/nv_ingest_harness/cli/run.py +++ b/tools/harness/src/nv_ingest_harness/cli/run.py @@ -354,8 +354,8 @@ def close(self): "--sku", type=str, default=None, - help="GPU SKU for Docker Compose override file (e.g., a10g, a100-40gb, l40s). Only applies to managed Compose " - "services.", + help="GPU SKU for override file (Compose: docker-compose.<sku>.yaml; Helm: helm/overrides/values-<sku>.yaml). 
" + "Applies to managed Compose and Helm deployments (e.g., a10g, a100-40gb, l40s).", ) @click.option( "--dump-logs/--no-dump-logs", diff --git a/tools/harness/src/nv_ingest_harness/service_manager/__init__.py b/tools/harness/src/nv_ingest_harness/service_manager/__init__.py index d7d617c80..1f10d7e9b 100644 --- a/tools/harness/src/nv_ingest_harness/service_manager/__init__.py +++ b/tools/harness/src/nv_ingest_harness/service_manager/__init__.py @@ -14,7 +14,8 @@ def create_service_manager(config, repo_root: Path, sku: str | None = None) -> S Args: config: Configuration object with deployment_type attribute repo_root: Path to the repository root - sku: Optional GPU SKU for Docker Compose override file (e.g., a10g, a100-40gb, l40s) + sku: Optional GPU SKU for override file (Compose: docker-compose..yaml; + Helm: helm/overrides/values-.yaml) Returns: ServiceManager instance (DockerComposeManager or HelmManager) @@ -27,7 +28,7 @@ def create_service_manager(config, repo_root: Path, sku: str | None = None) -> S if deployment_type == "compose": return DockerComposeManager(config, repo_root, sku=sku) elif deployment_type == "helm": - return HelmManager(config, repo_root) + return HelmManager(config, repo_root, sku=sku) else: raise ValueError(f"Unknown deployment_type: {deployment_type}. Must be 'compose' or 'helm'") diff --git a/tools/harness/src/nv_ingest_harness/service_manager/helm.py b/tools/harness/src/nv_ingest_harness/service_manager/helm.py index 40a00ccd3..0fc8d7ba1 100644 --- a/tools/harness/src/nv_ingest_harness/service_manager/helm.py +++ b/tools/harness/src/nv_ingest_harness/service_manager/helm.py @@ -72,15 +72,17 @@ def _add_values_to_command(cmd: list, values_dict: dict) -> list: cmd += ["--set", f"{key}={str_value}"] return cmd - def __init__(self, config, repo_root: Path): + def __init__(self, config, repo_root: Path, sku: str | None = None): """ Initialize Helm manager. 
Args: config: Configuration object with Helm settings repo_root: Path to the repository root + sku: Optional GPU SKU for values override file (e.g., a10g, a100-40gb, l40s) """ super().__init__(config, repo_root) + self.sku = sku # Helm binary command (supports "helm", "microk8s helm", "k3s helm", etc.) helm_bin = getattr(config, "helm_bin", "helm") helm_sudo = getattr(config, "helm_sudo", False) @@ -137,6 +139,15 @@ def start(self, no_build: bool = False) -> int: if self.chart_version: cmd += ["--version", self.chart_version] + # Add GPU SKU values override file if specified (helm/overrides/values-<sku>.yaml) + if self.sku: + sku_values_path = self.repo_root / "helm" / "overrides" / f"values-{self.sku}.yaml" + if sku_values_path.exists(): + cmd += ["-f", str(sku_values_path)] + print(f"Using Helm values override: {sku_values_path}") + else: + print(f"Warning: Helm SKU override file not found: {sku_values_path}") + # Parse and add values from YAML file if specified if self.values_file: values_path = self.repo_root / self.values_file