Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions helm/overrides/values-a100-40gb.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# GPU-specific overrides for A100 40GB (loaded by harness when --deployment-type helm --sku a100-40gb).
# Sets NIM_TRITON_MAX_BATCH_SIZE=1 per NIM to match docker-compose.a100-40gb.yaml.

# Per-NIM environment overrides for the A100 40GB SKU, nested under nimOperator.
# Helm deep-merges mappings but replaces list values wholesale, so each `env`
# list below replaces the chart's default list for that NIM rather than
# extending it.
# NOTE(review): confirm every variable the chart defaults set for a NIM is
# restated in its list here, otherwise it is dropped when this file is applied.
nimOperator:
  page_elements:
    env:
      - name: NIM_HTTP_API_PORT
        value: "8000"
      - name: NIM_TRITON_LOG_VERBOSE
        value: "1"
      # Batch size pinned to 1 for this SKU (see file header).
      - name: NIM_TRITON_MAX_BATCH_SIZE
        value: "1"
      - name: NIM_TRITON_CPU_THREADS_PRE_PROCESSOR
        value: "2"
      - name: OMP_NUM_THREADS
        value: "2"
      - name: NIM_TRITON_CPU_THREADS_POST_PROCESSOR
        value: "1"
      # OpenTelemetry settings; values are strings because env values must be.
      - name: NIM_ENABLE_OTEL
        value: "true"
      - name: NIM_OTEL_SERVICE_NAME
        value: "page-elements"
      - name: NIM_OTEL_TRACES_EXPORTER
        value: "otlp"
      - name: NIM_OTEL_METRICS_EXPORTER
        value: "console"
      - name: NIM_OTEL_EXPORTER_OTLP_ENDPOINT
        value: "http://otel-collector:4318"
      - name: TRITON_OTEL_URL
        value: "http://otel-collector:4318/v1/traces"
      - name: TRITON_OTEL_RATE
        value: "1"

  graphic_elements:
    env:
      - name: NIM_HTTP_API_PORT
        value: "8000"
      - name: NIM_TRITON_LOG_VERBOSE
        value: "1"
      - name: NIM_TRITON_RATE_LIMIT
        value: "3"
      # Batch size pinned to 1 for this SKU (see file header).
      - name: NIM_TRITON_MAX_BATCH_SIZE
        value: "1"
      - name: NIM_TRITON_CUDA_MEMORY_POOL_MB
        value: "2048"
      - name: OMP_NUM_THREADS
        value: "1"

  table_structure:
    env:
      - name: NIM_HTTP_API_PORT
        value: "8000"
      - name: NIM_TRITON_LOG_VERBOSE
        value: "1"
      - name: NIM_TRITON_RATE_LIMIT
        value: "3"
      # Batch size pinned to 1 for this SKU (see file header).
      - name: NIM_TRITON_MAX_BATCH_SIZE
        value: "1"
      - name: NIM_TRITON_CUDA_MEMORY_POOL_MB
        value: "2048"
      - name: OMP_NUM_THREADS
        value: "1"

  nemoretriever_ocr_v1:
    env:
      - name: OMP_NUM_THREADS
        value: "8"
      - name: NIM_HTTP_API_PORT
        value: "8000"
      - name: NIM_TRITON_LOG_VERBOSE
        value: "1"
      # Batch size pinned to 1 for this SKU (see file header).
      - name: NIM_TRITON_MAX_BATCH_SIZE
        value: "1"

  llama_3_2_nv_rerankqa_1b_v2:
    env:
      - name: NIM_HTTP_API_PORT
        value: "8000"
      - name: NIM_TRITON_LOG_VERBOSE
        value: "1"
      # Batch size pinned to 1 for this SKU (see file header).
      - name: NIM_TRITON_MAX_BATCH_SIZE
        value: "1"
85 changes: 85 additions & 0 deletions helm/overrides/values-a10g.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# GPU-specific overrides for A10G (loaded by harness when --deployment-type helm --sku a10g).
# Sets NIM_TRITON_MAX_BATCH_SIZE=1 per NIM to match docker-compose.a10g.yaml.

# Per-NIM environment overrides for the A10G SKU, nested under nimOperator.
# Helm deep-merges mappings but replaces list values wholesale, so each `env`
# list below replaces the chart's default list for that NIM rather than
# extending it.
# NOTE(review): confirm every variable the chart defaults set for a NIM is
# restated in its list here, otherwise it is dropped when this file is applied.
nimOperator:
  page_elements:
    env:
      - name: NIM_HTTP_API_PORT
        value: "8000"
      - name: NIM_TRITON_LOG_VERBOSE
        value: "1"
      # Batch size pinned to 1 for this SKU (see file header).
      - name: NIM_TRITON_MAX_BATCH_SIZE
        value: "1"
      - name: NIM_TRITON_CPU_THREADS_PRE_PROCESSOR
        value: "2"
      - name: OMP_NUM_THREADS
        value: "2"
      - name: NIM_TRITON_CPU_THREADS_POST_PROCESSOR
        value: "1"
      # OpenTelemetry settings; values are strings because env values must be.
      - name: NIM_ENABLE_OTEL
        value: "true"
      - name: NIM_OTEL_SERVICE_NAME
        value: "page-elements"
      - name: NIM_OTEL_TRACES_EXPORTER
        value: "otlp"
      - name: NIM_OTEL_METRICS_EXPORTER
        value: "console"
      - name: NIM_OTEL_EXPORTER_OTLP_ENDPOINT
        value: "http://otel-collector:4318"
      - name: TRITON_OTEL_URL
        value: "http://otel-collector:4318/v1/traces"
      - name: TRITON_OTEL_RATE
        value: "1"

  graphic_elements:
    env:
      - name: NIM_HTTP_API_PORT
        value: "8000"
      - name: NIM_TRITON_LOG_VERBOSE
        value: "1"
      - name: NIM_TRITON_RATE_LIMIT
        value: "3"
      # Batch size pinned to 1 for this SKU (see file header).
      - name: NIM_TRITON_MAX_BATCH_SIZE
        value: "1"
      - name: NIM_TRITON_CUDA_MEMORY_POOL_MB
        value: "2048"
      - name: OMP_NUM_THREADS
        value: "1"

  table_structure:
    env:
      - name: NIM_HTTP_API_PORT
        value: "8000"
      - name: NIM_TRITON_LOG_VERBOSE
        value: "1"
      - name: NIM_TRITON_RATE_LIMIT
        value: "3"
      # Batch size pinned to 1 for this SKU (see file header).
      - name: NIM_TRITON_MAX_BATCH_SIZE
        value: "1"
      - name: NIM_TRITON_CUDA_MEMORY_POOL_MB
        value: "2048"
      - name: OMP_NUM_THREADS
        value: "1"

  nemoretriever_ocr_v1:
    env:
      - name: OMP_NUM_THREADS
        value: "8"
      - name: NIM_HTTP_API_PORT
        value: "8000"
      - name: NIM_TRITON_LOG_VERBOSE
        value: "1"
      # Batch size pinned to 1 for this SKU (see file header).
      - name: NIM_TRITON_MAX_BATCH_SIZE
        value: "1"

  llama_3_2_nv_rerankqa_1b_v2:
    env:
      - name: NIM_HTTP_API_PORT
        value: "8000"
      - name: NIM_TRITON_LOG_VERBOSE
        value: "1"
      # Batch size pinned to 1 for this SKU (see file header).
      - name: NIM_TRITON_MAX_BATCH_SIZE
        value: "1"
85 changes: 85 additions & 0 deletions helm/overrides/values-l40s.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# GPU-specific overrides for L40S (loaded by harness when --deployment-type helm --sku l40s).
# Sets NIM_TRITON_MAX_BATCH_SIZE=1 per NIM to match docker-compose.l40s.yaml.

# Per-NIM environment overrides for the L40S SKU, nested under nimOperator.
# Helm deep-merges mappings but replaces list values wholesale, so each `env`
# list below replaces the chart's default list for that NIM rather than
# extending it.
# NOTE(review): confirm every variable the chart defaults set for a NIM is
# restated in its list here, otherwise it is dropped when this file is applied.
nimOperator:
  page_elements:
    env:
      - name: NIM_HTTP_API_PORT
        value: "8000"
      - name: NIM_TRITON_LOG_VERBOSE
        value: "1"
      # Batch size pinned to 1 for this SKU (see file header).
      - name: NIM_TRITON_MAX_BATCH_SIZE
        value: "1"
      - name: NIM_TRITON_CPU_THREADS_PRE_PROCESSOR
        value: "2"
      - name: OMP_NUM_THREADS
        value: "2"
      - name: NIM_TRITON_CPU_THREADS_POST_PROCESSOR
        value: "1"
      # OpenTelemetry settings; values are strings because env values must be.
      - name: NIM_ENABLE_OTEL
        value: "true"
      - name: NIM_OTEL_SERVICE_NAME
        value: "page-elements"
      - name: NIM_OTEL_TRACES_EXPORTER
        value: "otlp"
      - name: NIM_OTEL_METRICS_EXPORTER
        value: "console"
      - name: NIM_OTEL_EXPORTER_OTLP_ENDPOINT
        value: "http://otel-collector:4318"
      - name: TRITON_OTEL_URL
        value: "http://otel-collector:4318/v1/traces"
      - name: TRITON_OTEL_RATE
        value: "1"

  graphic_elements:
    env:
      - name: NIM_HTTP_API_PORT
        value: "8000"
      - name: NIM_TRITON_LOG_VERBOSE
        value: "1"
      - name: NIM_TRITON_RATE_LIMIT
        value: "3"
      # Batch size pinned to 1 for this SKU (see file header).
      - name: NIM_TRITON_MAX_BATCH_SIZE
        value: "1"
      - name: NIM_TRITON_CUDA_MEMORY_POOL_MB
        value: "2048"
      - name: OMP_NUM_THREADS
        value: "1"

  table_structure:
    env:
      - name: NIM_HTTP_API_PORT
        value: "8000"
      - name: NIM_TRITON_LOG_VERBOSE
        value: "1"
      - name: NIM_TRITON_RATE_LIMIT
        value: "3"
      # Batch size pinned to 1 for this SKU (see file header).
      - name: NIM_TRITON_MAX_BATCH_SIZE
        value: "1"
      - name: NIM_TRITON_CUDA_MEMORY_POOL_MB
        value: "2048"
      - name: OMP_NUM_THREADS
        value: "1"

  nemoretriever_ocr_v1:
    env:
      - name: OMP_NUM_THREADS
        value: "8"
      - name: NIM_HTTP_API_PORT
        value: "8000"
      - name: NIM_TRITON_LOG_VERBOSE
        value: "1"
      # Batch size pinned to 1 for this SKU (see file header).
      - name: NIM_TRITON_MAX_BATCH_SIZE
        value: "1"

  llama_3_2_nv_rerankqa_1b_v2:
    env:
      - name: NIM_HTTP_API_PORT
        value: "8000"
      - name: NIM_TRITON_LOG_VERBOSE
        value: "1"
      # Batch size pinned to 1 for this SKU (see file header).
      - name: NIM_TRITON_MAX_BATCH_SIZE
        value: "1"
15 changes: 5 additions & 10 deletions tools/harness/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -782,7 +782,7 @@ uv run nv-ingest-harness-run --case=e2e --dataset=bo767 --managed --no-dump-logs

### GPU-Specific Configuration (SKU Override)

The harness supports GPU-specific configuration overrides for Docker Compose deployments via the `--sku` option:
The harness supports GPU-specific configuration overrides via the `--sku` option for both Docker Compose and Helm deployments:

```bash
# A10G GPU settings
Expand All @@ -796,15 +796,10 @@ uv run nv-ingest-harness-run --case=e2e --dataset=bo767 --managed --sku=a100-40g
```

**How it works:**
- Loads GPU-specific override file: `docker-compose.<sku>.yaml`
- Merges with base `docker-compose.yaml` configuration
- Override settings take precedence (typically batch sizes, memory limits, etc.)
- Only applies to Docker Compose deployments (ignored for Helm)

**Available SKUs:**
- `a10g` - NVIDIA A10G GPU settings
- `l40s` - NVIDIA L40S GPU settings
- `a100-40gb` - NVIDIA A100 40GB GPU settings
- **Compose:** Loads `docker-compose.<sku>.yaml` and merges with base `docker-compose.yaml` (override takes precedence).
- **Helm:** Passes `helm/overrides/values-<sku>.yaml` to `helm upgrade` via `-f`, where it is merged on top of the chart defaults. Any `helm_values_file` / `helm_values` settings still take precedence over the SKU override.

**Available SKUs:** `a10g` (NVIDIA A10G), `l40s` (NVIDIA L40S), `a100-40gb` (NVIDIA A100 40GB).

## Nightly Benchmarks

Expand Down
37 changes: 28 additions & 9 deletions tools/harness/plans/SERVICE_MANAGER.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ tools/harness/src/nv_ingest_harness/service_manager/
- Installs/upgrades Helm releases with version support
- Supports both remote charts (from Helm repos) and local `./helm` chart
- Supports custom values files and inline values
- **SKU override**: When `--sku` is set, passes the optional `helm/overrides/values-<sku>.yaml` file to Helm via `-f`
- **Port-forwarding**: Automatically sets up resilient `kubectl port-forward` for services
- **Wildcard matching**: Supports dynamic service name patterns (e.g., `*embed*`)
- **Auto-restart**: Port-forwards automatically restart on pod restarts/failures
Expand All @@ -58,12 +59,11 @@ tools/harness/src/nv_ingest_harness/service_manager/
- Includes timestamps and handles multi-container pods

4. **create_service_manager()**: Factory function that creates the appropriate manager based on `deployment_type` config
- Accepts `sku` parameter for Docker Compose GPU-specific overrides
- Passes `sku` to `DockerComposeManager` constructor
- Accepts `sku` parameter; passes it to `DockerComposeManager` or `HelmManager` for GPU-specific overrides

## SKU Override Support

The service manager supports GPU-specific configuration overrides for Docker Compose deployments via the `--sku` CLI option.
The service manager supports GPU-specific configuration overrides via the `--sku` CLI option for both Docker Compose and Helm deployments.

### Implementation Details

Expand All @@ -74,8 +74,7 @@ The service manager supports GPU-specific configuration overrides for Docker Com

#### Service Manager Factory (`service_manager/__init__.py`)
- `create_service_manager()` accepts `sku` parameter
- Passes `sku` to `DockerComposeManager` constructor
- Ignored for Helm deployments (only applies to Docker Compose)
- Passes `sku` to `DockerComposeManager` (Compose) or `HelmManager` (Helm)

#### Docker Compose Manager (`service_manager/docker_compose.py`)
- Accepts `sku` parameter in `__init__()`
Expand All @@ -84,8 +83,14 @@ The service manager supports GPU-specific configuration overrides for Docker Com
- Uses `_build_compose_cmd()` helper to construct commands
- All docker compose commands include override file via multiple `-f` flags

#### Helm Manager (`service_manager/helm.py`)
- Accepts `sku` parameter in `__init__()`
- In `start()`, if `sku` is set and `helm/overrides/values-<sku>.yaml` exists, adds `-f <path>` to the `helm upgrade --install` command
- Prints a warning if `sku` is set but the corresponding override file is not found

### SKU Parameter Flow

**Compose:**
```
CLI (run.py/nightly.py)
└─> --sku=a10g
Expand All @@ -96,10 +101,18 @@ CLI (run.py/nightly.py)
└─> Used by: start(), stop(), dump_logs()
```

**Helm:**
```
CLI (run.py/nightly.py)
└─> --sku=a10g
└─> create_service_manager(config, repo_root, sku)
└─> HelmManager(config, repo_root, sku)
└─> start(): if helm/overrides/values-a10g.yaml exists, add -f <path> to helm upgrade --install
```

### Available SKU Override Files
- `docker-compose.a10g.yaml` - NVIDIA A10G GPU settings
- `docker-compose.a100-40gb.yaml` - NVIDIA A100 40GB GPU settings
- `docker-compose.l40s.yaml` - NVIDIA L40S GPU settings
- **Compose:** `docker-compose.a10g.yaml`, `docker-compose.l40s.yaml`, `docker-compose.a100-40gb.yaml` (repo root)
- **Helm:** `helm/overrides/values-a10g.yaml`, `helm/overrides/values-l40s.yaml`, `helm/overrides/values-a100-40gb.yaml`

### Usage Examples

Expand All @@ -117,6 +130,12 @@ python -m nv_ingest_harness.cli.nightly \
--deployment-type=compose \
--managed \
--sku=l40s

# Helm with A10G GPU override (loads helm/overrides/values-a10g.yaml)
python -m nv_ingest_harness.cli.run \
--dataset=bo767 --case=e2e --managed \
--deployment-type=helm \
--sku=a10g
```

### Override File Structure
Expand Down Expand Up @@ -330,7 +349,7 @@ The service manager is controlled via CLI flags:
- `--deployment-type=<type>`: Set deployment type (`compose` or `helm`)
- Overrides `deployment_type` in YAML config
- Defaults to `compose` if not specified in either place
- `--sku=<sku>`: GPU-specific override file for Docker Compose (e.g., `a10g`, `l40s`, `a100-40gb`)
- `--sku=<sku>`: GPU-specific override file (Compose: `docker-compose.<sku>.yaml`; Helm: `helm/overrides/values-<sku>.yaml`). Supported values include `a10g`, `l40s`, and `a100-40gb`
- `--no-build`: Skip building Docker images (Docker Compose only)
- `--keep-up`: Keep services running after test completes (does not apply to port-forwards)
- `--doc-analysis`: Show per-document element breakdown in results
Expand Down
4 changes: 2 additions & 2 deletions tools/harness/src/nv_ingest_harness/cli/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,8 +354,8 @@ def close(self):
"--sku",
type=str,
default=None,
help="GPU SKU for Docker Compose override file (e.g., a10g, a100-40gb, l40s). Only applies to managed Compose "
"services.",
help="GPU SKU for override file (Compose: docker-compose.<sku>.yaml; Helm: helm/overrides/values-<sku>.yaml). "
"Applies to managed Compose and Helm deployments (e.g., a10g, a100-40gb, l40s).",
)
@click.option(
"--dump-logs/--no-dump-logs",
Expand Down
Loading
Loading