From bb2197d89425c944cbb94dbb34d39da4aae4e17b Mon Sep 17 00:00:00 2001 From: Anurag Guda Date: Tue, 30 Dec 2025 14:57:37 -0600 Subject: [PATCH 1/4] NIM Operator DRA Support --- helm/README.md | 21 +++++++++++++++++ helm/templates/deployment.yaml | 16 +++++++++++-- helm/templates/dra.yaml | 23 +++++++++++++++++++ .../templates/llama-3.2-nv-embedqa-1b-v2.yaml | 2 +- .../llama-3.2-nv-rerankqa-1b-v2.yaml | 6 +++++ .../nemoretriever-graphic-elements-v1.yaml | 6 +++++ helm/templates/nemoretriever-ocr-v1.yaml | 9 +++++++- .../nemoretriever-page-elements-v3.yaml | 9 +++++++- .../nemoretriever-table-structure-v1.yaml | 6 +++++ helm/templates/nemotron-nano-12b-v2-vl.yaml | 6 +++++ helm/values.yaml | 20 +++++++++++++++- 11 files changed, 118 insertions(+), 6 deletions(-) create mode 100644 helm/templates/dra.yaml mode change 100644 => 100755 helm/templates/nemoretriever-page-elements-v3.yaml diff --git a/helm/README.md b/helm/README.md index b4880114d..ac79e0ca0 100644 --- a/helm/README.md +++ b/helm/README.md @@ -90,6 +90,27 @@ In this case, make sure to remove the following from your helm command: --set ngcApiSecret.password="${NGC_API_KEY}" \ ``` +### Install or Upgrade the Helm Chart with NVIDIA DRA Support + +To install or upgrade the Helm chart with [NVIDIA DRA](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/dra-intro-install.html), run the following code. 
+ +```bash +helm upgrade \ + --install \ + nv-ingest \ + https://helm.ngc.nvidia.com/nvidia/nemo-microservices/charts/nv-ingest-25.9.0.tgz \ + -n ${NAMESPACE} \ + --username '$oauthtoken' \ + --password "${NGC_API_KEY}" \ + --set ngcImagePullSecret.create=true \ + --set ngcImagePullSecret.password="${NGC_API_KEY}" \ + --set ngcApiSecret.create=true \ + --set ngcApiSecret.password="${NGC_API_KEY}" \ + --set image.repository="nvcr.io/nvidia/nemo-microservices/nv-ingest" \ + --set image.tag="25.9.0" \ + --set nimOperator.draResources.enabled="true" +``` + ## Usage Jobs are submitted via the `nv-ingest-cli` command. diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml index 7f8d7e4ff..27867f73b 100644 --- a/helm/templates/deployment.yaml +++ b/helm/templates/deployment.yaml @@ -130,8 +130,15 @@ spec: readinessProbe: {{- toYaml (omit .Values.readinessProbe "enabled") | nindent 12 }} {{- end }} + {{- if .Values.nimOperator.draResources.enabled }} resources: - {{- toYaml .Values.resources | nindent 12 }} + claims: + - name: shared-gpu + {{- end }} + {{- if not .Values.nimOperator.draResources.enabled }} + resources: + {{ toYaml .Values.resources | nindent 12 }} + {{- end }} volumeMounts: - mountPath: /dev/shm name: dshm @@ -148,6 +155,11 @@ spec: nodeSelector: {{- toYaml . | nindent 8 }} {{- end }} + {{- if .Values.nimOperator.draResources.enabled }} + resourceClaims: + - name: shared-gpu + resourceClaimName: {{ .Values.nimOperator.draResources.name }} + {{- end }} {{- with .Values.affinity }} affinity: {{- toYaml . 
| nindent 8 }} @@ -168,4 +180,4 @@ spec: - name: {{ $k }} {{- toYaml $v | nindent 10 }} {{- end }} - {{- end }} + {{- end }} \ No newline at end of file diff --git a/helm/templates/dra.yaml b/helm/templates/dra.yaml new file mode 100644 index 000000000..5bfd43179 --- /dev/null +++ b/helm/templates/dra.yaml @@ -0,0 +1,23 @@ +{{- $claimName := .Values.nimOperator.draResources.name -}} +{{- $existing := lookup "resource.k8s.io/v1beta2" "ResourceClaim" .Release.Namespace $claimName -}} +{{- if and + (.Capabilities.APIVersions.Has "resource.k8s.io/v1beta2") + (.Values.nimOperator.draResources.enabled) + (not $existing) +}} +apiVersion: resource.k8s.io/v1beta2 +kind: ResourceClaim +metadata: + name: {{ $claimName }} + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-weight": "5" +spec: + devices: + requests: + - name: {{ $claimName }}-gpu + exactly: + deviceClassName: gpu.nvidia.com + count: 1 + allocationMode: ExactCount +{{- end }} \ No newline at end of file diff --git a/helm/templates/llama-3.2-nv-embedqa-1b-v2.yaml b/helm/templates/llama-3.2-nv-embedqa-1b-v2.yaml index e4479ca88..6f8d37598 100644 --- a/helm/templates/llama-3.2-nv-embedqa-1b-v2.yaml +++ b/helm/templates/llama-3.2-nv-embedqa-1b-v2.yaml @@ -44,4 +44,4 @@ spec: {{ toYaml .Values.nimOperator.embedqa.expose | nindent 4 }} env: {{ toYaml .Values.nimOperator.embedqa.env | nindent 4 }} -{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/templates/llama-3.2-nv-rerankqa-1b-v2.yaml b/helm/templates/llama-3.2-nv-rerankqa-1b-v2.yaml index 066fb9215..35b087e1c 100644 --- a/helm/templates/llama-3.2-nv-rerankqa-1b-v2.yaml +++ b/helm/templates/llama-3.2-nv-rerankqa-1b-v2.yaml @@ -36,8 +36,14 @@ spec: replicas: {{ .Values.nimOperator.llama_3_2_nv_rerankqa_1b_v2.replicas }} nodeSelector: {{ toYaml .Values.nimOperator.llama_3_2_nv_rerankqa_1b_v2.nodeSelector | nindent 4 }} +{{- if .Values.nimOperator.draResources.enabled }} + draResources: +{{ toYaml 
.Values.nimOperator.llama_3_2_nv_rerankqa_1b_v2.draResources | nindent 4 }} +{{- end }} +{{- if not .Values.nimOperator.draResources.enabled }} resources: {{ toYaml .Values.nimOperator.llama_3_2_nv_rerankqa_1b_v2.resources | nindent 4 }} +{{- end }} tolerations: {{ toYaml .Values.nimOperator.llama_3_2_nv_rerankqa_1b_v2.tolerations | nindent 4 }} expose: diff --git a/helm/templates/nemoretriever-graphic-elements-v1.yaml b/helm/templates/nemoretriever-graphic-elements-v1.yaml index 2c1e97555..c32d63827 100644 --- a/helm/templates/nemoretriever-graphic-elements-v1.yaml +++ b/helm/templates/nemoretriever-graphic-elements-v1.yaml @@ -36,8 +36,14 @@ spec: replicas: {{ .Values.nimOperator.graphic_elements.replicas }} nodeSelector: {{ toYaml .Values.nimOperator.graphic_elements.nodeSelector | nindent 4 }} +{{- if .Values.nimOperator.draResources.enabled }} + draResources: +{{ toYaml .Values.nimOperator.graphic_elements.draResources | nindent 4 }} +{{- end }} +{{- if not .Values.nimOperator.draResources.enabled }} resources: {{ toYaml .Values.nimOperator.graphic_elements.resources | nindent 4 }} +{{- end }} tolerations: {{ toYaml .Values.nimOperator.graphic_elements.tolerations | nindent 4 }} expose: diff --git a/helm/templates/nemoretriever-ocr-v1.yaml b/helm/templates/nemoretriever-ocr-v1.yaml index 5cba31147..91187d12b 100644 --- a/helm/templates/nemoretriever-ocr-v1.yaml +++ b/helm/templates/nemoretriever-ocr-v1.yaml @@ -34,7 +34,14 @@ spec: name: nemoretriever-ocr-v1 replicas: {{ .Values.nimOperator.nemoretriever_ocr_v1.replicas }} nodeSelector: {{ toYaml .Values.nimOperator.nemoretriever_ocr_v1.nodeSelector | nindent 4 }} - resources: {{ toYaml .Values.nimOperator.nemoretriever_ocr_v1.resources | nindent 4 }} +{{- if .Values.nimOperator.draResources.enabled }} + draResources: +{{ toYaml .Values.nimOperator.nemoretriever_ocr_v1.draResources | nindent 4 }} +{{- end }} +{{- if not .Values.nimOperator.draResources.enabled }} + resources: +{{ toYaml 
.Values.nimOperator.nemoretriever_ocr_v1.resources | nindent 4 }} +{{- end }} tolerations: {{ toYaml .Values.nimOperator.nemoretriever_ocr_v1.tolerations | nindent 4 }} expose: {{ toYaml .Values.nimOperator.nemoretriever_ocr_v1.expose | nindent 4 }} env: {{ toYaml .Values.nimOperator.nemoretriever_ocr_v1.env | nindent 4 }} diff --git a/helm/templates/nemoretriever-page-elements-v3.yaml b/helm/templates/nemoretriever-page-elements-v3.yaml old mode 100644 new mode 100755 index a2567bbf4..463880951 --- a/helm/templates/nemoretriever-page-elements-v3.yaml +++ b/helm/templates/nemoretriever-page-elements-v3.yaml @@ -36,7 +36,14 @@ spec: profile: '' replicas: {{ .Values.nimOperator.page_elements.replicas }} nodeSelector: {{ toYaml .Values.nimOperator.page_elements.nodeSelector | nindent 4 }} - resources: {{ toYaml .Values.nimOperator.page_elements.resources | nindent 4 }} +{{- if .Values.nimOperator.draResources.enabled }} + draResources: +{{ toYaml .Values.nimOperator.page_elements.draResources | nindent 4 }} +{{- end }} +{{- if not .Values.nimOperator.draResources.enabled }} + resources: +{{ toYaml .Values.nimOperator.page_elements.resources | nindent 4 }} +{{- end }} tolerations: {{ toYaml .Values.nimOperator.page_elements.tolerations | nindent 4 }} expose: {{ toYaml .Values.nimOperator.page_elements.expose | nindent 4 }} env: {{ toYaml .Values.nimOperator.page_elements.env | nindent 4 }} diff --git a/helm/templates/nemoretriever-table-structure-v1.yaml b/helm/templates/nemoretriever-table-structure-v1.yaml index 20fb7bb6f..8c1352f9c 100644 --- a/helm/templates/nemoretriever-table-structure-v1.yaml +++ b/helm/templates/nemoretriever-table-structure-v1.yaml @@ -36,8 +36,14 @@ spec: replicas: {{ .Values.nimOperator.table_structure.replicas }} nodeSelector: {{ toYaml .Values.nimOperator.table_structure.nodeSelector | nindent 4 }} +{{- if .Values.nimOperator.draResources.enabled }} + draResources: +{{ toYaml .Values.nimOperator.table_structure.draResources | nindent 4 }} 
+{{- end }} +{{- if not .Values.nimOperator.draResources.enabled }} resources: {{ toYaml .Values.nimOperator.table_structure.resources | nindent 4 }} +{{- end }} tolerations: {{ toYaml .Values.nimOperator.table_structure.tolerations | nindent 4 }} expose: diff --git a/helm/templates/nemotron-nano-12b-v2-vl.yaml b/helm/templates/nemotron-nano-12b-v2-vl.yaml index 1b5c1ca9d..b6d01c8a1 100644 --- a/helm/templates/nemotron-nano-12b-v2-vl.yaml +++ b/helm/templates/nemotron-nano-12b-v2-vl.yaml @@ -34,8 +34,14 @@ spec: nimCache: name: nemotron-nano-12b-v2-vl replicas: {{ .Values.nimOperator.nemotron_nano_12b_v2_vl.replicas }} +{{- if .Values.nimOperator.draResources.enabled }} + draResources: +{{ toYaml .Values.nimOperator.nemotron_nano_12b_v2_vl.draResources | nindent 4 }} +{{- end }} +{{- if not .Values.nimOperator.draResources.enabled }} resources: {{ toYaml .Values.nimOperator.nemotron_nano_12b_v2_vl.resources | nindent 4 }} +{{- end }} tolerations: {{ toYaml .Values.nimOperator.nemotron_nano_12b_v2_vl.tolerations | nindent 4 }} expose: diff --git a/helm/values.yaml b/helm/values.yaml index 27c2b3d71..b5c107b72 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -606,6 +606,9 @@ containerArgs: [] ## @section Nim Operator parameters ## @param nimCache.pvc.storageClass Specifies the storage class for the PVCs nimOperator: + draResources: + enabled: false + name: "nvingest-claim" ## @section Nim Operator NimCache Configuration ## @param nimOperator.nimCache.pvc.storageClass [string] Storage class to use for all NimCache PVCs. Overrides per-model storageClass if set. 
nimCache: @@ -665,11 +668,12 @@ nimOperator: resources: limits: nvidia.com/gpu: 1 +# draResources: +# - resourceClaimName: nvingest-claim expose: service: type: ClusterIP port: 8000 - service: grpcPort: 8001 env: - name: NIM_HTTP_API_PORT @@ -733,6 +737,8 @@ nimOperator: resources: limits: nvidia.com/gpu: 1 +# draResources: +# - resourceClaimName: nvingest-claim expose: service: type: ClusterIP @@ -779,6 +785,8 @@ nimOperator: resources: limits: nvidia.com/gpu: 1 +# draResources: +# - resourceClaimName: nvingest-claim expose: service: type: ClusterIP @@ -825,6 +833,8 @@ nimOperator: resources: limits: nvidia.com/gpu: 1 +# draResources: +# - resourceClaimName: nvingest-claim expose: service: type: ClusterIP @@ -867,6 +877,8 @@ nimOperator: resources: limits: nvidia.com/gpu: 1 +# draResources: +# - resourceClaimName: nvingest-claim expose: service: type: ClusterIP @@ -915,6 +927,8 @@ nimOperator: resources: limits: nvidia.com/gpu: 1 +# draResources: +# - resourceClaimName: nvingest-claim expose: service: type: ClusterIP @@ -963,6 +977,8 @@ nimOperator: resources: limits: nvidia.com/gpu: 1 +# draResources: +# - resourceClaimName: nvingest-claim expose: service: type: ClusterIP @@ -1003,6 +1019,8 @@ nimOperator: resources: limits: nvidia.com/gpu: 1 +# draResources: +# - resourceClaimName: nvingest-claim expose: service: type: ClusterIP From 1687855966243e67b96ecf7b9a5f772c5697a52d Mon Sep 17 00:00:00 2001 From: Anurag Guda Date: Tue, 30 Dec 2025 15:04:33 -0600 Subject: [PATCH 2/4] NIM Operator DRA Support --- helm/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/helm/README.md b/helm/README.md index ac79e0ca0..e42d5ae62 100644 --- a/helm/README.md +++ b/helm/README.md @@ -92,6 +92,8 @@ In this case, make sure to remove the following from your helm command: ### Install or Upgrade the Helm Chart with NVIDIA DRA Support +- Prerequisites: 1 GPU with at least 48GB memory + To install or upgrade the Helm chart with [NVIDIA 
DRA](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/dra-intro-install.html), run the following code. ```bash From 8c23eec9414a3acc1237695e91cdb89c1dc17088 Mon Sep 17 00:00:00 2001 From: Anurag Guda Date: Tue, 30 Dec 2025 15:13:36 -0600 Subject: [PATCH 3/4] NIM Operator DRA Support Update --- helm/README.md | 29 +++++++++++++++++-- .../templates/llama-3.2-nv-embedqa-1b-v2.yaml | 8 ++++- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/helm/README.md b/helm/README.md index e42d5ae62..16880a120 100644 --- a/helm/README.md +++ b/helm/README.md @@ -92,7 +92,32 @@ In this case, make sure to remove the following from your helm command: ### Install or Upgrade the Helm Chart with NVIDIA DRA Support -- Prerequisites: 1 GPU with at least 48GB memory +- Prerequisites: 1 GPU with at least 64GB memory + +create a `custom-values.yaml` with below content + +```yaml +nv-ingest: + nimOperator: + draResources: + enabled: true + name: "nvingest-claim" + embedqa: + draResources: + - resourceClaimName: nvingest-claim + page_elements: + draResources: + - resourceClaimName: nvingest-claim + graphic_elements: + draResources: + - resourceClaimName: nvingest-claim + table_structure: + draResources: + - resourceClaimName: nvingest-claim + nemoretriever_ocr_v1: + draResources: + - resourceClaimName: nvingest-claim +``` To install or upgrade the Helm chart with [NVIDIA DRA](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/dra-intro-install.html), run the following code. 
@@ -110,7 +135,7 @@ helm upgrade \ --set ngcApiSecret.password="${NGC_API_KEY}" \ --set image.repository="nvcr.io/nvidia/nemo-microservices/nv-ingest" \ --set image.tag="25.9.0" \ - --set nimOperator.draResources.enabled="true" + -f ./custom-values.yaml ``` ## Usage diff --git a/helm/templates/llama-3.2-nv-embedqa-1b-v2.yaml b/helm/templates/llama-3.2-nv-embedqa-1b-v2.yaml index 6f8d37598..862d1293a 100644 --- a/helm/templates/llama-3.2-nv-embedqa-1b-v2.yaml +++ b/helm/templates/llama-3.2-nv-embedqa-1b-v2.yaml @@ -36,8 +36,14 @@ spec: replicas: {{ .Values.nimOperator.embedqa.replicas }} nodeSelector: {{ toYaml .Values.nimOperator.embedqa.nodeSelector | nindent 4 }} +{{- if .Values.nimOperator.draResources.enabled }} + draResources: +{{ toYaml .Values.nimOperator.embedqa.draResources | nindent 4 }} +{{- end }} +{{- if not .Values.nimOperator.draResources.enabled }} resources: -{{ toYaml .Values.nimOperator.embedqa.resources | nindent 4 }} +{{ toYaml .Values.nimOperator.embedqa.resources | nindent 4 }} +{{- end }} tolerations: {{ toYaml .Values.nimOperator.embedqa.tolerations | nindent 4 }} expose: From d3ba651dff48fae9938bf9bef063b11cb2da7080 Mon Sep 17 00:00:00 2001 From: Jeremy Dyer Date: Tue, 13 Jan 2026 15:39:20 -0500 Subject: [PATCH 4/4] Add docs to gotmpl as well otherwise they would have been overridden --- helm/README.md | 46 ++++++++++++++++--------------------------- helm/README.md.gotmpl | 46 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 29 deletions(-) diff --git a/helm/README.md b/helm/README.md index 9963a8c26..0415a64b0 100644 --- a/helm/README.md +++ b/helm/README.md @@ -9,20 +9,8 @@ This documentation contains documentation for the NV-Ingest Helm charts. Before you install the Helm charts, be sure you meet the hardware and software prerequisites. Refer to the [supported configurations](https://github.com/NVIDIA/nv-ingest?tab=readme-ov-file#hardware). 
-> Starting with version 26.1.0, the [NVIDIA NIM Operator](https://docs.nvidia.com/nim-operator/latest/install.html) is required. All NIM services are now deployed by using NIM Operator CRDs (NIMCache and NIMService), not Helm subcharts. -> -> **Upgrading from 25.9.0:** -> 1. Install NIM Operator before upgrading -> 2. Update your values file with the new configuration keys: -> -> | 25.9.0 | 26.x | -> |--------|------| -> | `nim-vlm-image-captioning.deployed=true` | `nimOperator.nemotron_nano_12b_v2_vl.enabled=true` | -> | `paddleocr-nim.deployed=true` | `nimOperator.nemoretriever_ocr_v1.enabled=true` | -> | `riva-nim.deployed=true` | `nimOperator.audio.enabled=true` | -> | `nim-vlm-text-extraction.deployed=true` | `nimOperator.nemotron_parse.enabled=true` | - -The [Nvidia GPU Operator](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/getting-started.html) must also be installed and configured in your cluster. +The [Nvidia nim-operator](https://docs.nvidia.com/nim-operator/latest/install.html) must also be installed and configured in your cluster to ensure that +the Nvidia NIMs are properly deployed. ## Initial Environment Setup @@ -106,7 +94,7 @@ In this case, make sure to remove the following from your helm command: - Prerequisites: 1 GPU with at least 64GB memory -create a `custom-values.yaml` with below content +create a `custom-values.yaml` with below content ```yaml nv-ingest: @@ -130,9 +118,7 @@ nv-ingest: draResources: - resourceClaimName: nvingest-claim ``` - To install or upgrade the Helm chart with [NVIDIA DRA](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/dra-intro-install.html), run the following code. 
- ```bash helm upgrade \ --install \ @@ -476,7 +462,9 @@ You can also use NV-Ingest's Python client API to interact with the service runn | nimOperator.audio.resources.limits."nvidia.com/gpu" | int | `1` | | | nimOperator.audio.storage.pvc.create | bool | `true` | | | nimOperator.audio.storage.pvc.size | string | `"25Gi"` | | -| nimOperator.audio.storage.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.audio.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | +| nimOperator.draResources.enabled | bool | `false` | | +| nimOperator.draResources.name | string | `"nvingest-claim"` | | | nimOperator.embedqa.authSecret | string | `"ngc-api"` | | | nimOperator.embedqa.enabled | bool | `true` | | | nimOperator.embedqa.env[0].name | string | `"NIM_HTTP_API_PORT"` | | @@ -493,12 +481,12 @@ You can also use NV-Ingest's Python client API to interact with the service runn | nimOperator.embedqa.image.pullPolicy | string | `"IfNotPresent"` | | | nimOperator.embedqa.image.pullSecrets[0] | string | `"ngc-secret"` | | | nimOperator.embedqa.image.repository | string | `"nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2"` | | -| nimOperator.embedqa.image.tag | string | `"1.10.0"` | | +| nimOperator.embedqa.image.tag | string | `"1.10.1"` | | | nimOperator.embedqa.replicas | int | `1` | | | nimOperator.embedqa.resources.limits."nvidia.com/gpu" | int | `1` | | | nimOperator.embedqa.storage.pvc.create | bool | `true` | | | nimOperator.embedqa.storage.pvc.size | string | `"50Gi"` | | -| nimOperator.embedqa.storage.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.embedqa.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | | nimOperator.graphic_elements.authSecret | string | `"ngc-api"` | | | nimOperator.graphic_elements.enabled | bool | `true` | | | nimOperator.graphic_elements.env[0].name | string | `"NIM_HTTP_API_PORT"` | | @@ -524,7 +512,7 @@ You can also use NV-Ingest's Python client API to interact with the service runn | 
nimOperator.graphic_elements.resources.limits."nvidia.com/gpu" | int | `1` | | | nimOperator.graphic_elements.storage.pvc.create | bool | `true` | | | nimOperator.graphic_elements.storage.pvc.size | string | `"25Gi"` | | -| nimOperator.graphic_elements.storage.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.graphic_elements.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | | nimOperator.llama_3_2_nv_rerankqa_1b_v2.authSecret | string | `"ngc-api"` | | | nimOperator.llama_3_2_nv_rerankqa_1b_v2.enabled | bool | `false` | | | nimOperator.llama_3_2_nv_rerankqa_1b_v2.env | list | `[]` | | @@ -539,7 +527,7 @@ You can also use NV-Ingest's Python client API to interact with the service runn | nimOperator.llama_3_2_nv_rerankqa_1b_v2.resources.limits."nvidia.com/gpu" | int | `1` | | | nimOperator.llama_3_2_nv_rerankqa_1b_v2.storage.pvc.create | bool | `true` | | | nimOperator.llama_3_2_nv_rerankqa_1b_v2.storage.pvc.size | string | `"50Gi"` | | -| nimOperator.llama_3_2_nv_rerankqa_1b_v2.storage.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.llama_3_2_nv_rerankqa_1b_v2.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | | nimOperator.nemoretriever_ocr_v1.authSecret | string | `"ngc-api"` | | | nimOperator.nemoretriever_ocr_v1.enabled | bool | `true` | | | nimOperator.nemoretriever_ocr_v1.env[0].name | string | `"OMP_NUM_THREADS"` | | @@ -567,7 +555,7 @@ You can also use NV-Ingest's Python client API to interact with the service runn | nimOperator.nemoretriever_ocr_v1.resources.limits."nvidia.com/gpu" | int | `1` | | | nimOperator.nemoretriever_ocr_v1.storage.pvc.create | bool | `true` | | | nimOperator.nemoretriever_ocr_v1.storage.pvc.size | string | `"25Gi"` | | -| nimOperator.nemoretriever_ocr_v1.storage.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.nemoretriever_ocr_v1.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | | nimOperator.nemotron_nano_12b_v2_vl.authSecret | string 
| `"ngc-api"` | | | nimOperator.nemotron_nano_12b_v2_vl.enabled | bool | `false` | | | nimOperator.nemotron_nano_12b_v2_vl.expose.service.grpcPort | int | `8001` | | @@ -581,7 +569,7 @@ You can also use NV-Ingest's Python client API to interact with the service runn | nimOperator.nemotron_nano_12b_v2_vl.resources.limits."nvidia.com/gpu" | int | `1` | | | nimOperator.nemotron_nano_12b_v2_vl.storage.pvc.create | bool | `true` | | | nimOperator.nemotron_nano_12b_v2_vl.storage.pvc.size | string | `"300Gi"` | | -| nimOperator.nemotron_nano_12b_v2_vl.storage.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.nemotron_nano_12b_v2_vl.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | | nimOperator.nemotron_parse.authSecret | string | `"ngc-api"` | | | nimOperator.nemotron_parse.enabled | bool | `false` | | | nimOperator.nemotron_parse.env[0].name | string | `"NIM_HTTP_API_PORT"` | | @@ -599,11 +587,11 @@ You can also use NV-Ingest's Python client API to interact with the service runn | nimOperator.nemotron_parse.resources.limits."nvidia.com/gpu" | int | `1` | | | nimOperator.nemotron_parse.storage.pvc.create | bool | `true` | | | nimOperator.nemotron_parse.storage.pvc.size | string | `"100Gi"` | | -| nimOperator.nemotron_parse.storage.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.nemotron_parse.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | | nimOperator.nimCache.pvc.create | bool | `true` | | | nimOperator.nimCache.pvc.size | string | `"25Gi"` | | | nimOperator.nimCache.pvc.storageClass | string | `"default"` | | -| nimOperator.nimCache.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.nimCache.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | | nimOperator.nimService.namespaces | list | `[]` | | | nimOperator.nimService.resources | object | `{}` | | | nimOperator.page_elements.authSecret | string | `"ngc-api"` | | @@ -651,7 +639,7 @@ You can also use NV-Ingest's Python client API to 
interact with the service runn | nimOperator.page_elements.resources.limits."nvidia.com/gpu" | int | `1` | | | nimOperator.page_elements.storage.pvc.create | bool | `true` | | | nimOperator.page_elements.storage.pvc.size | string | `"25Gi"` | | -| nimOperator.page_elements.storage.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.page_elements.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | | nimOperator.table_structure.authSecret | string | `"ngc-api"` | | | nimOperator.table_structure.enabled | bool | `true` | | | nimOperator.table_structure.env[0].name | string | `"NIM_HTTP_API_PORT"` | | @@ -677,7 +665,7 @@ You can also use NV-Ingest's Python client API to interact with the service runn | nimOperator.table_structure.resources.limits."nvidia.com/gpu" | int | `1` | | | nimOperator.table_structure.storage.pvc.create | bool | `true` | | | nimOperator.table_structure.storage.pvc.size | string | `"25Gi"` | | -| nimOperator.table_structure.storage.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.table_structure.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | | nodeSelector | object | `{}` | | | opentelemetry-collector.config.exporters.debug.verbosity | string | `"detailed"` | | | opentelemetry-collector.config.exporters.zipkin.endpoint | string | `"http://nv-ingest-zipkin:9411/api/v2/spans"` | | @@ -730,7 +718,7 @@ You can also use NV-Ingest's Python client API to interact with the service runn | prometheus.alertmanager.enabled | bool | `false` | | | prometheus.enabled | bool | `false` | | | prometheus.server.enabled | bool | `false` | | -| readinessProbe.enabled | bool | `false` | | +| readinessProbe.enabled | bool | `true` | | | readinessProbe.failureThreshold | int | `220` | | | readinessProbe.httpGet.path | string | `"/v1/health/ready"` | | | readinessProbe.httpGet.port | string | `"http"` | | diff --git a/helm/README.md.gotmpl b/helm/README.md.gotmpl index 48e79c6b4..b8d532065 100644 --- 
a/helm/README.md.gotmpl +++ b/helm/README.md.gotmpl @@ -92,6 +92,52 @@ In this case, make sure to remove the following from your helm command: --set ngcApiSecret.password="${NGC_API_KEY}" \ ``` +### Install or Upgrade the Helm Chart with NVIDIA DRA Support + +- Prerequisites: 1 GPU with at least 64GB memory + +Create a `custom-values.yaml` file with the following content: + +```yaml +# Keys are top-level values of this chart; do not nest them under `nv-ingest:` when installing the chart directly. +nimOperator: + draResources: + enabled: true + name: "nvingest-claim" + embedqa: + draResources: + - resourceClaimName: nvingest-claim + page_elements: + draResources: + - resourceClaimName: nvingest-claim + graphic_elements: + draResources: + - resourceClaimName: nvingest-claim + table_structure: + draResources: + - resourceClaimName: nvingest-claim + nemoretriever_ocr_v1: + draResources: + - resourceClaimName: nvingest-claim +``` +To install or upgrade the Helm chart with [NVIDIA DRA](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/dra-intro-install.html), run the following command. +```bash +helm upgrade \ + --install \ + nv-ingest \ + https://helm.ngc.nvidia.com/nvidia/nemo-microservices/charts/nv-ingest-25.9.0.tgz \ + -n ${NAMESPACE} \ + --username '$oauthtoken' \ + --password "${NGC_API_KEY}" \ + --set ngcImagePullSecret.create=true \ + --set ngcImagePullSecret.password="${NGC_API_KEY}" \ + --set ngcApiSecret.create=true \ + --set ngcApiSecret.password="${NGC_API_KEY}" \ + --set image.repository="nvcr.io/nvidia/nemo-microservices/nv-ingest" \ + --set image.tag="25.9.0" \ + -f ./custom-values.yaml +``` + ## Usage Jobs are submitted via the `nv-ingest-cli` command.