diff --git a/helm/README.md b/helm/README.md index b1280130a..0415a64b0 100644 --- a/helm/README.md +++ b/helm/README.md @@ -9,20 +9,8 @@ This documentation contains documentation for the NV-Ingest Helm charts. Before you install the Helm charts, be sure you meet the hardware and software prerequisites. Refer to the [supported configurations](https://github.com/NVIDIA/nv-ingest?tab=readme-ov-file#hardware). -> Starting with version 26.1.0, the [NVIDIA NIM Operator](https://docs.nvidia.com/nim-operator/latest/install.html) is required. All NIM services are now deployed by using NIM Operator CRDs (NIMCache and NIMService), not Helm subcharts. -> -> **Upgrading from 25.9.0:** -> 1. Install NIM Operator before upgrading -> 2. Update your values file with the new configuration keys: -> -> | 25.9.0 | 26.x | -> |--------|------| -> | `nim-vlm-image-captioning.deployed=true` | `nimOperator.nemotron_nano_12b_v2_vl.enabled=true` | -> | `paddleocr-nim.deployed=true` | `nimOperator.nemoretriever_ocr_v1.enabled=true` | -> | `riva-nim.deployed=true` | `nimOperator.audio.enabled=true` | -> | `nim-vlm-text-extraction.deployed=true` | `nimOperator.nemotron_parse.enabled=true` | - -The [Nvidia GPU Operator](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/getting-started.html) must also be installed and configured in your cluster. +The [Nvidia nim-operator](https://docs.nvidia.com/nim-operator/latest/install.html) must also be installed and configured in your cluster to ensure that +the Nvidia NIMs are properly deployed. 
## Initial Environment Setup @@ -102,6 +90,52 @@ In this case, make sure to remove the following from your helm command: --set ngcApiSecret.password="${NGC_API_KEY}" \ ``` +### Install or Upgrade the Helm Chart with NVIDIA DRA Support + +- Prerequisites: 1 GPU with at least 64GB memory + +Create a `custom-values.yaml` file with the following content: + +```yaml +nv-ingest: + nimOperator: + draResources: + enabled: true + name: "nvingest-claim" + embedqa: + draResources: + - resourceClaimName: nvingest-claim + page_elements: + draResources: + - resourceClaimName: nvingest-claim + graphic_elements: + draResources: + - resourceClaimName: nvingest-claim + table_structure: + draResources: + - resourceClaimName: nvingest-claim + nemoretriever_ocr_v1: + draResources: + - resourceClaimName: nvingest-claim +``` +To install or upgrade the Helm chart with [NVIDIA DRA](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/dra-intro-install.html), run the following code. +```bash +helm upgrade \ + --install \ + nv-ingest \ + https://helm.ngc.nvidia.com/nvidia/nemo-microservices/charts/nv-ingest-25.9.0.tgz \ + -n ${NAMESPACE} \ + --username '$oauthtoken' \ + --password "${NGC_API_KEY}" \ + --set ngcImagePullSecret.create=true \ + --set ngcImagePullSecret.password="${NGC_API_KEY}" \ + --set ngcApiSecret.create=true \ + --set ngcApiSecret.password="${NGC_API_KEY}" \ + --set image.repository="nvcr.io/nvidia/nemo-microservices/nv-ingest" \ + --set image.tag="25.9.0" \ + -f ./custom-values.yaml +``` + ## Usage Jobs are submitted via the `nv-ingest-cli` command. 
@@ -428,7 +462,9 @@ You can also use NV-Ingest's Python client API to interact with the service runn | nimOperator.audio.resources.limits."nvidia.com/gpu" | int | `1` | | | nimOperator.audio.storage.pvc.create | bool | `true` | | | nimOperator.audio.storage.pvc.size | string | `"25Gi"` | | -| nimOperator.audio.storage.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.audio.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | +| nimOperator.draResources.enabled | bool | `false` | | +| nimOperator.draResources.name | string | `"nvingest-claim"` | | | nimOperator.embedqa.authSecret | string | `"ngc-api"` | | | nimOperator.embedqa.enabled | bool | `true` | | | nimOperator.embedqa.env[0].name | string | `"NIM_HTTP_API_PORT"` | | @@ -445,12 +481,12 @@ You can also use NV-Ingest's Python client API to interact with the service runn | nimOperator.embedqa.image.pullPolicy | string | `"IfNotPresent"` | | | nimOperator.embedqa.image.pullSecrets[0] | string | `"ngc-secret"` | | | nimOperator.embedqa.image.repository | string | `"nvcr.io/nim/nvidia/llama-3.2-nv-embedqa-1b-v2"` | | -| nimOperator.embedqa.image.tag | string | `"1.10.0"` | | +| nimOperator.embedqa.image.tag | string | `"1.10.1"` | | | nimOperator.embedqa.replicas | int | `1` | | | nimOperator.embedqa.resources.limits."nvidia.com/gpu" | int | `1` | | | nimOperator.embedqa.storage.pvc.create | bool | `true` | | | nimOperator.embedqa.storage.pvc.size | string | `"50Gi"` | | -| nimOperator.embedqa.storage.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.embedqa.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | | nimOperator.graphic_elements.authSecret | string | `"ngc-api"` | | | nimOperator.graphic_elements.enabled | bool | `true` | | | nimOperator.graphic_elements.env[0].name | string | `"NIM_HTTP_API_PORT"` | | @@ -476,7 +512,7 @@ You can also use NV-Ingest's Python client API to interact with the service runn | 
nimOperator.graphic_elements.resources.limits."nvidia.com/gpu" | int | `1` | | | nimOperator.graphic_elements.storage.pvc.create | bool | `true` | | | nimOperator.graphic_elements.storage.pvc.size | string | `"25Gi"` | | -| nimOperator.graphic_elements.storage.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.graphic_elements.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | | nimOperator.llama_3_2_nv_rerankqa_1b_v2.authSecret | string | `"ngc-api"` | | | nimOperator.llama_3_2_nv_rerankqa_1b_v2.enabled | bool | `false` | | | nimOperator.llama_3_2_nv_rerankqa_1b_v2.env | list | `[]` | | @@ -491,7 +527,7 @@ You can also use NV-Ingest's Python client API to interact with the service runn | nimOperator.llama_3_2_nv_rerankqa_1b_v2.resources.limits."nvidia.com/gpu" | int | `1` | | | nimOperator.llama_3_2_nv_rerankqa_1b_v2.storage.pvc.create | bool | `true` | | | nimOperator.llama_3_2_nv_rerankqa_1b_v2.storage.pvc.size | string | `"50Gi"` | | -| nimOperator.llama_3_2_nv_rerankqa_1b_v2.storage.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.llama_3_2_nv_rerankqa_1b_v2.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | | nimOperator.nemoretriever_ocr_v1.authSecret | string | `"ngc-api"` | | | nimOperator.nemoretriever_ocr_v1.enabled | bool | `true` | | | nimOperator.nemoretriever_ocr_v1.env[0].name | string | `"OMP_NUM_THREADS"` | | @@ -519,7 +555,7 @@ You can also use NV-Ingest's Python client API to interact with the service runn | nimOperator.nemoretriever_ocr_v1.resources.limits."nvidia.com/gpu" | int | `1` | | | nimOperator.nemoretriever_ocr_v1.storage.pvc.create | bool | `true` | | | nimOperator.nemoretriever_ocr_v1.storage.pvc.size | string | `"25Gi"` | | -| nimOperator.nemoretriever_ocr_v1.storage.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.nemoretriever_ocr_v1.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | | nimOperator.nemotron_nano_12b_v2_vl.authSecret | string 
| `"ngc-api"` | | | nimOperator.nemotron_nano_12b_v2_vl.enabled | bool | `false` | | | nimOperator.nemotron_nano_12b_v2_vl.expose.service.grpcPort | int | `8001` | | @@ -533,7 +569,7 @@ You can also use NV-Ingest's Python client API to interact with the service runn | nimOperator.nemotron_nano_12b_v2_vl.resources.limits."nvidia.com/gpu" | int | `1` | | | nimOperator.nemotron_nano_12b_v2_vl.storage.pvc.create | bool | `true` | | | nimOperator.nemotron_nano_12b_v2_vl.storage.pvc.size | string | `"300Gi"` | | -| nimOperator.nemotron_nano_12b_v2_vl.storage.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.nemotron_nano_12b_v2_vl.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | | nimOperator.nemotron_parse.authSecret | string | `"ngc-api"` | | | nimOperator.nemotron_parse.enabled | bool | `false` | | | nimOperator.nemotron_parse.env[0].name | string | `"NIM_HTTP_API_PORT"` | | @@ -551,11 +587,11 @@ You can also use NV-Ingest's Python client API to interact with the service runn | nimOperator.nemotron_parse.resources.limits."nvidia.com/gpu" | int | `1` | | | nimOperator.nemotron_parse.storage.pvc.create | bool | `true` | | | nimOperator.nemotron_parse.storage.pvc.size | string | `"100Gi"` | | -| nimOperator.nemotron_parse.storage.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.nemotron_parse.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | | nimOperator.nimCache.pvc.create | bool | `true` | | | nimOperator.nimCache.pvc.size | string | `"25Gi"` | | | nimOperator.nimCache.pvc.storageClass | string | `"default"` | | -| nimOperator.nimCache.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.nimCache.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | | nimOperator.nimService.namespaces | list | `[]` | | | nimOperator.nimService.resources | object | `{}` | | | nimOperator.page_elements.authSecret | string | `"ngc-api"` | | @@ -603,7 +639,7 @@ You can also use NV-Ingest's Python client API to 
interact with the service runn | nimOperator.page_elements.resources.limits."nvidia.com/gpu" | int | `1` | | | nimOperator.page_elements.storage.pvc.create | bool | `true` | | | nimOperator.page_elements.storage.pvc.size | string | `"25Gi"` | | -| nimOperator.page_elements.storage.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.page_elements.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | | nimOperator.table_structure.authSecret | string | `"ngc-api"` | | | nimOperator.table_structure.enabled | bool | `true` | | | nimOperator.table_structure.env[0].name | string | `"NIM_HTTP_API_PORT"` | | @@ -629,7 +665,7 @@ You can also use NV-Ingest's Python client API to interact with the service runn | nimOperator.table_structure.resources.limits."nvidia.com/gpu" | int | `1` | | | nimOperator.table_structure.storage.pvc.create | bool | `true` | | | nimOperator.table_structure.storage.pvc.size | string | `"25Gi"` | | -| nimOperator.table_structure.storage.pvc.volumeAccessMode | string | `"ReadWriteMany"` | | +| nimOperator.table_structure.storage.pvc.volumeAccessMode | string | `"ReadWriteOnce"` | | | nodeSelector | object | `{}` | | | opentelemetry-collector.config.exporters.debug.verbosity | string | `"detailed"` | | | opentelemetry-collector.config.exporters.zipkin.endpoint | string | `"http://nv-ingest-zipkin:9411/api/v2/spans"` | | @@ -682,7 +718,7 @@ You can also use NV-Ingest's Python client API to interact with the service runn | prometheus.alertmanager.enabled | bool | `false` | | | prometheus.enabled | bool | `false` | | | prometheus.server.enabled | bool | `false` | | -| readinessProbe.enabled | bool | `false` | | +| readinessProbe.enabled | bool | `true` | | | readinessProbe.failureThreshold | int | `220` | | | readinessProbe.httpGet.path | string | `"/v1/health/ready"` | | | readinessProbe.httpGet.port | string | `"http"` | | diff --git a/helm/README.md.gotmpl b/helm/README.md.gotmpl index 48e79c6b4..b8d532065 100644 --- 
a/helm/README.md.gotmpl +++ b/helm/README.md.gotmpl @@ -92,6 +92,52 @@ In this case, make sure to remove the following from your helm command: --set ngcApiSecret.password="${NGC_API_KEY}" \ ``` +### Install or Upgrade the Helm Chart with NVIDIA DRA Support + +- Prerequisites: 1 GPU with at least 64GB memory + +Create a `custom-values.yaml` file with the following content: + +```yaml +nv-ingest: + nimOperator: + draResources: + enabled: true + name: "nvingest-claim" + embedqa: + draResources: + - resourceClaimName: nvingest-claim + page_elements: + draResources: + - resourceClaimName: nvingest-claim + graphic_elements: + draResources: + - resourceClaimName: nvingest-claim + table_structure: + draResources: + - resourceClaimName: nvingest-claim + nemoretriever_ocr_v1: + draResources: + - resourceClaimName: nvingest-claim +``` +To install or upgrade the Helm chart with [NVIDIA DRA](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/dra-intro-install.html), run the following code. +```bash +helm upgrade \ + --install \ + nv-ingest \ + https://helm.ngc.nvidia.com/nvidia/nemo-microservices/charts/nv-ingest-25.9.0.tgz \ + -n ${NAMESPACE} \ + --username '$oauthtoken' \ + --password "${NGC_API_KEY}" \ + --set ngcImagePullSecret.create=true \ + --set ngcImagePullSecret.password="${NGC_API_KEY}" \ + --set ngcApiSecret.create=true \ + --set ngcApiSecret.password="${NGC_API_KEY}" \ + --set image.repository="nvcr.io/nvidia/nemo-microservices/nv-ingest" \ + --set image.tag="25.9.0" \ + -f ./custom-values.yaml +``` + ## Usage Jobs are submitted via the `nv-ingest-cli` command. 
diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml index 29307fdd8..5872a582d 100644 --- a/helm/templates/deployment.yaml +++ b/helm/templates/deployment.yaml @@ -145,8 +145,15 @@ spec: readinessProbe: {{- toYaml (omit .Values.readinessProbe "enabled") | nindent 12 }} {{- end }} + {{- if .Values.nimOperator.draResources.enabled }} resources: - {{- toYaml .Values.resources | nindent 12 }} + claims: + - name: shared-gpu + {{- end }} + {{- if not .Values.nimOperator.draResources.enabled }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- end }} volumeMounts: - mountPath: /dev/shm name: dshm @@ -163,6 +170,11 @@ spec: nodeSelector: {{- toYaml . | nindent 8 }} {{- end }} + {{- if .Values.nimOperator.draResources.enabled }} + resourceClaims: + - name: shared-gpu + resourceClaimName: {{ .Values.nimOperator.draResources.name }} + {{- end }} {{- with .Values.affinity }} affinity: {{- toYaml . | nindent 8 }} @@ -183,4 +195,4 @@ spec: - name: {{ $k }} {{- toYaml $v | nindent 10 }} {{- end }} - {{- end }} + {{- end }} \ No newline at end of file diff --git a/helm/templates/dra.yaml b/helm/templates/dra.yaml new file mode 100644 index 000000000..5bfd43179 --- /dev/null +++ b/helm/templates/dra.yaml @@ -0,0 +1,23 @@ +{{- $claimName := .Values.nimOperator.draResources.name -}} +{{- $existing := lookup "resource.k8s.io/v1beta2" "ResourceClaim" .Release.Namespace $claimName -}} +{{- if and + (.Capabilities.APIVersions.Has "resource.k8s.io/v1beta2") + (.Values.nimOperator.draResources.enabled) + (not $existing) +}} +apiVersion: resource.k8s.io/v1beta2 +kind: ResourceClaim +metadata: + name: {{ $claimName }} + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-weight": "5" +spec: + devices: + requests: + - name: {{ $claimName }}-gpu + exactly: + deviceClassName: gpu.nvidia.com + count: 1 + allocationMode: ExactCount +{{- end }} \ No newline at end of file diff --git a/helm/templates/llama-3.2-nv-embedqa-1b-v2.yaml 
b/helm/templates/llama-3.2-nv-embedqa-1b-v2.yaml index e4479ca88..862d1293a 100644 --- a/helm/templates/llama-3.2-nv-embedqa-1b-v2.yaml +++ b/helm/templates/llama-3.2-nv-embedqa-1b-v2.yaml @@ -36,12 +36,18 @@ spec: replicas: {{ .Values.nimOperator.embedqa.replicas }} nodeSelector: {{ toYaml .Values.nimOperator.embedqa.nodeSelector | nindent 4 }} +{{- if .Values.nimOperator.draResources.enabled }} + draResources: +{{ toYaml .Values.nimOperator.embedqa.draResources | nindent 4 }} +{{- end }} +{{- if not .Values.nimOperator.draResources.enabled }} resources: -{{ toYaml .Values.nimOperator.embedqa.resources | nindent 4 }} +{{ toYaml .Values.nimOperator.embedqa.resources | nindent 4 }} +{{- end }} tolerations: {{ toYaml .Values.nimOperator.embedqa.tolerations | nindent 4 }} expose: {{ toYaml .Values.nimOperator.embedqa.expose | nindent 4 }} env: {{ toYaml .Values.nimOperator.embedqa.env | nindent 4 }} -{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/templates/llama-3.2-nv-rerankqa-1b-v2.yaml b/helm/templates/llama-3.2-nv-rerankqa-1b-v2.yaml index 066fb9215..35b087e1c 100644 --- a/helm/templates/llama-3.2-nv-rerankqa-1b-v2.yaml +++ b/helm/templates/llama-3.2-nv-rerankqa-1b-v2.yaml @@ -36,8 +36,14 @@ spec: replicas: {{ .Values.nimOperator.llama_3_2_nv_rerankqa_1b_v2.replicas }} nodeSelector: {{ toYaml .Values.nimOperator.llama_3_2_nv_rerankqa_1b_v2.nodeSelector | nindent 4 }} +{{- if .Values.nimOperator.draResources.enabled }} + draResources: +{{ toYaml .Values.nimOperator.llama_3_2_nv_rerankqa_1b_v2.draResources | nindent 4 }} +{{- end }} +{{- if not .Values.nimOperator.draResources.enabled }} resources: {{ toYaml .Values.nimOperator.llama_3_2_nv_rerankqa_1b_v2.resources | nindent 4 }} +{{- end }} tolerations: {{ toYaml .Values.nimOperator.llama_3_2_nv_rerankqa_1b_v2.tolerations | nindent 4 }} expose: diff --git a/helm/templates/nemoretriever-graphic-elements-v1.yaml 
b/helm/templates/nemoretriever-graphic-elements-v1.yaml index 2c1e97555..c32d63827 100644 --- a/helm/templates/nemoretriever-graphic-elements-v1.yaml +++ b/helm/templates/nemoretriever-graphic-elements-v1.yaml @@ -36,8 +36,14 @@ spec: replicas: {{ .Values.nimOperator.graphic_elements.replicas }} nodeSelector: {{ toYaml .Values.nimOperator.graphic_elements.nodeSelector | nindent 4 }} +{{- if .Values.nimOperator.draResources.enabled }} + draResources: +{{ toYaml .Values.nimOperator.graphic_elements.draResources | nindent 4 }} +{{- end }} +{{- if not .Values.nimOperator.draResources.enabled }} resources: {{ toYaml .Values.nimOperator.graphic_elements.resources | nindent 4 }} +{{- end }} tolerations: {{ toYaml .Values.nimOperator.graphic_elements.tolerations | nindent 4 }} expose: diff --git a/helm/templates/nemoretriever-ocr-v1.yaml b/helm/templates/nemoretriever-ocr-v1.yaml index 5cba31147..91187d12b 100644 --- a/helm/templates/nemoretriever-ocr-v1.yaml +++ b/helm/templates/nemoretriever-ocr-v1.yaml @@ -34,7 +34,14 @@ spec: name: nemoretriever-ocr-v1 replicas: {{ .Values.nimOperator.nemoretriever_ocr_v1.replicas }} nodeSelector: {{ toYaml .Values.nimOperator.nemoretriever_ocr_v1.nodeSelector | nindent 4 }} - resources: {{ toYaml .Values.nimOperator.nemoretriever_ocr_v1.resources | nindent 4 }} +{{- if .Values.nimOperator.draResources.enabled }} + draResources: +{{ toYaml .Values.nimOperator.nemoretriever_ocr_v1.draResources | nindent 4 }} +{{- end }} +{{- if not .Values.nimOperator.draResources.enabled }} + resources: +{{ toYaml .Values.nimOperator.nemoretriever_ocr_v1.resources | nindent 4 }} +{{- end }} tolerations: {{ toYaml .Values.nimOperator.nemoretriever_ocr_v1.tolerations | nindent 4 }} expose: {{ toYaml .Values.nimOperator.nemoretriever_ocr_v1.expose | nindent 4 }} env: {{ toYaml .Values.nimOperator.nemoretriever_ocr_v1.env | nindent 4 }} diff --git a/helm/templates/nemoretriever-page-elements-v3.yaml b/helm/templates/nemoretriever-page-elements-v3.yaml old 
mode 100644 new mode 100755 index a2567bbf4..463880951 --- a/helm/templates/nemoretriever-page-elements-v3.yaml +++ b/helm/templates/nemoretriever-page-elements-v3.yaml @@ -36,7 +36,14 @@ spec: profile: '' replicas: {{ .Values.nimOperator.page_elements.replicas }} nodeSelector: {{ toYaml .Values.nimOperator.page_elements.nodeSelector | nindent 4 }} - resources: {{ toYaml .Values.nimOperator.page_elements.resources | nindent 4 }} +{{- if .Values.nimOperator.draResources.enabled }} + draResources: +{{ toYaml .Values.nimOperator.page_elements.draResources | nindent 4 }} +{{- end }} +{{- if not .Values.nimOperator.draResources.enabled }} + resources: +{{ toYaml .Values.nimOperator.page_elements.resources | nindent 4 }} +{{- end }} tolerations: {{ toYaml .Values.nimOperator.page_elements.tolerations | nindent 4 }} expose: {{ toYaml .Values.nimOperator.page_elements.expose | nindent 4 }} env: {{ toYaml .Values.nimOperator.page_elements.env | nindent 4 }} diff --git a/helm/templates/nemoretriever-table-structure-v1.yaml b/helm/templates/nemoretriever-table-structure-v1.yaml index 20fb7bb6f..8c1352f9c 100644 --- a/helm/templates/nemoretriever-table-structure-v1.yaml +++ b/helm/templates/nemoretriever-table-structure-v1.yaml @@ -36,8 +36,14 @@ spec: replicas: {{ .Values.nimOperator.table_structure.replicas }} nodeSelector: {{ toYaml .Values.nimOperator.table_structure.nodeSelector | nindent 4 }} +{{- if .Values.nimOperator.draResources.enabled }} + draResources: +{{ toYaml .Values.nimOperator.table_structure.draResources | nindent 4 }} +{{- end }} +{{- if not .Values.nimOperator.draResources.enabled }} resources: {{ toYaml .Values.nimOperator.table_structure.resources | nindent 4 }} +{{- end }} tolerations: {{ toYaml .Values.nimOperator.table_structure.tolerations | nindent 4 }} expose: diff --git a/helm/templates/nemotron-nano-12b-v2-vl.yaml b/helm/templates/nemotron-nano-12b-v2-vl.yaml index 1b5c1ca9d..b6d01c8a1 100644 --- a/helm/templates/nemotron-nano-12b-v2-vl.yaml +++ 
b/helm/templates/nemotron-nano-12b-v2-vl.yaml @@ -34,8 +34,14 @@ spec: nimCache: name: nemotron-nano-12b-v2-vl replicas: {{ .Values.nimOperator.nemotron_nano_12b_v2_vl.replicas }} +{{- if .Values.nimOperator.draResources.enabled }} + draResources: +{{ toYaml .Values.nimOperator.nemotron_nano_12b_v2_vl.draResources | nindent 4 }} +{{- end }} +{{- if not .Values.nimOperator.draResources.enabled }} resources: {{ toYaml .Values.nimOperator.nemotron_nano_12b_v2_vl.resources | nindent 4 }} +{{- end }} tolerations: {{ toYaml .Values.nimOperator.nemotron_nano_12b_v2_vl.tolerations | nindent 4 }} expose: diff --git a/helm/values.yaml b/helm/values.yaml index f69c83fe6..ac0587ead 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -599,6 +599,9 @@ containerArgs: [] ## @section Nim Operator parameters ## @param nimCache.pvc.storageClass Specifies the storage class for the PVCs nimOperator: + draResources: + enabled: false + name: "nvingest-claim" ## @section Nim Operator NimCache Configuration ## @param nimOperator.nimCache.pvc.storageClass [string] Storage class to use for all NimCache PVCs. Overrides per-model storageClass if set. 
nimCache: @@ -658,11 +661,12 @@ nimOperator: resources: limits: nvidia.com/gpu: 1 +# draResources: +# - resourceClaimName: nvingest-claim expose: service: type: ClusterIP port: 8000 - service: grpcPort: 8001 env: - name: NIM_HTTP_API_PORT @@ -726,6 +730,8 @@ nimOperator: resources: limits: nvidia.com/gpu: 1 +# draResources: +# - resourceClaimName: nvingest-claim expose: service: type: ClusterIP @@ -772,6 +778,8 @@ nimOperator: resources: limits: nvidia.com/gpu: 1 +# draResources: +# - resourceClaimName: nvingest-claim expose: service: type: ClusterIP @@ -818,6 +826,8 @@ nimOperator: resources: limits: nvidia.com/gpu: 1 +# draResources: +# - resourceClaimName: nvingest-claim expose: service: type: ClusterIP @@ -860,6 +870,8 @@ nimOperator: resources: limits: nvidia.com/gpu: 1 +# draResources: +# - resourceClaimName: nvingest-claim expose: service: type: ClusterIP @@ -908,6 +920,8 @@ nimOperator: resources: limits: nvidia.com/gpu: 1 +# draResources: +# - resourceClaimName: nvingest-claim expose: service: type: ClusterIP @@ -988,6 +1002,8 @@ nimOperator: resources: limits: nvidia.com/gpu: 1 +# draResources: +# - resourceClaimName: nvingest-claim expose: service: type: ClusterIP