From 4298fc320e7abe01e183a9ad372abd9c908f1736 Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Tue, 20 Jan 2026 14:14:24 -0800 Subject: [PATCH 1/2] working with pipeline models Signed-off-by: Vibhu Jawa --- .../internal/extract/image/chart_extractor.py | 9 ++++----- .../extract/image/infographic_extractor.py | 9 ++++----- .../internal/extract/image/ocr_extractor.py | 9 ++++----- .../internal/extract/image/table_extractor.py | 9 ++++----- .../primitives/nim/model_interface/ocr.py | 18 +++++++++++++++++- 5 files changed, 33 insertions(+), 21 deletions(-) diff --git a/api/src/nv_ingest_api/internal/extract/image/chart_extractor.py b/api/src/nv_ingest_api/internal/extract/image/chart_extractor.py index 7e2fea053..508a6c090 100644 --- a/api/src/nv_ingest_api/internal/extract/image/chart_extractor.py +++ b/api/src/nv_ingest_api/internal/extract/image/chart_extractor.py @@ -98,7 +98,7 @@ def _run_chart_inference( model_name="paddle", max_batch_size=1 if ocr_client.protocol == "grpc" else 2, ) - elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}: + elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python", "pipeline"}: future_ocr_kwargs.update( model_name=ocr_model_name, input_names=["INPUT_IMAGE_URLS", "MERGE_LEVELS"], @@ -239,9 +239,10 @@ def _create_ocr_client( ocr_model_name: str, auth_token: str, ) -> NimClient: + nemo_retriever_ocr_models = {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python", "pipeline"} ocr_model_interface = ( NemoRetrieverOCRModelInterface() - if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} + if ocr_model_name in nemo_retriever_ocr_models else PaddleOCRModelInterface() ) @@ -250,9 +251,7 @@ def _create_ocr_client( model_interface=ocr_model_interface, auth_token=auth_token, infer_protocol=ocr_protocol, - enable_dynamic_batching=( - True if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} else 
False - ), + enable_dynamic_batching=(True if ocr_model_name in nemo_retriever_ocr_models else False), dynamic_batch_memory_budget_mb=32, ) diff --git a/api/src/nv_ingest_api/internal/extract/image/infographic_extractor.py b/api/src/nv_ingest_api/internal/extract/image/infographic_extractor.py index 048ff979c..13521804d 100644 --- a/api/src/nv_ingest_api/internal/extract/image/infographic_extractor.py +++ b/api/src/nv_ingest_api/internal/extract/image/infographic_extractor.py @@ -107,7 +107,7 @@ def _update_infographic_metadata( model_name="paddle", max_batch_size=1 if ocr_client.protocol == "grpc" else 2, ) - elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}: + elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python", "pipeline"}: infer_kwargs.update( model_name=ocr_model_name, input_names=["INPUT_IMAGE_URLS", "MERGE_LEVELS"], @@ -151,9 +151,10 @@ def _create_ocr_client( ocr_model_name: str, auth_token: str, ) -> NimClient: + nemo_retriever_ocr_models = {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python", "pipeline"} ocr_model_interface = ( NemoRetrieverOCRModelInterface() - if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} + if ocr_model_name in nemo_retriever_ocr_models else PaddleOCRModelInterface() ) @@ -162,9 +163,7 @@ def _create_ocr_client( model_interface=ocr_model_interface, auth_token=auth_token, infer_protocol=ocr_protocol, - enable_dynamic_batching=( - True if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} else False - ), + enable_dynamic_batching=(True if ocr_model_name in nemo_retriever_ocr_models else False), dynamic_batch_memory_budget_mb=32, ) diff --git a/api/src/nv_ingest_api/internal/extract/image/ocr_extractor.py b/api/src/nv_ingest_api/internal/extract/image/ocr_extractor.py index 31e69c481..6e4f47851 100644 --- a/api/src/nv_ingest_api/internal/extract/image/ocr_extractor.py +++ 
b/api/src/nv_ingest_api/internal/extract/image/ocr_extractor.py @@ -107,7 +107,7 @@ def _update_text_metadata( model_name="paddle", max_batch_size=1 if ocr_client.protocol == "grpc" else 2, ) - elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}: + elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python", "pipeline"}: infer_kwargs.update( model_name=ocr_model_name, input_names=["INPUT_IMAGE_URLS", "MERGE_LEVELS"], @@ -141,9 +141,10 @@ def _create_ocr_client( ocr_model_name: str, auth_token: str, ) -> NimClient: + nemo_retriever_ocr_models = {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python", "pipeline"} ocr_model_interface = ( NemoRetrieverOCRModelInterface() - if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} + if ocr_model_name in nemo_retriever_ocr_models else PaddleOCRModelInterface() ) @@ -152,9 +153,7 @@ def _create_ocr_client( model_interface=ocr_model_interface, auth_token=auth_token, infer_protocol=ocr_protocol, - enable_dynamic_batching=( - True if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} else False - ), + enable_dynamic_batching=(True if ocr_model_name in nemo_retriever_ocr_models else False), dynamic_batch_memory_budget_mb=32, ) diff --git a/api/src/nv_ingest_api/internal/extract/image/table_extractor.py b/api/src/nv_ingest_api/internal/extract/image/table_extractor.py index ad188de5d..74f4ed3c8 100644 --- a/api/src/nv_ingest_api/internal/extract/image/table_extractor.py +++ b/api/src/nv_ingest_api/internal/extract/image/table_extractor.py @@ -100,7 +100,7 @@ def _run_inference( model_name="paddle", max_batch_size=1 if ocr_client.protocol == "grpc" else 2, ) - elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}: + elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python", "pipeline"}: future_ocr_kwargs.update( 
model_name=ocr_model_name, input_names=["INPUT_IMAGE_URLS", "MERGE_LEVELS"], @@ -248,9 +248,10 @@ def _create_ocr_client( ocr_model_name: str, auth_token: str, ) -> NimClient: + nemo_retriever_ocr_models = {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python", "pipeline"} ocr_model_interface = ( NemoRetrieverOCRModelInterface() - if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} + if ocr_model_name in nemo_retriever_ocr_models else PaddleOCRModelInterface() ) @@ -259,9 +260,7 @@ def _create_ocr_client( model_interface=ocr_model_interface, auth_token=auth_token, infer_protocol=ocr_protocol, - enable_dynamic_batching=( - True if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} else False - ), + enable_dynamic_batching=(True if ocr_model_name in nemo_retriever_ocr_models else False), dynamic_batch_memory_budget_mb=32, ) diff --git a/api/src/nv_ingest_api/internal/primitives/nim/model_interface/ocr.py b/api/src/nv_ingest_api/internal/primitives/nim/model_interface/ocr.py index 6dc1cf089..1b70ec59f 100644 --- a/api/src/nv_ingest_api/internal/primitives/nim/model_interface/ocr.py +++ b/api/src/nv_ingest_api/internal/primitives/nim/model_interface/ocr.py @@ -24,6 +24,7 @@ NEMORETRIEVER_OCR_MODEL_NAME = "scene_text_wrapper" NEMORETRIEVER_OCR_ENSEMBLE_MODEL_NAME = "scene_text_ensemble" NEMORETRIEVER_OCR_BLS_MODEL_NAME = "scene_text_python" +NEMORETRIEVER_OCR_PIPELINE_MODEL_NAME = "pipeline" logger = logging.getLogger(__name__) @@ -234,11 +235,26 @@ def _extract_content_from_ocr_grpc_response( if not isinstance(response, np.ndarray): raise ValueError("Unexpected response format: response is not a NumPy array.") - if model_name in [ + # Handle different response formats from OCR models + if model_name == NEMORETRIEVER_OCR_PIPELINE_MODEL_NAME: + # Pipeline model returns flat array (N*3,) with interleaved data: + # [bbox0, text0, conf0, bbox1, text1, conf1, ...] 
+ # Reshape to (3, N) format + if response.ndim == 1: + if response.shape[0] % 3 != 0: + raise ValueError( + f"Pipeline response length {response.shape[0]} is not divisible by 3. " + "Expected format: [bbox0, text0, conf0, bbox1, text1, conf1, ...]" + ) + batch_size = response.shape[0] // 3 + # Reshape from (N*3,) to (N, 3) then transpose to (3, N) + response = response.reshape(batch_size, 3).transpose((1, 0)) + elif model_name in [ NEMORETRIEVER_OCR_MODEL_NAME, NEMORETRIEVER_OCR_ENSEMBLE_MODEL_NAME, NEMORETRIEVER_OCR_BLS_MODEL_NAME, ]: + # Other NemoRetriever models return (batch_size, 3), transpose to (3, batch_size) response = response.transpose((1, 0)) # If we have shape (3,), convert to (3, 1) From 27c3243a335ee0b744b1453dd12a890c3f891e10 Mon Sep 17 00:00:00 2001 From: Vibhu Jawa Date: Tue, 20 Jan 2026 23:32:08 -0800 Subject: [PATCH 2/2] add a multi gpu yaml that i use for benchmarking --- docker-compose.multigpu.yaml | 95 ++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 docker-compose.multigpu.yaml diff --git a/docker-compose.multigpu.yaml b/docker-compose.multigpu.yaml new file mode 100644 index 000000000..1c18046c7 --- /dev/null +++ b/docker-compose.multigpu.yaml @@ -0,0 +1,95 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Docker Compose Override File +# This file allows you to customize environment variables for NIM services +# and override the nv-ingest-ms-runtime inference protocols to use HTTP. 
+# +# Usage: docker compose -f docker-compose.yaml -f docker-compose.multigpu.yaml up + +services: + # NV-Ingest Runtime - Mount config and set custom pipeline + nv-ingest-ms-runtime: + volumes: + - ./config:/workspace/config + environment: + - INGEST_CONFIG_PATH=/workspace/config/default_pipeline.yaml + + # Page Elements NIM - Customize environment variables as needed + page-elements: + environment: + - CUDA_VISIBLE_DEVICES=0 + - NIM_TRITON_PIPELINE_MAX_BATCH_SIZE=64 + - NIM_TRITON_PIPELINE_MAX_QUEUE_DELAY_MICROSECONDS=5000 + - NIM_TRITON_DATA_MAX_BATCH_SIZE=16 + - NIM_TRITON_MODEL_MAX_BATCH_SIZE=16 + - NIM_TRITON_MODEL_MAX_QUEUE_DELAY_MICROSECONDS=5000 + - NIM_TRITON_WORKER_INSTANCE_COUNT=8 + - NIM_TRITON_ENABLE_PIPELINE_TIMING=true + - NIM_TRITON_RATE_LIMIT=256 # Override base value of 3 to allow high concurrency + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["${PAGE_ELEMENTS_GPU_ID:-0}"] + capabilities: [gpu] + # Graphic Elements NIM - Customize environment variables as needed + graphic-elements: + environment: + - CUDA_VISIBLE_DEVICES=0 + - NIM_TRITON_PIPELINE_MAX_BATCH_SIZE=64 + - NIM_TRITON_PIPELINE_MAX_QUEUE_DELAY_MICROSECONDS=5000 + - NIM_TRITON_DATA_MAX_BATCH_SIZE=16 + - NIM_TRITON_MODEL_MAX_BATCH_SIZE=16 + - NIM_TRITON_MODEL_MAX_QUEUE_DELAY_MICROSECONDS=5000 + - NIM_TRITON_WORKER_INSTANCE_COUNT=8 + - NIM_TRITON_ENABLE_PIPELINE_TIMING=true + - NIM_TRITON_RATE_LIMIT=256 # Override base value of 3 to allow high concurrency + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["${GRAPHIC_ELEMENTS_GPU_ID:-0}"] + capabilities: [gpu] + # Table Structure NIM - Customize environment variables as needed + table-structure: + environment: + - CUDA_VISIBLE_DEVICES=0 + - NIM_TRITON_PIPELINE_MAX_BATCH_SIZE=64 + - NIM_TRITON_PIPELINE_MAX_QUEUE_DELAY_MICROSECONDS=5000 + - NIM_TRITON_DATA_MAX_BATCH_SIZE=16 + - NIM_TRITON_MODEL_MAX_BATCH_SIZE=16 + - NIM_TRITON_MODEL_MAX_QUEUE_DELAY_MICROSECONDS=5000 + - 
NIM_TRITON_WORKER_INSTANCE_COUNT=8 + - NIM_TRITON_ENABLE_PIPELINE_TIMING=true + - NIM_TRITON_RATE_LIMIT=256 # Override base value of 3 to allow high concurrency + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["${TABLE_STRUCTURE_GPU_ID:-0}"] + capabilities: [gpu] + + # OCR NIM - Customize environment variables as needed + ocr: + environment: + - CUDA_VISIBLE_DEVICES=0 + - NIM_TRITON_PIPELINE_MAX_BATCH_SIZE=64 + - NIM_TRITON_PIPELINE_MAX_QUEUE_DELAY_MICROSECONDS=5000 + - NIM_TRITON_DATA_MAX_BATCH_SIZE=16 + - NIM_TRITON_MODEL_MAX_BATCH_SIZE=16 + - NIM_TRITON_MODEL_MAX_QUEUE_DELAY_MICROSECONDS=5000 + - NIM_TRITON_WORKER_INSTANCE_COUNT=8 + - NIM_TRITON_ENABLE_PIPELINE_TIMING=true + - NIM_TRITON_RATE_LIMIT=256 # Override base value of 3 to allow high concurrency + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["${OCR_GPU_ID:-1}"] + capabilities: [gpu] \ No newline at end of file