Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def _run_chart_inference(
model_name="paddle",
max_batch_size=1 if ocr_client.protocol == "grpc" else 2,
)
elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}:
elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python", "pipeline"}:
future_ocr_kwargs.update(
model_name=ocr_model_name,
input_names=["INPUT_IMAGE_URLS", "MERGE_LEVELS"],
Expand Down Expand Up @@ -239,9 +239,10 @@ def _create_ocr_client(
ocr_model_name: str,
auth_token: str,
) -> NimClient:
nemo_retriever_ocr_models = {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python", "pipeline"}
ocr_model_interface = (
NemoRetrieverOCRModelInterface()
if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}
if ocr_model_name in nemo_retriever_ocr_models
else PaddleOCRModelInterface()
)

Expand All @@ -250,9 +251,7 @@ def _create_ocr_client(
model_interface=ocr_model_interface,
auth_token=auth_token,
infer_protocol=ocr_protocol,
enable_dynamic_batching=(
True if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} else False
),
enable_dynamic_batching=(True if ocr_model_name in nemo_retriever_ocr_models else False),
dynamic_batch_memory_budget_mb=32,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def _update_infographic_metadata(
model_name="paddle",
max_batch_size=1 if ocr_client.protocol == "grpc" else 2,
)
elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}:
elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python", "pipeline"}:
infer_kwargs.update(
model_name=ocr_model_name,
input_names=["INPUT_IMAGE_URLS", "MERGE_LEVELS"],
Expand Down Expand Up @@ -151,9 +151,10 @@ def _create_ocr_client(
ocr_model_name: str,
auth_token: str,
) -> NimClient:
nemo_retriever_ocr_models = {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python", "pipeline"}
ocr_model_interface = (
NemoRetrieverOCRModelInterface()
if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}
if ocr_model_name in nemo_retriever_ocr_models
else PaddleOCRModelInterface()
)

Expand All @@ -162,9 +163,7 @@ def _create_ocr_client(
model_interface=ocr_model_interface,
auth_token=auth_token,
infer_protocol=ocr_protocol,
enable_dynamic_batching=(
True if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} else False
),
enable_dynamic_batching=(True if ocr_model_name in nemo_retriever_ocr_models else False),
dynamic_batch_memory_budget_mb=32,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def _update_text_metadata(
model_name="paddle",
max_batch_size=1 if ocr_client.protocol == "grpc" else 2,
)
elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}:
elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python", "pipeline"}:
infer_kwargs.update(
model_name=ocr_model_name,
input_names=["INPUT_IMAGE_URLS", "MERGE_LEVELS"],
Expand Down Expand Up @@ -141,9 +141,10 @@ def _create_ocr_client(
ocr_model_name: str,
auth_token: str,
) -> NimClient:
nemo_retriever_ocr_models = {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python", "pipeline"}
ocr_model_interface = (
NemoRetrieverOCRModelInterface()
if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}
if ocr_model_name in nemo_retriever_ocr_models
else PaddleOCRModelInterface()
)

Expand All @@ -152,9 +153,7 @@ def _create_ocr_client(
model_interface=ocr_model_interface,
auth_token=auth_token,
infer_protocol=ocr_protocol,
enable_dynamic_batching=(
True if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} else False
),
enable_dynamic_batching=(True if ocr_model_name in nemo_retriever_ocr_models else False),
dynamic_batch_memory_budget_mb=32,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def _run_inference(
model_name="paddle",
max_batch_size=1 if ocr_client.protocol == "grpc" else 2,
)
elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}:
elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python", "pipeline"}:
future_ocr_kwargs.update(
model_name=ocr_model_name,
input_names=["INPUT_IMAGE_URLS", "MERGE_LEVELS"],
Expand Down Expand Up @@ -248,9 +248,10 @@ def _create_ocr_client(
ocr_model_name: str,
auth_token: str,
) -> NimClient:
nemo_retriever_ocr_models = {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python", "pipeline"}
ocr_model_interface = (
NemoRetrieverOCRModelInterface()
if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}
if ocr_model_name in nemo_retriever_ocr_models
else PaddleOCRModelInterface()
)

Expand All @@ -259,9 +260,7 @@ def _create_ocr_client(
model_interface=ocr_model_interface,
auth_token=auth_token,
infer_protocol=ocr_protocol,
enable_dynamic_batching=(
True if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} else False
),
enable_dynamic_batching=(True if ocr_model_name in nemo_retriever_ocr_models else False),
dynamic_batch_memory_budget_mb=32,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
# Triton model names under which the NemoRetriever OCR NIM may be deployed.
# Client code treats all of these as NemoRetriever-style OCR models (as opposed
# to the "paddle" model), selecting the matching model interface and inputs.
NEMORETRIEVER_OCR_MODEL_NAME = "scene_text_wrapper"
NEMORETRIEVER_OCR_ENSEMBLE_MODEL_NAME = "scene_text_ensemble"
NEMORETRIEVER_OCR_BLS_MODEL_NAME = "scene_text_python"
# NOTE(review): the "pipeline" deployment returns a flat interleaved array
# [bbox0, text0, conf0, ...] over gRPC rather than a (batch, 3) array, and is
# reshaped specially in the gRPC response handler — confirm the served model
# is actually registered under this name.
NEMORETRIEVER_OCR_PIPELINE_MODEL_NAME = "pipeline"


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -234,11 +235,26 @@ def _extract_content_from_ocr_grpc_response(
if not isinstance(response, np.ndarray):
raise ValueError("Unexpected response format: response is not a NumPy array.")

if model_name in [
# Handle different response formats from OCR models
if model_name == NEMORETRIEVER_OCR_PIPELINE_MODEL_NAME:
# Pipeline model returns flat array (N*3,) with interleaved data:
# [bbox0, text0, conf0, bbox1, text1, conf1, ...]
# Reshape to (3, N) format
if response.ndim == 1:
if response.shape[0] % 3 != 0:
raise ValueError(
f"Pipeline response length {response.shape[0]} is not divisible by 3. "
"Expected format: [bbox0, text0, conf0, bbox1, text1, conf1, ...]"
)
batch_size = response.shape[0] // 3
# Reshape from (N*3,) to (N, 3) then transpose to (3, N)
response = response.reshape(batch_size, 3).transpose((1, 0))
elif model_name in [
NEMORETRIEVER_OCR_MODEL_NAME,
NEMORETRIEVER_OCR_ENSEMBLE_MODEL_NAME,
NEMORETRIEVER_OCR_BLS_MODEL_NAME,
]:
# Other NemoRetriever models return (batch_size, 3), transpose to (3, batch_size)
response = response.transpose((1, 0))

# If we have shape (3,), convert to (3, 1)
Expand Down
95 changes: 95 additions & 0 deletions docker-compose.multigpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Docker Compose Multi-GPU Override File
# Pins each NIM service to a specific GPU via the *_GPU_ID environment
# variables (page-elements / graphic-elements / table-structure default to
# GPU 0; ocr defaults to GPU 1) and raises the Triton batching and
# concurrency limits for higher throughput.
#
# Usage: docker compose -f docker-compose.yaml -f docker-compose.multigpu.yaml up

services:
  # NV-Ingest Runtime - mount a local ./config directory and point the
  # runtime at a custom pipeline definition.
  nv-ingest-ms-runtime:
    volumes:
      - ./config:/workspace/config
    environment:
      - INGEST_CONFIG_PATH=/workspace/config/default_pipeline.yaml

  # Page Elements NIM - Customize environment variables as needed.
  # CUDA_VISIBLE_DEVICES is 0 *inside* the container; the host GPU is
  # selected by device_ids below.
  page-elements:
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - NIM_TRITON_PIPELINE_MAX_BATCH_SIZE=64
      - NIM_TRITON_PIPELINE_MAX_QUEUE_DELAY_MICROSECONDS=5000
      - NIM_TRITON_DATA_MAX_BATCH_SIZE=16
      - NIM_TRITON_MODEL_MAX_BATCH_SIZE=16
      - NIM_TRITON_MODEL_MAX_QUEUE_DELAY_MICROSECONDS=5000
      - NIM_TRITON_WORKER_INSTANCE_COUNT=8
      - NIM_TRITON_ENABLE_PIPELINE_TIMING=true
      - NIM_TRITON_RATE_LIMIT=256 # Override base value of 3 to allow high concurrency
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["${PAGE_ELEMENTS_GPU_ID:-0}"]
              capabilities: [gpu]

  # Graphic Elements NIM - Customize environment variables as needed
  graphic-elements:
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - NIM_TRITON_PIPELINE_MAX_BATCH_SIZE=64
      - NIM_TRITON_PIPELINE_MAX_QUEUE_DELAY_MICROSECONDS=5000
      - NIM_TRITON_DATA_MAX_BATCH_SIZE=16
      - NIM_TRITON_MODEL_MAX_BATCH_SIZE=16
      - NIM_TRITON_MODEL_MAX_QUEUE_DELAY_MICROSECONDS=5000
      - NIM_TRITON_WORKER_INSTANCE_COUNT=8
      - NIM_TRITON_ENABLE_PIPELINE_TIMING=true
      - NIM_TRITON_RATE_LIMIT=256 # Override base value of 3 to allow high concurrency
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["${GRAPHIC_ELEMENTS_GPU_ID:-0}"]
              capabilities: [gpu]

  # Table Structure NIM - Customize environment variables as needed
  table-structure:
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - NIM_TRITON_PIPELINE_MAX_BATCH_SIZE=64
      - NIM_TRITON_PIPELINE_MAX_QUEUE_DELAY_MICROSECONDS=5000
      - NIM_TRITON_DATA_MAX_BATCH_SIZE=16
      - NIM_TRITON_MODEL_MAX_BATCH_SIZE=16
      - NIM_TRITON_MODEL_MAX_QUEUE_DELAY_MICROSECONDS=5000
      - NIM_TRITON_WORKER_INSTANCE_COUNT=8
      - NIM_TRITON_ENABLE_PIPELINE_TIMING=true
      - NIM_TRITON_RATE_LIMIT=256 # Override base value of 3 to allow high concurrency
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["${TABLE_STRUCTURE_GPU_ID:-0}"]
              capabilities: [gpu]

  # OCR NIM - Customize environment variables as needed.
  # Defaults to a separate GPU (host GPU 1) to keep the detection models and
  # OCR from contending for the same device.
  ocr:
    environment:
      - CUDA_VISIBLE_DEVICES=0
      - NIM_TRITON_PIPELINE_MAX_BATCH_SIZE=64
      - NIM_TRITON_PIPELINE_MAX_QUEUE_DELAY_MICROSECONDS=5000
      - NIM_TRITON_DATA_MAX_BATCH_SIZE=16
      - NIM_TRITON_MODEL_MAX_BATCH_SIZE=16
      - NIM_TRITON_MODEL_MAX_QUEUE_DELAY_MICROSECONDS=5000
      - NIM_TRITON_WORKER_INSTANCE_COUNT=8
      - NIM_TRITON_ENABLE_PIPELINE_TIMING=true
      - NIM_TRITON_RATE_LIMIT=256 # Override base value of 3 to allow high concurrency
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              device_ids: ["${OCR_GPU_ID:-1}"]
              capabilities: [gpu]
Loading