diff --git a/api/src/nv_ingest_api/internal/extract/image/chart_extractor.py b/api/src/nv_ingest_api/internal/extract/image/chart_extractor.py index 7e2fea053..428f96333 100644 --- a/api/src/nv_ingest_api/internal/extract/image/chart_extractor.py +++ b/api/src/nv_ingest_api/internal/extract/image/chart_extractor.py @@ -98,7 +98,7 @@ def _run_chart_inference( model_name="paddle", max_batch_size=1 if ocr_client.protocol == "grpc" else 2, ) - elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}: + elif ocr_model_name in {"pipeline", "scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}: future_ocr_kwargs.update( model_name=ocr_model_name, input_names=["INPUT_IMAGE_URLS", "MERGE_LEVELS"], @@ -241,7 +241,7 @@ def _create_ocr_client( ) -> NimClient: ocr_model_interface = ( NemoRetrieverOCRModelInterface() - if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} + if ocr_model_name in {"pipeline", "scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} else PaddleOCRModelInterface() ) diff --git a/api/src/nv_ingest_api/internal/extract/image/infographic_extractor.py b/api/src/nv_ingest_api/internal/extract/image/infographic_extractor.py index 048ff979c..1308312fa 100644 --- a/api/src/nv_ingest_api/internal/extract/image/infographic_extractor.py +++ b/api/src/nv_ingest_api/internal/extract/image/infographic_extractor.py @@ -107,7 +107,7 @@ def _update_infographic_metadata( model_name="paddle", max_batch_size=1 if ocr_client.protocol == "grpc" else 2, ) - elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}: + elif ocr_model_name in {"pipeline", "scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}: infer_kwargs.update( model_name=ocr_model_name, input_names=["INPUT_IMAGE_URLS", "MERGE_LEVELS"], @@ -153,7 +153,7 @@ def _create_ocr_client( ) -> NimClient: ocr_model_interface = ( NemoRetrieverOCRModelInterface() - if ocr_model_name in 
{"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} + if ocr_model_name in {"pipeline", "scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} else PaddleOCRModelInterface() ) diff --git a/api/src/nv_ingest_api/internal/extract/image/ocr_extractor.py b/api/src/nv_ingest_api/internal/extract/image/ocr_extractor.py index 31e69c481..87a8fc7f2 100644 --- a/api/src/nv_ingest_api/internal/extract/image/ocr_extractor.py +++ b/api/src/nv_ingest_api/internal/extract/image/ocr_extractor.py @@ -107,7 +107,7 @@ def _update_text_metadata( model_name="paddle", max_batch_size=1 if ocr_client.protocol == "grpc" else 2, ) - elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}: + elif ocr_model_name in {"pipeline", "scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}: infer_kwargs.update( model_name=ocr_model_name, input_names=["INPUT_IMAGE_URLS", "MERGE_LEVELS"], @@ -143,7 +143,7 @@ def _create_ocr_client( ) -> NimClient: ocr_model_interface = ( NemoRetrieverOCRModelInterface() - if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} + if ocr_model_name in {"pipeline", "scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} else PaddleOCRModelInterface() ) diff --git a/api/src/nv_ingest_api/internal/extract/image/table_extractor.py b/api/src/nv_ingest_api/internal/extract/image/table_extractor.py index ad188de5d..920390708 100644 --- a/api/src/nv_ingest_api/internal/extract/image/table_extractor.py +++ b/api/src/nv_ingest_api/internal/extract/image/table_extractor.py @@ -100,7 +100,7 @@ def _run_inference( model_name="paddle", max_batch_size=1 if ocr_client.protocol == "grpc" else 2, ) - elif ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}: + elif ocr_model_name in {"pipeline", "scene_text_ensemble", "scene_text_wrapper", "scene_text_python"}: future_ocr_kwargs.update( model_name=ocr_model_name, input_names=["INPUT_IMAGE_URLS", 
"MERGE_LEVELS"], @@ -250,7 +250,7 @@ def _create_ocr_client( ) -> NimClient: ocr_model_interface = ( NemoRetrieverOCRModelInterface() - if ocr_model_name in {"scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} + if ocr_model_name in {"pipeline", "scene_text_ensemble", "scene_text_wrapper", "scene_text_python"} else PaddleOCRModelInterface() ) diff --git a/api/src/nv_ingest_api/internal/primitives/nim/default_values.py b/api/src/nv_ingest_api/internal/primitives/nim/default_values.py index 03a6f83ae..6e55db1c9 100644 --- a/api/src/nv_ingest_api/internal/primitives/nim/default_values.py +++ b/api/src/nv_ingest_api/internal/primitives/nim/default_values.py @@ -5,7 +5,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -YOLOX_MAX_BATCH_SIZE = 8 +YOLOX_MAX_BATCH_SIZE = 16 YOLOX_MAX_WIDTH = 1536 YOLOX_MAX_HEIGHT = 1536 YOLOX_CONF_THRESHOLD = 0.01 diff --git a/api/src/nv_ingest_api/internal/primitives/nim/model_interface/ocr.py b/api/src/nv_ingest_api/internal/primitives/nim/model_interface/ocr.py index 6dc1cf089..ff64db991 100644 --- a/api/src/nv_ingest_api/internal/primitives/nim/model_interface/ocr.py +++ b/api/src/nv_ingest_api/internal/primitives/nim/model_interface/ocr.py @@ -20,10 +20,10 @@ from nv_ingest_api.internal.primitives.nim.model_interface.helpers import preprocess_image_for_paddle from nv_ingest_api.util.image_processing.transforms import base64_to_numpy -DEFAULT_OCR_MODEL_NAME = "scene_text_ensemble" -NEMORETRIEVER_OCR_MODEL_NAME = "scene_text_wrapper" -NEMORETRIEVER_OCR_ENSEMBLE_MODEL_NAME = "scene_text_ensemble" -NEMORETRIEVER_OCR_BLS_MODEL_NAME = "scene_text_python" +DEFAULT_OCR_MODEL_NAME = "pipeline" +NEMORETRIEVER_OCR_MODEL_NAME = "pipeline" +NEMORETRIEVER_OCR_ENSEMBLE_MODEL_NAME = "pipeline" +NEMORETRIEVER_OCR_BLS_MODEL_NAME = "pipeline" logger = logging.getLogger(__name__) @@ -202,64 +202,57 @@ def _extract_content_from_ocr_grpc_response( model_name: str = DEFAULT_OCR_MODEL_NAME, ) -> List[Tuple[str, str]]: """ - Parse a gRPC response 
for one or more images. The response can have two possible shapes: - - (3,) for batch_size=1 - - (3, n) for batch_size=n + Parse a gRPC response for one or more images from the BLS pipeline. - In either case: - response[0, i]: byte string containing bounding box data - response[1, i]: byte string containing text prediction data - response[2, i]: (Optional) additional data/metadata (ignored here) + For BLS pipeline, output shape is [1, N*3] which gets reshaped to [N, 3] where: + - response[i, 0]: byte string containing bounding box data + - response[i, 1]: byte string containing text prediction data + - response[i, 2]: byte string containing confidence scores Parameters ---------- response : np.ndarray - The raw NumPy array from gRPC. Expected shape: (3,) or (3, n). - table_content_format : str - The format of the output text content, e.g. 'simple' or 'pseudo_markdown'. - dims : list of dict, optional + The raw NumPy array from gRPC. Expected shape: (1, N*3) for BLS pipeline. + model_name : str + The name of the model used for inference. + dimensions : list of dict, optional A list of dict for each corresponding image, used for bounding box scaling. Returns ------- - list of (str, str) - A list of (content, table_content_format) for each image. + list of [bounding_boxes, text_predictions, conf_scores] + A list of results for each image. Raises ------ ValueError - If the response is not a NumPy array or has an unexpected shape, - or if the `table_content_format` is unrecognized. + If the response is not a NumPy array or has an unexpected shape. 
""" if not isinstance(response, np.ndarray): raise ValueError("Unexpected response format: response is not a NumPy array.") - if model_name in [ - NEMORETRIEVER_OCR_MODEL_NAME, - NEMORETRIEVER_OCR_ENSEMBLE_MODEL_NAME, - NEMORETRIEVER_OCR_BLS_MODEL_NAME, - ]: - response = response.transpose((1, 0)) - - # If we have shape (3,), convert to (3, 1) - if response.ndim == 1 and response.shape == (3,): - response = response.reshape(3, 1) - elif response.ndim != 2 or response.shape[0] != 3: - raise ValueError(f"Unexpected response shape: {response.shape}. Expecting (3,) or (3, n).") - batch_size = response.shape[1] + # BLS pipeline returns shape [1, N*3] - flatten and reshape to [N, 3] + # Each row contains [bboxes_json, texts_json, scores_json] as byte strings + flat_response = response.flatten() + if flat_response.size % 3 != 0: + raise ValueError(f"Unexpected response size: {flat_response.size}. Expected multiple of 3.") + + num_images = flat_response.size // 3 + response = flat_response.reshape(num_images, 3) + results: List[Tuple[str, str]] = [] - for i in range(batch_size): + for i in range(num_images): # 1) Parse bounding boxes - bboxes_bytestr: bytes = response[0, i] + bboxes_bytestr: bytes = response[i, 0] bounding_boxes = json.loads(bboxes_bytestr.decode("utf8")) # 2) Parse text predictions - texts_bytestr: bytes = response[1, i] + texts_bytestr: bytes = response[i, 1] text_predictions = json.loads(texts_bytestr.decode("utf8")) # 3) Parse confidence scores - confs_bytestr: bytes = response[2, i] + confs_bytestr: bytes = response[i, 2] conf_scores = json.loads(confs_bytestr.decode("utf8")) # Some gRPC responses nest single-item lists; flatten them if needed @@ -699,25 +692,26 @@ def _format_single_batch( merge_level = kwargs.get("merge_level", "paragraph") if protocol == "grpc": - logger.debug("Formatting input for gRPC OCR model (batched).") - processed: List[np.ndarray] = [] + logger.debug("Formatting input for gRPC OCR BLS pipeline model (batched).") + # Build 
image URLs with data URL prefix as expected by NIM BLS pipeline + image_urls = [] for img, shape in zip(batch_images, batch_dims): _dims = {"new_width": shape[1], "new_height": shape[0]} dims.append(_dims) + image_url = f"data:image/png;base64,{img}" + image_urls.append(image_url) - arr = np.array([img], dtype=np.object_) - arr = np.expand_dims(arr, axis=0) - processed.append(arr) - - batched_input = np.concatenate(processed, axis=0) + # Create input arrays with shape [1, N] for BLS pipeline + # This matches the NIM's expected format: batch of 1 request containing N images + num_images = len(image_urls) + image_array = np.array(image_urls, dtype=np.object_).reshape(1, num_images) - batch_size = batched_input.shape[0] + # Merge levels with shape [1, N] for BLS pipeline + merge_levels_list = [merge_level] * num_images + merge_levels_array = np.array(merge_levels_list, dtype=np.object_).reshape(1, num_images) - merge_levels_list = [[merge_level] for _ in range(batch_size)] - merge_levels = np.array(merge_levels_list, dtype="object") - - final_batch = [batched_input, merge_levels] + final_batch = [image_array, merge_levels_array] batch_data = {"image_dims": dims} return final_batch, batch_data @@ -768,7 +762,14 @@ def get_ocr_model_name(ocr_grpc_endpoint=None, default_model_name=DEFAULT_OCR_MO client = grpcclient.InferenceServerClient(ocr_grpc_endpoint) model_index = client.get_model_repository_index(as_json=True) model_names = [x["name"] for x in model_index.get("models", [])] - ocr_model_name = model_names[0] + + # Prefer 'pipeline' model if available (BLS model) + if "pipeline" in model_names: + ocr_model_name = "pipeline" + elif "scene_text_ensemble" in model_names: + ocr_model_name = "scene_text_ensemble" + else: + ocr_model_name = model_names[0] if model_names else default_model_name except Exception: logger.warning(f"Failed to get ocr model name after 30 seconds. 
Falling back to '{default_model_name}'.") ocr_model_name = default_model_name diff --git a/api/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py b/api/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py index 8045d470a..d91fdfd0f 100644 --- a/api/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +++ b/api/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py @@ -131,10 +131,12 @@ def __init__( self.class_labels = class_labels if endpoints: - self.model_name = get_yolox_model_name(endpoints[0], default_model_name="yolox_ensemble") - self._grpc_uses_bls = self.model_name == "pipeline" + self.model_name = get_yolox_model_name(endpoints[0], default_model_name="pipeline") else: - self._grpc_uses_bls = False + self.model_name = "pipeline" + + # Always use BLS format for gRPC - NIMs use the pipeline model + self._grpc_uses_bls = True def prepare_data_for_inference(self, data: Dict[str, Any]) -> Dict[str, Any]: """ @@ -213,26 +215,25 @@ def chunk_list_geometrically(lst: list, max_size: int) -> List[list]: return chunks if protocol == "grpc": - logger.debug("Formatting input for gRPC Yolox Ensemble model") + logger.debug("Formatting input for gRPC Yolox BLS pipeline model") + + # Convert images to base64 data URLs as expected by the NIM BLS pipeline b64_images = [numpy_to_base64(image, format=YOLOX_PAGE_IMAGE_FORMAT) for image in data["images"]] - b64_chunks = chunk_list_geometrically(b64_images, max_batch_size) + b64_data_urls = [f"data:image/{YOLOX_PAGE_IMAGE_FORMAT.lower()};base64,{b64}" for b64 in b64_images] + + b64_chunks = chunk_list_geometrically(b64_data_urls, max_batch_size) original_chunks = chunk_list_geometrically(data["images"], max_batch_size) shape_chunks = chunk_list_geometrically(data["original_image_shapes"], max_batch_size) batched_inputs = [] formatted_batch_data = [] for b64_chunk, orig_chunk, shapes in zip(b64_chunks, original_chunks, shape_chunks): - input_array = np.array(b64_chunk, 
dtype=np.object_) + # Create input array with shape [1, N] for BLS pipeline + # This matches the NIM's expected format: batch of 1 request containing N images + input_array = np.array(b64_chunk, dtype=np.object_).reshape(1, -1) - if self._grpc_uses_bls: - # For BLS with dynamic batching (max_batch_size > 0), we need to add explicit batch dimension - # Shape [N] becomes [1, N] to indicate: batch of 1, containing N images - input_array = input_array.reshape(1, -1) - thresholds = np.array([[self.conf_threshold, self.iou_threshold]], dtype=np.float32) - else: - current_batch_size = input_array.shape[0] - single_threshold_pair = [self.conf_threshold, self.iou_threshold] - thresholds = np.tile(single_threshold_pair, (current_batch_size, 1)).astype(np.float32) + # Thresholds with shape [1, 2] for BLS pipeline + thresholds = np.array([[self.conf_threshold, self.iou_threshold]], dtype=np.float32) batched_inputs.append([input_array, thresholds]) formatted_batch_data.append({"images": orig_chunk, "original_image_shapes": shapes}) @@ -336,7 +337,7 @@ def process_inference_results(self, output: Any, protocol: str, **kwargs) -> Lis Parameters ---------- output_array : np.ndarray - The raw output from the Yolox model. + The raw output from the Yolox model. For BLS pipeline, shape is [1, N]. kwargs : dict Additional parameters for processing, including thresholds and number of classes. @@ -351,11 +352,13 @@ def process_inference_results(self, output: Any, protocol: str, **kwargs) -> Lis elif protocol == "grpc": results = [] - # For grpc, apply the same NIM postprocessing. 
- for out in output: + # For BLS pipeline, output shape is [1, N] - flatten to iterate over individual results + flat_output = output.flatten() if hasattr(output, "flatten") else output + for out in flat_output: if isinstance(out, bytes): out = out.decode("utf-8") if isinstance(out, dict): + results.append(out) continue results.append(json.loads(out)) inference_results = self.postprocess_annotations(results, **kwargs) diff --git a/api/src/nv_ingest_api/internal/primitives/nim/nim_client.py b/api/src/nv_ingest_api/internal/primitives/nim/nim_client.py index a956c4913..c211ede35 100644 --- a/api/src/nv_ingest_api/internal/primitives/nim/nim_client.py +++ b/api/src/nv_ingest_api/internal/primitives/nim/nim_client.py @@ -256,7 +256,7 @@ def infer(self, data: dict, model_name: str, **kwargs) -> Any: ) # Check for a custom maximum pool worker count, and remove it from kwargs. - max_pool_workers = kwargs.pop("max_pool_workers", 16) + max_pool_workers = kwargs.pop("max_pool_workers", 24) # 4. Process each batch concurrently using a thread pool. # We enumerate the batches so that we can later reassemble results in order. 
diff --git a/api/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py b/api/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py index 6be0785e6..c9da0fb36 100644 --- a/api/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +++ b/api/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py @@ -54,8 +54,8 @@ class ChartExtractorConfigSchema(LowercaseProtocolMixin): ocr_endpoints: Tuple[Optional[str], Optional[str]] = (None, None) ocr_infer_protocol: str = "" - nim_batch_size: int = 2 - workers_per_progress_engine: int = 5 + nim_batch_size: int = 8 + workers_per_progress_engine: int = 12 @model_validator(mode="before") @classmethod diff --git a/api/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py b/api/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py index 59c007d11..d4c64137a 100644 --- a/api/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +++ b/api/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py @@ -47,8 +47,8 @@ class InfographicExtractorConfigSchema(LowercaseProtocolMixin): ocr_endpoints: Tuple[Optional[str], Optional[str]] = (None, None) ocr_infer_protocol: str = "" - nim_batch_size: int = 2 - workers_per_progress_engine: int = 5 + nim_batch_size: int = 8 + workers_per_progress_engine: int = 12 @model_validator(mode="before") @classmethod diff --git a/api/src/nv_ingest_api/internal/schemas/extract/extract_ocr_schema.py b/api/src/nv_ingest_api/internal/schemas/extract/extract_ocr_schema.py index ab36d15db..e1ca27da2 100644 --- a/api/src/nv_ingest_api/internal/schemas/extract/extract_ocr_schema.py +++ b/api/src/nv_ingest_api/internal/schemas/extract/extract_ocr_schema.py @@ -47,8 +47,8 @@ class OCRExtractorConfigSchema(LowercaseProtocolMixin): ocr_endpoints: Tuple[Optional[str], Optional[str]] = (None, None) ocr_infer_protocol: str = "" - nim_batch_size: int = 2 - workers_per_progress_engine: int = 5 + 
nim_batch_size: int = 8 + workers_per_progress_engine: int = 12 @model_validator(mode="before") @classmethod diff --git a/api/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py b/api/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py index 9089eb6b9..3773ab8fa 100644 --- a/api/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +++ b/api/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py @@ -48,8 +48,8 @@ class PDFiumConfigSchema(LowercaseProtocolMixin): yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None) yolox_infer_protocol: str = "" - nim_batch_size: int = 4 - workers_per_progress_engine: int = 5 + nim_batch_size: int = 8 + workers_per_progress_engine: int = 12 @model_validator(mode="before") @classmethod @@ -137,7 +137,7 @@ class NemotronParseConfigSchema(LowercaseProtocolMixin): timeout: float = 300.0 - workers_per_progress_engine: int = 5 + workers_per_progress_engine: int = 12 @model_validator(mode="before") @classmethod diff --git a/api/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py b/api/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py index cdc981a17..ab5a21e5f 100644 --- a/api/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py +++ b/api/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py @@ -51,8 +51,8 @@ class TableExtractorConfigSchema(LowercaseProtocolMixin): ocr_endpoints: Tuple[Optional[str], Optional[str]] = (None, None) ocr_infer_protocol: str = "" - nim_batch_size: int = 2 - workers_per_progress_engine: int = 5 + nim_batch_size: int = 8 + workers_per_progress_engine: int = 12 @model_validator(mode="before") @classmethod diff --git a/config/default_pipeline.yaml b/config/default_pipeline.yaml index 9756db4d7..1fe549999 100644 --- a/config/default_pipeline.yaml +++ b/config/default_pipeline.yaml @@ -72,11 +72,11 @@ stages: min_replicas: 0 max_replicas: strategy: "memory_thresholding" - memory_per_replica_mb: 
10000 # Heuristic max consumption + memory_per_replica_mb: 5000 # Reduced to allow more replicas static_replicas: strategy: "memory_static_global_percent" - memory_per_replica_mb: 10000 - limit: 16 + memory_per_replica_mb: 5000 + limit: 32 - name: "audio_extractor" type: "stage" @@ -193,10 +193,10 @@ stages: min_replicas: 0 max_replicas: strategy: "static" - value: 2 + value: 8 static_replicas: strategy: "static" - value: 1 + value: 4 - name: "table_extractor" type: "stage" @@ -219,11 +219,11 @@ stages: min_replicas: 0 max_replicas: strategy: "memory_thresholding" - memory_per_replica_mb: 10000 + memory_per_replica_mb: 5000 static_replicas: strategy: "memory_static_global_percent" - memory_per_replica_mb: 10000 - limit: 6 + memory_per_replica_mb: 5000 + limit: 24 - name: "chart_extractor" type: "stage" @@ -246,11 +246,11 @@ stages: min_replicas: 0 max_replicas: strategy: "memory_thresholding" - memory_per_replica_mb: 10000 + memory_per_replica_mb: 5000 static_replicas: strategy: "memory_static_global_percent" - memory_per_replica_mb: 10000 - limit: 6 + memory_per_replica_mb: 5000 + limit: 24 # Post-processing / Mutators - name: "image_filter" @@ -414,41 +414,41 @@ edges: # Intake - from: "source_stage" to: "metadata_injector" - queue_size: 32 + queue_size: 64 # Document Extractors - from: "metadata_injector" to: "pdf_extractor" - queue_size: 32 + queue_size: 128 - from: "pdf_extractor" to: "audio_extractor" - queue_size: 32 + queue_size: 64 - from: "audio_extractor" to: "docx_extractor" - queue_size: 32 + queue_size: 64 - from: "docx_extractor" to: "pptx_extractor" - queue_size: 32 + queue_size: 64 - from: "pptx_extractor" to: "image_extractor" - queue_size: 32 + queue_size: 64 - from: "image_extractor" to: "html_extractor" - queue_size: 32 + queue_size: 64 - from: "html_extractor" to: "infographic_extractor" - queue_size: 32 + queue_size: 128 # Primitive Extractors - from: "infographic_extractor" to: "table_extractor" - queue_size: 32 + queue_size: 128 - from: 
"table_extractor" to: "chart_extractor" - queue_size: 32 + queue_size: 128 - from: "chart_extractor" to: "image_filter" - queue_size: 32 + queue_size: 64 # Primitive Mutators - from: "image_filter" diff --git a/docker-compose.override.yaml b/docker-compose.override.yaml new file mode 100644 index 000000000..d658b8670 --- /dev/null +++ b/docker-compose.override.yaml @@ -0,0 +1,100 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Docker Compose Override File +# This file allows you to customize environment variables for NIM services +# and override the nv-ingest-ms-runtime inference protocols to use HTTP. +# +# Usage: docker compose -f docker-compose.yaml -f docker-compose.override.yaml up + +services: + # Page Elements NIM - Customize environment variables as needed + page-elements: + environment: + - CUDA_VISIBLE_DEVICES=0 + - NIM_TRITON_PIPELINE_MAX_BATCH_SIZE=64 + - NIM_TRITON_PIPELINE_MAX_QUEUE_DELAY_MICROSECONDS=50000 + - NIM_TRITON_DATA_MAX_BATCH_SIZE=16 + - NIM_TRITON_MODEL_MAX_BATCH_SIZE=16 + - NIM_TRITON_MODEL_MAX_QUEUE_DELAY_MICROSECONDS=5000 + - NIM_TRITON_WORKER_INSTANCE_COUNT=4 + - NIM_TRITON_ENABLE_PIPELINE_TIMING=true + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["${PAGE_ELEMENTS_GPU_ID:-0}"] + capabilities: [gpu] + # Graphic Elements NIM - Customize environment variables as needed + graphic-elements: + environment: + - CUDA_VISIBLE_DEVICES=0 + - NIM_TRITON_PIPELINE_MAX_BATCH_SIZE=64 + - NIM_TRITON_PIPELINE_MAX_QUEUE_DELAY_MICROSECONDS=50000 + - NIM_TRITON_DATA_MAX_BATCH_SIZE=16 + - NIM_TRITON_MODEL_MAX_BATCH_SIZE=16 + - NIM_TRITON_MODEL_MAX_QUEUE_DELAY_MICROSECONDS=5000 + - NIM_TRITON_WORKER_INSTANCE_COUNT=4 + - NIM_TRITON_ENABLE_PIPELINE_TIMING=true + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["${GRAPHIC_ELEMENTS_GPU_ID:-1}"] + capabilities: [gpu] + # Table Structure NIM - 
Customize environment variables as needed + table-structure: + environment: + - CUDA_VISIBLE_DEVICES=0 + - NIM_TRITON_PIPELINE_MAX_BATCH_SIZE=64 + - NIM_TRITON_PIPELINE_MAX_QUEUE_DELAY_MICROSECONDS=50000 + - NIM_TRITON_DATA_MAX_BATCH_SIZE=16 + - NIM_TRITON_MODEL_MAX_BATCH_SIZE=16 + - NIM_TRITON_MODEL_MAX_QUEUE_DELAY_MICROSECONDS=5000 + - NIM_TRITON_WORKER_INSTANCE_COUNT=4 + - NIM_TRITON_ENABLE_PIPELINE_TIMING=true + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["${TABLE_STRUCTURE_GPU_ID:-2}"] + capabilities: [gpu] + + # OCR NIM - Customize environment variables as needed + ocr: + environment: + - CUDA_VISIBLE_DEVICES=0 + - NIM_TRITON_PIPELINE_MAX_BATCH_SIZE=64 + - NIM_TRITON_PIPELINE_MAX_QUEUE_DELAY_MICROSECONDS=50000 + - NIM_TRITON_DATA_MAX_BATCH_SIZE=16 + - NIM_TRITON_MODEL_MAX_BATCH_SIZE=16 + - NIM_TRITON_MODEL_MAX_QUEUE_DELAY_MICROSECONDS=5000 + - NIM_TRITON_WORKER_INSTANCE_COUNT=8 + - NIM_TRITON_ENABLE_PIPELINE_TIMING=true + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["${OCR_GPU_ID:-3}"] + capabilities: [gpu] + # NV-Ingest Runtime - Override inference protocols to gRPC + nv-ingest-ms-runtime: + volumes: + - ./config:/workspace/config + environment: + # Load custom pipeline config with increased stage replicas + - INGEST_CONFIG_PATH=/workspace/config/default_pipeline.yaml + # Override INFER_PROTOCOL to gRPC for all NIM services + # Page Elements (YOLOX) + - YOLOX_INFER_PROTOCOL=grpc + # Graphic Elements + - YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL=grpc + # Table Structure + - YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL=grpc + # OCR + - OCR_INFER_PROTOCOL=grpc diff --git a/docker-compose.yaml b/docker-compose.yaml index 45b19368e..5a5d6e804 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -241,6 +241,8 @@ services: nv-ingest-ms-runtime: image: nvcr.io/nvidia/nemo-microservices/nv-ingest:latest shm_size: 40gb # Should be at minimum 30% of assigned memory per Ray documentation + 
depends_on: + - redis build: context: ${NV_INGEST_ROOT:-.} dockerfile: "./Dockerfile"