From 26d470c5f1c3f6cfd3020e9867050566a76328d2 Mon Sep 17 00:00:00 2001 From: edknv Date: Wed, 4 Feb 2026 12:36:48 -0800 Subject: [PATCH] Remove padding for page elements --- api/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/api/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py b/api/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py index 95585748f..e2f24ad40 100644 --- a/api/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py +++ b/api/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py @@ -577,14 +577,13 @@ def pdfium_extractor( # If we want OCR extraction, rasterize the page and store it if extraction_needed_for_text or extraction_needed_for_structured: - image, padding_offsets = pdfium_pages_to_numpy( + image, _ = pdfium_pages_to_numpy( [page], scale_tuple=(YOLOX_PAGE_IMAGE_PREPROC_WIDTH, YOLOX_PAGE_IMAGE_PREPROC_HEIGHT), - padding_tuple=(YOLOX_PAGE_IMAGE_PREPROC_WIDTH, YOLOX_PAGE_IMAGE_PREPROC_HEIGHT), render_rev_byteorder=True, trace_info=execution_trace_log, ) - pages_for_extractions.append((page_idx, image[0], padding_offsets[0])) + pages_for_extractions.append((page_idx, image[0], (0, 0))) # No padding offset # Whenever pages_for_extractions hits YOLOX_MAX_BATCH_SIZE, submit a job if len(pages_for_extractions) >= YOLOX_MAX_BATCH_SIZE: