From ca60e8759c97045b7034237fdf2165e73542f52c Mon Sep 17 00:00:00 2001 From: Ayoub EL Mhamdi Date: Fri, 19 May 2023 20:31:21 +0100 Subject: [PATCH] fix: Fix unbound font_threshold and improve error handling. - Initialize `font_threshold` variable before it is used - Improve exception handling [lyrapdf/pre_proc.py] - Add `font_threshold` variable to `analyze_font_size` function [lyrapdf/app.py] - Add a catch-all exception handler to the `extract_and_process` function --- lyrapdf/app.py | 2 ++ lyrapdf/pre_proc.py | 1 + 2 files changed, 3 insertions(+) diff --git a/lyrapdf/app.py b/lyrapdf/app.py index 1d1667c..75b71cf 100644 --- a/lyrapdf/app.py +++ b/lyrapdf/app.py @@ -150,6 +150,8 @@ def extract_and_process(input_dir, pdf_path, json_output): print("PDFSyntaxError: Is this really a PDF? ", pdf_path) except PDFTextExtractionNotAllowed as e: print(e) + except Exception as e: + print(e) def get_file_list(input_dir): diff --git a/lyrapdf/pre_proc.py b/lyrapdf/pre_proc.py index 5fc26ac..6df4cbf 100644 --- a/lyrapdf/pre_proc.py +++ b/lyrapdf/pre_proc.py @@ -301,6 +301,7 @@ def analyze_font_size(text): total = sum(font_size_dict.values()) percentage_sum = 0 max_quote = 0 + font_threshold = 0 i = 0 # Keep track of the index for key in sorted_font_size_dict: # Update accumulated percentage