diff --git a/lyrapdf/app.py b/lyrapdf/app.py index 1d1667c..75b71cf 100644 --- a/lyrapdf/app.py +++ b/lyrapdf/app.py @@ -150,6 +150,8 @@ def extract_and_process(input_dir, pdf_path, json_output): print("PDFSyntaxError: Is this really a PDF? ", pdf_path) except PDFTextExtractionNotAllowed as e: print(e) + except Exception as e: + print(e) def get_file_list(input_dir): diff --git a/lyrapdf/pre_proc.py b/lyrapdf/pre_proc.py index 5fc26ac..6df4cbf 100644 --- a/lyrapdf/pre_proc.py +++ b/lyrapdf/pre_proc.py @@ -301,6 +301,7 @@ def analyze_font_size(text): total = sum(font_size_dict.values()) percentage_sum = 0 max_quote = 0 + font_threshold = 0 i = 0 # Keep track of the index for key in sorted_font_size_dict: # Update accumulated percentage