|
12 | 12 | logger = logging.getLogger(__name__) |
13 | 13 |
|
14 | 14 |
|
class PDFBakeError(Exception):
    """Base exception for PDF baking errors."""


class SVGConversionError(PDFBakeError):
    """Failed to convert SVG to PDF.

    Attributes:
        svg_path: Path of the SVG file that could not be converted
        backend: Name of the conversion backend that was used
        cause: Description of the underlying failure, or None if unknown
    """

    def __init__(self, svg_path, backend, cause=None):
        self.svg_path = svg_path
        self.backend = backend
        self.cause = cause
        message = f"Failed to convert {svg_path} using {backend}"
        # Only mention the cause when there is one, so the message never
        # ends with a literal ": None".
        if cause is not None:
            message = f"{message}: {cause}"
        super().__init__(message)

    def __reduce__(self):
        """Rebuild from the constructor arguments when copied or pickled.

        The default exception reduction replays ``self.args`` (the single
        formatted message) into ``__init__``, which requires ``svg_path`` and
        ``backend`` and would therefore fail with a TypeError.
        """
        return (
            type(self),
            (self.svg_path, self.backend, self.cause),
            dict(vars(self)) or None,
        )


class PDFCombineError(PDFBakeError):
    """Failed to combine PDFs."""


class PDFCompressionError(PDFBakeError):
    """Failed to compress PDF."""
| 35 | + |
| 36 | + |
15 | 37 | def deep_merge(base, update): |
16 | 38 | """Recursively merge two dictionaries. |
17 | 39 |
|
@@ -42,16 +64,59 @@ def load_pages(pages_dir): |
42 | 64 | return pages |
43 | 65 |
|
44 | 66 |
|
45 | | -def _run_subprocess_logged(cmd, check=True, env=None): |
def combine_pdfs(pdf_files, output_file):
    """Combine multiple PDF files into a single PDF.

    All input files are read before the output file is opened, so a failure
    while reading an input does not leave a truncated output file behind.

    Args:
        pdf_files: List of paths to PDF files to combine
        output_file: Path where the combined PDF will be written

    Returns:
        Path to the combined PDF file

    Raises:
        PDFCombineError: If no PDF files provided, or if reading an input,
            merging, or writing the output fails
    """
    if not pdf_files:
        raise PDFCombineError("No PDF files provided to combine")

    pdf_writer = pypdf.PdfWriter()

    try:
        for pdf_file in pdf_files:
            with open(pdf_file, "rb") as file_obj:
                pdf_reader = pypdf.PdfReader(file_obj)
                try:
                    pdf_writer.append(pdf_reader)
                except KeyError as exc:
                    if str(exc) != "'/Subtype'":
                        raise
                    # PDF has broken annotations with missing /Subtype
                    logger.warning(
                        "PDF %s has broken annotations. "
                        "Falling back to page-by-page method.",
                        pdf_file,
                    )
                    for page in pdf_reader.pages:
                        pdf_writer.add_page(page)

        # Open (and truncate) the destination only once every input merged.
        with open(output_file, "wb") as output_stream:
            pdf_writer.write(output_stream)
    except Exception as exc:
        # Covers parse/merge errors as well as I/O errors on either side, so
        # callers only need to handle the documented PDFCombineError.
        raise PDFCombineError(f"Failed to combine PDFs: {exc}") from exc

    return output_file
| 109 | + |
| 110 | + |
| 111 | +def _run_subprocess_logged(cmd, env=None): |
46 | 112 | """Run a subprocess with output redirected to logging. |
47 | 113 |
|
48 | 114 | Args: |
49 | 115 | cmd: Command and arguments to run |
50 | | - check: If True, raise CalledProcessError on non-zero exit |
51 | 116 | env: Optional environment variables to set |
52 | 117 |
|
53 | 118 | Returns: |
54 | | - Return code from process |
| 119 | + 0 if successful, otherwise raises CalledProcessError |
55 | 120 | """ |
56 | 121 | env = env or os.environ.copy() |
57 | 122 | env["PYTHONUNBUFFERED"] = "True" |
@@ -86,76 +151,80 @@ def _run_subprocess_logged(cmd, check=True, env=None): |
86 | 151 | if line.strip(): |
87 | 152 | log(line.rstrip()) |
88 | 153 |
|
89 | | - if ret_code != 0 and check: |
| 154 | + if ret_code != 0: |
90 | 155 | raise subprocess.CalledProcessError(ret_code, cmd) |
91 | 156 |
|
92 | | - return ret_code |
| 157 | + return 0 |
93 | 158 |
|
94 | 159 |
|
def compress_pdf(input_pdf, output_pdf, dpi=300):
    """Compress a PDF file using Ghostscript.

    Args:
        input_pdf: Path to the input PDF file
        output_pdf: Path where the compressed PDF will be written
        dpi: Resolution in dots per inch, passed to Ghostscript as ``-r<dpi>``
            (default: 300)

    Returns:
        Path to the compressed PDF file

    Raises:
        PDFCompressionError: If Ghostscript compression fails or the ``gs``
            command cannot be started
    """
    command = [
        "gs",
        "-sDEVICE=pdfwrite",
        "-dCompatibilityLevel=1.7",
        "-dPDFSETTINGS=/printer",
        f"-r{dpi}",
        "-dNOPAUSE",
        "-dQUIET",
        "-dBATCH",
        f"-sOutputFile={output_pdf}",
        input_pdf,
    ]
    try:
        _run_subprocess_logged(command)
    except (subprocess.SubprocessError, OSError) as exc:
        # OSError is included because a missing or non-executable `gs` binary
        # is normally reported by the subprocess module as
        # FileNotFoundError/PermissionError, not as a SubprocessError.
        # NOTE(review): assumes _run_subprocess_logged launches the command
        # via the subprocess module - confirm.
        raise PDFCompressionError(f"Ghostscript compression failed: {exc}") from exc

    return output_pdf
139 | 192 |
|
140 | 193 |
|
def convert_svg_to_pdf(svg_path, pdf_path, backend="cairosvg"):
    """Convert an SVG file to PDF.

    Args:
        svg_path: Path to the input SVG file
        pdf_path: Path where the PDF will be written
        backend: Conversion backend to use, either "cairosvg" or "inkscape"
            (default: "cairosvg"). Any value other than "inkscape" selects
            the cairosvg code path.

    Returns:
        Path to the converted PDF file

    Raises:
        SVGConversionError: If SVG conversion fails, includes the backend used
            and the text of the underlying error as ``cause``
    """
    # A single handler wraps every failure exactly once. Nesting a second
    # catch-all around backend-specific handlers would re-wrap the
    # SVGConversionError itself and duplicate the "Failed to convert ..."
    # prefix inside the message.
    try:
        if backend == "inkscape":
            _run_subprocess_logged(
                [
                    "inkscape",
                    f"--export-filename={pdf_path}",
                    svg_path,
                ]
            )
        else:
            with open(svg_path, "rb") as svg_file:
                svg2pdf(file_obj=svg_file, write_to=pdf_path)
    except Exception as exc:
        raise SVGConversionError(svg_path, backend, str(exc)) from exc

    return pdf_path
0 commit comments