Skip to content

Commit e085df5

Browse files
committed
Handle broken annotations
Use the proper `append()` method where possible, and fall back to adding pages one by one with `add_page()` if we encounter broken annotations. This will still fail on other problems - we need to investigate those, and potentially handle them the same way if we encounter more.
1 parent 8c2fb2f commit e085df5

1 file changed

Lines changed: 15 additions & 5 deletions

File tree

src/pdfbaker/common.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,19 @@ def load_pages(pages_dir):
2828
return pages
2929

3030

31-
def compress_pdf(input_pdf, output_pdf):
31+
def compress_pdf(input_pdf, output_pdf, dpi=300):
3232
"""Compress a PDF file using Ghostscript."""
3333
subprocess.run(
3434
[
3535
"gs",
3636
"-sDEVICE=pdfwrite",
3737
"-dCompatibilityLevel=1.7",
3838
"-dPDFSETTINGS=/printer",
39-
"-r300",
39+
f"-r{dpi}",
4040
"-dNOPAUSE",
4141
"-dQUIET",
4242
"-dBATCH",
43-
"-sOutputFile=" + output_pdf,
43+
f"-sOutputFile={output_pdf}",
4444
input_pdf,
4545
],
4646
check=True,
@@ -55,8 +55,18 @@ def combine_pdfs(pdf_files, output_file):
5555
for pdf_file in pdf_files:
5656
with open(pdf_file, "rb") as file_obj:
5757
pdf_reader = pypdf.PdfReader(file_obj)
58-
for page in pdf_reader.pages:
59-
pdf_writer.add_page(page)
58+
try:
59+
# The proper method to append PDFs
60+
pdf_writer.append(pdf_reader)
61+
except KeyError as exc:
62+
# PDF has broken annotations with missing /Subtype
63+
if str(exc) == "'/Subtype'":
64+
print(f"Warning: PDF {pdf_file} has broken annotations. Falling back to page-by-page method.")
65+
for page in pdf_reader.pages:
66+
pdf_writer.add_page(page)
67+
else:
68+
# Re-raise unexpected KeyError
69+
raise
6070
pdf_writer.write(output_stream)
6171

6272

0 commit comments

Comments
 (0)