11"""Common functionality for document generation."""
22
33import logging
4+ import os
5+ import select
46import subprocess
57
68import pypdf
@@ -40,9 +42,59 @@ def load_pages(pages_dir):
4042 return pages
4143
4244
def _run_subprocess_logged(cmd, check=True, env=None):
    """Run a subprocess, forwarding its output to the module logger.

    Stdout lines are logged at INFO level and stderr lines at WARNING level.
    Whitespace-only lines are skipped and trailing whitespace is stripped.
    Bytes that cannot be decoded are replaced rather than aborting the run.

    Args:
        cmd: Command and arguments to run
        check: If True, raise CalledProcessError on non-zero exit
        env: Optional mapping used as the child's environment. When omitted
            (or empty) a copy of ``os.environ`` is used. The mapping is
            copied, so the caller's object is never modified.

    Returns:
        Return code from process

    Raises:
        subprocess.CalledProcessError: If ``check`` is true and the process
            exits with a non-zero return code.
    """
    # Always work on a private copy: PYTHONUNBUFFERED is injected below and
    # must not leak into a dict owned by the caller.
    env = dict(env) if env else os.environ.copy()
    env["PYTHONUNBUFFERED"] = "True"

    def emit(log, raw, encoding):
        """Decode *raw* and log every non-blank line it contains."""
        for text in raw.decode(encoding, errors="replace").splitlines():
            if text.strip():
                log(text.rstrip())

    with subprocess.Popen(
        cmd,
        # text=True is kept only so the stream wrappers expose the encoding
        # to decode with; all reads below go through the raw descriptors.
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env=env,
    ) as proc:
        # fd -> (log function, encoding, bytes not yet newline-terminated).
        # Reading the raw fds with os.read() keeps select() truthful: no data
        # can hide in a Python-level buffer, and a partial line never blocks.
        open_pipes = {
            stream.fileno(): (log, stream.encoding, bytearray())
            for stream, log in (
                (proc.stdout, logger.info),
                (proc.stderr, logger.warning),
            )
        }

        while open_pipes:
            # Wait for output (or EOF) on any pipe that is still open
            ready, _, _ = select.select(list(open_pipes), [], [])

            for fd in ready:
                log, encoding, partial = open_pipes[fd]
                chunk = os.read(fd, 65536)

                if not chunk:
                    # EOF: stop watching this pipe so select() does not spin,
                    # and flush whatever unterminated text is left.
                    del open_pipes[fd]
                    emit(log, bytes(partial), encoding)
                    continue

                partial += chunk
                # Log only complete lines; keep the unterminated tail.
                head, newline, tail = bytes(partial).rpartition(b"\n")
                if newline:
                    emit(log, head, encoding)
                    partial[:] = tail

        # Both pipes are closed; collect the exit status.
        ret_code = proc.wait()

    if check and ret_code != 0:
        raise subprocess.CalledProcessError(ret_code, cmd)

    return ret_code
93+
94+
4395def compress_pdf (input_pdf , output_pdf , dpi = 300 ):
4496 """Compress a PDF file using Ghostscript."""
45- subprocess . run (
97+ _run_subprocess_logged (
4698 [
4799 "gs" ,
48100 "-sDEVICE=pdfwrite" ,
@@ -54,9 +106,9 @@ def compress_pdf(input_pdf, output_pdf, dpi=300):
54106 "-dBATCH" ,
55107 f"-sOutputFile={ output_pdf } " ,
56108 input_pdf ,
57- ],
58- check = True ,
109+ ]
59110 )
111+ return output_pdf
60112
61113
62114def combine_pdfs (pdf_files , output_file ):
@@ -68,11 +120,10 @@ def combine_pdfs(pdf_files, output_file):
68120 with open (pdf_file , "rb" ) as file_obj :
69121 pdf_reader = pypdf .PdfReader (file_obj )
70122 try :
71- # The proper method to assemble PDFs
72123 pdf_writer .append (pdf_reader )
73124 except KeyError as exc :
74- # PDF has broken annotations with missing /Subtype
75125 if str (exc ) == "'/Subtype'" :
126+ # PDF has broken annotations with missing /Subtype
76127 logger .warning (
77128 "PDF %s has broken annotations. "
78129 "Falling back to page-by-page method." ,
@@ -81,33 +132,24 @@ def combine_pdfs(pdf_files, output_file):
81132 for page in pdf_reader .pages :
82133 pdf_writer .add_page (page )
83134 else :
84- # Re-raise unexpected KeyError
85135 raise
86136 pdf_writer .write (output_stream )
87137
138+ return output_file
88139
89- def convert_svg_to_pdf (svg_path , pdf_path , backend = "cairosvg" ):
90- """Convert an SVG file to PDF.
91-
92- Args:
93- svg_path: Path to input SVG file
94- pdf_path: Path to output PDF file
95- backend: Conversion backend to use (cairosvg or inkscape)
96140
97- Returns:
98- Path to the generated PDF file
99- """
141+ def convert_svg_to_pdf (svg_path , pdf_path , backend = "cairosvg" ):
142+ """Convert an SVG file to PDF."""
100143 if backend == "inkscape" :
101144 try :
102- subprocess . run (
145+ _run_subprocess_logged (
103146 [
104147 "inkscape" ,
105148 f"--export-filename={ pdf_path } " ,
106149 svg_path ,
107- ],
108- check = True ,
150+ ]
109151 )
110- except ( subprocess .SubprocessError , FileNotFoundError ) as exc :
152+ except subprocess .SubprocessError as exc :
111153 raise RuntimeError (
112154 "Inkscape command failed. Please ensure Inkscape is installed "
113155 'and in your PATH or set svg2pdf_backend to "cairosvg" in your config.'
0 commit comments