Skip to content

Commit cbc5ff1

Browse files
committed
chore: implemented branch execution using processes
1 parent 5e2036b commit cbc5ff1

2 files changed

Lines changed: 228 additions & 71 deletions

File tree

laygo/pipeline.py

Lines changed: 176 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,19 @@
22
from collections.abc import Callable
33
from collections.abc import Iterable
44
from collections.abc import Iterator
5+
from collections.abc import Mapping
56
from concurrent.futures import ThreadPoolExecutor
67
from concurrent.futures import as_completed
78
import itertools
9+
from multiprocessing import Manager
810
from queue import Queue
911
from typing import Any
12+
from typing import Literal
1013
from typing import TypeVar
1114
from typing import overload
1215

16+
from loky import get_reusable_executor
17+
1318
from laygo.context import IContextManager
1419
from laygo.context.parallel import ParallelContextManager
1520
from laygo.context.types import IContextHandle
@@ -21,6 +26,29 @@
2126
PipelineFunction = Callable[[T], Any]
2227

2328

29+
# This function must be defined at the top level of the module (e.g., after imports)
# so it can be pickled and shipped to worker processes by loky/multiprocessing.
def _branch_consumer_process[T](transformer: Transformer, queue: "Queue", context_handle: IContextHandle) -> list[Any]:
  """
  The entry point for a consumer process. It reconstructs the necessary
  objects and runs a dedicated pipeline instance on the data from its queue.

  Args:
    transformer: The branch's transformer to apply to the streamed items.
    queue: The process-safe queue feeding this branch. NOTE(review): despite
      the ``"Queue"`` annotation this is a ``multiprocessing.Manager().Queue()``
      proxy at runtime, not ``queue.Queue`` — confirm and consider fixing the hint.
    context_handle: Serializable handle used to rebuild a context proxy
      inside the worker process.

  Returns:
    The list of items produced by running the branch's transformer.
  """
  # Re-create the context proxy within the new process
  context_proxy = context_handle.create_proxy()

  def stream_from_queue() -> Iterator[T]:
    """A generator that yields items from the process-safe queue."""
    # A None sentinel (sent by the producer) terminates the stream.
    while (batch := queue.get()) is not None:
      yield from batch

  try:
    # Each consumer process runs its own mini-pipeline
    branch_pipeline = Pipeline(stream_from_queue(), context_manager=context_proxy)
    result_list, _ = branch_pipeline.apply(transformer).to_list()
    return result_list
  finally:
    # Always release the proxy's resources, even if the transformer raises.
    context_proxy.shutdown()
50+
51+
2452
class Pipeline[T]:
2553
"""Manages a data source and applies transformers to it.
2654
@@ -303,12 +331,78 @@ def consume(self) -> tuple[None, dict[str, Any]]:
303331

304332
return None, self.context_manager.to_dict()
305333

334+
def _producer_fanout(
  self,
  source_iterator: Iterator[T],
  queues: dict[str, Queue],
  batch_size: int,
) -> None:
  """Producer for fan-out: sends every item to every branch."""
  # Every branch receives an identical copy of each batch.
  for chunk in itertools.batched(source_iterator, batch_size):
    payload = list(chunk)
    for branch_queue in queues.values():
      branch_queue.put(payload)
  # A trailing None sentinel tells each consumer the stream is exhausted.
  for branch_queue in queues.values():
    branch_queue.put(None)
347+
348+
def _producer_router(
  self,
  source_iterator: Iterator[T],
  queues: dict[str, Queue],
  parsed_branches: list[tuple[str, Transformer, Callable]],
  batch_size: int,
) -> None:
  """Producer for router (`first_match=True`): sends item to the first matching branch."""
  # Per-branch accumulation buffers so items are shipped in batches.
  pending: dict[str, list] = {branch_name: [] for branch_name, _, _ in parsed_branches}
  for element in source_iterator:
    for branch_name, _, predicate in parsed_branches:
      if not predicate(element):
        continue
      pending[branch_name].append(element)
      if len(pending[branch_name]) >= batch_size:
        queues[branch_name].put(pending[branch_name])
        pending[branch_name] = []
      # First match wins: do not offer the item to later branches.
      break
  # Flush any partially-filled buffers before signalling completion.
  for branch_name, leftover in pending.items():
    if leftover:
      queues[branch_name].put(leftover)
  for branch_queue in queues.values():
    branch_queue.put(None)
371+
372+
def _producer_broadcast(
  self,
  source_iterator: Iterator[T],
  queues: dict[str, Queue],
  parsed_branches: list[tuple[str, Transformer, Callable]],
  batch_size: int,
) -> None:
  """Producer for broadcast (`first_match=False`): sends item to all matching branches."""
  # Per-branch accumulation buffers so items are shipped in batches.
  pending: dict[str, list] = {branch_name: [] for branch_name, _, _ in parsed_branches}
  for element in source_iterator:
    # Unlike the router, every matching branch receives the item.
    for branch_name, _, predicate in parsed_branches:
      if not predicate(element):
        continue
      bucket = pending[branch_name]
      bucket.append(element)
      if len(bucket) >= batch_size:
        queues[branch_name].put(bucket)
        pending[branch_name] = []
  # Flush any partially-filled buffers before signalling completion.
  for branch_name, leftover in pending.items():
    if leftover:
      queues[branch_name].put(leftover)
  for branch_queue in queues.values():
    branch_queue.put(None)
396+
397+
# Overload 1: Unconditional fan-out — every item is sent to every branch.
@overload
def branch(
  self,
  branches: Mapping[str, Transformer[T, Any]],
  *,
  executor_type: Literal["thread", "process"] = "thread",
  batch_size: int = 1000,
  max_batch_buffer: int = 1,
) -> tuple[dict[str, list[Any]], dict[str, Any]]: ...
@@ -317,17 +411,19 @@ def branch(
317411
@overload
318412
def branch(
319413
self,
320-
branches: dict[str, tuple[Transformer[T, Any], Callable[[T], bool]]],
414+
branches: Mapping[str, tuple[Transformer[T, Any], Callable[[T], bool]]],
321415
*,
416+
executor_type: Literal["thread", "process"] = "thread",
322417
first_match: bool = True,
323418
batch_size: int = 1000,
324419
max_batch_buffer: int = 1,
325420
) -> tuple[dict[str, list[Any]], dict[str, Any]]: ...
326421

327422
def branch(
328423
self,
329-
branches: dict[str, Transformer[T, Any]] | dict[str, tuple[Transformer[T, Any], Callable[[T], bool]]],
424+
branches: Mapping[str, Transformer[T, Any]] | Mapping[str, tuple[Transformer[T, Any], Callable[[T], bool]]],
330425
*,
426+
executor_type: Literal["thread", "process"] = "thread",
331427
first_match: bool = True,
332428
batch_size: int = 1000,
333429
max_batch_buffer: int = 1,
@@ -350,9 +446,11 @@ def branch(
350446
351447
Args:
352448
branches: A dictionary defining the branches.
353-
first_match (bool): Determines the routing logic for conditional branches.
354-
batch_size (int): The number of items to batch for processing.
355-
max_batch_buffer (int): The max number of batches to buffer per branch.
449+
executor_type: The parallelism model. 'thread' for I/O-bound tasks,
450+
'process' for CPU-bound tasks. Defaults to 'thread'.
451+
first_match: Determines the routing logic for conditional branches.
452+
batch_size: The number of items to batch for processing.
453+
max_batch_buffer: The max number of batches to buffer per branch.
356454
357455
Returns:
358456
A tuple containing a dictionary of results and the final context.
@@ -378,85 +476,93 @@ def branch(
378476
else:
379477
producer_fn = self._producer_broadcast
380478

381-
return self._execute_branching(
382-
producer_fn=producer_fn,
383-
parsed_branches=parsed_branches,
384-
batch_size=batch_size,
385-
max_batch_buffer=max_batch_buffer,
386-
)
387-
388-
def _producer_fanout(
389-
self,
390-
source_iterator: Iterator[T],
391-
queues: dict[str, Queue],
392-
batch_size: int,
393-
) -> None:
394-
"""Producer for fan-out: sends every item to every branch."""
395-
for batch_tuple in itertools.batched(source_iterator, batch_size):
396-
batch_list = list(batch_tuple)
397-
for q in queues.values():
398-
q.put(batch_list)
399-
for q in queues.values():
400-
q.put(None)
479+
# Dispatch to the correct executor based on the chosen type
480+
if executor_type == "thread":
481+
return self._execute_branching_thread(
482+
producer_fn=producer_fn,
483+
parsed_branches=parsed_branches,
484+
batch_size=batch_size,
485+
max_batch_buffer=max_batch_buffer,
486+
)
487+
elif executor_type == "process":
488+
return self._execute_branching_process(
489+
producer_fn=producer_fn,
490+
parsed_branches=parsed_branches,
491+
batch_size=batch_size,
492+
max_batch_buffer=max_batch_buffer,
493+
)
494+
else:
495+
raise ValueError(f"Unsupported executor_type: '{executor_type}'. Must be 'thread' or 'process'.")
401496

402-
def _producer_router(
497+
def _execute_branching_process(
  self,
  *,
  producer_fn: Callable,
  parsed_branches: list[tuple[str, Transformer, Callable]],
  batch_size: int,
  max_batch_buffer: int,
) -> tuple[dict[str, list[Any]], dict[str, Any]]:
  """Branching execution using a process pool for consumers.

  Args:
    producer_fn: One of the `_producer_*` methods; runs in a thread and
      feeds batches into the per-branch queues.
    parsed_branches: `(name, transformer, condition)` triples.
    batch_size: Number of items per batch sent to a branch.
    max_batch_buffer: Max batches buffered per branch queue (backpressure).

  Returns:
    A tuple of (per-branch result lists, final context dict).
  """
  source_iterator = self.processed_data
  num_branches = len(parsed_branches)
  final_results: dict[str, list[Any]] = {name: [] for name, _, _ in parsed_branches}
  context_handle = self.context_manager.get_handle()

  # A Manager creates queues that can be shared between processes.
  manager = Manager()
  queues = {name: manager.Queue(maxsize=max_batch_buffer) for name, _, _ in parsed_branches}

  # The producer must run in a thread to access the pipeline's iterator,
  # while consumers run in processes for true CPU parallelism.
  producer_executor = ThreadPoolExecutor(max_workers=1)
  consumer_executor = get_reusable_executor(max_workers=num_branches)

  try:
    # The fan-out producer takes no branch conditions; router/broadcast do.
    producer_args: tuple
    if producer_fn == self._producer_fanout:
      producer_args = (source_iterator, queues, batch_size)
    else:
      producer_args = (source_iterator, queues, parsed_branches, batch_size)

    # Submit the producer to the thread pool
    producer_future = producer_executor.submit(producer_fn, *producer_args)

    # Submit consumers to the process pool
    future_to_name = {
      consumer_executor.submit(_branch_consumer_process, transformer, queues[name], context_handle): name
      for name, transformer, _ in parsed_branches
    }

    # Collect results as they complete
    for future in as_completed(future_to_name):
      name = future_to_name[future]
      try:
        final_results[name] = future.result()
      except Exception:
        # Deliberate best-effort: a failed branch yields an empty result
        # instead of aborting the sibling branches.
        final_results[name] = []

    # Check for producer errors after consumers are done.
    # NOTE(review): if the producer dies before sending the None sentinels,
    # consumers can block on queue.get() — consider a timeout. TODO confirm.
    producer_future.result()

  finally:
    producer_executor.shutdown()
    # Shut down the Manager's server process; without this it leaks and
    # lingers until interpreter exit.
    manager.shutdown()
    # The reusable executor from loky is managed globally

  final_context = self.context_manager.to_dict()
  return final_results, final_context
450554

451-
def _execute_branching(
555+
# Rename original _execute_branching to be specific
556+
def _execute_branching_thread(
452557
self,
453558
*,
454559
producer_fn: Callable,
455560
parsed_branches: list[tuple[str, Transformer, Callable]],
456561
batch_size: int,
457562
max_batch_buffer: int,
458563
) -> tuple[dict[str, list[Any]], dict[str, Any]]:
459-
"""Shared execution logic for all branching modes."""
564+
"""Shared execution logic for thread-based branching modes."""
565+
# ... (The original implementation of _execute_branching goes here)
460566
source_iterator = self.processed_data
461567
num_branches = len(parsed_branches)
462568
final_results: dict[str, list[Any]] = {name: [] for name, _, _ in parsed_branches}
@@ -474,7 +580,6 @@ def stream_from_queue() -> Iterator[T]:
474580
return result_list
475581

476582
with ThreadPoolExecutor(max_workers=num_branches + 1) as executor:
477-
# The producer needs different arguments depending on the type
478583
producer_args: tuple
479584
if producer_fn == self._producer_fanout:
480585
producer_args = (source_iterator, queues, batch_size)

tests/test_pipeline.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
"""Tests for the Pipeline class."""
22

3+
import os
4+
import time
5+
36
from laygo import Pipeline
47
from laygo.context.types import IContextManager
58
from laygo.transformers.transformer import createTransformer
@@ -565,3 +568,52 @@ def test_branch_conditional_broadcast_mode(self):
565568
assert sorted(result["strings"]) == ["A", "B"]
566569
# The float (99.9) AND the integers (1, 2, 3) are processed by the 'numbers' branch.
567570
assert sorted(result["numbers"]) == [10.0, 20.0, 30.0, 999.0]
571+
572+
def test_branch_process_executor(self):
  """Test branching with executor_type='process' for CPU-bound work.

  Verifies both the computed results and that branch work actually ran in
  worker processes distinct from the test process (via PIDs recorded in the
  shared context).
  """

  # Setup: A CPU-bound task is ideal for demonstrating process parallelism.
  def heavy_computation(x: int) -> int:
    # A simple but non-trivial calculation
    time.sleep(0.01)  # Simulate work
    return x * x

  # This function will run inside the worker process to check its PID.
  # NOTE(review): these are local closures — this relies on loky's
  # cloudpickle-based serialization; plain multiprocessing could not pickle
  # them. TODO confirm.
  def check_pid(chunk: list[int], ctx: IContextManager) -> list[int]:
    # Store the worker's process ID in the shared context
    if chunk:
      ctx[f"pid_for_item_{chunk[0]}"] = os.getpid()
    return chunk

  data = [1, 2, 3, 4]
  pipeline = Pipeline(data)
  main_pid = os.getpid()

  # Define branches with CPU-bound work and the PID check
  branches = {
    "evens": (
      createTransformer(int).filter(lambda x: x % 2 == 0).map(heavy_computation)._pipe(check_pid),
      lambda x: True,  # Condition to route data
    ),
    "odds": (
      createTransformer(int).filter(lambda x: x % 2 != 0).map(heavy_computation)._pipe(check_pid),
      lambda x: True,
    ),
  }

  # Action: Execute the branch with the process executor
  result, context = pipeline.branch(
    branches,
    first_match=False,  # Use broadcast to send to all matching
    executor_type="process",
  )

  # Assert: The computational results are correct
  assert sorted(result["evens"]) == [4, 16]  # 2*2, 4*4
  assert sorted(result["odds"]) == [1, 9]  # 1*1, 3*3

  # Assert: The work was done in different processes
  worker_pids = {v for k, v in context.items() if "pid" in k}
  assert len(worker_pids) > 0, "No worker PIDs were found in the context."
  for pid in worker_pids:
    assert pid != main_pid, f"Worker PID {pid} is the same as the main PID."

0 commit comments

Comments
 (0)