Skip to content

Commit 0fa429d

Browse files
committed
fix: http transformer types and added tests
1 parent 6e0bd88 commit 0fa429d

8 files changed

Lines changed: 158 additions & 20 deletions

File tree

laygo/errors.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from collections.abc import Callable
22

3-
from laygo import PipelineContext
3+
from laygo.helpers import PipelineContext
44

55
ChunkErrorHandler = Callable[[list, Exception, PipelineContext], None]
66

laygo/pipeline.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
from typing import TypeVar
77
from typing import overload
88

9-
from laygo import PipelineContext
10-
from laygo import Transformer
9+
from laygo.helpers import PipelineContext
1110
from laygo.helpers import is_context_aware
11+
from laygo.transformers.transformer import Transformer
1212

1313
T = TypeVar("T")
1414
PipelineFunction = Callable[[T], Any]

laygo/transformers/http.py

Lines changed: 77 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
"""
2-
The final, self-sufficient DistributedTransformer.
2+
The self-sufficient HTTPTransformer (distributed execution over HTTP) with corrected typing.
33
"""
44

5+
from collections.abc import Callable
56
from collections.abc import Iterable
67
from collections.abc import Iterator
78
from concurrent.futures import FIRST_COMPLETED
@@ -10,14 +11,26 @@
1011
import hashlib
1112
import itertools
1213
import pickle
14+
from typing import Any
15+
from typing import TypeVar
16+
from typing import Union
17+
from typing import overload
1318

1419
import requests
1520

16-
from laygo import PipelineContext
17-
from laygo import Transformer
21+
from laygo.errors import ErrorHandler
22+
from laygo.helpers import PipelineContext
23+
from laygo.transformers.transformer import ChunkErrorHandler
24+
from laygo.transformers.transformer import PipelineFunction
25+
from laygo.transformers.transformer import Transformer
1826

27+
In = TypeVar("In")
28+
Out = TypeVar("Out")
29+
T = TypeVar("T")
30+
U = TypeVar("U")
1931

20-
class HTTPTransformer(Transformer):
32+
33+
class HTTPTransformer(Transformer[In, Out]):
2134
"""
2235
A self-sufficient, chainable transformer that manages its own
2336
distributed execution and worker endpoint definition.
@@ -29,32 +42,39 @@ def __init__(self, base_url: str, endpoint: str | None = None, max_workers: int
2942
self.endpoint = endpoint
3043
self.max_workers = max_workers
3144
self.session = requests.Session()
32-
self._worker_url: str
45+
self._worker_url: str | None = None
3346

3447
def _finalize_config(self):
3548
"""Determines the final worker URL, generating one if needed."""
36-
if self._worker_url:
49+
if hasattr(self, "_worker_url") and self._worker_url:
3750
return
3851

3952
if self.endpoint:
4053
path = self.endpoint
4154
else:
42-
# Using pickle to serialize the function chain and hashing for a unique ID
55+
if not self.transformer:
56+
raise ValueError("Cannot determine endpoint for an empty transformer.")
4357
serialized_logic = pickle.dumps(self.transformer)
4458
hash_id = hashlib.sha1(serialized_logic).hexdigest()[:16]
4559
path = f"/autogen/{hash_id}"
4660

4761
self.endpoint = path.lstrip("/")
4862
self._worker_url = f"{self.base_url}/{self.endpoint}"
4963

50-
def __call__(self, data: Iterable, context=None) -> Iterator:
64+
# --- Original HTTPTransformer Methods ---
65+
66+
def __call__(self, data: Iterable[In], context: PipelineContext | None = None) -> Iterator[Out]:
5167
"""CLIENT-SIDE: Called by the Pipeline to start distributed processing."""
5268
self._finalize_config()
5369

5470
def process_chunk(chunk: list) -> list:
5571
"""Target for a thread: sends one chunk to the worker."""
5672
try:
57-
response = self.session.post(self._worker_url, json=chunk, timeout=300)
73+
response = self.session.post(
74+
self._worker_url, # type: ignore
75+
json=chunk,
76+
timeout=300,
77+
)
5878
response.raise_for_status()
5979
return response.json()
6080
except requests.RequestException as e:
@@ -78,14 +98,58 @@ def get_route(self):
7898
"""
7999
Function that returns the route for the worker.
80100
This is used to register the worker in a Flask app or similar.
81-
82-
Returns:
83-
A tuple containing the endpoint and the worker function.
84101
"""
85102
self._finalize_config()
86103

87104
def worker_view_func(chunk: list, context: PipelineContext):
88-
"""The actual Flask view function for this transformer's logic."""
105+
"""The actual worker logic for this transformer."""
89106
return self.transformer(chunk, context)
90107

91108
return (f"/{self.endpoint}", worker_view_func)
109+
110+
# --- Overridden Chaining Methods to Preserve Type ---
111+
112+
def on_error(self, handler: ChunkErrorHandler[In, Out] | ErrorHandler) -> "HTTPTransformer[In, Out]":
113+
super().on_error(handler)
114+
return self
115+
116+
def map[U](self, function: PipelineFunction[Out, U]) -> "HTTPTransformer[In, U]":
117+
super().map(function)
118+
return self # type: ignore
119+
120+
def filter(self, predicate: PipelineFunction[Out, bool]) -> "HTTPTransformer[In, Out]":
121+
super().filter(predicate)
122+
return self
123+
124+
@overload
125+
def flatten[T](self: "HTTPTransformer[In, list[T]]") -> "HTTPTransformer[In, T]": ...
126+
@overload
127+
def flatten[T](self: "HTTPTransformer[In, tuple[T, ...]]") -> "HTTPTransformer[In, T]": ...
128+
@overload
129+
def flatten[T](self: "HTTPTransformer[In, set[T]]") -> "HTTPTransformer[In, T]": ...
130+
def flatten[T](
131+
self: Union["HTTPTransformer[In, list[T]]", "HTTPTransformer[In, tuple[T, ...]]", "HTTPTransformer[In, set[T]]"],
132+
) -> "HTTPTransformer[In, T]":
133+
super().flatten()
134+
return self # type: ignore
135+
136+
def tap(self, function: PipelineFunction[Out, Any]) -> "HTTPTransformer[In, Out]":
137+
super().tap(function)
138+
return self
139+
140+
def apply[T](self, t: Callable[["HTTPTransformer[In, Out]"], "Transformer[In, T]"]) -> "HTTPTransformer[In, T]":
141+
# Note: The type hint for `t` is slightly adjusted to reflect it receives an HTTPTransformer
142+
super().apply(t) # type: ignore
143+
return self # type: ignore
144+
145+
def catch[U](
146+
self,
147+
sub_pipeline_builder: Callable[[Transformer[Out, Out]], Transformer[Out, U]],
148+
on_error: ChunkErrorHandler[Out, U] | None = None,
149+
) -> "HTTPTransformer[In, U]":
150+
super().catch(sub_pipeline_builder, on_error)
151+
return self # type: ignore
152+
153+
def short_circuit(self, function: Callable[[PipelineContext], bool | None]) -> "HTTPTransformer[In, Out]":
154+
super().short_circuit(function)
155+
return self

laygo/transformers/parallel.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@
1212
import itertools
1313
import threading
1414

15-
from laygo import PipelineContext
16-
from laygo import Transformer
15+
from laygo.helpers import PipelineContext
1716
from laygo.transformers.transformer import DEFAULT_CHUNK_SIZE
1817
from laygo.transformers.transformer import InternalTransformer
18+
from laygo.transformers.transformer import Transformer
1919

2020

2121
class ParallelPipelineContextType(PipelineContext):

laygo/transformers/transformer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
from typing import Union
1010
from typing import overload
1111

12-
from laygo import ErrorHandler
13-
from laygo import PipelineContext
12+
from laygo.errors import ErrorHandler
13+
from laygo.helpers import PipelineContext
1414
from laygo.helpers import is_context_aware
1515
from laygo.helpers import is_context_aware_reduce
1616

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ dev = [
4343
"pytest>=7.0.0",
4444
"ruff>=0.1.0",
4545
"twine>=4.0.0",
46+
"requests-mock>=1.12.1",
4647
]
4748

4849
[tool.ruff]

tests/test_http_transformer.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Tests for HTTPTransformer. The classes under test are imported from the `laygo` package:
2+
# Pipeline, PipelineContext, and HTTPTransformer.
3+
import requests_mock
4+
5+
from laygo import HTTPTransformer
6+
from laygo import Pipeline
7+
from laygo import PipelineContext
8+
9+
10+
class TestHTTPTransformer:
11+
"""
12+
Test suite for the HTTPTransformer class.
13+
"""
14+
15+
def test_distributed_transformer_with_mock(self):
16+
"""
17+
Tests the HTTPTransformer by mocking the worker endpoint.
18+
This test validates that the client-side of the transformer correctly
19+
calls the endpoint and processes the response from the (mocked) worker.
20+
"""
21+
# 1. Define the transformer's properties
22+
base_url = "http://mock-worker.com"
23+
endpoint = "/process/data"
24+
worker_url = f"{base_url}{endpoint}"
25+
26+
# 2. Define the transformer and its logic using the chainable API.
27+
# This single instance holds both the client and server logic.
28+
http_transformer = (
29+
HTTPTransformer(base_url=base_url, endpoint=endpoint).map(lambda x: x * 2).filter(lambda x: x > 10)
30+
)
31+
32+
# Set a small chunk_size to ensure the client makes multiple requests
33+
http_transformer.chunk_size = 4
34+
35+
# 3. Get the worker's logic from the transformer itself
36+
# The `get_route` method provides the exact function the worker would run.
37+
_, worker_view_func = http_transformer.get_route()
38+
39+
# 4. Configure the mock endpoint to use the real worker logic
40+
def mock_response(request, context):
41+
"""The behavior of the mocked Flask endpoint."""
42+
input_chunk = request.json()
43+
# Call the actual view function logic obtained from get_route()
44+
# We pass an empty PipelineContext because the context is not used in this simple case.
45+
output_chunk = worker_view_func(chunk=input_chunk, context=PipelineContext())
46+
return output_chunk
47+
48+
# Use requests_mock context manager
49+
with requests_mock.Mocker() as m:
50+
m.post(worker_url, json=mock_response)
51+
52+
# 5. Run the standard Pipeline with the configured transformer
53+
initial_data = list(range(10)) # [0, 1, 2, ..., 9]
54+
pipeline = Pipeline(initial_data).apply(http_transformer)
55+
result = pipeline.to_list()
56+
57+
# 6. Assert the final result
58+
expected_result = [12, 14, 16, 18]
59+
assert sorted(result) == sorted(expected_result)

uv.lock

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)