-
Notifications
You must be signed in to change notification settings - Fork 25
feat(eval): add runtime simulation via --simulation flag #1624
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
4528fb2
feat(eval): add runtime simulation support via --simulation flag
AAgnihotry 8926cd2
test(eval): increase coverage for simulation code paths
AAgnihotry 12ae5ec
fix(testcase): use uipath --debug run to surface simulation log lines
AAgnihotry a8debd9
fix(testcase): remove fragile log assertion, verify simulation via ou…
AAgnihotry a4ef51c
fix(mocks): downgrade simulation config load log to debug level
AAgnihotry 522c383
refactor(mocks): add SimulationConfig pydantic model and typed build_…
AAgnihotry e20d97f
fix(testcase): sync local editable uipath, run against sample dir wit…
AAgnihotry 792b0ef
fix(testcase): run auth and agent from sample dir so credentials are …
AAgnihotry cee5cad
fix(testcase): copy runtime output.json from sample dir instead of us…
AAgnihotry File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| { | ||
| "code": "def add(a, b):\n return a+b\n\ndef divide(a,b):\n return a/b", | ||
| "language": "python" | ||
| } |
186 changes: 186 additions & 0 deletions
186
packages/uipath/samples/runtime-simulations-agent/main.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,186 @@ | ||
| """Coding agent that reviews code and suggests improvements. | ||
|
|
||
| This sample demonstrates the --simulation flag: the three tool functions | ||
| (check_syntax, check_style, suggest_improvements) are decorated with @mockable, | ||
| so they can be intercepted by an LLM during a simulated run instead of | ||
| requiring a real linter or compiler to be installed. | ||
|
|
||
| Run with real tools: | ||
| uipath run main.py:main -f input.json | ||
|
|
||
| Run with simulation (no real tools needed): | ||
| uipath run main.py:main -f input.json --simulation "$(cat simulation.json)" | ||
| """ | ||
|
|
||
| import logging | ||
|
|
||
| from pydantic import BaseModel | ||
| from pydantic.dataclasses import dataclass | ||
|
|
||
| from uipath.eval.mocks import ExampleCall, mockable | ||
| from uipath.tracing import traced | ||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # Input / Output models | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
|
|
||
@dataclass
class CodeReviewInput:
    # Payload accepted by the `main` entrypoint.
    # Source code to be reviewed.
    code: str
    # Language of `code`; "python" is assumed when omitted.
    language: str = "python"
|
|
||
|
|
||
class SyntaxResult(BaseModel):
    # Result returned by the check_syntax tool.
    # True when no syntax error was reported.
    valid: bool
    # Syntax error messages; empty by default.
    errors: list[str] = []
|
|
||
|
|
||
class StyleResult(BaseModel):
    # Result returned by the check_style tool.
    # Style score, intended range 0-100 (the range is not enforced here).
    score: int
    # Style violation messages; empty by default.
    violations: list[str] = []
|
|
||
|
|
||
class ImprovementResult(BaseModel):
    # Result returned by the suggest_improvements tool.
    # Improvement suggestions; empty by default.
    suggestions: list[str] = []
    # Optional rewritten version of the code; empty string by default.
    refactored_snippet: str = ""
|
|
||
|
|
||
class CodeReviewOutput(BaseModel):
    # Unified report returned by `main`: one entry per tool plus a summary.
    # Output of check_syntax.
    syntax: SyntaxResult
    # Output of check_style.
    style: StyleResult
    # Output of suggest_improvements.
    improvements: ImprovementResult
    # One-line, human-readable recap of the three results.
    summary: str
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # Mockable tool functions | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
# Example input/output pairs handed to @mockable on check_syntax.
# Both `input` and `output` are JSON documents encoded as strings.
CHECK_SYNTAX_EXAMPLES = [
    # Well-formed function: reported as valid with no errors.
    ExampleCall(
        id="valid-python",
        input='{"code": "def hello():\\n return 42", "language": "python"}',
        output='{"valid": true, "errors": []}',
    ),
    # Unclosed parenthesis: reported as invalid with one error message.
    ExampleCall(
        id="syntax-error",
        input='{"code": "def hello(\\n return 42", "language": "python"}',
        output='{"valid": false, "errors": ["SyntaxError: unexpected EOF"]}',
    ),
]
|
|
||
|
|
||
@traced(name="check_syntax", span_type="tool")
@mockable(example_calls=CHECK_SYNTAX_EXAMPLES)
async def check_syntax(code: str, language: str = "python") -> SyntaxResult:
    """Check code for syntax errors by compiling it (Python only).

    Args:
        code: Source code to check.
        language: Programming language (default: python). Any language other
            than "python" is not checked and is reported as valid.

    Returns:
        SyntaxResult with ``valid`` set and, on failure, the error message
        produced by the compiler in ``errors``.
    """
    if language != "python":
        # No parser is available for other languages; report success as-is.
        return SyntaxResult(valid=True, errors=[])

    try:
        # compile() only parses/compiles the source; it does not execute it.
        compile(code, "<string>", "exec")
    except (SyntaxError, ValueError) as exc:
        # On Python 3.11 source containing null bytes raises ValueError
        # rather than SyntaxError; treat both as "invalid source" instead of
        # letting the tool crash.
        return SyntaxResult(valid=False, errors=[str(exc)])
    return SyntaxResult(valid=True, errors=[])
|
|
||
|
|
||
# Example input/output pairs handed to @mockable on check_style.
# Both `input` and `output` are JSON documents encoded as strings.
CHECK_STYLE_EXAMPLES = [
    # Clean snippet: high score, no violations.
    ExampleCall(
        id="clean-code",
        input='{"code": "def hello():\\n return 42\\n", "language": "python"}',
        output='{"score": 95, "violations": []}',
    ),
    # Snippet with whitespace problems: lower score and two violations.
    ExampleCall(
        id="style-issues",
        input='{"code": "def hello( ):\\n return 42", "language": "python"}',
        output='{"score": 60, "violations": ["E211 whitespace before \'(\'", "W291 trailing whitespace"]}',
    ),
]
|
|
||
|
|
||
@traced(name="check_style", span_type="tool")
@mockable(example_calls=CHECK_STYLE_EXAMPLES)
async def check_style(code: str, language: str = "python") -> StyleResult:
    """Report style findings (e.g. PEP 8 for Python) for the given code.

    Args:
        code: Source code to check.
        language: Programming language (default: python).

    Returns:
        StyleResult carrying a 0-100 score and the list of violations.
    """
    # Demo placeholder: a real implementation would call ruff / pycodestyle /
    # eslint etc. Without simulation the inputs are not inspected and a
    # perfect score is always returned.
    no_violations: list[str] = []
    perfect_score = 100
    return StyleResult(score=perfect_score, violations=no_violations)
|
|
||
|
|
||
# Example input/output pair handed to @mockable on suggest_improvements.
# Both `input` and `output` are JSON documents encoded as strings; the output
# is split across adjacent literals only to keep the lines short.
SUGGEST_IMPROVEMENTS_EXAMPLES = [
    # Untyped, undocumented function: suggests annotations and a docstring.
    ExampleCall(
        id="basic-function",
        input='{"code": "def add(a, b):\\n return a + b"}',
        output=(
            '{"suggestions": ["Add type annotations", "Add a docstring"],'
            ' "refactored_snippet": "def add(a: int, b: int) -> int:\\n '
            "'''Return the sum of a and b.'''\\n return a + b\"}"
        ),
    )
]
|
|
||
|
|
||
@traced(name="suggest_improvements", span_type="tool")
@mockable(example_calls=SUGGEST_IMPROVEMENTS_EXAMPLES)
async def suggest_improvements(code: str) -> ImprovementResult:
    """Produce improvement suggestions for the given code.

    Args:
        code: Source code to analyse.

    Returns:
        ImprovementResult with suggestions and an optional refactored snippet.
    """
    # Demo placeholder: a real implementation would call an LLM or a static
    # analysis tool. Without simulation there are no suggestions and the code
    # is echoed back unchanged as the "refactored" snippet.
    nothing_to_suggest: list[str] = []
    return ImprovementResult(
        refactored_snippet=code,
        suggestions=nothing_to_suggest,
    )
|
|
||
|
|
||
| # --------------------------------------------------------------------------- | ||
| # Agent entrypoint | ||
| # --------------------------------------------------------------------------- | ||
|
|
||
|
|
||
@traced(name="main")
async def main(input: CodeReviewInput) -> CodeReviewOutput:
    """Run the three code-review tools in order and merge their results.

    Every tool function is wrapped with ``@traced(..., span_type="tool")``,
    which the sample relies on for trajectory-based evaluation and simulation.

    Args:
        input: Code to review and the language it is written in.

    Returns:
        CodeReviewOutput with each tool's result and a one-line summary.
    """
    # The tools are awaited one after another: syntax, style, improvements.
    syntax = await check_syntax(input.code, input.language)
    style = await check_style(input.code, input.language)
    improvements = await suggest_improvements(input.code)

    # "Issues" counts syntax errors and style violations; suggestions are
    # reported separately in the summary.
    issues = sum(len(found) for found in (syntax.errors, style.violations))

    summary_parts = (
        f"Found {issues} issue(s). ",
        f"Style score: {style.score}/100. ",
        f"{len(improvements.suggestions)} improvement suggestion(s).",
    )
    report_fields = {
        "syntax": syntax,
        "style": style,
        "improvements": improvements,
        "summary": "".join(summary_parts),
    }
    return CodeReviewOutput(**report_fields)
14 changes: 14 additions & 0 deletions
14
packages/uipath/samples/runtime-simulations-agent/pyproject.toml
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| [project] | ||
| name = "runtime-simulations-agent" | ||
| version = "0.0.1" | ||
| description = "Code review agent demonstrating runtime simulation" | ||
| authors = [{ name = "UiPath", email = "python-sdk@uipath.com" }] | ||
| dependencies = [ | ||
| "uipath", | ||
| ] | ||
| requires-python = ">=3.11" | ||
|
|
||
| [dependency-groups] | ||
| dev = [ | ||
| "uipath-dev", | ||
| ] |
15 changes: 15 additions & 0 deletions
15
packages/uipath/samples/runtime-simulations-agent/simulation.json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| { | ||
| "enabled": true, | ||
| "toolsToSimulate": [ | ||
| { | ||
| "name": "check_syntax" | ||
| }, | ||
| { | ||
| "name": "check_style" | ||
| }, | ||
| { | ||
| "name": "suggest_improvements" | ||
| } | ||
| ], | ||
| "instructions": "You are simulating a code review system. Given a tool name and its input arguments, produce a realistic JSON response that matches the tool's output schema.\n\n- check_syntax: return {\"valid\": <bool>, \"errors\": [<string>, ...]}. If the code looks syntactically correct return valid=true and an empty errors list. Otherwise list the syntax errors.\n- check_style: return {\"score\": <0-100>, \"violations\": [<string>, ...]}. Evaluate PEP 8 compliance for Python code. Deduct points for missing spaces, missing type annotations, etc.\n- suggest_improvements: return {\"suggestions\": [<string>, ...], \"refactored_snippet\": \"<improved code>\"}. Suggest concrete improvements such as adding type hints, docstrings, or handling edge cases (e.g. division by zero)." | ||
| } |
5 changes: 5 additions & 0 deletions
5
packages/uipath/samples/runtime-simulations-agent/uipath.json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| { | ||
| "functions": { | ||
| "main": "main.py:main" | ||
| } | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,14 +1,21 @@ | ||
| """Mock interface.""" | ||
|
|
||
| from ._mock_context import is_tool_simulated | ||
| from ._mock_runtime import UiPathMockRuntime | ||
| from ._types import ExampleCall, MockingContext | ||
| from ._mock_runtime import ( | ||
| UiPathMockRuntime, | ||
| build_mocking_context, | ||
| build_mocking_context_from_dict, | ||
| ) | ||
| from ._types import ExampleCall, MockingContext, SimulationConfig | ||
| from .mockable import mockable | ||
|
|
||
| __all__ = [ | ||
| "ExampleCall", | ||
| "UiPathMockRuntime", | ||
| "MockingContext", | ||
| "mockable", | ||
| "SimulationConfig", | ||
| "UiPathMockRuntime", | ||
| "build_mocking_context", | ||
| "build_mocking_context_from_dict", | ||
| "is_tool_simulated", | ||
| "mockable", | ||
| ] |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.