From 1219bfabea7eaf6dc4962933df8f1da5077a0200 Mon Sep 17 00:00:00 2001 From: JR Boos Date: Tue, 30 Jun 2026 13:06:53 -0400 Subject: [PATCH] e2e(skills): added E2E steps for agent skills refined E2E tests for skills and added necessary step implementations. close: LCORE-2080 --- docker-compose-library.yaml | 1 + docker-compose.yaml | 1 + .../lightspeed-stack-skills-directory.yaml | 25 + .../library-mode/lightspeed-stack-skills.yaml | 25 + .../lightspeed-stack-skills-directory.yaml | 26 + .../server-mode/lightspeed-stack-skills.yaml | 26 + tests/e2e/features/skills.feature | 573 +++++++++++------- tests/e2e/features/steps/common_http.py | 27 + .../e2e/features/steps/llm_query_response.py | 11 +- tests/e2e/skills/echo/SKILL.md | 19 + tests/e2e/skills/echo/references/guide.md | 19 + tests/e2e/skills/summarize/SKILL.md | 21 + .../e2e/skills/summarize/references/guide.md | 20 + tests/e2e/test_list.txt | 3 +- 14 files changed, 576 insertions(+), 221 deletions(-) create mode 100644 tests/e2e/configuration/library-mode/lightspeed-stack-skills-directory.yaml create mode 100644 tests/e2e/configuration/library-mode/lightspeed-stack-skills.yaml create mode 100644 tests/e2e/configuration/server-mode/lightspeed-stack-skills-directory.yaml create mode 100644 tests/e2e/configuration/server-mode/lightspeed-stack-skills.yaml create mode 100644 tests/e2e/skills/echo/SKILL.md create mode 100644 tests/e2e/skills/echo/references/guide.md create mode 100644 tests/e2e/skills/summarize/SKILL.md create mode 100644 tests/e2e/skills/summarize/references/guide.md diff --git a/docker-compose-library.yaml b/docker-compose-library.yaml index f0e075848..2e3d948ce 100755 --- a/docker-compose-library.yaml +++ b/docker-compose-library.yaml @@ -20,6 +20,7 @@ services: - ${GCP_KEYS_PATH:-./tmp/.gcp-keys-dummy}:/opt/app-root/.gcp-keys:ro - ./tests/e2e/rag:/opt/app-root/src/.llama/storage/rag:Z - ${HF_CACHE_PATH:-./tmp/.hf-cache}:/opt/app-root/src/.cache/huggingface + - 
./tests/e2e/skills:/app-root/skills:ro,Z - ./tests/e2e/secrets/mcp-token:/tmp/mcp-token:ro,z - ./tests/e2e/secrets/invalid-mcp-token:/tmp/invalid-mcp-token:ro,z environment: diff --git a/docker-compose.yaml b/docker-compose.yaml index c3f026e70..aa4631ad6 100755 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -87,6 +87,7 @@ services: - "8080:8080" volumes: - ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:z + - ./tests/e2e/skills:/app-root/skills:ro,z - ./tests/e2e/secrets/mcp-token:/tmp/mcp-token:ro,z - ./tests/e2e/secrets/invalid-mcp-token:/tmp/invalid-mcp-token:ro,z environment: diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-skills-directory.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-skills-directory.yaml new file mode 100644 index 000000000..0aff2f67b --- /dev/null +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-skills-directory.yaml @@ -0,0 +1,25 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Library mode - embeds llama-stack as library + use_as_library_client: true + library_client_config_path: run.yaml +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +inference: + default_provider: openai + default_model: gpt-4o-mini +skills: + paths: + - skills diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-skills.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-skills.yaml new file mode 100644 index 000000000..1a7177434 --- /dev/null +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-skills.yaml @@ -0,0 +1,25 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Library mode - embeds 
llama-stack as library + use_as_library_client: true + library_client_config_path: run.yaml +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +inference: + default_provider: openai + default_model: gpt-4o-mini +skills: + paths: + - skills/echo diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-skills-directory.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-skills-directory.yaml new file mode 100644 index 000000000..0ae7888c7 --- /dev/null +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-skills-directory.yaml @@ -0,0 +1,26 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Server mode - connects to separate llama-stack service + use_as_library_client: false + url: http://${env.E2E_LLAMA_HOSTNAME}:8321 + api_key: xyzzy +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +inference: + default_provider: openai + default_model: gpt-4o-mini +skills: + paths: + - skills diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-skills.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-skills.yaml new file mode 100644 index 000000000..387d03856 --- /dev/null +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-skills.yaml @@ -0,0 +1,26 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Server mode - connects to separate llama-stack service + use_as_library_client: false + url: http://${env.E2E_LLAMA_HOSTNAME}:8321 + api_key: xyzzy +user_data_collection: + feedback_enabled: true + 
feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +inference: + default_provider: openai + default_model: gpt-4o-mini +skills: + paths: + - skills/echo diff --git a/tests/e2e/features/skills.feature b/tests/e2e/features/skills.feature index c6f01d9cf..6e927f364 100644 --- a/tests/e2e/features/skills.feature +++ b/tests/e2e/features/skills.feature @@ -1,4 +1,4 @@ -@e2e_group_2 @skip +@e2e_group_2 Feature: Agent skills tests Background: @@ -11,12 +11,12 @@ Feature: Agent skills tests @SkillsConfig Scenario: Skill tools are registered when skills are configured - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills.yaml configuration + Given The service uses the lightspeed-stack-skills.yaml configuration + And MCP toolgroups are reset for a new MCP configuration And The service is restarted When I access REST API endpoint "tools" using HTTP GET method Then The status code of the response is 200 - And The body of the response is the following #TODO: Currently placeholder, should reflect actual tools (all tools not just skill tools) + And The body of the response is the following """ { "tools": [ @@ -48,7 +48,7 @@ Feature: Agent skills tests }, { "identifier": "list_skills", - "description": "List available skills with their names and descriptions. Call this to discover what skills are available.", + "description": "Get an overview of all available skills and what they do.\n\nUse this when you need to discover what skills exist or refresh your knowledge\nof available capabilities. Skills provide domain-specific knowledge and instructions\nfor specialized tasks.", "parameters": [], "provider_id": "agent-skills", "toolgroup_id": "builtin::agent-skills", @@ -56,12 +56,12 @@ Feature: Agent skills tests "type": "tool" }, { - "identifier": "activate_skill", - "description": "Load full instructions for a skill. 
Call this when a task matches a skill's description.", + "identifier": "load_skill", + "description": "Load complete instructions and capabilities for a specific skill.\n\nA skill contains detailed instructions, supplementary resources (like templates or\nreference docs), and executable scripts. Load a skill when you need to perform a\ntask within its domain.", "parameters": [ { - "name": "name", - "description": "The name of the skill to load", + "name": "skill_name", + "description": "Exact name from your available skills list.\nMust match exactly (e.g., \"data-analysis\" not \"data analysis\").", "parameter_type": "string", "required": true, "default": null @@ -73,22 +73,29 @@ Feature: Agent skills tests "type": "tool" }, { - "identifier": "load_skill_resource", - "description": "Load a file from a skill's references/ directory. Use this when skill instructions reference additional documentation.", + "identifier": "read_skill_resource", + "description": "Access supplementary documentation, templates, or data from a skill.\n\nResources are additional files that support skill execution. 
They can be static\ncontent (markdown docs, templates, schemas) or dynamic callables (functions that\ngenerate content based on parameters).\n\nWhen to use this:\n- When a skill's instructions reference a specific resource\n- To access form templates, reference documentation, or data schemas\n- When you need supplementary information beyond the skill instructions", "parameters": [ { "name": "skill_name", - "description": "The name of the skill containing the resource", + "description": "Name of the skill containing the resource.", "parameter_type": "string", "required": true, "default": null }, { - "name": "path", - "description": "Relative path to the resource file (e.g., 'references/guide.md')", + "name": "resource_name", + "description": "Exact name of the resource as listed in the skill.\nExamples: \"FORMS.md\", \"REFERENCE.md\", \"get_schema\"\nMust match exactly - do not infer or guess.", "parameter_type": "string", "required": true, "default": null + }, + { + "name": "args", + "description": "Arguments for callable resources (optional for static files).\nKeys must match the parameter names in the resource's schema.", + "parameter_type": "object", + "required": false, + "default": null } ], "provider_id": "agent-skills", @@ -98,25 +105,25 @@ Feature: Agent skills tests }, { "identifier": "run_skill_script", - "description": "Execute a skill script that performs actions or computations.", + "description": "Execute a skill script that performs actions or computations.\n\nScripts are executable programs provided by skills that can perform actions\n(API calls, file operations), process data (transformations, analysis), or\ngenerate outputs (reports, visualizations).\n\nWhen to use this:\n- When a skill's instructions tell you to run a specific script\n- To perform automated tasks that the skill provides\n- For data processing, API interactions, or file operations", "parameters": [ { "name": "skill_name", - "description": "Name of the skill containing the script", 
+ "description": "Name of the skill containing the script.", "parameter_type": "string", "required": true, "default": null }, { "name": "script_name", - "description": "Exact name of the script as listed in the skill", + "description": "Exact name of the script as listed in the skill.\nExamples: \"analyze.py\", \"scripts/analyze.py\", \"scripts/deploy.sh\", \"scripts/runner\"\nMust match exactly - do not infer or guess.", "parameter_type": "string", "required": true, "default": null }, { "name": "args", - "description": "Arguments required by the script", + "description": "Arguments required by the script.\nKeys must match the parameter names in the script's schema.", "parameter_type": "object", "required": false, "default": null @@ -133,10 +140,11 @@ Feature: Agent skills tests Scenario: Skill tools are not registered when no skills are configured Given The service uses the lightspeed-stack.yaml configuration + And MCP toolgroups are reset for a new MCP configuration And The service is restarted When I access REST API endpoint "tools" using HTTP GET method Then The status code of the response is 200 - And The body of the response is the following #TODO: Currently placeholder, should reflect actual tools (default tools, not skill tools) + And The body of the response is the following """ { "tools": [ @@ -166,7 +174,7 @@ Feature: Agent skills tests "server_source": "builtin", "type": "tool_group" } - ], + ] } """ @@ -174,8 +182,7 @@ Feature: Agent skills tests @SkillsConfig Scenario: LLM can discover skills via list_skills tool using query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted And I capture the current token metrics When I use "query" to ask question @@ -183,16 +190,22 @@ Feature: Agent skills tests {"query": "What skills are available? 
Use the list_skills tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "list_skills", + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "list_skills" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "{\"echo\":\"Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\"}", + "type": "function_call_output" } ] """ @@ -200,8 +213,7 @@ Feature: Agent skills tests @SkillsConfig Scenario: LLM can discover skills via list_skills tool using streaming_query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted And I capture the current token metrics When I use "streaming_query" to ask question @@ -210,17 +222,22 @@ Feature: Agent skills tests """ When I wait for the response to be completed Then The status code of the response is 200 - And The response is the last streamed fragment - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "list_skills", + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "list_skills" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "{\"echo\":\"Echo back the user's input exactly as provided. 
Use when a user asks to echo, repeat, or mirror text.\"}", + "type": "function_call_output" } ] """ @@ -229,54 +246,69 @@ Feature: Agent skills tests # --- Skill activation --- @SkillsConfig - Scenario: LLM can activate a skill and use its instructions via query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + Scenario: LLM can load a skill and use its instructions via query endpoint + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted And I capture the current token metrics When I use "query" to ask question """ - {"query": "I need help with e2e testing. Use the activate_skill tool to load the e2e-test-skill.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Echo 'Hello World'. Use the load_skill tool to load the 'echo' skill.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "load_skill", + "args": { + "skill_name": "echo" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "activate_skill" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "\necho\nEcho back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\n/app-root/skills/echo\n\n\n\n\n\n\n\n\n\n\n# Echo Skill\n\n## When to use this skill\n\nUse this skill when:\n- A user asks to echo or repeat text\n- A user wants to verify that the agent can return their input verbatim\n\n## Instructions\n\n1. Read the user's input text\n2. 
Return the exact text back to the user without modification\n\nFor formatting guidelines, see [references/guide.md](references/guide.md).\n\n\n", + "type": "function_call_output" } ] """ And The token metrics have increased @SkillsConfig - Scenario: LLM can activate a skill and use its instructions via streaming_query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + Scenario: LLM can load a skill and use its instructions via streaming_query endpoint + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted And I capture the current token metrics When I use "streaming_query" to ask question """ - {"query": "I need help with e2e testing. Use the activate_skill tool to load the e2e-test-skill.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Echo 'Hello World'. Use the load_skill tool to load the 'echo' skill.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ When I wait for the response to be completed Then The status code of the response is 200 - And The response is the last streamed fragment - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "load_skill", + "args": { + "skill_name": "echo" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "activate_skill" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "\necho\nEcho back the user's input exactly as provided. 
Use when a user asks to echo, repeat, or mirror text.\n/app-root/skills/echo\n\n\n\n\n\n\n\n\n\n\n# Echo Skill\n\n## When to use this skill\n\nUse this skill when:\n- A user asks to echo or repeat text\n- A user wants to verify that the agent can return their input verbatim\n\n## Instructions\n\n1. Read the user's input text\n2. Return the exact text back to the user without modification\n\nFor formatting guidelines, see [references/guide.md](references/guide.md).\n\n\n", + "type": "function_call_output" } ] """ @@ -286,53 +318,71 @@ Feature: Agent skills tests # --- Skill resource loading --- @SkillsConfig - Scenario: LLM can load a skill reference file via load_skill_resource tool using query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + Scenario: LLM can load a skill reference file via read_skill_resource tool using query endpoint + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted And I capture the current token metrics When I use "query" to ask question """ - {"query": "Load the reference file references/guide.md from the e2e-test-skill using load_skill_resource.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Load the reference file references/guide.md from the 'echo' skill. 
Use the read_skill_resource tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "read_skill_resource", + "args": { + "skill_name": "echo", + "resource_name": "references/guide.md" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "load_skill_resource" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, - } ] + "content": "# Echo Formatting Guide\n\n## Output format\n\nWhen echoing text back to the user, follow these rules:\n\n- Preserve the exact input text without any modification\n- Do not add quotation marks around the echoed text\n- Do not add any prefix like \"Echo:\" or \"Output:\"\n- Return only the echoed text as the response\n- Preserve whitespace and line breaks exactly as provided\n\n## Examples\n\n**Input**: `Hello World!`\n**Output**: `Hello World!`\n\n**Input**: `multiple words with spaces`\n**Output**: `multiple words with spaces`\n", + "type": "function_call_output" + } + ] """ And The token metrics have increased @SkillsConfig - Scenario: LLM can load a skill reference file via load_skill_resource tool using streaming_query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + Scenario: LLM can load a skill reference file via read_skill_resource tool using streaming_query endpoint + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted And I capture the current token metrics When I use "streaming_query" to ask question """ - {"query": "Load the reference file references/guide.md from the e2e-test-skill using 
load_skill_resource.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Load the reference file references/guide.md from the 'echo' skill. Use the read_skill_resource tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ When I wait for the response to be completed Then The status code of the response is 200 - And The response is the last streamed fragment - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "read_skill_resource", + "args": { + "skill_name": "echo", + "resource_name": "references/guide.md" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "load_skill_resource" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "# Echo Formatting Guide\n\n## Output format\n\nWhen echoing text back to the user, follow these rules:\n\n- Preserve the exact input text without any modification\n- Do not add quotation marks around the echoed text\n- Do not add any prefix like \"Echo:\" or \"Output:\"\n- Return only the echoed text as the response\n- Preserve whitespace and line breaks exactly as provided\n\n## Examples\n\n**Input**: `Hello World!`\n**Output**: `Hello World!`\n\n**Input**: `multiple words with spaces`\n**Output**: `multiple words with spaces`\n", + "type": "function_call_output" } ] """ @@ -340,103 +390,132 @@ Feature: Agent skills tests # --- Error handling: unknown skill --- - @SkillsConfig - Scenario: activate_skill returns error for unknown skill name via query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + @SkillsConfig @skip + Scenario: load_skill returns error for unknown skill name via query endpoint + Given The service uses the 
lightspeed-stack-skills.yaml configuration And The service is restarted When I use "query" to ask question """ - {"query": "Activate a skill called nonexistent-skill using the activate_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "load a skill called nonexistent-skill using the load_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "load_skill", + "args": { + "skill_name": "nonexistent-skill" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "activate_skill" "status": "failure", - "content": "", - "type": "tool_result", - "round": 1, + "type": "function_call_output" } ] """ - @SkillsConfig - Scenario: activate_skill returns error for unknown skill name via streaming_query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + + @SkillsConfig @skip + Scenario: load_skill returns error for unknown skill name via streaming_query endpoint + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted When I use "streaming_query" to ask question """ - {"query": "Activate a skill called nonexistent-skill using the activate_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Load a skill called nonexistent-skill using the load_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ When I wait for the response to be completed Then The status code of the response is 200 - And The response is the last streamed fragment - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect 
actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "load_skill", + "args": { + "skill_name": "nonexistent-skill" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "activate_skill" "status": "failure", - "content": "", - "type": "tool_result", - "round": 1, + "type": "function_call_output" } ] """ # --- Error handling: missing resource --- - @SkillsConfig - Scenario: load_skill_resource returns error for nonexistent resource file via query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + @SkillsConfig @skip + Scenario: read_skill_resource returns error for nonexistent resource file via query endpoint + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted When I use "query" to ask question """ - {"query": "Load references/nonexistent.md from e2e-test-skill using load_skill_resource.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Load 'references/nonexistent.md' from the 'echo' skill. 
Use the read_skill_resource tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "read_skill_resource", + "args": { + "skill_name": "echo", + "resource_name": "references/nonexistent.md" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "load_skill_resource" "status": "failure", - "content": "", - "type": "tool_result", - "round": 1, + "type": "function_call_output" } ] """ - @SkillsConfig - Scenario: load_skill_resource returns error for nonexistent resource file via streaming_query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + @SkillsConfig @skip + Scenario: read_skill_resource returns error for nonexistent resource file via streaming_query endpoint + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted When I use "streaming_query" to ask question """ - {"query": "Load references/nonexistent.md from e2e-test-skill using load_skill_resource.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Load 'references/nonexistent.md' from the 'echo' skill. 
Use the read_skill_resource tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ When I wait for the response to be completed Then The status code of the response is 200 - And The response is the last streamed fragment - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "read_skill_resource", + "args": { + "skill_name": "echo", + "resource_name": "references/nonexistent.md" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "load_skill_resource" "status": "failure", - "content": "", - "type": "tool_result", - "round": 1, + "type": "function_call_output" } ] """ @@ -444,83 +523,101 @@ Feature: Agent skills tests # --- Context management: deduplication --- - @SkillsConfig + @SkillsConfig @skip Scenario: Duplicate skill activation in same conversation returns already-loaded note via query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted When I use "query" to ask question """ - {"query": "Activate e2e-test-skill using the activate_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Load the 'echo' skill using the load_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 And I store conversation details - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results - """ - [ - { - "id": "", - "name": "activate_skill" - "status": "failure", - "content": "", - "type": "tool_result", - "round": 1, - } - ] - """ + And The body of the "tool_calls" field of the response is the following 
+ """ + [ + { + "name": "load_skill", + "args": { + "skill_name": "echo" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following + """ + [ + { + "status": "success", + "content": "\necho\nEcho back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\n/app-root/skills/echo\n\n\n\n\n\n\n\n\n\n\n# Echo Skill\n\n## When to use this skill\n\nUse this skill when:\n- A user asks to echo or repeat text\n- A user wants to verify that the agent can return their input verbatim\n\n## Instructions\n\n1. Read the user's input text\n2. Return the exact text back to the user without modification\n\nFor formatting guidelines, see [references/guide.md](references/guide.md).\n\n\n", + "type": "function_call_output" + } + ] + """ When I use "query" to ask question with same conversation_id """ - {"query": "Activate e2e-test-skill again using the activate_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Load the 'echo' skill again using the load_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results - """ - [ - { - "id": "", - "name": "activate_skill" - "status": "failure", - "content": "", - "type": "tool_result", - "round": 1, - } - ] - """ + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "load_skill", + "args": { + "skill_name": "echo" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following + """ + [ + { + "status": "failure", + "type": "function_call_output" + } + ] + """ # --- Multiple skills --- @SkillsMultiConfig Scenario: Skills directory path discovers all skills in subdirectories via query endpoint - Given The e2e-test-skill skill directory path is "skills/e2e-test-skill" - 
And The e2e-second-skill skill directory path is "skills/e2e-second-skill" - And The service uses the lightspeed-stack-skills-directory.yaml configuration + Given The service uses the lightspeed-stack-skills-directory.yaml configuration And The service is restarted When I use "query" to ask question """ {"query": "List all available skills using the list_skills tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "list_skills", + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "list_skills" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "{\"echo\":\"Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\",\"summarize\":\"Summarize text into a concise single-sentence overview. 
Use when a user asks to summarize, condense, or shorten text.\"}", + "type": "function_call_output" } ] """ @SkillsMultiConfig Scenario: Skills directory path discovers all skills in subdirectories via streaming_query endpoint - Given The e2e-test-skill skill directory path is "skills/e2e-test-skill" - And The e2e-second-skill skill directory path is "skills/e2e-second-skill" - And The service uses the lightspeed-stack-skills-directory.yaml configuration + Given The service uses the lightspeed-stack-skills-directory.yaml configuration And The service is restarted When I use "streaming_query" to ask question """ @@ -528,104 +625,142 @@ Feature: Agent skills tests """ When I wait for the response to be completed Then The status code of the response is 200 - And The response is the last streamed fragment - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "list_skills", + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "list_skills" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "{\"echo\":\"Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\",\"summarize\":\"Summarize text into a concise single-sentence overview. 
Use when a user asks to summarize, condense, or shorten text.\"}", + "type": "function_call_output" } ] """ # --- Full progressive disclosure flow --- - @SkillsConfig @flaky - Scenario: LLM completes list_skills then activate_skill then load_skill_resource via query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + @SkillsConfig @skip # TODO: This test is too flaky (should be run on demand) + Scenario: LLM completes list_skills then load_skill then read_skill_resource via query endpoint + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted And I capture the current token metrics When I use "query" to ask question """ - {"query": "Use the echo skill to echo this 'Hello World!'", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Use Skills and follow progressive disclosure. Say 'Hello World'", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "list_skills", + "type": "function_call" + }, + { + "name": "load_skill", + "args": { + "skill_name": "echo" + }, + "type": "function_call" + }, + { + "name": "read_skill_resource", + "args": { + "skill_name": "echo", + "resource_name": "references/guide.md" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "list_skills" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "{\"echo\":\"Echo back the user's input exactly as provided. 
Use when a user asks to echo, repeat, or mirror text.\"}", + "type": "function_call_output", + "round": 1 }, { - "id": "", - "name": "activate_skill" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "\necho\nEcho back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\n/app-root/skills/echo\n\n\n\n\n\n\n\n\n\n\n# Echo Skill\n\n## When to use this skill\n\nUse this skill when:\n- A user asks to echo or repeat text\n- A user wants to verify that the agent can return their input verbatim\n\n## Instructions\n\n1. Read the user's input text\n2. Return the exact text back to the user without modification\n\nFor formatting guidelines, see [references/guide.md](references/guide.md).\n\n\n", + "type": "function_call_output", + "round": 2 }, { - "id": "", - "name": "load_skill_resource" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "# Echo Formatting Guide\n\n## Output format\n\nWhen echoing text back to the user, follow these rules:\n\n- Preserve the exact input text without any modification\n- Do not add quotation marks around the echoed text\n- Do not add any prefix like \"Echo:\" or \"Output:\"\n- Return only the echoed text as the response\n- Preserve whitespace and line breaks exactly as provided\n\n## Examples\n\n**Input**: `Hello World!`\n**Output**: `Hello World!`\n\n**Input**: `multiple words with spaces`\n**Output**: `multiple words with spaces`\n", + "type": "function_call_output", + "round": 3 } ] """ - @SkillsConfig - Scenario: LLM completes list_skills then activate_skill then load_skill_resource via streaming_query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + @SkillsConfig @skip # TODO: This test is too flaky (should be run on demand) + Scenario: LLM completes list_skills then load_skill then read_skill_resource via 
streaming_query endpoint + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted And I capture the current token metrics When I use "streaming_query" to ask question """ - {"query": "Use the echo skill to echo this 'Hello World!'", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Use Skills and follow progressive disclosure. Say 'Hello World'", "model": "{MODEL}", "provider": "{PROVIDER}"} """ When I wait for the response to be completed Then The status code of the response is 200 - And The response is the last streamed fragment - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "list_skills", + "type": "function_call" + }, + { + "name": "load_skill", + "args": { + "skill_name": "echo" + }, + "type": "function_call" + }, + { + "name": "read_skill_resource", + "args": { + "skill_name": "echo", + "resource_name": "references/guide.md" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "list_skills" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "{\"echo\":\"Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\"}", + "type": "function_call_output", + "round": 1 }, { - "id": "", - "name": "activate_skill" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "\necho\nEcho back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\n/app-root/skills/echo\n\n\n\n\n\n\n\n\n\n\n# Echo Skill\n\n## When to use this skill\n\nUse this skill when:\n- A user asks to echo or repeat text\n- A user wants to verify that the agent can return their input verbatim\n\n## Instructions\n\n1. 
Read the user's input text\n2. Return the exact text back to the user without modification\n\nFor formatting guidelines, see [references/guide.md](references/guide.md).\n\n\n", + "type": "function_call_output", + "round": 2 }, { - "id": "", - "name": "load_skill_resource" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "# Echo Formatting Guide\n\n## Output format\n\nWhen echoing text back to the user, follow these rules:\n\n- Preserve the exact input text without any modification\n- Do not add quotation marks around the echoed text\n- Do not add any prefix like \"Echo:\" or \"Output:\"\n- Return only the echoed text as the response\n- Preserve whitespace and line breaks exactly as provided\n\n## Examples\n\n**Input**: `Hello World!`\n**Output**: `Hello World!`\n\n**Input**: `multiple words with spaces`\n**Output**: `multiple words with spaces`\n", + "type": "function_call_output", + "round": 3 } ] """ diff --git a/tests/e2e/features/steps/common_http.py b/tests/e2e/features/steps/common_http.py index aceaa2fef..33462c1ca 100644 --- a/tests/e2e/features/steps/common_http.py +++ b/tests/e2e/features/steps/common_http.py @@ -318,3 +318,30 @@ def set_header(context: Context, header_name: str) -> None: except json.JSONDecodeError: pass context.auth_headers[header_name] = value + + +@then('The body of the "{field}" field of the response is the following') +def check_response_field_body(context: Context, field: str) -> None: + """Check the content of a specific field in the response body. + + Parameters: + context: Behave context with ``response`` and/or ``response_data``. + field: Name of the field to check (e.g. ``tool_results``). 
+ """ + if getattr(context, "use_streaming_response_data", False): + response_body = context.response_data + else: + assert context.response is not None, "Request needs to be performed first" + response_body = context.response.json() + + assert field in response_body, ( + f"Field '{field}' not found in response. " + f"Available fields: {list(response_body.keys())}" + ) + + actual_value = response_body[field] + + assert context.text, "Expected value for response field body is missing" + + expected_value = json.loads(context.text) + validate_json_partially(actual_value, expected_value) diff --git a/tests/e2e/features/steps/llm_query_response.py b/tests/e2e/features/steps/llm_query_response.py index b0f992861..30d327b3c 100644 --- a/tests/e2e/features/steps/llm_query_response.py +++ b/tests/e2e/features/steps/llm_query_response.py @@ -94,6 +94,7 @@ def wait_for_complete_response(context: Context) -> None: context.response_data = _parse_streaming_response(context.response.text) context.response.raise_for_status() assert context.response_data["finished"] is True + context.use_streaming_response_data = True @step('I use "{endpoint}" to ask question') @@ -364,7 +365,9 @@ def _parse_streaming_response(response_text: str) -> dict: lines = response_text.strip().split("\n") conversation_id = None full_response = "" - full_response_split = [] + full_response_split: list[str] = [] + tool_calls: list[dict[str, Any]] = [] + tool_results: list[dict[str, Any]] = [] finished = False stream_error = ( None # {"status_code": int, "response": str, "cause": str} if event "error" @@ -380,6 +383,10 @@ def _parse_streaming_response(response_text: str) -> dict: conversation_id = data["data"]["conversation_id"] elif event == "token": full_response_split.append(data["data"]["token"]) + elif event == "tool_call": + tool_calls.append(data["data"]) + elif event == "tool_result": + tool_results.append(data["data"]) elif event == "turn_complete": full_response = data["data"]["token"] elif event == 
"end": @@ -393,6 +400,8 @@ def _parse_streaming_response(response_text: str) -> dict: "conversation_id": conversation_id, "response": "".join(full_response_split), "response_complete": full_response, + "tool_calls": tool_calls, + "tool_results": tool_results, "finished": finished, "stream_error": stream_error, } diff --git a/tests/e2e/skills/echo/SKILL.md b/tests/e2e/skills/echo/SKILL.md new file mode 100644 index 000000000..e94e52612 --- /dev/null +++ b/tests/e2e/skills/echo/SKILL.md @@ -0,0 +1,19 @@ +--- +name: echo +description: Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text. +--- + +# Echo Skill + +## When to use this skill + +Use this skill when: +- A user asks to echo or repeat text +- A user wants to verify that the agent can return their input verbatim + +## Instructions + +1. Read the user's input text +2. Return the exact text back to the user without modification + +For formatting guidelines, see [references/guide.md](references/guide.md). 
diff --git a/tests/e2e/skills/echo/references/guide.md b/tests/e2e/skills/echo/references/guide.md new file mode 100644 index 000000000..dee54b305 --- /dev/null +++ b/tests/e2e/skills/echo/references/guide.md @@ -0,0 +1,19 @@ +# Echo Formatting Guide + +## Output format + +When echoing text back to the user, follow these rules: + +- Preserve the exact input text without any modification +- Do not add quotation marks around the echoed text +- Do not add any prefix like "Echo:" or "Output:" +- Return only the echoed text as the response +- Preserve whitespace and line breaks exactly as provided + +## Examples + +**Input**: `Hello World!` +**Output**: `Hello World!` + +**Input**: `multiple words with spaces` +**Output**: `multiple words with spaces` diff --git a/tests/e2e/skills/summarize/SKILL.md b/tests/e2e/skills/summarize/SKILL.md new file mode 100644 index 000000000..b9fd63d55 --- /dev/null +++ b/tests/e2e/skills/summarize/SKILL.md @@ -0,0 +1,21 @@ +--- +name: summarize +description: Summarize text into a concise single-sentence overview. Use when a user asks to summarize, condense, or shorten text. +--- + +# Summarize Skill + +## When to use this skill + +Use this skill when: +- A user asks to summarize or condense text +- A user wants a brief overview of longer content +- A user requests a TL;DR or short version + +## Instructions + +1. Read the user's input text +2. Identify the key point or main idea +3. Return a single concise sentence that captures the essence of the input + +For formatting guidelines, see [references/guide.md](references/guide.md). 
diff --git a/tests/e2e/skills/summarize/references/guide.md b/tests/e2e/skills/summarize/references/guide.md new file mode 100644 index 000000000..fa2985dd5 --- /dev/null +++ b/tests/e2e/skills/summarize/references/guide.md @@ -0,0 +1,20 @@ +# Summarize Formatting Guide + +## Output format + +When summarizing text for the user, follow these rules: + +- Return exactly one sentence +- Do not add quotation marks around the summary +- Do not add any prefix like "Summary:" or "TL;DR:" +- Keep the summary under 30 words +- Use simple, clear language +- Preserve the original meaning without adding interpretation + +## Examples + +**Input**: `The quick brown fox jumped over the lazy dog while the cat watched from the windowsill and the bird flew overhead.` +**Output**: `A fox jumped over a dog while a cat and bird observed nearby.` + +**Input**: `We need to upgrade our database server because the current one is running out of disk space and memory, which causes frequent timeouts during peak hours.` +**Output**: `The database server needs upgrading due to insufficient disk space and memory causing peak-hour timeouts.` diff --git a/tests/e2e/test_list.txt b/tests/e2e/test_list.txt index 8747bac96..9a619fba4 100644 --- a/tests/e2e/test_list.txt +++ b/tests/e2e/test_list.txt @@ -36,4 +36,5 @@ features/unified-mode-boot.feature features/unified-mode-legacy.feature features/unified-mode-validation.feature features/unified-mode-migration.feature -features/unified-mode-synthesis.feature \ No newline at end of file +features/unified-mode-synthesis.feature +features/skills.feature