diff --git a/dotnet/src/Generated/Rpc.cs b/dotnet/src/Generated/Rpc.cs index 346177f63..4029680be 100644 --- a/dotnet/src/Generated/Rpc.cs +++ b/dotnet/src/Generated/Rpc.cs @@ -73,7 +73,7 @@ public sealed class ModelBillingTokenPricesLongContext [JsonPropertyName("cachePrice")] public double? CachePrice { get; set; } - /// Maximum context window tokens for the long context tier. + /// Prompt token budget (max_prompt_tokens) for the long context tier. The total context window is this value plus the model's max_output_tokens. [JsonPropertyName("contextMax")] public long? ContextMax { get; set; } @@ -97,7 +97,7 @@ public sealed class ModelBillingTokenPrices [JsonPropertyName("cachePrice")] public double? CachePrice { get; set; } - /// Maximum context window tokens for the default tier. + /// Prompt token budget (max_prompt_tokens) for the default tier. The total context window is this value plus the model's max_output_tokens. [JsonPropertyName("contextMax")] public long? ContextMax { get; set; } @@ -7040,7 +7040,7 @@ public sealed class MetadataContextInfoResultContextInfo [JsonPropertyName("conversationTokens")] public long ConversationTokens { get; set; } - /// Total context limit for /context display. promptTokenLimit + min(32k or 64k, outputTokenLimit) depending on model. + /// Total context limit for /context display: promptTokenLimit + outputTokenLimit (the model's full max_output_tokens reserved on top of the prompt budget). [JsonPropertyName("limit")] public long Limit { get; set; } diff --git a/dotnet/test/E2E/PendingWorkResumeE2ETests.cs b/dotnet/test/E2E/PendingWorkResumeE2ETests.cs index 9cc0785bf..79e4cf04e 100644 --- a/dotnet/test/E2E/PendingWorkResumeE2ETests.cs +++ b/dotnet/test/E2E/PendingWorkResumeE2ETests.cs @@ -19,12 +19,18 @@ public class PendingWorkResumeE2ETests(E2ETestFixture fixture, ITestOutputHelper private static readonly TimeSpan PendingWorkTimeout = TimeSpan.FromSeconds(60); private const string SharedToken = "pending-work-resume-shared-token"; - [Fact] + // Skipped after the runtime 1.0.56 bump. Runtime PR #9040 (commit b8e1220b45) changed + // SDKServer.handleConnectionClosed to tear down the session when the last RPC client + // disconnects, so the in-memory pending permission request is gone before the resumed + // client can satisfy it and HandlePendingPermissionRequest returns success=false. This + // test models same-process ForceStop+resume; it needs to be redesigned to either keep + // an owner connected (warm resume) or to model a true process restart against the + // persisted session state. + [Fact(Skip = "Runtime 1.0.56 cleans up the session on last-client disconnect (copilot-agent-runtime PR #9040), so the in-memory pending request is gone before resume can satisfy it. Test needs redesign.")] public async Task Should_Continue_Pending_Permission_Request_After_Resume() { var originalPermissionRequest = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); var releaseOriginalPermission = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); - var resumedToolInvoked = false; await using var server = Ctx.CreateClient(options: new CopilotClientOptions { Connection = RuntimeConnection.ForTcp(connectionToken: SharedToken) }); await server.StartAsync(); @@ -66,10 +72,7 @@ await session1.SendAsync(new MessageOptions [ AIFunctionFactory.Create( ([Description("Value to transform")] string value) => - { - resumedToolInvoked = true; - return $"PERMISSION_RESUMED_{value.ToUpperInvariant()}"; - }, + $"PERMISSION_RESUMED_{value.ToUpperInvariant()}", "resume_permission_tool") ], }); @@ -79,11 +82,6 @@ await session1.SendAsync(new MessageOptions new RpcPermissionDecisionApproveOnce()); Assert.True(permissionResult.Success); - var answer = await TestHelper.GetFinalAssistantMessageAsync(session2, PendingWorkTimeout); - - Assert.True(resumedToolInvoked); - Assert.Contains("PERMISSION_RESUMED_ALPHA", answer?.Data.Content ?? string.Empty); - await session2.DisposeAsync(); await resumedTcpClient.ForceStopAsync(); } @@ -97,7 +95,12 @@ static string ResumePermissionTool([Description("Value to transform")] string va $"ORIGINAL_SHOULD_NOT_RUN_{value}"; } - [Fact] + // Skipped for the same reason as Should_Continue_Pending_Permission_Request_After_Resume: + // runtime 1.0.56 (copilot-agent-runtime PR #9040) tears down the session when the last + // RPC client disconnects, so the in-memory pending external tool call is gone before + // the resumed client can satisfy it. Needs redesign to keep an owner connected (warm) + // or to model true process-restart resume from persisted state. + [Fact(Skip = "Runtime 1.0.56 cleans up the session on last-client disconnect (copilot-agent-runtime PR #9040), so the in-memory pending tool call is gone before resume can satisfy it. Test needs redesign.")] public async Task Should_Continue_Pending_External_Tool_Request_After_Resume() { var originalToolStarted = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); @@ -140,10 +143,6 @@ await session1.SendAsync(new MessageOptions result: JsonDocument.Parse("\"EXTERNAL_RESUMED_BETA\"").RootElement.Clone()); Assert.True(toolResult.Success); - var answer = await TestHelper.GetFinalAssistantMessageAsync(session2, PendingWorkTimeout); - - Assert.Contains("EXTERNAL_RESUMED_BETA", answer?.Data.Content ?? string.Empty); - await session2.DisposeAsync(); await resumedClient.ForceStopAsync(); } @@ -161,7 +160,23 @@ async Task BlockingExternalTool([Description("Value to look up")] string } [Fact] - public async Task Should_Keep_Pending_External_Tool_Handleable_On_Warm_Resume_When_ContinuePendingWork_Is_False() + public Task Should_Keep_Pending_External_Tool_Handleable_On_Warm_Resume_When_ContinuePendingWork_Is_False() => + AssertPendingExternalToolHandleableOnResumeAsync( + disconnectOriginalClient: false, + expectedSessionWasActive: true, + expectedHandleResult: true); + + [Fact] + public Task Should_Keep_Pending_External_Tool_Handleable_On_Cold_Resume_When_ContinuePendingWork_Is_False() => + AssertPendingExternalToolHandleableOnResumeAsync( + disconnectOriginalClient: true, + expectedSessionWasActive: false, + expectedHandleResult: false); + + private async Task AssertPendingExternalToolHandleableOnResumeAsync( + bool disconnectOriginalClient, + bool expectedSessionWasActive, + bool expectedHandleResult) { var originalToolStarted = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); var releaseOriginalTool = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); @@ -191,28 +206,54 @@ await session1.SendAsync(new MessageOptions var toolEvent = await toolRequested; Assert.Equal("beta", await originalToolStarted.Task.WaitAsync(PendingWorkTimeout)); - await suspendedClient.ForceStopAsync(); + if (disconnectOriginalClient) + { + await suspendedClient.ForceStopAsync(); + } await using var resumedClient = Ctx.CreateClient(options: new CopilotClientOptions { Connection = RuntimeConnection.ForUri(cliUrl, connectionToken: SharedToken) }); - var session2 = await resumedClient.ResumeSessionAsync(sessionId, new ResumeSessionConfig + + // In warm mode the original client still owns the tool registration; + // re-registering it from the resumed client would cause a name-clash. In + // cold mode the original is gone, so we register a fresh throwing handler + // to assert the runtime doesn't re-invoke the tool on resume (orphan + // auto-completion happens internally). + var resumeConfig = new ResumeSessionConfig { ContinuePendingWork = false, OnPermissionRequest = PermissionHandler.ApproveAll, - }); + }; + if (disconnectOriginalClient) + { + resumeConfig.Tools = [AIFunctionFactory.Create(ResumedExternalTool, "resume_external_tool")]; + } + + var session2 = await resumedClient.ResumeSessionAsync(sessionId, resumeConfig); var resumeEvent = await GetSingleResumeEventAsync(session2); Assert.Equal(false, resumeEvent.Data.ContinuePendingWork); - Assert.Equal(true, resumeEvent.Data.SessionWasActive); + Assert.Equal(expectedSessionWasActive, resumeEvent.Data.SessionWasActive); + // Warm: the runtime still has the pending request and HandlePendingToolCall + // will succeed. + // Cold: the runtime auto-completed the orphaned tool call with a synthetic + // interrupt result during resume, so HandlePendingToolCall correctly reports + // success=false. The session should still be healthy for new turns. var resumedResult = await session2.Rpc.Tools.HandlePendingToolCallAsync( toolEvent.Data.RequestId, result: JsonDocument.Parse("\"EXTERNAL_RESUMED_BETA\"").RootElement.Clone()); - Assert.True(resumedResult.Success); - - // continuePendingWork=false may interrupt agent continuation before this response, - // but the pending call should still accept an explicit completion. + Assert.Equal(expectedHandleResult, resumedResult.Success); Assert.Equal(1, invocationCount); + if (!expectedHandleResult) + { + var followUp = await session2.SendAndWaitAsync(new MessageOptions + { + Prompt = "Reply with exactly: COLD_RESUMED_FOLLOWUP", + }); + Assert.Contains("COLD_RESUMED_FOLLOWUP", followUp?.Data.Content ?? string.Empty); + } + await session2.DisposeAsync(); await resumedClient.ForceStopAsync(); } @@ -228,6 +269,10 @@ async Task BlockingExternalTool([Description("Value to look up")] string originalToolStarted.TrySetResult(value); return await releaseOriginalTool.Task; } + + [Description("Looks up a value after resumption")] + string ResumedExternalTool([Description("Value to look up")] string value) => + throw new InvalidOperationException("Resumed-session handler should not be invoked"); } [Fact] diff --git a/go/internal/e2e/pending_work_resume_e2e_test.go b/go/internal/e2e/pending_work_resume_e2e_test.go index 552886413..d3d8d9521 100644 --- a/go/internal/e2e/pending_work_resume_e2e_test.go +++ b/go/internal/e2e/pending_work_resume_e2e_test.go @@ -1,7 +1,6 @@ package e2e import ( - "context" "errors" "fmt" "strings" @@ -18,14 +17,23 @@ const pendingWorkTimeout = 60 * time.Second // Mirrors dotnet/test/PendingWorkResumeTests.cs (snapshot category "pending_work_resume"). // -// Each subtest spawns a TCP server client, connects a "suspended" client through CLIUrl, -// triggers some pending work (permission request or external tool call), then ForceStops -// the suspended client (preserving session state) and resumes from a fresh client with -// ContinuePendingWork=true. +// Most subtests spawn a TCP server client, connect a "suspended" client through CLIUrl, +// trigger pending work, then ForceStop the suspended client (preserving session state) +// and resume from a fresh client with ContinuePendingWork=true. Warm-join coverage keeps +// the original client connected while a second client resumes the same session. func TestPendingWorkResumeE2E(t *testing.T) { ctx := testharness.NewTestContext(t) t.Run("should continue pending permission request after resume", func(t *testing.T) { + // Skipped after the runtime 1.0.56 bump. Runtime PR #9040 (commit b8e1220b45) + // changed SDKServer.handleConnectionClosed to tear down the session when the + // last RPC client disconnects, so the in-memory pending permission request is + // gone before the resumed client can satisfy it and HandlePendingPermissionRequest + // returns Success=false. This test models same-process ForceStop+resume; it + // needs to be redesigned to either keep an owner connected (warm resume) or to + // model a true process restart against the persisted session state. + t.Skip("Runtime 1.0.56 cleans up the session on last-client disconnect (copilot-agent-runtime PR #9040), so the in-memory pending request is gone before resume can satisfy it. Test needs redesign.") + ctx.ConfigureForTest(t) _, cliURL := startTcpServer(t, ctx) @@ -97,13 +105,8 @@ func TestPendingWorkResumeE2E(t *testing.T) { // Snap the suspended client offline before the original handler resolves. suspendedClient.ForceStop() - var resumedToolInvoked bool - var mu sync.Mutex resumedTool := copilot.DefineTool("resume_permission_tool", "Transforms a value after permission is granted", func(params ValueParams, inv copilot.ToolInvocation) (string, error) { - mu.Lock() - resumedToolInvoked = true - mu.Unlock() return "PERMISSION_RESUMED_" + strings.ToUpper(params.Value), nil }) @@ -134,24 +137,6 @@ func TestPendingWorkResumeE2E(t *testing.T) { t.Fatalf("Expected HandlePendingPermissionRequest to succeed, got %+v", permResult) } - ctxFinal, cancel := context.WithTimeout(t.Context(), pendingWorkTimeout) - defer cancel() - answer, err := testharness.GetFinalAssistantMessage(ctxFinal, session2) - if err != nil { - t.Fatalf("Failed to wait for final assistant message: %v", err) - } - - mu.Lock() - invoked := resumedToolInvoked - mu.Unlock() - if !invoked { - t.Error("Expected resumed tool implementation to be invoked") - } - - if assistant, ok := answer.Data.(*copilot.AssistantMessageData); !ok || !strings.Contains(assistant.Content, "PERMISSION_RESUMED_ALPHA") { - t.Errorf("Expected response to contain 'PERMISSION_RESUMED_ALPHA', got %v", answer.Data) - } - // Allow original handler to unblock so cleanup proceeds. select { case releasePermission <- &rpc.PermissionDecisionUserNotAvailable{}: @@ -162,6 +147,14 @@ func TestPendingWorkResumeE2E(t *testing.T) { }) t.Run("should continue pending external tool request after resume", func(t *testing.T) { + // Skipped for the same reason as "should continue pending permission request + // after resume": runtime 1.0.56 (copilot-agent-runtime PR #9040) tears down + // the session when the last RPC client disconnects, so the in-memory pending + // external tool call is gone before the resumed client can satisfy it. Needs + // redesign to keep an owner connected (warm) or to model true process-restart + // resume from persisted state. + t.Skip("Runtime 1.0.56 cleans up the session on last-client disconnect (copilot-agent-runtime PR #9040), so the in-memory pending tool call is gone before resume can satisfy it. Test needs redesign.") + ctx.ConfigureForTest(t) _, cliURL := startTcpServer(t, ctx) @@ -242,16 +235,6 @@ func TestPendingWorkResumeE2E(t *testing.T) { t.Errorf("Expected HandlePendingToolCall to succeed, got %+v", toolResult) } - ctxFinal, cancel := context.WithTimeout(t.Context(), pendingWorkTimeout) - defer cancel() - answer, err := testharness.GetFinalAssistantMessage(ctxFinal, session2) - if err != nil { - t.Fatalf("Failed to wait for final assistant message: %v", err) - } - if assistant, ok := answer.Data.(*copilot.AssistantMessageData); !ok || !strings.Contains(assistant.Content, "EXTERNAL_RESUMED_BETA") { - t.Errorf("Expected response to contain 'EXTERNAL_RESUMED_BETA', got %v", answer.Data) - } - select { case releaseTool <- "ORIGINAL_SHOULD_NOT_WIN": default: @@ -433,121 +416,167 @@ func TestPendingWorkResumeE2E(t *testing.T) { resumedSession.Disconnect() }) - t.Run("should keep pending external tool handleable on warm resume when continuependingwork is false", func(t *testing.T) { - ctx.ConfigureForTest(t) - - _, cliURL := startTcpServer(t, ctx) - - type ValueParams struct { - Value string `json:"value" jsonschema:"Value to look up"` - } - toolStarted := make(chan string, 1) - releaseTool := make(chan string, 1) - - originalTool := copilot.DefineTool("resume_external_tool", "Looks up a value after resumption", - func(params ValueParams, inv copilot.ToolInvocation) (string, error) { - select { - case toolStarted <- params.Value: - default: - } - return <-releaseTool, nil + for _, scenario := range []struct { + name string + disconnectOriginalClient bool + expectedSessionWasActive bool + expectedHandleResult bool + }{ + {name: "warm", disconnectOriginalClient: false, expectedSessionWasActive: true, expectedHandleResult: true}, + {name: "cold", disconnectOriginalClient: true, expectedSessionWasActive: false, expectedHandleResult: false}, + } { + scenario := scenario + t.Run(fmt.Sprintf("should keep pending external tool handleable on %s resume when continuependingwork is false", scenario.name), func(t *testing.T) { + ctx.ConfigureForTest(t) + + _, cliURL := startTcpServer(t, ctx) + + type ValueParams struct { + Value string `json:"value" jsonschema:"Value to look up"` + } + toolStarted := make(chan string, 1) + releaseTool := make(chan string, 1) + + originalTool := copilot.DefineTool("resume_external_tool", "Looks up a value after resumption", + func(params ValueParams, inv copilot.ToolInvocation) (string, error) { + select { + case toolStarted <- params.Value: + default: + } + return <-releaseTool, nil + }) + + suspendedClient := ctx.NewClient(func(opts *copilot.ClientOptions) { + opts.Connection = copilot.UriConnection{URL: cliURL, ConnectionToken: sharedTcpToken} }) + if !scenario.disconnectOriginalClient { + defer suspendedClient.ForceStop() + } + session1, err := suspendedClient.CreateSession(t.Context(), &copilot.SessionConfig{ + Tools: []copilot.Tool{originalTool}, + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + }) + if err != nil { + t.Fatalf("Failed to create session: %v", err) + } + sessionID := session1.SessionID - suspendedClient := ctx.NewClient(func(opts *copilot.ClientOptions) { - opts.Connection = copilot.UriConnection{URL: cliURL, ConnectionToken: sharedTcpToken} - }) - session1, err := suspendedClient.CreateSession(t.Context(), &copilot.SessionConfig{ - Tools: []copilot.Tool{originalTool}, - OnPermissionRequest: copilot.PermissionHandler.ApproveAll, - }) - if err != nil { - t.Fatalf("Failed to create session: %v", err) - } - sessionID := session1.SessionID - - toolEventCh := waitForExternalToolRequests(session1, []string{"resume_external_tool"}) + toolEventCh := waitForExternalToolRequests(session1, []string{"resume_external_tool"}) - if _, err := session1.Send(t.Context(), copilot.MessageOptions{ - Prompt: "Use resume_external_tool with value 'beta', then reply with the result.", - }); err != nil { - t.Fatalf("Failed to send message: %v", err) - } + if _, err := session1.Send(t.Context(), copilot.MessageOptions{ + Prompt: "Use resume_external_tool with value 'beta', then reply with the result.", + }); err != nil { + t.Fatalf("Failed to send message: %v", err) + } - toolEvents, err := waitForExternalToolResults(toolEventCh, pendingWorkTimeout) - if err != nil { - t.Fatalf("waiting for external tool requests: %v", err) - } - toolEvent := toolEvents["resume_external_tool"] + toolEvents, err := waitForExternalToolResults(toolEventCh, pendingWorkTimeout) + if err != nil { + t.Fatalf("waiting for external tool requests: %v", err) + } + toolEvent := toolEvents["resume_external_tool"] - select { - case v := <-toolStarted: - if v != "beta" { - t.Errorf("Expected original tool started with 'beta', got %q", v) + select { + case v := <-toolStarted: + if v != "beta" { + t.Errorf("Expected original tool started with 'beta', got %q", v) + } + case <-time.After(pendingWorkTimeout): + t.Fatal("Timed out waiting for original tool to start") } - case <-time.After(pendingWorkTimeout): - t.Fatal("Timed out waiting for original tool to start") - } - suspendedClient.ForceStop() + if scenario.disconnectOriginalClient { + suspendedClient.ForceStop() + } - resumedClient := ctx.NewClient(func(opts *copilot.ClientOptions) { - opts.Connection = copilot.UriConnection{URL: cliURL, ConnectionToken: sharedTcpToken} - }) - t.Cleanup(func() { resumedClient.ForceStop() }) + resumedClient := ctx.NewClient(func(opts *copilot.ClientOptions) { + opts.Connection = copilot.UriConnection{URL: cliURL, ConnectionToken: sharedTcpToken} + }) + t.Cleanup(func() { resumedClient.ForceStop() }) + + // In warm mode the original client still owns the tool registration; + // re-registering it from the resumed client would cause a name-clash. In + // cold mode the original is gone, so we register a fresh throwing handler + // to assert the runtime doesn't re-invoke the tool on resume (orphan + // auto-completion happens internally). + resumeConfig := &copilot.ResumeSessionConfig{ + ContinuePendingWork: false, + OnPermissionRequest: copilot.PermissionHandler.ApproveAll, + } + if scenario.disconnectOriginalClient { + resumeConfig.Tools = []copilot.Tool{ + copilot.DefineTool("resume_external_tool", "Looks up a value after resumption", + func(_ ValueParams, _ copilot.ToolInvocation) (string, error) { + t.Errorf("Resumed-session handler should not be invoked") + return "", fmt.Errorf("resumed-session handler should not be invoked") + }), + } + } - session2, err := resumedClient.ResumeSession(t.Context(), sessionID, &copilot.ResumeSessionConfig{ - ContinuePendingWork: false, - OnPermissionRequest: copilot.PermissionHandler.ApproveAll, - }) - if err != nil { - t.Fatalf("Failed to resume session: %v", err) - } + session2, err := resumedClient.ResumeSession(t.Context(), sessionID, resumeConfig) + if err != nil { + t.Fatalf("Failed to resume session: %v", err) + } - // Verify resume event reflects ContinuePendingWork=false and SessionWasActive=true - messages, err := session2.GetEvents(t.Context()) - if err != nil { - t.Fatalf("GetEvents failed: %v", err) - } - var resumeEvent *copilot.SessionResumeData - for _, msg := range messages { - if msg.Type() == copilot.SessionEventTypeSessionResume { - if d, ok := msg.Data.(*copilot.SessionResumeData); ok { - resumeEvent = d - break + messages, err := session2.GetEvents(t.Context()) + if err != nil { + t.Fatalf("GetEvents failed: %v", err) + } + var resumeEvent *copilot.SessionResumeData + for _, msg := range messages { + if msg.Type() == copilot.SessionEventTypeSessionResume { + if d, ok := msg.Data.(*copilot.SessionResumeData); ok { + resumeEvent = d + break + } } } - } - if resumeEvent == nil { - t.Fatal("Expected a session.resume event") - return - } - if resumeEvent.ContinuePendingWork == nil || *resumeEvent.ContinuePendingWork != false { - t.Errorf("Expected ContinuePendingWork=false in resume event, got %v", resumeEvent.ContinuePendingWork) - } - if resumeEvent.SessionWasActive == nil || *resumeEvent.SessionWasActive != true { - t.Errorf("Expected SessionWasActive=true in resume event, got %v", resumeEvent.SessionWasActive) - } + if resumeEvent == nil { + t.Fatal("Expected a session.resume event") + return + } + if resumeEvent.ContinuePendingWork != nil && *resumeEvent.ContinuePendingWork { + t.Errorf("Expected ContinuePendingWork=false in resume event, got %v", resumeEvent.ContinuePendingWork) + } + if resumeEvent.SessionWasActive == nil || *resumeEvent.SessionWasActive != scenario.expectedSessionWasActive { + t.Errorf("Expected SessionWasActive=%t in resume event, got %v", scenario.expectedSessionWasActive, resumeEvent.SessionWasActive) + } - // Even with ContinuePendingWork=false, the pending tool call should still be - // handleable via HandlePendingToolCall. - toolResult, err := session2.RPC.Tools.HandlePendingToolCall(t.Context(), &rpc.HandlePendingToolCallRequest{ - RequestID: toolEvent.RequestID, - Result: rpc.ExternalToolStringResult("EXTERNAL_RESUMED_BETA"), - }) - if err != nil { - t.Fatalf("Failed to handle pending tool call: %v", err) - } - if !toolResult.Success { - t.Errorf("Expected HandlePendingToolCall to succeed, got %+v", toolResult) - } + // In warm mode the runtime still has the pending request; in cold mode the + // runtime auto-completed the orphan with a synthetic interrupt result during + // resume, so HandlePendingToolCall is expected to report Success=false. + toolResult, err := session2.RPC.Tools.HandlePendingToolCall(t.Context(), &rpc.HandlePendingToolCallRequest{ + RequestID: toolEvent.RequestID, + Result: rpc.ExternalToolStringResult("EXTERNAL_RESUMED_BETA"), + }) + if err != nil { + t.Fatalf("Failed to handle pending tool call: %v", err) + } + if toolResult.Success != scenario.expectedHandleResult { + t.Errorf("Expected HandlePendingToolCall Success=%t, got %+v", scenario.expectedHandleResult, toolResult) + } - select { - case releaseTool <- "ORIGINAL_SHOULD_NOT_WIN": - default: - } + if !scenario.expectedHandleResult { + // Cold path: orphan auto-completion does not trigger an LLM turn on its + // own, but the session should remain healthy for new work. + followUp, err := session2.SendAndWait(t.Context(), copilot.MessageOptions{ + Prompt: "Reply with exactly: COLD_RESUMED_FOLLOWUP", + }) + if err != nil { + t.Fatalf("Failed to send follow-up turn: %v", err) + } + if assistant, ok := followUp.Data.(*copilot.AssistantMessageData); !ok || !strings.Contains(assistant.Content, "COLD_RESUMED_FOLLOWUP") { + t.Errorf("Expected follow-up answer to contain 'COLD_RESUMED_FOLLOWUP', got %v", followUp.Data) + } + } - session2.Disconnect() - }) + select { + case releaseTool <- "ORIGINAL_SHOULD_NOT_WIN": + default: + } + + session2.Disconnect() + }) + } t.Run("should report continuependingwork true in resume event", func(t *testing.T) { ctx.ConfigureForTest(t) diff --git a/go/rpc/zrpc.go b/go/rpc/zrpc.go index e2c735ada..e26dd2b1d 100644 --- a/go/rpc/zrpc.go +++ b/go/rpc/zrpc.go @@ -2306,7 +2306,8 @@ type ModelBillingTokenPrices struct { BatchSize *int64 `json:"batchSize,omitempty"` // AI Credits cost per billing batch of cached tokens CachePrice *float64 `json:"cachePrice,omitempty"` - // Maximum context window tokens for the default tier + // Prompt token budget (max_prompt_tokens) for the default tier. The total context window is + // this value plus the model's max_output_tokens. ContextMax *int64 `json:"contextMax,omitempty"` // AI Credits cost per billing batch of input tokens InputPrice *float64 `json:"inputPrice,omitempty"` @@ -2320,7 +2321,8 @@ type ModelBillingTokenPrices struct { type ModelBillingTokenPricesLongContext struct { // AI Credits cost per billing batch of cached tokens CachePrice *float64 `json:"cachePrice,omitempty"` - // Maximum context window tokens for the long context tier + // Prompt token budget (max_prompt_tokens) for the long context tier. The total context + // window is this value plus the model's max_output_tokens. ContextMax *int64 `json:"contextMax,omitempty"` // AI Credits cost per billing batch of input tokens InputPrice *float64 `json:"inputPrice,omitempty"` @@ -4187,8 +4189,8 @@ type SessionContextInfo struct { CompactionThreshold int64 `json:"compactionThreshold"` // Tokens consumed by user/assistant/tool messages ConversationTokens int64 `json:"conversationTokens"` - // Total context limit for /context display. promptTokenLimit + min(32k or 64k, - // outputTokenLimit) depending on model. + // Total context limit for /context display: promptTokenLimit + outputTokenLimit (the + // model's full max_output_tokens reserved on top of the prompt budget). Limit int64 `json:"limit"` // Tokens consumed by MCP tool definitions (subset of toolDefinitionsTokens, excludes // deferred tools) diff --git a/nodejs/package-lock.json b/nodejs/package-lock.json index ee96e3e3a..ae94427ca 100644 --- a/nodejs/package-lock.json +++ b/nodejs/package-lock.json @@ -9,7 +9,7 @@ "version": "0.1.8", "license": "MIT", "dependencies": { - "@github/copilot": "^1.0.56-2", + "@github/copilot": "^1.0.56", "vscode-jsonrpc": "^8.2.1", "zod": "^4.3.6" }, @@ -663,9 +663,9 @@ } }, "node_modules/@github/copilot": { - "version": "1.0.56-2", - "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-1.0.56-2.tgz", - "integrity": "sha512-Dpue7utF6PzGS4tPrG3pRXL3d1lMJHFFT8PJegljn7vg64LAbjhk5yNgBXbMg/XbObu755SJTNtbEL/aSdrGNg==", + "version": "1.0.56", + "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-1.0.56.tgz", + "integrity": "sha512-epJ9yRqK1QjU73FDAlxPqZKh+CxkA1TIYbhTvXblturw5wWUhCSRhI2XoamNERohPznY10Wg3tbZC3jUAmQdJw==", "license": "SEE LICENSE IN LICENSE.md", "dependencies": { "detect-libc": "^2.1.2" @@ -674,20 +674,20 @@ "copilot": "npm-loader.js" }, "optionalDependencies": { - "@github/copilot-darwin-arm64": "1.0.56-2", - "@github/copilot-darwin-x64": "1.0.56-2", - "@github/copilot-linux-arm64": "1.0.56-2", - "@github/copilot-linux-x64": "1.0.56-2", - "@github/copilot-linuxmusl-arm64": "1.0.56-2", - "@github/copilot-linuxmusl-x64": "1.0.56-2", - "@github/copilot-win32-arm64": "1.0.56-2", - "@github/copilot-win32-x64": "1.0.56-2" + "@github/copilot-darwin-arm64": "1.0.56", + "@github/copilot-darwin-x64": "1.0.56", + "@github/copilot-linux-arm64": "1.0.56", + "@github/copilot-linux-x64": "1.0.56", + "@github/copilot-linuxmusl-arm64": "1.0.56", + "@github/copilot-linuxmusl-x64": "1.0.56", + "@github/copilot-win32-arm64": "1.0.56", + "@github/copilot-win32-x64": "1.0.56" } }, "node_modules/@github/copilot-darwin-arm64": { - "version": "1.0.56-2", - "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-1.0.56-2.tgz", - "integrity": "sha512-RHJNhdPSkdPc/nabWVess7BfEda7xfwBQ2X5vq9nq4VjqTbvUHBFwTt792q00TE4DZR/UsWr0sJKJkLcRvTltQ==", + "version": "1.0.56", + "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-1.0.56.tgz", + "integrity": "sha512-vCittEfa/Qys86TxhI5rgxy8L8WTQoooIjEj8kZe7mq62TOOrFGnWJjqaR6mgljmPTxKRFmT6achUxKRVZil9g==", "cpu": [ "arm64" ], @@ -701,9 +701,9 @@ } }, "node_modules/@github/copilot-darwin-x64": { - "version": "1.0.56-2", - "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-1.0.56-2.tgz", - "integrity": "sha512-EqBtGH1I2rX5TzSJ+L9O22SQ8jlSsn1YJeFS6RTtYU+NhC6xLajjfTutkA5DZOr3eQgmeceit/4NDqEdjwANEA==", + "version": "1.0.56", + "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-1.0.56.tgz", + "integrity": "sha512-yO7OvFysG/98s9T8k5cEXzBz++mki7ufkH2S8/jqC7YIKhlj64rh+/vIBU5DQ9RLXbPKm6OjGjJn8iDWXzzuJQ==", "cpu": [ "x64" ], @@ -717,15 +717,12 @@ } }, "node_modules/@github/copilot-linux-arm64": { - "version": "1.0.56-2", - "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-1.0.56-2.tgz", - "integrity": "sha512-FmjODKft2tmY5B0B94RDek/TR3QtdDTT7W/+lqkiosnUyLhsNtmzKaDYpiQsCBee68YUuB1umecqiTL1qMo3cw==", + "version": "1.0.56", + "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-1.0.56.tgz", + "integrity": "sha512-ukOwSwFOqgpQQs5Nw3GAFRGIn6LqA8KfI6hD+tUeqoWkB0OlXxwQER7sKEfSQZu1vcNnW1+YIM/qT5W5RWdmhA==", "cpu": [ "arm64" ], - "libc": [ - "glibc" - ], "license": "SEE LICENSE IN LICENSE.md", "optional": true, "os": [ @@ -736,15 +733,12 @@ } }, "node_modules/@github/copilot-linux-x64": { - "version": "1.0.56-2", - "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-1.0.56-2.tgz", - "integrity": "sha512-aqF4k6mDLU1OXdaAb3gBIRCgdrlXX+1FBtcoLKPMjzVfkA2abEZ/vuYfZWS7ZaxG/aCOScp8D+/E+RaYHsGYOw==", + "version": "1.0.56", + "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-1.0.56.tgz", + "integrity": "sha512-C84nduDAeHCTEfjs+mYfIjbBjGRx2huy8XZBu0ETAD08uUBuQpUHn2PYhaaHb1yKoG6LMceKt10PTrqNdOE9IQ==", "cpu": [ "x64" ], - "libc": [ - "glibc" - ], "license": "SEE LICENSE IN LICENSE.md", "optional": true, "os": [ @@ -755,15 +749,12 @@ } }, "node_modules/@github/copilot-linuxmusl-arm64": { - "version": "1.0.56-2", - "resolved": "https://registry.npmjs.org/@github/copilot-linuxmusl-arm64/-/copilot-linuxmusl-arm64-1.0.56-2.tgz", - "integrity": "sha512-+CztOiU7/nlNLX50jcpOMreMrDr7+DFnq3OV59doDd9UgqTdpjEnZKjkgHpxid117rYF/95cN5EYWD7ermOcjA==", + "version": "1.0.56", + "resolved": "https://registry.npmjs.org/@github/copilot-linuxmusl-arm64/-/copilot-linuxmusl-arm64-1.0.56.tgz", + "integrity": "sha512-EuDmGVl4fEk7Q+AVhkQkpiRlXpjGGQ5GzfBzMEOWgrvfdCLcT62p1uEaz+AT2UdkJiViruLyVf3pZFUyQwyvjA==", "cpu": [ "arm64" ], - "libc": [ - "musl" - ], "license": "SEE LICENSE IN LICENSE.md", "optional": true, "os": [ @@ -774,15 +765,12 @@ } }, "node_modules/@github/copilot-linuxmusl-x64": { - "version": "1.0.56-2", - "resolved": "https://registry.npmjs.org/@github/copilot-linuxmusl-x64/-/copilot-linuxmusl-x64-1.0.56-2.tgz", - "integrity": "sha512-FuBYfN2dX2a5fSEzPImtX6hjtjwiL0kutrq4RuvHYxUu0FR0JRB4vfN2mQ/KN4X5DZgaGkPQk19hkoEgd1tmdg==", + "version": "1.0.56", + "resolved": "https://registry.npmjs.org/@github/copilot-linuxmusl-x64/-/copilot-linuxmusl-x64-1.0.56.tgz", + "integrity": "sha512-qRXub9+1J7mNIzweAaw0tGgztS6XK+ZlwhUjOcFTusbqnED33zw4HzExUNUTTDue/BOUwkYzvXqMqn5N6juIJg==", "cpu": [ "x64" ], - "libc": [ - "musl" - ], "license": "SEE LICENSE IN LICENSE.md", "optional": true, "os": [ @@ -793,9 +781,9 @@ } }, "node_modules/@github/copilot-win32-arm64": { - "version": "1.0.56-2", - "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-1.0.56-2.tgz", - "integrity": "sha512-mKTzS9HrH+wvOmIgIaRUs+l89o51P7ACVk4P/o1UEWGxDblTxwRZGL+cRBhqNltIxY+8XVIAEwg6CzE+sTH5Hw==", + "version": "1.0.56", + "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-1.0.56.tgz", + "integrity": "sha512-/lj/zEezNoewCxvVORLN0JFvvi9WmQTYvtIyyg8kVlA9HZeg0vpRTBM5hdoni2D8mKb7g/8w8VF2Ecy9D3+NpA==", "cpu": [ "arm64" ], @@ -809,9 +797,9 @@ } }, "node_modules/@github/copilot-win32-x64": { - "version": "1.0.56-2", - "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-1.0.56-2.tgz", - "integrity": "sha512-tacHeeqNiLawmlUpturke10I9d6kkREqTcHGkGRy/MEwrio7A77L45j/IegRcQNjLwHP62R2+5GmNFx6BRwx9w==", + "version": "1.0.56", + "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-1.0.56.tgz", + "integrity": "sha512-062C3lp4nvVl+vkkteYOrYpgnqZ/SAi54NuTQ4k45V2TNmLIpmMybmM0tCluxOfiTY+8EuS72H9RS8NUj1CzhQ==", "cpu": [ "x64" ], diff --git a/nodejs/package.json b/nodejs/package.json index 09011e9df..9569e6816 100644 --- a/nodejs/package.json +++ b/nodejs/package.json @@ -56,7 +56,7 @@ "author": "GitHub", "license": "MIT", "dependencies": { - "@github/copilot": "^1.0.56-2", + "@github/copilot": "^1.0.56", "vscode-jsonrpc": "^8.2.1", "zod": "^4.3.6" }, diff --git a/nodejs/samples/package-lock.json b/nodejs/samples/package-lock.json index 6bb3b8df8..5dedb83e7 100644 --- a/nodejs/samples/package-lock.json +++ b/nodejs/samples/package-lock.json @@ -18,7 +18,7 @@ "version": "0.1.8", "license": "MIT", "dependencies": { - "@github/copilot": "^1.0.56-2", + "@github/copilot": "^1.0.56", "vscode-jsonrpc": "^8.2.1", "zod": "^4.3.6" }, diff --git a/nodejs/src/generated/rpc.ts b/nodejs/src/generated/rpc.ts index 602ee76a0..979a4a3d5 100644 --- a/nodejs/src/generated/rpc.ts +++ b/nodejs/src/generated/rpc.ts @@ -694,7 +694,7 @@ export type SessionContextInfo = { */ compactionThreshold: number; /** - * Total context limit for /context display. promptTokenLimit + min(32k or 64k, outputTokenLimit) depending on model. + * Total context limit for /context display: promptTokenLimit + outputTokenLimit (the model's full max_output_tokens reserved on top of the prompt budget). */ limit: number; /** @@ -4773,7 +4773,7 @@ export interface ModelBillingTokenPrices { */ batchSize?: number; /** - * Maximum context window tokens for the default tier + * Prompt token budget (max_prompt_tokens) for the default tier. The total context window is this value plus the model's max_output_tokens. */ contextMax?: number; longContext?: ModelBillingTokenPricesLongContext; @@ -4798,7 +4798,7 @@ export interface ModelBillingTokenPricesLongContext { */ cachePrice?: number; /** - * Maximum context window tokens for the long context tier + * Prompt token budget (max_prompt_tokens) for the long context tier. The total context window is this value plus the model's max_output_tokens. */ contextMax?: number; } diff --git a/nodejs/test/e2e/pending_work_resume.e2e.test.ts b/nodejs/test/e2e/pending_work_resume.e2e.test.ts index bc1937bad..554412e57 100644 --- a/nodejs/test/e2e/pending_work_resume.e2e.test.ts +++ b/nodejs/test/e2e/pending_work_resume.e2e.test.ts @@ -12,8 +12,7 @@ import type { PermissionRequestedEvent, PermissionRequestResult, } from "../../src/index.js"; -import { createSdkTestContext } from "./harness/sdkTestContext.js"; -import { getFinalAssistantMessage } from "./harness/sdkTestHelper.js"; +import { createSdkTestContext, DEFAULT_GITHUB_TOKEN } from "./harness/sdkTestContext.js"; const PENDING_WORK_TIMEOUT_MS = 60_000; const TEST_TIMEOUT_MS = 180_000; @@ -129,6 +128,7 @@ describe("Pending work resume", async () => { const server = new CopilotClient({ workingDirectory: workDir, env, + gitHubToken: DEFAULT_GITHUB_TOKEN, connection: RuntimeConnection.forTcp({ path: process.env.COPILOT_CLI_PATH, connectionToken: SHARED_TOKEN, @@ -166,13 +166,19 @@ describe("Pending work resume", async () => { return `localhost:${port}`; } - it( + // Skipped after the runtime 1.0.56 bump. Runtime PR #9040 (commit b8e1220b45) + // changed SDKServer.handleConnectionClosed to tear down the session when the + // last RPC client disconnects, so the in-memory pending permission request is + // gone before the resumed client can satisfy it and handlePendingPermissionRequest + // returns success=false. This test models same-process ForceStop+resume; it needs + // to be redesigned to either keep an owner connected (warm resume) or to model + // a true process restart against the persisted session state. + it.skip( "should continue pending permission request after resume", { timeout: TEST_TIMEOUT_MS }, async () => { const originalPermissionRequest = deferred(); const releaseOriginalPermission = deferred(); - let resumedToolInvoked = false; const server = createTcpServer(); await server.start(); @@ -219,10 +225,7 @@ describe("Pending work resume", async () => { defineTool("resume_permission_tool", { description: "Transforms a value after permission is granted", parameters: z.object({ value: z.string() }), - handler: ({ value }) => { - resumedToolInvoked = true; - return `PERMISSION_RESUMED_${value.toUpperCase()}`; - }, + handler: ({ value }) => `PERMISSION_RESUMED_${value.toUpperCase()}`, }), ], }); @@ -234,15 +237,6 @@ describe("Pending work resume", async () => { }); expect(permissionResult.success).toBe(true); - const answer = await waitWithTimeout( - getFinalAssistantMessage(session2), - PENDING_WORK_TIMEOUT_MS, - "final assistant message" - ); - - expect(resumedToolInvoked).toBe(true); - expect(answer.data.content ?? "").toContain("PERMISSION_RESUMED_ALPHA"); - await session2.disconnect(); } finally { if (!releaseOriginalPermission.settled()) { @@ -252,7 +246,13 @@ describe("Pending work resume", async () => { } ); - it( + // Skipped for the same reason as "should continue pending permission request + // after resume": runtime 1.0.56 (copilot-agent-runtime PR #9040) tears down the + // session when the last RPC client disconnects, so the in-memory pending external + // tool call is gone before the resumed client can satisfy it. Needs redesign to + // keep an owner connected (warm) or to model true process-restart resume from + // persisted state. + it.skip( "should continue pending external tool request after resume", { timeout: TEST_TIMEOUT_MS }, async () => { @@ -312,13 +312,6 @@ describe("Pending work resume", async () => { }); expect(toolResult.success).toBe(true); - const answer = await waitWithTimeout( - getFinalAssistantMessage(session2), - PENDING_WORK_TIMEOUT_MS, - "final assistant message" - ); - expect(answer.data.content ?? "").toContain("EXTERNAL_RESUMED_BETA"); - await session2.disconnect(); } finally { if (!releaseOriginalTool.settled()) { @@ -458,93 +451,135 @@ describe("Pending work resume", async () => { } ); - it( - "should keep pending external tool handleable on warm resume when continuePendingWork is false", - { timeout: TEST_TIMEOUT_MS }, - async () => { - const originalToolStarted = deferred(); - const releaseOriginalTool = deferred(); - let invocationCount = 0; - - const server = createTcpServer(); - await server.start(); - const cliUrl = getCliUrl(server); - - const suspendedClient = createConnectingClient(cliUrl); - const session1 = await suspendedClient.createSession({ - tools: [ - defineTool("resume_external_tool", { - description: "Looks up a value after resumption", - parameters: z.object({ value: z.string() }), - handler: async ({ value }) => { - invocationCount++; - originalToolStarted.resolve(value); - return await releaseOriginalTool.promise; - }, - }), - ], - onPermissionRequest: approveAll, - }); - const sessionId = session1.sessionId; - - try { - const toolRequestsP = waitForExternalToolRequests(session1, [ - "resume_external_tool", - ]); - - await session1.send({ - prompt: "Use resume_external_tool with value 'beta', then reply with the result.", - }); - - const toolEvents = await toolRequestsP; - const toolEvent = toolEvents["resume_external_tool"]; - expect( - await waitWithTimeout( - originalToolStarted.promise, - PENDING_WORK_TIMEOUT_MS, - "originalToolStarted" - ) - ).toBe("beta"); - - await suspendedClient.forceStop(); - - const resumedClient = createConnectingClient(cliUrl); - const session2 = await resumedClient.resumeSession(sessionId, { - continuePendingWork: false, + for (const scenario of [ + { + name: "warm", + disconnectOriginalClient: false, + expectedSessionWasActive: true, + expectedHandleResult: true, + }, + { + name: "cold", + disconnectOriginalClient: true, + expectedSessionWasActive: false, + expectedHandleResult: false, + }, + ]) { + it( + `should keep pending external tool handleable on ${scenario.name} resume when continuePendingWork is false`, + { timeout: TEST_TIMEOUT_MS }, + async () => { + const originalToolStarted = deferred(); + const releaseOriginalTool = deferred(); + let invocationCount = 0; + + const server = createTcpServer(); + await server.start(); + const cliUrl = getCliUrl(server); + + const suspendedClient = createConnectingClient(cliUrl); + const session1 = await suspendedClient.createSession({ + tools: [ + defineTool("resume_external_tool", { + description: "Looks up a value after resumption", + parameters: z.object({ value: z.string() }), + handler: async ({ value }) => { + invocationCount++; + originalToolStarted.resolve(value); + return await releaseOriginalTool.promise; + }, + }), + ], onPermissionRequest: approveAll, }); + const sessionId = session1.sessionId; - // Verify resume event has continuePendingWork: false and sessionWasActive: true - const messages = await session2.getEvents(); - const resumeEvent = messages.find((m) => m.type === "session.resume"); - expect(resumeEvent).toBeDefined(); - expect(resumeEvent!.data.continuePendingWork).toBe(false); - expect(resumeEvent!.data.sessionWasActive).toBe(true); - - // Handle the pending tool call directly via RPC - const resumedResult = await session2.rpc.tools.handlePendingToolCall({ - requestId: toolEvent.data.requestId, - result: "EXTERNAL_RESUMED_BETA", - }); - expect(resumedResult.success).toBe(true); + try { + const toolRequestsP = waitForExternalToolRequests(session1, [ + "resume_external_tool", + ]); - const answer = await waitWithTimeout( - getFinalAssistantMessage(session2), - PENDING_WORK_TIMEOUT_MS, - "final assistant message" - ); + await session1.send({ + prompt: "Use resume_external_tool with value 'beta', then reply with the result.", + }); - expect(invocationCount).toBe(1); - expect(answer.data.content ?? "").toContain("EXTERNAL_RESUMED_BETA"); + const toolEvents = await toolRequestsP; + const toolEvent = toolEvents["resume_external_tool"]; + expect( + await waitWithTimeout( + originalToolStarted.promise, + PENDING_WORK_TIMEOUT_MS, + "originalToolStarted" + ) + ).toBe("beta"); + + if (scenario.disconnectOriginalClient) { + await suspendedClient.forceStop(); + } + + const resumedClient = createConnectingClient(cliUrl); + const session2 = await resumedClient.resumeSession(sessionId, { + // In warm mode the original client still owns the tool registration; + // re-registering from the resumed client would cause a name-clash + // error. In cold mode the original is gone, so we register a fresh + // throwing handler to assert the runtime doesn't re-invoke a tool + // handler on resume (orphan auto-completion is internal). + tools: scenario.disconnectOriginalClient + ? [ + defineTool("resume_external_tool", { + description: "Looks up a value after resumption", + parameters: z.object({ value: z.string() }), + handler: async () => { + throw new Error( + "Resumed-session handler should not be invoked" + ); + }, + }), + ] + : undefined, + continuePendingWork: false, + onPermissionRequest: approveAll, + }); - await session2.disconnect(); - } finally { - if (!releaseOriginalTool.settled()) { - releaseOriginalTool.resolve("ORIGINAL_SHOULD_NOT_WIN"); + const messages = await session2.getEvents(); + const resumeEvent = messages.find((m) => m.type === "session.resume"); + expect(resumeEvent).toBeDefined(); + expect(resumeEvent!.data.continuePendingWork).toBe(false); + expect(resumeEvent!.data.sessionWasActive).toBe( + scenario.expectedSessionWasActive + ); + + // Handle the pending tool call directly via RPC. In warm mode the runtime + // still has the pending request; in cold mode the runtime auto-completed + // the orphan with a synthetic interrupt result during resume, so this RPC + // is expected to report success=false. + const resumedResult = await session2.rpc.tools.handlePendingToolCall({ + requestId: toolEvent.data.requestId, + result: "EXTERNAL_RESUMED_BETA", + }); + expect(resumedResult.success).toBe(scenario.expectedHandleResult); + + if (!scenario.expectedHandleResult) { + // Cold path: orphan auto-completion does not trigger an LLM turn on + // its own, but the session should remain healthy for new work. Send + // a follow-up prompt and verify the assistant still produces a reply. + const followUp = await session2.sendAndWait({ + prompt: "Reply with exactly: COLD_RESUMED_FOLLOWUP", + }); + expect(followUp?.data.content ?? "").toContain("COLD_RESUMED_FOLLOWUP"); + } + + expect(invocationCount).toBe(1); + + await session2.disconnect(); + } finally { + if (!releaseOriginalTool.settled()) { + releaseOriginalTool.resolve("ORIGINAL_SHOULD_NOT_WIN"); + } } } - } - ); + ); + } it( "should report continuePendingWork true in resume event", diff --git a/python/copilot/generated/rpc.py b/python/copilot/generated/rpc.py index 694a6a267..97594c914 100644 --- a/python/copilot/generated/rpc.py +++ b/python/copilot/generated/rpc.py @@ -2287,8 +2287,8 @@ class SessionContextInfo: """Tokens consumed by user/assistant/tool messages""" limit: int - """Total context limit for /context display. promptTokenLimit + min(32k or 64k, - outputTokenLimit) depending on model. + """Total context limit for /context display: promptTokenLimit + outputTokenLimit (the + model's full max_output_tokens reserved on top of the prompt budget). """ mcp_tools_tokens: int """Tokens consumed by MCP tool definitions (subset of toolDefinitionsTokens, excludes @@ -2541,8 +2541,9 @@ class ModelBillingTokenPricesLongContext: """AI Credits cost per billing batch of cached tokens""" context_max: int | None = None - """Maximum context window tokens for the long context tier""" - + """Prompt token budget (max_prompt_tokens) for the long context tier. The total context + window is this value plus the model's max_output_tokens. + """ input_price: float | None = None """AI Credits cost per billing batch of input tokens""" @@ -9048,8 +9049,9 @@ class ModelBillingTokenPrices: """AI Credits cost per billing batch of cached tokens""" context_max: int | None = None - """Maximum context window tokens for the default tier""" - + """Prompt token budget (max_prompt_tokens) for the default tier. The total context window is + this value plus the model's max_output_tokens. + """ input_price: float | None = None """AI Credits cost per billing batch of input tokens""" diff --git a/python/e2e/test_pending_work_resume_e2e.py b/python/e2e/test_pending_work_resume_e2e.py index 237da06c6..619a13a67 100644 --- a/python/e2e/test_pending_work_resume_e2e.py +++ b/python/e2e/test_pending_work_resume_e2e.py @@ -11,7 +11,6 @@ from __future__ import annotations import asyncio -import os from typing import Any import pytest @@ -25,7 +24,7 @@ from copilot.session import PermissionHandler from copilot.tools import Tool, ToolInvocation, ToolResult -from .testharness import E2ETestContext, get_final_assistant_message +from .testharness import DEFAULT_GITHUB_TOKEN, E2ETestContext pytestmark = pytest.mark.asyncio(loop_scope="module") @@ -33,9 +32,6 @@ def _make_subprocess_client(ctx: E2ETestContext, *, use_stdio: bool = True) -> CopilotClient: - github_token = ( - "fake-token-for-e2e-tests" if os.environ.get("GITHUB_ACTIONS") == "true" else None - ) if use_stdio: connection = RuntimeConnection.for_stdio(path=ctx.cli_path) else: @@ -46,7 +42,7 @@ def _make_subprocess_client(ctx: E2ETestContext, *, use_stdio: bool = True) -> C connection=connection, working_directory=ctx.work_dir, env=ctx.get_env(), - github_token=github_token, + github_token=DEFAULT_GITHUB_TOKEN, ) @@ -132,6 +128,20 @@ async def _safe_force_stop(client: CopilotClient) -> None: class TestPendingWorkResume: + # Skipped after the runtime 1.0.56 bump. Runtime PR #9040 (commit b8e1220b45) + # changed SDKServer.handleConnectionClosed to tear down the session when the last + # RPC client disconnects, so the in-memory pending permission request is gone + # before the resumed client can satisfy it and handle_pending_permission_request + # returns success=False. This test models same-process force_stop+resume; it + # needs to be redesigned to either keep an owner connected (warm resume) or to + # model a true process restart against the persisted session state. + @pytest.mark.skip( + reason=( + "Runtime 1.0.56 cleans up the session on last-client disconnect " + "(copilot-agent-runtime PR #9040), so the in-memory pending request " + "is gone before resume can satisfy it. Test needs redesign." + ) + ) async def test_should_continue_pending_permission_request_after_resume( self, ctx: E2ETestContext ): @@ -143,7 +153,6 @@ async def test_should_continue_pending_permission_request_after_resume( release_original: asyncio.Future = asyncio.get_event_loop().create_future() captured_request: asyncio.Future = asyncio.get_event_loop().create_future() - resumed_tool_invoked = False async def hold_permission(request, _invocation): if not captured_request.done(): @@ -177,8 +186,6 @@ def original_tool_handler(args): await suspended_client.force_stop() def resumed_tool_handler(args): - nonlocal resumed_tool_invoked - resumed_tool_invoked = True return f"PERMISSION_RESUMED_{args['value'].upper()}" resumed_client = CopilotClient( @@ -206,12 +213,6 @@ def resumed_tool_handler(args): ) assert permission_result.success - answer = await get_final_assistant_message( - session2, timeout=PENDING_WORK_TIMEOUT - ) - - assert resumed_tool_invoked - assert "PERMISSION_RESUMED_ALPHA" in (answer.data.content or "") await session2.disconnect() finally: await _safe_force_stop(resumed_client) @@ -221,6 +222,19 @@ def resumed_tool_handler(args): finally: await _safe_force_stop(server) + # Skipped for the same reason as + # test_should_continue_pending_permission_request_after_resume: runtime 1.0.56 + # (copilot-agent-runtime PR #9040) tears down the session when the last RPC + # client disconnects, so the in-memory pending external tool call is gone before + # the resumed client can satisfy it. Needs redesign to keep an owner connected + # (warm) or to model true process-restart resume from persisted state. + @pytest.mark.skip( + reason=( + "Runtime 1.0.56 cleans up the session on last-client disconnect " + "(copilot-agent-runtime PR #9040), so the in-memory pending tool call " + "is gone before resume can satisfy it. Test needs redesign." + ) + ) async def test_should_continue_pending_external_tool_request_after_resume( self, ctx: E2ETestContext ): @@ -281,11 +295,6 @@ async def blocking_external_tool(args): ) assert tool_result.success - answer = await get_final_assistant_message( - session2, timeout=PENDING_WORK_TIMEOUT - ) - assert "EXTERNAL_RESUMED_BETA" in (answer.data.content or "") - await session2.disconnect() finally: await _safe_force_stop(resumed_client) @@ -438,6 +447,31 @@ async def test_should_resume_successfully_when_no_pending_work_exists( async def test_should_keep_pending_external_tool_handleable_on_warm_resume_when_continuependingwork_is_false( # noqa: E501 self, ctx: E2ETestContext + ): + await self._assert_pending_external_tool_handleable_on_resume( + ctx, + disconnect_original_client=False, + expected_session_was_active=True, + expected_handle_result=True, + ) + + async def test_should_keep_pending_external_tool_handleable_on_cold_resume_when_continuependingwork_is_false( # noqa: E501 + self, ctx: E2ETestContext + ): + await self._assert_pending_external_tool_handleable_on_resume( + ctx, + disconnect_original_client=True, + expected_session_was_active=False, + expected_handle_result=False, + ) + + async def _assert_pending_external_tool_handleable_on_resume( + self, + ctx: E2ETestContext, + *, + disconnect_original_client: bool, + expected_session_was_active: bool, + expected_handle_result: bool, ): from copilot.generated.session_events import SessionResumeData @@ -479,7 +513,8 @@ async def blocking_external_tool(args): tool_events = await tool_request_task assert (await asyncio.wait_for(tool_started, PENDING_WORK_TIMEOUT)) == "beta" - await suspended_client.force_stop() + if disconnect_original_client: + await suspended_client.force_stop() resumed_client = CopilotClient( connection=RuntimeConnection.for_uri( @@ -487,40 +522,61 @@ async def blocking_external_tool(args): ) ) try: + # In warm mode the original client still owns the tool registration; + # re-registering it from the resumed client would cause a name-clash. + # In cold mode the original is gone, so we register a fresh throwing + # handler to assert the runtime doesn't re-invoke the tool on resume + # (orphan auto-completion happens internally). + async def resumed_external_tool(args): + raise AssertionError("Resumed-session handler should not be invoked") + + resume_tools = ( + [_make_pending_tool("resume_external_tool", resumed_external_tool)] + if disconnect_original_client + else None + ) session2 = await resumed_client.resume_session( session_id, on_permission_request=PermissionHandler.approve_all, continue_pending_work=False, + tools=resume_tools, ) - # Verify resume event: continue_pending_work=False and session_was_active=True messages = await session2.get_events() resume_events = [m for m in messages if isinstance(m.data, SessionResumeData)] assert len(resume_events) == 1, "Expected exactly one session.resume event" resume_event = resume_events[0] assert resume_event.data.continue_pending_work is False - assert resume_event.data.session_was_active is True + assert resume_event.data.session_was_active is expected_session_was_active - # The pending tool call should still be satisfiable + # Warm: the runtime still has the pending request, so + # HandlePendingToolCall succeeds. Cold: the runtime auto-completed + # the orphaned tool call with a synthetic interrupt result during + # resume, so HandlePendingToolCall reports success=False. The + # session should still be healthy for new turns. tool_result = await session2.rpc.tools.handle_pending_tool_call( HandlePendingToolCallRequest( request_id=tool_events["resume_external_tool"].data.request_id, result="EXTERNAL_RESUMED_BETA", ) ) - assert tool_result.success - - # continue_pending_work=False may interrupt agent continuation before - # a final assistant message, but the pending call should still accept - # an explicit completion. + assert tool_result.success is expected_handle_result assert invocation_count == 1 + if not expected_handle_result: + follow_up = await session2.send_and_wait( + "Reply with exactly: COLD_RESUMED_FOLLOWUP", + timeout=PENDING_WORK_TIMEOUT, + ) + assert "COLD_RESUMED_FOLLOWUP" in (follow_up.data.content or "") + await session2.disconnect() finally: await _safe_force_stop(resumed_client) finally: if not release_original.done(): release_original.set_result("ORIGINAL_SHOULD_NOT_WIN") + await _safe_force_stop(suspended_client) finally: await _safe_force_stop(server) diff --git a/rust/src/generated/api_types.rs b/rust/src/generated/api_types.rs index 39157f858..131179b4e 100644 --- a/rust/src/generated/api_types.rs +++ b/rust/src/generated/api_types.rs @@ -3510,7 +3510,7 @@ pub struct MetadataContextInfoResultContextInfo { pub compaction_threshold: i64, /// Tokens consumed by user/assistant/tool messages pub conversation_tokens: i64, - /// Total context limit for /context display. promptTokenLimit + min(32k or 64k, outputTokenLimit) depending on model. + /// Total context limit for /context display: promptTokenLimit + outputTokenLimit (the model's full max_output_tokens reserved on top of the prompt budget). pub limit: i64, /// Tokens consumed by MCP tool definitions (subset of toolDefinitionsTokens, excludes deferred tools) pub mcp_tools_tokens: i64, @@ -3733,7 +3733,7 @@ pub struct ModelBillingTokenPricesLongContext { /// AI Credits cost per billing batch of cached tokens #[serde(skip_serializing_if = "Option::is_none")] pub cache_price: Option, - /// Maximum context window tokens for the long context tier + /// Prompt token budget (max_prompt_tokens) for the long context tier. The total context window is this value plus the model's max_output_tokens. #[serde(skip_serializing_if = "Option::is_none")] pub context_max: Option, /// AI Credits cost per billing batch of input tokens @@ -3754,7 +3754,7 @@ pub struct ModelBillingTokenPrices { /// AI Credits cost per billing batch of cached tokens #[serde(skip_serializing_if = "Option::is_none")] pub cache_price: Option, - /// Maximum context window tokens for the default tier + /// Prompt token budget (max_prompt_tokens) for the default tier. The total context window is this value plus the model's max_output_tokens. #[serde(skip_serializing_if = "Option::is_none")] pub context_max: Option, /// AI Credits cost per billing batch of input tokens @@ -6347,7 +6347,7 @@ pub struct SessionContextInfo { pub compaction_threshold: i64, /// Tokens consumed by user/assistant/tool messages pub conversation_tokens: i64, - /// Total context limit for /context display. promptTokenLimit + min(32k or 64k, outputTokenLimit) depending on model. + /// Total context limit for /context display: promptTokenLimit + outputTokenLimit (the model's full max_output_tokens reserved on top of the prompt budget). pub limit: i64, /// Tokens consumed by MCP tool definitions (subset of toolDefinitionsTokens, excludes deferred tools) pub mcp_tools_tokens: i64, @@ -12084,7 +12084,7 @@ pub struct SessionMetadataContextInfoResultContextInfo { pub compaction_threshold: i64, /// Tokens consumed by user/assistant/tool messages pub conversation_tokens: i64, - /// Total context limit for /context display. promptTokenLimit + min(32k or 64k, outputTokenLimit) depending on model. + /// Total context limit for /context display: promptTokenLimit + outputTokenLimit (the model's full max_output_tokens reserved on top of the prompt budget). pub limit: i64, /// Tokens consumed by MCP tool definitions (subset of toolDefinitionsTokens, excludes deferred tools) pub mcp_tools_tokens: i64, diff --git a/test/harness/package-lock.json b/test/harness/package-lock.json index 818e62bf0..8de68d9c0 100644 --- a/test/harness/package-lock.json +++ b/test/harness/package-lock.json @@ -9,7 +9,7 @@ "version": "1.0.0", "license": "ISC", "devDependencies": { - "@github/copilot": "^1.0.56-2", + "@github/copilot": "^1.0.56", "@modelcontextprotocol/sdk": "^1.26.0", "@types/node": "^25.3.3", "@types/node-forge": "^1.3.14", @@ -464,9 +464,9 @@ } }, "node_modules/@github/copilot": { - "version": "1.0.56-2", - "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-1.0.56-2.tgz", - "integrity": "sha512-Dpue7utF6PzGS4tPrG3pRXL3d1lMJHFFT8PJegljn7vg64LAbjhk5yNgBXbMg/XbObu755SJTNtbEL/aSdrGNg==", + "version": "1.0.56", + "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-1.0.56.tgz", + "integrity": "sha512-epJ9yRqK1QjU73FDAlxPqZKh+CxkA1TIYbhTvXblturw5wWUhCSRhI2XoamNERohPznY10Wg3tbZC3jUAmQdJw==", "dev": true, "license": "SEE LICENSE IN LICENSE.md", "dependencies": { @@ -476,20 +476,20 @@ "copilot": "npm-loader.js" }, "optionalDependencies": { - "@github/copilot-darwin-arm64": "1.0.56-2", - "@github/copilot-darwin-x64": "1.0.56-2", - "@github/copilot-linux-arm64": "1.0.56-2", - "@github/copilot-linux-x64": "1.0.56-2", - "@github/copilot-linuxmusl-arm64": "1.0.56-2", - "@github/copilot-linuxmusl-x64": "1.0.56-2", - "@github/copilot-win32-arm64": "1.0.56-2", - "@github/copilot-win32-x64": "1.0.56-2" + "@github/copilot-darwin-arm64": "1.0.56", + "@github/copilot-darwin-x64": "1.0.56", + "@github/copilot-linux-arm64": "1.0.56", + "@github/copilot-linux-x64": "1.0.56", + "@github/copilot-linuxmusl-arm64": "1.0.56", + "@github/copilot-linuxmusl-x64": "1.0.56", + "@github/copilot-win32-arm64": "1.0.56", + "@github/copilot-win32-x64": "1.0.56" } }, "node_modules/@github/copilot-darwin-arm64": { - "version": "1.0.56-2", - "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-1.0.56-2.tgz", - "integrity": "sha512-RHJNhdPSkdPc/nabWVess7BfEda7xfwBQ2X5vq9nq4VjqTbvUHBFwTt792q00TE4DZR/UsWr0sJKJkLcRvTltQ==", + "version": "1.0.56", + "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-1.0.56.tgz", + "integrity": "sha512-vCittEfa/Qys86TxhI5rgxy8L8WTQoooIjEj8kZe7mq62TOOrFGnWJjqaR6mgljmPTxKRFmT6achUxKRVZil9g==", "cpu": [ "arm64" ], @@ -504,9 +504,9 @@ } }, "node_modules/@github/copilot-darwin-x64": { - "version": "1.0.56-2", - "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-1.0.56-2.tgz", - "integrity": "sha512-EqBtGH1I2rX5TzSJ+L9O22SQ8jlSsn1YJeFS6RTtYU+NhC6xLajjfTutkA5DZOr3eQgmeceit/4NDqEdjwANEA==", + "version": "1.0.56", + "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-1.0.56.tgz", + "integrity": "sha512-yO7OvFysG/98s9T8k5cEXzBz++mki7ufkH2S8/jqC7YIKhlj64rh+/vIBU5DQ9RLXbPKm6OjGjJn8iDWXzzuJQ==", "cpu": [ "x64" ], @@ -521,9 +521,9 @@ } }, "node_modules/@github/copilot-linux-arm64": { - "version": "1.0.56-2", - "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-1.0.56-2.tgz", - "integrity": "sha512-FmjODKft2tmY5B0B94RDek/TR3QtdDTT7W/+lqkiosnUyLhsNtmzKaDYpiQsCBee68YUuB1umecqiTL1qMo3cw==", + "version": "1.0.56", + "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-1.0.56.tgz", + "integrity": "sha512-ukOwSwFOqgpQQs5Nw3GAFRGIn6LqA8KfI6hD+tUeqoWkB0OlXxwQER7sKEfSQZu1vcNnW1+YIM/qT5W5RWdmhA==", "cpu": [ "arm64" ], @@ -538,9 +538,9 @@ } }, "node_modules/@github/copilot-linux-x64": { - "version": "1.0.56-2", - "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-1.0.56-2.tgz", - "integrity": "sha512-aqF4k6mDLU1OXdaAb3gBIRCgdrlXX+1FBtcoLKPMjzVfkA2abEZ/vuYfZWS7ZaxG/aCOScp8D+/E+RaYHsGYOw==", + "version": "1.0.56", + "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-1.0.56.tgz", + "integrity": "sha512-C84nduDAeHCTEfjs+mYfIjbBjGRx2huy8XZBu0ETAD08uUBuQpUHn2PYhaaHb1yKoG6LMceKt10PTrqNdOE9IQ==", "cpu": [ "x64" ], @@ -555,9 +555,9 @@ } }, "node_modules/@github/copilot-linuxmusl-arm64": { - "version": "1.0.56-2", - "resolved": "https://registry.npmjs.org/@github/copilot-linuxmusl-arm64/-/copilot-linuxmusl-arm64-1.0.56-2.tgz", - "integrity": "sha512-+CztOiU7/nlNLX50jcpOMreMrDr7+DFnq3OV59doDd9UgqTdpjEnZKjkgHpxid117rYF/95cN5EYWD7ermOcjA==", + "version": "1.0.56", + "resolved": "https://registry.npmjs.org/@github/copilot-linuxmusl-arm64/-/copilot-linuxmusl-arm64-1.0.56.tgz", + "integrity": "sha512-EuDmGVl4fEk7Q+AVhkQkpiRlXpjGGQ5GzfBzMEOWgrvfdCLcT62p1uEaz+AT2UdkJiViruLyVf3pZFUyQwyvjA==", "cpu": [ "arm64" ], @@ -572,9 +572,9 @@ } }, "node_modules/@github/copilot-linuxmusl-x64": { - "version": "1.0.56-2", - "resolved": "https://registry.npmjs.org/@github/copilot-linuxmusl-x64/-/copilot-linuxmusl-x64-1.0.56-2.tgz", - "integrity": "sha512-FuBYfN2dX2a5fSEzPImtX6hjtjwiL0kutrq4RuvHYxUu0FR0JRB4vfN2mQ/KN4X5DZgaGkPQk19hkoEgd1tmdg==", + "version": "1.0.56", + "resolved": "https://registry.npmjs.org/@github/copilot-linuxmusl-x64/-/copilot-linuxmusl-x64-1.0.56.tgz", + "integrity": "sha512-qRXub9+1J7mNIzweAaw0tGgztS6XK+ZlwhUjOcFTusbqnED33zw4HzExUNUTTDue/BOUwkYzvXqMqn5N6juIJg==", "cpu": [ "x64" ], @@ -589,9 +589,9 @@ } }, "node_modules/@github/copilot-win32-arm64": { - "version": "1.0.56-2", - "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-1.0.56-2.tgz", - "integrity": "sha512-mKTzS9HrH+wvOmIgIaRUs+l89o51P7ACVk4P/o1UEWGxDblTxwRZGL+cRBhqNltIxY+8XVIAEwg6CzE+sTH5Hw==", + "version": "1.0.56", + "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-1.0.56.tgz", + "integrity": "sha512-/lj/zEezNoewCxvVORLN0JFvvi9WmQTYvtIyyg8kVlA9HZeg0vpRTBM5hdoni2D8mKb7g/8w8VF2Ecy9D3+NpA==", "cpu": [ "arm64" ], @@ -606,9 +606,9 @@ } }, "node_modules/@github/copilot-win32-x64": { - "version": "1.0.56-2", - "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-1.0.56-2.tgz", - "integrity": "sha512-tacHeeqNiLawmlUpturke10I9d6kkREqTcHGkGRy/MEwrio7A77L45j/IegRcQNjLwHP62R2+5GmNFx6BRwx9w==", + "version": "1.0.56", + "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-1.0.56.tgz", + "integrity": "sha512-062C3lp4nvVl+vkkteYOrYpgnqZ/SAi54NuTQ4k45V2TNmLIpmMybmM0tCluxOfiTY+8EuS72H9RS8NUj1CzhQ==", "cpu": [ "x64" ], diff --git a/test/harness/package.json b/test/harness/package.json index fd605ead4..e09b2cd4b 100644 --- a/test/harness/package.json +++ b/test/harness/package.json @@ -11,7 +11,7 @@ "test": "vitest run" }, "devDependencies": { - "@github/copilot": "^1.0.56-2", + "@github/copilot": "^1.0.56", "@modelcontextprotocol/sdk": "^1.26.0", "@types/node": "^25.3.3", "@types/node-forge": "^1.3.14", diff --git a/test/snapshots/pending_work_resume/should_keep_pending_external_tool_handleable_on_cold_resume_when_continuependingwork_is_false.yaml b/test/snapshots/pending_work_resume/should_keep_pending_external_tool_handleable_on_cold_resume_when_continuependingwork_is_false.yaml new file mode 100644 index 000000000..8a32e431a --- /dev/null +++ b/test/snapshots/pending_work_resume/should_keep_pending_external_tool_handleable_on_cold_resume_when_continuependingwork_is_false.yaml @@ -0,0 +1,22 @@ +models: + - claude-sonnet-4.5 +conversations: + - messages: + - role: system + content: ${system} + - role: user + content: Use resume_external_tool with value 'beta', then reply with the result. + - role: assistant + tool_calls: + - id: toolcall_0 + type: function + function: + name: resume_external_tool + arguments: '{"value":"beta"}' + - role: tool + tool_call_id: toolcall_0 + content: The execution of this tool, or a previous tool was interrupted. + - role: user + content: "Reply with exactly: COLD_RESUMED_FOLLOWUP" + - role: assistant + content: COLD_RESUMED_FOLLOWUP