From d0b9e008a7207bd64e4504ab8c96cb03519e4684 Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Thu, 28 May 2026 01:11:01 +0700
Subject: [PATCH 1/5] feat(workflows): add continue_on_error step field
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds an optional `continue_on_error: bool` field on every step.
When set to `true` and the step fails, the engine records the
result (`exit_code`, `stderr` on `steps.<id>.output` plus `status`
as a sibling key on `steps.<id>`) and continues to the next sibling
step instead of halting the run. Downstream `if`, `switch`, or
`gate` steps can then branch on
`{{ steps.<id>.output.exit_code }}` to route the recovery path.

Engine details
--------------
`WorkflowEngine._execute_steps` now consults the step config when a
step returns `StepStatus.FAILED`:

- Gate aborts (`output.aborted`) always halt the run — operator
  decisions take precedence over the flag.
- Otherwise, if `continue_on_error` is the literal `True`, log a
  `step_continue_on_error` event and proceed to the next sibling.
  The runtime check uses identity comparison (`is True`) rather
  than truthiness, so truthy non-bool values like the string
  `"true"` cannot silently change run semantics even if a caller
  bypasses `validate_workflow()`.
- Otherwise, behave as before: log `step_failed`, set
  `RunStatus.FAILED`, and return.

Validation
----------
`_validate_steps` rejects non-bool values for `continue_on_error`.
Coerced strings like `"true"` are not accepted so authoring
mistakes surface at validation time rather than silently changing
run semantics.

Tests
-----
`TestContinueOnError` in `tests/test_workflows.py` (8 tests):
- `test_undeclared_failure_halts_run` — default halt behaviour.
- `test_declared_and_fired_continues_run` — flag + fail → continue.
- `test_declared_but_step_succeeded_is_noop` — flag + success → no-op.
- `test_if_branch_routes_around_failure` — end-to-end recovery.
- `test_gate_abort_still_halts_with_continue_on_error` — abort
  always halts.
- `test_validation_rejects_non_bool_continue_on_error` — `"true"`
  rejected at validation.
- `test_validation_accepts_bool_continue_on_error` — `true`/`false`
  pass cleanly.
- `test_engine_ignores_truthy_non_bool_continue_on_error` —
  defense-in-depth: engine ignores string `"true"` even when
  validation is bypassed.

Rebased onto current upstream/main (post #2664 merge); the new
`TestContinueOnError` class sits immediately after upstream's
`TestContextRunId` so the two feature suites coexist cleanly.

Closes #2591.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/specify_cli/workflows/engine.py |  58 +++++-
 tests/test_workflows.py             | 297 ++++++++++++++++++++++++++++
 workflows/README.md                 |  72 ++++---
 3 files changed, 396 insertions(+), 31 deletions(-)
diff --git a/src/specify_cli/workflows/engine.py b/src/specify_cli/workflows/engine.py
index 65d9b3a272..fe1c520515 100644
--- a/src/specify_cli/workflows/engine.py
+++ b/src/specify_cli/workflows/engine.py
@@ -231,6 +231,20 @@ def _validate_steps(
             step_errors = step_impl.validate(step_config)
             errors.extend(step_errors)
 
+        # Validate optional `continue_on_error` field. The engine honours
+        # this on any step that returns FAILED so the pipeline can route
+        # around the failure via downstream `if`/`switch`/`gate`. The
+        # field must be a literal boolean — coercion from truthy strings
+        # is deliberately not supported so authoring mistakes surface
+        # at validation time rather than silently changing run semantics.
+        if "continue_on_error" in step_config:
+            coe = step_config["continue_on_error"]
+            if not isinstance(coe, bool):
+                errors.append(
+                    f"Step {step_id!r}: 'continue_on_error' must be a "
+                    f"boolean, got {type(coe).__name__}."
+                )
+
         # Recursively validate nested steps
         for nested_key in ("then", "else", "steps"):
             nested = step_config.get(nested_key)
@@ -622,7 +636,10 @@ def _execute_steps(
 
             # Handle failures
             if result.status == StepStatus.FAILED:
-                # Gate abort (output.aborted) maps to ABORTED status
+                # Gate abort (output.aborted) maps to ABORTED status.
+                # Aborts are deliberate operator decisions, so
+                # `continue_on_error` does NOT override them — that flag
+                # is for transient/expected step failures only.
                 if result.output.get("aborted"):
                     state.status = RunStatus.ABORTED
                     state.append_log(
@@ -631,15 +648,48 @@ def _execute_steps(
                             "step_id": step_id,
                         }
                     )
-                else:
-                    state.status = RunStatus.FAILED
+                    state.save()
+                    return
+
+                # `continue_on_error: true` lets the pipeline route
+                # around the failure instead of halting. The step
+                # result (including exit_code, stderr, status) is
+                # still recorded so downstream `if`/`switch`/`gate`
+                # steps can branch on it. Log a single, unambiguous
+                # event per failure resolution — either the run
+                # continued past it, or it halted.
+                #
+                # Use identity comparison (`is True`) rather than
+                # truthiness so that only a literal boolean enables
+                # the behaviour, even if validation was skipped.
+                # Validation rejects non-bool values at parse time,
+                # but `WorkflowEngine.execute()` does not auto-validate
+                # (see `WorkflowEngine.load_workflow`, whose docstring
+                # explicitly notes "not yet validated; call
+                # `validate_workflow()` or `engine.validate()`
+                # separately"), so a caller passing an unvalidated
+                # definition could otherwise see truthy non-bool
+                # values like the string `"true"` silently change
+                # run semantics.
+                if step_config.get("continue_on_error") is True:
                     state.append_log(
                         {
-                            "event": "step_failed",
+                            "event": "step_continue_on_error",
                             "step_id": step_id,
                             "error": result.error,
                         }
                     )
+                    state.save()
+                    continue
+
+                state.status = RunStatus.FAILED
+                state.append_log(
+                    {
+                        "event": "step_failed",
+                        "step_id": step_id,
+                        "error": result.error,
+                    }
+                )
                 state.save()
                 return
 
diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index 114e6b02cd..0a5e61c2af 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -2333,6 +2333,303 @@ def test_workflow_without_context_reference_unchanged(self, project_dir):
         assert state.step_results["only-step"]["output"]["stdout"].strip() == "hello"
 
 
+# ===== continue_on_error Tests =====
+#
+# Locks the contract documented in workflows/README.md "Error Handling"
+# section: when an executable step fails and `continue_on_error: true`
+# is declared, the engine records the step's `output` (with `exit_code`
+# and `stderr` from the failure) and its `status` (sibling key on
+# `steps.<id>`, not nested under `output`) and continues to the next
+# sibling step instead of halting the run. Gate aborts
+# (`output.aborted`) still halt regardless of the flag.
+
+
+class TestContinueOnError:
+    """Test the `continue_on_error` step-level field."""
+
+    def test_undeclared_failure_halts_run(self, project_dir):
+        """Default behaviour (no `continue_on_error`): a failing step
+        halts the workflow run with `status == FAILED`.
+
+        Locks the byte-equivalent default — workflows that do not
+        declare the flag must behave exactly as before this feature.
+        """
+        from specify_cli.workflows.engine import WorkflowDefinition, WorkflowEngine
+        from specify_cli.workflows.base import RunStatus
+
+        definition = WorkflowDefinition.from_string("""
+schema_version: "1.0"
+workflow:
+  id: "halt-on-fail"
+  name: "Halt On Fail"
+  version: "1.0.0"
+steps:
+  - id: fail-step
+    type: shell
+    run: "exit 7"
+  - id: after
+    type: shell
+    run: "echo should-not-run"
+""")
+        engine = WorkflowEngine(project_dir)
+        state = engine.execute(definition)
+
+        assert state.status == RunStatus.FAILED
+        assert "fail-step" in state.step_results
+        assert state.step_results["fail-step"]["output"]["exit_code"] == 7
+        # Subsequent step never executes when the flag is absent.
+        assert "after" not in state.step_results
+
+    def test_declared_and_fired_continues_run(self, project_dir):
+        """`continue_on_error: true` + failing step: the run keeps
+        going, the failed step's result is recorded, and the
+        downstream step runs.
+        """
+        from specify_cli.workflows.engine import WorkflowDefinition, WorkflowEngine
+        from specify_cli.workflows.base import RunStatus
+
+        definition = WorkflowDefinition.from_string("""
+schema_version: "1.0"
+workflow:
+  id: "continue-past-fail"
+  name: "Continue Past Fail"
+  version: "1.0.0"
+steps:
+  - id: flaky-step
+    type: shell
+    run: "exit 42"
+    continue_on_error: true
+  - id: after
+    type: shell
+    run: "echo did-run"
+""")
+        engine = WorkflowEngine(project_dir)
+        state = engine.execute(definition)
+
+        assert state.status == RunStatus.COMPLETED
+        # Failed step's exit_code is preserved so downstream branching
+        # can inspect it.
+        assert state.step_results["flaky-step"]["output"]["exit_code"] == 42
+        assert state.step_results["flaky-step"]["status"] == "failed"
+        # Downstream step ran successfully.
+        assert state.step_results["after"]["output"]["exit_code"] == 0
+
+    def test_declared_but_step_succeeded_is_noop(self, project_dir):
+        """`continue_on_error: true` on a step that succeeds is a
+        no-op — the flag only changes behaviour on FAILED status.
+        """
+        from specify_cli.workflows.engine import WorkflowDefinition, WorkflowEngine
+        from specify_cli.workflows.base import RunStatus
+
+        definition = WorkflowDefinition.from_string("""
+schema_version: "1.0"
+workflow:
+  id: "flag-but-success"
+  name: "Flag But Success"
+  version: "1.0.0"
+steps:
+  - id: ok-step
+    type: shell
+    run: "echo ok"
+    continue_on_error: true
+  - id: after
+    type: shell
+    run: "echo done"
+""")
+        engine = WorkflowEngine(project_dir)
+        state = engine.execute(definition)
+
+        assert state.status == RunStatus.COMPLETED
+        assert state.step_results["ok-step"]["status"] == "completed"
+        assert state.step_results["ok-step"]["output"]["exit_code"] == 0
+        assert state.step_results["after"]["output"]["exit_code"] == 0
+
+    def test_if_branch_routes_around_failure(self, project_dir):
+        """End-to-end: `continue_on_error` + `if` cleanly routes around
+        a failure. The recovery branch runs; the success branch does
+        not.
+
+        Mirrors the canonical usage pattern from the original feature
+        discussion in issue #2591.
+        """
+        from specify_cli.workflows.engine import WorkflowDefinition, WorkflowEngine
+        from specify_cli.workflows.base import RunStatus
+
+        definition = WorkflowDefinition.from_string("""
+schema_version: "1.0"
+workflow:
+  id: "route-around"
+  name: "Route Around Failure"
+  version: "1.0.0"
+steps:
+  - id: heavy-thing
+    type: shell
+    run: "exit 1"
+    continue_on_error: true
+  - id: check-result
+    type: if
+    condition: "{{ steps.heavy-thing.output.exit_code != 0 }}"
+    then:
+      - id: recovery
+        type: shell
+        run: "echo recovery-ran"
+    else:
+      - id: happy-path
+        type: shell
+        run: "echo happy-path-ran"
+""")
+        engine = WorkflowEngine(project_dir)
+        state = engine.execute(definition)
+
+        assert state.status == RunStatus.COMPLETED
+        assert "recovery" in state.step_results
+        assert "happy-path" not in state.step_results
+
+    def test_gate_abort_still_halts_with_continue_on_error(
+        self, project_dir, monkeypatch
+    ):
+        """`continue_on_error` does NOT override a deliberate gate
+        abort. `output.aborted` always halts the run with
+        `status == ABORTED`.
+
+        Aborts are explicit operator decisions; continue_on_error
+        is for transient/expected step failures only.
+        """
+        from specify_cli.workflows.engine import WorkflowDefinition, WorkflowEngine
+        from specify_cli.workflows.base import RunStatus
+        from specify_cli.workflows.steps.gate import GateStep
+        from specify_cli.workflows.steps import gate as gate_module
+
+        # Force the gate step into interactive mode and feed a "reject"
+        # choice so the abort path actually runs in the test env
+        # (default behaviour returns PAUSED when stdin is not a TTY).
+        # Swap sys.stdin itself for a stub: setattr on the real
+        # TextIOWrapper's `isatty` method is not assignable under some
+        # runners (e.g. pytest with capture disabled).
+        class _TTYStdin:
+            def isatty(self) -> bool:
+                return True
+
+        monkeypatch.setattr(gate_module.sys, "stdin", _TTYStdin())
+        monkeypatch.setattr(
+            GateStep, "_prompt", staticmethod(lambda _msg, _opts: "reject")
+        )
+
+        definition = WorkflowDefinition.from_string("""
+schema_version: "1.0"
+workflow:
+  id: "gate-abort-halts"
+  name: "Gate Abort Halts"
+  version: "1.0.0"
+steps:
+  - id: gate-step
+    type: gate
+    message: "Approve?"
+    options: [approve, reject]
+    on_reject: abort
+    continue_on_error: true
+  - id: should-not-run
+    type: shell
+    run: "echo nope"
+""")
+        engine = WorkflowEngine(project_dir)
+        state = engine.execute(definition)
+
+        assert state.status == RunStatus.ABORTED
+        assert "should-not-run" not in state.step_results
+
+    def test_validation_rejects_non_bool_continue_on_error(self):
+        """`continue_on_error` must be a literal boolean; coerced
+        strings like `"true"` are rejected at validation time so
+        authoring mistakes surface before execution.
+        """
+        from specify_cli.workflows.engine import (
+            WorkflowDefinition,
+            validate_workflow,
+        )
+
+        definition = WorkflowDefinition.from_string("""
+schema_version: "1.0"
+workflow:
+  id: "bad-coe"
+  name: "Bad COE"
+  version: "1.0.0"
+steps:
+  - id: step-one
+    type: shell
+    run: "true"
+    continue_on_error: "true"
+""")
+        errors = validate_workflow(definition)
+        assert any(
+            "continue_on_error" in e and "boolean" in e for e in errors
+        ), errors
+
+    def test_validation_accepts_bool_continue_on_error(self):
+        """Boolean values pass validation cleanly."""
+        from specify_cli.workflows.engine import (
+            WorkflowDefinition,
+            validate_workflow,
+        )
+
+        for value in (True, False):
+            yaml_value = "true" if value else "false"
+            definition = WorkflowDefinition.from_string(f"""
+schema_version: "1.0"
+workflow:
+  id: "good-coe"
+  name: "Good COE"
+  version: "1.0.0"
+steps:
+  - id: step-one
+    type: shell
+    run: "true"
+    continue_on_error: {yaml_value}
+""")
+            errors = validate_workflow(definition)
+            assert errors == [], errors
+
+    def test_engine_ignores_truthy_non_bool_continue_on_error(self, project_dir):
+        """Defense-in-depth: even if a caller bypasses
+        `validate_workflow()` and feeds the engine a definition with
+        `continue_on_error: "true"` (a string), the engine must NOT
+        honour the flag — only a literal boolean enables the
+        behaviour. `WorkflowEngine.execute()` does not auto-validate
+        (the `WorkflowEngine.load_workflow` docstring explicitly
+        notes the definition is "not yet validated; call
+        `validate_workflow()` or `engine.validate()` separately"),
+        so the engine guards against truthy non-bool values itself
+        via an identity check rather than truthiness.
+        """
+        from specify_cli.workflows.engine import WorkflowDefinition, WorkflowEngine
+        from specify_cli.workflows.base import RunStatus
+
+        # Bypass `validate_workflow()` — execute() is what would
+        # be called by a caller that skipped validation.
+        definition = WorkflowDefinition.from_string("""
+schema_version: "1.0"
+workflow:
+  id: "string-coe"
+  name: "String COE"
+  version: "1.0.0"
+steps:
+  - id: fail-step
+    type: shell
+    run: "exit 1"
+    continue_on_error: "true"
+  - id: should-not-run
+    type: shell
+    run: "echo should-not-run"
+""")
+        engine = WorkflowEngine(project_dir)
+        state = engine.execute(definition)
+
+        # String "true" is truthy but not a literal boolean, so the
+        # engine must treat the step as a halting failure.
+        assert state.status == RunStatus.FAILED
+        assert "should-not-run" not in state.step_results
+
+
 # ===== State Persistence Tests =====
 
 class TestRunState:
diff --git a/workflows/README.md b/workflows/README.md
index eb3d090ad0..9a61540b3e 100644
--- a/workflows/README.md
+++ b/workflows/README.md
@@ -219,6 +219,51 @@ Aggregate results from fan-out steps:
   output: {}
 ```
 
+## Error Handling
+
+By default, any step that ends in `StepStatus.FAILED` at runtime halts
+the entire run — most commonly a `shell` or `command` step exiting
+non-zero, but also any other runtime error raised during step
+execution. (Invalid workflow definitions are rejected up-front by
+`specify workflow run` before the run even starts, so structural
+validation failures never reach this code path.) Set
+`continue_on_error: true` on a step to record its result and continue
+to the next sibling step instead. When the failure was a non-zero
+exit, the exit code remains available on
+`steps.<id>.output.exit_code` so downstream `if`, `switch`, or `gate`
+steps can branch on it:
+
+```yaml
+- id: heavy-thing
+  type: command
+  integration: claude
+  command: speckit.heavy-thing
+  continue_on_error: true
+
+- id: check-result
+  type: if
+  condition: "{{ steps.heavy-thing.output.exit_code != 0 }}"
+  then:
+    - id: review
+      type: gate
+      message: "Step failed (exit {{ steps.heavy-thing.output.exit_code }}). Retry or skip?"
+      on_reject: skip
+  else:
+    - id: next-thing
+      command: speckit.next-thing
+```
+
+**Notes:**
+
+- The field must be a literal boolean (`true` / `false`); coerced
+  strings like `"true"` are rejected at validation time.
+- Gate aborts (`on_reject: abort` chosen by the operator) always halt
+  the run — `continue_on_error` does not override them. The flag is
+  for transient/expected step failures, not for overriding deliberate
+  operator decisions.
+- When the flag is omitted, behaviour is byte-equivalent to before
+  this feature.
+
 ## Expressions
 
 Workflow definitions use `{{ expression }}` syntax for dynamic values:
@@ -239,33 +284,6 @@ message: "{{ status | default('pending') }}"
 
 Supported filters: `default`, `join`, `contains`, `map`.
 
-### Runtime Context
-
-`{{ context.* }}` exposes engine-managed runtime metadata for the
-current run:
-
-| Variable | Description |
-|----------|-------------|
-| `context.run_id` | The current workflow run id (the same value Spec Kit prints as `Run ID:` at the end of `workflow run`). Auto-generated runs are 8-character hex from `uuid4`; operator-supplied ids may be any alphanumeric string with hyphens or underscores. Empty string outside a run context. |
-
-```yaml
-# Stamp telemetry events with the run id for cross-system join.
-- id: emit-event
-  type: shell
-  run: 'echo "{\"run_id\":\"{{ context.run_id }}\",\"event\":\"started\"}" >> events.jsonl'
-
-# Per-run scratch directory.
-- id: prep-scratch
-  type: shell
-  run: 'mkdir -p /tmp/run-{{ context.run_id }}'
-
-# Pass run id into a command for artifact metadata.
-- id: tag-artifact
-  command: speckit.specify
-  input:
-    args: "{{ context.run_id }}"
-```
-
 ## Input Types
 
 Workflow inputs are type-checked and coerced from CLI string values:

From 22c5184fa35cd305a5e7383064f583f3959ddff6 Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Thu, 28 May 2026 21:33:40 +0700
Subject: [PATCH 2/5] docs(workflows): restore runtime context section, clarify
 gate prompt

Two Copilot findings on d0b9e00:

1. The `### Runtime Context` documentation for `{{ context.* }}` was
   lost during the rebase onto current main (the squash dropped the
   anchor where #2664 had added it). Restored under `## Expressions`
   so users can find `context.run_id` semantics and examples.

2. The continue_on_error example gate had message "Retry or skip?"
   but used the default `options: [approve, reject]` with `on_reject:
   skip`, which implied an automatic retry path that gates do not
   provide. Reworded the message to match the actual approve/reject
   semantics and added an explicit note that retry requires either
   custom gate options + downstream branching or a wrapper loop.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 workflows/README.md | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/workflows/README.md b/workflows/README.md
index 9a61540b3e..40c593fb61 100644
--- a/workflows/README.md
+++ b/workflows/README.md
@@ -246,13 +246,19 @@ steps can branch on it:
   then:
     - id: review
       type: gate
-      message: "Step failed (exit {{ steps.heavy-thing.output.exit_code }}). Retry or skip?"
+      message: "Step failed (exit {{ steps.heavy-thing.output.exit_code }}). Approve to continue, or reject to skip the rest of this branch."
       on_reject: skip
   else:
     - id: next-thing
       command: speckit.next-thing
 ```
 
+The gate's default `options: [approve, reject]` map directly to "continue
+the run" vs. "skip the rest of this branch" — gates do not automatically
+re-run the failed step. To express a retry path, either define custom
+gate options and branch on the choice downstream, or wrap the failing
+step in your own loop.
+
 **Notes:**
 
 - The field must be a literal boolean (`true` / `false`); coerced
@@ -284,6 +290,33 @@ message: "{{ status | default('pending') }}"
 
 Supported filters: `default`, `join`, `contains`, `map`.
 
+### Runtime Context
+
+`{{ context.* }}` exposes engine-managed runtime metadata for the
+current run:
+
+| Variable | Description |
+|----------|-------------|
+| `context.run_id` | The current workflow run id (the same value Spec Kit prints as `Run ID:` at the end of `workflow run`). Auto-generated runs are 8-character hex from `uuid4`; operator-supplied ids may be any alphanumeric string with hyphens or underscores. Empty string outside a run context. |
+
+```yaml
+# Stamp telemetry events with the run id for cross-system join.
+- id: emit-event
+  type: shell
+  run: 'echo "{\"run_id\":\"{{ context.run_id }}\",\"event\":\"started\"}" >> events.jsonl'
+
+# Per-run scratch directory.
+- id: prep-scratch
+  type: shell
+  run: 'mkdir -p /tmp/run-{{ context.run_id }}'
+
+# Pass run id into a command for artifact metadata.
+- id: tag-artifact
+  command: speckit.specify
+  input:
+    args: "{{ context.run_id }}"
+```
+
 ## Input Types
 
 Workflow inputs are type-checked and coerced from CLI string values:

From b8982a748a27d808f5034887cc4b408053f111dd Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Thu, 28 May 2026 22:50:38 +0700
Subject: [PATCH 3/5] =?UTF-8?q?docs(workflows):=20clarify=20continue=5Fon?=
 =?UTF-8?q?=5Ferror=20scope=20=E2=80=94=20returned=20FAILED=20only?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Copilot finding on d0b9e00:

The README's "Error Handling" intro implied `continue_on_error` covers
"any other runtime error raised during step execution", but the engine
only consults the flag when a step returns `StepResult(status=FAILED, ...)`.
Exceptions raised out of `step_impl.execute()` propagate to
`WorkflowEngine.execute()`, where the catch-all logs `workflow_failed`
and re-raises — the step result is never recorded, and the flag is
never consulted.

Audited the whole PR diff for the same overclaim:

1. workflows/README.md — main fix. Reworded the Error Handling intro to
   "any step that returns StepResult(status=FAILED, ...)" and promoted
   the parenthetical structural-validation note into the Notes block.
   Added a new "Scope: returned failures only" note that names the
   exception path explicitly and tells step authors how to bring the
   flag into scope for exceptional code (catch internally and return
   FAILED with the failure encoded in `output`).

2. tests/test_workflows.py — section comment used "when an executable
   step fails", same ambiguity. Tightened to "when a step returns
   StepResult(status=FAILED, ...)" and added a sentence calling out
   that unhandled exceptions are out of scope.

3. src/specify_cli/workflows/engine.py — already correct ("any step
   that returns FAILED" in the validator comment; "lets the pipeline
   route around the failure" in the execute path). No change.

Engine semantics and test bodies are unchanged. Docs-only.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 tests/test_workflows.py | 15 +++++++++------
 workflows/README.md     | 29 ++++++++++++++++++-----------
 2 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/tests/test_workflows.py b/tests/test_workflows.py
index 0a5e61c2af..3ab8871dea 100644
--- a/tests/test_workflows.py
+++ b/tests/test_workflows.py
@@ -2336,12 +2336,15 @@ def test_workflow_without_context_reference_unchanged(self, project_dir):
 # ===== continue_on_error Tests =====
 #
 # Locks the contract documented in workflows/README.md "Error Handling"
-# section: when an executable step fails and `continue_on_error: true`
-# is declared, the engine records the step's `output` (with `exit_code`
-# and `stderr` from the failure) and its `status` (sibling key on
-# `steps.<id>`, not nested under `output`) and continues to the next
-# sibling step instead of halting the run. Gate aborts
-# (`output.aborted`) still halt regardless of the flag.
+# section: when a step returns `StepResult(status=FAILED, ...)` and
+# `continue_on_error: true` is declared, the engine records the step's
+# `output` (with `exit_code` and `stderr` from the failure) and its
+# `status` (sibling key on `steps.<id>`, not nested under `output`)
+# and continues to the next sibling step instead of halting the run.
+# Gate aborts (`output.aborted`) still halt regardless of the flag.
+# Unhandled exceptions raised out of `step_impl.execute()` are out of
+# scope for this flag — they propagate to `WorkflowEngine.execute()`
+# and abort the run.
 
 
 class TestContinueOnError:
diff --git a/workflows/README.md b/workflows/README.md
index 40c593fb61..b0b7050fe5 100644
--- a/workflows/README.md
+++ b/workflows/README.md
@@ -221,17 +221,13 @@ Aggregate results from fan-out steps:
 
 ## Error Handling
 
-By default, any step that ends in `StepStatus.FAILED` at runtime halts
-the entire run — most commonly a `shell` or `command` step exiting
-non-zero, but also any other runtime error raised during step
-execution. (Invalid workflow definitions are rejected up-front by
-`specify workflow run` before the run even starts, so structural
-validation failures never reach this code path.) Set
-`continue_on_error: true` on a step to record its result and continue
-to the next sibling step instead. When the failure was a non-zero
-exit, the exit code remains available on
-`steps.<id>.output.exit_code` so downstream `if`, `switch`, or `gate`
-steps can branch on it:
+By default, any step that returns `StepResult(status=FAILED, ...)`
+at runtime halts the entire run — most commonly a `shell` or
+`command` step exiting non-zero. Set `continue_on_error: true` on
+a step to record its result and continue to the next sibling step
+instead. When the failure was a non-zero exit, the exit code
+remains available on `steps.<id>.output.exit_code` so downstream
+`if`, `switch`, or `gate` steps can branch on it:
 
 ```yaml
 - id: heavy-thing
@@ -263,10 +259,21 @@ step in your own loop.
 
 - The field must be a literal boolean (`true` / `false`); coerced
   strings like `"true"` are rejected at validation time.
+- **Scope: returned failures only.** The flag applies to step results
+  with `status=FAILED`. Unhandled exceptions raised out of a step's
+  `execute()` method are caught one level up by `WorkflowEngine.execute()`,
+  logged as `workflow_failed`, and abort the run regardless of
+  `continue_on_error`. If a step author wants the flag to cover an
+  exceptional path, the step must catch the exception internally and
+  return `StepResult(status=FAILED, ...)` with the failure encoded in
+  `output` (e.g. `exit_code`, `stderr`, or a custom field).
 - Gate aborts (`on_reject: abort` chosen by the operator) always halt
   the run — `continue_on_error` does not override them. The flag is
   for transient/expected step failures, not for overriding deliberate
   operator decisions.
+- Structural validation runs up-front: `specify workflow run` rejects
+  invalid workflow definitions before the run is created, so
+  validation failures never reach this code path.
 - When the flag is omitted, behaviour is byte-equivalent to before
   this feature.
 

From 393ac6b7101cf2a02729b5b395b9fe98ccbd10d8 Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Fri, 29 May 2026 21:47:00 +0700
Subject: [PATCH 4/5] =?UTF-8?q?docs(workflows):=20clarify=20on=5Freject:sk?=
 =?UTF-8?q?ip=20semantics=20=E2=80=94=20engine=20returns=20COMPLETED,=20no?=
 =?UTF-8?q?t=20auto-skip?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Copilot finding on b8982a7:

The README example's gate message said "reject to skip the rest of this
branch", and the explanatory paragraph claimed [approve, reject] map
to "continue" vs "skip the rest of this branch". The engine does not
implement automatic branch-skipping. `on_reject: skip` returns
`StepStatus.COMPLETED` (gate/__init__.py:65-66); the next sibling step
runs unconditionally unless the author wires a downstream `if` reading
`{{ steps.<gate-id>.output.choice }}`.

Two fixes:

1. Restructured the YAML example so it actually demonstrates the
   manual-branching pattern: added a `recover` if-step after the gate
   that conditions on `steps.review.output.choice == 'approve'`. Now
   the example shows the real workflow author's responsibility instead
   of implying the engine does it.

2. Replaced the trailing paragraph with three precise notes:
   - both gate options return COMPLETED; `on_reject: skip` controls
     abort behaviour only, not sibling-skipping
   - all three `on_reject` values enumerated with their actual engine
     semantics (FAILED+aborted / COMPLETED / PAUSED)
   - the original retry-loop guidance retained as the third bullet

Updated the gate message in the example to match — "reject to leave the
failure recorded and move on" instead of "reject to skip the rest of
this branch".

Audited the whole PR diff for the same overclaim: no other instance.
Engine semantics, validation, and test bodies are unchanged. Docs-only.

161/161 tests/test_workflows.py pass locally.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 workflows/README.md | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/workflows/README.md b/workflows/README.md
index b0b7050fe5..6282045a16 100644
--- a/workflows/README.md
+++ b/workflows/README.md
@@ -242,18 +242,36 @@ remains available on `steps.<id>.output.exit_code` so downstream
   then:
     - id: review
       type: gate
-      message: "Step failed (exit {{ steps.heavy-thing.output.exit_code }}). Approve to continue, or reject to skip the rest of this branch."
+      message: "Step failed (exit {{ steps.heavy-thing.output.exit_code }}). Approve to run the recovery path, or reject to leave the failure recorded and move on."
       on_reject: skip
+    - id: recover
+      type: if
+      condition: "{{ steps.review.output.choice == 'approve' }}"
+      then:
+        - id: rerun
+          command: speckit.recovery
   else:
     - id: next-thing
       command: speckit.next-thing
 ```
 
-The gate's default `options: [approve, reject]` map directly to "continue
-the run" vs. "skip the rest of this branch" — gates do not automatically
-re-run the failed step. To express a retry path, either define custom
-gate options and branch on the choice downstream, or wrap the failing
-step in your own loop.
+A few things worth knowing about that example:
+
+- Both gate options (`approve`, `reject`) return `StepStatus.COMPLETED`;
+  `on_reject: skip` controls only whether the engine aborts on reject
+  (it doesn't, with `skip`) — it does **not** auto-skip subsequent
+  sibling steps in the `then:` list. Downstream branching is the
+  workflow author's responsibility: read
+  `{{ steps.<gate-id>.output.choice }}` in a follow-up `if`, `switch`,
+  or expression, as the `recover` step above does.
+- `on_reject` has three values: `abort` (default — reject → `FAILED`
+  with `output.aborted = True`, halts the run), `skip` (reject →
+  `COMPLETED`, author handles branching as shown), and `retry`
+  (reject → `PAUSED` so the next `specify workflow resume` re-runs
+  the gate).
+- Gates do not automatically re-run the failed step. To express a
+  retry path, either define custom gate options and branch on the
+  choice downstream, or wrap the failing step in your own loop.
 
 **Notes:**
 

From 03454dd9e9437318aa7092a39978c5b33391b750 Mon Sep 17 00:00:00 2001
From: doquanghuy <doquanghuy91@gmail.com>
Date: Fri, 29 May 2026 22:17:17 +0700
Subject: [PATCH 5/5] =?UTF-8?q?docs(workflows):=20clarify=20gate's=20role?=
 =?UTF-8?q?=20=E2=80=94=20surfaces,=20doesn't=20programmatically=20branch?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Audit follow-up to 393ac6b — three sites repeated the same minor
overclaim about gates being one of the "branch on it" step types
alongside `if` and `switch`:

1. workflows/README.md (the "downstream `if`, `switch`, or `gate`
   steps can branch on it" sentence introducing the example)
2. engine.py:236 (validator inline comment)
3. engine.py:657 (execute-path inline comment)

A `gate` step does not have a `condition` or `expression` field — it
only evaluates expressions for `message` and `show_file` (gate/__init__.py:29,36).
Programmatic branching happens in `if`/`switch`; a gate surfaces the
value to a human operator via message interpolation, and the operator's
choice is recorded in `output.choice` for a *subsequent* `if`/`switch`
to route on.

Reworded all three sites consistently: "a downstream `if` or `switch`
can branch on it (or a `gate` can surface it to the operator via
message interpolation)". The README example already demonstrates this
distinction — the gate carries `{{ }}` template variables in its
message and the `recover` if-step downstream is what actually branches
on the choice.

Engine semantics, validation, and test bodies are unchanged. Docs-only
on the README; comment-only on engine.py.

161/161 tests/test_workflows.py pass locally.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/specify_cli/workflows/engine.py | 19 +++++++++++--------
 workflows/README.md                 |  5 +++--
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/src/specify_cli/workflows/engine.py b/src/specify_cli/workflows/engine.py
index fe1c520515..329b0392dd 100644
--- a/src/specify_cli/workflows/engine.py
+++ b/src/specify_cli/workflows/engine.py
@@ -233,10 +233,12 @@ def _validate_steps(
 
         # Validate optional `continue_on_error` field. The engine honours
         # this on any step that returns FAILED so the pipeline can route
-        # around the failure via downstream `if`/`switch`/`gate`. The
-        # field must be a literal boolean — coercion from truthy strings
-        # is deliberately not supported so authoring mistakes surface
-        # at validation time rather than silently changing run semantics.
+        # around the failure via a downstream `if` or `switch` (or a
+        # `gate` that surfaces the failure to the operator via message
+        # interpolation). The field must be a literal boolean —
+        # coercion from truthy strings is deliberately not supported so
+        # authoring mistakes surface at validation time rather than
+        # silently changing run semantics.
         if "continue_on_error" in step_config:
             coe = step_config["continue_on_error"]
             if not isinstance(coe, bool):
@@ -654,10 +656,11 @@ def _execute_steps(
                 # `continue_on_error: true` lets the pipeline route
                 # around the failure instead of halting. The step
                 # result (including exit_code, stderr, status) is
-                # still recorded so downstream `if`/`switch`/`gate`
-                # steps can branch on it. Log a single, unambiguous
-                # event per failure resolution — either the run
-                # continued past it, or it halted.
+                # still recorded so a downstream `if` or `switch`
+                # can branch on it (or a `gate` can surface it to the
+                # operator via message interpolation). Log a single,
+                # unambiguous event per failure resolution — either
+                # the run continued past it, or it halted.
                 #
                 # Use identity comparison (`is True`) rather than
                 # truthiness so that only a literal boolean enables
diff --git a/workflows/README.md b/workflows/README.md
index 6282045a16..39df117ef6 100644
--- a/workflows/README.md
+++ b/workflows/README.md
@@ -226,8 +226,9 @@ at runtime halts the entire run — most commonly a `shell` or
 `command` step exiting non-zero. Set `continue_on_error: true` on
 a step to record its result and continue to the next sibling step
 instead. When the failure was a non-zero exit, the exit code
-remains available on `steps.<id>.output.exit_code` so downstream
-`if`, `switch`, or `gate` steps can branch on it:
+remains available on `steps.<id>.output.exit_code` so a downstream
+`if` or `switch` can branch on it (or a `gate` can surface it to
+the operator via `{{ }}` interpolation in `message`):
 
 ```yaml
 - id: heavy-thing