feat(workflows): add --dry-run to specify workflow run

fuleinist · fuleinist · commit c2868d7026cf · 2026-06-05T21:36:05.000+10:00
Implements issue #2661 — preview step execution without AI invocation.

The --dry-run flag short-circuits each step in the workflow engine so the user can confirm the resolved inputs, prompts, and command invocations that would be dispatched before running for real.

Engine:
- StepContext.dry_run (default False) propagated to every step
- WorkflowEngine.execute(dry_run=...) persists the flag onto RunState so resume() of an interrupted dry-run stays a dry-run instead of silently becoming a real run
- CommandStep and GateStep short-circuit in dry-run: command steps render the invoke_command preview (using the integration's build_command_invocation when available, with a graceful fallback), gate steps return COMPLETED with a 'DRY RUN' message
- --dry-run is exposed only on 'specify workflow run' (the step-based invocation path where a preview is meaningful); the per-stage surface (/speckit.specify, /speckit.plan, ...) is intentionally not duplicated into the CLI as 'specify spec' / 'specify plan' per design review.

Tests:
- Existing dry-run coverage in test_workflows.py
- New tests for RunState dry_run persistence and resume() restoring the flag (test_dry_run_persisted_in_run_state, test_resume_restores_dry_run)
- New test for the CommandStep preview fallback path
- New test for the GateStep dry-run short-circuit

Closes #2661
diff --git a/60 b/60
diff --git a/src/specify_cli/__init__.py b/src/specify_cli/__init__.py
@@ -446,6 +446,16 @@ def _print_cli_warning(
 from .commands import init as _init_cmd  # noqa: E402
 _init_cmd.register(app)
 
+# Workflow commands are defined in-module below (see the
+# ``workflow_app = typer.Typer(...)`` block near the end of this file).
+# An earlier draft of #2661 also tried to register the
+# ``src/specify_cli/commands/workflow.py`` module, which defined a second
+# ``workflow`` Typer group with the same name. Typer raises on duplicate
+# command names at startup, so the redundant registration has been
+# removed here and ``commands/workflow.py`` deleted. The in-module
+# commands (``specify workflow run``, ``... resume``, ``... status``,
+# ``... list``, ``... add``, etc.) are the single source of truth.
+
 
 @app.command()
 def check():
@@ -570,6 +580,14 @@ def version(
 app.add_typer(_self_app, name="self")
 
 
+# NOTE: ``specify spec`` / ``specify plan`` were intentionally NOT added
+# to this CLI. The ``specify`` CLI is scaffolding + workflow orchestration
+# only; the per-stage surface (``/speckit.specify``, ``/speckit.plan``,
+# …) belongs to the agent, not the CLI. Adding a CLI shortcut would
+# duplicate that surface with a weaker, second invocation path. See
+# review #4624465842 from @mnriem on PR #2704.
+
+
 # ===== Extension Commands =====
 
 extension_app = typer.Typer(
@@ -2751,6 +2769,9 @@ def workflow_run(
     input_values: list[str] | None = typer.Option(
         None, "--input", "-i", help="Input values as key=value pairs"
     ),
+    dry_run: bool = typer.Option(
+        False, "--dry-run", help="Show the rendered prompt/inputs for each step without invoking the AI"
+    ),
     json_output: bool = typer.Option(
         False,
         "--json",
@@ -2805,9 +2826,12 @@ def workflow_run(
         console.print(f"\n[bold cyan]Running workflow:[/bold cyan] {definition.name} ({definition.id})")
         console.print(f"[dim]Version: {definition.version}[/dim]\n")
 
+    if dry_run:
+        console.print("[bold yellow]DRY RUN — no AI invocation will occur[/bold yellow]\n")
+
     try:
         with _stdout_to_stderr_when(json_output):
-            state = engine.execute(definition, inputs)
+            state = engine.execute(definition, inputs, dry_run=dry_run)
     except ValueError as exc:
         console.print(f"[red]Error:[/red] {exc}")
         raise typer.Exit(1)
@@ -2832,6 +2856,21 @@ def workflow_run(
     if state.status.value == "paused":
         console.print(f"\nResume with: [cyan]specify workflow resume {state.run_id}[/cyan]")
 
+    # Print dry-run step outputs so the user sees rendered command details
+    if dry_run and state.status.value == "completed":
+        for step_id, step_data in state.step_results.items():
+            output = step_data.get("output", {})
+            if output.get("dry_run"):
+                msg = output.get("message", "")
+                if msg:
+                    console.print(f"\n[bold cyan]Step:[/bold cyan] {step_id}")
+                    # ``msg`` is plain text from the step implementation
+                    # (e.g. ``[DRY RUN] Command: ...``). Disable Rich
+                    # markup parsing so the literal ``[DRY RUN]`` bracket
+                    # pair is shown verbatim and does not raise a
+                    # ``MarkupError`` for an unknown tag.
+                    console.print(msg, markup=False)
+
 
 @workflow_app.command("resume")
 def workflow_resume(
diff --git a/src/specify_cli/workflows/base.py b/src/specify_cli/workflows/base.py
@@ -73,6 +73,9 @@ class StepContext:
     #: Current run ID.
     run_id: str | None = None
 
+    #: Dry-run mode: preview rendered prompt/inputs without AI invocation.
+    dry_run: bool = False
+
 
 @dataclass
 class StepResult:
diff --git a/src/specify_cli/workflows/engine.py b/src/specify_cli/workflows/engine.py
@@ -331,6 +331,7 @@ def __init__(
         self.current_step_id: str | None = None
         self.step_results: dict[str, dict[str, Any]] = {}
         self.inputs: dict[str, Any] = {}
+        self.dry_run: bool = False
         self.created_at = datetime.now(timezone.utc).isoformat()
         self.updated_at = self.created_at
         self.log_entries: list[dict[str, Any]] = []
@@ -352,6 +353,7 @@ def save(self) -> None:
             "current_step_index": self.current_step_index,
             "current_step_id": self.current_step_id,
             "step_results": self.step_results,
+            "dry_run": self.dry_run,
             "created_at": self.created_at,
             "updated_at": self.updated_at,
         }
@@ -396,6 +398,7 @@ def load(cls, run_id: str, project_root: Path) -> RunState:
         state.current_step_index = state_data.get("current_step_index", 0)
         state.current_step_id = state_data.get("current_step_id")
         state.step_results = state_data.get("step_results", {})
+        state.dry_run = state_data.get("dry_run", False)
         state.created_at = state_data.get("created_at", "")
         state.updated_at = state_data.get("updated_at", "")
 
@@ -478,6 +481,7 @@ def execute(
         definition: WorkflowDefinition,
         inputs: dict[str, Any] | None = None,
         run_id: str | None = None,
+        dry_run: bool = False,
     ) -> RunState:
         """Execute a workflow definition.
 
@@ -489,6 +493,16 @@ def execute(
             User-provided input values.
         run_id:
             Optional run ID (uses SPECKIT_WORKFLOW_RUN_ID when set, otherwise auto-generated).
+        dry_run:
+            If ``True``, each step is executed normally but without
+            invoking the underlying AI integration (e.g. no CLI subprocess
+            is spawned for ``command`` steps, interactive gates return
+            ``COMPLETED`` immediately, etc.).  The workflow state is
+            still persisted to disk so ``specify workflow resume`` works,
+            and the dry-run flag is restored on resume so an interrupted
+            dry-run does not silently become a real run.  Use this to
+            preview the resolved inputs and prompts for a workflow
+            without making any AI API calls.
 
         Returns
         -------
@@ -521,6 +535,7 @@ def execute(
         # Resolve inputs
         resolved_inputs = self._resolve_inputs(definition, inputs or {})
         state.inputs = resolved_inputs
+        state.dry_run = dry_run
         state.status = RunStatus.RUNNING
         state.save()
 
@@ -531,6 +546,7 @@ def execute(
             default_options=definition.default_options,
             project_root=str(self.project_root),
             run_id=state.run_id,
+            dry_run=dry_run,
         )
 
         # Execute steps
@@ -596,6 +612,7 @@ def resume(
             default_options=definition.default_options,
             project_root=str(self.project_root),
             run_id=state.run_id,
+            dry_run=state.dry_run,
         )
 
         from . import STEP_REGISTRY
diff --git a/src/specify_cli/workflows/steps/command/__init__.py b/src/specify_cli/workflows/steps/command/__init__.py
@@ -53,12 +53,6 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult:
         if step_options:
             options.update(step_options)
 
-        # Attempt CLI dispatch
-        args_str = str(resolved_input.get("args", ""))
-        dispatch_result = self._try_dispatch(
-            command, integration, model, args_str, context
-        )
-
         output: dict[str, Any] = {
             "command": command,
             "integration": integration,
@@ -67,6 +61,54 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult:
             "input": resolved_input,
         }
 
+        # Dry-run: show the rendered prompt without invoking the AI
+        if context.dry_run:
+            args_str = str(resolved_input.get("args", ""))
+            # Use the integration's own build_command_invocation() so the
+            # preview matches exactly what would be dispatched at runtime
+            invoke_str = f"{command} {args_str}".strip() if command else args_str
+            preview_note: str | None = None
+            if integration:
+                try:
+                    from specify_cli.integrations import get_integration
+                    impl = get_integration(integration)
+                    if impl is not None:
+                        invoke_str = impl.build_command_invocation(command, args_str)
+                except (ImportError, AttributeError, KeyError, TypeError, ValueError) as exc:
+                    # ``build_command_invocation`` is optional in the
+                    # integration protocol — fall back to ``<command> <args>``
+                    # rather than swallowing the error silently. Record the
+                    # reason so dry-run output makes the fallback explicit.
+                    preview_note = (
+                        f"(integration {integration!r} did not provide "
+                        f"build_command_invocation: {type(exc).__name__}: {exc})"
+                    )
+            output["dispatched"] = False
+            output["dry_run"] = True
+            output["exit_code"] = 0
+            output["stdout"] = ""
+            output["stderr"] = ""
+            output["invoke_command"] = invoke_str
+            message_body = (
+                f"[DRY RUN] Command: {invoke_str}\n"
+                f"         Integration: {integration}\n"
+                f"         Model: {model}\n"
+                f"         (AI invocation skipped — use without --dry-run to execute)"
+            )
+            if preview_note:
+                message_body += f"\n         {preview_note}"
+            output["message"] = message_body
+            return StepResult(
+                status=StepStatus.COMPLETED,
+                output=output,
+            )
+
+        # Attempt CLI dispatch
+        args_str = str(resolved_input.get("args", ""))
+        dispatch_result = self._try_dispatch(
+            command, integration, model, args_str, context
+        )
+
         if dispatch_result is not None:
             output["exit_code"] = dispatch_result["exit_code"]
             output["stdout"] = dispatch_result["stdout"]
diff --git a/src/specify_cli/workflows/steps/gate/__init__.py b/src/specify_cli/workflows/steps/gate/__init__.py
@@ -43,6 +43,20 @@ def execute(self, config: dict[str, Any], context: StepContext) -> StepResult:
             "choice": None,
         }
 
+        # Dry-run: skip interactive gates
+        if context.dry_run:
+            output["dry_run"] = True
+            output["choice"] = options[0] if options else None
+            output["message"] = (
+                f"[DRY RUN] Gate: {message}\n"
+                f"         Options: {options}\n"
+                f"         (interactive prompt skipped — use without --dry-run to gate)"
+            )
+            return StepResult(
+                status=StepStatus.COMPLETED,
+                output=output,
+            )
+
         # Non-interactive: pause for later resume
         if not sys.stdin.isatty():
             return StepResult(status=StepStatus.PAUSED, output=output)
diff --git a/tests/test_workflows.py b/tests/test_workflows.py