// pi-extensions/session-handoff/index.ts

/**
 * session-handoff — model-invokable autonomous context handoff.
 *
 * Registers ONE LLM-callable tool: `session_handoff(prompt, reason?)`. When
 * invoked, the next agent turn sees a fresh context window containing only
 * the carry-over prompt — no prior conversation, no tool calls, no assistant
 * messages. The model can chain tasks autonomously by calling this whenever
 * one task finishes.
 *
 * Why this is not a true new-session swap
 * ───────────────────────────────────────
 * Tools receive `ExtensionContext`, which does NOT expose `ctx.newSession()`
 * — session swaps are gated to command handlers (`ExtensionCommandContext`).
 * The documented bridge (`pi.sendUserMessage("/cmd", { deliverAs:
 * "followUp" })`) does not actually route the queued text back through
 * command handling in current pi-mono — see analysis in ./state.ts and the
 * call sites we traced in agent-session.js.
 *
 * Workaround: instead of swapping the session file, we rewrite the LLM's
 * view of the message history via the `context` event. The session file on
 * disk keeps growing, but each handoff resets what the LLM actually sees.
 * For the "finish one task, start the next with a clean context window" use
 * case this is functionally equivalent to a new session.
 *
 * Flow
 * ────
 *   1. LLM calls `session_handoff({ prompt: "..." })`.
 *   2. Tool generates a unique sentinel ID and records it as `activeHandoffId`.
 *   3. Tool queues `<pi-handoff-resume:ID>\n<prompt>` as a followUp user
 *      message. Returns `terminate: true` to stop the current agent loop.
 *   4. Agent loop ends. followUp drains, the message is appended.
 *   5. New agent turn begins, LLM call requested.
 *   6. `context` event fires. Our handler finds the active sentinel, slices the
 *      message list to start with that message, and strips the sentinel from
 *      its text. This runs on EVERY subsequent turn — the active id is NOT
 *      consumed — so the context stays pinned to the carry-over prompt for the
 *      whole task, not just the first turn. A later handoff overwrites
 *      `activeHandoffId` and wins, since its sentinel sits further down.
 *   7. LLM receives a context that starts at the carry-over prompt — just that
 *      prompt on the first turn, plus the new task's own work on later turns.
 *      System prompt and tool definitions are unaffected (they're separate from
 *      `messages` in the LLM payload).
 *
 * Known caveats
 * ─────────────
 *   - **Mixed batches delay the handoff.** `terminate: true` only takes
 *     effect when ALL tool results in the batch terminate. If the model
 *     calls session_handoff alongside other tools, the agent finishes the
 *     other tools (potentially with extra LLM round-trips) before the
 *     followUp drains. The `promptGuidelines` warn the model to call
 *     session_handoff alone.
 *   - **Session log keeps growing.** The disk file accumulates a record of
 *     every task. Doesn't affect LLM cost (context resets each handoff) but
 *     does affect `du`. Compaction continues to work normally.
 *   - **Prompt cache misses on the first turn of each task.** Provider
 *     prefix caching breaks at the handoff boundary, same as a true
 *     new-session swap would.
 *   - **The sentinel is visible in the session log.** A line like
 *     `<pi-handoff-resume:018f3a2c-...>` appears in the user-message entry.
 *     The LLM never sees it (we strip before sending). Search/grep-able for
 *     debugging.
 */

import { randomUUID } from "node:crypto";
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { Type } from "typebox";
import {
	buildHandoffMessage,
	normalizePrompt,
	normalizeReason,
	resolveHandoffContext,
} from "./state.js";

/**
 * Extension entry point. Installs a `context` hook that rewrites the message
 * list handed to the LLM so it starts at the most recent handoff sentinel, and
 * registers the `session_handoff` tool that stages such a handoff.
 *
 * @param pi Extension API handle used to subscribe to the `context` event,
 *   register the tool, emit the `session-handoff` event and queue the
 *   follow-up user message.
 */
export default function (pi: ExtensionAPI) {
	// ID of the most recently REQUESTED handoff, or null when none has been
	// requested. Set when the tool fires; overwritten when a later handoff
	// supersedes it. Closure-scoped — survives across turns within a single
	// extension load, resets to null on /reload or session restart (making
	// historical sentinels in a resumed on-disk log inert: no active id
	// ⇒ no truncation until a NEW handoff fires).
	let activeHandoffId: string | null = null;

	// ID of the most recent handoff whose sentinel has actually been found in
	// the message list by the `context` hook. While a newer handoff is staged
	// but its followUp message has not been appended yet (e.g. the model
	// batched session_handoff with other tools and the loop keeps running),
	// this keeps the context pinned to the previous carry-over prompt instead
	// of silently exposing the whole accumulated history for those turns.
	let landedHandoffId: string | null = null;

	pi.on("context", async (event, _ctx) => {
		// Fast path for normal turns (no handoff requested since load).
		if (activeHandoffId === null) return undefined;

		// Re-applied on EVERY turn, not just the first one after the handoff:
		// our return value only rewrites what the LLM sees, so the slice has
		// to be recomputed each time to keep the window pinned for the whole
		// task. A falsy result means the sentinel for that id was not resolved
		// (per the helper's contract in ./state.js — e.g. followUp not yet
		// drained).
		const latest = resolveHandoffContext(event.messages, activeHandoffId);
		if (latest) {
			landedHandoffId = activeHandoffId;
			return { messages: latest };
		}

		// Newest sentinel is not in the list yet. If an earlier handoff was
		// already anchoring the context, keep honouring it until the new one
		// lands; otherwise leave the context untouched.
		if (landedHandoffId === null || landedHandoffId === activeHandoffId) {
			return undefined;
		}
		const earlier = resolveHandoffContext(event.messages, landedHandoffId);
		if (!earlier) return undefined;

		return { messages: earlier };
	});

	pi.registerTool({
		name: "session_handoff",
		label: "Session Handoff",
		description:
			"Reset the LLM's context window and start the next task with a fresh, " +
			"self-contained prompt. Use when a task is complete and the next task " +
			"would benefit from a clean context window. The prompt is the ONLY " +
			"thing the LLM sees in the new context — no prior conversation, no " +
			"tool calls, no assistant messages. Include everything needed.",
		promptSnippet:
			"Reset context and start the next task with a self-contained prompt",
		promptGuidelines: [
			"Use session_handoff when the current task is complete and the next task would benefit " +
				"from a fresh context window. The carry-over prompt must be entirely self-contained: " +
				"include relevant file paths, decisions, and the precise next task. The new turn " +
				"will see ONLY this prompt — no prior conversation, no tool calls, no system messages.",
			"Call session_handoff ALONE — do NOT batch it with other tools in the same assistant " +
				"turn. The handoff fires immediately only if every tool in the batch terminates; " +
				"otherwise it is delayed until the agent finishes the other tools.",
			"Do not use session_handoff for routine transitions where the current context is still " +
				"relevant — only when starting fresh would genuinely help.",
		],
		parameters: Type.Object({
			prompt: Type.String({
				description:
					"The self-contained prompt that becomes the first (and only) user message in " +
					"the fresh context. Must include any file paths, decisions, or task description " +
					"needed — the new turn sees NOTHING else from prior conversation.",
			}),
			reason: Type.Optional(
				Type.String({
					description:
						"Short human-readable explanation of why the handoff is happening. " +
						"Shown in the UI notification before the swap. Optional.",
				}),
			),
		}),
		/**
		 * Stages a handoff: records a fresh sentinel id, announces it on the
		 * extension event bus, queues the sentinel-tagged prompt as a followUp
		 * user message, and asks the agent loop to terminate.
		 *
		 * @throws Error when the normalized prompt is empty.
		 */
		async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
			const prompt = normalizePrompt(params.prompt);
			if (!prompt) {
				throw new Error("session_handoff requires a non-empty prompt");
			}
			const reason = normalizeReason(params.reason);

			// The previous anchor (if any) stays remembered in
			// `landedHandoffId`, so overwriting the active id here does not
			// unpin the context before the new sentinel is delivered.
			const id = randomUUID();
			activeHandoffId = id;

			// Notify other extensions (logging, metrics, etc.) before we
			// queue the actual handoff message.
			pi.events.emit("session-handoff", { id, prompt, reason });

			pi.sendUserMessage(buildHandoffMessage(id, prompt), {
				deliverAs: "followUp",
			});

			// Same optional " (reason)" suffix for the UI toast and the tool result.
			const reasonSuffix = reason ? ` (${reason})` : "";

			if (ctx.hasUI) {
				ctx.ui.notify(
					`Handoff staged${reasonSuffix}. Next turn starts with a fresh context.`,
					"info",
				);
			}

			return {
				content: [
					{
						type: "text",
						text:
							`Handoff staged${reasonSuffix}. The next agent turn will see only the ` +
							"carry-over prompt — no prior conversation, no tool calls. No further " +
							"action needed in this turn.",
					},
				],
				details: { id, prompt, reason },
				// Hint the agent loop to skip the follow-up LLM call. Only takes
				// effect when every tool in this batch terminates; if the model
				// batched session_handoff with other tools, the handoff fires
				// only after the agent finishes those.
				terminate: true,
			};
		},
	});
}