// pi-extensions/tests/session-handoff.test.ts

/**
 * Unit tests for session-handoff/state.ts (pure helpers + sentinel detection).
 *
 *   node --experimental-strip-types --test tests/session-handoff.test.ts
 */

import assert from "node:assert/strict";
import { test } from "node:test";
import {
	buildHandoffMessage,
	buildHandoffSentinel,
	extractSentinelId,
	findPendingHandoff,
	normalizePrompt,
	normalizeReason,
	resolveHandoffContext,
	SENTINEL_RE,
	stripSentinel,
	truncateForHandoff,
} from "../session-handoff/state.ts";

// Helper for constructing message fixtures
/** Wraps `text` as the single text part of a user-role message fixture. */
function user(text: string) {
	const textPart = { type: "text", text };
	return { role: "user", content: [textPart] };
}
/** Wraps `text` as the single text part of an assistant-role message fixture. */
function assistant(text: string) {
	const textPart = { type: "text", text };
	return { role: "assistant", content: [textPart] };
}
/** Wraps `text` as the single text part of a toolResult-role message fixture. */
function toolResult(text: string) {
	const textPart = { type: "text", text };
	return { role: "toolResult", content: [textPart] };
}

// ── normalizePrompt ─────────────────────────────────────────────────────

// Outer whitespace (spaces and newlines alike) is dropped from both ends.
test("normalizePrompt: trims surrounding whitespace", () => {
	const cases = [
		{ raw: "  hello  ", trimmed: "hello" },
		{ raw: "\n\ntask\n", trimmed: "task" },
	];
	for (const { raw, trimmed } of cases) {
		assert.strictEqual(normalizePrompt(raw), trimmed);
	}
});

// Inputs made up only of whitespace normalize to "".
test("normalizePrompt: collapses blank inputs to empty string", () => {
	for (const blank of ["", "   ", "\t\n  "]) {
		assert.strictEqual(normalizePrompt(blank), "");
	}
});

// Missing values are expected to come back as an empty prompt.
test("normalizePrompt: coerces nullish to empty string", () => {
	for (const missing of [undefined, null]) {
		assert.strictEqual(normalizePrompt(missing), "");
	}
});

// Only the edges are trimmed; the body, blank line included, must survive.
test("normalizePrompt: preserves interior whitespace and newlines", () => {
	const body = "line one\nline two\n\nline four";
	const padded = `  ${body}  `;
	assert.strictEqual(normalizePrompt(padded), body);
});

// ── normalizeReason ─────────────────────────────────────────────────────

// A reason with real content comes back with its outer whitespace removed.
test("normalizeReason: trims and returns non-empty reasons", () => {
	const cleaned = normalizeReason("  context drift  ");
	assert.strictEqual(cleaned, "context drift");
});

// Anything without content is expected to yield undefined (not "").
test("normalizeReason: blank/whitespace/nullish all collapse to undefined", () => {
	const contentless = ["", "   ", "\n\t", undefined, null];
	for (const input of contentless) {
		assert.strictEqual(normalizeReason(input), undefined);
	}
});

// ── buildHandoffSentinel / buildHandoffMessage ──────────────────────────

// The sentinel is the id wrapped in the fixed "<pi-handoff-resume:…>" form.
test("buildHandoffSentinel: produces the expected literal form", () => {
	const sentinel = buildHandoffSentinel("abc-123");
	assert.strictEqual(sentinel, "<pi-handoff-resume:abc-123>");
});

// A handoff message is the sentinel, one newline, then the prompt.
test("buildHandoffMessage: prepends sentinel + newline to prompt", () => {
	const message = buildHandoffMessage("abc-123", "next task");
	assert.strictEqual(message, "<pi-handoff-resume:abc-123>\nnext task");
});

// Newlines inside the prompt must pass through unchanged.
test("buildHandoffMessage: preserves multi-line prompts verbatim", () => {
	const prompt = "## Context\nDo X\n## Task\nDo Y";
	const expected = `<pi-handoff-resume:id>\n${prompt}`;
	const message = buildHandoffMessage("id", prompt);
	assert.strictEqual(message, expected);
});

// ── extractSentinelId ───────────────────────────────────────────────────

// A leading sentinel followed by the prompt yields the embedded id.
test("extractSentinelId: returns id when sentinel present at start", () => {
	const text = "<pi-handoff-resume:abc-123>\nthe prompt";
	assert.strictEqual(extractSentinelId(text), "abc-123");
});

// The sentinel alone, with nothing after it, is still recognised.
test("extractSentinelId: tolerates missing trailing newline", () => {
	const bareSentinel = "<pi-handoff-resume:xyz>";
	assert.strictEqual(extractSentinelId(bareSentinel), "xyz");
});

// Plain text and the empty string carry no id.
test("extractSentinelId: returns undefined when no sentinel", () => {
	for (const text of ["just a regular prompt", ""]) {
		assert.strictEqual(extractSentinelId(text), undefined);
	}
});

test("extractSentinelId: only matches when sentinel is at start (anchored)", () => {
	// A sentinel buried in the middle of the text must be ignored: anchoring
	// the match keeps the LLM or another extension from smuggling a sentinel
	// in through prompt content.
	const smuggled = "preface text\n<pi-handoff-resume:fake>\nrest";
	assert.strictEqual(extractSentinelId(smuggled), undefined);
});

// Ids containing hyphen-separated hex groups are returned in full.
test("extractSentinelId: handles UUID-shaped ids", () => {
	const uuid = "018f3a2c-7b1e-7c4d-9e8a-1234567890ab";
	const text = `<pi-handoff-resume:${uuid}>\nprompt`;
	assert.strictEqual(extractSentinelId(text), uuid);
});

// ── stripSentinel ───────────────────────────────────────────────────────

// Both the sentinel and the newline that follows it are removed.
test("stripSentinel: removes sentinel and trailing newline cleanly", () => {
	const stripped = stripSentinel("<pi-handoff-resume:id>\nthe prompt");
	assert.strictEqual(stripped, "the prompt");
});

// Text without a sentinel (including "") is returned as-is.
test("stripSentinel: leaves text unchanged when no sentinel", () => {
	for (const plain of ["just a prompt", ""]) {
		assert.strictEqual(stripSentinel(plain), plain);
	}
});

// A sentinel that is not at the very start is left in place.
test("stripSentinel: does not strip mid-string sentinel (anchored)", () => {
	const embedded = "preface\n<pi-handoff-resume:fake>\nrest";
	const stripped = stripSentinel(embedded);
	assert.strictEqual(stripped, embedded);
});

// ── SENTINEL_RE direct ──────────────────────────────────────────────────

// The pattern accepts a short plain id as well as a UUID-shaped one.
test("SENTINEL_RE: matches both ASCII and UUID-shaped ids", () => {
	const accepted = [
		"<pi-handoff-resume:simple>\n",
		"<pi-handoff-resume:018f3a2c-7b1e-7c4d-9e8a-1234567890ab>\n",
	];
	for (const sample of accepted) {
		assert.match(sample, SENTINEL_RE);
	}
});

// A different tag name, or missing angle brackets, must not match.
test("SENTINEL_RE: does not match when prefix differs", () => {
	const rejected = ["<pi-handoff:id>\n", "pi-handoff-resume:id\n"];
	for (const sample of rejected) {
		assert.doesNotMatch(sample, SENTINEL_RE);
	}
});

// ── findPendingHandoff ──────────────────────────────────────────────────

// With nothing pending, even a well-formed sentinel message is not reported.
test("findPendingHandoff: returns undefined when pendingIds is empty", () => {
	const log = [user(buildHandoffMessage("id-1", "task"))];
	const nonePending = new Set<string>();
	assert.strictEqual(findPendingHandoff(log, nonePending), undefined);
});

// A pending id alone is not enough; some message has to carry its sentinel.
test("findPendingHandoff: returns undefined when no message has a sentinel", () => {
	const log = [user("regular prompt"), assistant("response")];
	const pending = new Set(["id-1"]);
	assert.strictEqual(findPendingHandoff(log, pending), undefined);
});

// The result reports both where the sentinel message sits and which id it is.
test("findPendingHandoff: finds a matching sentinel", () => {
	const log = [
		user("first"),
		assistant("response"),
		user(buildHandoffMessage("id-1", "next task")),
	];
	const found = findPendingHandoff(log, new Set(["id-1"]));
	assert.deepStrictEqual(found, { index: 2, id: "id-1" });
});

test("findPendingHandoff: ignores sentinel whose id is not pending", () => {
	// Sentinels from earlier handoffs remain in the session log, but they are
	// inert unless their id is the active one (e.g. a handoff that predates a
	// reload).
	const log = [user(buildHandoffMessage("inactive", "old task"))];
	const pending = new Set(["different"]);
	assert.strictEqual(findPendingHandoff(log, pending), undefined);
});

// When two pending sentinels are in the log, the later message is reported.
test("findPendingHandoff: picks the LATEST pending sentinel when several exist", () => {
	const log = [
		user(buildHandoffMessage("id-1", "first")),
		assistant("ack"),
		user(buildHandoffMessage("id-2", "second")),
	];
	const found = findPendingHandoff(log, new Set(["id-1", "id-2"]));
	assert.deepStrictEqual(found, { index: 2, id: "id-2" });
});

// Sentinel text in assistant or toolResult messages must not count.
test("findPendingHandoff: skips non-user messages even with sentinel-like text", () => {
	const fromAssistant = assistant(buildHandoffMessage("id-1", "shouldn't match"));
	const fromTool = toolResult(buildHandoffMessage("id-1", "also shouldn't match"));
	const found = findPendingHandoff([fromAssistant, fromTool], new Set(["id-1"]));
	assert.strictEqual(found, undefined);
});

// A user message mixing an image part with sentinel-free text yields no match.
test("findPendingHandoff: skips non-text content (e.g. images)", () => {
	const imagePart = { type: "image", data: "..." };
	const textPart = { type: "text", text: "no sentinel here" };
	const log = [{ role: "user", content: [imagePart, textPart] }];
	assert.strictEqual(findPendingHandoff(log, new Set(["id-1"])), undefined);
});

// A sentinel preceded by other text in the same message is not a handoff.
test("findPendingHandoff: only considers anchored sentinels (not mid-text)", () => {
	const embedded = `some preface\n<pi-handoff-resume:id-1>\nrest of message`;
	const found = findPendingHandoff([user(embedded)], new Set(["id-1"]));
	assert.strictEqual(found, undefined);
});

// ── truncateForHandoff ──────────────────────────────────────────────────

// No sentinel in the log means there is nothing to truncate to.
test("truncateForHandoff: returns undefined when no pending sentinel matches", () => {
	const log = [user("regular"), assistant("response")];
	const pending = new Set(["id-1"]);
	assert.strictEqual(truncateForHandoff(log, pending), undefined);
});

// Everything before the handoff message is dropped and the sentinel removed.
test("truncateForHandoff: slices to the handoff message and strips the sentinel", () => {
	const log = [
		user("task one"),
		assistant("doing task one"),
		toolResult("done"),
		user(buildHandoffMessage("id-1", "task two: do this fresh")),
	];
	const truncated = truncateForHandoff(log, new Set(["id-1"]));
	assert.ok(truncated, "expected truncation result");
	assert.strictEqual(truncated.matchedId, "id-1");

	const kept = truncated.messages;
	assert.strictEqual(kept.length, 1);
	assert.strictEqual(kept[0].role, "user");

	const parts = kept[0].content as Array<{
		type: string;
		text: string;
	}>;
	assert.strictEqual(parts.length, 1);
	assert.strictEqual(parts[0].text, "task two: do this fresh");
});

test("truncateForHandoff: keeps subsequent messages after the handoff verbatim", () => {
	// Not expected in practice (a followUp lands at the end of the log), but
	// it is a sensible invariant to hold.
	const log = [
		user("old task"),
		user(buildHandoffMessage("id-1", "new task")),
		assistant("started"),
	];
	const truncated = truncateForHandoff(log, new Set(["id-1"]));
	assert.ok(truncated);

	const kept = truncated.messages;
	assert.strictEqual(kept.length, 2);
	const firstText = (kept[0].content as Array<{ text: string }>)[0].text;
	assert.strictEqual(firstText, "new task");
	assert.strictEqual(kept[1].role, "assistant");
});

// Removing a consumed id is left to the caller; the set must be untouched.
test("truncateForHandoff: does NOT mutate the pendingIds set (caller's job)", () => {
	const pendingIds = new Set(["id-1"]);
	const log = [user(buildHandoffMessage("id-1", "task"))];
	truncateForHandoff(log, pendingIds);
	const stillPending = pendingIds.has("id-1");
	assert.strictEqual(stillPending, true, "set should still contain id");
});

// The serialized input is compared before and after the call.
test("truncateForHandoff: does NOT mutate the input messages array", () => {
	const log = [user("first"), user(buildHandoffMessage("id-1", "second"))];
	const snapshot = JSON.stringify(log);
	truncateForHandoff(log, new Set(["id-1"]));
	const afterCall = JSON.stringify(log);
	assert.strictEqual(afterCall, snapshot, "input messages should be untouched");
});

// An image part next to the sentinel text must come through unchanged.
test("truncateForHandoff: preserves non-text content alongside text in the handoff message", () => {
	const sentinelText = {
		type: "text",
		text: buildHandoffMessage("id-1", "next task"),
	};
	const image = { type: "image", data: "abc" };
	const log = [{ role: "user", content: [sentinelText, image] }];

	const truncated = truncateForHandoff(log, new Set(["id-1"]));
	assert.ok(truncated);

	const parts = truncated.messages[0].content as Array<{
		type: string;
		text?: string;
		data?: string;
	}>;
	assert.strictEqual(parts.length, 2);
	assert.strictEqual(parts[0].text, "next task");
	assert.strictEqual(parts[1].type, "image");
	assert.strictEqual(parts[1].data, "abc");
});

// With two pending handoffs in the log, the later one is the cut point.
test("truncateForHandoff: picks the LATEST handoff when multiple are pending", () => {
	const log = [
		user(buildHandoffMessage("id-1", "first task")),
		user(buildHandoffMessage("id-2", "second task")),
	];
	const truncated = truncateForHandoff(log, new Set(["id-1", "id-2"]));
	assert.ok(truncated);
	assert.strictEqual(truncated.matchedId, "id-2");

	const kept = truncated.messages;
	assert.strictEqual(kept.length, 1);
	const firstText = (kept[0].content as Array<{ text: string }>)[0].text;
	assert.strictEqual(firstText, "second task");
});

test("truncateForHandoff: keeps matching across turns while the id stays pending", () => {
	// Regression guard: a match must NOT consume the id. While the id remains
	// in the pending set, each call slices to the sentinel again, which is
	// what keeps the context pinned to the carry-over prompt for the whole
	// task rather than for a single turn.
	const pendingIds = new Set<string>();
	const log = [user("task 1"), assistant("response")];

	// Nothing pending yet, so nothing is truncated.
	assert.strictEqual(truncateForHandoff(log, pendingIds), undefined);

	// The handoff fires and its followUp drains into the log.
	pendingIds.add("id-1");
	log.push(user(buildHandoffMessage("id-1", "task 2")));

	// Each later turn truncates to that same sentinel.
	for (const turn of [0, 1, 2]) {
		const truncated = truncateForHandoff(log, pendingIds);
		assert.ok(truncated, `turn ${turn}: expected truncation`);
		assert.strictEqual(truncated.matchedId, "id-1");

		const head = truncated.messages[0];
		assert.strictEqual(head.role, "user");
		const headText = (head.content as Array<{ text: string }>)[0].text;
		assert.strictEqual(headText, "task 2");

		log.push(assistant(`step ${turn}`));
	}
});

// ── resolveHandoffContext (the per-turn decision used by index.ts) ──────────

// A null active id leaves the context alone, even if a sentinel is in the log.
test("resolveHandoffContext: returns undefined when no handoff is active", () => {
	const log = [user(buildHandoffMessage("id-1", "task")), assistant("x")];
	const resolved = resolveHandoffContext(log, null);
	assert.strictEqual(resolved, undefined);
});

// With an active id, the context starts at the (stripped) handoff message.
test("resolveHandoffContext: slices to the active handoff and strips the sentinel", () => {
	const log = [
		user("old task"),
		assistant("did it"),
		user(buildHandoffMessage("id-1", "new task")),
	];
	const resolved = resolveHandoffContext(log, "id-1");
	assert.ok(resolved);
	assert.strictEqual(resolved.length, 1);
	const firstText = (resolved[0].content as Array<{ text: string }>)[0].text;
	assert.strictEqual(firstText, "new task");
});

test("resolveHandoffContext: returns undefined when the active sentinel is not in the log", () => {
	// The handoff has been staged, but its followUp has not drained yet (or
	// was aborted). No message carries the sentinel, so the context must be
	// left untouched.
	const log = [user("ordinary"), assistant("reply")];
	const resolved = resolveHandoffContext(log, "id-1");
	assert.strictEqual(resolved, undefined);
});

test("resolveHandoffContext: PERSISTS truncation across every turn (regression)", () => {
	// The original bug consumed the handoff id after a single turn, so the
	// second turn sent the full pre-handoff history again and the context
	// ballooned across a task chain instead of resetting. This simulates a
	// task that keeps growing after a handoff and asserts that the context
	// stays pinned to the carry-over prompt on EVERY turn.
	const history = [user("task 11"), assistant("done 11"), toolResult("ok")];
	const log = history.concat(user(buildHandoffMessage("id-1", "task 12")));
	const activeId = "id-1";

	for (const turn of [0, 1, 2, 3, 4]) {
		const resolved = resolveHandoffContext(log, activeId);
		assert.ok(resolved, `turn ${turn}: expected truncation`);

		// The first visible message is always the stripped carry-over prompt.
		const head = resolved[0];
		assert.strictEqual(head.role, "user");
		const headText = (head.content as Array<{ text: string }>)[0].text;
		assert.strictEqual(
			headText.startsWith("task 12"),
			true,
			`turn ${turn}: context not pinned to carry-over prompt`,
		);

		// Stale pre-handoff history must never find its way back in.
		const leaked = resolved.some((message) => {
			const parts = message.content as Array<{ text?: string }>;
			return parts.some((part) => part.text === "task 11");
		});
		assert.strictEqual(
			leaked,
			false,
			`turn ${turn}: stale pre-handoff history leaked back in`,
		);

		// The task carries on working, so the log grows.
		log.push(assistant(`work step ${turn}`));
		log.push(toolResult(`result ${turn}`));
	}
});

test("resolveHandoffContext: a later handoff supersedes the earlier one", () => {
	const log = [
		user(buildHandoffMessage("id-1", "first task")),
		assistant("worked on first"),
		toolResult("done"),
		user(buildHandoffMessage("id-2", "second task")),
	];
	// index.ts overwrites activeHandoffId with the newest id on each handoff.
	const resolved = resolveHandoffContext(log, "id-2");
	assert.ok(resolved);
	assert.strictEqual(resolved.length, 1);
	const firstText = (resolved[0].content as Array<{ text: string }>)[0].text;
	assert.strictEqual(firstText, "second task");
});