fix generation token-rate disappearing on empty completions

A clean completion that emits a single token with no content delta never captures firstOutputMs, so the footer's generation rate (G) came out null while the processing rate (P) survived via the responseEndMs fallback — the G stat visibly dropped out on those turns.

Add findDisplayableTokenStats, which walks back to the most recent turn that has a usable generation rate so a degenerate turn no longer blanks the display, and point the footer at it. It falls back to the newest turn with any stats so P still shows when no turn has a generation rate. findLatestPiTokenStats (persistence) is unchanged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-28 20:33:22 +02:00
parent 853cef84af
commit c464f6b903
3 changed files with 121 additions and 1 deletions
@@ -125,6 +125,53 @@ export function findLatestPiTokenStats(entries: unknown): PiTokenStats | null {
 	return null;
 }

+/**
+ * Find the most recent assistant turn suitable for footer rate display.
+ *
+ * Unlike findLatestPiTokenStats (which inspects only the single newest
+ * assistant turn), this skips turns that have no usable generation rate — e.g.
+ * an empty / single-token completion where the model emitted no content delta,
+ * so `firstOutputMs` was never captured and `generationTokensPerSecond` is
+ * null. Such a turn would otherwise blank out the generation stat in the footer
+ * even though a good measurement exists one turn back: the processing rate
+ * survives (it falls back to `responseEndMs`), which is exactly why the footer
+ * ends up showing "P …" with the "G …" missing.
+ *
+ * Returns the newest turn whose generation rate is non-null; if no turn in the
+ * branch has one, falls back to the newest turn with ANY stats so the
+ * processing rate still shows; null only when no assistant turn has stats.
+ */
+export function findDisplayableTokenStats(entries: unknown): PiTokenStats | null {
+	const normalizedEntries = normalizePiEntries(entries);
+	let fallback: PiTokenStats | null = null;
+	for (let index = normalizedEntries.length - 1; index >= 0; index--) {
+		const entry = normalizedEntries[index];
+		if (!entry || typeof entry !== "object") {
+			continue;
+		}
+		const candidate = entry as {
+			type?: string;
+			message?: {
+				role?: string;
+			};
+		};
+		if (candidate.type !== "message" || candidate.message?.role !== "assistant") {
+			continue;
+		}
+		const stats = readPiTokenStats(candidate.message);
+		if (!stats) {
+			continue;
+		}
+		if (fallback === null) {
+			fallback = stats;
+		}
+		if (summarizePiTokenStats(stats).generationTokensPerSecond !== null) {
+			return stats;
+		}
+	}
+	return fallback;
+}
+
 function computeRate(tokens: number | undefined, startMs: number | undefined, endMs: number | undefined): number | null {
 	if (
 		tokens === undefined
@@ -2,6 +2,7 @@ import assert from "node:assert/strict";
 import { test } from "node:test";
 import {
 	buildPiTokenStatsEntry,
+	findDisplayableTokenStats,
 	findLatestPiTokenStats,
 	formatPiTokenRateStatus,
 	normalizePiEntries,
@@ -10,6 +11,33 @@ import {
 	summarizePiTokenStats,
 } from "../shared/token-stats.ts";

+// Assistant-message session entry fixture.
+function assistantEntry(piTokenStats: Record<string, number> | undefined) {
+	return {
+		type: "message",
+		message: { role: "assistant", ...(piTokenStats ? { piTokenStats } : {}) },
+	};
+}
+const userEntry = { type: "message", message: { role: "user" } };
+
+// A real "generation stat disappears" turn: the model finished cleanly with a
+// single completion token and no content delta, so firstOutputMs was never
+// captured. Generation rate is null; processing rate survives via responseEndMs.
+const degenerateStats = {
+	requestStartMs: 1000,
+	responseEndMs: 41000,
+	inputTokens: 28962,
+	outputTokens: 1,
+};
+const fullStats = {
+	requestStartMs: 1000,
+	firstOutputMs: 3000,
+	responseEndMs: 7000,
+	inputTokens: 800,
+	outputTokens: 200,
+	thinkingTokens: 300,
+};
+
 test("summarizePiTokenStats reports separate processing and generation rates", () => {
 	const summary = summarizePiTokenStats({
 		requestStartMs: 1000,
@@ -112,6 +140,50 @@ test("findLatestPiTokenStats treats non-array entries as empty", () => {
 	assert.deepEqual(normalizePiEntries(undefined), []);
 });

+test("findDisplayableTokenStats skips a trailing degenerate turn so the generation rate does not disappear", () => {
+	// Newest turn (degenerate, G null) would blank the generation stat; the
+	// footer should instead fall back to the prior turn that has a real rate.
+	const stats = findDisplayableTokenStats([
+		assistantEntry(fullStats),
+		userEntry,
+		assistantEntry(degenerateStats),
+	]);
+	assert.deepEqual(stats, fullStats);
+	assert.equal(
+		summarizePiTokenStats(stats!).generationTokensPerSecond !== null,
+		true,
+	);
+});
+
+test("findDisplayableTokenStats returns the newest turn when it already has a generation rate", () => {
+	assert.deepEqual(
+		findDisplayableTokenStats([
+			assistantEntry(degenerateStats),
+			userEntry,
+			assistantEntry(fullStats),
+		]),
+		fullStats,
+	);
+});
+
+test("findDisplayableTokenStats falls back to processing-only stats when no turn has a generation rate", () => {
+	// No usable generation rate anywhere — still show the newest turn so the
+	// processing rate (P) renders, rather than blanking the whole row.
+	const stats = findDisplayableTokenStats([assistantEntry(degenerateStats)]);
+	assert.ok(stats);
+	assert.equal(stats.firstOutputMs, undefined);
+	assert.equal(stats.outputTokens, 1);
+	const summary = summarizePiTokenStats(stats);
+	assert.equal(summary.generationTokensPerSecond, null);
+	assert.equal(formatPiTokenRateStatus(summary), "P 724.0tok/s");
+});
+
+test("findDisplayableTokenStats returns null when no assistant turn has stats", () => {
+	assert.equal(findDisplayableTokenStats([userEntry, assistantEntry(undefined)]), null);
+	assert.equal(findDisplayableTokenStats(undefined), null);
+	assert.equal(findDisplayableTokenStats({}), null);
+});
+
 test("buildPiTokenStatsEntry round-trips through readPiTokenStatsEntry", () => {
 	const built = buildPiTokenStatsEntry("assistant-123", {
 		requestStartMs: 10,
@@ -4,6 +4,7 @@ import { formatTokens } from "../shared/format.js";
 import { readExtensionBooleanSetting } from "../shared/pi-settings.js";
 import {
 	buildPiTokenStatsEntry,
+	findDisplayableTokenStats,
 	findLatestPiTokenStats,
 	formatPiTokenRateStatus,
 	normalizePiEntries,
@@ -73,7 +74,7 @@ function renderFooter(
 	const curTokens = usage?.tokens;
 	const maxTokens = usage?.contextWindow ?? ctx.model?.contextWindow ?? 0;
 	const pct = usage?.percent ?? null;
-	const latestTokenStats = findLatestPiTokenStats(entries);
+	const latestTokenStats = findDisplayableTokenStats(entries);
 	const tokenRateText =
 		latestTokenStats === null
 			? null