fix generation token-rate disappearing on empty completions

A clean completion that emits a single token with no content delta never captured firstOutputMs, so the footer's generation rate (G) was computed as null while the processing rate (P) survived via the responseEndMs fallback — the G stat visibly dropped out on those turns.

Add findDisplayableTokenStats, which walks back to the most recent turn that has a usable generation rate so a degenerate turn no longer blanks the display, and point the footer at it. It falls back to the newest turn with any stats so P still shows when no turn has a generation rate. findLatestPiTokenStats (persistence) is unchanged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-28 20:33:22 +02:00
parent 853cef84af
commit c464f6b903
3 changed files with 121 additions and 1 deletions
@@ -125,6 +125,53 @@ export function findLatestPiTokenStats(entries: unknown): PiTokenStats | null {
 	return null;
 }
 /**
 * Pick the token stats the footer should use for its rate display.
 *
 * Entries are scanned from newest to oldest. Only assistant `message` entries
 * that carry stats are considered; the first one whose summarized
 * `generationTokensPerSecond` is non-null is returned immediately.
 *
 * This differs from findLatestPiTokenStats, which looks at the single newest
 * assistant turn only. A turn without a generation rate (for example an empty
 * or single-token completion where `firstOutputMs` was never captured) is
 * skipped here, so it cannot hide a usable measurement from an earlier turn
 * and leave the footer with "P …" but no "G …".
 *
 * @param entries Raw session entries; passed through normalizePiEntries first.
 * @returns The newest stats with a non-null generation rate; otherwise the
 * newest stats of any kind (so the processing rate can still be shown);
 * otherwise null when no assistant turn has stats.
 */
 export function findDisplayableTokenStats(entries: unknown): PiTokenStats | null {
 	const turns = normalizePiEntries(entries);
 	// Newest stats seen so far, kept for the case where no turn has a generation rate.
 	let newestStats: PiTokenStats | null = null;
 	let position = turns.length;
 	while (position-- > 0) {
 		const raw = turns[position];
 		if (!raw || typeof raw !== "object") {
 			continue;
 		}
 		const { type, message } = raw as { type?: string; message?: { role?: string } };
 		if (type !== "message" || message?.role !== "assistant") {
 			continue;
 		}
 		const turnStats = readPiTokenStats(message);
 		if (!turnStats) {
 			continue;
 		}
 		const { generationTokensPerSecond } = summarizePiTokenStats(turnStats);
 		if (generationTokensPerSecond !== null) {
 			return turnStats;
 		}
 		// Only the first (newest) stats-bearing turn is remembered as the fallback.
 		newestStats = newestStats ?? turnStats;
 	}
 	return newestStats;
 }
 function computeRate(tokens: number | undefined, startMs: number | undefined, endMs: number | undefined): number | null {
 	if (
 		tokens === undefined
@@ -2,6 +2,7 @@ import assert from "node:assert/strict";
 import { test } from "node:test";
 import {
 	buildPiTokenStatsEntry,
 	findDisplayableTokenStats,
 	findLatestPiTokenStats,
 	formatPiTokenRateStatus,
 	normalizePiEntries,
@@ -10,6 +11,33 @@ import {
 	summarizePiTokenStats,
 } from "../shared/token-stats.ts";
 // Builds an assistant-message session entry fixture. `piTokenStats` is attached
 // to the message only when a truthy stats object is supplied; otherwise the
 // key is left off entirely.
 function assistantEntry(piTokenStats: Record<string, number> | undefined) {
 	const message: { role: string; piTokenStats?: Record<string, number> } = { role: "assistant" };
 	if (piTokenStats) {
 		message.piTokenStats = piTokenStats;
 	}
 	return { type: "message", message };
 }
 // User-message entry fixture; it has no piTokenStats, so it only serves as a
 // non-assistant entry placed between assistant turns.
 const userEntry = { type: "message", message: { role: "user" } };
 // A real "generation stat disappears" turn: the model finished cleanly with a
 // single completion token and no content delta, so firstOutputMs was never
 // captured. Generation rate is null; processing rate survives via responseEndMs.
 const degenerateStats = {
 	requestStartMs: 1000,
 	responseEndMs: 41000,
 	inputTokens: 28962,
 	outputTokens: 1,
 };
 // A complete measurement: unlike degenerateStats it includes firstOutputMs, and
 // the tests below expect its summarized generation rate to be non-null.
 const fullStats = {
 	requestStartMs: 1000,
 	firstOutputMs: 3000,
 	responseEndMs: 7000,
 	inputTokens: 800,
 	outputTokens: 200,
 	thinkingTokens: 300,
 };
 test("summarizePiTokenStats reports separate processing and generation rates", () => {
 	const summary = summarizePiTokenStats({
 		requestStartMs: 1000,
@@ -112,6 +140,50 @@ test("findLatestPiTokenStats treats non-array entries as empty", () => {
 	assert.deepEqual(normalizePiEntries(undefined), []);
 });
 test("findDisplayableTokenStats skips a trailing degenerate turn so the generation rate does not disappear", () => {
 	// The newest turn is degenerate (its generation rate is null) and would
 	// blank the generation stat; the lookup should instead return the prior
 	// turn, which has a real rate.
 	const stats = findDisplayableTokenStats([
 		assistantEntry(fullStats),
 		userEntry,
 		assistantEntry(degenerateStats),
 	]);
 	assert.deepEqual(stats, fullStats);
 	// Narrow away null with a runtime guard rather than a non-null assertion,
 	// matching the other tests in this file.
 	assert.ok(stats);
 	// notEqual reports the offending value on failure, unlike comparing a
 	// precomputed boolean against `true`.
 	assert.notEqual(summarizePiTokenStats(stats).generationTokensPerSecond, null);
 });
 test("findDisplayableTokenStats returns the newest turn when it already has a generation rate", () => {
 	// The newest assistant turn carries a full measurement, so it must win over
 	// the older degenerate turn.
 	const entries = [assistantEntry(degenerateStats), userEntry, assistantEntry(fullStats)];
 	const picked = findDisplayableTokenStats(entries);
 	assert.deepEqual(picked, fullStats);
 });
 test("findDisplayableTokenStats falls back to processing-only stats when no turn has a generation rate", () => {
 	// With only a degenerate turn available there is no generation rate to
 	// find; the newest stats must still come back so the processing rate (P)
 	// renders instead of the whole row going blank.
 	const picked = findDisplayableTokenStats([assistantEntry(degenerateStats)]);
 	assert.ok(picked);
 	assert.equal(picked.outputTokens, 1);
 	assert.equal(picked.firstOutputMs, undefined);
 	const rates = summarizePiTokenStats(picked);
 	assert.equal(rates.generationTokensPerSecond, null);
 	assert.equal(formatPiTokenRateStatus(rates), "P 724.0tok/s");
 });
 test("findDisplayableTokenStats returns null when no assistant turn has stats", () => {
 	// Inputs with nothing to display: an assistant turn lacking stats, a
 	// missing entries value, and a non-array object.
 	const inputsWithoutStats: unknown[] = [[userEntry, assistantEntry(undefined)], undefined, {}];
 	for (const input of inputsWithoutStats) {
 		assert.equal(findDisplayableTokenStats(input), null);
 	}
 });
 test("buildPiTokenStatsEntry round-trips through readPiTokenStatsEntry", () => {
 	const built = buildPiTokenStatsEntry("assistant-123", {
 		requestStartMs: 10,
@@ -4,6 +4,7 @@ import { formatTokens } from "../shared/format.js";
 import { readExtensionBooleanSetting } from "../shared/pi-settings.js";
 import {
 	buildPiTokenStatsEntry,
 	findDisplayableTokenStats,
 	findLatestPiTokenStats,
 	formatPiTokenRateStatus,
 	normalizePiEntries,
@@ -73,7 +74,7 @@ function renderFooter(
 	const curTokens = usage?.tokens;
 	const maxTokens = usage?.contextWindow ?? ctx.model?.contextWindow ?? 0;
 	const pct = usage?.percent ?? null;
-	const latestTokenStats = findLatestPiTokenStats(entries);
+	const latestTokenStats = findDisplayableTokenStats(entries);
 	const tokenRateText =
 		latestTokenStats === null
 			? null