fix generation token-rate disappearing on empty completions

A clean completion that emits a single token with no content delta never
captured firstOutputMs, so the footer's generation rate (G) computed null while
the processing rate (P) survived via the responseEndMs fallback — the stat
visibly dropped out on those turns.

Add findDisplayableTokenStats, which walks back to the most recent turn that has
a usable generation rate so a degenerate turn no longer blanks the display, and
point the footer at it. Falls back to the newest turn with any stats so P still
shows when no turn has a generation rate. findLatestPiTokenStats (persistence)
is unchanged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
shahondin1624
2026-05-28 20:33:22 +02:00
parent 853cef84af
commit c464f6b903
3 changed files with 121 additions and 1 deletions
+47
View File
@@ -125,6 +125,53 @@ export function findLatestPiTokenStats(entries: unknown): PiTokenStats | null {
return null;
}
/**
 * Pick the token stats the footer should render its rates from.
 *
 * findLatestPiTokenStats looks only at the newest assistant turn. That turn can
 * lack a generation rate — for instance an empty / single-token completion
 * with no content delta never records `firstOutputMs`, so
 * `generationTokensPerSecond` summarizes to null while the processing rate
 * still resolves through `responseEndMs`. Rendering such a turn leaves the
 * footer with "P …" but no "G …", even when an earlier turn measured fine.
 *
 * Resolution order, scanning assistant message entries newest → oldest:
 *   1. the first turn whose summarized generation rate is non-null;
 *   2. otherwise the newest turn carrying any stats, so the processing rate
 *      still shows;
 *   3. otherwise null.
 *
 * @param entries Raw session entries; run through normalizePiEntries first.
 * @returns The chosen turn's stats, or null when no assistant turn has any.
 */
export function findDisplayableTokenStats(entries: unknown): PiTokenStats | null {
  const branch = normalizePiEntries(entries);
  // Newest turn that had stats of any kind; only used if no turn has a G rate.
  let newestWithStats: PiTokenStats | null = null;

  let cursor = branch.length;
  while (cursor-- > 0) {
    const raw = branch[cursor];
    if (!raw || typeof raw !== "object") {
      continue;
    }

    const turn = raw as { type?: string; message?: { role?: string } };
    if (!(turn.type === "message" && turn.message?.role === "assistant")) {
      continue;
    }

    const turnStats = readPiTokenStats(turn.message);
    if (!turnStats) {
      continue;
    }

    // The scan runs newest-first, so only the first hit is kept as fallback.
    newestWithStats = newestWithStats ?? turnStats;

    const { generationTokensPerSecond } = summarizePiTokenStats(turnStats);
    if (generationTokensPerSecond !== null) {
      return turnStats;
    }
  }

  return newestWithStats;
}
function computeRate(tokens: number | undefined, startMs: number | undefined, endMs: number | undefined): number | null {
if (
tokens === undefined
+72
View File
@@ -2,6 +2,7 @@ import assert from "node:assert/strict";
import { test } from "node:test";
import {
buildPiTokenStatsEntry,
findDisplayableTokenStats,
findLatestPiTokenStats,
formatPiTokenRateStatus,
normalizePiEntries,
@@ -10,6 +11,33 @@ import {
summarizePiTokenStats,
} from "../shared/token-stats.ts";
// Fixture builder: an assistant "message" session entry. `piTokenStats` is
// attached to the message only when a (truthy) stats object is supplied, so
// passing undefined yields an assistant turn with no stats key at all.
function assistantEntry(piTokenStats: Record<string, number> | undefined) {
  const message: { role: string; piTokenStats?: Record<string, number> } = {
    role: "assistant",
  };
  if (piTokenStats) {
    message.piTokenStats = piTokenStats;
  }
  return { type: "message", message };
}
// User-role message entry with no piTokenStats; placed between assistant
// turns in the findDisplayableTokenStats tests below.
const userEntry = { type: "message", message: { role: "user" } };
// A real "generation stat disappears" turn: the model finished cleanly with a
// single completion token and no content delta, so firstOutputMs was never
// captured. Generation rate is null; processing rate survives via responseEndMs
// (the fallback test below expects it to format as "P 724.0tok/s").
const degenerateStats = {
requestStartMs: 1000,
responseEndMs: 41000,
inputTokens: 28962,
outputTokens: 1,
};
// A fully measured turn: firstOutputMs is present, and the tests below expect
// summarizePiTokenStats to report a non-null generation rate for it.
const fullStats = {
requestStartMs: 1000,
firstOutputMs: 3000,
responseEndMs: 7000,
inputTokens: 800,
outputTokens: 200,
thinkingTokens: 300,
};
test("summarizePiTokenStats reports separate processing and generation rates", () => {
const summary = summarizePiTokenStats({
requestStartMs: 1000,
@@ -112,6 +140,50 @@ test("findLatestPiTokenStats treats non-array entries as empty", () => {
assert.deepEqual(normalizePiEntries(undefined), []);
});
test("findDisplayableTokenStats skips a trailing degenerate turn so the generation rate does not disappear", () => {
  // Entries run oldest → newest. The last turn has no generation rate, so the
  // lookup is expected to reach back to the earlier, fully measured turn.
  const branch = [
    assistantEntry(fullStats),
    userEntry,
    assistantEntry(degenerateStats),
  ];

  const stats = findDisplayableTokenStats(branch);
  assert.deepEqual(stats, fullStats);

  // The turn that was picked must actually yield a generation rate.
  const { generationTokensPerSecond } = summarizePiTokenStats(stats!);
  assert.equal(generationTokensPerSecond !== null, true);
});
test("findDisplayableTokenStats returns the newest turn when it already has a generation rate", () => {
  // The newest turn is fully measured, so the older degenerate turn is never
  // the answer.
  const branch = [
    assistantEntry(degenerateStats),
    userEntry,
    assistantEntry(fullStats),
  ];
  const picked = findDisplayableTokenStats(branch);
  assert.deepEqual(picked, fullStats);
});
test("findDisplayableTokenStats falls back to processing-only stats when no turn has a generation rate", () => {
  // With a degenerate turn as the only candidate, the lookup should still hand
  // it back so the processing rate (P) renders instead of an empty row.
  const onlyTurn = findDisplayableTokenStats([assistantEntry(degenerateStats)]);
  assert.ok(onlyTurn);
  assert.equal(onlyTurn.firstOutputMs, undefined);
  assert.equal(onlyTurn.outputTokens, 1);

  const rates = summarizePiTokenStats(onlyTurn);
  assert.equal(rates.generationTokensPerSecond, null);

  const footerText = formatPiTokenRateStatus(rates);
  assert.equal(footerText, "P 724.0tok/s");
});
test("findDisplayableTokenStats returns null when no assistant turn has stats", () => {
  // Checked in order: a branch whose assistant turn has no stats, then two
  // non-array inputs.
  const statlessInputs: unknown[] = [
    [userEntry, assistantEntry(undefined)],
    undefined,
    {},
  ];
  for (const input of statlessInputs) {
    assert.equal(findDisplayableTokenStats(input), null);
  }
});
test("buildPiTokenStatsEntry round-trips through readPiTokenStatsEntry", () => {
const built = buildPiTokenStatsEntry("assistant-123", {
requestStartMs: 10,
+2 -1
View File
@@ -4,6 +4,7 @@ import { formatTokens } from "../shared/format.js";
import { readExtensionBooleanSetting } from "../shared/pi-settings.js";
import {
buildPiTokenStatsEntry,
findDisplayableTokenStats,
findLatestPiTokenStats,
formatPiTokenRateStatus,
normalizePiEntries,
@@ -73,7 +74,7 @@ function renderFooter(
const curTokens = usage?.tokens;
const maxTokens = usage?.contextWindow ?? ctx.model?.contextWindow ?? 0;
const pct = usage?.percent ?? null;
const latestTokenStats = findLatestPiTokenStats(entries);
const latestTokenStats = findDisplayableTokenStats(entries);
const tokenRateText =
latestTokenStats === null
? null