fix generation token-rate disappearing on empty completions
A clean completion that emits a single token with no content delta never captured firstOutputMs, so the footer's generation rate (G) computed to null while the processing rate (P) survived via the responseEndMs fallback — the stat visibly dropped out on those turns.

Add findDisplayableTokenStats, which walks back to the most recent turn that has a usable generation rate so a degenerate turn no longer blanks the display, and point the footer at it. It falls back to the newest turn with any stats so P still shows when no turn has a generation rate. findLatestPiTokenStats (persistence) is unchanged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -125,6 +125,53 @@ export function findLatestPiTokenStats(entries: unknown): PiTokenStats | null {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Select the token stats the footer should use when rendering rates.
 *
 * The entries are normalized with `normalizePiEntries` and scanned from the
 * newest entry back to the oldest. Only entries of type `"message"` whose
 * message role is `"assistant"`, and for which `readPiTokenStats` yields
 * stats, are considered.
 *
 * Why not simply take the newest turn: a completion that emitted no content
 * delta (for example an empty or single-token reply) never records
 * `firstOutputMs`, so its `generationTokensPerSecond` is null. Displaying that
 * turn would drop the "G …" part of the footer while "P …" stays (the
 * processing rate falls back to `responseEndMs`), even though an earlier turn
 * holds a valid generation measurement.
 *
 * @param entries - Session entries of unknown shape; normalized before use.
 * @returns The newest assistant turn's stats whose summarized generation rate
 *   is non-null. If no turn qualifies, the newest assistant turn's stats of
 *   any kind (so the processing rate can still be shown). `null` when no
 *   assistant turn carries stats at all.
 */
export function findDisplayableTokenStats(entries: unknown): PiTokenStats | null {
  const turns = normalizePiEntries(entries);

  // First (i.e. newest) stats seen during the backwards scan, kept in case no
  // turn turns out to have a generation rate.
  let newestWithStats: PiTokenStats | null = null;

  for (let position = turns.length - 1; position >= 0; position -= 1) {
    const raw = turns[position];
    if (!raw || typeof raw !== "object") {
      continue;
    }

    const shaped = raw as { type?: string; message?: { role?: string } };
    const isAssistantTurn = shaped.type === "message" && shaped.message?.role === "assistant";
    if (!isAssistantTurn) {
      continue;
    }

    const turnStats = readPiTokenStats(shaped.message);
    if (!turnStats) {
      continue;
    }

    // Only the first hit is remembered, so the fallback is always the newest.
    newestWithStats = newestWithStats ?? turnStats;

    const hasGenerationRate = summarizePiTokenStats(turnStats).generationTokensPerSecond !== null;
    if (hasGenerationRate) {
      return turnStats;
    }
  }

  return newestWithStats;
}
|
||||||
|
|
||||||
function computeRate(tokens: number | undefined, startMs: number | undefined, endMs: number | undefined): number | null {
|
function computeRate(tokens: number | undefined, startMs: number | undefined, endMs: number | undefined): number | null {
|
||||||
if (
|
if (
|
||||||
tokens === undefined
|
tokens === undefined
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import assert from "node:assert/strict";
|
|||||||
import { test } from "node:test";
|
import { test } from "node:test";
|
||||||
import {
|
import {
|
||||||
buildPiTokenStatsEntry,
|
buildPiTokenStatsEntry,
|
||||||
|
findDisplayableTokenStats,
|
||||||
findLatestPiTokenStats,
|
findLatestPiTokenStats,
|
||||||
formatPiTokenRateStatus,
|
formatPiTokenRateStatus,
|
||||||
normalizePiEntries,
|
normalizePiEntries,
|
||||||
@@ -10,6 +11,33 @@ import {
|
|||||||
summarizePiTokenStats,
|
summarizePiTokenStats,
|
||||||
} from "../shared/token-stats.ts";
|
} from "../shared/token-stats.ts";
|
||||||
|
|
||||||
|
/**
 * Build a session entry fixture shaped like an assistant message.
 *
 * @param piTokenStats - Stats to attach under `message.piTokenStats`. When
 *   falsy (e.g. `undefined`) the key is left off the message entirely rather
 *   than being set to `undefined`.
 * @returns `{ type: "message", message: { role: "assistant", piTokenStats? } }`.
 */
function assistantEntry(piTokenStats: Record<string, number> | undefined) {
  const message: { role: string; piTokenStats?: Record<string, number> } = { role: "assistant" };
  if (piTokenStats) {
    message.piTokenStats = piTokenStats;
  }
  return { type: "message", message };
}
|
||||||
|
// Session entry with a non-assistant role, used as filler between assistant
// turns in the fixtures below.
const userEntry = { type: "message", message: { role: "user" } };

// Stats for a turn where the generation figure goes missing: the model ended
// cleanly after one completion token without any content delta, so there is
// no firstOutputMs. The generation rate is therefore null, while the
// processing rate is still derivable through responseEndMs.
const degenerateStats = {
  requestStartMs: 1000,
  responseEndMs: 41000,
  inputTokens: 28962,
  outputTokens: 1,
};

// Stats for an ordinary turn that recorded firstOutputMs, so both the
// processing and the generation rate can be computed.
const fullStats = {
  requestStartMs: 1000,
  firstOutputMs: 3000,
  responseEndMs: 7000,
  inputTokens: 800,
  outputTokens: 200,
  thinkingTokens: 300,
};
|
||||||
|
|
||||||
test("summarizePiTokenStats reports separate processing and generation rates", () => {
|
test("summarizePiTokenStats reports separate processing and generation rates", () => {
|
||||||
const summary = summarizePiTokenStats({
|
const summary = summarizePiTokenStats({
|
||||||
requestStartMs: 1000,
|
requestStartMs: 1000,
|
||||||
@@ -112,6 +140,50 @@ test("findLatestPiTokenStats treats non-array entries as empty", () => {
|
|||||||
assert.deepEqual(normalizePiEntries(undefined), []);
|
assert.deepEqual(normalizePiEntries(undefined), []);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("findDisplayableTokenStats skips a trailing degenerate turn so the generation rate does not disappear", () => {
  // Entries are ordered oldest → newest. The newest assistant turn is
  // degenerate (generation rate null) and would blank the generation stat; the
  // lookup should instead return the earlier turn that has a real rate.
  const stats = findDisplayableTokenStats([
    assistantEntry(fullStats),
    userEntry,
    assistantEntry(degenerateStats),
  ]);

  // assert.ok narrows `stats` to non-null, so no `!` assertion is needed below.
  assert.ok(stats);
  assert.deepEqual(stats, fullStats);

  // notEqual reports the offending value on failure, unlike comparing a
  // precomputed boolean against `true`.
  assert.notEqual(summarizePiTokenStats(stats).generationTokensPerSecond, null);
});
|
||||||
|
|
||||||
|
test("findDisplayableTokenStats returns the newest turn when it already has a generation rate", () => {
  // Oldest → newest: the last assistant turn has firstOutputMs, so the lookup
  // has no reason to walk further back to the degenerate one.
  const history = [assistantEntry(degenerateStats), userEntry, assistantEntry(fullStats)];

  const picked = findDisplayableTokenStats(history);

  assert.deepEqual(picked, fullStats);
});
|
||||||
|
|
||||||
|
test("findDisplayableTokenStats falls back to processing-only stats when no turn has a generation rate", () => {
  // No usable generation rate anywhere — still show the newest turn so the
  // processing rate (P) renders, rather than blanking the whole row.
  const stats = findDisplayableTokenStats([assistantEntry(degenerateStats)]);
  assert.ok(stats);
  assert.equal(stats.firstOutputMs, undefined);
  assert.equal(stats.outputTokens, 1);
  const summary = summarizePiTokenStats(stats);
  assert.equal(summary.generationTokensPerSecond, null);
  assert.equal(formatPiTokenRateStatus(summary), "P 724.0tok/s");

  // With more than one rate-less turn the fallback must be the NEWEST of them,
  // not the last one visited by the backwards scan. The older turn differs in
  // outputTokens so the two can be told apart.
  const olderDegenerateStats = { ...degenerateStats, outputTokens: 2 };
  const newest = findDisplayableTokenStats([
    assistantEntry(olderDegenerateStats),
    userEntry,
    assistantEntry(degenerateStats),
  ]);
  assert.ok(newest);
  assert.equal(newest.outputTokens, 1);
  assert.equal(summarizePiTokenStats(newest).generationTokensPerSecond, null);
});
|
||||||
|
|
||||||
|
test("findDisplayableTokenStats returns null when no assistant turn has stats", () => {
  // Checked in order: an entry list whose only assistant turn lacks
  // piTokenStats, then two non-array inputs.
  const inputsWithoutStats: unknown[] = [[userEntry, assistantEntry(undefined)], undefined, {}];

  for (const input of inputsWithoutStats) {
    assert.equal(findDisplayableTokenStats(input), null);
  }
});
|
||||||
|
|
||||||
test("buildPiTokenStatsEntry round-trips through readPiTokenStatsEntry", () => {
|
test("buildPiTokenStatsEntry round-trips through readPiTokenStatsEntry", () => {
|
||||||
const built = buildPiTokenStatsEntry("assistant-123", {
|
const built = buildPiTokenStatsEntry("assistant-123", {
|
||||||
requestStartMs: 10,
|
requestStartMs: 10,
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import { formatTokens } from "../shared/format.js";
|
|||||||
import { readExtensionBooleanSetting } from "../shared/pi-settings.js";
|
import { readExtensionBooleanSetting } from "../shared/pi-settings.js";
|
||||||
import {
|
import {
|
||||||
buildPiTokenStatsEntry,
|
buildPiTokenStatsEntry,
|
||||||
|
findDisplayableTokenStats,
|
||||||
findLatestPiTokenStats,
|
findLatestPiTokenStats,
|
||||||
formatPiTokenRateStatus,
|
formatPiTokenRateStatus,
|
||||||
normalizePiEntries,
|
normalizePiEntries,
|
||||||
@@ -73,7 +74,7 @@ function renderFooter(
|
|||||||
const curTokens = usage?.tokens;
|
const curTokens = usage?.tokens;
|
||||||
const maxTokens = usage?.contextWindow ?? ctx.model?.contextWindow ?? 0;
|
const maxTokens = usage?.contextWindow ?? ctx.model?.contextWindow ?? 0;
|
||||||
const pct = usage?.percent ?? null;
|
const pct = usage?.percent ?? null;
|
||||||
const latestTokenStats = findLatestPiTokenStats(entries);
|
const latestTokenStats = findDisplayableTokenStats(entries);
|
||||||
const tokenRateText =
|
const tokenRateText =
|
||||||
latestTokenStats === null
|
latestTokenStats === null
|
||||||
? null
|
? null
|
||||||
|
|||||||
Reference in New Issue
Block a user