Files
pi-extensions/tests/token-stats.test.ts
T
shahondin1624 c464f6b903 fix generation token-rate disappearing on empty completions
A clean completion that emits a single token with no content delta never
captured firstOutputMs, so the footer's generation rate (G) computed null while
the processing rate (P) survived via the responseEndMs fallback — the stat
visibly dropped out on those turns.

Add findDisplayableTokenStats, which walks back to the most recent turn that has
a usable generation rate so a degenerate turn no longer blanks the display, and
point the footer at it. Falls back to the newest turn with any stats so P still
shows when no turn has a generation rate. findLatestPiTokenStats (persistence)
is unchanged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-28 20:33:22 +02:00

208 lines
5.5 KiB
TypeScript

import assert from "node:assert/strict";
import { test } from "node:test";
import {
buildPiTokenStatsEntry,
findDisplayableTokenStats,
findLatestPiTokenStats,
formatPiTokenRateStatus,
normalizePiEntries,
readPiTokenStatsEntry,
readPiTokenStats,
summarizePiTokenStats,
} from "../shared/token-stats.ts";
/**
 * Builds a session entry shaped like an assistant message.
 *
 * When `piTokenStats` is truthy it is attached to the message under the
 * `piTokenStats` key; otherwise the key is left off entirely (it is not set
 * to `undefined`).
 */
function assistantEntry(piTokenStats: Record<string, number> | undefined) {
  const message: { role: string; piTokenStats?: Record<string, number> } = {
    role: "assistant",
  };
  if (piTokenStats) {
    message.piTokenStats = piTokenStats;
  }
  return { type: "message", message };
}
// Plain user-message entry; the tests place it between assistant turns.
const userEntry = {
  type: "message",
  message: { role: "user" },
};

// A turn with complete timing: firstOutputMs is present, so both a processing
// and a generation rate can be derived from it.
const fullStats = {
  requestStartMs: 1000,
  firstOutputMs: 3000,
  responseEndMs: 7000,
  inputTokens: 800,
  outputTokens: 200,
  thinkingTokens: 300,
};

// The turn behind the "generation stat disappears" report: the model finished
// cleanly with one completion token and no content delta, so firstOutputMs was
// never recorded. The generation rate is null for such a turn, while the
// processing rate is still available through responseEndMs.
const degenerateStats = {
  requestStartMs: 1000,
  responseEndMs: 41000,
  inputTokens: 28962,
  outputTokens: 1,
};
test("summarizePiTokenStats reports separate processing and generation rates", () => {
  // The expected rates below correspond to 800 / 2s = 400 and
  // (200 + 300) / 4s = 125 for this timing.
  const timing = {
    requestStartMs: 1000,
    firstOutputMs: 3000,
    responseEndMs: 7000,
    inputTokens: 800,
    outputTokens: 200,
    thinkingTokens: 300,
  };
  const rates = summarizePiTokenStats(timing);

  assert.equal(rates.processingTokensPerSecond, 400);
  assert.equal(rates.generationTokensPerSecond, 125);

  // Both rates are present, so the status line shows P and G.
  const statusLine = formatPiTokenRateStatus(rates);
  assert.equal(statusLine, "P 400.0tok/s G 125.0tok/s");
});
test("summarizePiTokenStats falls back to response end when no output token arrived", () => {
  // No firstOutputMs and zero output tokens; the expected processing rate
  // corresponds to 200 tokens over the full 4s request.
  const timing = {
    requestStartMs: 1000,
    responseEndMs: 5000,
    inputTokens: 200,
    outputTokens: 0,
  };
  const rates = summarizePiTokenStats(timing);

  assert.equal(rates.processingTokensPerSecond, 50);
  assert.equal(rates.generationTokensPerSecond, null);

  // With a null generation rate only the P segment is rendered.
  const statusLine = formatPiTokenRateStatus(rates);
  assert.equal(statusLine, "P 50.0tok/s");
});
test("readPiTokenStats rejects missing timing metadata", () => {
  // A message that carries no piTokenStats payload reads as null.
  assert.equal(readPiTokenStats({ role: "assistant" }), null);

  // A complete payload is expected to come back field-for-field. The input is
  // a copy so that the expectation is a separate object from what was passed.
  const expected = {
    requestStartMs: 10,
    firstOutputMs: 20,
    responseEndMs: 30,
    inputTokens: 40,
    outputTokens: 50,
    thinkingTokens: 60,
  };
  const parsed = readPiTokenStats({ piTokenStats: { ...expected } });
  assert.deepEqual(parsed, expected);
});
test("findLatestPiTokenStats ignores older assistant messages after the latest assistant turn", () => {
  // Stats attached to the first (older) assistant turn.
  const olderTurnStats = {
    requestStartMs: 1,
    responseEndMs: 2,
    outputTokens: 3,
  };
  // Stats attached to the last assistant turn; these are the ones expected back.
  const newestTurnStats = {
    requestStartMs: 10,
    firstOutputMs: 20,
    responseEndMs: 30,
    inputTokens: 40,
    outputTokens: 50,
    thinkingTokens: 60,
  };
  // Older assistant turn, a user message, then the newest assistant turn.
  const history = [
    assistantEntry(olderTurnStats),
    userEntry,
    assistantEntry({ ...newestTurnStats }),
  ];

  const latest = findLatestPiTokenStats(history);
  assert.deepEqual(latest, newestTurnStats);
});
test("findLatestPiTokenStats treats non-array entries as empty", () => {
  // Neither undefined nor a plain object is an entry list, so no stats are found.
  const fromUndefined = findLatestPiTokenStats(undefined);
  assert.equal(fromUndefined, null);

  const fromPlainObject = findLatestPiTokenStats({});
  assert.equal(fromPlainObject, null);

  // Normalizing a missing entry list produces an empty array.
  const normalized = normalizePiEntries(undefined);
  assert.deepEqual(normalized, []);
});
test("findDisplayableTokenStats skips a trailing degenerate turn so the generation rate does not disappear", () => {
  // The newest turn is degenerate (null generation rate) and would blank the
  // G stat, so the lookup is expected to land on the earlier full turn.
  const history = [
    assistantEntry(fullStats),
    userEntry,
    assistantEntry(degenerateStats),
  ];
  const displayed = findDisplayableTokenStats(history);
  assert.deepEqual(displayed, fullStats);

  // The chosen turn must actually yield a generation rate.
  const hasGenerationRate =
    summarizePiTokenStats(displayed!).generationTokensPerSecond !== null;
  assert.equal(hasGenerationRate, true);
});
test("findDisplayableTokenStats returns the newest turn when it already has a generation rate", () => {
  // Degenerate turn first, full turn last: the newest turn is usable as-is.
  const history = [
    assistantEntry(degenerateStats),
    userEntry,
    assistantEntry(fullStats),
  ];
  const displayed = findDisplayableTokenStats(history);
  assert.deepEqual(displayed, fullStats);
});
test("findDisplayableTokenStats falls back to processing-only stats when no turn has a generation rate", () => {
  // With no generation rate available anywhere, the newest turn should still
  // be returned so the processing rate (P) renders instead of an empty row.
  const history = [assistantEntry(degenerateStats)];
  const displayed = findDisplayableTokenStats(history);
  assert.ok(displayed);

  // The returned stats are the degenerate turn's: no first-output timestamp
  // and a single output token.
  assert.equal(displayed.firstOutputMs, undefined);
  assert.equal(displayed.outputTokens, 1);

  const rates = summarizePiTokenStats(displayed);
  assert.equal(rates.generationTokensPerSecond, null);

  // The expected figure corresponds to 28962 tokens over 40s.
  const statusLine = formatPiTokenRateStatus(rates);
  assert.equal(statusLine, "P 724.0tok/s");
});
test("findDisplayableTokenStats returns null when no assistant turn has stats", () => {
  // An assistant turn exists but carries no piTokenStats.
  const fromStatlessHistory = findDisplayableTokenStats([
    userEntry,
    assistantEntry(undefined),
  ]);
  assert.equal(fromStatlessHistory, null);

  // Inputs that are not an entry list also produce null.
  const fromUndefined = findDisplayableTokenStats(undefined);
  assert.equal(fromUndefined, null);

  const fromPlainObject = findDisplayableTokenStats({});
  assert.equal(fromPlainObject, null);
});
test("buildPiTokenStatsEntry round-trips through readPiTokenStatsEntry", () => {
  // The same custom-type string tags the entry and is passed to the reader.
  const customType = "token-stats-turn";

  const payload = buildPiTokenStatsEntry("assistant-123", {
    requestStartMs: 10,
    firstOutputMs: 20,
    responseEndMs: 30,
    inputTokens: 40,
    outputTokens: 50,
    thinkingTokens: 60,
  });

  // Wrap the built payload in a custom entry and read it back.
  const restored = readPiTokenStatsEntry(
    { type: "custom", customType, data: payload },
    customType,
  );
  assert.deepEqual(restored, payload);
});