Files
pi-extensions/shared/token-stats.ts
T
shahondin1624 c464f6b903 fix generation token-rate disappearing on empty completions
A clean completion that emits a single token with no content delta never
captured firstOutputMs, so the footer's generation rate (G) computed null while
the processing rate (P) survived via the responseEndMs fallback — the stat
visibly dropped out on those turns.

Add findDisplayableTokenStats, which walks back to the most recent turn that has
a usable generation rate so a degenerate turn no longer blanks the display, and
point the footer at it. Falls back to the newest turn with any stats so P still
shows when no turn has a generation rate. findLatestPiTokenStats (persistence)
is unchanged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-28 20:33:22 +02:00

241 lines
6.9 KiB
TypeScript

/**
 * Raw timing and token-count measurements for one model request.
 *
 * Timestamps are in milliseconds (rates are derived by dividing elapsed
 * milliseconds by 1000). Only `requestStartMs` is mandatory; the remaining
 * fields are filled in as the request progresses (`captureFirstOutput`,
 * `finalizePiTokenStats`) and may be absent.
 */
export interface PiTokenStats {
/** Start of the request; the start of the processing-rate interval. */
requestStartMs: number;
/**
 * Time of the first output, recorded at most once by `captureFirstOutput`.
 * Ends the processing interval and starts the generation interval; when it
 * is missing no generation rate can be computed.
 */
firstOutputMs?: number;
/**
 * Time the response finished (set by `finalizePiTokenStats`). Ends the
 * generation interval and is the fallback end of the processing interval
 * when `firstOutputMs` is missing.
 */
responseEndMs?: number;
/** Input token count; numerator of the processing rate. */
inputTokens?: number;
/** Output token count; added to `thinkingTokens` for the generation rate. */
outputTokens?: number;
/** Thinking token count; added to `outputTokens` for the generation rate. */
thinkingTokens?: number;
}
/**
 * Token rates derived from a `PiTokenStats` by `summarizePiTokenStats`.
 * A rate is `null` when it cannot be computed (missing timestamp or token
 * count, non-positive token count, or a non-positive interval).
 */
export interface PiTokenRateSummary {
/**
 * Input tokens per second, measured from `requestStartMs` to
 * `firstOutputMs` (or to `responseEndMs` when there is no first output).
 */
processingTokensPerSecond: number | null;
/**
 * Output plus thinking tokens per second, measured from `firstOutputMs` to
 * `responseEndMs`.
 */
generationTokensPerSecond: number | null;
}
/**
 * Token stats paired with the rates derived from them, as produced by
 * `buildPiTokenStatsEntry` and parsed back by `readPiTokenStatsEntry`.
 */
export interface PiTokenStatsEntry {
/**
 * Non-empty identifier (`readPiTokenStatsEntry` rejects an empty string).
 * Presumably the id of the assistant entry these stats belong to — confirm
 * against the caller.
 */
assistantEntryId: string;
/** The raw measurements. */
stats: PiTokenStats;
/** Rates computed from `stats` via `summarizePiTokenStats`. */
summary: PiTokenRateSummary;
}
/**
 * Narrow an arbitrary value to a finite number.
 *
 * @param value - Anything, typically a field read from untyped data.
 * @returns `value` itself when it is a number other than NaN / ±Infinity;
 *   otherwise `null`. No coercion is performed (numeric strings yield `null`).
 */
function readNumber(value: unknown): number | null {
  return typeof value === "number" && Number.isFinite(value) ? value : null;
}
/**
 * Record the time of the first output on `stats`, in place.
 *
 * Only the first call with a finite timestamp has any effect: once
 * `firstOutputMs` is set it is never overwritten, and non-finite timestamps
 * (NaN, ±Infinity) are ignored.
 *
 * @param stats - Stats object to mutate.
 * @param nowMs - Candidate first-output timestamp in milliseconds.
 */
export function captureFirstOutput(stats: PiTokenStats, nowMs: number): void {
  const alreadyCaptured = stats.firstOutputMs !== undefined;
  if (alreadyCaptured || !Number.isFinite(nowMs)) {
    return;
  }
  stats.firstOutputMs = nowMs;
}
/**
 * Produce the completed stats for a finished response.
 *
 * The input `stats` object is not modified; a new object is returned that
 * carries over the start and first-output timestamps and adds the end
 * timestamp and token counts.
 *
 * @param stats - In-flight stats (only `requestStartMs` and `firstOutputMs`
 *   are read).
 * @param usage - Token usage reported for the response.
 * @param nowMs - Response end timestamp in milliseconds, stored as-is.
 * @returns A fresh `PiTokenStats` with all six fields present
 *   (`firstOutputMs` / `thinkingTokens` may hold `undefined`).
 */
export function finalizePiTokenStats(
  stats: PiTokenStats,
  usage: { input: number; output: number; thinking?: number },
  nowMs: number,
): PiTokenStats {
  const { requestStartMs, firstOutputMs } = stats;
  const { input, output, thinking } = usage;
  const finalized: PiTokenStats = {
    requestStartMs,
    firstOutputMs,
    responseEndMs: nowMs,
    inputTokens: input,
    outputTokens: output,
    thinkingTokens: thinking,
  };
  return finalized;
}
/**
 * Extract validated token stats from an untyped message object.
 *
 * Looks for an object-valued `piTokenStats` property on `message` and
 * copies its numeric fields. `requestStartMs` is required; every other
 * field is optional and becomes `undefined` when missing or not a finite
 * number.
 *
 * @param message - Any value; non-objects are rejected.
 * @returns The parsed stats, or `null` when `message` is not an object, has
 *   no object-valued `piTokenStats`, or lacks a finite `requestStartMs`.
 */
export function readPiTokenStats(message: unknown): PiTokenStats | null {
  if (typeof message !== "object" || message === null) {
    return null;
  }
  const raw = (message as { piTokenStats?: unknown }).piTokenStats;
  if (typeof raw !== "object" || raw === null) {
    return null;
  }
  const fields = raw as Record<string, unknown>;

  const requestStartMs = readNumber(fields.requestStartMs);
  if (requestStartMs === null) {
    return null;
  }

  // Optional fields: invalid or absent values collapse to undefined.
  const optional = (key: string): number | undefined => readNumber(fields[key]) ?? undefined;

  return {
    requestStartMs,
    firstOutputMs: optional("firstOutputMs"),
    responseEndMs: optional("responseEndMs"),
    inputTokens: optional("inputTokens"),
    outputTokens: optional("outputTokens"),
    thinkingTokens: optional("thinkingTokens"),
  };
}
/**
 * Coerce the various shapes an entry collection may take into an array.
 *
 * Accepted shapes, checked in this order:
 * 1. an array — returned as-is (same reference);
 * 2. an object whose `entries` property is an array — that array is returned;
 * 3. an iterable object — materialised with `Array.from`.
 *
 * @param entries - Any value.
 * @returns The entries as an array; an empty array for anything else,
 *   including primitives (strings too) and iterables that throw while being
 *   consumed.
 */
export function normalizePiEntries(entries: unknown): readonly unknown[] {
  if (Array.isArray(entries)) {
    return entries;
  }
  if (typeof entries !== "object" || entries === null) {
    return [];
  }

  const container = entries as {
    entries?: unknown;
    [Symbol.iterator]?: unknown;
  };
  if (Array.isArray(container.entries)) {
    return container.entries;
  }
  if (typeof container[Symbol.iterator] !== "function") {
    return [];
  }

  try {
    return Array.from(container as Iterable<unknown>);
  } catch {
    // A misbehaving iterator is treated the same as "no entries".
    return [];
  }
}
/**
 * Read the token stats of the single newest assistant message.
 *
 * Entries are scanned from newest to oldest. Non-object entries and entries
 * that are not assistant messages (`type === "message"` with
 * `message.role === "assistant"`) are skipped. The scan stops at the first
 * assistant message found, whether or not it carries stats — older turns
 * are never consulted.
 *
 * @param entries - Entry collection in any shape `normalizePiEntries`
 *   accepts.
 * @returns The newest assistant message's stats, or `null` when there is no
 *   assistant message or it has no valid stats.
 */
export function findLatestPiTokenStats(entries: unknown): PiTokenStats | null {
  const timeline = normalizePiEntries(entries);
  for (let i = timeline.length - 1; i >= 0; i -= 1) {
    const item = timeline[i];
    if (typeof item !== "object" || item === null) {
      continue;
    }
    const { type, message } = item as {
      type?: string;
      message?: {
        role?: string;
      };
    };
    if (type === "message" && message?.role === "assistant") {
      // Newest assistant turn decides the outcome: its stats or nothing.
      return readPiTokenStats(message);
    }
  }
  return null;
}
/**
 * Pick the assistant turn whose stats the footer should display.
 *
 * `findLatestPiTokenStats` looks only at the newest assistant turn. That is
 * a problem for display: an empty or single-token completion with no
 * content delta never gets a `firstOutputMs`, so its
 * `generationTokensPerSecond` is null while its processing rate still
 * resolves through the `responseEndMs` fallback. The footer would then show
 * "P …" with "G …" missing, even though the previous turn had a perfectly
 * good generation measurement.
 *
 * This function therefore walks backwards through the assistant turns that
 * have stats and returns:
 * - the newest one with a non-null generation rate, if any;
 * - otherwise the newest one with any stats at all, so the processing rate
 *   can still be shown;
 * - otherwise `null`.
 *
 * @param entries - Entry collection in any shape `normalizePiEntries`
 *   accepts.
 */
export function findDisplayableTokenStats(entries: unknown): PiTokenStats | null {
  const timeline = normalizePiEntries(entries);
  let newestWithStats: PiTokenStats | null = null;

  for (let i = timeline.length - 1; i >= 0; i -= 1) {
    const item = timeline[i];
    if (typeof item !== "object" || item === null) {
      continue;
    }
    const { type, message } = item as {
      type?: string;
      message?: {
        role?: string;
      };
    };
    const isAssistantMessage = type === "message" && message?.role === "assistant";
    if (!isAssistantMessage) {
      continue;
    }

    const stats = readPiTokenStats(message);
    if (stats === null) {
      continue;
    }

    const { generationTokensPerSecond } = summarizePiTokenStats(stats);
    if (generationTokensPerSecond !== null) {
      return stats;
    }

    // Remember only the first (i.e. newest) stats-bearing turn as fallback.
    if (newestWithStats === null) {
      newestWithStats = stats;
    }
  }

  return newestWithStats;
}
/**
 * Compute a tokens-per-second rate over a millisecond interval.
 *
 * @param tokens - Number of tokens counted in the interval.
 * @param startMs - Interval start, in milliseconds.
 * @param endMs - Interval end, in milliseconds.
 * @returns `tokens / seconds elapsed`, or `null` when the rate is not
 *   meaningful: any argument is missing or non-finite (NaN / ±Infinity),
 *   `tokens` is not positive, or the interval is empty or negative.
 */
function computeRate(tokens: number | undefined, startMs: number | undefined, endMs: number | undefined): number | null {
  if (tokens === undefined || startMs === undefined || endMs === undefined) {
    return null;
  }
  // Every comparison against NaN is false, so NaN would sail through the
  // range checks below and surface as a NaN rate; reject non-finite inputs
  // explicitly.
  if (!Number.isFinite(tokens) || !Number.isFinite(startMs) || !Number.isFinite(endMs)) {
    return null;
  }
  if (tokens <= 0 || endMs <= startMs) {
    return null;
  }
  return tokens / ((endMs - startMs) / 1000);
}
/**
 * Derive processing and generation rates from raw stats.
 *
 * - Processing rate: `inputTokens` over the span from `requestStartMs` to
 *   `firstOutputMs`, falling back to `responseEndMs` when no first output
 *   was recorded.
 * - Generation rate: `outputTokens + thinkingTokens` (each defaulting to 0)
 *   over the span from `firstOutputMs` to `responseEndMs`. Without a
 *   `firstOutputMs` this rate is `null`.
 *
 * @param stats - Measurements for one request.
 * @returns Both rates; each is `null` when `computeRate` cannot produce it.
 */
export function summarizePiTokenStats(stats: PiTokenStats): PiTokenRateSummary {
  const { requestStartMs, firstOutputMs, responseEndMs, inputTokens, outputTokens, thinkingTokens } = stats;
  const promptPhaseEndMs = firstOutputMs ?? responseEndMs;
  const producedTokens = (outputTokens ?? 0) + (thinkingTokens ?? 0);

  const processingTokensPerSecond = computeRate(inputTokens, requestStartMs, promptPhaseEndMs);
  const generationTokensPerSecond = computeRate(producedTokens, firstOutputMs, responseEndMs);
  return { processingTokensPerSecond, generationTokensPerSecond };
}
/**
 * Render a rate summary as a compact status string.
 *
 * Each non-null rate becomes one segment with a single decimal place —
 * `P <rate>tok/s` for processing, `G <rate>tok/s` for generation — and the
 * segments are joined by a space, processing first.
 *
 * @param summary - Rates to render.
 * @returns The status text, or `null` when both rates are `null`.
 */
export function formatPiTokenRateStatus(summary: PiTokenRateSummary): string | null {
  const { processingTokensPerSecond: processing, generationTokensPerSecond: generation } = summary;
  const segments = [
    processing !== null ? `P ${processing.toFixed(1)}tok/s` : null,
    generation !== null ? `G ${generation.toFixed(1)}tok/s` : null,
  ].filter((segment): segment is string => segment !== null);
  return segments.length === 0 ? null : segments.join(" ");
}
/**
 * Bundle stats with their derived rate summary under an entry id.
 *
 * @param assistantEntryId - Identifier stored on the entry unchanged.
 * @param stats - Raw measurements; stored by reference, not copied.
 * @returns An entry holding the id, the stats, and
 *   `summarizePiTokenStats(stats)`.
 */
export function buildPiTokenStatsEntry(assistantEntryId: string, stats: PiTokenStats): PiTokenStatsEntry {
  const summary = summarizePiTokenStats(stats);
  return { assistantEntryId, stats, summary };
}
/**
 * Parse a persisted custom entry back into a `PiTokenStatsEntry`.
 *
 * The entry must be an object with `type === "custom"`, a `customType`
 * equal to the one requested, and an object-valued `data` holding a
 * non-empty string `assistantEntryId` plus a `stats` object that
 * `readPiTokenStats` accepts. The rate summary is recomputed from the
 * parsed stats rather than read from the entry.
 *
 * @param entry - Any value.
 * @param customType - The custom type tag the entry must carry.
 * @returns The rebuilt entry, or `null` when any of the checks fails.
 */
export function readPiTokenStatsEntry(entry: unknown, customType: string): PiTokenStatsEntry | null {
  if (typeof entry !== "object" || entry === null) {
    return null;
  }
  const { type, customType: entryCustomType, data } = entry as {
    type?: string;
    customType?: string;
    data?: unknown;
  };
  const isRequestedCustomEntry = type === "custom" && entryCustomType === customType;
  if (!isRequestedCustomEntry) {
    return null;
  }
  if (typeof data !== "object" || data === null) {
    return null;
  }

  const payload = data as Record<string, unknown>;
  const assistantEntryId = payload.assistantEntryId;
  if (typeof assistantEntryId !== "string" || assistantEntryId === "") {
    return null;
  }

  // Reuse the message reader by wrapping the stored stats in the shape it expects.
  const stats = readPiTokenStats({ piTokenStats: payload.stats });
  return stats === null ? null : buildPiTokenStatsEntry(assistantEntryId, stats);
}