Add InferenceStats proto message and InferenceStatsPanel component that displays token counts, throughput, and context window utilization as a collapsible panel below assistant messages after orchestration completes. Closes #43 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
175 lines
5.4 KiB
TypeScript
175 lines
5.4 KiB
TypeScript
import type { ChatMessage } from '$lib/types';
|
|
import type { SubagentResult, InferenceStats } from '$lib/proto/llm_multiverse/v1/common_pb';
|
|
import type { SessionConfig } from '$lib/proto/llm_multiverse/v1/orchestrator_pb';
|
|
import { processRequest, OrchestratorError, friendlyMessage } from '$lib/services/orchestrator';
|
|
import { OrchestrationState } from '$lib/proto/llm_multiverse/v1/orchestrator_pb';
|
|
import { sessionStore } from '$lib/stores/sessions.svelte';
|
|
import { memoryStore } from '$lib/stores/memory.svelte';
|
|
import { auditStore } from '$lib/stores/audit.svelte';
|
|
import { logger } from '$lib/utils/logger';
|
|
|
|
/**
 * Builds an orchestration controller holding the reactive state for one
 * request/response stream against the orchestrator service.
 *
 * The returned object exposes read-only getters over the internal `$state`
 * values plus three actions: `send`, `retry` and `reset`.
 */
export function createOrchestration() {
  /** Prefix written into the assistant message when a request fails; `retry` looks for it. */
  const FAILURE_MARKER = '\u26A0';

  let isStreaming = $state(false);
  let error: string | null = $state(null);
  let lastFailedContent: string | null = $state(null);
  let orchestrationState: OrchestrationState = $state(OrchestrationState.UNSPECIFIED);
  let intermediateResult: string = $state('');
  let finalResult: SubagentResult | null = $state(null);
  let inferenceStats: InferenceStats | null = $state(null);

  /**
   * Sends `content` to the orchestrator and streams the reply into `messages`.
   *
   * A user message and an empty assistant placeholder are pushed onto
   * `messages` (the array is mutated in place). Each streamed response then
   * replaces the last entry's content with `response.message`. On failure the
   * last entry's content becomes a warning-prefixed, user-friendly error text
   * and `content` is remembered so `retry` can resend it.
   *
   * @param sessionId - Session the request, audit events and memory candidates belong to.
   * @param content - User text to send.
   * @param config - Session configuration forwarded to `processRequest`.
   * @param messages - Conversation so far; mutated and also returned.
   * @returns The same `messages` array after the stream has finished or failed.
   */
  async function send(
    sessionId: string,
    content: string,
    config: SessionConfig,
    messages: ChatMessage[]
  ): Promise<ChatMessage[]> {
    // Clear everything left over from the previous request.
    error = null;
    lastFailedContent = null;
    orchestrationState = OrchestrationState.UNSPECIFIED;
    intermediateResult = '';
    finalResult = null;
    inferenceStats = null;

    // Last state written to the audit log; used to record transitions only once.
    let lastAuditState = OrchestrationState.UNSPECIFIED;

    // Replaces the last entry (the assistant placeholder pushed below) with a
    // copy that carries the new content.
    const setAssistantContent = (text: string): void => {
      const idx = messages.length - 1;
      messages[idx] = {
        ...messages[idx],
        content: text
      };
    };

    const userMessage: ChatMessage = {
      id: crypto.randomUUID(),
      role: 'user',
      content,
      // eslint-disable-next-line svelte/prefer-svelte-reactivity -- plain data, not reactive state
      timestamp: new Date()
    };
    messages.push(userMessage);

    const assistantMessage: ChatMessage = {
      id: crypto.randomUUID(),
      role: 'assistant',
      content: '',
      // eslint-disable-next-line svelte/prefer-svelte-reactivity -- plain data, not reactive state
      timestamp: new Date()
    };
    messages.push(assistantMessage);

    sessionStore.updateMessages(sessionId, messages);
    isStreaming = true;

    try {
      for await (const response of processRequest(sessionId, content, config)) {
        orchestrationState = response.state;

        // Log and audit only when the state actually changes.
        if (response.state !== lastAuditState) {
          const stateLabel = OrchestrationState[response.state] ?? String(response.state);
          logger.debug('useOrchestration', `State: ${stateLabel}`, {
            sessionId,
            state: response.state
          });
          auditStore.addEvent(sessionId, {
            eventType: 'state_change',
            details: response.message || `State changed to ${stateLabel}`,
            state: stateLabel
          });
          lastAuditState = response.state;
        }

        if (response.intermediateResult) {
          intermediateResult = response.intermediateResult;
        }
        if (response.inferenceStats) {
          inferenceStats = response.inferenceStats;
        }
        if (response.finalResult) {
          finalResult = response.finalResult;
          if (response.finalResult.newMemoryCandidates.length > 0) {
            memoryStore.addCandidates(
              sessionId,
              response.finalResult.newMemoryCandidates.map((mc) => ({
                content: mc.content,
                source: mc.source,
                confidence: mc.confidence
              }))
            );
          }
        }

        setAssistantContent(response.message);
      }
    } catch (err) {
      const orcErr = err instanceof OrchestratorError ? err : null;
      const code = orcErr ? orcErr.code : 'unknown';
      const details = orcErr ? orcErr.details : undefined;
      const friendlyMsg = orcErr
        ? friendlyMessage(orcErr.code)
        : 'An unexpected error occurred';
      // Append the code only when it carries information.
      const displayMsg =
        code && code !== 'unknown'
          ? `${friendlyMsg} (${code})`
          : friendlyMsg;

      error = displayMsg;
      lastFailedContent = content;
      logger.error('useOrchestration', 'Request failed', { code, details });
      auditStore.addEvent(sessionId, {
        eventType: 'error',
        details: `${friendlyMsg} | code=${code}${details ? ` | details=${details}` : ''}`
      });

      setAssistantContent(`${FAILURE_MARKER} ${displayMsg}`);
    } finally {
      isStreaming = false;
      sessionStore.updateMessages(sessionId, messages);
    }

    return messages;
  }

  /**
   * Resends the content of the last failed request, if there is one.
   *
   * When the tail of `messages` is the failed user/assistant pair (the
   * assistant entry starts with the failure marker), that pair is dropped
   * first so the retry does not duplicate it. The resend is started but not
   * awaited.
   *
   * @param sessionId - Session to retry in.
   * @param config - Session configuration forwarded to `send`.
   * @param messages - Current conversation; not mutated when the failed pair is trimmed (a sliced copy is used instead).
   * @returns The array handed to `send` and whether a resend was started.
   */
  function retry(
    sessionId: string,
    config: SessionConfig,
    messages: ChatMessage[]
  ): { messages: ChatMessage[]; sending: boolean } {
    if (!lastFailedContent) return { messages, sending: false };
    const content = lastFailedContent;

    // Remove the failed user+assistant pair before retrying
    const count = messages.length;
    const endsWithFailedPair =
      count >= 2 &&
      messages[count - 2].role === 'user' &&
      messages[count - 1].role === 'assistant' &&
      messages[count - 1].content.startsWith(FAILURE_MARKER);
    if (endsWithFailedPair) {
      messages = messages.slice(0, -2);
    }

    // Fire-and-forget on purpose. `send` handles stream errors itself, but the
    // store updates outside its try block can still throw, so the rejection is
    // caught here instead of being left unhandled.
    void send(sessionId, content, config, messages).catch((err: unknown) => {
      logger.error('useOrchestration', 'Retry failed unexpectedly', {
        error: err instanceof Error ? err.message : String(err)
      });
    });
    return { messages, sending: true };
  }

  /** Clears the error, the final result and the inference stats; other state is left as is. */
  function reset() {
    error = null;
    finalResult = null;
    inferenceStats = null;
  }

  return {
    get isStreaming() { return isStreaming; },
    get error() { return error; },
    get lastFailedContent() { return lastFailedContent; },
    get orchestrationState() { return orchestrationState; },
    get intermediateResult() { return intermediateResult; },
    get finalResult() { return finalResult; },
    get inferenceStats() { return inferenceStats; },
    send,
    retry,
    reset
  };
}
|
|
|
|
/**
 * Shared orchestration instance, created once when this module is loaded.
 *
 * Keeping it at module level lets orchestration state survive tab/route
 * changes; the chat page uses this instance rather than calling
 * createOrchestration() on every mount.
 */
export const orchestrationStore: ReturnType<typeof createOrchestration> =
  createOrchestration();
|