Files
llm-multiverse-ui/src/lib/composables/useOrchestration.svelte.ts
shahondin1624 52eaf661c4 feat: display inference statistics in chat UI
Add InferenceStats proto message and InferenceStatsPanel component that
displays token counts, throughput, and context window utilization as a
collapsible panel below assistant messages after orchestration completes.

Closes #43

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-13 14:44:12 +01:00

175 lines
5.4 KiB
TypeScript

import type { ChatMessage } from '$lib/types';
import type { SubagentResult, InferenceStats } from '$lib/proto/llm_multiverse/v1/common_pb';
import type { SessionConfig } from '$lib/proto/llm_multiverse/v1/orchestrator_pb';
import { processRequest, OrchestratorError, friendlyMessage } from '$lib/services/orchestrator';
import { OrchestrationState } from '$lib/proto/llm_multiverse/v1/orchestrator_pb';
import { sessionStore } from '$lib/stores/sessions.svelte';
import { memoryStore } from '$lib/stores/memory.svelte';
import { auditStore } from '$lib/stores/audit.svelte';
import { logger } from '$lib/utils/logger';
/**
 * Creates the reactive controller that drives orchestration requests for the chat UI.
 *
 * The controller streams responses from `processRequest`, mirrors the latest
 * orchestration state, intermediate result, final result and inference stats
 * into Svelte `$state` fields, records state changes and failures in the audit
 * store, forwards new memory candidates to the memory store, and writes the
 * message list back to the session store.
 *
 * @returns Getters for the reactive fields plus the `send`, `retry` and `reset` actions.
 */
export function createOrchestration() {
	// Prefix written in front of a failed assistant message by `send`; `retry`
	// looks for the same prefix to recognise the failed user/assistant pair.
	const ERROR_PREFIX = '\u26A0';

	let isStreaming = $state(false);
	let error: string | null = $state(null);
	let lastFailedContent: string | null = $state(null);
	let orchestrationState: OrchestrationState = $state(OrchestrationState.UNSPECIFIED);
	let intermediateResult: string = $state('');
	let finalResult: SubagentResult | null = $state(null);
	let inferenceStats: InferenceStats | null = $state(null);

	/**
	 * Appends a user message and an empty assistant placeholder to `messages`,
	 * streams the orchestrator's responses, and keeps the placeholder's content
	 * in sync with the latest `response.message`.
	 *
	 * Errors thrown while streaming are caught: they are exposed through `error`
	 * and `lastFailedContent`, logged, recorded as an audit event, and rendered
	 * into the assistant placeholder.
	 *
	 * @param sessionId - Session the request belongs to.
	 * @param content - Text of the user's message.
	 * @param config - Session configuration passed through to `processRequest`.
	 * @param messages - Message list to extend; it is mutated in place.
	 * @returns The same `messages` array that was passed in.
	 */
	async function send(
		sessionId: string,
		content: string,
		config: SessionConfig,
		messages: ChatMessage[]
	): Promise<ChatMessage[]> {
		// Every request starts from a clean slate.
		error = null;
		lastFailedContent = null;
		orchestrationState = OrchestrationState.UNSPECIFIED;
		intermediateResult = '';
		finalResult = null;
		inferenceStats = null;

		// Last state written to the audit log, so repeated states are logged once.
		let lastAuditState = OrchestrationState.UNSPECIFIED;

		const userMessage: ChatMessage = {
			id: crypto.randomUUID(),
			role: 'user',
			content,
			// eslint-disable-next-line svelte/prefer-svelte-reactivity -- plain data, not reactive state
			timestamp: new Date()
		};
		messages.push(userMessage);

		const assistantMessage: ChatMessage = {
			id: crypto.randomUUID(),
			role: 'assistant',
			content: '',
			// eslint-disable-next-line svelte/prefer-svelte-reactivity -- plain data, not reactive state
			timestamp: new Date()
		};
		messages.push(assistantMessage);

		// Locate the placeholder by id rather than assuming it is still the last
		// element: the array may have grown while the stream was open. Searching
		// from the end keeps the common case (placeholder is last) cheap.
		const setAssistantContent = (text: string): void => {
			for (let i = messages.length - 1; i >= 0; i--) {
				if (messages[i].id === assistantMessage.id) {
					messages[i] = { ...messages[i], content: text };
					return;
				}
			}
		};

		sessionStore.updateMessages(sessionId, messages);
		isStreaming = true;

		try {
			for await (const response of processRequest(sessionId, content, config)) {
				orchestrationState = response.state;

				if (response.state !== lastAuditState) {
					const stateLabel = OrchestrationState[response.state] ?? String(response.state);
					logger.debug('useOrchestration', `State: ${stateLabel}`, {
						sessionId,
						state: response.state
					});
					auditStore.addEvent(sessionId, {
						eventType: 'state_change',
						details: response.message || `State changed to ${stateLabel}`,
						state: stateLabel
					});
					lastAuditState = response.state;
				}

				if (response.intermediateResult) {
					intermediateResult = response.intermediateResult;
				}

				if (response.inferenceStats) {
					inferenceStats = response.inferenceStats;
				}

				if (response.finalResult) {
					finalResult = response.finalResult;
					if (response.finalResult.newMemoryCandidates.length > 0) {
						memoryStore.addCandidates(
							sessionId,
							response.finalResult.newMemoryCandidates.map((mc) => ({
								content: mc.content,
								source: mc.source,
								confidence: mc.confidence
							}))
						);
					}
				}

				setAssistantContent(response.message);
			}
		} catch (err) {
			const isOrcErr = err instanceof OrchestratorError;
			const code = isOrcErr ? err.code : 'unknown';
			const details = isOrcErr ? err.details : undefined;
			const friendlyMsg = isOrcErr
				? friendlyMessage(err.code)
				: 'An unexpected error occurred';
			// Only append the code when there is a meaningful one to show.
			const displayMsg =
				code && code !== 'unknown'
					? `${friendlyMsg} (${code})`
					: friendlyMsg;

			error = displayMsg;
			// Remember what was sent so `retry` can resend it.
			lastFailedContent = content;

			logger.error('useOrchestration', 'Request failed', { code, details });
			auditStore.addEvent(sessionId, {
				eventType: 'error',
				details: `${friendlyMsg} | code=${code}${details ? ` | details=${details}` : ''}`
			});

			setAssistantContent(`${ERROR_PREFIX} ${displayMsg}`);
		} finally {
			isStreaming = false;
			sessionStore.updateMessages(sessionId, messages);
		}

		return messages;
	}

	/**
	 * Resends the content of the most recent failed request, if there is one.
	 *
	 * When the list ends with a user message followed by an assistant message
	 * carrying the error prefix, that pair is dropped (on a copy of the list)
	 * before resending. The resend is started but not awaited.
	 *
	 * @param sessionId - Session the request belongs to.
	 * @param config - Session configuration passed through to `send`.
	 * @param messages - Current message list.
	 * @returns The message list the resend operates on and whether a resend was started.
	 */
	function retry(
		sessionId: string,
		config: SessionConfig,
		messages: ChatMessage[]
	): { messages: ChatMessage[]; sending: boolean } {
		if (!lastFailedContent) return { messages, sending: false };

		const content = lastFailedContent;

		// Remove the failed user+assistant pair before retrying
		if (
			messages.length >= 2 &&
			messages[messages.length - 2].role === 'user' &&
			messages[messages.length - 1].role === 'assistant' &&
			messages[messages.length - 1].content.startsWith(ERROR_PREFIX)
		) {
			messages = messages.slice(0, -2);
		}

		// Fire-and-forget on purpose: `send` appends the new pair synchronously,
		// so the returned list already contains it. `send` handles stream errors
		// itself, but its own error/cleanup path can still throw, so make sure a
		// rejection is logged instead of becoming an unhandled rejection.
		void send(sessionId, content, config, messages).catch((err: unknown) => {
			logger.error('useOrchestration', 'Retry failed unexpectedly', { error: err });
		});

		return { messages, sending: true };
	}

	/**
	 * Clears the displayed error, the final result and the inference stats.
	 *
	 * Only these three fields are touched; `lastFailedContent`,
	 * `orchestrationState`, `intermediateResult` and `isStreaming` are left as
	 * they are.
	 */
	function reset(): void {
		error = null;
		finalResult = null;
		inferenceStats = null;
	}

	// Expose the reactive fields through getters so consumers always read the
	// current value and cannot assign to them.
	return {
		get isStreaming() { return isStreaming; },
		get error() { return error; },
		get lastFailedContent() { return lastFailedContent; },
		get orchestrationState() { return orchestrationState; },
		get intermediateResult() { return intermediateResult; },
		get finalResult() { return finalResult; },
		get inferenceStats() { return inferenceStats; },
		send,
		retry,
		reset
	};
}
/**
 * Shared orchestration controller, created once when this module is loaded.
 *
 * Because it lives at module level rather than inside a component, its state
 * survives tab and route changes. The chat page uses this instance instead of
 * calling createOrchestration() on every mount.
 */
export const orchestrationStore: ReturnType<typeof createOrchestration> =
	createOrchestration();