feat: add error handling, toast notifications, connection status, and retry logic (#20)

Add global error boundary, toast notification system for gRPC errors,
connection status indicator in chat header, and automatic retry with
exponential backoff for transient failures. Map gRPC status codes to
user-friendly messages and add a Retry button on failed requests.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
shahondin1624
2026-03-12 13:27:40 +01:00
parent 284f84bd39
commit 14c83832f5
10 changed files with 493 additions and 23 deletions

View File

@@ -7,6 +7,8 @@ import type {
} from '$lib/proto/llm_multiverse/v1/orchestrator_pb';
import { create } from '@bufbuild/protobuf';
import { ProcessRequestRequestSchema } from '$lib/proto/llm_multiverse/v1/orchestrator_pb';
import { connectionStore } from '$lib/stores/connection.svelte';
import { toastStore } from '$lib/stores/toast.svelte';
/**
* Application-level error wrapping gRPC status codes.
@@ -22,6 +24,45 @@ export class OrchestratorError extends Error {
}
}
/**
 * Human-readable descriptions for each gRPC status code, shown to end
 * users in toasts and error banners.
 */
const GRPC_USER_MESSAGES: Record<string, string> = {
	// Connectivity / timing
	unavailable: 'The server is currently unavailable. Please try again later.',
	deadline_exceeded: 'The request timed out. Please try again.',
	cancelled: 'The request was cancelled.',
	aborted: 'The operation was aborted. Please try again.',
	// Resource state
	not_found: 'The requested resource was not found.',
	already_exists: 'The resource already exists.',
	failed_precondition: 'The operation cannot be performed in the current state.',
	resource_exhausted: 'Rate limit reached. Please wait a moment and try again.',
	// Authentication / authorization
	permission_denied: 'You do not have permission to perform this action.',
	unauthenticated: 'Authentication required. Please log in.',
	// Server-side
	internal: 'An internal server error occurred. Please try again.',
	unimplemented: 'This feature is not yet available.',
	data_loss: 'Data loss detected. Please contact support.',
	unknown: 'An unexpected error occurred. Please try again.'
};

/**
 * Status codes that indicate a temporary condition worth retrying.
 */
const TRANSIENT_CODES = new Set(['unavailable', 'deadline_exceeded', 'aborted', 'internal']);

/**
 * Look up the user-facing message for a gRPC status code.
 * Falls back to the generic "unknown" message for unrecognised codes.
 */
export function friendlyMessage(code: string): string {
	const message = GRPC_USER_MESSAGES[code.toLowerCase()];
	return message ?? GRPC_USER_MESSAGES['unknown'];
}

/**
 * Whether the given error code is transient and should be retried.
 */
function isTransient(code: string): boolean {
	return TRANSIENT_CODES.has(code.toLowerCase());
}
// Endpoint used when the caller does not supply one — requests go to the
// same origin the app is served from.
const DEFAULT_ENDPOINT = '/';
// Module-level cache for the gRPC-web transport; populated lazily
// (presumably by getTransport — its body is not visible in this chunk).
let transport: ReturnType<typeof createGrpcWebTransport> | null = null;
@@ -49,9 +90,56 @@ function getClient(endpoint?: string) {
return createClient(OrchestratorService, getTransport(endpoint));
}
/**
 * Resolve after roughly `ms` milliseconds; used to space out retries.
 */
function sleep(ms: number): Promise<void> {
	return new Promise<void>((resolve) => {
		setTimeout(() => resolve(), ms);
	});
}
/**
 * Exponential backoff delay (in ms) for a zero-based retry attempt.
 *
 * The base doubles each attempt (1s, 2s, 4s, ...) up to an 8s ceiling,
 * then up to 25% random jitter is added on top of the capped base to
 * avoid thundering-herd retries.
 */
function backoffDelay(attempt: number): number {
	const cappedBase = Math.min(1000 * 2 ** attempt, 8000);
	return cappedBase + Math.random() * cappedBase * 0.25;
}

/** Maximum number of retry attempts for transient failures. */
const MAX_RETRIES = 3;
/**
 * Numeric gRPC status codes (as used by Connect's numeric `Code` enum)
 * mapped to their canonical snake_case names, so numeric codes can be
 * matched against GRPC_USER_MESSAGES / TRANSIENT_CODES.
 */
const GRPC_CODE_NAMES: Record<number, string> = {
	1: 'cancelled',
	2: 'unknown',
	3: 'invalid_argument',
	4: 'deadline_exceeded',
	5: 'not_found',
	6: 'already_exists',
	7: 'permission_denied',
	8: 'resource_exhausted',
	9: 'failed_precondition',
	10: 'aborted',
	11: 'out_of_range',
	12: 'unimplemented',
	13: 'internal',
	14: 'unavailable',
	15: 'data_loss',
	16: 'unauthenticated'
};

/**
 * Extract a gRPC error code from an error, normalising to a lowercase
 * snake_case name.
 *
 * Connect's `ConnectError.code` is a *numeric* enum; previously a numeric
 * code was stringified (e.g. "14"), which never matched the snake_case
 * keys in GRPC_USER_MESSAGES or TRANSIENT_CODES — every such error fell
 * through to the generic "unknown" message and was never classified as
 * transient/retriable. Known numeric codes are now translated to their
 * canonical names; unknown numbers still stringify as before.
 */
function extractCode(err: unknown): string {
	if (err instanceof Error && 'code' in err) {
		const raw = (err as { code: unknown }).code;
		if (typeof raw === 'string') return raw.toLowerCase();
		if (typeof raw === 'number') return GRPC_CODE_NAMES[raw] ?? String(raw);
	}
	return 'unknown';
}
/**
 * Normalise an arbitrary thrown value into an OrchestratorError carrying
 * a user-friendly message.
 *
 * Already-wrapped errors pass through untouched; plain Errors get their
 * gRPC code extracted and keep the original message as detail; any other
 * thrown value becomes a generic "unknown" error.
 */
function toOrchestratorError(err: unknown): OrchestratorError {
	if (err instanceof OrchestratorError) {
		return err;
	}
	if (!(err instanceof Error)) {
		return new OrchestratorError(friendlyMessage('unknown'), 'unknown');
	}
	const code = extractCode(err);
	return new OrchestratorError(friendlyMessage(code), code, err.message);
}
/**
* Send a request to the orchestrator and yield streaming responses.
*
* Includes automatic retry with exponential backoff for transient failures.
* Updates the connection status store on success or failure and fires
* toast notifications on errors.
*
* Returns an async iterator of `ProcessRequestResponse` messages,
* each containing the current orchestration state, status message,
* and optionally intermediate or final results.
@@ -62,28 +150,48 @@ export async function* processRequest(
sessionConfig?: SessionConfig,
endpoint?: string
): AsyncGenerator<ProcessRequestResponse> {
const client = getClient(endpoint);
const request = create(ProcessRequestRequestSchema, {
sessionId,
userMessage,
sessionConfig
});
try {
for await (const response of client.processRequest(request)) {
yield response;
let lastError: OrchestratorError | null = null;
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
if (attempt > 0) {
connectionStore.setReconnecting();
const delay = backoffDelay(attempt - 1);
await sleep(delay);
}
} catch (err: unknown) {
if (err instanceof Error) {
// ConnectError has a `code` property
const code = 'code' in err ? (err as { code: unknown }).code : undefined;
throw new OrchestratorError(
err.message,
typeof code === 'string' ? code : 'unknown',
err.message
);
try {
const client = getClient(endpoint);
for await (const response of client.processRequest(request)) {
connectionStore.reportSuccess();
yield response;
}
// Completed successfully — no retry needed
return;
} catch (err: unknown) {
lastError = toOrchestratorError(err);
const code = lastError.code;
if (isTransient(code) && attempt < MAX_RETRIES) {
// Will retry — continue loop
connectionStore.reportFailure();
continue;
}
// Non-transient or exhausted retries
connectionStore.reportFailure();
toastStore.addToast({ message: lastError.message, type: 'error' });
throw lastError;
}
throw new OrchestratorError('Unknown error', 'unknown');
}
// Should not reach here, but guard against it
if (lastError) {
throw lastError;
}
}