Files
pi-extensions/ai-server/index.ts
T
shahondin1624 f1ceeb4363 Refactor pass: shared utils, non-blocking discovery, safer monkey-patches
Audit produced five concrete improvements:

1) New shared/ module (zero-dep pure utilities)
   - shared/ansi.ts: hexToRgb (throws on malformed input instead of
     silently producing NaN), fgFromHex, stripAnsi, visibleWidth,
     ANSI_RESET_FG / ANSI_RESET_ALL constants.
   - shared/format.ts: formatTokens, formatElapsed.
   - shared/ctx.ts: safely() and safelyAsync() helpers for dealing with
     pi's "stale after session replacement or reload" ExtensionRunner
     semantics.

   Removes duplicate helpers from mechanicus-footer, markdown-body-color,
   dark-mechanicus-indicator.

2) ai-server: non-blocking startup + short-race timeout
   - Factory registers STATIC_MODELS immediately so pi startup isn't
     blocked on the HTTPS round-trip.
   - Races discoverModels() against a 300ms timeout. On LAN (~40ms) the
     live list wins and pi --list-models sees the real models. Past the
     timeout, fallback remains and background discovery updates the
     provider later.
   - listModelsCached() with 5s TTL for tab completions (was firing a
     round-trip on every keystroke).
   - loadModel/unloadModel invalidate the cache.

3) dark-mechanicus-indicator: stale-ctx guard
   - Wrap the setInterval ticker body in safely() so a race between
     session_shutdown and the ticker can't crash node. Same pattern as
     the earlier footer fix.

4) Safer monkey-patches in markdown-body-color and mechanicus-thinking-label
   - Feature-detect Markdown/Editor/AssistantMessageComponent's target
     method before patching. Warn and skip rather than silently creating
     a broken prototype if a pi-tui upgrade renames the internal method.

5) Minor
   - Replaced five `as any` casts with typed Record<string, unknown>
     access in the monkey-patch sites.
   - ai-server debug log only fires when actual discovery succeeds.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 00:05:18 +02:00

277 lines
7.8 KiB
TypeScript

import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import {
discoverModels,
extractCtxSize,
listModels,
listModelsCached,
loadModel,
readPreset,
reloadOneModel,
restartService,
setPresetKey,
unloadModel,
} from "./admin.js";
import {
AI_SERVER_API_BASE,
AI_SERVER_API_ID,
AI_SERVER_PROVIDER_ID,
AI_SERVER_URL,
MODELS as STATIC_MODELS,
type ServerModel,
} from "./config.js";
import { streamAiServer } from "./stream.js";
/**
 * Argument completer shared by the model-id commands.
 *
 * Looks up the model list through `listModelsCached()` rather than hitting
 * the router directly: completers run on every keystroke, and the cached
 * lookup (5s, per the original note here) avoids repeating the round-trip
 * while the user types a single id.
 *
 * @param prefix Text typed so far for the argument.
 * @returns One `{ value, label }` entry per model whose id starts with
 *   `prefix`, or `null` when nothing matches or the lookup fails.
 */
async function completeModelId(prefix: string) {
  try {
    const suggestions: { value: string; label: string }[] = [];
    for (const model of await listModelsCached()) {
      if (model.id.startsWith(prefix)) {
        suggestions.push({ value: model.id, label: model.id });
      }
    }
    if (suggestions.length === 0) {
      return null;
    }
    return suggestions;
  } catch {
    // Best effort: completion must never surface an error to the prompt.
    return null;
  }
}
/**
 * (Re-)register the ai-server provider with pi, advertising `models`.
 *
 * Every model is exposed as text-only input with all cost fields set to
 * zero; id, name, reasoning flag, context window and max tokens are copied
 * from the server model entry. Requests are streamed via `streamAiServer`.
 *
 * @param pi Extension API handle used for the registration.
 * @param models Models to advertise under the provider.
 */
function registerProviderWithModels(
  pi: ExtensionAPI,
  models: ServerModel[],
): void {
  pi.registerProvider(AI_SERVER_PROVIDER_ID, {
    baseUrl: AI_SERVER_API_BASE,
    apiKey: "ai-server-mtls",
    api: AI_SERVER_API_ID as any,
    // Mapper stays inline so the literal is contextually typed by pi's
    // provider config.
    models: models.map(
      ({ id, name, reasoning, contextWindow, maxTokens }) => ({
        id,
        name,
        reasoning,
        input: ["text"],
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        contextWindow,
        maxTokens,
      }),
    ),
    streamSimple: streamAiServer,
  });
}
/** How long (ms) extension startup waits for live discovery before moving on. */
const DISCOVERY_FAST_TIMEOUT_MS = 300;

/**
 * Turn a caught value into a printable message.
 *
 * Caught values are `unknown`: reading `.message` blindly prints
 * "undefined" when something other than an Error was thrown.
 */
function describeError(err: unknown): string {
  if (err instanceof Error) return err.message;
  if (typeof err === "object" && err !== null && "message" in err) {
    const { message } = err as { message: unknown };
    if (typeof message === "string") return message;
  }
  return String(err);
}

/**
 * Race live model discovery against DISCOVERY_FAST_TIMEOUT_MS.
 *
 * If discovery answers in time with at least one model, the provider is
 * re-registered before this function resolves. Otherwise the function
 * resolves anyway and a background continuation re-registers the provider
 * if and when discovery eventually yields models.
 */
async function discoverWithFastPath(pi: ExtensionAPI): Promise<void> {
  // Failures resolve to null so neither the race nor the late path rejects
  // because of a discovery error.
  const discovery = discoverModels().catch((err: unknown) => {
    if (process.env.PI_DEBUG) {
      console.log(
        `[ai-server] Discovery failed (${describeError(err)}); fallback remains`,
      );
    }
    return null;
  });

  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<null>((resolve) => {
    timer = setTimeout(() => resolve(null), DISCOVERY_FAST_TIMEOUT_MS);
  });
  const fastResult = await Promise.race([discovery, timeout]);
  // A pending timer keeps the Node event loop alive; drop it once the race
  // is decided so a fast discovery does not hold the process open.
  clearTimeout(timer);

  if (fastResult && fastResult.length > 0) {
    registerProviderWithModels(pi, fastResult);
    if (process.env.PI_DEBUG) {
      console.log(
        `[ai-server] Discovered ${fastResult.length} model(s) on ${AI_SERVER_URL}: ${fastResult.map((m) => m.id).join(", ")}`,
      );
    }
    return;
  }

  // Slow network or discovery still pending — keep waiting in the
  // background and update the provider once it arrives. Deliberately not
  // awaited; the trailing catch keeps a registration failure from becoming
  // an unhandled rejection.
  void discovery
    .then((models) => {
      if (models && models.length > 0) {
        registerProviderWithModels(pi, models);
        if (process.env.PI_DEBUG) {
          console.log(
            `[ai-server] Late discovery: ${models.length} model(s)`,
          );
        }
      }
    })
    .catch((err: unknown) => {
      if (process.env.PI_DEBUG) {
        console.log(
          `[ai-server] Late registration failed (${describeError(err)})`,
        );
      }
    });
}

/** Register the `/ai-server-*` admin commands on `pi`. */
function registerAdminCommands(pi: ExtensionAPI): void {
  pi.registerCommand("ai-server-status", {
    description: "Show ai-server model status and ctx sizes",
    handler: async (_args, ctx) => {
      try {
        const routerModels = await listModels();
        const lines = [`AI Server: ${AI_SERVER_URL}`];
        for (const m of routerModels) {
          const status = m.status?.value ?? "?";
          // Named ctxSize so it does not shadow the handler's `ctx`.
          const ctxSize = extractCtxSize(m);
          const hasModel = (m.status?.args ?? []).includes("--model");
          const marker = hasModel ? " " : " [no model path]";
          lines.push(
            ` ${m.id} [${status}] ctx=${ctxSize ?? "?"}${marker}`,
          );
        }
        ctx.ui.notify(lines.join("\n"), "info");
      } catch (err) {
        ctx.ui.notify(
          `ai-server-status failed: ${describeError(err)}`,
          "error",
        );
      }
    },
  });

  pi.registerCommand("ai-server-refresh", {
    description: "Re-discover models from the ai-server router",
    handler: async (_args, ctx) => {
      try {
        const discovered = await discoverModels();
        if (discovered.length === 0) {
          ctx.ui.notify(
            "No runnable models on server (all presets missing --model path)",
            "warning",
          );
          return;
        }
        registerProviderWithModels(pi, discovered);
        ctx.ui.notify(
          `Registered ${discovered.length} model(s): ${discovered
            .map((m) => m.id)
            .join(", ")}`,
          "info",
        );
      } catch (err) {
        ctx.ui.notify(`Refresh failed: ${describeError(err)}`, "error");
      }
    },
  });

  pi.registerCommand("ai-server-load", {
    description: "Load a model (usage: /ai-server-load <id>)",
    getArgumentCompletions: completeModelId,
    handler: async (args, ctx) => {
      const id = args.trim();
      if (!id) {
        ctx.ui.notify("Usage: /ai-server-load <model-id>", "error");
        return;
      }
      try {
        ctx.ui.setStatus("ai-server", `Loading ${id}`);
        await loadModel(id);
        ctx.ui.setStatus("ai-server", undefined);
        ctx.ui.notify(`Loaded ${id}`, "info");
      } catch (err) {
        ctx.ui.setStatus("ai-server", undefined);
        ctx.ui.notify(`Load failed: ${describeError(err)}`, "error");
      }
    },
  });

  pi.registerCommand("ai-server-unload", {
    description: "Unload a model (usage: /ai-server-unload <id>)",
    getArgumentCompletions: completeModelId,
    handler: async (args, ctx) => {
      const id = args.trim();
      if (!id) {
        ctx.ui.notify("Usage: /ai-server-unload <model-id>", "error");
        return;
      }
      try {
        await unloadModel(id);
        ctx.ui.notify(`Unloaded ${id}`, "info");
      } catch (err) {
        ctx.ui.notify(`Unload failed: ${describeError(err)}`, "error");
      }
    },
  });

  pi.registerCommand("ai-server-ctx", {
    description:
      "Set ctx-size for a model and reload (usage: /ai-server-ctx <id> <size>)",
    getArgumentCompletions: completeModelId,
    handler: async (args, ctx) => {
      const parts = args.trim().split(/\s+/).filter(Boolean);
      if (parts.length !== 2) {
        ctx.ui.notify("Usage: /ai-server-ctx <model-id> <size>", "error");
        return;
      }
      const [id, sizeStr] = parts;
      const size = Number(sizeStr);
      if (!Number.isInteger(size) || size < 512) {
        ctx.ui.notify(`Invalid size: ${sizeStr}`, "error");
        return;
      }
      // Editing the preset and reloading is disruptive, so ask first.
      const ok = await ctx.ui.confirm(
        "Set ctx-size?",
        `Edit preset [${id}] → ctx-size=${size} and reload the model?`,
      );
      if (!ok) return;
      try {
        ctx.ui.setStatus("ai-server", "Editing preset…");
        await setPresetKey(id, "ctx-size", String(size));
        ctx.ui.setStatus("ai-server", `Reloading ${id}`);
        await reloadOneModel(id);
        ctx.ui.setStatus("ai-server", undefined);
        ctx.ui.notify(`${id}: ctx-size=${size}, reloaded`, "info");
      } catch (err) {
        ctx.ui.setStatus("ai-server", undefined);
        ctx.ui.notify(`ctx update failed: ${describeError(err)}`, "error");
      }
    },
  });

  pi.registerCommand("ai-server-preset", {
    description: "Print ~/.llama-models.ini on the ai-server",
    handler: async (_args, ctx) => {
      try {
        const text = await readPreset();
        ctx.ui.notify(text, "info");
      } catch (err) {
        ctx.ui.notify(`Preset read failed: ${describeError(err)}`, "error");
      }
    },
  });

  pi.registerCommand("ai-server-restart", {
    description: "Restart the ai-server llama-server service",
    handler: async (_args, ctx) => {
      const ok = await ctx.ui.confirm(
        "Restart llama-server?",
        "This unloads all models and kills in-flight requests.",
      );
      if (!ok) return;
      try {
        ctx.ui.setStatus("ai-server", "Restarting…");
        const status = await restartService();
        ctx.ui.setStatus("ai-server", undefined);
        ctx.ui.notify(`Service: ${status.trim()}`, "info");
      } catch (err) {
        ctx.ui.setStatus("ai-server", undefined);
        ctx.ui.notify(`Restart failed: ${describeError(err)}`, "error");
      }
    },
  });
}

/**
 * Extension entry point for the ai-server provider.
 *
 * Order of effects:
 *  1. Register the provider IMMEDIATELY with the static fallback list so pi
 *     startup isn't blocked on the HTTPS round-trip in the worst case.
 *  2. Race real discovery against a short timeout. On LAN the router
 *     answers in ~40ms and pi --list-models sees the live list. On slow
 *     networks we bail at 300ms and the fallback is what the user sees; the
 *     background promise keeps running and re-registers later.
 *  3. Register the admin slash commands.
 *
 * @param pi Extension API handle supplied by pi.
 */
export default async function (pi: ExtensionAPI) {
  registerProviderWithModels(pi, STATIC_MODELS);
  await discoverWithFastPath(pi);
  // ─── Admin commands ──────────────────────────────────────────────────
  registerAdminCommands(pi);
}