f1ceeb4363
Audit produced five concrete improvements:
1) New shared/ module (zero-dep pure utilities)
- shared/ansi.ts: hexToRgb (throws on malformed input instead of
silently producing NaN), fgFromHex, stripAnsi, visibleWidth,
ANSI_RESET_FG / ANSI_RESET_ALL constants.
- shared/format.ts: formatTokens, formatElapsed.
- shared/ctx.ts: safely() and safelyAsync() helpers for dealing with
pi's "stale after session replacement or reload" ExtensionRunner
semantics.
Removes duplicate helpers from mechanicus-footer, markdown-body-color,
dark-mechanicus-indicator.
2) ai-server: non-blocking startup + short-race timeout
- Factory registers STATIC_MODELS immediately so pi startup isn't
blocked on the HTTPS round-trip.
- Races discoverModels() against a 300ms timeout. On LAN (~40ms) the
live list wins and pi --list-models sees the real models. Past the
timeout, the static fallback remains and background discovery updates
the provider later.
- listModelsCached() with 5s TTL for tab completions (was firing a
round-trip on every keystroke).
- loadModel/unloadModel invalidate the cache.
3) dark-mechanicus-indicator: stale-ctx guard
- Wrap the setInterval ticker body in safely() so a race between
session_shutdown and the ticker can't crash node. Same pattern as
the earlier footer fix.
4) Safer monkey-patches in markdown-body-color and mechanicus-thinking-label
- Feature-detect Markdown/Editor/AssistantMessageComponent's target
method before patching. Warn and skip rather than silently creating
a broken prototype if a pi-tui upgrade renames the internal method.
5) Minor
- Replaced five `as any` casts with typed Record<string, unknown>
access in the monkey-patch sites.
- ai-server debug log only fires when actual discovery succeeds.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
277 lines
7.8 KiB
TypeScript
277 lines
7.8 KiB
TypeScript
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
import {
|
|
discoverModels,
|
|
extractCtxSize,
|
|
listModels,
|
|
listModelsCached,
|
|
loadModel,
|
|
readPreset,
|
|
reloadOneModel,
|
|
restartService,
|
|
setPresetKey,
|
|
unloadModel,
|
|
} from "./admin.js";
|
|
import {
|
|
AI_SERVER_API_BASE,
|
|
AI_SERVER_API_ID,
|
|
AI_SERVER_PROVIDER_ID,
|
|
AI_SERVER_URL,
|
|
MODELS as STATIC_MODELS,
|
|
type ServerModel,
|
|
} from "./config.js";
|
|
import { streamAiServer } from "./stream.js";
|
|
|
|
/**
 * Argument completer shared by the model-id commands.
 *
 * Looks the model list up through `listModelsCached()` (cached for 5s, since
 * tab-completion invokes the completer on every keystroke and the user
 * typically types only one model id per command) and offers every id that
 * begins with what has been typed so far.
 *
 * @param prefix Text typed so far for the argument.
 * @returns Matching `{ value, label }` suggestions, or `null` when nothing
 *   matches or the lookup fails for any reason.
 */
async function completeModelId(prefix: string) {
  try {
    const available = await listModelsCached();
    const suggestions: { value: string; label: string }[] = [];
    for (const model of available) {
      if (model.id.startsWith(prefix)) {
        suggestions.push({ value: model.id, label: model.id });
      }
    }
    if (suggestions.length === 0) {
      return null;
    }
    return suggestions;
  } catch {
    // Completion is best-effort: any failure simply means "no suggestions".
    return null;
  }
}
|
|
|
|
/**
 * Registers the ai-server provider on `pi`, advertising the given models.
 *
 * Every model is exposed as text-only input with all cost fields set to
 * zero, and streaming is delegated to `streamAiServer`. The function is
 * called again whenever a fresher model list becomes available.
 *
 * @param pi Extension API handle the provider is registered on.
 * @param models Models to advertise under the provider.
 */
function registerProviderWithModels(
  pi: ExtensionAPI,
  models: ServerModel[],
): void {
  pi.registerProvider(AI_SERVER_PROVIDER_ID, {
    baseUrl: AI_SERVER_API_BASE,
    apiKey: "ai-server-mtls",
    // NOTE(review): the cast presumably bridges a custom api id that pi's
    // typings do not list — confirm before tightening the type.
    api: AI_SERVER_API_ID as any,
    models: models.map(
      ({ id, name, reasoning, contextWindow, maxTokens }) => ({
        id,
        name,
        reasoning,
        input: ["text"],
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
        contextWindow,
        maxTokens,
      }),
    ),
    streamSimple: streamAiServer,
  });
}
|
|
|
|
// Milliseconds that startup waits for live model discovery before carrying on
// with the static fallback list (the delay of the timer raced against
// discoverModels() in the default export).
const DISCOVERY_FAST_TIMEOUT_MS = 300;
|
|
|
|
/**
 * Extracts a printable message from an arbitrary thrown value.
 *
 * Anything carrying a `message` property reports that message; every other
 * value is stringified, so a thrown string or number no longer shows up as
 * "undefined" in notifications.
 */
function describeError(err: unknown): string {
  if (err instanceof Error) {
    return err.message;
  }
  if (typeof err === "object" && err !== null && "message" in err) {
    return String((err as { message: unknown }).message);
  }
  return String(err);
}

/**
 * Extension entry point: registers the ai-server provider and its admin
 * commands on `pi`.
 *
 * Startup sequence:
 * 1. The provider is registered immediately with the static fallback list.
 * 2. Live discovery is raced against `DISCOVERY_FAST_TIMEOUT_MS`. If it
 *    answers in time with at least one model, the provider is re-registered
 *    with the live list before this function returns.
 * 3. Otherwise the fallback stays in place and the still-pending discovery
 *    re-registers the provider in the background once it resolves.
 *
 * Commands registered: `ai-server-status`, `ai-server-refresh`,
 * `ai-server-load`, `ai-server-unload`, `ai-server-ctx`, `ai-server-preset`
 * and `ai-server-restart`.
 *
 * @param pi Extension API handle supplied by the host.
 */
export default async function (pi: ExtensionAPI) {
  // Register the provider IMMEDIATELY with the static fallback list so pi
  // startup isn't blocked on the HTTPS round-trip in the worst case.
  registerProviderWithModels(pi, STATIC_MODELS);

  // Then race real discovery against a short timeout. Failures are turned
  // into `null`, so `discovery` itself never rejects.
  const discovery = discoverModels().catch((err) => {
    if (process.env.PI_DEBUG) {
      console.log(
        `[ai-server] Discovery failed (${describeError(err)}); fallback remains`,
      );
    }
    return null;
  });

  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<null>((resolve) => {
    timer = setTimeout(() => resolve(null), DISCOVERY_FAST_TIMEOUT_MS);
  });
  const fastResult = await Promise.race([discovery, timeout]);
  // Cancel the timer once the race is decided; a pending timer would
  // otherwise keep the event loop alive after discovery has already won.
  clearTimeout(timer);

  if (fastResult && fastResult.length > 0) {
    registerProviderWithModels(pi, fastResult);
    if (process.env.PI_DEBUG) {
      console.log(
        `[ai-server] Discovered ${fastResult.length} model(s) on ${AI_SERVER_URL}: ${fastResult.map((m) => m.id).join(", ")}`,
      );
    }
  } else {
    // Slow network or discovery still pending — keep waiting in the
    // background and update the provider once it arrives. The chain is
    // explicitly detached and guarded so that a throw during the late
    // re-registration cannot surface as an unhandled rejection.
    void discovery
      .then((models) => {
        if (models && models.length > 0) {
          registerProviderWithModels(pi, models);
          if (process.env.PI_DEBUG) {
            console.log(
              `[ai-server] Late discovery: ${models.length} model(s)`,
            );
          }
        }
      })
      .catch((err) => {
        if (process.env.PI_DEBUG) {
          console.log(
            `[ai-server] Late registration failed (${describeError(err)})`,
          );
        }
      });
  }

  // ─── Admin commands ──────────────────────────────────────────────────

  pi.registerCommand("ai-server-status", {
    description: "Show ai-server model status and ctx sizes",
    handler: async (_args, ctx) => {
      try {
        const routerModels = await listModels();
        const lines = [`AI Server: ${AI_SERVER_URL}`];
        for (const m of routerModels) {
          const status = m.status?.value ?? "?";
          // Named ctxSize so it does not shadow the handler's `ctx`.
          const ctxSize = extractCtxSize(m);
          // A preset whose args lack "--model" is flagged as having no
          // model path.
          const hasModel = (m.status?.args ?? []).includes("--model");
          const marker = hasModel ? " " : " [no model path]";
          lines.push(
            ` ${m.id} [${status}] ctx=${ctxSize ?? "?"}${marker}`,
          );
        }
        ctx.ui.notify(lines.join("\n"), "info");
      } catch (err) {
        ctx.ui.notify(
          `ai-server-status failed: ${describeError(err)}`,
          "error",
        );
      }
    },
  });

  pi.registerCommand("ai-server-refresh", {
    description: "Re-discover models from the ai-server router",
    handler: async (_args, ctx) => {
      try {
        const discovered = await discoverModels();
        if (discovered.length === 0) {
          // Keep the current registration rather than replacing it with an
          // empty model list.
          ctx.ui.notify(
            "No runnable models on server (all presets missing --model path)",
            "warning",
          );
          return;
        }
        registerProviderWithModels(pi, discovered);
        ctx.ui.notify(
          `Registered ${discovered.length} model(s): ${discovered
            .map((m) => m.id)
            .join(", ")}`,
          "info",
        );
      } catch (err) {
        ctx.ui.notify(`Refresh failed: ${describeError(err)}`, "error");
      }
    },
  });

  pi.registerCommand("ai-server-load", {
    description: "Load a model (usage: /ai-server-load <id>)",
    getArgumentCompletions: completeModelId,
    handler: async (args, ctx) => {
      const id = args.trim();
      if (!id) {
        ctx.ui.notify("Usage: /ai-server-load <model-id>", "error");
        return;
      }
      try {
        ctx.ui.setStatus("ai-server", `Loading ${id}…`);
        await loadModel(id);
        ctx.ui.setStatus("ai-server", undefined);
        ctx.ui.notify(`Loaded ${id}`, "info");
      } catch (err) {
        ctx.ui.setStatus("ai-server", undefined);
        ctx.ui.notify(`Load failed: ${describeError(err)}`, "error");
      }
    },
  });

  pi.registerCommand("ai-server-unload", {
    description: "Unload a model (usage: /ai-server-unload <id>)",
    getArgumentCompletions: completeModelId,
    handler: async (args, ctx) => {
      const id = args.trim();
      if (!id) {
        ctx.ui.notify("Usage: /ai-server-unload <model-id>", "error");
        return;
      }
      try {
        await unloadModel(id);
        ctx.ui.notify(`Unloaded ${id}`, "info");
      } catch (err) {
        ctx.ui.notify(`Unload failed: ${describeError(err)}`, "error");
      }
    },
  });

  pi.registerCommand("ai-server-ctx", {
    description:
      "Set ctx-size for a model and reload (usage: /ai-server-ctx <id> <size>)",
    getArgumentCompletions: completeModelId,
    handler: async (args, ctx) => {
      const parts = args.trim().split(/\s+/).filter(Boolean);
      if (parts.length !== 2) {
        ctx.ui.notify("Usage: /ai-server-ctx <model-id> <size>", "error");
        return;
      }
      const [id, sizeStr] = parts;
      const size = Number(sizeStr);
      // Only whole numbers of at least 512 are accepted.
      if (!Number.isInteger(size) || size < 512) {
        ctx.ui.notify(`Invalid size: ${sizeStr}`, "error");
        return;
      }
      // Editing the preset and reloading is disruptive, so ask first.
      const ok = await ctx.ui.confirm(
        "Set ctx-size?",
        `Edit preset [${id}] → ctx-size=${size} and reload the model?`,
      );
      if (!ok) return;
      try {
        ctx.ui.setStatus("ai-server", "Editing preset…");
        await setPresetKey(id, "ctx-size", String(size));
        ctx.ui.setStatus("ai-server", `Reloading ${id}…`);
        await reloadOneModel(id);
        ctx.ui.setStatus("ai-server", undefined);
        ctx.ui.notify(`${id}: ctx-size=${size}, reloaded`, "info");
      } catch (err) {
        ctx.ui.setStatus("ai-server", undefined);
        ctx.ui.notify(`ctx update failed: ${describeError(err)}`, "error");
      }
    },
  });

  pi.registerCommand("ai-server-preset", {
    description: "Print ~/.llama-models.ini on the ai-server",
    handler: async (_args, ctx) => {
      try {
        const text = await readPreset();
        ctx.ui.notify(text, "info");
      } catch (err) {
        ctx.ui.notify(`Preset read failed: ${describeError(err)}`, "error");
      }
    },
  });

  pi.registerCommand("ai-server-restart", {
    description: "Restart the ai-server llama-server service",
    handler: async (_args, ctx) => {
      const ok = await ctx.ui.confirm(
        "Restart llama-server?",
        "This unloads all models and kills in-flight requests.",
      );
      if (!ok) return;
      try {
        ctx.ui.setStatus("ai-server", "Restarting…");
        const status = await restartService();
        ctx.ui.setStatus("ai-server", undefined);
        ctx.ui.notify(`Service: ${status.trim()}`, "info");
      } catch (err) {
        ctx.ui.setStatus("ai-server", undefined);
        ctx.ui.notify(`Restart failed: ${describeError(err)}`, "error");
      }
    },
  });
}
|