Files
pi-extensions/ai-server/admin.ts
T
shahondin1624 39d0797dc9 Gate startup logs behind PI_DEBUG + skip GGUF-shard phantom entries
- All [ai-server] / [markdown-body-color] / [mechanicus-thinking-label]
  console.log calls now fire only when PI_DEBUG is set. Default boot is
  clean.
- ai-server's discoverModels now filters out ids matching
  /-\d+-of-\d+$/ — llama.cpp's --models-autoload registers every GGUF
  shard as its own id, duplicating the preset's consolidated model.
  These shard-named phantoms are no longer surfaced to pi.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 22:38:09 +02:00

208 lines
6.0 KiB
TypeScript

import { exec as execCb } from "node:child_process";
import * as https from "node:https";
import { URL } from "node:url";
import { promisify } from "node:util";
import {
AI_SERVER_PRESET_PATH,
AI_SERVER_SSH_HOST,
AI_SERVER_URL,
type ServerModel,
loadCerts,
} from "./config.js";
// Promise-returning form of child_process.exec (built with util.promisify),
// so shell commands can be awaited instead of using a callback.
const exec = promisify(execCb);
// ─── HTTP router API (via Caddy mTLS) ────────────────────────────────────
/**
 * Issue a single HTTPS request against the router at `AI_SERVER_URL + path`,
 * presenting the CA / client certificate / key returned by `loadCerts()`.
 *
 * @param method HTTP verb to use.
 * @param path   Path (and optional query string) appended to `AI_SERVER_URL`.
 * @param body   Optional payload; when given it is sent as JSON.
 * @returns The parsed JSON body, `{}` for a blank body, or `{ raw }` holding
 *          the text when the body is not valid JSON.
 * @throws Error `HTTP <status>: <first 400 chars of body>` for non-2xx
 *         responses, a timeout error, or any socket/response error.
 */
async function routerRequest(
  method: "GET" | "POST",
  path: string,
  body?: unknown,
): Promise<any> {
  const { ca, cert, key } = loadCerts();
  const target = new URL(AI_SERVER_URL + path);
  const payload = body === undefined ? undefined : JSON.stringify(body);

  // Body-related headers are only sent when there is a body to describe.
  const headers: Record<string, string | number> = {
    Accept: "application/json",
  };
  if (payload !== undefined) {
    headers["Content-Type"] = "application/json";
    headers["Content-Length"] = Buffer.byteLength(payload);
  }

  const options = {
    hostname: target.hostname,
    port: target.port ? Number(target.port) : 443,
    path: target.pathname + target.search,
    method,
    headers,
    ca,
    cert,
    key,
    timeout: 30_000,
  };

  return new Promise((resolve, reject) => {
    const request = https.request(options, (response) => {
      const parts: string[] = [];
      response.setEncoding("utf-8");
      response.on("data", (chunk: string) => {
        parts.push(chunk);
      });
      response.on("error", reject);
      response.on("end", () => {
        const text = parts.join("");
        const status = response.statusCode ?? 0;
        const ok = status >= 200 && status < 300;
        if (!ok) {
          reject(new Error(`HTTP ${status}: ${text.slice(0, 400).trim()}`));
          return;
        }
        if (!text.trim()) {
          resolve({});
          return;
        }
        try {
          resolve(JSON.parse(text));
        } catch {
          // Successful status but a non-JSON body: hand the text back as-is.
          resolve({ raw: text });
        }
      });
    });

    // The "timeout" event alone does not abort the request; destroying it
    // surfaces the failure through the "error" handler below.
    request.on("timeout", () =>
      request.destroy(new Error("Router request timed out")),
    );
    request.on("error", reject);

    if (payload !== undefined) request.write(payload);
    request.end();
  });
}
/**
 * One entry of the router's `/models` listing, as consumed by this module.
 */
export interface RouterModel {
// Model identifier; discoverModels() forwards it as the model id and display name.
id: string;
// `value` is the load state; `args` is an argument list that this module
// scans for "--model" and "--ctx-size" (presumably the launch arguments of
// the model's server process — confirm against the router).
status: { value: "loaded" | "unloaded" | "loading"; args: string[] };
}
/**
 * Fetch the router's model listing via `GET /models`.
 *
 * @returns The array found under the response's `data` field, or an empty
 *          array when that field is missing or is not an array (for example
 *          when the router answered with a non-JSON or error-shaped body).
 * @throws Whatever `routerRequest` rejects with (HTTP, TLS or socket errors).
 */
export async function listModels(): Promise<RouterModel[]> {
  const response = await routerRequest("GET", "/models");
  const entries: unknown = response?.data;
  // Guard the cast: callers immediately call array methods on the result.
  return Array.isArray(entries) ? (entries as RouterModel[]) : [];
}
/**
 * Ask the router to load a model via `POST /models/load`.
 *
 * @param id Model identifier to load.
 * @returns The router's response as returned by `routerRequest`.
 */
export async function loadModel(id: string): Promise<unknown> {
  // The router's handler reads `body["model"]`; passing `{id}` yields a 404.
  const payload = { model: id };
  return routerRequest("POST", "/models/load", payload);
}
/**
 * Ask the router to unload a model via `POST /models/unload`.
 *
 * @param id Model identifier to unload.
 * @returns The router's response as returned by `routerRequest`.
 */
export async function unloadModel(id: string): Promise<unknown> {
  // Same request shape as the load endpoint: the id travels under `model`.
  const payload = { model: id };
  return routerRequest("POST", "/models/unload", payload);
}
/**
 * Read the context size from a model's argument list.
 *
 * Looks for `--ctx-size` (or its short alias `-c`) and parses the argument
 * that follows it.
 *
 * @param m Router model entry whose `status.args` is inspected.
 * @returns The context size as a positive integer, or `null` when the flag is
 *          absent, has no value, or the value is not a positive integer.
 *          A value of `0` also yields `null`: llama.cpp documents it as
 *          "loaded from model", so it is not an actual size.
 */
export function extractCtxSize(m: RouterModel): number | null {
  const args = m.status?.args ?? [];
  const flagIndex = args.findIndex(
    (arg) => arg === "--ctx-size" || arg === "-c",
  );
  if (flagIndex < 0) return null;

  const raw = args[flagIndex + 1];
  // Number("") and Number("  ") are 0, so blank values must be rejected
  // before the numeric conversion.
  if (typeof raw !== "string" || raw.trim() === "") return null;

  const size = Number(raw);
  return Number.isInteger(size) && size > 0 ? size : null;
}
// Only presets that carry a --model argument can actually be loaded.
// Placeholder sections (e.g. [small-7b] with no `model = ...` line) are still
// listed by /models, but lack the --model arg and would fail on load.
function isRunnable(m: RouterModel): boolean {
  const launchArgs = m.status?.args;
  if (launchArgs == null) return false;
  return launchArgs.some((arg) => arg === "--model");
}
// With --models-autoload, llama.cpp scans the --models-dir and registers
// every .gguf it finds, including each shard of a split GGUF. A multi-shard
// model therefore appears once per shard, under ids such as
// "<name>-00001-of-00003". Those entries duplicate the preset section that
// points at the first shard, so callers filter them out.
function isShardArtefact(id: string): boolean {
  const shardSuffix = /-\d+-of-\d+$/;
  return id.search(shardSuffix) !== -1;
}
/**
 * List the router's models in the `ServerModel` shape.
 *
 * Entries without a `--model` argument and GGUF-shard duplicates are dropped.
 * The context window comes from the model's `--ctx-size` argument; when that
 * is missing or not a positive number, 32768 is used instead.
 *
 * @returns One `ServerModel` per usable router entry.
 * @throws Whatever `listModels` rejects with.
 */
export async function discoverModels(): Promise<ServerModel[]> {
  const fallbackCtx = 32768;
  const models = await listModels();
  return models
    .filter((m) => isRunnable(m) && !isShardArtefact(m.id))
    .map((m) => {
      const reported = extractCtxSize(m);
      // `??` alone would let a reported 0 through and advertise a
      // zero-token context window, so non-positive values also fall back.
      const ctx = reported !== null && reported > 0 ? reported : fallbackCtx;
      return {
        id: m.id,
        name: `${m.id} (AI Server)`,
        reasoning: true,
        contextWindow: ctx,
        // Half the context, kept within [2048, 16384].
        maxTokens: Math.min(16384, Math.max(2048, Math.floor(ctx / 2))),
      };
    });
}
// ─── SSH helpers ─────────────────────────────────────────────────────────
/**
 * Wrap a string in single quotes for use as one shell word.
 * Each embedded single quote is rewritten as `'\''` (close the quote, emit an
 * escaped quote, reopen the quote).
 */
function shQuote(s: string): string {
  const escaped = s.split("'").join("'\\''");
  return "'" + escaped + "'";
}
/**
 * Run a command on `AI_SERVER_SSH_HOST` through the local `ssh` client.
 *
 * @param remoteCmd Command line passed to ssh as a single quoted argument.
 * @param timeoutMs Timeout handed to `exec`, in milliseconds (default 60 s).
 * @returns The command's stdout.
 * @throws Error `ssh <host>: <detail>` where detail is the trimmed stderr if
 *         there is any, otherwise the underlying error's message.
 */
async function runSsh(remoteCmd: string, timeoutMs = 60_000): Promise<string> {
  const sshOptions =
    "-o BatchMode=yes -o ConnectTimeout=5 -o StrictHostKeyChecking=accept-new";
  // Host and remote command are each quoted for the local shell.
  const cmd = [
    "ssh",
    sshOptions,
    shQuote(AI_SERVER_SSH_HOST),
    shQuote(remoteCmd),
  ].join(" ");
  const execOptions = { timeout: timeoutMs, maxBuffer: 4 * 1024 * 1024 };

  try {
    const result = await exec(cmd, execOptions);
    return result.stdout;
  } catch (err: any) {
    // Prefer what the command printed on stderr over the generic exec message.
    const stderr = (err?.stderr ?? "").toString().trim();
    const detail = stderr || err?.message || String(err);
    throw new Error(`ssh ${AI_SERVER_SSH_HOST}: ${detail}`);
  }
}
/**
 * Read the preset file at `AI_SERVER_PRESET_PATH` on the server.
 *
 * @returns The file's contents (stdout of `cat` run over SSH).
 * @throws Whatever `runSsh` throws.
 */
export async function readPreset(): Promise<string> {
  const remoteCmd = `cat ${AI_SERVER_PRESET_PATH}`;
  return runSsh(remoteCmd);
}
/**
 * Set a `key = value` line inside a named [section] of the preset file.
 * Preserves comments and all other lines. Errors if the key is absent.
 *
 * The rewrite runs on the server through awk: the result is written to
 * `<preset>.tmp` and moved over the preset only when awk succeeds. Only lines
 * written with whitespace around `=` are matched.
 *
 * @param section Section name, without the surrounding brackets.
 * @param key     Key to update; it must already exist in that section.
 * @param value   New value for the key.
 * @throws Error with a "not found" message when the key is missing from the
 *         section; any other SSH failure is rethrown unchanged.
 */
export async function setPresetKey(
  section: string,
  key: string,
  value: string,
): Promise<void> {
  // awk prints this marker on stderr when the key is missing. Matching on it
  // is reliable, whereas matching "exit 2" in the error message is not: that
  // message can contain the command text, which itself includes "exit 2".
  const notFoundMarker = "PRESET_KEY_NOT_FOUND";

  const awkProgram = [
    "BEGIN { in_s = 0; found = 0 }",
    // Every section header re-evaluates whether we are inside the target.
    "/^\\[/ { in_s = ($0 == sec) }",
    'in_s && $1 == key && $2 == "=" { print key " = " val; found = 1; next }',
    "{ print }",
    `END { if (!found) { print "${notFoundMarker}" > "/dev/stderr"; exit 2 } }`,
  ].join("\n");

  // All three awk variables are single-quoted for the remote shell, so shell
  // metacharacters in section, key or value are never expanded or executed.
  // NOTE(review): awk applies backslash-escape processing to -v values, so a
  // value containing backslashes may be altered — confirm if that matters.
  const awkVars = [
    `-v sec=${shQuote(`[${section}]`)}`,
    `-v key=${shQuote(key)}`,
    `-v val=${shQuote(value)}`,
  ].join(" ");

  const remoteCmd =
    `awk ${awkVars} ${shQuote(awkProgram)} ${AI_SERVER_PRESET_PATH}` +
    ` > ${AI_SERVER_PRESET_PATH}.tmp` +
    ` && mv ${AI_SERVER_PRESET_PATH}.tmp ${AI_SERVER_PRESET_PATH}`;

  try {
    await runSsh(remoteCmd);
  } catch (err: unknown) {
    const msg = err instanceof Error ? err.message : String(err);
    if (msg.includes(notFoundMarker)) {
      throw new Error(
        `Key "${key}" not found in [${section}] — add it to the preset manually first.`,
      );
    }
    throw err;
  }
}
/**
 * Restart the user-level `llama-server.service` unit on the server and then
 * query whether it is active.
 *
 * @returns stdout of the remote commands (the `is-active` output).
 * @throws Whatever `runSsh` throws, including when either command fails.
 */
export async function restartService(): Promise<string> {
  const steps = [
    "systemctl --user restart llama-server.service",
    "systemctl --user is-active llama-server.service",
  ];
  // Chained with && so the status query only runs after a successful restart.
  return runSsh(steps.join(" && "));
}
/**
 * Reload a single model: unload it (best effort), wait two seconds, then
 * load it again.
 *
 * @param id Model identifier to reload.
 * @throws Whatever `loadModel` rejects with; unload failures are ignored.
 */
export async function reloadOneModel(id: string): Promise<void> {
  const settleMs = 2000;

  // Unload errors are deliberately dropped (model may not be loaded).
  await unloadModel(id).catch(() => undefined);

  // Router needs a beat to clear the slot.
  await new Promise<void>((resolve) => {
    setTimeout(resolve, settleMs);
  });

  await loadModel(id);
}