Files
pi-extensions/llama.cpp/llama.cpp.test.mjs
T
shahondin1624 3876968bfa make llama.cpp base URL configurable via settings + document live-symlink dev setup
Resolve the local llama.cpp provider's server URL from LLAMA_BASE_URL env →
localLlama.baseUrl in settings.json → built-in default, reading settings inline
(node:fs) so the flat-copy test build stays self-contained. A PI_SETTINGS_PATH
override keeps the suite deterministic across hosts.

Document the live-development workflow of symlinking each extension dir AND
shared/ into ~/.pi/agent/extensions/, with a warning that a symlinked extension
paired with a stale copied shared/ silently loads the wrong helpers.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-29 08:23:25 +02:00

938 lines
34 KiB
JavaScript

import assert from "node:assert/strict";
import { fileURLToPath } from "node:url";
import http from "node:http";
import { mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
import { stripTypeScriptTypes } from "node:module";
import { tmpdir } from "node:os";
import { dirname, join } from "node:path";
import test from "node:test";
import { pathToFileURL } from "node:url";
// ES modules have no implicit __dirname; derive this file's directory from
// import.meta.url so sibling sources can be read via join(__dirname, ...).
const __dirname = dirname(fileURLToPath(import.meta.url));
// ─── Helpers ────────────────────────────────────────────────────────────────
/**
 * Strip the TypeScript types from the extension sources that sit next to this
 * test file and write the resulting .js files into a fresh temp directory.
 *
 * @returns {{ outputDir: string, modules: Record<string, string> }} the temp
 *   directory plus a map from module base name (e.g. "index") to the compiled
 *   file path. The caller owns `outputDir` and is responsible for removing it.
 * @throws Re-throws any error from reading, transforming or writing a source
 *   file; the temp directory is removed first so a failed build leaves nothing
 *   behind (the caller never receives `outputDir` in that case).
 */
function buildCompiledModule() {
  const outputDir = mkdtempSync(join(tmpdir(), "llama-test-"));
  const modules = {};
  try {
    for (const srcFile of ["config.ts", "discovery.ts", "model-utils.ts", "index.ts"]) {
      const source = readFileSync(join(__dirname, srcFile), "utf8");
      const compiled = stripTypeScriptTypes(source, { mode: "transform" });
      const baseName = srcFile.replace(/\.ts$/, ".js");
      const destPath = join(outputDir, baseName);
      writeFileSync(destPath, compiled, "utf8");
      modules[baseName.replace(/\.js$/, "")] = destPath;
    }
  } catch (err) {
    // Nobody else knows about outputDir yet, so clean it up here before failing.
    rmSync(outputDir, { recursive: true, force: true });
    throw err;
  }
  return { outputDir, modules };
}
/**
 * Load the compiled index.js found in `outputDir`.
 * A unique `?v=` query string is appended to the file URL so every call
 * resolves to a distinct module specifier instead of a cached one.
 */
async function importModule(outputDir) {
  const entryUrl = pathToFileURL(join(outputDir, "index.js"));
  const cacheBuster = `${Date.now()}-${Math.random()}`;
  return import(`${entryUrl.href}?v=${cacheBuster}`);
}
/**
 * Spin up a throwaway HTTP server on an OS-assigned port of 127.0.0.1.
 * `GET /v1/models` is answered with `statusCode` and the JSON body
 * `{ data: models }`; every other request receives a plain 404.
 * Resolves to { server, port, url } once listening, where `url` is the
 * server's `/v1` base URL. Rejects if the server emits "error".
 */
function startMockServer(models, statusCode = 200) {
  const handleRequest = (req, res) => {
    const isModelListing = req.method === "GET" && req.url === "/v1/models";
    if (!isModelListing) {
      res.writeHead(404);
      res.end("not found");
      return;
    }
    res.writeHead(statusCode, { "Content-Type": "application/json" });
    res.end(JSON.stringify({ data: models }));
  };
  return new Promise((resolve, reject) => {
    const server = http.createServer(handleRequest);
    server.listen(0, "127.0.0.1", () => {
      const { port } = server.address();
      resolve({ server, port, url: `http://127.0.0.1:${port}/v1` });
    });
    server.on("error", reject);
  });
}
/**
 * Close `server` and settle once its close callback has fired.
 * The promise resolves with whatever argument the callback receives.
 */
async function stopMockServer(server) {
  const closed = new Promise((done) => {
    server.close(done);
  });
  return closed;
}
/**
 * Wrap `events` in a 200 text/event-stream Response.
 * Each entry becomes a `data: …` line; entries are separated, and the payload
 * is terminated, by a blank line. The whole payload is enqueued as one chunk.
 */
function createSSEChatResponse(events) {
  const frames = events.map((event) => `data: ${event}`);
  const bytes = new TextEncoder().encode(`${frames.join("\n\n")}\n\n`);
  const body = new ReadableStream({
    start(controller) {
      controller.enqueue(bytes);
      controller.close();
    },
  });
  return new Response(body, {
    status: 200,
    headers: { "Content-Type": "text/event-stream" },
  });
}
/**
 * Reset every llama-related environment variable so a freshly imported
 * compiled module starts from a known baseline.
 */
function cleanLlamaEnv() {
  for (const name of ["LLAMA_BASE_URL", "LLAMA_MODEL_ID", "LLAMA_CTX", "LLAMA_MAX_OUT"]) {
    delete process.env[name];
  }
  // Aim settings resolution at a file that does not exist: BASE_URL then falls
  // through to the built-in default regardless of the developer's own settings.json.
  const missingSettings = join(tmpdir(), "llama-test-no-such-settings.json");
  process.env.PI_SETTINGS_PATH = missingSettings;
}
// ─── Mock PI ────────────────────────────────────────────────────────────────
/**
 * Build an inert stand-in for the `pi` object handed to the extension.
 * Only registerProvider and registerCommand record anything (into `state`);
 * every other member is a no-op or returns a fixed value.
 *
 * @returns {{ pi: object, state: { providers: Array<{name: string, config: object}>, commands: Map<string, object> } }}
 */
function createMockPI() {
  const providers = [];
  const commands = new Map();
  const noop = () => {};
  const emptyList = () => [];
  const pi = {
    registerProvider: (name, config) => {
      providers.push({ name, config });
    },
    registerCommand: (name, options) => {
      commands.set(name, options);
    },
    registerShortcut: noop,
    registerFlag: noop,
    getFlag: () => undefined,
    registerMessageRenderer: noop,
    on: noop,
    sendMessage: noop,
    sendUserMessage: noop,
    appendEntry: noop,
    setSessionName: noop,
    getSessionName: () => undefined,
    setLabel: noop,
    exec: async () => ({ stdout: "", stderr: "", code: 0 }),
    getActiveTools: emptyList,
    getAllTools: emptyList,
    setActiveTools: noop,
    refreshTools: noop,
    getCommands: emptyList,
    setModel: async () => false,
    getThinkingLevel: () => "off",
    setThinkingLevel: noop,
    unregisterProvider: noop,
    events: { on: noop, off: noop },
  };
  return { pi, state: { providers, commands } };
}
/**
 * Build an inert command context for invoking slash-command handlers.
 * UI callbacks discard their arguments, session operations resolve to
 * `{ cancelled: false }`, and the remaining members return fixed values.
 */
function createMockCtx() {
  const notCancelled = async () => ({ cancelled: false });
  const ui = {
    notify(_msg, _type) {},
    setStatus(_key, _text) {},
  };
  return {
    ui,
    hasUI: true,
    cwd: process.cwd(),
    sessionManager: {},
    modelRegistry: {},
    model: undefined,
    isIdle: () => true,
    signal: undefined,
    abort: () => {},
    hasPendingMessages: () => false,
    shutdown: () => {},
    getContextUsage: () => undefined,
    compact: () => {},
    getSystemPrompt: () => "",
    waitForIdle: async () => {},
    newSession: notCancelled,
    fork: notCancelled,
    navigateTree: notCancelled,
    switchSession: notCancelled,
    reload: async () => {},
  };
}
// ─── isReasoningModel tests ────────────────────────────────────────────────
// Table of isReasoningModel expectations. Each row registers its own test,
// which compiles and loads a private module copy and removes it afterwards.
for (const { title, expected, ids } of [
  { title: "matches Qwen3.6", expected: true, ids: ["Qwen3.6-35B", "Qwen3.6-35B-A3B"] },
  { title: "matches Qwen3-Coder", expected: true, ids: ["Qwen3-Coder-48B"] },
  { title: "matches Qwen3-VL", expected: true, ids: ["Qwen3-VL-32B"] },
  { title: "matches MiniMax", expected: true, ids: ["MiniMax-M1"] },
  { title: "matches gpt-oss", expected: true, ids: ["gpt-oss-20b"] },
  { title: "matches MiMo", expected: true, ids: ["MiMo-Large"] },
  { title: "matches Devstral", expected: true, ids: ["Devstral-72B"] },
  {
    title: "non-matches",
    expected: false,
    ids: ["gpt-4", "claude-sonnet", "llama-3.1", "", "unknown-model"],
  },
  { title: "case insensitive", expected: true, ids: ["qwen3.6-35b", "QWEN3.6-35B", "qwen3-coder"] },
]) {
  test(`isReasoningModel: ${title}`, async () => {
    const { outputDir } = buildCompiledModule();
    try {
      const mod = await importModule(outputDir);
      for (const id of ids) {
        assert.equal(mod.isReasoningModel(id), expected);
      }
    } finally {
      rmSync(outputDir, { recursive: true, force: true });
    }
  });
}
// ─── discoverModels tests ──────────────────────────────────────────────────
// The mock advertises exactly one model; discovery must pass all of its fields through.
test("discoverModels: returns models from server", async () => {
  const { outputDir } = buildCompiledModule();
  const advertised = { id: "model-a", name: "Model A", context_window: 32768, max_tokens: 8192 };
  const { server, url } = await startMockServer([advertised]);
  try {
    cleanLlamaEnv();
    process.env.LLAMA_BASE_URL = url;
    const mod = await importModule(outputDir);
    const found = await mod.discoverModels();
    assert.equal(found.length, 1);
    for (const [field, value] of Object.entries(advertised)) {
      assert.equal(found[0][field], value);
    }
  } finally {
    await stopMockServer(server);
    cleanLlamaEnv();
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// An empty `data` array from the server yields an empty result.
test("discoverModels: returns empty array when server has no models", async () => {
  const { outputDir } = buildCompiledModule();
  const { server, url } = await startMockServer([]);
  try {
    cleanLlamaEnv();
    process.env.LLAMA_BASE_URL = url;
    const mod = await importModule(outputDir);
    const found = await mod.discoverModels();
    assert.equal(found.length, 0);
  } finally {
    await stopMockServer(server);
    cleanLlamaEnv();
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// A 500 from /v1/models must surface as a rejection whose message mentions the status.
test("discoverModels: throws on HTTP error", async () => {
  const { outputDir } = buildCompiledModule();
  const { server, url } = await startMockServer([], 500);
  try {
    cleanLlamaEnv();
    process.env.LLAMA_BASE_URL = url;
    const mod = await importModule(outputDir);
    // assert.rejects keeps "did not reject" apart from "rejected with the wrong
    // error"; a hand-rolled try/catch around assert.fail() would route the
    // "Should have thrown" failure into the message check and misreport it.
    // The async wrapper turns a synchronous throw into a rejection as well.
    await assert.rejects(
      async () => mod.discoverModels(),
      (err) => {
        assert.ok(
          String(err?.message).includes("500"),
          `Expected HTTP 500 error, got: ${err?.message}`,
        );
        return true;
      },
    );
  } finally {
    await stopMockServer(server);
    cleanLlamaEnv();
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// A response body without any `data` key must be treated as "no models".
test("discoverModels: handles missing data field in response", async () => {
  const { outputDir } = buildCompiledModule();
  // Passing undefined as `models` makes the mock serialize { data: undefined },
  // and JSON.stringify drops undefined-valued keys, so the body on the wire is
  // a literal `{}`. Passing [] would send `{"data":[]}`, where the field is
  // present, and would merely repeat the empty-list test.
  const { server, url } = await startMockServer(undefined);
  try {
    cleanLlamaEnv();
    process.env.LLAMA_BASE_URL = url;
    const mod = await importModule(outputDir);
    const discovered = await mod.discoverModels();
    assert.equal(discovered.length, 0);
  } finally {
    await stopMockServer(server);
    cleanLlamaEnv();
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// A model entry that only carries an id is returned as-is, with no name filled in.
test("discoverModels: handles partial model data (missing name)", async () => {
  const { outputDir } = buildCompiledModule();
  const idOnly = { id: "bare-model" };
  const { server, url } = await startMockServer([idOnly]);
  try {
    cleanLlamaEnv();
    process.env.LLAMA_BASE_URL = url;
    const mod = await importModule(outputDir);
    const found = await mod.discoverModels();
    assert.equal(found.length, 1);
    const entry = found[0];
    assert.equal(entry.id, "bare-model");
    assert.equal(entry.name, undefined);
  } finally {
    await stopMockServer(server);
    cleanLlamaEnv();
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// ─── registerProviderWithModels tests ───────────────────────────────────────
// Registers one fully specified model and one id/name-only model, then checks
// the provider envelope, the explicit values and the defaults for omitted limits.
test("registerProviderWithModels: registers provider with all model fields", async () => {
  const { outputDir } = buildCompiledModule();
  try {
    cleanLlamaEnv();
    const { pi, state } = createMockPI();
    const mod = await importModule(outputDir);
    mod.registerProviderWithModels(pi, [
      { id: "model-1", name: "Model 1", context_window: 65536, max_tokens: 16384 },
      { id: "model-2", name: "Model 2" },
    ]);
    assert.equal(state.providers.length, 1);
    const { name, config } = state.providers[0];
    assert.equal(name, "local-llama");
    assert.equal(config.baseUrl, "http://192.168.2.35:8123/v1");
    assert.equal(config.apiKey, "sk-no-key");
    assert.equal(config.api, "local-llama");
    assert.equal(config.authHeader, true);
    assert.equal(config.models.length, 2);
    const explicit = config.models[0];
    assert.equal(explicit.id, "model-1");
    assert.equal(explicit.name, "Model 1");
    assert.equal(explicit.contextWindow, 65536);
    assert.equal(explicit.maxTokens, 16384);
    assert.equal(explicit.cost.input, 0);
    assert.equal(explicit.cost.output, 0);
    const defaulted = config.models[1];
    assert.equal(defaulted.contextWindow, 262144);
    assert.equal(defaulted.maxTokens, 65536);
  } finally {
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// Replaces global fetch with a canned SSE stream (reasoning delta, text delta,
// usage chunk, terminator) and checks both the emitted events and the final message.
test("registerProviderWithModels: streamSimple emits assistant messages with piTokenStats", async () => {
  const { outputDir } = buildCompiledModule();
  const originalFetch = globalThis.fetch;
  try {
    cleanLlamaEnv();
    process.env.LLAMA_BASE_URL = "http://127.0.0.1:8123/v1";
    const { pi, state } = createMockPI();

    const reasoningChunk = {
      id: "chatcmpl-local-1",
      choices: [{ delta: { reasoning_content: "ponder" }, finish_reason: null }],
    };
    const textChunk = {
      choices: [{ delta: { content: "Hello" }, finish_reason: null }],
    };
    const usageChunk = {
      usage: {
        prompt_tokens: 12,
        completion_tokens: 3,
        completion_tokens_details: { reasoning_tokens: 1 },
        prompt_tokens_details: { cached_tokens: 4 },
      },
      choices: [{ delta: {}, finish_reason: "stop" }],
    };
    const sseEvents = [reasoningChunk, textChunk, usageChunk]
      .map((chunk) => JSON.stringify(chunk))
      .concat("[DONE]");

    let requestedUrl = "";
    globalThis.fetch = async (input) => {
      requestedUrl = String(input);
      return createSSEChatResponse(sseEvents);
    };

    const mod = await importModule(outputDir);
    mod.registerProviderWithModels(pi, [{ id: "test-model", name: "Test Model" }]);
    const provider = state.providers[0];
    const model = {
      ...provider.config.models[0],
      api: provider.config.api,
      provider: provider.name,
    };
    const context = {
      systemPrompt: "system",
      messages: [{ role: "user", content: "hello" }],
    };
    const stream = provider.config.streamSimple(model, context, { temperature: 0.1 });
    assert.equal(typeof stream.result, "function");
    assert.equal(typeof stream[Symbol.asyncIterator], "function");

    const events = [];
    for await (const event of stream) {
      events.push(event);
    }
    const finalMessage = await stream.result();

    // Event sequence.
    const deltasOf = (type) =>
      events.filter((event) => event.type === type).map((event) => event.delta);
    assert.equal(events[0].type, "start");
    assert.deepEqual(deltasOf("thinking_delta"), ["ponder"]);
    assert.deepEqual(deltasOf("text_delta"), ["Hello"]);
    assert.equal(events.at(-1).type, "done");
    assert.equal(requestedUrl, "http://127.0.0.1:8123/v1/chat/completions");

    // Final assistant message.
    assert.equal(finalMessage.role, "assistant");
    assert.equal(finalMessage.api, "local-llama");
    assert.equal(finalMessage.provider, "local-llama");
    assert.equal(finalMessage.model, "test-model");
    assert.equal(finalMessage.responseId, "chatcmpl-local-1");
    const { usage } = finalMessage;
    assert.equal(usage.input, 12);
    assert.equal(usage.output, 3);
    assert.equal(usage.cacheRead, 4);
    assert.equal(usage.totalTokens, 15);
    const thinkingPart = finalMessage.content[0];
    assert.equal(thinkingPart.type, "thinking");
    assert.equal(thinkingPart.thinking, "ponder");
    const textPart = finalMessage.content[1];
    assert.equal(textPart.type, "text");
    assert.equal(textPart.text, "Hello");
    const stats = finalMessage.piTokenStats;
    assert.equal(stats.inputTokens, 12);
    assert.equal(stats.outputTokens, 3);
    assert.equal(stats.thinkingTokens, 1);
    assert.equal(typeof stats.requestStartMs, "number");
    assert.equal(typeof stats.responseEndMs, "number");
  } finally {
    globalThis.fetch = originalFetch;
    cleanLlamaEnv();
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// Captures the request body sent through the stubbed fetch and checks that the
// context's tool definition arrives wrapped as an OpenAI-style "function" tool.
test("registerProviderWithModels: streamSimple converts context tools to OpenAI payload", async () => {
  const { outputDir } = buildCompiledModule();
  const originalFetch = globalThis.fetch;
  try {
    cleanLlamaEnv();
    process.env.LLAMA_BASE_URL = "http://127.0.0.1:8123/v1";
    const { pi, state } = createMockPI();

    const finalChunk = JSON.stringify({
      usage: {
        prompt_tokens: 1,
        completion_tokens: 1,
        prompt_tokens_details: { cached_tokens: 0 },
      },
      choices: [{ delta: { content: "ok" }, finish_reason: "stop" }],
    });
    let requestedBody = null;
    globalThis.fetch = async (_input, init) => {
      requestedBody = JSON.parse(String(init?.body ?? "{}"));
      return createSSEChatResponse([finalChunk, "[DONE]"]);
    };

    const mod = await importModule(outputDir);
    mod.registerProviderWithModels(pi, [{ id: "test-model", name: "Test Model" }]);
    const provider = state.providers[0];

    const readTool = {
      name: "read",
      description: "Read a file",
      parameters: {
        type: "object",
        properties: { path: { type: "string" } },
      },
    };
    // Snapshot the expectation up front so it cannot alias the object handed to the provider.
    const expectedTools = [{ type: "function", function: structuredClone(readTool) }];

    const model = {
      ...provider.config.models[0],
      api: provider.config.api,
      provider: provider.name,
    };
    const context = {
      systemPrompt: "system",
      messages: [{ role: "user", content: "hello" }],
      tools: [readTool],
    };
    const stream = provider.config.streamSimple(model, context, { toolChoice: "auto" });
    for await (const _event of stream) {
      // drain the stream so the request is actually issued
    }

    assert.deepEqual(requestedBody.tools, expectedTools);
    assert.equal(requestedBody.tool_choice, "auto");
  } finally {
    globalThis.fetch = originalFetch;
    cleanLlamaEnv();
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// Each row: [model id, display name, expected `reasoning` flag after registration].
test("registerProviderWithModels: marks reasoning models correctly", async () => {
  const { outputDir } = buildCompiledModule();
  try {
    cleanLlamaEnv();
    const { pi, state } = createMockPI();
    const mod = await importModule(outputDir);
    const rows = [
      ["Qwen3.6-35B", "Qwen3.6", true],
      ["gpt-oss-20b", "GPT OSS", true],
      ["MiniMax-M1", "MiniMax", true],
      ["llama-3.1", "Llama", false],
      ["claude-sonnet", "Claude", false],
    ];
    mod.registerProviderWithModels(pi, rows.map(([id, name]) => ({ id, name })));
    rows.forEach(([, , expectedFlag], index) => {
      assert.equal(state.providers[0].config.models[index].reasoning, expectedFlag);
    });
  } finally {
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// The compat block attached to every registered model.
test("registerProviderWithModels: sets compat block", async () => {
  const { outputDir } = buildCompiledModule();
  try {
    cleanLlamaEnv();
    const { pi, state } = createMockPI();
    const mod = await importModule(outputDir);
    mod.registerProviderWithModels(pi, [{ id: "test", name: "Test" }]);
    const compat = state.providers[0].config.models[0].compat;
    const expectedCompat = [
      ["thinkingFormat", "qwen-chat-template"],
      ["maxTokensField", "max_tokens"],
      ["supportsDeveloperRole", false],
      ["supportsReasoningEffort", false],
      ["supportsUsageInStreaming", true],
    ];
    for (const [key, value] of expectedCompat) {
      assert.equal(compat[key], value);
    }
  } finally {
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// Registered models accept text input only.
test('registerProviderWithModels: input is always ["text"]', async () => {
  const { outputDir } = buildCompiledModule();
  try {
    cleanLlamaEnv();
    const { pi, state } = createMockPI();
    const mod = await importModule(outputDir);
    mod.registerProviderWithModels(pi, [{ id: "test" }]);
    const registered = state.providers[0].config.models[0];
    assert.deepEqual(registered.input, ["text"]);
  } finally {
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// All four cost components are zero.
test("registerProviderWithModels: cost is zero for all fields", async () => {
  const { outputDir } = buildCompiledModule();
  try {
    cleanLlamaEnv();
    const { pi, state } = createMockPI();
    const mod = await importModule(outputDir);
    mod.registerProviderWithModels(pi, [{ id: "test" }]);
    const cost = state.providers[0].config.models[0].cost;
    for (const component of ["input", "output", "cacheRead", "cacheWrite"]) {
      assert.equal(cost[component], 0);
    }
  } finally {
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// ─── Slash command: /local-llama-refresh ────────────────────────────────────
// Loading the extension registers the refresh command with a description and a handler.
test("/local-llama-refresh: succeeds with models", async () => {
  const { outputDir } = buildCompiledModule();
  const { server, url } = await startMockServer([{ id: "discovered-1", name: "D1" }]);
  try {
    cleanLlamaEnv();
    process.env.LLAMA_BASE_URL = url;
    const { pi, state } = createMockPI();
    const extension = await importModule(outputDir);
    await extension.default(pi);
    const refresh = state.commands.get("local-llama-refresh");
    assert.ok(refresh, "/local-llama-refresh should be registered");
    assert.ok(refresh.description?.includes("discover"), "refresh command description mentions discover");
    assert.ok(typeof refresh.handler === "function");
  } finally {
    await stopMockServer(server);
    cleanLlamaEnv();
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// Running the handler against a live mock appends a provider carrying the discovered model.
test("/local-llama-refresh: calls discover and re-registers on success", async () => {
  const { outputDir } = buildCompiledModule();
  const { server, url } = await startMockServer([{ id: "new-model-1", name: "New 1" }]);
  try {
    cleanLlamaEnv();
    process.env.LLAMA_BASE_URL = url;
    const { pi, state } = createMockPI();
    const extension = await importModule(outputDir);
    await extension.default(pi);
    assert.ok(state.providers.length >= 1, "Should have at least 1 provider after load");
    assert.equal(state.providers[0].config.models[0].id, "qwen-local", "First provider should be fallback");
    const refresh = state.commands.get("local-llama-refresh");
    await refresh.handler("", createMockCtx());
    const newest = state.providers[state.providers.length - 1];
    assert.equal(newest.config.models[0].id, "new-model-1", "Last provider should have discovered model");
  } finally {
    await stopMockServer(server);
    cleanLlamaEnv();
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// Only verifies that the handler settles without throwing when the server lists no models.
test("/local-llama-refresh: notifies on no models", async () => {
  const { outputDir } = buildCompiledModule();
  const { server, url } = await startMockServer([]);
  try {
    cleanLlamaEnv();
    process.env.LLAMA_BASE_URL = url;
    const { pi, state } = createMockPI();
    const extension = await importModule(outputDir);
    await extension.default(pi);
    const refresh = state.commands.get("local-llama-refresh");
    await refresh.handler("", createMockCtx());
    assert.ok(true, "refresh command handled empty models gracefully");
  } finally {
    await stopMockServer(server);
    cleanLlamaEnv();
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// Only verifies that the handler settles without throwing when nothing listens at the base URL.
test("/local-llama-refresh: reports error when server unreachable", async () => {
  const { outputDir } = buildCompiledModule();
  try {
    cleanLlamaEnv();
    process.env.LLAMA_BASE_URL = "http://127.0.0.1:1/v1";
    const { pi, state } = createMockPI();
    const extension = await importModule(outputDir);
    await extension.default(pi);
    const refresh = state.commands.get("local-llama-refresh");
    await refresh.handler("", createMockCtx());
    assert.ok(true, "refresh command handled unreachable server gracefully");
  } finally {
    cleanLlamaEnv();
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// ─── Slash command: /local-llama-status ─────────────────────────────────────
// Loading the extension registers the status command with a description and a handler.
test("/local-llama-status: shows models", async () => {
  const { outputDir } = buildCompiledModule();
  const { server, url } = await startMockServer([{ id: "status-model", name: "Status Model" }]);
  try {
    cleanLlamaEnv();
    process.env.LLAMA_BASE_URL = url;
    const { pi, state } = createMockPI();
    const extension = await importModule(outputDir);
    await extension.default(pi);
    const status = state.commands.get("local-llama-status");
    assert.ok(status, "/local-llama-status should be registered");
    assert.ok(status.description?.includes("status"), "status command description mentions status");
    assert.ok(typeof status.handler === "function");
  } finally {
    await stopMockServer(server);
    cleanLlamaEnv();
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// Only verifies that the handler settles without throwing when the server lists no models.
test("/local-llama-status: handles no models", async () => {
  const { outputDir } = buildCompiledModule();
  const { server, url } = await startMockServer([]);
  try {
    cleanLlamaEnv();
    process.env.LLAMA_BASE_URL = url;
    const { pi, state } = createMockPI();
    const extension = await importModule(outputDir);
    await extension.default(pi);
    const status = state.commands.get("local-llama-status");
    await status.handler("", createMockCtx());
    assert.ok(true, "status command handled empty model list");
  } finally {
    await stopMockServer(server);
    cleanLlamaEnv();
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// Only verifies that the handler settles without throwing when nothing listens at the base URL.
test("/local-llama-status: handles server unreachable", async () => {
  const { outputDir } = buildCompiledModule();
  try {
    cleanLlamaEnv();
    process.env.LLAMA_BASE_URL = "http://127.0.0.1:1/v1";
    const { pi, state } = createMockPI();
    const extension = await importModule(outputDir);
    await extension.default(pi);
    const status = state.commands.get("local-llama-status");
    await status.handler("", createMockCtx());
    assert.ok(true, "status command handled unreachable server gracefully");
  } finally {
    cleanLlamaEnv();
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// ─── Extension entry point tests ────────────────────────────────────────────
// With a clean environment, loading the extension registers the fallback
// provider first, plus exactly the two slash commands.
test("extension entry: registers fallback provider immediately", async () => {
  const { outputDir } = buildCompiledModule();
  try {
    cleanLlamaEnv();
    const { pi, state } = createMockPI();
    const extension = await importModule(outputDir);
    await extension.default(pi);
    assert.ok(state.providers.length > 0, "fallback provider should be registered");
    const fallback = state.providers[0];
    assert.equal(fallback.name, "local-llama");
    const fallbackModel = fallback.config.models[0];
    assert.equal(fallbackModel.id, "qwen-local");
    assert.ok(fallbackModel.name.includes("Qwen3.6"), "fallback model name includes Qwen3.6");
    assert.equal(state.commands.size, 2, "should register 2 slash commands");
    for (const commandName of ["local-llama-refresh", "local-llama-status"]) {
      assert.ok(state.commands.has(commandName));
    }
  } finally {
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// Limits, reasoning flag, input kinds and cost of the fallback model.
test("extension entry: fallback model has correct defaults", async () => {
  const { outputDir } = buildCompiledModule();
  try {
    cleanLlamaEnv();
    const { pi, state } = createMockPI();
    const extension = await importModule(outputDir);
    await extension.default(pi);
    const fallbackModel = state.providers[0].config.models[0];
    assert.equal(fallbackModel.contextWindow, 262144);
    assert.equal(fallbackModel.maxTokens, 65536);
    assert.equal(fallbackModel.reasoning, false);
    const { input, cost } = fallbackModel;
    assert.equal(input.length, 1);
    assert.equal(input[0], "text");
    assert.equal(cost.input, 0);
    assert.equal(cost.output, 0);
  } finally {
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// Both commands carry a description naming their purpose (either capitalisation) and a handler.
test("extension entry: registers slash commands", async () => {
  const { outputDir } = buildCompiledModule();
  try {
    cleanLlamaEnv();
    const { pi, state } = createMockPI();
    const extension = await importModule(outputDir);
    await extension.default(pi);
    const refreshCmd = state.commands.get("local-llama-refresh");
    const statusCmd = state.commands.get("local-llama-status");
    const describes = (cmd, ...words) => words.some((word) => cmd.description?.includes(word));
    assert.ok(describes(refreshCmd, "discover", "Discover"), "refresh command description");
    assert.ok(describes(statusCmd, "status", "Status"), "status command description");
    assert.ok(typeof refreshCmd.handler === "function", "refresh has handler");
    assert.ok(typeof statusCmd.handler === "function", "status has handler");
  } finally {
    rmSync(outputDir, { recursive: true, force: true });
  }
});
// Both cases write the same temporary settings.json and point PI_SETTINGS_PATH at it;
// they differ only in whether LLAMA_BASE_URL is also set (`envUrl`).
for (const { title, envUrl, expectedUrl } of [
  {
    title: "config: reads baseUrl from localLlama settings when env unset",
    envUrl: undefined,
    expectedUrl: "http://10.0.0.9:8123/v1",
  },
  {
    title: "config: LLAMA_BASE_URL env overrides localLlama settings",
    envUrl: "http://env-host:9999/v1",
    expectedUrl: "http://env-host:9999/v1",
  },
]) {
  test(title, async () => {
    const { outputDir } = buildCompiledModule();
    const settingsDir = mkdtempSync(join(tmpdir(), "llama-settings-"));
    const settingsPath = join(settingsDir, "settings.json");
    const settings = { localLlama: { baseUrl: "http://10.0.0.9:8123/v1" } };
    writeFileSync(settingsPath, JSON.stringify(settings), "utf8");
    try {
      cleanLlamaEnv();
      process.env.PI_SETTINGS_PATH = settingsPath;
      if (envUrl !== undefined) {
        process.env.LLAMA_BASE_URL = envUrl;
      }
      const { pi, state } = createMockPI();
      const mod = await importModule(outputDir);
      mod.registerProviderWithModels(pi, [{ id: "m" }]);
      assert.equal(state.providers[0].config.baseUrl, expectedUrl);
    } finally {
      cleanLlamaEnv();
      rmSync(settingsDir, { recursive: true, force: true });
      rmSync(outputDir, { recursive: true, force: true });
    }
  });
}
// One test per environment override: set the variable, load the extension, and
// read the affected field from the first registered provider.
for (const { title, envVar, envValue, read, expected } of [
  {
    title: "extension entry: uses env overrides for BASE_URL",
    envVar: "LLAMA_BASE_URL",
    envValue: "http://custom-host:9999/v1",
    read: (provider) => provider.config.baseUrl,
    expected: "http://custom-host:9999/v1",
  },
  {
    title: "extension entry: uses env overrides for MODEL_ID and fallback name",
    envVar: "LLAMA_MODEL_ID",
    envValue: "my-custom-model",
    read: (provider) => provider.config.models[0].id,
    expected: "my-custom-model",
  },
  {
    title: "extension entry: uses env overrides for context window",
    envVar: "LLAMA_CTX",
    envValue: "131072",
    read: (provider) => provider.config.models[0].contextWindow,
    expected: 131072,
  },
  {
    title: "extension entry: uses env overrides for max output tokens",
    envVar: "LLAMA_MAX_OUT",
    envValue: "32768",
    read: (provider) => provider.config.models[0].maxTokens,
    expected: 32768,
  },
]) {
  test(title, async () => {
    const { outputDir } = buildCompiledModule();
    try {
      cleanLlamaEnv();
      process.env[envVar] = envValue;
      const { pi, state } = createMockPI();
      const extension = await importModule(outputDir);
      await extension.default(pi);
      assert.equal(read(state.providers[0]), expected);
    } finally {
      delete process.env[envVar];
      rmSync(outputDir, { recursive: true, force: true });
    }
  });
}
// The fallback registered on load is not flagged as reasoning; a later
// registration with a Qwen3.6 id is.
test("extension entry: reasoning model correctly detected from ID", async () => {
  const { outputDir } = buildCompiledModule();
  try {
    cleanLlamaEnv();
    const { pi, state } = createMockPI();
    const extension = await importModule(outputDir);
    await extension.default(pi);
    const reasoningFlagOf = (providerIndex) => state.providers[providerIndex].config.models[0].reasoning;
    assert.equal(reasoningFlagOf(0), false);
    extension.registerProviderWithModels(pi, [{ id: "Qwen3.6-35B" }]);
    assert.equal(reasoningFlagOf(1), true);
  } finally {
    rmSync(outputDir, { recursive: true, force: true });
  }
});