Files
pi-extensions/llama.cpp/config.ts
T
shahondin1624 3876968bfa make llama.cpp base URL configurable via settings + document live-symlink dev setup
Resolve the local llama.cpp provider's server URL from LLAMA_BASE_URL env →
localLlama.baseUrl in settings.json → built-in default, reading settings inline
(node:fs) so the flat-copy test build stays self-contained. A PI_SETTINGS_PATH
override keeps the suite deterministic across hosts.

Document the live-development workflow of symlinking each extension dir AND
shared/ into ~/.pi/agent/extensions/, with a warning that a symlinked extension
paired with a stale copied shared/ silently loads the wrong helpers.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-29 08:23:25 +02:00

74 lines
3.2 KiB
TypeScript

/**
* Configuration constants for the llama.cpp provider extension.
*
* The server base URL resolves in this order:
* 1. LLAMA_BASE_URL environment variable
* 2. `localLlama.baseUrl` in ~/.pi/agent/settings.json
* 3. Built-in default
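* Of the remaining values, only the fallback model's id, context and
* max-output settings are configurable via environment variables
* (LLAMA_MODEL_ID, LLAMA_CTX, LLAMA_MAX_OUT); everything else is a fixed
* constant. Defaults are suitable for a typical LAN-based llama.cpp server.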
*/
import { existsSync, readFileSync } from "node:fs";
import { join } from "node:path";
// ─── Settings lookup ────────────────────────────────────────────────────
// `||` rather than `??`: an empty HOME is as unusable as an unset one, so it
// must still fall through to USERPROFILE.
const HOME = process.env.HOME || process.env.USERPROFILE || "";
// PI_SETTINGS_PATH lets tests point at an isolated settings file (or a
// nonexistent one) so resolution is deterministic regardless of the host.
// When no home directory is known the path stays empty, which disables the
// lookup: join("", ".pi", ...) would otherwise produce a path relative to the
// current working directory and silently read whatever file lives there.
const SETTINGS_PATH =
  process.env.PI_SETTINGS_PATH ?? (HOME ? join(HOME, ".pi", "agent", "settings.json") : "");

/** Top-level settings keys that may hold this provider's section, in lookup order. */
const SETTINGS_SECTION_KEYS = ["localLlama", "local-llama"] as const;

/** True for any non-null object, i.e. a value whose properties can be read safely. */
function isObjectLike(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null;
}

/**
 * Read `localLlama.baseUrl` (or `local-llama.baseUrl`) from pi's settings.json.
 *
 * Both section names are tried in order and the first non-blank string
 * `baseUrl` wins, so a `localLlama` section without a usable URL no longer
 * hides a valid `local-llama.baseUrl`. Surrounding whitespace is trimmed.
 *
 * This lookup is best-effort by design: a missing file, unreadable file,
 * malformed JSON or unexpected shape all yield `undefined` instead of throwing.
 *
 * @param settingsPath File to read; defaults to the resolved SETTINGS_PATH.
 *   An empty path skips the lookup.
 * @returns The configured base URL, or `undefined` when none is available.
 */
function baseUrlFromSettings(settingsPath: string = SETTINGS_PATH): string | undefined {
  try {
    if (!settingsPath || !existsSync(settingsPath)) {
      return undefined;
    }
    const settings: unknown = JSON.parse(readFileSync(settingsPath, "utf8"));
    if (!isObjectLike(settings)) {
      return undefined;
    }
    for (const key of SETTINGS_SECTION_KEYS) {
      const section = settings[key];
      if (!isObjectLike(section)) {
        continue;
      }
      const url = section.baseUrl;
      if (typeof url === "string") {
        const trimmed = url.trim();
        if (trimmed.length > 0) {
          return trimmed;
        }
      }
    }
    return undefined;
  } catch {
    // Deliberately swallowed: settings are optional and must never break startup.
    return undefined;
  }
}
// ─── Server configuration ───────────────────────────────────────────────
/**
 * Base URL of the llama.cpp server, resolved once at module load:
 * LLAMA_BASE_URL → settings.json → built-in default.
 *
 * `||` is used instead of `??` so that an empty or whitespace-only
 * LLAMA_BASE_URL (e.g. `LLAMA_BASE_URL= cmd`) is treated as unset and falls
 * through to the next source rather than producing an empty base URL.
 */
export const BASE_URL =
  process.env.LLAMA_BASE_URL?.trim() || baseUrlFromSettings() || "http://192.168.2.35:8123/v1";
// ─── Fallback model ─────────────────────────────────────────────────────
/**
 * Parse a strictly positive integer from a raw environment value.
 *
 * @param raw Value as read from `process.env` (may be unset).
 * @param fallback Returned when `raw` is unset, blank, non-numeric,
 *   fractional, zero or negative — a bare `Number(raw)` would turn those
 *   into NaN, 0 or an otherwise unusable limit.
 * @returns The parsed integer, or `fallback`.
 */
function positiveIntFromEnv(raw: string | undefined, fallback: number): number {
  const text = raw?.trim();
  if (!text) {
    return fallback;
  }
  const parsed = Number(text);
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : fallback;
}

/** Fallback model id; LLAMA_MODEL_ID overrides it unless it is unset or blank. */
export const FALLBACK_MODEL_ID = process.env.LLAMA_MODEL_ID?.trim() || "qwen-local";
/** Fallback context value (presumably a token count — confirm with the consumer); override with LLAMA_CTX. */
export const FALLBACK_CTX = positiveIntFromEnv(process.env.LLAMA_CTX, 262144);
/** Fallback max-output value (presumably a token count — confirm with the consumer); override with LLAMA_MAX_OUT. */
export const FALLBACK_MAX_OUT = positiveIntFromEnv(process.env.LLAMA_MAX_OUT, 65536);
/** Display name of the fallback model; not configurable. */
export const FALLBACK_NAME = "Qwen3.6-35B-A3B (local, llama.cpp)";
// ─── Provider identity ──────────────────────────────────────────────────
/** Identifier of this provider. */
export const PROVIDER_ID = "local-llama";
/**
 * Api identifier. It is kept distinct so that registering streamSimple does
 * NOT overwrite pi-ai's built-in openai-completions provider implementation.
 */
export const API_ID = "local-llama";
/** Fixed placeholder API key value. */
export const API_KEY = "sk-no-key";
// ─── Discovery timing ───────────────────────────────────────────────────
/** Timeout, in milliseconds, that discovery is raced against. */
export const DISCOVERY_TIMEOUT_MS = 300;
// ─── Reasoning model patterns ───────────────────────────────────────────
/**
 * Case-insensitive, unanchored patterns for reasoning models. Presumably they
 * are tested against model names/ids — the matching code is not in this file.
 */
export const REASONING_PATTERNS: RegExp[] = [
  "Qwen3\\.6",
  "Qwen3-Coder",
  "Qwen3-VL",
  "MiniMax",
  "gpt-oss",
  "MiMo",
  "Devstral",
].map((source) => new RegExp(source, "i"));