pi-extensions/llama.cpp/config.ts

/**
 * Configuration constants for the llama.cpp provider extension.
 *
 * The server base URL resolves in this order:
 *   1. LLAMA_BASE_URL environment variable
 *   2. `localLlama.baseUrl` in ~/.pi/agent/settings.json
 *   3. Built-in default
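 * The fallback model id, context size and max output tokens can be overridden
 * via the LLAMA_MODEL_ID, LLAMA_CTX and LLAMA_MAX_OUT environment variables;
 * the remaining values are fixed constants. Defaults are suitable for a
 * typical LAN-based llama.cpp server.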
 */

import { existsSync, readFileSync } from "node:fs";
import { join } from "node:path";

// ─── Settings lookup ────────────────────────────────────────────────────

// Home directory taken from the environment. An empty HOME is treated as
// unset so USERPROFILE can still be used; "" means no home directory is known.
const HOME = process.env.HOME || process.env.USERPROFILE || "";

/**
 * Resolve the location of pi's settings.json.
 *
 * PI_SETTINGS_PATH lets tests point at an isolated settings file (or a
 * nonexistent one) so resolution is deterministic regardless of the host.
 * When it is set (even to an empty string) it is returned verbatim.
 *
 * @param env - Environment map to read PI_SETTINGS_PATH from.
 * @param home - Home directory, or "" when none is known.
 * @returns The settings file path, or "" when there is no override and no
 *   home directory. Returning "" instead of joining onto an empty home avoids
 *   producing the relative path `.pi/agent/settings.json`, which would be
 *   resolved against the current working directory.
 */
function resolveSettingsPath(env: Record<string, string | undefined>, home: string): string {
	const override = env.PI_SETTINGS_PATH;
	if (override !== undefined) {
		return override;
	}
	return home ? join(home, ".pi", "agent", "settings.json") : "";
}

const SETTINGS_PATH = resolveSettingsPath(process.env, HOME);

/**
 * Read the server base URL from pi's settings.json.
 *
 * Looks at `localLlama.baseUrl` first and then `local-llama.baseUrl`, and
 * returns the first one that is a non-empty string (surrounding whitespace
 * removed). A `localLlama` section without a usable `baseUrl` no longer hides
 * a valid `local-llama.baseUrl`.
 *
 * This is a best-effort lookup: a missing file, unreadable file, invalid JSON
 * or unexpected JSON shape all yield `undefined` rather than throwing.
 *
 * @param settingsPath - Settings file to read; defaults to SETTINGS_PATH.
 *   An empty path disables the lookup.
 * @returns The configured base URL, or `undefined` when none is available.
 */
function baseUrlFromSettings(settingsPath: string = SETTINGS_PATH): string | undefined {
	try {
		if (!settingsPath || !existsSync(settingsPath)) {
			return undefined;
		}
		const parsed: unknown = JSON.parse(readFileSync(settingsPath, "utf8"));
		if (typeof parsed !== "object" || parsed === null) {
			return undefined;
		}
		const root = parsed as Record<string, unknown>;
		// Check both accepted section names in priority order.
		for (const key of ["localLlama", "local-llama"]) {
			const section = root[key];
			if (typeof section !== "object" || section === null) {
				continue;
			}
			const url = (section as Record<string, unknown>).baseUrl;
			if (typeof url !== "string") {
				continue;
			}
			const trimmed = url.trim();
			if (trimmed.length > 0) {
				return trimmed;
			}
		}
		return undefined;
	} catch {
		// Deliberate best-effort: settings problems must not break startup.
		return undefined;
	}
}

// ─── Server configuration ───────────────────────────────────────────────

/**
 * Base URL of the llama.cpp server.
 *
 * Resolution order: the LLAMA_BASE_URL environment variable, then the value
 * returned by baseUrlFromSettings(), then the built-in default. An empty or
 * whitespace-only LLAMA_BASE_URL is treated as unset, so it cannot mask the
 * lower-priority sources with an unusable empty URL.
 */
export const BASE_URL =
	process.env.LLAMA_BASE_URL?.trim() ||
	baseUrlFromSettings() ||
	"http://192.168.2.35:8123/v1";

// ─── Fallback model ─────────────────────────────────────────────────────

/**
 * Parse a positive integer from an environment variable value.
 *
 * @param raw - Raw environment value, or `undefined` when the variable is unset.
 * @param fallback - Value to use when `raw` is unset or not a positive integer.
 * @returns The parsed value, or `fallback` for unset, empty, non-numeric,
 *   fractional, zero or negative input (plain `Number(raw)` would yield `NaN`
 *   for garbage and `0` for an empty string).
 */
function positiveIntFromEnv(raw: string | undefined, fallback: number): number {
	if (raw === undefined) {
		return fallback;
	}
	const parsed = Number(raw);
	return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : fallback;
}

/** Fallback model id; overridable via LLAMA_MODEL_ID (empty values are ignored). */
export const FALLBACK_MODEL_ID = process.env.LLAMA_MODEL_ID?.trim() || "qwen-local";
/** Fallback context size; overridable via LLAMA_CTX (must be a positive integer). */
export const FALLBACK_CTX = positiveIntFromEnv(process.env.LLAMA_CTX, 262144);
/** Fallback maximum output size; overridable via LLAMA_MAX_OUT (must be a positive integer). */
export const FALLBACK_MAX_OUT = positiveIntFromEnv(process.env.LLAMA_MAX_OUT, 65536);
/** Display name of the fallback model. */
export const FALLBACK_NAME = "Qwen3.6-35B-A3B (local, llama.cpp)";

// ─── Provider identity ──────────────────────────────────────────────────

/** Identifier of this provider. */
export const PROVIDER_ID = "local-llama";

/**
 * API identifier for this provider. It is deliberately a distinct api id so
 * that registering streamSimple does NOT overwrite pi-ai's built-in
 * openai-completions provider implementation.
 */
export const API_ID = "local-llama";

/**
 * API key value exported for this provider.
 * NOTE(review): looks like a placeholder for a server that needs no key — confirm.
 */
export const API_KEY = "sk-no-key";

// ─── Discovery timing ───────────────────────────────────────────────────

/** Timeout, in milliseconds, that discovery is raced against. */
export const DISCOVERY_TIMEOUT_MS = 300;

// ─── Reasoning model patterns ───────────────────────────────────────────

/**
 * Case-insensitive patterns listed under "reasoning model patterns".
 * Each entry is compiled from its regex source with the `i` flag, in the
 * order given.
 */
export const REASONING_PATTERNS: RegExp[] = [
	"Qwen3\\.6",
	"Qwen3-Coder",
	"Qwen3-VL",
	"MiniMax",
	"gpt-oss",
	"MiMo",
	"Devstral",
].map((source) => new RegExp(source, "i"));