Patch summary: in tests/router-utils.test.ts the extractCtxSize test is removed
and replaced by tests for parseCtxMapFromYaml and extractCtxFromRunningCmd.
NOTE(review): line breaks and the indentation inside the YAML fixtures were
reconstructed to match the hunk line counts (old 19, new 102); confirm the
fixture indentation against the original commit before applying.

diff --git a/tests/router-utils.test.ts b/tests/router-utils.test.ts
index 86af028..0173e72 100644
--- a/tests/router-utils.test.ts
+++ b/tests/router-utils.test.ts
@@ -7,19 +7,102 @@
 import assert from "node:assert/strict";
 import { test } from "node:test";
 import {
-  extractCtxSize,
+  parseCtxMapFromYaml,
+  extractCtxFromRunningCmd,
   isReasoningModel,
   isShardArtefact,
 } from "../ai-server/router-utils.ts";
 
-// ── extractCtxSize (llama-swap: ctx-size not exposed via API) ──
+// ── parseCtxMapFromYaml ─────────────────────────────────────────────────
 
-// llama-swap does not expose worker command lines, so extractCtxSize
-// always returns null. Callers use a default (32768) instead.
-test("extractCtxSize: always returns null for llama-swap models", () => {
-  assert.equal(extractCtxSize({ id: "Qwen_Qwen3.6-35B-A3B-Q8_0" }), null);
-  assert.equal(extractCtxSize({ id: "MiniMax-M2.7-IQ3_XXS", running: true }), null);
-  assert.equal(extractCtxSize({ id: "x", object: "model", running: false }), null);
+test("parseCtxMapFromYaml: extracts ctx-size from model blocks", () => {
+  const yaml = `
+models:
+  Qwen_Qwen3.6-35B-A3B-Q8_0:
+    cmd: |
+      /home/ai-server/llama.cpp/build/bin/llama-server
+      --model /home/ai-server/models/Qwen_Qwen3.6-35B-A3B-Q8_0.gguf
+      --ctx-size 262144
+      --temp 0.7
+  MiniMax-M2.7-IQ3_XXS:
+    cmd: |
+      /home/ai-server/llama.cpp/build/bin/llama-server
+      --model /home/ai-server/models/MiniMax-M2.7-UD-IQ3_XXS.gguf
+      --ctx-size 131072
+      --temp 1.0
+`;
+  const map = parseCtxMapFromYaml(yaml);
+  assert.equal(map.get("Qwen_Qwen3.6-35B-A3B-Q8_0"), 262144);
+  assert.equal(map.get("MiniMax-M2.7-IQ3_XXS"), 131072);
+  assert.equal(map.size, 2);
+});
+
+test("parseCtxMapFromYaml: skips comments and blank lines", () => {
+  const yaml = `
+# This is a comment
+models:
+
+  # Model with large context
+  Qwen_Qwen3.6-35B-A3B-Q8_0:
+    cmd: |
+      /path/to/server
+      --ctx-size 65536
+      --temp 0.7
+`;
+  const map = parseCtxMapFromYaml(yaml);
+  assert.equal(map.get("Qwen_Qwen3.6-35B-A3B-Q8_0"), 65536);
+});
+
+test("parseCtxMapFromYaml: resets on top-level keys", () => {
+  const yaml = `
+models:
+  Qwen_Qwen3.6-35B-A3B-Q8_0:
+    cmd: |
+      /path/to/server
+      --ctx-size 262144
+hooks:
+  on_startup:
+    preload:
+      - Qwen_Qwen3.6-35B-A3B-Q8_0
+`;
+  const map = parseCtxMapFromYaml(yaml);
+  assert.equal(map.get("Qwen_Qwen3.6-35B-A3B-Q8_0"), 262144);
+  // "preload" is not a valid model id pattern, but even if it were,
+  // it's under hooks: so should not be included.
+  assert.ok(!map.has("preload"));
+});
+
+test("parseCtxMapFromYaml: empty yaml returns empty map", () => {
+  const map = parseCtxMapFromYaml("");
+  assert.equal(map.size, 0);
+});
+
+test("parseCtxMapFromYaml: model without ctx-size is skipped", () => {
+  const yaml = `
+models:
+  SmallModel:
+    cmd: |
+      /path/to/server
+      --temp 0.7
+`;
+  const map = parseCtxMapFromYaml(yaml);
+  assert.equal(map.get("SmallModel"), undefined);
+  assert.equal(map.size, 0);
+});
+
+// ── extractCtxFromRunningCmd ────────────────────────────────────────────
+
+test("extractCtxFromRunningCmd: parses --ctx-size from cmd string", () => {
+  const cmd = "/home/ai-server/llama.cpp/build/bin/llama-server --model /home/ai-server/models/Qwen.gguf --ctx-size 262144 --temp 0.7";
+  assert.equal(extractCtxFromRunningCmd(cmd), 262144);
+});
+
+test("extractCtxFromRunningCmd: undefined cmd returns null", () => {
+  assert.equal(extractCtxFromRunningCmd(undefined), null);
+});
+
+test("extractCtxFromRunningCmd: cmd without --ctx-size returns null", () => {
+  assert.equal(extractCtxFromRunningCmd("/path/to/server --temp 0.7"), null);
 });
 
 // ── isShardArtefact ─────────────────────────────────────────────────────