f7af660727
Endpoint rewrites:
- GET /v1/models + /running → merged listModels() with running flag
- POST /models/load → GET /upstream/<id>/health (warm load)
- POST /models/unload → POST /api/models/unload/<id> (no body)
- Added POST /api/models/unload for unloadAll()
Config migration:
- Preset path: ~/.llama-models.ini → ~/.config/llama-swap/config.yaml
- Service unit: llama-server.service → llama-swap.service
- setPresetKey() rewritten from INI awk to YAML-aware awk for
editing --ctx-size/--temp/--n-gpu-layers in cmd: blocks
Per-model ctx-size (fixes 0/33k bug):
- parseCtxMapFromYaml(): walks config.yaml, extracts --ctx-size N per
model block → Map<id, ctxSize>
- extractCtxFromRunningCmd(): parses --ctx-size from /running cmd string
- discoverModels(): Promise.all(listModels, listRunning, readPreset),
ctx priority: running cmd → yaml → 32768 fallback
- Removed broken extractCtxSize stub and dangling imports
Tests: 14 passing (parseCtxMapFromYaml ×5, extractCtxFromRunningCmd ×3,
isShardArtefact ×3, isReasoningModel ×3)
README: full rewrite covering the llama-swap architecture, YAML config format,
and new endpoints; troubleshooting table updated.
157 lines
5.9 KiB
TypeScript
157 lines
5.9 KiB
TypeScript
/**
 * Unit tests for ai-server/router-utils.ts (pure helpers).
 *
 *   node --experimental-strip-types --test tests/router-utils.test.ts
 */

import assert from "node:assert/strict";
import { test } from "node:test";
import {
  parseCtxMapFromYaml,
  extractCtxFromRunningCmd,
  isReasoningModel,
  isShardArtefact,
} from "../ai-server/router-utils.ts";

// ── parseCtxMapFromYaml ─────────────────────────────────────────────────

// Two model blocks under `models:`, each carrying its own --ctx-size inside a
// `cmd: |` block scalar. The map must hold exactly one entry per model id.
// NOTE(review): fixture nesting is reconstructed as 2-space YAML indentation —
// confirm it matches what parseCtxMapFromYaml expects.
test("parseCtxMapFromYaml: extracts ctx-size from model blocks", () => {
  const yaml = `
models:
  Qwen_Qwen3.6-35B-A3B-Q8_0:
    cmd: |
      /home/ai-server/llama.cpp/build/bin/llama-server
      --model /home/ai-server/models/Qwen_Qwen3.6-35B-A3B-Q8_0.gguf
      --ctx-size 262144
      --temp 0.7
  MiniMax-M2.7-IQ3_XXS:
    cmd: |
      /home/ai-server/llama.cpp/build/bin/llama-server
      --model /home/ai-server/models/MiniMax-M2.7-UD-IQ3_XXS.gguf
      --ctx-size 131072
      --temp 1.0
`;
  const map = parseCtxMapFromYaml(yaml);
  assert.equal(map.get("Qwen_Qwen3.6-35B-A3B-Q8_0"), 262144);
  assert.equal(map.get("MiniMax-M2.7-IQ3_XXS"), 131072);
  // No extra keys (e.g. "models" or "cmd") may leak into the map.
  assert.equal(map.size, 2);
});

// Comment lines (top-level and nested) and a blank line sit between `models:`
// and the model block; the ctx-size must still be attributed to the model id.
// NOTE(review): fixture nesting is reconstructed as 2-space YAML indentation —
// confirm it matches what parseCtxMapFromYaml expects.
test("parseCtxMapFromYaml: skips comments and blank lines", () => {
  const yaml = `
# This is a comment
models:

  # Model with large context
  Qwen_Qwen3.6-35B-A3B-Q8_0:
    cmd: |
      /path/to/server
      --ctx-size 65536
      --temp 0.7
`;
  const map = parseCtxMapFromYaml(yaml);
  assert.equal(map.get("Qwen_Qwen3.6-35B-A3B-Q8_0"), 65536);
});

// A second top-level section (`hooks:`) follows `models:`. Keys nested under it
// must not be treated as model ids.
// NOTE(review): fixture nesting is reconstructed as 2-space YAML indentation,
// with `models:` and `hooks:` at column 0 — confirm against the parser.
test("parseCtxMapFromYaml: resets on top-level keys", () => {
  const yaml = `
models:
  Qwen_Qwen3.6-35B-A3B-Q8_0:
    cmd: |
      /path/to/server
      --ctx-size 262144
hooks:
  on_startup:
    preload:
      - Qwen_Qwen3.6-35B-A3B-Q8_0
`;
  const map = parseCtxMapFromYaml(yaml);
  assert.equal(map.get("Qwen_Qwen3.6-35B-A3B-Q8_0"), 262144);
  // "preload" is not a valid model id pattern, but even if it were,
  // it's under hooks: so should not be included.
  assert.ok(!map.has("preload"));
});

// Degenerate input: an empty document yields an empty map.
test("parseCtxMapFromYaml: empty yaml returns empty map", () => {
  const map = parseCtxMapFromYaml("");
  assert.equal(map.size, 0);
});

// A model whose cmd has no --ctx-size flag must be absent from the map.
// NOTE(review): fixture nesting is reconstructed as 2-space YAML indentation —
// confirm it matches what parseCtxMapFromYaml expects.
test("parseCtxMapFromYaml: model without ctx-size is skipped", () => {
  const yaml = `
models:
  SmallModel:
    cmd: |
      /path/to/server
      --temp 0.7
`;
  const map = parseCtxMapFromYaml(yaml);
  assert.equal(map.get("SmallModel"), undefined);
  assert.equal(map.size, 0);
});

// ── extractCtxFromRunningCmd ────────────────────────────────────────────

// Happy path: the flag sits in the middle of a single-line command string.
test("extractCtxFromRunningCmd: parses --ctx-size from cmd string", () => {
  const cmd = "/home/ai-server/llama.cpp/build/bin/llama-server --model /home/ai-server/models/Qwen.gguf --ctx-size 262144 --temp 0.7";
  assert.equal(extractCtxFromRunningCmd(cmd), 262144);
});

// A missing cmd is reported as null.
test("extractCtxFromRunningCmd: undefined cmd returns null", () => {
  assert.equal(extractCtxFromRunningCmd(undefined), null);
});

// A command that carries other flags but no --ctx-size yields null.
test("extractCtxFromRunningCmd: cmd without --ctx-size returns null", () => {
  assert.equal(extractCtxFromRunningCmd("/path/to/server --temp 0.7"), null);
});

// ── isShardArtefact ─────────────────────────────────────────────────────

// Ids ending in "-<digits>-of-<digits>" are shard artefacts, with or without
// zero-padding.
test("isShardArtefact: GGUF shard pattern returns true", () => {
  assert.equal(isShardArtefact("MiniMax-M2.7-UD-IQ3_XXS-00001-of-00003"), true);
  assert.equal(isShardArtefact("Big-Model-00042-of-00099"), true);
  assert.equal(isShardArtefact("tiny-1-of-5"), true, "zero-padding not required");
});

// Ordinary preset ids, including ones containing digits and dashes, are not
// shard artefacts.
test("isShardArtefact: clean preset names return false", () => {
  assert.equal(isShardArtefact("MiniMax-M2.7-IQ3_XXS"), false);
  assert.equal(isShardArtefact("Qwen_Qwen3.6-35B-A3B-Q8_0"), false);
  assert.equal(isShardArtefact("small-7b"), false);
});

// Near-misses: each id breaks exactly one part of the shard suffix shape.
test("isShardArtefact: non-shard numeric patterns are not matched", () => {
  assert.equal(isShardArtefact("foo-of-3"), false, "'of' must be preceded by digits");
  assert.equal(isShardArtefact("model-001"), false, "trailing digits alone don't match");
  assert.equal(isShardArtefact("00001-of-00003-mid"), false, "must be at the end of the id");
});

// ── isReasoningModel ────────────────────────────────────────────────────

// One or more representative ids per family that must be classified as
// reasoning models.
test("isReasoningModel: known reasoning families return true", () => {
  assert.equal(isReasoningModel("MiniMax-M2.7-IQ3_XXS"), true);
  assert.equal(isReasoningModel("MiniMax-M2.7-IQ4_XS"), true);
  assert.equal(isReasoningModel("Qwen3.6-35B-Claude-Opus-Distilled-Q5_K_M"), true);
  assert.equal(isReasoningModel("Qwen_Qwen3.6-35B-A3B-Q8_0"), true, "underscore-prefixed variant still matches");
  assert.equal(isReasoningModel("Qwen3-Coder-30B-Q8_0"), true);
  assert.equal(isReasoningModel("Qwen3-Coder-Next-IQ4_XS"), true);
  assert.equal(isReasoningModel("Qwen3-VL-30B-Q8_0"), true);
  assert.equal(isReasoningModel("MiMo-V2-Flash-IQ2_M"), true);
  assert.equal(isReasoningModel("MiMo-V2-Flash-IQ2_XXS"), true);
  assert.equal(isReasoningModel("gpt-oss-120b-MXFP4"), true);
  assert.equal(isReasoningModel("Devstral-2-123B-IQ3_XXS"), true);
});

// Ids from families that must be classified as non-reasoning.
test("isReasoningModel: non-reasoning families return false", () => {
  assert.equal(isReasoningModel("Anubis-70B-v1.2-Q5_K_M"), false);
  assert.equal(isReasoningModel("Euryale-v2.3-IQ4_XS"), false);
  assert.equal(isReasoningModel("Gemma-4-31B-Q8_0"), false);
  assert.equal(isReasoningModel("Skyfall-31B-v4.2-Q8_0"), false);
  assert.equal(isReasoningModel("Voxtral-Small-24B-Q8_0"), false);
});

// Unrecognised ids, including the empty string, fall back to false.
test("isReasoningModel: unknown model id returns false (conservative default)", () => {
  assert.equal(isReasoningModel("Mistral-7B-Instruct-Q4"), false);
  assert.equal(isReasoningModel("RandomModel-Q8"), false);
  assert.equal(isReasoningModel(""), false);
});