Update local qwen
This commit is contained in:
+14
-14
@@ -1,7 +1,7 @@
|
||||
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
||||
|
||||
// Point to your llama-server. Override with env vars if needed.
|
||||
const BASE_URL = process.env.LLAMA_BASE_URL ?? "http://127.0.0.1:8088/v1";
|
||||
// Point to your local Qwen server (served by llama.cpp). Override with the LLAMA_BASE_URL, LLAMA_MODEL_ID, LLAMA_CTX and LLAMA_MAX_OUT env vars if needed.
|
||||
const BASE_URL = process.env.LLAMA_BASE_URL ?? "http://127.0.0.1:8123/v1";
|
||||
const MODEL_ID = process.env.LLAMA_MODEL_ID ?? "qwen-local";
|
||||
const CTX = Number(process.env.LLAMA_CTX ?? 262144);
|
||||
const MAX_OUT = Number(process.env.LLAMA_MAX_OUT ?? 16384);
|
||||
@@ -16,18 +16,18 @@ export default function (pi: ExtensionAPI) {
|
||||
{
|
||||
id: MODEL_ID,
|
||||
name: "Qwen3.6-35B-A3B (local, llama.cpp)",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: CTX,
|
||||
maxTokens: MAX_OUT,
|
||||
compat: {
|
||||
thinkingFormat: "qwen-chat-template",
|
||||
maxTokensField: "max_tokens",
|
||||
supportsDeveloperRole: false,
|
||||
supportsReasoningEffort: false,
|
||||
supportsUsageInStreaming: true,
|
||||
},
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: CTX,
|
||||
maxTokens: MAX_OUT,
|
||||
compat: {
|
||||
thinkingFormat: "qwen-chat-template",
|
||||
maxTokensField: "max_tokens",
|
||||
supportsDeveloperRole: false,
|
||||
supportsReasoningEffort: false,
|
||||
supportsUsageInStreaming: true,
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user