Files
shahondin1624 139dcbce74 messages: drop assistant entries with only thinking content
Pi's session can contain assistant entries whose AssistantMessage.content
is entirely thinking blocks (no text, no tool calls) — typical after an
aborted turn or when reasoning is edited out. Our contextToOpenAIMessages
was emitting those as { role: "assistant", content: null }.

When such a message is at the end of the context, llama.cpp's chat
template interprets the trailing assistant entry as an "assistant
response prefill" attempt. Reasoning-model templates (MiniMax M2.7,
Qwen, etc.) have enable_thinking set, and the server rejects this
combination with HTTP 400:
    "Assistant response prefill is incompatible with enable_thinking."

Fix: skip assistant entries where extractAssistantText and
extractToolCalls both return empty. Thinking blocks aren't re-fed to
the model anyway, so dropping the wrapper message loses no information.

+ two regression tests in tests/messages.test.ts.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 23:48:16 +02:00

127 lines
3.1 KiB
TypeScript

import type {
AssistantMessage,
Context,
TextContent,
Tool,
ToolCall,
ToolResultMessage,
UserMessage,
} from "@mariozechner/pi-ai";
/** Role tag carried by every {@link OpenAIMessage} declared in this module. */
export type OpenAIRole = "system" | "user" | "assistant" | "tool";
/**
 * One function-style tool call, in the shape stored in
 * `OpenAIMessage.tool_calls`.
 */
export interface OpenAIToolCall {
/** Call identifier; `contextToOpenAIMessages` copies it from `ToolCall.id`. */
id: string;
/** Always the literal `"function"`. */
type: "function";
/**
 * Name of the function plus its arguments as a string;
 * `contextToOpenAIMessages` stores the JSON-serialized call arguments here.
 */
function: { name: string; arguments: string };
}
/**
 * A single chat message as produced by `contextToOpenAIMessages`.
 * Which optional fields are present depends on `role`.
 */
export interface OpenAIMessage {
role: OpenAIRole;
/**
 * Message text. `contextToOpenAIMessages` writes `null` for an assistant
 * message that has tool calls but no text.
 */
content?: string | null;
/**
 * Set by `contextToOpenAIMessages` only on assistant messages that contain
 * at least one tool call.
 */
tool_calls?: OpenAIToolCall[];
/**
 * Set by `contextToOpenAIMessages` on `"tool"` messages, taken from the
 * tool result's `toolCallId`.
 */
tool_call_id?: string;
}
/** Function tool definition, in the shape returned by `toolsToOpenAI`. */
export interface OpenAITool {
/** Always the literal `"function"`. */
type: "function";
function: {
name: string;
description: string;
/**
 * Parameter description for the tool. `toolsToOpenAI` passes
 * `Tool.parameters` through without inspecting it, hence `unknown`.
 */
parameters: unknown;
};
}
/**
 * Flattens the content of a user message into plain text.
 *
 * A string is handed back untouched. For an array of content parts, only
 * parts whose `type` is `"text"` contribute: their `text` values are joined
 * with a newline, and every other kind of part is ignored.
 *
 * @param content - The `content` field of a user message.
 * @returns The text of the message; `""` when an array holds no text parts.
 */
function extractUserText(content: UserMessage["content"]): string {
  if (typeof content === "string") {
    return content;
  }

  const textParts: TextContent[] = [];
  for (const part of content) {
    if (part.type === "text") {
      textParts.push(part);
    }
  }
  return textParts.map((part) => part.text).join("\n");
}
/**
 * Collects the visible text of an assistant message.
 *
 * Only blocks whose `type` is `"text"` are considered; all other blocks
 * are skipped. The `text` values are joined with a newline in their
 * original order.
 *
 * @param content - The `content` array of an assistant message.
 * @returns The joined text, or `""` when there is no text block.
 */
function extractAssistantText(content: AssistantMessage["content"]): string {
  const lines: string[] = [];
  for (const block of content) {
    if (block.type !== "text") {
      continue;
    }
    lines.push(block.text);
  }
  return lines.join("\n");
}
/**
 * Picks the tool-call blocks out of an assistant message.
 *
 * @param content - The `content` array of an assistant message.
 * @returns A new array holding, in original order, every block whose
 *   `type` is `"toolCall"`; empty when there is none.
 */
function extractToolCalls(content: AssistantMessage["content"]): ToolCall[] {
  const calls: ToolCall[] = [];
  for (const block of content) {
    if (block.type === "toolCall") {
      calls.push(block);
    }
  }
  return calls;
}
/**
 * Reduces a tool result message to plain text.
 *
 * Parts of `msg.content` whose `type` is `"text"` are kept and their `text`
 * values joined with a newline; every other part is dropped.
 *
 * @param msg - The tool result message to read.
 * @returns The joined text, or `""` when the result has no text part.
 */
function extractToolResultText(msg: ToolResultMessage): string {
  const textParts: TextContent[] = [];
  for (const part of msg.content) {
    if (part.type === "text") {
      textParts.push(part);
    }
  }
  return textParts.map(({ text }) => text).join("\n");
}
/**
 * Converts a pi-ai `Context` into a list of OpenAI-style chat messages.
 *
 * - A non-empty `systemPrompt` becomes a leading `"system"` message.
 * - User messages are reduced to their text; a user message with no text
 *   is left out.
 * - Assistant messages carry their text (or `null` when there is none) and,
 *   if present, their tool calls with JSON-serialized arguments. An
 *   assistant message with neither text nor tool calls is left out.
 * - Tool results become `"tool"` messages keyed by `tool_call_id`.
 * - Messages with any other role are ignored.
 *
 * @param context - The conversation context to convert.
 * @returns The converted messages, in conversation order.
 */
export function contextToOpenAIMessages(context: Context): OpenAIMessage[] {
  const messages: OpenAIMessage[] = [];

  if (context.systemPrompt) {
    messages.push({ role: "system", content: context.systemPrompt });
  }

  for (const entry of context.messages) {
    switch (entry.role) {
      case "user": {
        const userText = extractUserText(entry.content);
        if (userText) {
          messages.push({ role: "user", content: userText });
        }
        break;
      }

      case "assistant": {
        const assistantText = extractAssistantText(entry.content);
        const calls = extractToolCalls(entry.content);
        const hasCalls = calls.length > 0;

        // An entry made up solely of thinking blocks has nothing to send:
        // thinking is not fed back to the model on later turns, and a bare
        // `{role:"assistant", content:null}` would be read as a "response
        // prefill" attempt, which llama.cpp rejects on reasoning-model chat
        // templates with "Assistant response prefill is incompatible with
        // enable_thinking".
        if (!assistantText && !hasCalls) {
          break;
        }

        const assistantMsg: OpenAIMessage = {
          role: "assistant",
          content: assistantText || null,
        };
        if (hasCalls) {
          assistantMsg.tool_calls = calls.map(
            ({ id, name, arguments: args }) => ({
              id,
              type: "function",
              function: { name, arguments: JSON.stringify(args ?? {}) },
            }),
          );
        }
        messages.push(assistantMsg);
        break;
      }

      case "toolResult": {
        messages.push({
          role: "tool",
          tool_call_id: entry.toolCallId,
          content: extractToolResultText(entry),
        });
        break;
      }
    }
  }

  return messages;
}
/**
 * Converts pi-ai tool definitions into OpenAI-style function tool entries.
 *
 * @param tools - Tool definitions to convert; may be omitted.
 * @returns One `{ type: "function", function: {...} }` entry per tool, in
 *   the same order, or `undefined` when `tools` is missing or empty.
 */
export function toolsToOpenAI(tools?: Tool[]): OpenAITool[] | undefined {
  if (!tools) {
    return undefined;
  }
  if (tools.length === 0) {
    return undefined;
  }

  const converted: OpenAITool[] = [];
  for (const tool of tools) {
    converted.push({
      type: "function",
      function: {
        name: tool.name,
        description: tool.description,
        // Passed through as-is; the target field is typed `unknown`.
        parameters: tool.parameters,
      },
    });
  }
  return converted;
}