common : delegate assistant continuation to underlying template handlers (#23089)

* common : delegate assistant continuation to template handler

* server : implement echo parameter to exclude assistant prefill in the response

* server : fix tests for prefill

* server : use existing llama template

* cont : clean up
This commit is contained in:
Aldehir Rojas
2026-05-17 07:36:05 -04:00
committed by GitHub
parent a6d6183dbc
commit 39cf5d6191
10 changed files with 1112 additions and 191 deletions
+28 -6
View File
@@ -43,11 +43,33 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
const autoparser & autoparser) {
// Create the result structure
common_chat_params data;
data.prompt = common_chat_template_direct_apply(tmpl, inputs);
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.preserved_tokens = autoparser.preserved_tokens;
data.prompt = common_chat_template_direct_apply(tmpl, inputs);
data.generation_prompt = common_chat_template_generation_prompt(tmpl, inputs);
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.preserved_tokens = autoparser.preserved_tokens;
auto parser = autoparser.build_parser(inputs);
std::string parser_generation_prompt = data.generation_prompt;
if (inputs.continue_final_message != COMMON_CHAT_CONTINUATION_NONE && !inputs.continue_msg.empty()) {
// Build up generation prompt manually
const auto & msg = inputs.continue_msg;
if (!autoparser.reasoning.start.empty()) {
data.generation_prompt = data.generation_prompt.substr(0, data.generation_prompt.find(autoparser.reasoning.start));
data.generation_prompt += autoparser.reasoning.start + msg.reasoning_content;
if (inputs.continue_final_message == COMMON_CHAT_CONTINUATION_CONTENT) {
data.generation_prompt += autoparser.reasoning.end;
}
}
if (inputs.continue_final_message == COMMON_CHAT_CONTINUATION_CONTENT) {
data.generation_prompt += msg.render_content();
}
data.prompt += data.generation_prompt;
}
auto parser = autoparser.build_parser(inputs, parser_generation_prompt);
data.parser = parser.save();
// Build grammar if tools are present
@@ -87,7 +109,7 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
return data;
}
common_peg_arena autoparser::build_parser(const generation_params & inputs) const {
common_peg_arena autoparser::build_parser(const generation_params & inputs, const std::string & generation_prompt) const {
if (!analysis_complete) {
throw std::invalid_argument("Cannot call build_parser on autoparser without performing analysis first, call analyze_template(...)");
}
@@ -121,7 +143,7 @@ common_peg_arena autoparser::build_parser(const generation_params & inputs) cons
} else {
parser = content.build_parser(ctx);
}
return pure_content ? p.prefix(inputs.generation_prompt, reasoning.start) + parser : p.prefix(inputs.generation_prompt, reasoning.start) << parser;
return pure_content ? p.prefix(generation_prompt, reasoning.start) + parser : p.prefix(generation_prompt, reasoning.start) << parser;
});
}
+10 -5
View File
@@ -60,16 +60,21 @@ struct generation_params {
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_AUTO;
bool stream = true;
std::string grammar;
bool add_generation_prompt = false;
bool enable_thinking = true;
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
std::string generation_prompt;
bool add_generation_prompt = false;
common_chat_continuation continue_final_message = COMMON_CHAT_CONTINUATION_NONE;
common_chat_msg continue_msg;
bool enable_thinking = true;
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
json extra_context;
bool add_bos = false;
bool add_eos = false;
bool is_inference = true;
bool add_inference = false;
bool mark_input = true; // whether to mark input strings in the jinja context
bool has_continuation() const {
return continue_final_message != COMMON_CHAT_CONTINUATION_NONE && !continue_msg.empty();
}
};
// ============================================================================
@@ -386,7 +391,7 @@ struct autoparser {
void analyze_template(const common_chat_template & tmpl);
// Build the PEG parser for this template
common_peg_arena build_parser(const generation_params & inputs) const;
common_peg_arena build_parser(const generation_params & inputs, const std::string & generation_prompt) const;
private:
// Collect tokens from entire analysis to preserve
+1 -1
View File
@@ -785,7 +785,7 @@ common_peg_parser common_chat_peg_builder::prefix(const std::string & s, const s
if (delimiter.empty()) {
return literal(s);
}
return literal(s.substr(0, s.rfind(delimiter)));
return literal(s.substr(0, s.find(delimiter)));
}
common_peg_parser common_chat_peg_builder::optspace(const std::string & tag) {
+214 -41
View File
@@ -70,6 +70,26 @@ static bool has_content_or_tool_calls(const common_chat_msg & msg) {
return !msg.content.empty() || !msg.tool_calls.empty();
}
std::string common_chat_msg::render_content(const std::string & delimiter) const {
if (!content.empty() && !content_parts.empty()) {
throw std::runtime_error("Cannot specify both content and content_parts");
}
if (!content.empty()) {
return content;
}
std::string text;
for (const auto & part : content_parts) {
if (part.type == "text") {
if (!text.empty()) {
text += delimiter;
}
text += part.text;
}
}
return text;
}
json common_chat_msg::to_json_oaicompat(bool concat_typed_text) const {
if (!content.empty() && !content_parts.empty()) {
throw std::runtime_error("Cannot specify both content and content_parts");
@@ -451,6 +471,22 @@ std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const json & too
return result;
}
common_chat_continuation common_chat_continuation_parse(const nlohmann::ordered_json & value) {
if (value.is_boolean() && value.get<bool>()) {
return COMMON_CHAT_CONTINUATION_AUTO;
}
if (value.is_string()) {
auto value_str = value.get<std::string>();
if (value_str == "reasoning_content") {
return COMMON_CHAT_CONTINUATION_REASONING;
}
if (value_str == "content") {
return COMMON_CHAT_CONTINUATION_CONTENT;
}
}
return COMMON_CHAT_CONTINUATION_NONE;
}
bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
if (use_jinja) {
try {
@@ -811,6 +847,36 @@ std::string common_chat_template_direct_apply(
return common_chat_template_direct_apply_impl(tmpl, inputs, std::nullopt, std::nullopt, std::nullopt);
}
static std::string common_chat_template_generation_prompt_impl(
const common_chat_template & tmpl,
const autoparser::generation_params & inputs,
const std::optional<json> & messages_override = std::nullopt,
const std::optional<json> & tools_override = std::nullopt,
const std::optional<json> & additional_context = std::nullopt) {
auto adjusted_messages = messages_override ? *messages_override : inputs.messages;
autoparser::generation_params params = inputs;
params.add_generation_prompt = false;
params.continue_final_message = COMMON_CHAT_CONTINUATION_NONE;
std::string no_gen_prompt = common_chat_template_direct_apply_impl(tmpl, params, adjusted_messages, tools_override, additional_context);
params.add_generation_prompt = true;
std::string gen_prompt = common_chat_template_direct_apply_impl(tmpl, params, adjusted_messages, tools_override, additional_context);
size_t prefix_len = 0;
size_t min_size = std::min(no_gen_prompt.size(), gen_prompt.size());
while (prefix_len < min_size && no_gen_prompt[prefix_len] == gen_prompt[prefix_len]) {
prefix_len++;
}
return gen_prompt.substr(prefix_len);
}
std::string common_chat_template_generation_prompt(
const common_chat_template & tmpl,
const autoparser::generation_params & inputs) {
return common_chat_template_generation_prompt_impl(tmpl, inputs, std::nullopt, std::nullopt, std::nullopt);
}
static common_chat_params common_chat_params_init_ministral_3(const common_chat_template & tmpl,
const autoparser::generation_params & inputs) {
common_chat_params data;
@@ -863,6 +929,7 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
data.thinking_start_tag = "[THINK]";
data.thinking_end_tag = "[/THINK]";
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs, /* messages_override = */ adjusted_messages);
data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs, /* messages_override = */ adjusted_messages);
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.preserved_tokens = {
"[THINK]",
@@ -871,8 +938,19 @@ static common_chat_params common_chat_params_init_ministral_3(const common_chat_
"[ARGS]",
};
if (inputs.has_continuation()) {
const auto & msg = inputs.continue_msg;
data.generation_prompt = "[THINK]" + msg.reasoning_content;
if (inputs.continue_final_message == COMMON_CHAT_CONTINUATION_CONTENT) {
data.generation_prompt += "[/THINK]" + msg.render_content();
}
data.prompt += data.generation_prompt;
}
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
auto generation_prompt = p.prefix(inputs.generation_prompt, "[THINK]");
auto generation_prompt = p.eps();
auto reasoning =
extract_reasoning ? p.optional("[THINK]" + p.reasoning(p.until("[/THINK]")) + "[/THINK]") : p.eps();
@@ -963,6 +1041,7 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
}
data.prompt = prompt;
data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs, /* messages_override= */ adjusted_messages);
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.supports_thinking = true;
@@ -972,6 +1051,18 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
"<|channel|>", "<|constrain|>", "<|message|>", "<|start|>", "<|end|>",
};
// Adjust prompt for continuation
if (inputs.has_continuation()) {
const auto & msg = inputs.continue_msg;
data.generation_prompt = "<|start|>assistant<|channel|>analysis<|message|>" + msg.reasoning_content;
if (inputs.continue_final_message == COMMON_CHAT_CONTINUATION_CONTENT) {
data.generation_prompt += "<|end|><|start|>assistant<|channel|>final<|message|>" + msg.render_content();
}
data.prompt += data.generation_prompt;
}
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
auto has_response_format = !inputs.json_schema.is_null() && inputs.json_schema.is_object();
auto include_grammar = has_response_format || (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE);
@@ -1080,12 +1171,14 @@ static common_chat_params common_chat_params_init_gemma4(const common_chat_templ
common_chat_params data;
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs);
data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs);
if (inputs.add_generation_prompt && string_ends_with(data.prompt, "<turn|>\n")) {
// This may happen if the model generates content + tool_call, the
// template does not add the model's next turn and confuses the model
// from emitting its proper reasoning token sequence.
data.prompt += "<|turn>model\n";
data.generation_prompt = "<|turn>model\n";
data.prompt += data.generation_prompt;
}
data.format = COMMON_CHAT_FORMAT_PEG_GEMMA4;
@@ -1101,13 +1194,25 @@ static common_chat_params common_chat_params_init_gemma4(const common_chat_templ
"<|turn>",
};
if (inputs.has_continuation()) {
const auto & msg = inputs.continue_msg;
data.generation_prompt = string_ends_with(data.prompt, "<turn|>\n") ? "<|turn>model\n" : "";
data.generation_prompt += "<|channel>thought\n" + msg.reasoning_content;
if (inputs.continue_final_message == COMMON_CHAT_CONTINUATION_CONTENT) {
data.generation_prompt += "<channel|>" + msg.render_content();
}
data.prompt += data.generation_prompt;
}
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
auto has_response_format = !inputs.json_schema.is_null() && inputs.json_schema.is_object();
auto include_grammar = has_response_format || (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE);
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
auto start = p.rule("start", p.prefix(inputs.generation_prompt, "<|channel>"));
auto start = p.rule("start", p.optional(p.literal("<|turn>model\n")));
if (extract_reasoning) {
p.rule("thought", p.literal("<|channel>thought") + p.space() + p.reasoning(p.until("<channel|>")) + p.literal("<channel|>"));
@@ -1224,15 +1329,22 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
const autoparser::generation_params & inputs) {
common_chat_params data;
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs);
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.preserved_tokens = {
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs);
data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs);
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.preserved_tokens = {
">>>all",
};
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
if (inputs.has_continuation()) {
const auto & msg = inputs.continue_msg;
data.generation_prompt = "<|start_header_id|>assistant<|end_header_id|>\n\n>>>all\n" + msg.render_content();
data.prompt += data.generation_prompt;
}
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
// Functionary v3.2 format:
// - Normal content: >>>all\n{content}
@@ -1244,7 +1356,7 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
// When no tools, content goes until end
auto content_until_tool = p.literal("all\n") + p.content(p.until(">>>"));
auto content_until_end = p.literal("all\n") + p.content(p.rest());
auto generation_prompt = p.literal(inputs.generation_prompt);
auto generation_prompt = p.literal("<|start_header_id|>assistant<|end_header_id|>\n\n>>>");
// If no tools or tool_choice is NONE, just parse content
if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
@@ -1318,9 +1430,10 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
const autoparser::generation_params & inputs) {
common_chat_params data;
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs);
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.supports_thinking = true;
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs);
data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs);
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.supports_thinking = true;
data.preserved_tokens = {
"<|tool_calls_section_begin|>",
"<|tool_calls_section_end|>",
@@ -1343,10 +1456,22 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
const std::string THINK_START = "<think>";
const std::string THINK_END = "</think>";
const std::string GEN_PROMPT = "<|im_assistant|>assistant<|im_middle|>";
data.thinking_start_tag = THINK_START;
data.thinking_end_tag = THINK_END;
if (inputs.has_continuation()) {
const auto & msg = inputs.continue_msg;
data.generation_prompt = GEN_PROMPT + THINK_START + msg.reasoning_content;
if (inputs.continue_final_message == COMMON_CHAT_CONTINUATION_CONTENT) {
data.generation_prompt += THINK_END + msg.render_content();
}
data.prompt += data.generation_prompt;
}
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
// Kimi K2 Thinking format:
// - Reasoning: <think>{reasoning}</think>
@@ -1366,7 +1491,7 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
auto reasoning = extract_reasoning ? p.optional(THINK_START + p.reasoning(
p.until_one_of({ THINK_END, "<|tool_calls_section_begin|>", "<|tool_call_begin|>" })) +
p.optional(p.literal(THINK_END))) : p.eps();
auto generation_prompt = p.prefix(inputs.generation_prompt, THINK_START);
auto generation_prompt = p.literal(GEN_PROMPT);
// Content only parser (no tools)
@@ -1442,6 +1567,7 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
common_chat_params data;
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs);
data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs);
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.supports_thinking = true;
data.preserved_tokens = {
@@ -1461,12 +1587,24 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
const std::string TOOL_CALL_END = "<|tool_call_end|>";
const std::string THINK_START = "<think>";
const std::string THINK_END = "</think>";
const std::string GEN_PROMPT = "<|im_start|>assistant\n";
data.thinking_start_tag = THINK_START;
data.thinking_end_tag = THINK_END;
if (inputs.has_continuation()) {
const auto & msg = inputs.continue_msg;
data.generation_prompt = GEN_PROMPT + THINK_START + msg.reasoning_content;
if (inputs.continue_final_message == COMMON_CHAT_CONTINUATION_CONTENT) {
data.generation_prompt += THINK_END + msg.render_content();
}
data.prompt += data.generation_prompt;
}
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
auto generation_prompt = p.prefix(inputs.generation_prompt, THINK_START);
auto generation_prompt = p.literal(GEN_PROMPT);
auto end = p.end();
auto reasoning = p.eps();
@@ -1521,6 +1659,7 @@ static common_chat_params common_chat_params_init_lfm2_5(const common_chat_templ
common_chat_params data;
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs);
data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs);
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.supports_thinking = true;
data.preserved_tokens = {
@@ -1536,12 +1675,24 @@ static common_chat_params common_chat_params_init_lfm2_5(const common_chat_templ
const std::string THINK_START = "<think>";
const std::string THINK_END = "</think>";
const std::string GEN_PROMPT = "<|im_start|>assistant\n";
data.thinking_start_tag = THINK_START;
data.thinking_end_tag = THINK_END;
if (inputs.has_continuation()) {
const auto & msg = inputs.continue_msg;
data.generation_prompt = GEN_PROMPT + THINK_START + msg.reasoning_content;
if (inputs.continue_final_message == COMMON_CHAT_CONTINUATION_CONTENT) {
data.generation_prompt += THINK_END + msg.render_content();
}
data.prompt += data.generation_prompt;
}
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
auto generation_prompt = p.prefix(inputs.generation_prompt, THINK_START);
auto generation_prompt = p.literal(GEN_PROMPT);
auto end = p.end();
auto reasoning = p.eps();
@@ -1592,6 +1743,7 @@ static common_chat_params common_chat_params_init_gigachat_v3(
common_chat_params data;
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs);
data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs);
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.supports_thinking = false;
data.preserved_tokens = {
@@ -1599,6 +1751,12 @@ static common_chat_params common_chat_params_init_gigachat_v3(
"<|role_sep|>\n",
};
if (inputs.has_continuation()) {
const auto & msg = inputs.continue_msg;
data.generation_prompt = "assistant<|role_sep|>\n" + msg.render_content();
data.prompt += data.generation_prompt;
}
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
const auto *tool_call_start_prefix = "<|message_sep|>\n\nfunction call<|role_sep|>\n";
@@ -1634,7 +1792,7 @@ static common_chat_params common_chat_params_init_gigachat_v3(
ret = p.content(p.rest());
}
return p.literal(inputs.generation_prompt) + ret;
return p.literal("assistant<|role_sep|>\n") + ret;
});
data.parser = parser.save();
@@ -1662,12 +1820,13 @@ static common_chat_params common_chat_params_init_deepseek_v3_2(const common_cha
const autoparser::generation_params & inputs) {
common_chat_params data;
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs);
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.supports_thinking = true;
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs);
data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs);
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.supports_thinking = true;
data.thinking_start_tag = "<think>";
data.thinking_end_tag = "</think>";
data.preserved_tokens = {
data.preserved_tokens = {
"DSML",
"<think>",
"</think>",
@@ -1687,9 +1846,21 @@ static common_chat_params common_chat_params_init_deepseek_v3_2(const common_cha
const std::string INVOKE_END = "</" + DSML + "invoke>";
const std::string PARAM_START = "<" + DSML + "parameter";
const std::string PARAM_END = "</" + DSML + "parameter>";
const std::string GEN_PROMPT = "<Assistant>";
if (inputs.has_continuation()) {
const auto & msg = inputs.continue_msg;
data.generation_prompt = GEN_PROMPT + THINK_START + msg.reasoning_content;
if (inputs.continue_final_message == COMMON_CHAT_CONTINUATION_CONTENT) {
data.generation_prompt += THINK_END + msg.render_content();
}
data.prompt += data.generation_prompt;
}
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
auto generation_prompt = p.prefix(inputs.generation_prompt, THINK_START);
auto generation_prompt = p.literal(GEN_PROMPT);
auto end = p.end();
auto reasoning = p.eps();
@@ -2116,21 +2287,6 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
return std::nullopt;
}
static std::string common_chat_templates_generation_prompt(const common_chat_template & tmpl, const autoparser::generation_params & inputs) {
autoparser::generation_params params = inputs;
params.add_generation_prompt = false;
std::string no_gen_prompt = common_chat_template_direct_apply_impl(tmpl, params);
params.add_generation_prompt = true;
std::string gen_prompt = common_chat_template_direct_apply_impl(tmpl, params);
size_t prefix_len = 0;
size_t min_size = std::min(no_gen_prompt.size(), gen_prompt.size());
while (prefix_len < min_size && no_gen_prompt[prefix_len] == gen_prompt[prefix_len]) {
prefix_len++;
}
return gen_prompt.substr(prefix_len);
}
static common_chat_params common_chat_templates_apply_jinja(const struct common_chat_templates * tmpls,
const struct common_chat_templates_inputs & inputs) {
autoparser::generation_params params;
@@ -2149,6 +2305,27 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
params.add_bos = tmpls->add_bos;
params.add_eos = tmpls->add_eos;
params.continue_final_message = inputs.continue_final_message;
if (params.continue_final_message != COMMON_CHAT_CONTINUATION_NONE) {
params.add_generation_prompt = false;
if (!inputs.messages.empty()) {
// Render messages[:-1] and store continuation message separately
params.continue_msg = inputs.messages.back();
params.messages.erase(params.messages.size() - 1);
}
if (params.continue_final_message == COMMON_CHAT_CONTINUATION_AUTO && !inputs.messages.empty()) {
// Resolve based on message content
params.continue_final_message = COMMON_CHAT_CONTINUATION_CONTENT;
if (!params.continue_msg.reasoning_content.empty() &&
params.continue_msg.content.empty() &&
params.continue_msg.content_parts.empty()) {
params.continue_final_message = COMMON_CHAT_CONTINUATION_REASONING;
}
}
}
if (src.find("<|channel|>") == std::string::npos) {
// map developer to system for all models except for GPT-OSS
workaround::map_developer_role_to_system(params.messages);
@@ -2169,8 +2346,6 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
workaround::func_args_not_string(params.messages);
}
params.generation_prompt = common_chat_templates_generation_prompt(tmpl, params);
params.extra_context = common_chat_extra_context();
for (auto el : inputs.chat_template_kwargs) {
params.extra_context[el.first] = json::parse(el.second);
@@ -2200,17 +2375,16 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
auto params_copy = params;
params_copy.reasoning_format = COMMON_REASONING_FORMAT_NONE;
data.prompt = common_chat_template_direct_apply_impl(tmpl, params_copy);
data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, params);
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.generation_prompt = params.generation_prompt;
auto parser = build_chat_peg_parser([&params](common_chat_peg_builder &p) {
return p.prefix(params.generation_prompt) << p.content(p.rest());
auto parser = build_chat_peg_parser([&data](common_chat_peg_builder &p) {
return p.literal(data.generation_prompt) << p.content(p.rest());
});
data.parser = parser.save();
return data;
}
if (auto result = common_chat_try_specialized_template(tmpl, src, params)) {
result->generation_prompt = params.generation_prompt;
return *result;
}
@@ -2224,7 +2398,6 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
auto_params.thinking_start_tag = trim_whitespace(autoparser.reasoning.start);
auto_params.thinking_end_tag = trim_whitespace(autoparser.reasoning.end);
}
auto_params.generation_prompt = params.generation_prompt;
common_peg_arena arena;
arena.load(auto_params.parser);
LOG_DBG("%s: generated parser:\n%s\n\nparser generation prompt: %s\n", __func__, arena.dump(arena.root()).c_str(), auto_params.generation_prompt.c_str());
+21 -2
View File
@@ -89,6 +89,8 @@ struct common_chat_msg {
nlohmann::ordered_json to_json_oaicompat(bool concat_typed_text = false) const;
std::string render_content(const std::string & delimiter = "\n\n") const;
bool empty() const {
return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() &&
tool_name.empty() && tool_call_id.empty();
@@ -164,12 +166,22 @@ enum common_chat_format {
COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
};
// Continuation method provided via `continue_final_message`
enum common_chat_continuation {
COMMON_CHAT_CONTINUATION_NONE,
COMMON_CHAT_CONTINUATION_AUTO,
COMMON_CHAT_CONTINUATION_REASONING,
COMMON_CHAT_CONTINUATION_CONTENT,
};
struct common_chat_templates_inputs {
std::vector<common_chat_msg> messages;
std::string grammar;
std::string json_schema;
bool add_generation_prompt = true;
bool use_jinja = true;
bool add_generation_prompt = true;
common_chat_continuation continue_final_message = COMMON_CHAT_CONTINUATION_NONE;
bool use_jinja = true;
// Parameters below only supported when use_jinja is true
std::vector<common_chat_tool> tools;
common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
@@ -207,6 +219,7 @@ struct common_chat_parser_params {
bool reasoning_in_content = false;
std::string generation_prompt;
bool parse_tool_calls = true;
bool echo = false; // Include assistant prefilled msg in output
bool debug = false; // Enable debug output for PEG parser
common_peg_arena parser = {};
common_chat_parser_params() = default;
@@ -267,6 +280,8 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const nlohmann::or
std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const nlohmann::ordered_json & tools);
common_chat_continuation common_chat_continuation_parse(const nlohmann::ordered_json & value);
// DEPRECATED: only used in tests
nlohmann::ordered_json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text = false);
@@ -279,6 +294,10 @@ std::string common_chat_template_direct_apply(
const common_chat_template & tmpl,
const autoparser::generation_params & inputs);
std::string common_chat_template_generation_prompt(
const common_chat_template & tmpl,
const autoparser::generation_params & inputs);
std::optional<common_chat_params> common_chat_try_specialized_template(
const common_chat_template & tmpl,
const std::string & src,