DeepSeek-V3/tokenizer_config.json
Ritk Patel d3de9e8d1f Fix apply_chat_template for function calling (Issue #860)
This commit fixes the chat template to properly handle function calls by:
1. Using safe dictionary access with .get()
2. Converting function arguments to JSON with |tojson filter
3. Adding better empty content handling

Fixes #860
2025-05-21 14:13:26 +05:30

35 lines
2.8 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"add_bos_token": false,
"add_eos_token": false,
"bos_token": {
"__type": "AddedToken",
"content": "<begin of sentence>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"chat_template": "{%- for message in messages -%}\n  {%- if message['role'] == 'system' -%}\n    {{- '<begin of sentence><system>' + message['content'] + '<end of sentence>' -}}\n  {%- elif message['role'] == 'user' -%}\n    {%- set tool_results = message.get('tool_results') or [] -%}\n    {%- if message.get('content') or tool_results -%}\n      {{- '<begin of sentence><User>' + (message.get('content') or '') -}}\n      {%- for result in tool_results -%}\n        {{- '\\n<tool result begin>' + result['name'] + '\\n```json\\n' + (result['result'] if result['result'] is string else result['result']|tojson) + '\\n```<tool result end>' -}}\n      {%- endfor -%}\n      {{- '<end of sentence>' -}}\n    {%- endif -%}\n  {%- elif message['role'] == 'assistant' -%}\n    {%- if message.get('content') -%}\n      {{- '<begin of sentence><Assistant>' + message['content'] + '<end of sentence>' -}}\n    {%- else -%}\n      {%- for tool in (message.get('tool_calls') or []) -%}\n        {%- set arguments = tool['function']['arguments'] -%}\n        {{- ('<Assistant><tool calls begin>' if loop.first else '\\n') + '<tool call begin>' + tool['type'] + '<tool sep>' + tool['function']['name'] + '\\n```json\\n' + (arguments if arguments is string else arguments|tojson) + '\\n```<tool call end>' -}}\n        {%- if loop.last -%}\n          {{- '<tool calls end><end of sentence>' -}}\n        {%- endif -%}\n      {%- endfor -%}\n    {%- endif -%}\n  {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n  {{- '<begin of sentence><Assistant>' -}}\n{%- endif -%}",
"clean_up_tokenization_spaces": true,
"model_max_length": 131072,
"eos_token": {
"__type": "AddedToken",
"content": "<end of sentence>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"tokenizer_class": "CodeLlamaTokenizer",
"pad_token": null,
"special_tokens_map_file": null,
"tokenizer_file": "tokenizer.json",
"unk_token": {
"__type": "AddedToken",
"content": "<unknown>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
}
}