mirror of
https://github.com/deepseek-ai/DeepSeek-V3.git
synced 2025-05-22 02:06:45 -04:00
Fix apply_chat_template for function calling (Issue #860)
This commit fixes the chat template to properly handle function calls by: 1. Using safe dictionary access with .get() 2. Converting function arguments to JSON with |tojson filter 3. Adding better empty content handling Fixes #860
This commit is contained in:
parent
4cc6253d5c
commit
d3de9e8d1f
107
demo_function_calling.py
Normal file
107
demo_function_calling.py
Normal file
@ -0,0 +1,107 @@
"""
DeepSeek-V3 Function Calling Demo

This script demonstrates how to use function calling with the DeepSeek-V3 model
after applying the fix for the chat template.
"""

from transformers import AutoTokenizer
import json
import argparse
def get_current_temperature(location: str) -> float:
    """
    Get the current temperature at a location.

    Args:
        location: The location to get the temperature for, in the format "city, country"

    Returns:
        The current temperature at the specified location, as a floating point number.
    """
    # This is a mock function that would normally call a weather API
    print(f"Getting temperature for {location}")
    return 22.0
def get_current_time(timezone: str) -> str:
    """
    Get the current time in a specific timezone.

    Args:
        timezone: The timezone to get the current time for (e.g., "UTC", "America/New_York")

    Returns:
        The current time as a string.
    """
    # Mock implementation; a real version would consult a clock for the zone.
    print("Getting time for timezone " + timezone)
    return "12:30 PM"
def _show_rendered_template(tokenizer, messages, tools):
    # Shared render-and-print path used by both demo examples.
    print("Processing chat template...")
    rendered = tokenizer.apply_chat_template(
        messages,
        tools=tools,
        add_generation_prompt=True,
        tokenize=False,
        tools_in_user_message=False,
    )
    print("\nGenerated chat template:")
    print("-" * 50)
    print(rendered)
    print("-" * 50)


def main():
    """Load a tokenizer from --model_path and render two function-calling chats."""
    parser = argparse.ArgumentParser(
        description="Test DeepSeek-V3 function calling")
    parser.add_argument("--model_path", type=str, required=True,
                        help="Path to the DeepSeek-V3 model")
    args = parser.parse_args()

    print(f"Loading tokenizer from {args.model_path}")
    tokenizer = AutoTokenizer.from_pretrained(
        args.model_path, trust_remote_code=True)

    # Example 1: a single tool call followed by a user follow-up.
    print("\nExample 1: Weather query")
    weather_call = {"name": "get_current_temperature",
                    "arguments": {"location": "Paris, France"}}
    conversation = [
        {"role": "system",
         "content": "You are a helpful assistant that provides weather information."},
        {"role": "user", "content": "What is the temperature in Paris now?"},
        {"role": "assistant",
         "tool_calls": [{"type": "function", "function": weather_call}]},
        {"role": "user",
         "content": "Thanks for checking! Is that warm for this time of year?"},
    ]
    _show_rendered_template(tokenizer, conversation, [get_current_temperature])

    # Example 2: two tool calls in a single assistant turn.
    print("\nExample 2: Multiple function calls")
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user",
         "content": "What's the time in New York and temperature in Tokyo?"},
        {"role": "assistant", "tool_calls": [
            {"type": "function",
             "function": {"name": "get_current_time",
                          "arguments": {"timezone": "America/New_York"}}},
            {"type": "function",
             "function": {"name": "get_current_temperature",
                          "arguments": {"location": "Tokyo, Japan"}}},
        ]},
    ]
    _show_rendered_template(tokenizer, conversation,
                            [get_current_time, get_current_temperature])


if __name__ == "__main__":
    main()
80
docs/function_calling.md
Normal file
80
docs/function_calling.md
Normal file
@ -0,0 +1,80 @@
# Function Calling with DeepSeek-V3

This document provides guidance on using function calling with DeepSeek-V3 models.

## Overview

Function calling allows the model to call external functions through a structured interface. It's particularly useful for:

- Retrieving real-time information (weather, time, data from APIs)
- Performing calculations
- Executing actions based on user requests

## Usage with Transformers

DeepSeek-V3 supports function calling through the Hugging Face Transformers library. The example below demonstrates how to use this feature:

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

# Define your function
def get_weather(location: str) -> str:
    """Get the weather for a location."""
    # In a real application, this would call a weather API
    return f"Sunny, 22°C in {location}"

# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-V3", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("deepseek-ai/DeepSeek-V3", trust_remote_code=True)

# Create a conversation with function calling
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What's the weather like in Tokyo?"},
    {"role": "assistant", "tool_calls": [
        {"type": "function", "function": {"name": "get_weather", "arguments": {"location": "Tokyo, Japan"}}}
    ]},
    {"role": "user", "content": "Thanks! And what about New York?"}
]

# Apply the chat template
inputs = tokenizer.apply_chat_template(
    messages,
    tools=[get_weather],
    add_generation_prompt=True,
    tokenize=True,
    tools_in_user_message=False
)

# Generate a response
output_ids = model.generate(inputs, max_new_tokens=100)
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print(response)
```

## Function Definitions

Functions must have type annotations and docstrings following the OpenAI format:

```python
def function_name(param1: type, param2: type) -> return_type:
    """
    Brief description of what the function does.

    Args:
        param1: Description of parameter 1
        param2: Description of parameter 2

    Returns:
        Description of what is returned
    """
    # Function implementation
    pass
```

## Limitations

- Function parameters must be JSON-serializable types
- Function return values should also be JSON-serializable
- Complex object types are not directly supported

For more advanced use cases, please refer to the Hugging Face documentation on function calling.
126
test_fix.py
Normal file
126
test_fix.py
Normal file
@ -0,0 +1,126 @@
import json
import os
import sys

from transformers import AutoTokenizer

# Function to test
def get_current_temperature(location: str) -> float:
    """
    Get the current temperature at a location.

    Args:
        location: The location to get the temperature for, in the format "city, country"

    Returns:
        The current temperature at the specified location, as a floating point number.
    """
    # Mock value; a real implementation would call a weather API.
    return 22.0
def test_with_original_tokenizer(model_path):
|
||||||
|
print("Testing with original tokenizer...")
|
||||||
|
try:
|
||||||
|
tokenizer = AutoTokenizer.from_pretrained(
|
||||||
|
model_path, trust_remote_code=True)
|
||||||
|
|
||||||
|
tool_call = {"name": "get_current_temperature",
|
||||||
|
"arguments": {"location": "Paris, France"}}
|
||||||
|
messages = [
|
||||||
|
{"role": "system", "content": "You are a robot that responds to weather queries."},
|
||||||
|
{"role": "user", "content": "What is the temperature in Paris now?"},
|
||||||
|
{"role": "assistant", "tool_calls": [
|
||||||
|
{"type": "function", "function": tool_call}]},
|
||||||
|
]
|
||||||
|
|
||||||
|
inputs = tokenizer.apply_chat_template(
|
||||||
|
messages,
|
||||||
|
tools=[get_current_temperature],
|
||||||
|
add_generation_prompt=False,
|
||||||
|
tokenize=False,
|
||||||
|
tools_in_user_message=False
|
||||||
|
)
|
||||||
|
print("Success with original tokenizer!")
|
||||||
|
print(inputs)
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error with original tokenizer: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def test_with_fixed_tokenizer(model_path, fixed_config_path):
|
||||||
|
print("Testing with fixed tokenizer config...")
|
||||||
|
try:
|
||||||
|
# Read the original tokenizer files
|
||||||
|
tokenizer_json_path = os.path.join(model_path, "tokenizer.json")
|
||||||
|
if not os.path.exists(tokenizer_json_path):
|
||||||
|
print(f"Error: tokenizer.json not found at {tokenizer_json_path}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Copy the tokenizer.json and use our fixed config
|
||||||
|
fixed_dir = "fixed_tokenizer"
|
||||||
|
os.makedirs(fixed_dir, exist_ok=True)
|
||||||
|
|
||||||
|
# Copy tokenizer.json
|
||||||
|
import shutil
|
||||||
|
shutil.copy(tokenizer_json_path, os.path.join(
|
||||||
|
fixed_dir, "tokenizer.json"))
|
||||||
|
|
||||||
|
# Create fixed tokenizer_config.json
|
||||||
|
with open(fixed_config_path, 'r') as f:
|
||||||
|
fixed_config = f.read()
|
||||||
|
|
||||||
|
with open(os.path.join(fixed_dir, "tokenizer_config.json"), 'w') as f:
|
||||||
|
f.write(fixed_config)
|
||||||
|
|
||||||
|
# Load the fixed tokenizer
|
||||||
|
tokenizer = AutoTokenizer.from_pretrained(
|
||||||
|
fixed_dir, trust_remote_code=True)
|
||||||
|
|
||||||
|
tool_call = {"name": "get_current_temperature",
|
||||||
|
"arguments": {"location": "Paris, France"}}
|
||||||
|
messages = [
|
||||||
|
{"role": "system", "content": "You are a robot that responds to weather queries."},
|
||||||
|
{"role": "user", "content": "What is the temperature in Paris now?"},
|
||||||
|
{"role": "assistant", "tool_calls": [
|
||||||
|
{"type": "function", "function": tool_call}]},
|
||||||
|
]
|
||||||
|
|
||||||
|
inputs = tokenizer.apply_chat_template(
|
||||||
|
messages,
|
||||||
|
tools=[get_current_temperature],
|
||||||
|
add_generation_prompt=False,
|
||||||
|
tokenize=False,
|
||||||
|
tools_in_user_message=False
|
||||||
|
)
|
||||||
|
print("Success with fixed tokenizer!")
|
||||||
|
print(inputs)
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error with fixed tokenizer: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: compare the original and the fixed tokenizer config."""
    if len(sys.argv) < 2:
        print("Usage: python test_fix.py /path/to/DeepSeek-V3 /path/to/fixed_config.json")
        return

    model_path = sys.argv[1]
    config_path = "tokenizer_config.json" if len(sys.argv) <= 2 else sys.argv[2]

    # The unpatched tokenizer is expected to fail on tool calls...
    original_success = test_with_original_tokenizer(model_path)

    # ...while the patched config should render them successfully.
    fixed_success = test_with_fixed_tokenizer(model_path, config_path)

    if fixed_success and not original_success:
        print("\n✅ Fix was successful! The issue has been resolved.")
    else:
        print("\n❌ Testing did not confirm the fix. Please check the logs above.")


if __name__ == "__main__":
    main()
35
tokenizer_config.json
Normal file
35
tokenizer_config.json
Normal file
@ -0,0 +1,35 @@
{
    "add_bos_token": false,
    "add_eos_token": false,
    "bos_token": {
        "__type": "AddedToken",
        "content": "<|begin of sentence|>",
        "lstrip": false,
        "normalized": true,
        "rstrip": false,
        "single_word": false
    },
    "chat_template": "{% set ns = namespace(found=false, is_first=false, is_tool=false) %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n {{ '<|begin of sentence|><|system|>' + message['content'] + '<|end of sentence|>' }}\n {%- elif message['role'] == 'user' %}\n {%- if message.get('content') and message.get('tool_results') %}\n {{ '<|begin of sentence|><|User|>' + message['content'] }}\n {%- for result in message['tool_results'] %}\n {{ '\n<|tool result begin|>' + result['name'] + '\n```json\n' + result['result'] + '\n```<|tool result end|>' }}\n {%- endfor %}\n {{ '<|end of sentence|>' }}\n {%- elif message.get('content') %}\n {{ '<|begin of sentence|><|User|>' + message['content'] + '<|end of sentence|>' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {%- if message.get('content') is none or message.get('content') == '' %}\n {%- set ns.is_tool = false -%}\n {%- for tool in message.get('tool_calls', []) %}\n {%- if not ns.is_first %}\n {{ '<|Assistant|><|tool calls begin|><|tool call begin|>' + tool['type'] + '<|tool sep|>' + tool['function']['name'] + '\n```json\n' + (tool['function']['arguments']|tojson) + '\n```<|tool call end|>' }}\n {%- set ns.is_first = true -%}\n {%- else %}\n {{ '\n<|tool call begin|>' + tool['type'] + '<|tool sep|>' + tool['function']['name'] + '\n```json\n' + (tool['function']['arguments']|tojson) + '\n```<|tool call end|>' }}\n {{ '<|tool calls end|><|end of sentence|>' }}\n {%- endif %}\n {%- endfor %}\n {%- else %}\n {{ '<|begin of sentence|><|Assistant|>' + message['content'] + '<|end of sentence|>' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{ '<|begin of sentence|><|Assistant|>' }}\n{%- endif %}\n",
    "clean_up_tokenization_spaces": true,
    "model_max_length": 131072,
    "eos_token": {
        "__type": "AddedToken",
        "content": "<|end of sentence|>",
        "lstrip": false,
        "normalized": true,
        "rstrip": false,
        "single_word": false
    },
    "tokenizer_class": "CodeLlamaTokenizer",
    "pad_token": null,
    "special_tokens_map_file": null,
    "tokenizer_file": "tokenizer.json",
    "unk_token": {
        "__type": "AddedToken",
        "content": "<|unknown|>",
        "lstrip": false,
        "normalized": true,
        "rstrip": false,
        "single_word": false
    }
}
Loading…
Reference in New Issue
Block a user