# 函数调用与工具使用
本文档整理函数调用(Function Calling)和工具使用的最佳实践。
## 核心概念
txt
┌─────────────────────────────────────────────────────┐
│ 函数调用流程 │
├─────────────────────────────────────────────────────┤
│ │
│ ┌─────────┐ │
│ │ 用户请求 │ │
│ └────┬────┘ │
│ ↓ │
│ ┌─────────────────┐ │
│ │ 模型分析请求 │ │
│ │ 判断是否需要调用 │ │
│ └────────┬────────┘ │
│ │ │
│ ┌─────┴─────┐ │
│ ↓ ↓ │
│ 需要 不需要 │
│ 调用 调用 │
│ │ │ │
│ ↓ ↓ │
│ ┌─────────┐ ┌─────────┐ │
│ │生成调用 │ │直接响应 │ │
│ │参数 │ │ │ │
│ └────┬────┘ └────┬────┘ │
│ │ │ │
│ ↓ │ │
│ ┌─────────┐ │ │
│ │执行函数 │ │ │
│ └────┬────┘ │ │
│ │ │ │
│ ↓ │ │
│ ┌─────────┐ │ │
│ │返回结果 │ │ │
│ └────┬────┘ │ │
│ │ │ │
│ ↓ │ │
│ ┌─────────────────┴─────────┐ │
│ │ 模型生成最终响应 │ │
│ └───────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────┘

## 函数定义最佳实践
### 清晰的函数 Schema
python
from typing import Literal, Optional, List
from pydantic import BaseModel, Field
# 使用 Pydantic 定义参数类型
class WeatherQuery(BaseModel):
    """Arguments accepted by the weather lookup tool."""

    # Required: name of the city to look up.
    location: str = Field(
        default=...,
        description="城市名称,如:北京、上海",
    )
    # Optional date string; the description states YYYY-MM-DD and that
    # leaving it out means "today".
    date: Optional[str] = Field(
        default=None,
        description="日期,格式:YYYY-MM-DD,默认今天",
    )
    # Temperature unit, limited to the two literal values below.
    unit: Literal["celsius", "fahrenheit"] = Field(
        default="celsius",
        description="温度单位",
    )
# 函数定义
# Tool schemas advertised to the model. Each entry carries a name, a
# description and a JSON-Schema-style "parameters" object. Optional
# parameters are expressed by leaving them out of "required".
FUNCTION_DEFINITIONS = [
    {
        "name": "get_weather",
        "description": "获取指定城市的天气信息。用于回答关于天气的问题。",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "城市名称,如:北京、上海",
                },
                "date": {
                    "type": "string",
                    "description": "日期,格式:YYYY-MM-DD,默认今天",
                    # No "default" entry: None is not a valid value for a
                    # "string" property, and omitting the parameter already
                    # means "today". The pattern pins the documented format.
                    "pattern": r"^\d{4}-\d{2}-\d{2}$",
                },
                "unit": {
                    "type": "string",
                    "enum": ["celsius", "fahrenheit"],
                    "description": "温度单位",
                    "default": "celsius",
                },
            },
            "required": ["location"],
        },
    },
    {
        "name": "search_web",
        "description": "搜索网络获取信息。用于回答需要实时信息或最新数据的问题。",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "搜索关键词,简洁明确",
                },
                "num_results": {
                    "type": "integer",
                    "description": "返回结果数量,默认5",
                    "default": 5,
                    "minimum": 1,
                    "maximum": 10,
                },
            },
            "required": ["query"],
        },
    },
    {
        "name": "execute_code",
        "description": "执行Python代码。用于计算、数据处理等需要编程的任务。",
        "parameters": {
            "type": "object",
            "properties": {
                "code": {
                    "type": "string",
                    "description": "Python代码,必须是完整可执行的",
                },
                "timeout": {
                    "type": "integer",
                    "description": "执行超时时间(秒),默认30",
                    "default": 30,
                    # A zero or negative timeout is never meaningful.
                    "minimum": 1,
                },
            },
            "required": ["code"],
        },
    },
]

### 函数描述原则
yaml
函数描述原则:
明确性:
- 说明函数做什么,而不是怎么做
- 说明输入输出的预期格式
- 说明使用场景和限制
示例:
好的描述: 获取指定城市的天气信息。用于回答关于天气的问题。
坏的描述: 天气API
参数描述:
- 每个参数都要有描述
- 说明参数的格式要求
- 说明默认值和可选性
- 提供示例值
约束说明:
- 必填参数用 required 标记
- 使用 enum 限制可选值
- 使用 minimum/maximum 限制数值范围
- 使用 pattern 限制字符串格式

## 函数执行框架
### 统一执行器
python
from dataclasses import dataclass
from typing import Any, Callable, Dict, Optional
from enum import Enum
import json
import asyncio
class ExecutionStatus(Enum):
    """Outcome categories reported by FunctionExecutor.execute."""

    SUCCESS = "success"
    ERROR = "error"
    TIMEOUT = "timeout"
    INVALID_INPUT = "invalid_input"


@dataclass
class ExecutionResult:
    """Result envelope returned by every FunctionExecutor.execute call."""

    status: ExecutionStatus
    data: Any
    error: Optional[str] = None
    execution_time_ms: int = 0


class FunctionExecutor:
    """Registry of callables that can be run by name with timeout and retries."""

    def __init__(
        self,
        timeout: int = 30,
        max_retries: int = 2,
        validate_input: bool = True,
    ):
        """Store the execution policy and start with empty registries.

        Args:
            timeout: Seconds allowed for each single attempt.
            max_retries: Extra attempts made after the first one fails.
            validate_input: Whether registered validators are consulted.
        """
        self.timeout = timeout
        self.max_retries = max_retries
        self.validate_input = validate_input
        self.functions: Dict[str, Callable] = {}
        self.validators: Dict[str, Callable] = {}

    def register(
        self,
        name: str,
        func: Callable,
        validator: Optional[Callable] = None,
    ):
        """Register ``func`` under ``name``, optionally with an input validator.

        A validator receives the arguments dict and returns an error message,
        or a falsy value when the input is acceptable.
        """
        self.functions[name] = func
        if validator:
            self.validators[name] = validator

    async def execute(
        self,
        name: str,
        arguments: Dict[str, Any],
    ) -> ExecutionResult:
        """Run the function registered as ``name`` with ``arguments``.

        Returns:
            An ExecutionResult whose status is SUCCESS, INVALID_INPUT (the
            validator rejected the arguments), TIMEOUT (the last attempt
            exceeded ``self.timeout``) or ERROR (unknown function, or the
            last attempt raised). Exceptions raised by the function are
            captured in ``error`` rather than propagated.
        """
        import time

        start_time = time.time()

        if name not in self.functions:
            return ExecutionResult(
                status=ExecutionStatus.ERROR,
                data=None,
                error=f"Unknown function: {name}",
            )

        if self.validate_input and name in self.validators:
            validation_error = self.validators[name](arguments)
            if validation_error:
                return ExecutionResult(
                    status=ExecutionStatus.INVALID_INPUT,
                    data=None,
                    error=validation_error,
                )

        func = self.functions[name]
        # The failure kind is tracked explicitly. Inferring it from the error
        # text was wrong in both directions: the timeout message says
        # "timed out" (no "timeout" substring), and unrelated exceptions may
        # mention "timeout".
        last_status = ExecutionStatus.ERROR
        last_error: Optional[str] = None
        # Always make at least one attempt, even if max_retries is negative.
        for _attempt in range(max(self.max_retries, 0) + 1):
            try:
                if asyncio.iscoroutinefunction(func):
                    pending = func(**arguments)
                else:
                    # Run blocking callables in a worker thread. wait_for only
                    # stops waiting on timeout; it cannot interrupt the thread.
                    pending = asyncio.to_thread(func, **arguments)
                result = await asyncio.wait_for(pending, timeout=self.timeout)
            except asyncio.TimeoutError:
                last_status = ExecutionStatus.TIMEOUT
                last_error = f"Function {name} timed out after {self.timeout}s"
            except Exception as exc:
                last_status = ExecutionStatus.ERROR
                last_error = str(exc)
            else:
                return ExecutionResult(
                    status=ExecutionStatus.SUCCESS,
                    data=result,
                    execution_time_ms=int((time.time() - start_time) * 1000),
                )

        return ExecutionResult(
            status=last_status,
            data=None,
            error=last_error,
            execution_time_ms=int((time.time() - start_time) * 1000),
        )
# Usage example
executor = FunctionExecutor(timeout=30, max_retries=2)
# Register functions
def get_weather_impl(location: str, date: Optional[str] = None, unit: str = "celsius"):
    """Placeholder for the weather lookup; not implemented, so it returns None."""
    return None
def validate_weather_input(args: Dict[str, Any]) -> Optional[str]:
    """Validate the arguments for the weather tool.

    Args:
        args: Raw argument mapping supplied for the call.

    Returns:
        An error message when ``location`` is missing or empty, or when a
        ``date`` is present but is not a real calendar date written as
        YYYY-MM-DD; otherwise None. A missing or None ``date`` is accepted.
    """
    from datetime import datetime

    if not args.get("location"):
        return "location is required"

    date_value = args.get("date")
    if date_value is not None:
        date_error = "date must be in YYYY-MM-DD format"
        # strptime alone tolerates unpadded fields such as "2024-1-5", so the
        # length is checked first to insist on the zero-padded form.
        if not isinstance(date_value, str) or len(date_value) != 10:
            return date_error
        try:
            datetime.strptime(date_value, "%Y-%m-%d")
        except ValueError:
            return date_error
    return None
executor.register("get_weather", get_weather_impl, validate_weather_input)

## 工具选择策略
### 自动工具选择
python
from typing import List, Dict, Any
from dataclasses import dataclass
@dataclass
class Tool:
    """Descriptor for one callable tool: identity, schema and ranking hints."""

    name: str
    description: str
    parameters: Dict[str, Any]
    keywords: List[str]
    priority: int  # higher values rank first
class ToolSelector:
    """Rank tools against a free-text query using simple lexical heuristics."""

    def __init__(self, tools: "List[Tool]"):
        """Keep the tool list and build a name -> tool lookup table."""
        self.tools = tools
        self.tool_map = {t.name: t for t in tools}

    def select_tools(self, query: str, max_tools: int = 3) -> List[str]:
        """Return the names of the best-scoring tools for ``query``.

        Each tool scores 10 per keyword found in the query (compared
        case-insensitively), 5 per whitespace-separated word shared between
        its description and the query, plus its priority. Ties keep the
        original tool order.

        Args:
            query: User request text.
            max_tools: Maximum number of names to return; values below zero
                are treated as zero.
        """
        query_lower = query.lower()
        # Whitespace tokenisation is a stand-in for real semantic similarity.
        query_words = set(query_lower.split())

        scores = []
        for tool in self.tools:
            # The query is lower-cased, so keywords must be too; otherwise a
            # keyword containing capitals could never match.
            keyword_hits = sum(
                1 for keyword in tool.keywords if keyword.lower() in query_lower
            )
            overlap = len(set(tool.description.lower().split()) & query_words)
            score = keyword_hits * 10 + overlap * 5 + tool.priority
            scores.append((tool.name, score))

        scores.sort(key=lambda item: item[1], reverse=True)
        # Clamp so a negative limit cannot slice from the end of the list.
        limit = max(max_tools, 0)
        return [name for name, _score in scores[:limit]]

    def get_tool_definitions(self, tool_names: List[str]) -> List[Dict]:
        """Return name/description/parameters dicts for the known tool names.

        Names that were never registered are skipped silently.
        """
        definitions = []
        for name in tool_names:
            tool = self.tool_map.get(name)
            if tool is None:
                continue
            definitions.append({
                "name": tool.name,
                "description": tool.description,
                "parameters": tool.parameters,
            })
        return definitions
# 使用示例
# Example registry. NOTE: `{...}` is kept verbatim from the example; as
# written it evaluates to a set containing Ellipsis, standing in for a real
# parameter schema dict.
tools = [
    Tool(
        name="get_weather",
        description="获取天气信息",
        parameters={...},
        keywords=["天气", "气温", "下雨", "晴天", "weather"],
        priority=10,
    ),
    Tool(
        name="search_web",
        description="搜索网络信息",
        parameters={...},
        keywords=["搜索", "查找", "查询", "search", "find"],
        priority=8,
    ),
    Tool(
        name="execute_code",
        description="执行代码",
        parameters={...},
        keywords=["计算", "编程", "代码", "calculate", "code", "python"],
        priority=5,
    ),
]

# Rank the tools for a sample weather question.
selector = ToolSelector(tools)
selected = selector.select_tools("北京今天天气怎么样?")
definitions = selector.get_tool_definitions(selected)

## 多步骤工具调用
### 链式调用
python
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
@dataclass
class ToolCall:
    """Record of one tool invocation: what was called, with what, and the outcome."""

    name: str
    arguments: Dict[str, Any]
    result: Optional[Any] = None  # stays None until a result is attached
class ChainedToolExecutor:
    """Loop between the model and the tool executor until the model answers."""

    def __init__(self, model, executor: FunctionExecutor, max_steps: int = 5):
        """Store the collaborators and the cap on model round-trips.

        Args:
            model: Object exposing an awaitable ``generate_with_tools``.
            executor: Runs the tool calls the model asks for.
            max_steps: Maximum number of model calls before giving up.
        """
        self.model = model
        self.executor = executor
        self.max_steps = max_steps

    @staticmethod
    def _tool_message_content(result) -> str:
        """Serialise an execution result into the text sent back to the model."""
        # Branch on status, not on the truthiness of the data: a successful
        # call may legitimately return 0, "", [] or False.
        if result.status == ExecutionStatus.SUCCESS:
            return json.dumps(result.data, ensure_ascii=False)
        return result.error or "unknown error"

    async def execute(self, query: str) -> str:
        """Answer ``query``, running tool calls in sequence as the model requests.

        Returns:
            The model's final content, or a fixed notice when ``max_steps``
            model calls pass without a tool-free answer.
        """
        conversation = [{"role": "user", "content": query}]

        for _step in range(self.max_steps):
            response = await self.model.generate_with_tools(
                conversation,
                tools=FUNCTION_DEFINITIONS,
            )

            # No tool calls means the model has produced its final answer.
            if not response.tool_calls:
                return response.content

            # Record the assistant turn once, listing every requested call,
            # then append one tool message per call.
            conversation.append({
                "role": "assistant",
                "tool_calls": [
                    {
                        "id": call.id,
                        "name": call.name,
                        "arguments": call.arguments,
                    }
                    for call in response.tool_calls
                ],
            })

            for call in response.tool_calls:
                result = await self.executor.execute(call.name, call.arguments)
                conversation.append({
                    "role": "tool",
                    "tool_call_id": call.id,
                    "content": self._tool_message_content(result),
                })

        return "达到最大调用次数,请简化请求"
# 使用示例
async def run_chain():
    """Demo: send a weather question through ChainedToolExecutor and print the reply.

    NOTE(review): ModelClient is not defined in this file; it has to be
    supplied by the surrounding project.
    """
    chain = ChainedToolExecutor(ModelClient(), FunctionExecutor(), max_steps=5)
    answer = await chain.execute("帮我查一下北京今天的天气,然后告诉我是否适合出门运动")
    print(answer)


### 并行调用
python
class ParallelToolExecutor:
    """Run all tool calls from one model turn concurrently, then ask for the answer."""

    def __init__(self, model, executor: FunctionExecutor):
        """Store the model client and the executor that runs tool calls."""
        self.model = model
        self.executor = executor

    @staticmethod
    def _tool_message_content(result) -> str:
        """Serialise an execution result into the text sent back to the model."""
        # Branch on status, not on the truthiness of the data: a successful
        # call may legitimately return 0, "", [] or False.
        if result.status == ExecutionStatus.SUCCESS:
            return json.dumps(result.data, ensure_ascii=False)
        return result.error or "unknown error"

    async def execute(self, query: str) -> str:
        """Answer ``query`` with at most one round of concurrent tool calls.

        Returns:
            The first response's content when the model requests no tools;
            otherwise the content generated after the tool results are
            appended to the conversation.
        """
        # Round one: let the model request every tool call it needs.
        response = await self.model.generate_with_tools(
            [{"role": "user", "content": query}],
            tools=FUNCTION_DEFINITIONS,
            parallel_tool_calls=True,
        )

        if not response.tool_calls:
            return response.content

        calls = list(response.tool_calls)
        # gather returns results in argument order, so they can be zipped
        # back to the calls that produced them.
        results = await asyncio.gather(
            *(self.executor.execute(call.name, call.arguments) for call in calls)
        )

        tool_messages = [
            {
                "role": "tool",
                "tool_call_id": call.id,
                "content": self._tool_message_content(result),
            }
            for call, result in zip(calls, results)
        ]

        # Round two: hand the tool outputs back for the final answer.
        conversation = [
            {"role": "user", "content": query},
            {"role": "assistant", "tool_calls": response.tool_calls},
            *tool_messages,
        ]
        final_response = await self.model.generate(conversation)
        return final_response.content
# 使用示例
async def run_parallel():
    """Demo: compare the weather of three cities via ParallelToolExecutor and print the reply.

    NOTE(review): ModelClient is not defined in this file; it has to be
    supplied by the surrounding project.
    """
    runner = ParallelToolExecutor(ModelClient(), FunctionExecutor())
    answer = await runner.execute("比较北京、上海、广州今天的天气")
    print(answer)


## 安全最佳实践
### 输入验证
python
import re
from typing import Dict, Any, List, Optional
class ToolInputValidator:
    """Reject tool calls whose name or string arguments look unsafe."""

    def __init__(self):
        """Set up the regular expressions treated as dangerous content."""
        self.dangerous_patterns = [
            # SQL injection
            r"(?i)(union\s+select|insert\s+into|delete\s+from|drop\s+table)",
            # Command injection
            r"(?i)(;\s*rm|;\s*del|;\s*format|\|\s*cat|\|\s*type)",
            # Path traversal
            r"\.\.[\\/]",
            # Embedded credentials
            r"(?i)(password|secret|api[_-]?key|token)\s*[:=]",
        ]

    def validate(self, name: str, arguments: Dict[str, Any]) -> Optional[str]:
        """Check a function name and its arguments.

        Args:
            name: Function name requested by the model.
            arguments: Argument mapping; strings nested inside dicts, lists
                and tuples are inspected as well.

        Returns:
            A message describing the first problem found, or None.
        """
        if not self._is_valid_function_name(name):
            return f"Invalid function name: {name}"

        for key, value in arguments.items():
            error = self._validate_value(key, value)
            if error:
                return error
        return None

    def _is_valid_function_name(self, name: str) -> bool:
        """Return True for lower-case snake_case identifiers only."""
        # fullmatch, not match with "$": "$" also matches just before a
        # trailing newline, which would let "name\n" through.
        return isinstance(name, str) and re.fullmatch(r"[a-z][a-z0-9_]*", name) is not None

    def _validate_value(self, path: str, value: Any) -> Optional[str]:
        """Validate one argument value, descending into containers.

        ``path`` names the value in error messages, e.g. ``opts.path`` or
        ``items[0]``.
        """
        if isinstance(value, str):
            return self._validate_string(path, value)
        if isinstance(value, dict):
            for sub_key, sub_value in value.items():
                error = self._validate_value(f"{path}.{sub_key}", sub_value)
                if error:
                    return error
        elif isinstance(value, (list, tuple)):
            for index, item in enumerate(value):
                error = self._validate_value(f"{path}[{index}]", item)
                if error:
                    return error
        return None

    def _validate_string(self, key: str, value: str) -> Optional[str]:
        """Check one string for dangerous patterns and excessive length."""
        for pattern in self.dangerous_patterns:
            if re.search(pattern, value):
                return f"Dangerous pattern detected in parameter {key}"

        if len(value) > 10000:
            return f"Parameter {key} exceeds maximum length"
        return None
# 敏感操作检查
class SensitiveOperationChecker:
    """Flag calls to sensitive functions whose arguments contain risky keywords."""

    # Function name -> keywords that trigger an approval requirement.
    SENSITIVE_FUNCTIONS = {
        "execute_code": ["delete", "remove", "format", "shutdown"],
        "file_operations": ["delete", "overwrite"],
        "network_operations": ["connect", "send", "upload"],
    }

    def check(self, name: str, arguments: Dict[str, Any]) -> Dict[str, Any]:
        """Assess one call and return a report dict.

        The report has three keys: ``is_sensitive`` (the function is listed),
        ``requires_approval`` (a listed keyword occurs in a top-level string
        argument, compared case-insensitively) and ``warnings`` (one message
        per keyword hit).
        """
        report = {
            "is_sensitive": False,
            "requires_approval": False,
            "warnings": [],
        }

        keywords = self.SENSITIVE_FUNCTIONS.get(name)
        if keywords is None:
            return report

        report["is_sensitive"] = True
        for key, value in arguments.items():
            # Only string arguments are scanned.
            if not isinstance(value, str):
                continue
            lowered = value.lower()
            for keyword in keywords:
                if keyword not in lowered:
                    continue
                report["requires_approval"] = True
                report["warnings"].append(
                    f"Parameter {key} contains sensitive keyword: {keyword}"
                )
        return report


### 权限控制
python
from enum import Enum
from typing import Set, Dict, List
class Permission(Enum):
    """Capabilities that can be granted to a user or attached to a tool."""

    READ = "read"
    WRITE = "write"
    EXECUTE = "execute"
    DELETE = "delete"
    NETWORK = "network"
class ToolPermissionManager:
    """Track which permissions tools carry and which permissions users hold."""

    def __init__(self):
        """Seed the tool and role tables; no user has any permission yet."""
        # Permissions attached to each tool.
        self.tool_permissions: Dict[str, Set[Permission]] = {
            "get_weather": {Permission.READ, Permission.NETWORK},
            "search_web": {Permission.READ, Permission.NETWORK},
            "execute_code": {Permission.EXECUTE},
            "read_file": {Permission.READ},
            "write_file": {Permission.WRITE},
            "delete_file": {Permission.DELETE},
        }
        # Permissions held per user id, filled in by grant_role.
        self.user_permissions: Dict[str, Set[Permission]] = {}
        # Permission bundles that a role confers.
        self.role_permissions: Dict[str, Set[Permission]] = {
            "admin": {Permission.READ, Permission.WRITE, Permission.EXECUTE, Permission.DELETE, Permission.NETWORK},
            "user": {Permission.READ, Permission.EXECUTE, Permission.NETWORK},
            "guest": {Permission.READ},
        }

    def check_permission(
        self,
        user_id: str,
        tool_name: str,
        action: Permission
    ) -> bool:
        """Return True when ``action`` is held by the user and listed for the tool.

        Unknown users and unknown tools are treated as having no permissions.
        """
        held = self.user_permissions.get(user_id, set())
        attached = self.tool_permissions.get(tool_name, set())
        return action in (held & attached)

    def grant_role(self, user_id: str, role: str):
        """Add every permission of ``role`` to the user; unknown roles are ignored."""
        bundle = self.role_permissions.get(role)
        if bundle is None:
            return
        self.user_permissions.setdefault(user_id, set()).update(bundle)

    def revoke_permission(self, user_id: str, permission: Permission):
        """Remove one permission from the user, if the user is known."""
        held = self.user_permissions.get(user_id)
        if held is not None:
            held.discard(permission)
# Usage example
permission_manager = ToolPermissionManager()
# Grant a role
permission_manager.grant_role("user123", "user")
# Check a permission
can_execute = permission_manager.check_permission(
"user123",
"execute_code",
Permission.EXECUTE
)

## 最佳实践总结
### 函数设计清单
markdown
## 函数设计检查清单
### 函数定义
- [ ] 函数名清晰、动词开头
- [ ] 描述明确说明用途
- [ ] 参数描述完整
- [ ] 必填参数标记 required
- [ ] 使用 enum 限制可选值
- [ ] 设置合理的默认值
### 输入验证
- [ ] 验证参数类型
- [ ] 验证参数范围
- [ ] 检查危险模式
- [ ] 限制输入长度
### 执行控制
- [ ] 设置超时时间
- [ ] 实现重试机制
- [ ] 处理并发限制
- [ ] 记录执行日志
### 安全措施
- [ ] 实现权限控制
- [ ] 检查敏感操作
- [ ] 脱敏敏感数据
- [ ] 限制调用频率

### 工具调用策略
| 场景 | 策略 | 说明 |
|---|---|---|
| 单一工具 | 直接调用 | 一次性调用,返回结果 |
| 多个独立工具 | 并行调用 | 同时调用,合并结果 |
| 有依赖关系 | 链式调用 | 按顺序调用,传递结果 |
| 复杂任务 | 混合调用 | 根据需要选择策略 |
