OpenAI Chat Completions —— 对话、流式、多模态
目标
- 掌握 Chat Completions API 的核心用法
- 实现多轮对话、流式输出
- 使用 Vision 理解图像
完整代码
import os
from openai import OpenAI
# 初始化客户端(自动从环境变量读取 OPENAI_API_KEY)
client = OpenAI()
# ============================================================
# 1. Basic chat completion
# ============================================================
# The system message sets the assistant's persona; the user message carries
# the actual question.
basic_messages = [
    {"role": "system", "content": "你是一个幽默且知识渊博的助手。"},
    {"role": "user", "content": "用一句话解释什么是量子计算?"},
]
response = client.chat.completions.create(
    model="gpt-4o-mini",  # the tutorial's cost-effective default model
    messages=basic_messages,
    temperature=0.7,
    max_tokens=200,
)
first_choice = response.choices[0]
print(first_choice.message.content)
# Example output (the reply is in Chinese): a one-sentence analogy along the
# lines of "quantum computing is like flipping ten thousand coins at once and
# reading every result in one go, while classical computing flips them one
# at a time".
# ============================================================
# 2. Multi-turn conversation (context is kept in `messages`)
# ============================================================
# The whole history list is passed on every request, so each user turn and
# each assistant reply is appended to it.
messages = [
    {"role": "system", "content": "你是一个Python编程助手。"},
]
while True:
    try:
        user_input = input("\n你: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl-C ends the chat cleanly so the sections that
        # follow this loop still run instead of dying with a traceback.
        print()
        break
    # Typing exit / quit / q (any letter case, surrounding spaces ignored)
    # leaves the loop.
    if user_input.lower() in ("exit", "quit", "q"):
        break
    if not user_input:
        # Do not spend a request on a blank line.
        continue
    messages.append({"role": "user", "content": user_input})
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
        temperature=0.5,
    )
    reply = response.choices[0].message.content
    # Store the reply so the next request includes it as context.
    messages.append({"role": "assistant", "content": reply})
    print(f"\n助手: {reply}")
# ============================================================
# 3. Streaming output (typewriter effect)
# ============================================================
stream = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "写一首关于编程的五言绝句"}],
    stream=True,
)
print("助手: ", end="", flush=True)
for chunk in stream:
    # A chunk may arrive with an empty `choices` list (for example the usage
    # chunk sent when usage reporting is enabled); indexing [0] on it would
    # raise IndexError, so skip such chunks.
    if not chunk.choices:
        continue
    delta_text = chunk.choices[0].delta.content
    # Role-only and final chunks carry no text.
    if delta_text:
        # flush=True makes each fragment appear as soon as it is received.
        print(delta_text, end="", flush=True)
print()
# ============================================================
# 4. Vision: image understanding
# ============================================================
# A user message whose content is a list can mix text parts and image parts.
image_part = {
    "type": "image_url",
    "image_url": {
        "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
        "detail": "auto",  # low / high / auto
    },
}
text_part = {"type": "text", "text": "这张图片里有什么?请详细描述。"}
response = client.chat.completions.create(
    model="gpt-4o",  # vision-capable model
    messages=[{"role": "user", "content": [text_part, image_part]}],
    max_tokens=500,
)
print(response.choices[0].message.content)
# ============================================================
# 5. JSON mode (structured output)
# ============================================================
import json

json_mode_messages = [
    {"role": "system", "content": "你以 JSON 格式回答。只输出 JSON,不要有其他文字。"},
    {"role": "user", "content": "列出3种编程语言及其主要用途"},
]
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=json_mode_messages,
    response_format={"type": "json_object"},
)
# The reply text is parsed as JSON, then pretty-printed with non-ASCII
# characters left readable.
raw_json = response.choices[0].message.content
result = json.loads(raw_json)
print(json.dumps(result, ensure_ascii=False, indent=2))
# Example output shape (the "use" values come back in Chinese):
# {
#   "languages": [
#     {"name": "Python", "use": "data science..."},
#     {"name": "JavaScript", "use": "web development..."},
#     {"name": "Rust", "use": "systems programming..."}
#   ]
# }
# ============================================================
# 6. Token counting and cost estimation
# ============================================================
import tiktoken

# Look up the encoding tiktoken associates with this model name, then count
# how many tokens the sample text encodes to.
enc = tiktoken.encoding_for_model("gpt-4o-mini")
text = "Hello, this is a test message to count tokens."
token_ids = enc.encode(text)
tokens = len(token_ids)
print(f"\n'{text}' → {tokens} tokens")
# Reference prices in US dollars per one million tokens (quoted for 2025).
pricing = {
    "gpt-4o": {"input": 2.50, "output": 10.00},
    "gpt-4o-mini": {"input": 0.15, "output": 0.60},
    "gpt-4": {"input": 30.00, "output": 60.00},
}
model = "gpt-4o-mini"
input_tokens = 500
output_tokens = 200
# Cost = tokens / 1e6 * price-per-million, summed over input and output.
rates = pricing[model]
input_cost = input_tokens / 1e6 * rates["input"]
output_cost = output_tokens / 1e6 * rates["output"]
cost = input_cost + output_cost
print(f"预估成本: ${cost:.6f}")
模型速查
| 模型 | 上下文 | 特性 |
| --- | --- | --- |
| gpt-4o | 128K | 旗舰多模态,最快 |
| gpt-4o-mini | 128K | 性价比最高,适合大多数场景 |
| o1-preview | 128K | 深度推理(数学、编程) |
| gpt-4 | 8K/32K | 精确但较慢 |
| dall-e-3 | - | 图像生成 |
| whisper-1 | - | 语音转文字 |
| tts-1-hd | - | 文字转语音 |
| text-embedding-3-small | 8K | 向量嵌入 |
OpenAI Function Calling + RAG 构建 AI Agent
目标
- 掌握 Function Calling(工具调用)机制
- 构建基于 Embeddings 的 RAG 知识库问答
- 实现完整的 AI Agent 循环
完整代码
1. Function Calling —— 让 GPT 调用你的函数
import json
from openai import OpenAI
client = OpenAI()
# ============================================================
# Tool (function) definitions
# ============================================================
def _function_tool(name, description, properties, required=None):
    """Build one entry of ``tools``: a function tool with a JSON-schema object.

    ``properties`` maps parameter names to their schema; ``required`` is only
    written into the schema when it is given.
    """
    parameters = {"type": "object", "properties": properties}
    if required is not None:
        parameters["required"] = required
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": parameters,
        },
    }


tools = [
    _function_tool(
        name="get_weather",
        description="获取指定城市的当前天气信息",
        properties={
            "city": {
                "type": "string",
                "description": "城市名称,如 Beijing, Shanghai",
            },
        },
        required=["city"],
    ),
    # Both search filters are optional, so no "required" list is declared.
    _function_tool(
        name="search_database",
        description="在内部数据库中搜索订单信息",
        properties={
            "order_id": {"type": "string"},
            "customer_name": {"type": "string"},
        },
    ),
]
# Mock implementations of the tools declared above
def get_weather(city: str) -> dict:
    """Return a canned weather report for ``city`` (mock, no real lookup).

    Every field except ``city`` is a fixed placeholder value.
    """
    report = {"city": city}
    report["temperature"] = 22
    report["condition"] = "晴朗"
    report["humidity"] = "45%"
    return report
def search_database(order_id: str = None, customer_name: str = None) -> dict:
    """Return a canned order search result (mock, no real database).

    Both filters are accepted but not used: the same single sample order is
    returned for every call.
    """
    sample_order = {"id": "ORD-001", "product": "MacBook Pro", "status": "已发货"}
    return {"orders": [sample_order]}
# Dispatch table: tool name (as a string) -> the local function to run.
# Keys are taken from each function's own name.
available_functions = {
    func.__name__: func
    for func in (get_weather, search_database)
}
def _execute_tool_call(tool_call) -> str:
    """Run one model-requested tool call and return its result as a JSON string.

    Problems with the request itself (unknown tool name, malformed argument
    JSON, arguments that do not fit the function's signature) are returned as
    an ``{"error": ...}`` payload instead of being raised, so the caller can
    still send a reply for this tool call and the model can see what failed.
    """
    function_name = tool_call.function.name
    function_to_call = available_functions.get(function_name)
    if function_to_call is None:
        return json.dumps(
            {"error": f"unknown function: {function_name}"},
            ensure_ascii=False,
        )

    try:
        # Arguments arrive as a JSON-encoded string; treat an empty string as
        # "no arguments".
        function_args = json.loads(tool_call.function.arguments or "{}")
    except json.JSONDecodeError as exc:
        return json.dumps(
            {"error": f"invalid JSON arguments for {function_name}: {exc}"},
            ensure_ascii=False,
        )

    print(f"🔧 调用函数: {function_name}({function_args})")
    try:
        function_response = function_to_call(**function_args)
    except TypeError as exc:
        # Raised when the decoded arguments are not a mapping or contain
        # names the function does not accept.
        function_response = {
            "error": f"bad arguments for {function_name}: {exc}",
        }
    return json.dumps(function_response, ensure_ascii=False)


def run_agent(user_query: str):
    """Answer ``user_query``, letting the model call the registered tools once.

    Flow:
      1. Send the query together with ``tools``; the model either answers
         directly or requests one or more tool calls.
      2. If it requested tools, run each one, append a ``role: "tool"``
         message per call, and ask the model again for the final answer.

    Only a single round of tool calls is performed: the second request is
    sent without ``tools``.

    Args:
        user_query: The user's question.

    Returns:
        The text content of the model's final message.
    """
    messages = [{"role": "user", "content": user_query}]

    # First request: the model decides whether a tool is needed.
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
        tools=tools,
        tool_choice="auto",
    )
    response_message = response.choices[0].message
    tool_calls = response_message.tool_calls

    if not tool_calls:
        # No tool requested: the first reply is already the answer.
        return response_message.content

    # The assistant message holding the tool calls goes into the history
    # before the tool results that answer it.
    messages.append(response_message)
    for tool_call in tool_calls:
        messages.append({
            "tool_call_id": tool_call.id,
            "role": "tool",
            "name": tool_call.function.name,
            "content": _execute_tool_call(tool_call),
        })

    # Second request: the model writes the final answer from the tool results.
    final_response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
    )
    return final_response.choices[0].message.content
# Demo queries
for demo_query in (
    "北京今天天气怎么样?",
    "帮我查一下订单 ORD-001 的状态",
    "你好,你是谁?",  # needs no tool call
):
    print(run_agent(demo_query))
2. RAG(检索增强生成)
import numpy as np
# ============================================================
# Build the knowledge base
# ============================================================
# Each string is one passage; passages are embedded and retrieved whole.
documents: list[str] = [
    "Vibe 平台是一个智能元器件管理系统,支持元器件搜索、文档生成和代码例程。",
    "用户可以使用自然语言搜索技术栈,如 React、Spring Boot、PyTorch等。",
    "平台支持自动生成 Markdown 格式的技术文档和代码示例。",
    "价格单位为分(cents),1元 = 100分。",
    "所有回复必须使用中文,元器件处理需要按工作流顺序执行。",
]
# Fetch embeddings
def get_embeddings(texts: list[str]) -> list[list[float]]:
    """Embed ``texts`` with ``text-embedding-3-small``.

    Args:
        texts: The strings to embed.

    Returns:
        One embedding vector per item of the response data, or an empty list
        when ``texts`` is empty.
    """
    if not texts:
        # Nothing to embed: return early instead of sending a request with
        # an empty input list.
        return []
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=texts,
    )
    return [item.embedding for item in response.data]
# Embed the whole knowledge base once, up front, and report its size.
doc_embeddings = get_embeddings(documents)
doc_count = len(documents)
embedding_dim = len(doc_embeddings[0])
print(f"知识库大小: {doc_count} 条文档, 向量维度: {embedding_dim}")
# ============================================================
# Semantic search
# ============================================================
def semantic_search(query: str, top_k: int = 3) -> list[str]:
    """Return the ``top_k`` documents most similar in meaning to ``query``.

    The query is embedded, compared with every entry of ``doc_embeddings`` by
    cosine similarity, and the matching ``documents`` entries are returned
    from most to least similar.

    Args:
        query: The search text.
        top_k: Maximum number of documents to return. Zero or a negative
            value returns an empty list.

    Returns:
        Up to ``top_k`` documents, best match first.
    """
    # Guard first: a slice like [-0:] would select every document rather
    # than none, and there is no point embedding the query when nothing can
    # be returned.
    if top_k <= 0 or not doc_embeddings:
        return []

    query_vec = np.asarray(get_embeddings([query])[0], dtype=float)
    doc_matrix = np.asarray(doc_embeddings, dtype=float)

    # Cosine similarity of the query against all documents in one pass:
    # dot(doc, query) / (|doc| * |query|).
    norms = np.linalg.norm(doc_matrix, axis=1) * np.linalg.norm(query_vec)
    similarities = doc_matrix @ query_vec / norms

    # argsort is ascending, so reverse it and keep the first top_k indices.
    top_indices = np.argsort(similarities)[::-1][:top_k]
    return [documents[i] for i in top_indices]
# ============================================================
# RAG question answering
# ============================================================
def rag_qa(query: str) -> str:
    """Answer ``query`` from the knowledge base (retrieve, then generate).

    The three most similar documents are retrieved, joined into a context
    section of the system prompt, and the model is asked to answer the query
    with that prompt.
    """
    # Step 1: retrieve the passages to ground the answer on.
    relevant_docs = semantic_search(query, top_k=3)
    print(f"📚 检索到 {len(relevant_docs)} 条相关文档")

    # Step 2: put the retrieved passages into the system prompt.
    context = "\n\n".join(relevant_docs)
    system_prompt = (
        "你是一个知识库助手。请基于以下上下文回答问题。\n"
        "如果上下文不足以回答,请说明。\n"
        "上下文:\n"
        f"{context}"
    )

    # Step 3: generate the answer.
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": query},
    ]
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=chat_messages,
        temperature=0.3,  # lower randomness
    )
    return response.choices[0].message.content
# Demo: a separator line, then two knowledge-base questions.
separator = "=" * 50
print("\n" + separator)
first_answer = rag_qa("价格单位是什么?")
print(first_answer)
second_answer = rag_qa("如何搜索技术栈?")
print("\n" + second_answer)
3. 完整的 Agent + RAG 架构
用户提问
│
▼
┌─────────────────────┐
│ Router (GPT-4o) │ → 判断:需要工具?需要检索?
└──────┬──────────────┘
│
┌────┼────┐
▼ ▼ ▼
Tool RAG Direct
│ │ │
└────┼────┘
▼
Final Answer
关键要点
| 概念 | 说明 |
| --- | --- |
| tools 参数 | 向 GPT 注册可调用函数 |
| tool_choice: "auto" | GPT 自行决定是否调用工具 |
| role: "tool" | 返回函数执行结果给 GPT |
| text-embedding-3-small | 性价比最高的嵌入模型 |
| 余弦相似度 | 衡量向量间语义相似度的标准方法 |
| RAG vs 微调 | RAG 更新知识只需改文档,微调需重新训练 |