微信扫码
添加专属顾问
我要投稿
掌握OpenAI API JSON格式,轻松处理复杂数据。核心内容:1. OpenAI API返回JSON格式的核心参数配置;2. 基本JSON格式响应及复杂结构化数据请求示例;3. JSON错误处理与修复技巧。
核心参数是 response_format={"type": "json_object"},其他支持JSON模式调用的模型也可以这样使用。下面我们以OpenAI模型为例:
import openai
# Create the API client. "your-api-key" is a placeholder value.
client = openai.OpenAI(api_key="your-api-key")

# Basic JSON-mode request: response_format={"type": "json_object"} is the
# parameter this example demonstrates.
response = client.chat.completions.create(
    model="gpt-4-turbo",
    response_format={"type": "json_object"},
    messages=[
        {
            "role": "system",
            "content": "你是一个返回JSON格式的助手。",
        },
        {
            "role": "user",
            "content": "返回包含用户名、年龄和爱好的JSON",
        },
    ],
)
# Print the content of the first choice's message.
print(response.choices[0].message.content)
# 输出示例:
# {
# "name": "John Doe",
# "age": 30,
# "hobbies": ["reading", "hiking", "photography"]
# }
# JSON-mode request asking for a list of five users.
response = client.chat.completions.create(
    model="gpt-4-turbo",
    response_format={"type": "json_object"},
    messages=[
        {
            "role": "system",
            "content": "你是一个返回JSON格式的助手。",
        },
        {
            "role": "user",
            "content": "生成5个用户的数据,包括姓名、电子邮件和订阅状态",
        },
    ],
)
# Print the content of the first choice's message.
print(response.choices[0].message.content)
# 输出示例:
# {
# "users": [
# {"id": 1, "name": "Alice Smith", "email": "alice@example.com", "subscribed": true},
# {"id": 2, "name": "Bob Johnson", "email": "bob@example.com", "subscribed": false},
# {"id": 3, "name": "Carol Williams", "email": "carol@example.com", "subscribed": true},
# {"id": 4, "name": "David Brown", "email": "david@example.com", "subscribed": true},
# {"id": 5, "name": "Eve Davis", "email": "eve@example.com", "subscribed": false}
# ]
# }
# Structured output through a forced tool call: the JSON Schema under
# "parameters" describes the arguments, and tool_choice names the one function
# the request asks the model to call.
response = client.chat.completions.create(
    model="gpt-4-turbo",
    messages=[
        {
            "role": "system",
            "content": "你是一个帮助用户的助手。",
        },
        {
            "role": "user",
            "content": "分析以下文本的情感:'我今天非常开心,但天气不太好'",
        },
    ],
    tools=[
        {
            "type": "function",
            "function": {
                "name": "analyze_sentiment",
                "description": "分析文本的情感",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "text": {
                            "type": "string",
                            "description": "要分析的文本",
                        },
                        "sentiment": {
                            "type": "string",
                            "enum": ["positive", "negative", "neutral", "mixed"],
                        },
                        "confidence": {
                            "type": "number",
                            "description": "情感分析的置信度",
                        },
                        "details": {
                            "type": "object",
                            "properties": {
                                "positive_aspects": {
                                    "type": "array",
                                    "items": {"type": "string"},
                                },
                                "negative_aspects": {
                                    "type": "array",
                                    "items": {"type": "string"},
                                },
                            },
                        },
                    },
                    "required": ["sentiment", "confidence"],
                },
            },
        }
    ],
    tool_choice={
        "type": "function",
        "function": {"name": "analyze_sentiment"},
    },
)
# The structured result is read from the first tool call's arguments rather
# than from message.content.
print(response.choices[0].message.tool_calls[0].function.arguments)
# 输出示例:
# {
# "text": "我今天非常开心,但天气不太好",
# "sentiment": "mixed",
# "confidence": 0.85,
# "details": {
# "positive_aspects": ["今天非常开心"],
# "negative_aspects": ["天气不太好"]
# }
# }
# JSON-mode request whose prompt mixes Chinese sentences with English
# translations.
response = client.chat.completions.create(
    model="gpt-4-turbo",
    response_format={"type": "json_object"},
    messages=[
        {
            "role": "system",
            "content": "你是一个返回JSON格式的助手。",
        },
        {
            "role": "user",
            "content": "返回一个包含中文句子及其英文翻译的JSON数组",
        },
    ],
)
# Print the content of the first choice's message.
print(response.choices[0].message.content)
# 输出示例:
# {
# "translations": [
# {"chinese": "你好世界", "english": "Hello world"},
# {"chinese": "很高兴认识你", "english": "Nice to meet you"},
# {"chinese": "我爱学习编程", "english": "I love learning programming"}
# ]
# }
# JSON-mode request for a nested structure (company -> departments ->
# employees).
response = client.chat.completions.create(
    model="gpt-4-turbo",
    response_format={"type": "json_object"},
    messages=[
        {
            "role": "system",
            "content": "你是一个返回JSON格式的助手。",
        },
        {
            "role": "user",
            "content": "返回一个公司结构的JSON,包含部门和员工",
        },
    ],
)
# Print the content of the first choice's message.
print(response.choices[0].message.content)
# 输出示例:
# {
# "company": {
# "name": "Tech Solutions Inc.",
# "founded": 2010,
# "departments": [
# {
# "name": "Engineering",
# "head": "Zhang Wei",
# "employees": [
# {"id": 101, "name": "李明", "position": "Senior Developer"},
# {"id": 102, "name": "王芳", "position": "QA Engineer"}
# ]
# },
# {
# "name": "Marketing",
# "head": "Sarah Johnson",
# "employees": [
# {"id": 201, "name": "刘青", "position": "Marketing Specialist"},
# {"id": 202, "name": "陈晓", "position": "Content Writer"}
# ]
# }
# ]
# }
# }
def get_structured_data(query, schema):
    """Ask the model for a JSON reply shaped by *schema*.

    The schema is pretty-printed into the system prompt, and the request is
    sent with ``response_format={"type": "json_object"}``.

    Args:
        query: The user request, sent as the user message.
        schema: A JSON-serialisable schema description embedded in the
            system prompt.

    Returns:
        The ``message.content`` of the first choice, exactly as returned by
        the API (it is not parsed or validated here).
    """
    # Imported locally because this snippet runs before the file's own
    # ``import json`` statement.
    import json

    schema_text = json.dumps(schema, ensure_ascii=False, indent=2)
    # The prompt lines stay flush-left so no leading indentation ends up in
    # the text sent to the model.
    system_prompt = f"""
你必须严格按照以下JSON模式返回数据:
```
{schema_text}
```
不要添加任何额外的字段,也不要省略任何必需的字段。
不要在返回的JSON外包含任何其他文本、解释或注释。
"""
    response = client.chat.completions.create(
        model="gpt-4-turbo",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ],
    )
    return response.choices[0].message.content
# Schema for the expected reply: an object with one required "products" array
# whose items must carry all five listed fields.
product_schema = {
    "type": "object",
    "properties": {
        "products": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "id": {"type": "string"},
                    "name": {"type": "string"},
                    "price": {"type": "number"},
                    "category": {"type": "string"},
                    "inStock": {"type": "boolean"},
                },
                "required": ["id", "name", "price", "category", "inStock"],
            },
        },
    },
    "required": ["products"],
}
# Request three electronics products constrained by product_schema and print
# the reply content as returned by get_structured_data.
result = get_structured_data("生成3个电子产品的详细信息", product_schema)
print(result)
# 输出示例:
# {
# "products": [
# {
# "id": "EP001",
# "name": "超薄笔记本电脑",
# "price": 5999.99,
# "category": "电脑",
# "inStock": true
# },
# {
# "id": "EP002",
# "name": "智能手机",
# "price": 3999.99,
# "category": "手机",
# "inStock": true
# },
# {
# "id": "EP003",
# "name": "无线耳机",
# "price": 999.99,
# "category": "音频设备",
# "inStock": false
# }
# ]
# }
json_repair
修复JSON错误示例
当OpenAI API返回的JSON格式有问题时,可以使用json_repair库修复这些错误。可以看到大部分简单的错误示例是可以直接修复的,有些语义难度大的确实比较难修复。以下是常见的JSON错误及其修复示例:
from json_repair import repair_json, loads
import json
# Example 1: single quotes used for keys and string values.
bad_json1 = "{'name': 'John', 'age': 30, 'city': 'New York'}"
fixed_json1 = repair_json(bad_json1)
print(
    "修复单引号:",
    f"修复前: {bad_json1}",
    f"修复后: {fixed_json1}",
    "",
    sep="\n",
)

# Example 2: keys written without any quotes.
bad_json2 = "{name: 'John', age: 30, city: 'New York'}"
fixed_json2 = repair_json(bad_json2)
print(
    "修复缺少引号的键:",
    f"修复前: {bad_json2}",
    f"修复后: {fixed_json2}",
    "",
    sep="\n",
)

# Example 3: trailing comma after the last member.
bad_json3 = '{"name": "John", "age": 30, "city": "New York",}'
fixed_json3 = repair_json(bad_json3)
print(
    "修复多余的逗号:",
    f"修复前: {bad_json3}",
    f"修复后: {fixed_json3}",
    "",
    sep="\n",
)
# Example 4: the enclosing braces around the key/value pairs are missing.
bad_json4 = '"name": "John", "age": 30, "city": "New York"'
fixed_json4 = repair_json(bad_json4)
print("修复缺少括号:")
print(f"修复前: {bad_json4}")
print(f"修复后: {fixed_json4}")
print()
这个示例修复失败了:json_repair 没有补回缺失的最外层大括号。
# Example 5: Python-style literals (True / None) where JSON expects
# true / null.
bad_json5 = '{"name": "John", "active": True, "data": None}'
fixed_json5 = repair_json(bad_json5)
print(
    "修复非标准的布尔值或空值:",
    f"修复前: {bad_json5}",
    f"修复后: {fixed_json5}",
    "",
    sep="\n",
)

# Example 6: an unquoted key (phone) inside a nested object.
bad_json6 = '{"user": {"name": "John", "contacts": {"email": "john@example.com", phone: "123-456-7890"}}}'
fixed_json6 = repair_json(bad_json6)
print(
    "修复嵌套结构中的错误:",
    f"修复前: {bad_json6}",
    f"修复后: {fixed_json6}",
    "",
    sep="\n",
)
# Example 7: a doubled comma inside an array.
bad_json7 = '{"items": [1, 2, 3,, 4, 5]}'  # extra comma inside the array
fixed_json7 = repair_json(bad_json7)
print("修复数组中的错误:")
print(f"修复前: {bad_json7}")
print(f"修复后: {fixed_json7}")
print()
# Example 8: the array's square bracket is never closed.
bad_json8 = '{"name": "John", "items": [1, 2, 3}'
fixed_json8 = repair_json(bad_json8)
print(
    "修复不匹配的括号:",
    f"修复前: {bad_json8}",
    f"修复后: {fixed_json8}",
    "",
    sep="\n",
)
示例9:修复中文等非ASCII字符的问题
# Example 9: single-quoted JSON containing Chinese text; ensure_ascii=False is
# passed so the repaired string keeps the characters unescaped.
bad_json9 = "{'name': '张三', 'city': '北京'}"
fixed_json9 = repair_json(bad_json9, ensure_ascii=False)
print(
    "修复包含中文的JSON并保留中文字符:",
    f"修复前: {bad_json9}",
    f"修复后: {fixed_json9}",
    "",
    sep="\n",
)

# Example 10: get a Python object back instead of a JSON string.
bad_json10 = "{'name': 'John', 'age': 30, 'skills': ['Python', 'JavaScript']}"
# Equivalent to repair_json(bad_json10, return_objects=True).
fixed_obj10 = loads(bad_json10)
print(
    "直接获取Python对象:",
    f"修复前: {bad_json10}",
    f"修复后(Python对象): {fixed_obj10}",
    f"对象类型: {type(fixed_obj10)}",
    "",
    sep="\n",
)
# Example 11: input that begins with free text and has a bare, undefined
# value. The call is guarded so a failure inside repair_json is reported
# instead of aborting the script.
severely_broken_json = "{这不是有效的JSON,name: 'John', age: missing_value}"
try:
    fixed_severely_broken = repair_json(severely_broken_json)
    print("修复严重破损的JSON:")
    print(f"修复前: {severely_broken_json}")
    print(f"修复后: {fixed_severely_broken}")
except Exception as e:
    print(f"修复失败: {e}")
# Blank separator line, as after the other examples.
print()
这个示例其实修复失败了,主要是因为开头混入了一句不属于任何键值对的自然语言文本,对解析的影响比较大,修复难度也比较大。
# Example 12: JSON text carrying // line comments and /* */ block comments.
json_with_comments = """
{
"name": "John", // 这是用户名
"age": 30, /* 这是年龄 */
"city": "New York"
}
"""
fixed_json_comments = repair_json(json_with_comments)
print(
    "修复包含注释的JSON:",
    f"修复前: {json_with_comments}",
    f"修复后: {fixed_json_comments}",
    sep="\n",
)
还有一个常见场景:模型返回的内容以 ```json 代码块标记开头,比如下面这样:
# Sample reply wrapped in a ```json fence. The payload is deliberately
# malformed: "skills" uses single-quoted strings and the email / phone keys
# are unquoted.
markdown_json = """```json
{
"name": "张三",
"age": 30,
"skills": ['Python', 'JavaScript', 'React'],
"contact": {
email: "zhangsan@example.com",
phone: "123-456-7890"
}
}
```"""
或者
# Sample reply without a fence. The payload is deliberately malformed: a
# trailing comma in the second product, an unquoted `name` key in the third,
# and a Python-style `True` literal.
broken_json = """{
"products": [
{"id": 1, "name": "笔记本电脑", "price": 5999.99},
{"id": 2, "name": "智能手机", "price": 3999.99,},
{"id": 3, name: "无线耳机", "price": 999.99}
],
"total_items": 3,
"in_stock": True
}"""
我们可以用下面这个函数先去除代码块的前缀和后缀,然后再进行修复:
def repair_json_output(content: str) -> str:
    """Repair and normalize JSON output.

    Surrounding whitespace is stripped first. Text is treated as JSON when it
    starts with ``{`` or ``[`` or contains a json/ts code-fence marker. In
    that case a fence marker at the very start and a closing fence at the
    very end are removed, the remainder is parsed (strictly first, then via
    ``json_repair`` if strict parsing fails) and re-serialized with non-ASCII
    characters preserved.

    Args:
        content (str): String content that may contain JSON.

    Returns:
        str: Repaired JSON string. If the text does not look like JSON, the
        whitespace-stripped text is returned; if parsing and repair both
        fail, the fence-stripped text is returned.
    """
    # Local imports keep the function self-contained: the surrounding file
    # imports names *from* json_repair but never binds the module itself,
    # and it defines no ``logger``.
    import json
    import logging

    content = content.strip()
    looks_like_json = (
        content.startswith(("{", "["))
        or "```json" in content
        or "```ts" in content
    )
    if not looks_like_json:
        return content

    try:
        # Only a fence at the very start / very end is removed; a fence
        # embedded after leading prose is handed to the parser unchanged.
        content = content.removeprefix("```json")
        content = content.removeprefix("```ts")
        content = content.removesuffix("```")
        try:
            # Well-formed JSON needs no repair heuristics.
            parsed = json.loads(content)
        except ValueError:
            import json_repair

            parsed = json_repair.loads(content)
        return json.dumps(parsed, ensure_ascii=False)
    except Exception as e:
        # Best-effort helper: report the failure and fall through to
        # returning the text as-is.
        logging.getLogger(__name__).warning("JSON repair failed: %s", e)
    return content
如果你觉得这篇文章对你有帮助,别忘了点个赞、送个喜欢
>/ 作者:致Great
>/ 转载说明:欢迎转载,标注来源即可
53AI,企业落地大模型首选服务商
产品:场景落地咨询+大模型应用平台+行业解决方案
承诺:免费场景POC验证,效果验证后签署服务协议。零风险落地应用大模型,已交付160+中大型企业
2025-02-04
2025-02-04
2024-09-18
2024-07-11
2024-07-09
2024-07-11
2024-07-26
2025-02-05
2025-01-27
2025-02-01