mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-13 03:39:02 +08:00
fix(structured-output): reasoning model's json format parsing (#19261)
This commit is contained in:
parent
b78846078c
commit
0cfc82d731
@ -149,7 +149,7 @@ class LLMNode(BaseNode[LLMNodeData]):
|
||||
self._llm_file_saver = llm_file_saver
|
||||
|
||||
def _run(self) -> Generator[NodeEvent | InNodeEvent, None, None]:
|
||||
def process_structured_output(text: str) -> Optional[dict[str, Any] | list[Any]]:
|
||||
def process_structured_output(text: str) -> Optional[dict[str, Any]]:
|
||||
"""Process structured output if enabled"""
|
||||
if not self.node_data.structured_output_enabled or not self.node_data.structured_output:
|
||||
return None
|
||||
@ -797,18 +797,22 @@ class LLMNode(BaseNode[LLMNodeData]):
|
||||
stop = model_config.stop
|
||||
return filtered_prompt_messages, stop
|
||||
|
||||
def _parse_structured_output(self, result_text: str) -> dict[str, Any]:
    """Parse the model's raw text into a structured-output dict.

    The text is first parsed as strict JSON. Only when that fails with a
    decode error is it handed to ``json_repair`` to recover from malformed
    output. A repaired result that is a list is reduced to its first dict
    element, or ``{}`` when the list contains no dict.

    Args:
        result_text: Raw text returned by the model.

    Returns:
        The parsed JSON object.

    Raises:
        LLMNodeError: If strictly valid JSON is not an object, or if the
            repaired value is neither an object nor a list.
    """
    try:
        parsed = json.loads(result_text)
    except json.JSONDecodeError:
        # Not valid JSON: try to repair it.
        parsed = json_repair.loads(result_text)
        # Reasoning models such as deepseek-r1 prepend '<think>\n\n</think>\n';
        # for such input the repaired value may be a list, so take the first
        # dict it contains. Lists are only unwrapped on this repair path.
        if isinstance(parsed, list):
            parsed = next((item for item in parsed if isinstance(item, dict)), {})

    # Single validation point for both the strict and the repaired result.
    if not isinstance(parsed, dict):
        raise LLMNodeError(f"Failed to parse structured output: {result_text}")
    return parsed
|
||||
|
||||
|
@ -185,3 +185,38 @@ def test_execute_llm_with_jinja2(setup_code_executor_mock, setup_model_mock):
|
||||
assert item.run_result.process_data is not None
|
||||
assert "sunny" in json.dumps(item.run_result.process_data)
|
||||
assert "what's the weather today?" in json.dumps(item.run_result.process_data)
|
||||
|
||||
|
||||
def test_extract_json():
    """Check that ``_parse_structured_output`` returns the same dict for each model output style."""
    node_config = {
        "id": "llm",
        "data": {
            "title": "123",
            "type": "llm",
            "model": {"provider": "openai", "name": "gpt-3.5-turbo", "mode": "chat", "completion_params": {}},
            "prompt_config": {
                "structured_output": {
                    "enabled": True,
                    "schema": {
                        "type": "object",
                        "properties": {"name": {"type": "string"}, "age": {"type": "number"}},
                    },
                }
            },
            "prompt_template": [{"role": "user", "text": "{{#sys.query#}}"}],
            "memory": None,
            "context": {"enabled": False},
            "vision": {"enabled": False},
        },
    }
    node = init_llm_node(config=node_config)

    # One sample per output style; each must parse to the same object.
    model_outputs = (
        '<think>\n\n</think>{"name": "test", "age": 123',  # reasoning model (deepseek-r1)
        '{"name":"test","age":123}',  # json schema model (gpt-4o)
        '{\n "name": "test",\n "age": 123\n}',  # small model (llama-3.2-1b)
        '```json\n{"name": "test", "age": 123}\n```',  # json markdown (deepseek-chat)
        '{"name":"test",age:123}',  # without quotes (qwen-2.5-0.5b)
    )
    expected = {"name": "test", "age": 123}

    # Stops at the first mismatch, just like the all(...) form.
    for raw_text in model_outputs:
        assert node._parse_structured_output(raw_text) == expected
|
||||
|
Loading…
x
Reference in New Issue
Block a user