feat(api): use json_repair to fix invalid json while generating structured output (#18977)

When generating JSON schema using an LLM in the structured output feature,
models may occasionally return invalid JSON, which prevents clients from correctly 
parsing the response and can lead to UI breakage.

This commit addresses the issue by introducing `json_repair` to automatically 
fix invalid JSON strings returned by the LLM, ensuring smoother functionality 
and better client-side handling of structured outputs.


Co-authored-by: lizb <lizb@sugon.com>
This commit is contained in:
Ganondorf 2025-04-29 12:39:13 +08:00 committed by GitHub
parent 28a59ba344
commit a147d2a200
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -3,6 +3,8 @@ import logging
import re
from typing import Optional, cast
import json_repair
from core.llm_generator.output_parser.rule_config_generator import RuleConfigGeneratorOutputParser
from core.llm_generator.output_parser.suggested_questions_after_answer import SuggestedQuestionsAfterAnswerOutputParser
from core.llm_generator.prompts import (
@ -366,7 +368,20 @@ class LLMGenerator:
),
)
generated_json_schema = cast(str, response.message.content)
raw_content = response.message.content
if not isinstance(raw_content, str):
raise ValueError(f"LLM response content must be a string, got: {type(raw_content)}")
try:
parsed_content = json.loads(raw_content)
except json.JSONDecodeError:
parsed_content = json_repair.loads(raw_content)
if not isinstance(parsed_content, dict | list):
raise ValueError(f"Failed to parse structured output from llm: {raw_content}")
generated_json_schema = json.dumps(parsed_content, indent=2, ensure_ascii=False)
return {"output": generated_json_schema, "error": ""}
except InvokeError as e: