feat: support multi-language

This commit is contained in:
Li Xin 2025-04-21 19:50:34 +08:00
parent 0d2f93c773
commit b67b04ff5d
6 changed files with 54 additions and 45 deletions

View File

@@ -1,22 +1,23 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
import logging
import json import json
from typing import Literal, Annotated import logging
from typing import Annotated, Literal
from langchain_core.messages import HumanMessage, AIMessage from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.tools import tool
from langchain_core.runnables import RunnableConfig from langchain_core.runnables import RunnableConfig
from langchain_core.tools import tool
from langgraph.types import Command, interrupt from langgraph.types import Command, interrupt
from src.llms.llm import get_llm_by_type from src.agents.agents import coder_agent, research_agent
from src.config.agents import AGENT_LLM_MAP from src.config.agents import AGENT_LLM_MAP
from src.config.configuration import Configuration from src.config.configuration import Configuration
from src.prompts.template import apply_prompt_template from src.llms.llm import get_llm_by_type
from src.prompts.planner_model import Plan, StepType from src.prompts.planner_model import Plan, StepType
from src.prompts.template import apply_prompt_template
from src.utils.json_utils import repair_json_output from src.utils.json_utils import repair_json_output
from src.agents.agents import research_agent, coder_agent
from .types import State from .types import State
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -117,6 +118,7 @@ def human_feedback_node(
update={ update={
"current_plan": Plan.model_validate(new_plan), "current_plan": Plan.model_validate(new_plan),
"plan_iterations": plan_iterations, "plan_iterations": plan_iterations,
"locale": new_plan["locale"],
}, },
goto=goto, goto=goto,
) )
@@ -209,7 +211,7 @@ def _execute_agent_step(
agent_input = { agent_input = {
"messages": [ "messages": [
HumanMessage( HumanMessage(
content=f"#Task\n\n##title\n\n{step.title}\n\n##description\n\n{step.description}" content=f"#Task\n\n##title\n\n{step.title}\n\n##description\n\n{step.description}\n\n##locale\n\n{state.get('locale', 'en-US')}"
) )
] ]
} }

View File

@@ -2,9 +2,10 @@
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
import operator import operator
from typing import Annotated
from langgraph.graph import MessagesState from langgraph.graph import MessagesState
from typing import Annotated
from src.prompts.planner_model import Plan from src.prompts.planner_model import Plan
@@ -12,6 +13,7 @@ class State(MessagesState):
"""State for the agent system, extends MessagesState with next field.""" """State for the agent system, extends MessagesState with next field."""
# Runtime Variables # Runtime Variables
locale: str = "en-US"
observations: Annotated[list[str], operator.add] = [] observations: Annotated[list[str], operator.add] = []
plan_iterations: int = 0 plan_iterations: int = 0
current_plan: Plan | str = None current_plan: Plan | str = None

View File

@@ -23,12 +23,12 @@ You are a professional software engineer proficient in Python scripting. Your ta
- Use comments in code to improve readability and maintainability. - Use comments in code to improve readability and maintainability.
- If you want to see the output of a value, you MUST print it out with `print(...)`. - If you want to see the output of a value, you MUST print it out with `print(...)`.
- Always and only use Python to do the math. - Always and only use Python to do the math.
- Always use the same language as the initial question.
- Always use `yfinance` for financial market data: - Always use `yfinance` for financial market data:
- Get historical data with `yf.download()` - Get historical data with `yf.download()`
- Access company info with `Ticker` objects - Access company info with `Ticker` objects
- Use appropriate date ranges for data retrieval - Use appropriate date ranges for data retrieval
- Required Python packages are pre-installed: - Required Python packages are pre-installed:
- `pandas` for data manipulation - `pandas` for data manipulation
- `numpy` for numerical operations - `numpy` for numerical operations
- `yfinance` for financial market data - `yfinance` for financial market data
- Always output in the locale of **{{ locale }}**.

View File

@@ -74,10 +74,10 @@ Different types of steps have different web search requirements:
## Exclusions ## Exclusions
- **No Direct Calculations in Research Steps**: - **No Direct Calculations in Research Steps**:
- Research steps should only gather data and information - Research steps should only gather data and information
- All mathematical calculations must be handled by processing steps - All mathematical calculations must be handled by processing steps
- Numerical analysis must be delegated to processing steps - Numerical analysis must be delegated to processing steps
- Research steps focus on information gathering only - Research steps focus on information gathering only
## Analysis Framework ## Analysis Framework
@@ -135,16 +135,16 @@ When planning information gathering, consider these key aspects and ensure COMPR
- To begin with, repeat user's requirement in your own words as `thought`. - To begin with, repeat user's requirement in your own words as `thought`.
- Rigorously assess if there is sufficient context to answer the question using the strict criteria above. - Rigorously assess if there is sufficient context to answer the question using the strict criteria above.
- If context is sufficient: - If context is sufficient:
- Set `has_enough_context` to true - Set `has_enough_context` to true
- No need to create information gathering steps - No need to create information gathering steps
- If context is insufficient (default assumption): - If context is insufficient (default assumption):
- Break down the required information using the Analysis Framework - Break down the required information using the Analysis Framework
- Create NO MORE THAN {{ max_step_num }} focused and comprehensive steps that cover the most essential aspects - Create NO MORE THAN {{ max_step_num }} focused and comprehensive steps that cover the most essential aspects
- Ensure each step is substantial and covers related information categories - Ensure each step is substantial and covers related information categories
- Prioritize breadth and depth within the {{ max_step_num }}-step constraint - Prioritize breadth and depth within the {{ max_step_num }}-step constraint
- For each step, carefully assess if web search is needed: - For each step, carefully assess if web search is needed:
- Research and external data gathering: Set `need_web_search: true` - Research and external data gathering: Set `need_web_search: true`
- Internal data processing: Set `need_web_search: false` - Internal data processing: Set `need_web_search: false`
- Specify the exact data to be collected in step's `description`. Include a `note` if necessary. - Specify the exact data to be collected in step's `description`. Include a `note` if necessary.
- Prioritize depth and volume of relevant information - limited information is not acceptable. - Prioritize depth and volume of relevant information - limited information is not acceptable.
- Use the same language as the user to generate the plan. - Use the same language as the user to generate the plan.
@@ -163,6 +163,7 @@ interface Step {
} }
interface Plan { interface Plan {
locale: string; // e.g. "en-US" or "zh-CN", based on the user's language or specific request
has_enough_context: boolean; has_enough_context: boolean;
thought: string; thought: string;
title: string; title: string;
@@ -179,7 +180,7 @@ interface Plan {
- Never settle for minimal information - the goal is a comprehensive, detailed final report - Never settle for minimal information - the goal is a comprehensive, detailed final report
- Limited or insufficient information will lead to an inadequate final report - Limited or insufficient information will lead to an inadequate final report
- Carefully assess each step's web search requirement based on its nature: - Carefully assess each step's web search requirement based on its nature:
- Research steps (`need_web_search: true`) for gathering information - Research steps (`need_web_search: true`) for gathering information
- Processing steps (`need_web_search: false`) for calculations and data processing - Processing steps (`need_web_search: false`) for calculations and data processing
- Default to gathering more information unless the strictest sufficient context criteria are met - Default to gathering more information unless the strictest sufficient context criteria are met
- Always Use the same language as the user - Always use the same language as the user

View File

@@ -1,9 +1,10 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
from pydantic import BaseModel, Field
from typing import List, Optional
from enum import Enum from enum import Enum
from typing import List, Optional
from pydantic import BaseModel, Field
class StepType(str, Enum): class StepType(str, Enum):
@@ -24,6 +25,9 @@ class Step(BaseModel):
class Plan(BaseModel): class Plan(BaseModel):
locale: str = Field(
..., description="e.g. 'en-US' or 'zh-CN', based on the user's language"
)
has_enough_context: bool has_enough_context: bool
thought: str thought: str
title: str title: str

View File

@@ -33,7 +33,7 @@ You are dedicated to conducting thorough investigations and providing comprehens
- [Source Title](https://example.com/page2) - [Source Title](https://example.com/page2)
``` ```
- Always use the same language as the initial question. - Always output in the locale of **{{ locale }}**.
- DO NOT include inline citations in the text. Instead, track all sources and list them in the References section at the end using link reference format. - DO NOT include inline citations in the text. Instead, track all sources and list them in the References section at the end using link reference format.
# Notes # Notes
@@ -49,4 +49,4 @@ You are dedicated to conducting thorough investigations and providing comprehens
- When presenting information from multiple sources, clearly indicate which source each piece of information comes from. - When presenting information from multiple sources, clearly indicate which source each piece of information comes from.
- Include images using `![Image Description](image_url)` in a separate section. - Include images using `![Image Description](image_url)` in a separate section.
- The included images should **only** be from the information gathered **from the search results or the crawled content**. **Never** include images that are not from the search results or the crawled content. - The included images should **only** be from the information gathered **from the search results or the crawled content**. **Never** include images that are not from the search results or the crawled content.
- Always use the same language as the initial question. - Always use the locale of **{{ locale }}** for the output.