feat: support multi-language

2025-10-04 06:36:31 +08:00 · 2025-04-21 19:50:34 +08:00 · 2025-04-21 19:50:34 +08:00 · b67b04ff5d
commit b67b04ff5d
parent 0d2f93c773
6 changed files with 54 additions and 45 deletions
--- a/src/graph/nodes.py
+++ b/src/graph/nodes.py
@@ -1,22 +1,23 @@
 # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
 # SPDX-License-Identifier: MIT

-import logging
 import json
-from typing import Literal, Annotated
+import logging
+from typing import Annotated, Literal

-from langchain_core.messages import HumanMessage, AIMessage
-from langchain_core.tools import tool
+from langchain_core.messages import AIMessage, HumanMessage
 from langchain_core.runnables import RunnableConfig
+from langchain_core.tools import tool
 from langgraph.types import Command, interrupt

-from src.llms.llm import get_llm_by_type
+from src.agents.agents import coder_agent, research_agent
 from src.config.agents import AGENT_LLM_MAP
 from src.config.configuration import Configuration
-from src.prompts.template import apply_prompt_template
+from src.llms.llm import get_llm_by_type
 from src.prompts.planner_model import Plan, StepType
+from src.prompts.template import apply_prompt_template
 from src.utils.json_utils import repair_json_output
-from src.agents.agents import research_agent, coder_agent
+
 from .types import State

 logger = logging.getLogger(__name__)
@@ -117,6 +118,7 @@ def human_feedback_node(
        update={
            "current_plan": Plan.model_validate(new_plan),
            "plan_iterations": plan_iterations,
+            "locale": new_plan["locale"],
        },
        goto=goto,
    )
@@ -209,7 +211,7 @@ def _execute_agent_step(
    agent_input = {
        "messages": [
            HumanMessage(
-                content=f"#Task\n\n##title\n\n{step.title}\n\n##description\n\n{step.description}"
+                content=f"#Task\n\n##title\n\n{step.title}\n\n##description\n\n{step.description}\n\n##locale\n\n{state.get('locale', 'en-US')}"
            )
        ]
    }
--- a/src/graph/types.py
+++ b/src/graph/types.py
@@ -2,9 +2,10 @@
 # SPDX-License-Identifier: MIT

 import operator
+from typing import Annotated

 from langgraph.graph import MessagesState
-from typing import Annotated
+
 from src.prompts.planner_model import Plan


@@ -12,6 +13,7 @@ class State(MessagesState):
    """State for the agent system, extends MessagesState with next field."""

    # Runtime Variables
+    locale: str = "en-US"
    observations: Annotated[list[str], operator.add] = []
    plan_iterations: int = 0
    current_plan: Plan | str = None
--- a/src/prompts/coder.md
+++ b/src/prompts/coder.md
@@ -23,12 +23,12 @@ You are a professional software engineer proficient in Python scripting. Your ta
 - Use comments in code to improve readability and maintainability.
 - If you want to see the output of a value, you MUST print it out with `print(...)`.
 - Always and only use Python to do the math.
-- Always use the same language as the initial question.
 - Always use `yfinance` for financial market data:
-  - Get historical data with `yf.download()`
-  - Access company info with `Ticker` objects
-  - Use appropriate date ranges for data retrieval
+    - Get historical data with `yf.download()`
+    - Access company info with `Ticker` objects
+    - Use appropriate date ranges for data retrieval
 - Required Python packages are pre-installed:
-  - `pandas` for data manipulation
-  - `numpy` for numerical operations
-  - `yfinance` for financial market data
+    - `pandas` for data manipulation
+    - `numpy` for numerical operations
+    - `yfinance` for financial market data
+- Always output in the locale of **{{ locale }}**.
--- a/src/prompts/planner.md
+++ b/src/prompts/planner.md
@@ -14,7 +14,7 @@ As a Deep Researcher, you can breakdown the major subject into sub-topics and ex

 The successful research plan must meet these standards:

-1. **Comprehensive Coverage**: 
+1. **Comprehensive Coverage**:
   - Information must cover ALL aspects of the topic
   - Multiple perspectives must be represented
   - Both mainstream and alternative viewpoints should be included
@@ -74,51 +74,51 @@ Different types of steps have different web search requirements:
 ## Exclusions

 - **No Direct Calculations in Research Steps**:
-  - Research steps should only gather data and information
-  - All mathematical calculations must be handled by processing steps
-  - Numerical analysis must be delegated to processing steps
-  - Research steps focus on information gathering only
+    - Research steps should only gather data and information
+    - All mathematical calculations must be handled by processing steps
+    - Numerical analysis must be delegated to processing steps
+    - Research steps focus on information gathering only

 ## Analysis Framework

 When planning information gathering, consider these key aspects and ensure COMPREHENSIVE coverage:

-1. **Historical Context**: 
+1. **Historical Context**:
   - What historical data and trends are needed?
   - What is the complete timeline of relevant events?
   - How has the subject evolved over time?

-2. **Current State**: 
+2. **Current State**:
   - What current data points need to be collected?
   - What is the present landscape/situation in detail?
   - What are the most recent developments?

-3. **Future Indicators**: 
+3. **Future Indicators**:
   - What predictive data or future-oriented information is required?
   - What are all relevant forecasts and projections?
   - What potential future scenarios should be considered?

-4. **Stakeholder Data**: 
+4. **Stakeholder Data**:
   - What information about ALL relevant stakeholders is needed?
   - How are different groups affected or involved?
   - What are the various perspectives and interests?

-5. **Quantitative Data**: 
+5. **Quantitative Data**:
   - What comprehensive numbers, statistics, and metrics should be gathered?
   - What numerical data is needed from multiple sources?
   - What statistical analyses are relevant?

-6. **Qualitative Data**: 
+6. **Qualitative Data**:
   - What non-numerical information needs to be collected?
   - What opinions, testimonials, and case studies are relevant?
   - What descriptive information provides context?

-7. **Comparative Data**: 
+7. **Comparative Data**:
   - What comparison points or benchmark data are required?
   - What similar cases or alternatives should be examined?
   - How does this compare across different contexts?

-8. **Risk Data**: 
+8. **Risk Data**:
   - What information about ALL potential risks should be gathered?
   - What are the challenges, limitations, and obstacles?
   - What contingencies and mitigations exist?
@@ -135,16 +135,16 @@ When planning information gathering, consider these key aspects and ensure COMPR
 - To begin with, repeat user's requirement in your own words as `thought`.
 - Rigorously assess if there is sufficient context to answer the question using the strict criteria above.
 - If context is sufficient:
-  - Set `has_enough_context` to true
-  - No need to create information gathering steps
+    - Set `has_enough_context` to true
+    - No need to create information gathering steps
 - If context is insufficient (default assumption):
-  - Break down the required information using the Analysis Framework
-  - Create NO MORE THAN {{ max_step_num }} focused and comprehensive steps that cover the most essential aspects
-  - Ensure each step is substantial and covers related information categories
-  - Prioritize breadth and depth within the {{ max_step_num }}-step constraint
-  - For each step, carefully assess if web search is needed:
-    - Research and external data gathering: Set `need_web_search: true`
-    - Internal data processing: Set `need_web_search: false`
+    - Break down the required information using the Analysis Framework
+    - Create NO MORE THAN {{ max_step_num }} focused and comprehensive steps that cover the most essential aspects
+    - Ensure each step is substantial and covers related information categories
+    - Prioritize breadth and depth within the {{ max_step_num }}-step constraint
+    - For each step, carefully assess if web search is needed:
+        - Research and external data gathering: Set `need_web_search: true`
+        - Internal data processing: Set `need_web_search: false`
 - Specify the exact data to be collected in step's `description`. Include a `note` if necessary.
 - Prioritize depth and volume of relevant information - limited information is not acceptable.
 - Use the same language as the user to generate the plan.
@@ -163,6 +163,7 @@ interface Step {
 }

 interface Plan {
+  locale: string; // e.g. "en-US" or "zh-CN", based on the user's language or specific request
  has_enough_context: boolean;
  thought: string;
  title: string;
@@ -179,7 +180,7 @@ interface Plan {
 - Never settle for minimal information - the goal is a comprehensive, detailed final report
 - Limited or insufficient information will lead to an inadequate final report
 - Carefully assess each step's web search requirement based on its nature:
-  - Research steps (`need_web_search: true`) for gathering information
-  - Processing steps (`need_web_search: false`) for calculations and data processing
+    - Research steps (`need_web_search: true`) for gathering information
+    - Processing steps (`need_web_search: false`) for calculations and data processing
 - Default to gathering more information unless the strictest sufficient context criteria are met
-- Always Use the same language as the user
+- Always use the same language as the user
--- a/src/prompts/planner_model.py
+++ b/src/prompts/planner_model.py
@@ -1,9 +1,10 @@
 # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
 # SPDX-License-Identifier: MIT

-from pydantic import BaseModel, Field
-from typing import List, Optional
 from enum import Enum
+from typing import List, Optional
+
+from pydantic import BaseModel, Field


 class StepType(str, Enum):
@@ -24,6 +25,9 @@ class Step(BaseModel):


 class Plan(BaseModel):
+    locale: str = Field(
+        ..., description="e.g. 'en-US' or 'zh-CN', based on the user's language"
+    )
    has_enough_context: bool
    thought: str
    title: str
--- a/src/prompts/researcher.md
+++ b/src/prompts/researcher.md
@@ -33,7 +33,7 @@ You are dedicated to conducting thorough investigations and providing comprehens

      - [Source Title](https://example.com/page2)
      ```
-- Always use the same language as the initial question.
+- Always output in the locale of **{{ locale }}**.
 - DO NOT include inline citations in the text. Instead, track all sources and list them in the References section at the end using link reference format.

 # Notes
@@ -49,4 +49,4 @@ You are dedicated to conducting thorough investigations and providing comprehens
 - When presenting information from multiple sources, clearly indicate which source each piece of information comes from.
 - Include images using `![Image Description](image_url)` in a separate section.
 - The included images should **only** be from the information gathered **from the search results or the crawled content**. **Never** include images that are not from the search results or the crawled content.
-- Always use the same language as the initial question.
+- Always use the locale of **{{ locale }}** for the output.