feat: support multi-language

This commit is contained in:
Li Xin 2025-04-21 19:50:34 +08:00
parent 0d2f93c773
commit b67b04ff5d
6 changed files with 54 additions and 45 deletions

View File

@ -1,22 +1,23 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
import logging
import json
from typing import Literal, Annotated
import logging
from typing import Annotated, Literal
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.tools import tool
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.runnables import RunnableConfig
from langchain_core.tools import tool
from langgraph.types import Command, interrupt
from src.llms.llm import get_llm_by_type
from src.agents.agents import coder_agent, research_agent
from src.config.agents import AGENT_LLM_MAP
from src.config.configuration import Configuration
from src.prompts.template import apply_prompt_template
from src.llms.llm import get_llm_by_type
from src.prompts.planner_model import Plan, StepType
from src.prompts.template import apply_prompt_template
from src.utils.json_utils import repair_json_output
from src.agents.agents import research_agent, coder_agent
from .types import State
logger = logging.getLogger(__name__)
@ -117,6 +118,7 @@ def human_feedback_node(
update={
"current_plan": Plan.model_validate(new_plan),
"plan_iterations": plan_iterations,
"locale": new_plan["locale"],
},
goto=goto,
)
@ -209,7 +211,7 @@ def _execute_agent_step(
agent_input = {
"messages": [
HumanMessage(
content=f"#Task\n\n##title\n\n{step.title}\n\n##description\n\n{step.description}"
content=f"#Task\n\n##title\n\n{step.title}\n\n##description\n\n{step.description}\n\n##locale\n\n{state.get('locale', 'en-US')}"
)
]
}

View File

@ -2,9 +2,10 @@
# SPDX-License-Identifier: MIT
import operator
from typing import Annotated
from langgraph.graph import MessagesState
from typing import Annotated
from src.prompts.planner_model import Plan
@ -12,6 +13,7 @@ class State(MessagesState):
"""State for the agent system, extends MessagesState with next field."""
# Runtime Variables
locale: str = "en-US"
observations: Annotated[list[str], operator.add] = []
plan_iterations: int = 0
current_plan: Plan | str = None

View File

@ -23,12 +23,12 @@ You are a professional software engineer proficient in Python scripting. Your ta
- Use comments in code to improve readability and maintainability.
- If you want to see the output of a value, you MUST print it out with `print(...)`.
- Always and only use Python to do the math.
- Always use the same language as the initial question.
- Always use `yfinance` for financial market data:
- Get historical data with `yf.download()`
- Access company info with `Ticker` objects
- Use appropriate date ranges for data retrieval
- Get historical data with `yf.download()`
- Access company info with `Ticker` objects
- Use appropriate date ranges for data retrieval
- Required Python packages are pre-installed:
- `pandas` for data manipulation
- `numpy` for numerical operations
- `yfinance` for financial market data
- `pandas` for data manipulation
- `numpy` for numerical operations
- `yfinance` for financial market data
- Always output in the locale of **{{ locale }}**.

View File

@ -14,7 +14,7 @@ As a Deep Researcher, you can breakdown the major subject into sub-topics and ex
The successful research plan must meet these standards:
1. **Comprehensive Coverage**:
1. **Comprehensive Coverage**:
- Information must cover ALL aspects of the topic
- Multiple perspectives must be represented
- Both mainstream and alternative viewpoints should be included
@ -74,51 +74,51 @@ Different types of steps have different web search requirements:
## Exclusions
- **No Direct Calculations in Research Steps**:
- Research steps should only gather data and information
- All mathematical calculations must be handled by processing steps
- Numerical analysis must be delegated to processing steps
- Research steps focus on information gathering only
- Research steps should only gather data and information
- All mathematical calculations must be handled by processing steps
- Numerical analysis must be delegated to processing steps
- Research steps focus on information gathering only
## Analysis Framework
When planning information gathering, consider these key aspects and ensure COMPREHENSIVE coverage:
1. **Historical Context**:
1. **Historical Context**:
- What historical data and trends are needed?
- What is the complete timeline of relevant events?
- How has the subject evolved over time?
2. **Current State**:
2. **Current State**:
- What current data points need to be collected?
- What is the present landscape/situation in detail?
- What are the most recent developments?
3. **Future Indicators**:
3. **Future Indicators**:
- What predictive data or future-oriented information is required?
- What are all relevant forecasts and projections?
- What potential future scenarios should be considered?
4. **Stakeholder Data**:
4. **Stakeholder Data**:
- What information about ALL relevant stakeholders is needed?
- How are different groups affected or involved?
- What are the various perspectives and interests?
5. **Quantitative Data**:
5. **Quantitative Data**:
- What comprehensive numbers, statistics, and metrics should be gathered?
- What numerical data is needed from multiple sources?
- What statistical analyses are relevant?
6. **Qualitative Data**:
6. **Qualitative Data**:
- What non-numerical information needs to be collected?
- What opinions, testimonials, and case studies are relevant?
- What descriptive information provides context?
7. **Comparative Data**:
7. **Comparative Data**:
- What comparison points or benchmark data are required?
- What similar cases or alternatives should be examined?
- How does this compare across different contexts?
8. **Risk Data**:
8. **Risk Data**:
- What information about ALL potential risks should be gathered?
- What are the challenges, limitations, and obstacles?
- What contingencies and mitigations exist?
@ -135,16 +135,16 @@ When planning information gathering, consider these key aspects and ensure COMPR
- To begin with, repeat the user's requirement in your own words as `thought`.
- Rigorously assess if there is sufficient context to answer the question using the strict criteria above.
- If context is sufficient:
- Set `has_enough_context` to true
- No need to create information gathering steps
- Set `has_enough_context` to true
- No need to create information gathering steps
- If context is insufficient (default assumption):
- Break down the required information using the Analysis Framework
- Create NO MORE THAN {{ max_step_num }} focused and comprehensive steps that cover the most essential aspects
- Ensure each step is substantial and covers related information categories
- Prioritize breadth and depth within the {{ max_step_num }}-step constraint
- For each step, carefully assess if web search is needed:
- Research and external data gathering: Set `need_web_search: true`
- Internal data processing: Set `need_web_search: false`
- Break down the required information using the Analysis Framework
- Create NO MORE THAN {{ max_step_num }} focused and comprehensive steps that cover the most essential aspects
- Ensure each step is substantial and covers related information categories
- Prioritize breadth and depth within the {{ max_step_num }}-step constraint
- For each step, carefully assess if web search is needed:
- Research and external data gathering: Set `need_web_search: true`
- Internal data processing: Set `need_web_search: false`
- Specify the exact data to be collected in each step's `description`. Include a `note` if necessary.
- Prioritize depth and volume of relevant information - limited information is not acceptable.
- Use the same language as the user to generate the plan.
@ -163,6 +163,7 @@ interface Step {
}
interface Plan {
locale: string; // e.g. "en-US" or "zh-CN", based on the user's language or specific request
has_enough_context: boolean;
thought: string;
title: string;
@ -179,7 +180,7 @@ interface Plan {
- Never settle for minimal information - the goal is a comprehensive, detailed final report
- Limited or insufficient information will lead to an inadequate final report
- Carefully assess each step's web search requirement based on its nature:
- Research steps (`need_web_search: true`) for gathering information
- Processing steps (`need_web_search: false`) for calculations and data processing
- Research steps (`need_web_search: true`) for gathering information
- Processing steps (`need_web_search: false`) for calculations and data processing
- Default to gathering more information unless the strictest sufficient context criteria are met
- Always Use the same language as the user
- Always use the same language as the user

View File

@ -1,9 +1,10 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT
from pydantic import BaseModel, Field
from typing import List, Optional
from enum import Enum
from typing import List, Optional
from pydantic import BaseModel, Field
class StepType(str, Enum):
@ -24,6 +25,9 @@ class Step(BaseModel):
class Plan(BaseModel):
locale: str = Field(
..., description="e.g. 'en-US' or 'zh-CN', based on the user's language"
)
has_enough_context: bool
thought: str
title: str

View File

@ -33,7 +33,7 @@ You are dedicated to conducting thorough investigations and providing comprehens
- [Source Title](https://example.com/page2)
```
- Always use the same language as the initial question.
- Always output in the locale of **{{ locale }}**.
- DO NOT include inline citations in the text. Instead, track all sources and list them in the References section at the end using link reference format.
# Notes
@ -49,4 +49,4 @@ You are dedicated to conducting thorough investigations and providing comprehens
- When presenting information from multiple sources, clearly indicate which source each piece of information comes from.
- Include images using `![Image Description](image_url)` in a separate section.
- The included images should **only** be from the information gathered **from the search results or the crawled content**. **Never** include images that are not from the search results or the crawled content.
- Always use the same language as the initial question.
- Always use the locale of **{{ locale }}** for the output.