feat: support multi-language

This commit is contained in:
Li Xin 2025-04-21 19:50:34 +08:00
parent 0d2f93c773
commit b67b04ff5d
6 changed files with 54 additions and 45 deletions

View File

@ -1,22 +1,23 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
import logging
import json import json
from typing import Literal, Annotated import logging
from typing import Annotated, Literal
from langchain_core.messages import HumanMessage, AIMessage from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.tools import tool
from langchain_core.runnables import RunnableConfig from langchain_core.runnables import RunnableConfig
from langchain_core.tools import tool
from langgraph.types import Command, interrupt from langgraph.types import Command, interrupt
from src.llms.llm import get_llm_by_type from src.agents.agents import coder_agent, research_agent
from src.config.agents import AGENT_LLM_MAP from src.config.agents import AGENT_LLM_MAP
from src.config.configuration import Configuration from src.config.configuration import Configuration
from src.prompts.template import apply_prompt_template from src.llms.llm import get_llm_by_type
from src.prompts.planner_model import Plan, StepType from src.prompts.planner_model import Plan, StepType
from src.prompts.template import apply_prompt_template
from src.utils.json_utils import repair_json_output from src.utils.json_utils import repair_json_output
from src.agents.agents import research_agent, coder_agent
from .types import State from .types import State
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -117,6 +118,7 @@ def human_feedback_node(
update={ update={
"current_plan": Plan.model_validate(new_plan), "current_plan": Plan.model_validate(new_plan),
"plan_iterations": plan_iterations, "plan_iterations": plan_iterations,
"locale": new_plan["locale"],
}, },
goto=goto, goto=goto,
) )
@ -209,7 +211,7 @@ def _execute_agent_step(
agent_input = { agent_input = {
"messages": [ "messages": [
HumanMessage( HumanMessage(
content=f"#Task\n\n##title\n\n{step.title}\n\n##description\n\n{step.description}" content=f"#Task\n\n##title\n\n{step.title}\n\n##description\n\n{step.description}\n\n##locale\n\n{state.get('locale', 'en-US')}"
) )
] ]
} }

View File

@ -2,9 +2,10 @@
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
import operator import operator
from typing import Annotated
from langgraph.graph import MessagesState from langgraph.graph import MessagesState
from typing import Annotated
from src.prompts.planner_model import Plan from src.prompts.planner_model import Plan
@ -12,6 +13,7 @@ class State(MessagesState):
"""State for the agent system, extends MessagesState with next field.""" """State for the agent system, extends MessagesState with next field."""
# Runtime Variables # Runtime Variables
locale: str = "en-US"
observations: Annotated[list[str], operator.add] = [] observations: Annotated[list[str], operator.add] = []
plan_iterations: int = 0 plan_iterations: int = 0
current_plan: Plan | str = None current_plan: Plan | str = None

View File

@ -23,12 +23,12 @@ You are a professional software engineer proficient in Python scripting. Your ta
- Use comments in code to improve readability and maintainability. - Use comments in code to improve readability and maintainability.
- If you want to see the output of a value, you MUST print it out with `print(...)`. - If you want to see the output of a value, you MUST print it out with `print(...)`.
- Always and only use Python to do the math. - Always and only use Python to do the math.
- Always use the same language as the initial question.
- Always use `yfinance` for financial market data: - Always use `yfinance` for financial market data:
- Get historical data with `yf.download()` - Get historical data with `yf.download()`
- Access company info with `Ticker` objects - Access company info with `Ticker` objects
- Use appropriate date ranges for data retrieval - Use appropriate date ranges for data retrieval
- Required Python packages are pre-installed: - Required Python packages are pre-installed:
- `pandas` for data manipulation - `pandas` for data manipulation
- `numpy` for numerical operations - `numpy` for numerical operations
- `yfinance` for financial market data - `yfinance` for financial market data
- Always output in the locale of **{{ locale }}**.

View File

@ -14,7 +14,7 @@ As a Deep Researcher, you can break down the major subject into sub-topics and ex
The successful research plan must meet these standards: The successful research plan must meet these standards:
1. **Comprehensive Coverage**: 1. **Comprehensive Coverage**:
- Information must cover ALL aspects of the topic - Information must cover ALL aspects of the topic
- Multiple perspectives must be represented - Multiple perspectives must be represented
- Both mainstream and alternative viewpoints should be included - Both mainstream and alternative viewpoints should be included
@ -74,51 +74,51 @@ Different types of steps have different web search requirements:
## Exclusions ## Exclusions
- **No Direct Calculations in Research Steps**: - **No Direct Calculations in Research Steps**:
- Research steps should only gather data and information - Research steps should only gather data and information
- All mathematical calculations must be handled by processing steps - All mathematical calculations must be handled by processing steps
- Numerical analysis must be delegated to processing steps - Numerical analysis must be delegated to processing steps
- Research steps focus on information gathering only - Research steps focus on information gathering only
## Analysis Framework ## Analysis Framework
When planning information gathering, consider these key aspects and ensure COMPREHENSIVE coverage: When planning information gathering, consider these key aspects and ensure COMPREHENSIVE coverage:
1. **Historical Context**: 1. **Historical Context**:
- What historical data and trends are needed? - What historical data and trends are needed?
- What is the complete timeline of relevant events? - What is the complete timeline of relevant events?
- How has the subject evolved over time? - How has the subject evolved over time?
2. **Current State**: 2. **Current State**:
- What current data points need to be collected? - What current data points need to be collected?
- What is the present landscape/situation in detail? - What is the present landscape/situation in detail?
- What are the most recent developments? - What are the most recent developments?
3. **Future Indicators**: 3. **Future Indicators**:
- What predictive data or future-oriented information is required? - What predictive data or future-oriented information is required?
- What are all relevant forecasts and projections? - What are all relevant forecasts and projections?
- What potential future scenarios should be considered? - What potential future scenarios should be considered?
4. **Stakeholder Data**: 4. **Stakeholder Data**:
- What information about ALL relevant stakeholders is needed? - What information about ALL relevant stakeholders is needed?
- How are different groups affected or involved? - How are different groups affected or involved?
- What are the various perspectives and interests? - What are the various perspectives and interests?
5. **Quantitative Data**: 5. **Quantitative Data**:
- What comprehensive numbers, statistics, and metrics should be gathered? - What comprehensive numbers, statistics, and metrics should be gathered?
- What numerical data is needed from multiple sources? - What numerical data is needed from multiple sources?
- What statistical analyses are relevant? - What statistical analyses are relevant?
6. **Qualitative Data**: 6. **Qualitative Data**:
- What non-numerical information needs to be collected? - What non-numerical information needs to be collected?
- What opinions, testimonials, and case studies are relevant? - What opinions, testimonials, and case studies are relevant?
- What descriptive information provides context? - What descriptive information provides context?
7. **Comparative Data**: 7. **Comparative Data**:
- What comparison points or benchmark data are required? - What comparison points or benchmark data are required?
- What similar cases or alternatives should be examined? - What similar cases or alternatives should be examined?
- How does this compare across different contexts? - How does this compare across different contexts?
8. **Risk Data**: 8. **Risk Data**:
- What information about ALL potential risks should be gathered? - What information about ALL potential risks should be gathered?
- What are the challenges, limitations, and obstacles? - What are the challenges, limitations, and obstacles?
- What contingencies and mitigations exist? - What contingencies and mitigations exist?
@ -135,16 +135,16 @@ When planning information gathering, consider these key aspects and ensure COMPR
- To begin with, restate the user's requirement in your own words as `thought`. - To begin with, restate the user's requirement in your own words as `thought`.
- Rigorously assess if there is sufficient context to answer the question using the strict criteria above. - Rigorously assess if there is sufficient context to answer the question using the strict criteria above.
- If context is sufficient: - If context is sufficient:
- Set `has_enough_context` to true - Set `has_enough_context` to true
- No need to create information gathering steps - No need to create information gathering steps
- If context is insufficient (default assumption): - If context is insufficient (default assumption):
- Break down the required information using the Analysis Framework - Break down the required information using the Analysis Framework
- Create NO MORE THAN {{ max_step_num }} focused and comprehensive steps that cover the most essential aspects - Create NO MORE THAN {{ max_step_num }} focused and comprehensive steps that cover the most essential aspects
- Ensure each step is substantial and covers related information categories - Ensure each step is substantial and covers related information categories
- Prioritize breadth and depth within the {{ max_step_num }}-step constraint - Prioritize breadth and depth within the {{ max_step_num }}-step constraint
- For each step, carefully assess if web search is needed: - For each step, carefully assess if web search is needed:
- Research and external data gathering: Set `need_web_search: true` - Research and external data gathering: Set `need_web_search: true`
- Internal data processing: Set `need_web_search: false` - Internal data processing: Set `need_web_search: false`
- Specify the exact data to be collected in step's `description`. Include a `note` if necessary. - Specify the exact data to be collected in step's `description`. Include a `note` if necessary.
- Prioritize depth and volume of relevant information - limited information is not acceptable. - Prioritize depth and volume of relevant information - limited information is not acceptable.
- Use the same language as the user to generate the plan. - Use the same language as the user to generate the plan.
@ -163,6 +163,7 @@ interface Step {
} }
interface Plan { interface Plan {
locale: string; // e.g. "en-US" or "zh-CN", based on the user's language or specific request
has_enough_context: boolean; has_enough_context: boolean;
thought: string; thought: string;
title: string; title: string;
@ -179,7 +180,7 @@ interface Plan {
- Never settle for minimal information - the goal is a comprehensive, detailed final report - Never settle for minimal information - the goal is a comprehensive, detailed final report
- Limited or insufficient information will lead to an inadequate final report - Limited or insufficient information will lead to an inadequate final report
- Carefully assess each step's web search requirement based on its nature: - Carefully assess each step's web search requirement based on its nature:
- Research steps (`need_web_search: true`) for gathering information - Research steps (`need_web_search: true`) for gathering information
- Processing steps (`need_web_search: false`) for calculations and data processing - Processing steps (`need_web_search: false`) for calculations and data processing
- Default to gathering more information unless the strictest sufficient context criteria are met - Default to gathering more information unless the strictest sufficient context criteria are met
- Always Use the same language as the user - Always use the same language as the user

View File

@ -1,9 +1,10 @@
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# SPDX-License-Identifier: MIT # SPDX-License-Identifier: MIT
from pydantic import BaseModel, Field
from typing import List, Optional
from enum import Enum from enum import Enum
from typing import List, Optional
from pydantic import BaseModel, Field
class StepType(str, Enum): class StepType(str, Enum):
@ -24,6 +25,9 @@ class Step(BaseModel):
class Plan(BaseModel): class Plan(BaseModel):
locale: str = Field(
..., description="e.g. 'en-US' or 'zh-CN', based on the user's language"
)
has_enough_context: bool has_enough_context: bool
thought: str thought: str
title: str title: str

View File

@ -33,7 +33,7 @@ You are dedicated to conducting thorough investigations and providing comprehens
- [Source Title](https://example.com/page2) - [Source Title](https://example.com/page2)
``` ```
- Always use the same language as the initial question. - Always output in the locale of **{{ locale }}**.
- DO NOT include inline citations in the text. Instead, track all sources and list them in the References section at the end using link reference format. - DO NOT include inline citations in the text. Instead, track all sources and list them in the References section at the end using link reference format.
# Notes # Notes
@ -49,4 +49,4 @@ You are dedicated to conducting thorough investigations and providing comprehens
- When presenting information from multiple sources, clearly indicate which source each piece of information comes from. - When presenting information from multiple sources, clearly indicate which source each piece of information comes from.
- Include images using `![Image Description](image_url)` in a separate section. - Include images using `![Image Description](image_url)` in a separate section.
- The included images should **only** be from the information gathered **from the search results or the crawled content**. **Never** include images that are not from the search results or the crawled content. - The included images should **only** be from the information gathered **from the search results or the crawled content**. **Never** include images that are not from the search results or the crawled content.
- Always use the same language as the initial question. - Always use the locale of **{{ locale }}** for the output.