From b67b04ff5d2adaa1507c3cfb4a220de7ab1e7873 Mon Sep 17 00:00:00 2001 From: Li Xin Date: Mon, 21 Apr 2025 19:50:34 +0800 Subject: [PATCH] feat: support multi-language --- src/graph/nodes.py | 18 +++++++------ src/graph/types.py | 4 ++- src/prompts/coder.md | 14 +++++----- src/prompts/planner.md | 51 ++++++++++++++++++------------------ src/prompts/planner_model.py | 8 ++++-- src/prompts/researcher.md | 4 +-- 6 files changed, 54 insertions(+), 45 deletions(-) diff --git a/src/graph/nodes.py b/src/graph/nodes.py index 4a7a5ca..d3397eb 100644 --- a/src/graph/nodes.py +++ b/src/graph/nodes.py @@ -1,22 +1,23 @@ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates # SPDX-License-Identifier: MIT -import logging import json -from typing import Literal, Annotated +import logging +from typing import Annotated, Literal -from langchain_core.messages import HumanMessage, AIMessage -from langchain_core.tools import tool +from langchain_core.messages import AIMessage, HumanMessage from langchain_core.runnables import RunnableConfig +from langchain_core.tools import tool from langgraph.types import Command, interrupt -from src.llms.llm import get_llm_by_type +from src.agents.agents import coder_agent, research_agent from src.config.agents import AGENT_LLM_MAP from src.config.configuration import Configuration -from src.prompts.template import apply_prompt_template +from src.llms.llm import get_llm_by_type from src.prompts.planner_model import Plan, StepType +from src.prompts.template import apply_prompt_template from src.utils.json_utils import repair_json_output -from src.agents.agents import research_agent, coder_agent + from .types import State logger = logging.getLogger(__name__) @@ -117,6 +118,7 @@ def human_feedback_node( update={ "current_plan": Plan.model_validate(new_plan), "plan_iterations": plan_iterations, + "locale": new_plan["locale"], }, goto=goto, ) @@ -209,7 +211,7 @@ def _execute_agent_step( agent_input = { "messages": [ HumanMessage( - content=f"#Task\n\n##title\n\n{step.title}\n\n##description\n\n{step.description}" + content=f"#Task\n\n##title\n\n{step.title}\n\n##description\n\n{step.description}\n\n##locale\n\n{state.get('locale', 'en-US')}" ) ] } diff --git a/src/graph/types.py b/src/graph/types.py index 71a208e..128bc67 100644 --- a/src/graph/types.py +++ b/src/graph/types.py @@ -2,9 +2,10 @@ # SPDX-License-Identifier: MIT import operator +from typing import Annotated from langgraph.graph import MessagesState -from typing import Annotated + from src.prompts.planner_model import Plan @@ -12,6 +13,7 @@ class State(MessagesState): """State for the agent system, extends MessagesState with next field.""" # Runtime Variables + locale: str = "en-US" observations: Annotated[list[str], operator.add] = [] plan_iterations: int = 0 current_plan: Plan | str = None diff --git a/src/prompts/coder.md b/src/prompts/coder.md index 8222f74..9093044 100644 --- a/src/prompts/coder.md +++ b/src/prompts/coder.md @@ -23,12 +23,12 @@ You are a professional software engineer proficient in Python scripting. Your ta - Use comments in code to improve readability and maintainability. - If you want to see the output of a value, you MUST print it out with `print(...)`. - Always and only use Python to do the math. -- Always use the same language as the initial question. - Always use `yfinance` for financial market data: - - Get historical data with `yf.download()` - - Access company info with `Ticker` objects - - Use appropriate date ranges for data retrieval + - Get historical data with `yf.download()` + - Access company info with `Ticker` objects + - Use appropriate date ranges for data retrieval - Required Python packages are pre-installed: - - `pandas` for data manipulation - - `numpy` for numerical operations - - `yfinance` for financial market data + - `pandas` for data manipulation + - `numpy` for numerical operations + - `yfinance` for financial market data +- Always output in the locale of **{{ locale }}**. diff --git a/src/prompts/planner.md b/src/prompts/planner.md index 619187b..1c5d4be 100644 --- a/src/prompts/planner.md +++ b/src/prompts/planner.md @@ -14,7 +14,7 @@ As a Deep Researcher, you can breakdown the major subject into sub-topics and ex The successful research plan must meet these standards: -1. **Comprehensive Coverage**: +1. **Comprehensive Coverage**: - Information must cover ALL aspects of the topic - Multiple perspectives must be represented - Both mainstream and alternative viewpoints should be included @@ -74,51 +74,51 @@ Different types of steps have different web search requirements: ## Exclusions - **No Direct Calculations in Research Steps**: - - Research steps should only gather data and information - - All mathematical calculations must be handled by processing steps - - Numerical analysis must be delegated to processing steps - - Research steps focus on information gathering only + - Research steps should only gather data and information + - All mathematical calculations must be handled by processing steps + - Numerical analysis must be delegated to processing steps + - Research steps focus on information gathering only ## Analysis Framework When planning information gathering, consider these key aspects and ensure COMPREHENSIVE coverage: -1. **Historical Context**: +1. **Historical Context**: - What historical data and trends are needed? - What is the complete timeline of relevant events? - How has the subject evolved over time? -2. **Current State**: +2. **Current State**: - What current data points need to be collected? - What is the present landscape/situation in detail? - What are the most recent developments? -3. **Future Indicators**: +3. **Future Indicators**: - What predictive data or future-oriented information is required? - What are all relevant forecasts and projections? - What potential future scenarios should be considered? -4. **Stakeholder Data**: +4. **Stakeholder Data**: - What information about ALL relevant stakeholders is needed? - How are different groups affected or involved? - What are the various perspectives and interests? -5. **Quantitative Data**: +5. **Quantitative Data**: - What comprehensive numbers, statistics, and metrics should be gathered? - What numerical data is needed from multiple sources? - What statistical analyses are relevant? -6. **Qualitative Data**: +6. **Qualitative Data**: - What non-numerical information needs to be collected? - What opinions, testimonials, and case studies are relevant? - What descriptive information provides context? -7. **Comparative Data**: +7. **Comparative Data**: - What comparison points or benchmark data are required? - What similar cases or alternatives should be examined? - How does this compare across different contexts? -8. **Risk Data**: +8. **Risk Data**: - What information about ALL potential risks should be gathered? - What are the challenges, limitations, and obstacles? - What contingencies and mitigations exist? @@ -135,16 +135,16 @@ When planning information gathering, consider these key aspects and ensure COMPR - To begin with, repeat user's requirement in your own words as `thought`. - Rigorously assess if there is sufficient context to answer the question using the strict criteria above. - If context is sufficient: - - Set `has_enough_context` to true - - No need to create information gathering steps + - Set `has_enough_context` to true + - No need to create information gathering steps - If context is insufficient (default assumption): - - Break down the required information using the Analysis Framework - - Create NO MORE THAN {{ max_step_num }} focused and comprehensive steps that cover the most essential aspects - - Ensure each step is substantial and covers related information categories - - Prioritize breadth and depth within the {{ max_step_num }}-step constraint - - For each step, carefully assess if web search is needed: - - Research and external data gathering: Set `need_web_search: true` - - Internal data processing: Set `need_web_search: false` + - Break down the required information using the Analysis Framework + - Create NO MORE THAN {{ max_step_num }} focused and comprehensive steps that cover the most essential aspects + - Ensure each step is substantial and covers related information categories + - Prioritize breadth and depth within the {{ max_step_num }}-step constraint + - For each step, carefully assess if web search is needed: + - Research and external data gathering: Set `need_web_search: true` + - Internal data processing: Set `need_web_search: false` - Specify the exact data to be collected in step's `description`. Include a `note` if necessary. - Prioritize depth and volume of relevant information - limited information is not acceptable. - Use the same language as the user to generate the plan. @@ -163,6 +163,7 @@ interface Step { } interface Plan { + locale: string; // e.g. "en-US" or "zh-CN", based on the user's language or specific request has_enough_context: boolean; thought: string; title: string; @@ -179,7 +180,7 @@ interface Plan { - Never settle for minimal information - the goal is a comprehensive, detailed final report - Limited or insufficient information will lead to an inadequate final report - Carefully assess each step's web search requirement based on its nature: - - Research steps (`need_web_search: true`) for gathering information - - Processing steps (`need_web_search: false`) for calculations and data processing + - Research steps (`need_web_search: true`) for gathering information + - Processing steps (`need_web_search: false`) for calculations and data processing - Default to gathering more information unless the strictest sufficient context criteria are met -- Always Use the same language as the user +- Always use the same language as the user diff --git a/src/prompts/planner_model.py b/src/prompts/planner_model.py index 615d701..b75d00b 100644 --- a/src/prompts/planner_model.py +++ b/src/prompts/planner_model.py @@ -1,9 +1,10 @@ # Copyright (c) 2025 Bytedance Ltd. and/or its affiliates # SPDX-License-Identifier: MIT -from pydantic import BaseModel, Field -from typing import List, Optional from enum import Enum +from typing import List, Optional + +from pydantic import BaseModel, Field class StepType(str, Enum): @@ -24,6 +25,9 @@ class Step(BaseModel): class Plan(BaseModel): + locale: str = Field( + ..., description="e.g. 'en-US' or 'zh-CN', based on the user's language" + ) has_enough_context: bool thought: str title: str diff --git a/src/prompts/researcher.md b/src/prompts/researcher.md index 53699a2..208a6cf 100644 --- a/src/prompts/researcher.md +++ b/src/prompts/researcher.md @@ -33,7 +33,7 @@ You are dedicated to conducting thorough investigations and providing comprehens - [Source Title](https://example.com/page2) ``` -- Always use the same language as the initial question. +- Always output in the locale of **{{ locale }}**. - DO NOT include inline citations in the text. Instead, track all sources and list them in the References section at the end using link reference format. # Notes @@ -49,4 +49,4 @@ You are dedicated to conducting thorough investigations and providing comprehens - When presenting information from multiple sources, clearly indicate which source each piece of information comes from. - Include images using `![Image Description](image_url)` in a separate section. - The included images should **only** be from the information gathered **from the search results or the crawled content**. **Never** include images that are not from the search results or the crawled content. -- Always use the same language as the initial question. +- Always use the locale of **{{ locale }}** for the output.