From ab17606e7917781efa0f564b1ec1ae2c125669f1 Mon Sep 17 00:00:00 2001 From: so95 Date: Mon, 17 Feb 2025 12:33:43 +0700 Subject: [PATCH] Rewrite Support specified language or language according to initial question (#4990) Support specified language or language according to initial question --------- Co-authored-by: Kevin Hu --- agent/component/rewrite.py | 85 +++++++++++++------ web/src/locales/en.ts | 2 + web/src/pages/flow/constant.tsx | 1 + web/src/pages/flow/flow-drawer/index.tsx | 1 + .../flow/form/rewrite-question-form/index.tsx | 10 ++- 5 files changed, 74 insertions(+), 25 deletions(-) diff --git a/agent/component/rewrite.py b/agent/component/rewrite.py index 688664b8a..e0302ec95 100644 --- a/agent/component/rewrite.py +++ b/agent/component/rewrite.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import logging from abc import ABC from api.db import LLMType from api.db.services.llm_service import LLMBundle @@ -21,36 +20,33 @@ from agent.component import GenerateParam, Generate class RewriteQuestionParam(GenerateParam): - """ Define the QuestionRewrite component parameters. """ + def __init__(self): super().__init__() self.temperature = 0.9 self.prompt = "" + self.language = "" def check(self): super().check() - def get_prompt(self, conv): - self.prompt = """ - You are an expert at query expansion to generate a paraphrasing of a question. - I can't retrieval relevant information from the knowledge base by using user's question directly. - You need to expand or paraphrase user's question by multiple ways such as using synonyms words/phrase, - writing the abbreviation in its entirety, adding some extra descriptions or explanations, - changing the way of expression, translating the original question into another language (English/Chinese), etc. - And return 5 versions of question and one is from translation. - Just list the question. No other words are needed. - """ - return f""" + def get_prompt(self, conv, language, query): + prompt = """ Role: A helpful assistant Task: Generate a full user question that would follow the conversation. Requirements & Restrictions: - Text generated MUST be in the same language of the original user's question. - If the user's latest question is completely, don't do anything, just return the original question. - - DON'T generate anything except a refined question. + - DON'T generate anything except a refined question.""" + if language: + prompt += f""" + - Text generated MUST be in {language}""" + + prompt += f""" ###################### -Examples- ###################### @@ -68,7 +64,7 @@ USER: What is the name of Donald Trump's father? ASSISTANT: Fred Trump. USER: And his mother? ASSISTANT: Mary Trump. -User: What's her full name? +USER: What's her full name? ############### Output: What's the full name of Donald Trump's mother Mary Trump? ###################### @@ -76,8 +72,8 @@ Output: What's the full name of Donald Trump's mother Mary Trump? ## Conversation {conv} ############### - """ - return self.prompt +""" + return prompt class RewriteQuestion(Generate, ABC): @@ -85,21 +81,62 @@ class RewriteQuestion(Generate, ABC): def _run(self, history, **kwargs): hist = self._canvas.get_history(self._param.message_history_window_size) + query = self.get_input() + query = str(query["content"][0]) if "content" in query else "" conv = [] for m in hist: if m["role"] not in ["user", "assistant"]: continue conv.append("{}: {}".format(m["role"].upper(), m["content"])) conv = "\n".join(conv) - chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id) - ans = chat_mdl.chat(self._param.get_prompt(conv), [{"role": "user", "content": "Output: "}], - self._param.gen_conf()) + ans = chat_mdl.chat(self._param.get_prompt(conv, self.gen_lang(self._param.language), query), + [{"role": "user", "content": "Output: "}], self._param.gen_conf()) self._canvas.history.pop() self._canvas.history.append(("user", ans)) - - logging.debug(ans) return RewriteQuestion.be_output(ans) - - + @staticmethod + def gen_lang(language): + # convert code lang to language word for the prompt + language_dict = {'af': 'Afrikaans', 'ak': 'Akan', 'sq': 'Albanian', 'ws': 'Samoan', 'am': 'Amharic', + 'ar': 'Arabic', 'hy': 'Armenian', 'az': 'Azerbaijani', 'eu': 'Basque', 'be': 'Belarusian', + 'bem': 'Bemba', 'bn': 'Bengali', 'bh': 'Bihari', + 'xx-bork': 'Bork', 'bs': 'Bosnian', 'br': 'Breton', 'bg': 'Bulgarian', 'bt': 'Bhutani', + 'km': 'Cambodian', 'ca': 'Catalan', 'chr': 'Cherokee', 'ny': 'Chichewa', 'zh-cn': 'Chinese', + 'zh-tw': 'Chinese', 'co': 'Corsican', + 'hr': 'Croatian', 'cs': 'Czech', 'da': 'Danish', 'nl': 'Dutch', 'xx-elmer': 'Elmer', + 'en': 'English', 'eo': 'Esperanto', 'et': 'Estonian', 'ee': 'Ewe', 'fo': 'Faroese', + 'tl': 'Filipino', 'fi': 'Finnish', 'fr': 'French', + 'fy': 'Frisian', 'gaa': 'Ga', 'gl': 'Galician', 'ka': 'Georgian', 'de': 'German', + 'el': 'Greek', 'kl': 'Greenlandic', 'gn': 'Guarani', 'gu': 'Gujarati', 'xx-hacker': 'Hacker', + 'ht': 'Haitian Creole', 'ha': 'Hausa', 'haw': 'Hawaiian', + 'iw': 'Hebrew', 'hi': 'Hindi', 'hu': 'Hungarian', 'is': 'Icelandic', 'ig': 'Igbo', + 'id': 'Indonesian', 'ia': 'Interlingua', 'ga': 'Irish', 'it': 'Italian', 'ja': 'Japanese', + 'jw': 'Javanese', 'kn': 'Kannada', 'kk': 'Kazakh', 'rw': 'Kinyarwanda', + 'rn': 'Kirundi', 'xx-klingon': 'Klingon', 'kg': 'Kongo', 'ko': 'Korean', 'kri': 'Krio', + 'ku': 'Kurdish', 'ckb': 'Kurdish (Sorani)', 'ky': 'Kyrgyz', 'lo': 'Laothian', 'la': 'Latin', + 'lv': 'Latvian', 'ln': 'Lingala', 'lt': 'Lithuanian', + 'loz': 'Lozi', 'lg': 'Luganda', 'ach': 'Luo', 'mk': 'Macedonian', 'mg': 'Malagasy', + 'ms': 'Malay', 'ml': 'Malayalam', 'mt': 'Maltese', 'mv': 'Maldivian', 'mi': 'Maori', + 'mr': 'Marathi', 'mfe': 'Mauritian Creole', 'mo': 'Moldavian', 'mn': 'Mongolian', + 'sr-me': 'Montenegrin', 'my': 'Burmese', 'ne': 'Nepali', 'pcm': 'Nigerian Pidgin', + 'nso': 'Northern Sotho', 'no': 'Norwegian', 'nn': 'Norwegian Nynorsk', 'oc': 'Occitan', + 'or': 'Oriya', 'om': 'Oromo', 'ps': 'Pashto', 'fa': 'Persian', + 'xx-pirate': 'Pirate', 'pl': 'Polish', 'pt': 'Portuguese', 'pt-br': 'Portuguese (Brazilian)', + 'pt-pt': 'Portuguese (Portugal)', 'pa': 'Punjabi', 'qu': 'Quechua', 'ro': 'Romanian', + 'rm': 'Romansh', 'nyn': 'Runyankole', 'ru': 'Russian', 'gd': 'Scots Gaelic', + 'sr': 'Serbian', 'sh': 'Serbo-Croatian', 'st': 'Sesotho', 'tn': 'Setswana', + 'crs': 'Seychellois Creole', 'sn': 'Shona', 'sd': 'Sindhi', 'si': 'Sinhalese', 'sk': 'Slovak', + 'sl': 'Slovenian', 'so': 'Somali', 'es': 'Spanish', 'es-419': 'Spanish (Latin America)', + 'su': 'Sundanese', + 'sw': 'Swahili', 'sv': 'Swedish', 'tg': 'Tajik', 'ta': 'Tamil', 'tt': 'Tatar', 'te': 'Telugu', + 'th': 'Thai', 'ti': 'Tigrinya', 'to': 'Tongan', 'lua': 'Tshiluba', 'tum': 'Tumbuka', + 'tr': 'Turkish', 'tk': 'Turkmen', 'tw': 'Twi', + 'ug': 'Uyghur', 'uk': 'Ukrainian', 'ur': 'Urdu', 'uz': 'Uzbek', 'vu': 'Vanuatu', + 'vi': 'Vietnamese', 'cy': 'Welsh', 'wo': 'Wolof', 'xh': 'Xhosa', 'yi': 'Yiddish', + 'yo': 'Yoruba', 'zu': 'Zulu'} + if language in language_dict: + return language_dict[language] + else: + return "" diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index 494665d80..f47004449 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -523,6 +523,8 @@ This procedure will improve precision of retrieval by adding more information to 'It will retrieve descriptions of relevant entities,relations and community reports, which will enhance inference of multi-hop and complex question.', keyword: 'Keyword analysis', keywordTip: `Apply LLM to analyze user's questions, extract keywords which will be emphesize during the relevance omputation.`, + languageTip: + 'Allows sentence rewriting with the specified language or defaults to the latest question if not selected.', }, setting: { profile: 'Profile', diff --git a/web/src/pages/flow/constant.tsx b/web/src/pages/flow/constant.tsx index 05580f847..cc44daffa 100644 --- a/web/src/pages/flow/constant.tsx +++ b/web/src/pages/flow/constant.tsx @@ -426,6 +426,7 @@ export const initialGenerateValues = { export const initialRewriteQuestionValues = { ...initialLlmBaseValues, + language: '', message_history_window_size: 6, }; diff --git a/web/src/pages/flow/flow-drawer/index.tsx b/web/src/pages/flow/flow-drawer/index.tsx index 3dc822be0..40afa61f7 100644 --- a/web/src/pages/flow/flow-drawer/index.tsx +++ b/web/src/pages/flow/flow-drawer/index.tsx @@ -166,6 +166,7 @@ const FormDrawer = ({ > )} + {needsSingleStepDebugging(operatorName) && ( { @@ -23,6 +24,13 @@ const RewriteQuestionForm = ({ onValuesChange, form }: IOperatorForm) => { > + + +