Rewrite Support specified language or language according to initial question (#4990)

Support specified language or language according to initial question

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
so95 2025-02-17 12:33:43 +07:00 committed by GitHub
parent 7c90b87715
commit ab17606e79
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 74 additions and 25 deletions

View File

@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
from abc import ABC
from api.db import LLMType
from api.db.services.llm_service import LLMBundle
@ -21,36 +20,33 @@ from agent.component import GenerateParam, Generate
class RewriteQuestionParam(GenerateParam):
"""
Define the QuestionRewrite component parameters.
"""
def __init__(self):
super().__init__()
self.temperature = 0.9
self.prompt = ""
self.language = ""
def check(self):
super().check()
def get_prompt(self, conv):
self.prompt = """
You are an expert at query expansion to generate a paraphrasing of a question.
I can't retrieval relevant information from the knowledge base by using user's question directly.
You need to expand or paraphrase user's question by multiple ways such as using synonyms words/phrase,
writing the abbreviation in its entirety, adding some extra descriptions or explanations,
changing the way of expression, translating the original question into another language (English/Chinese), etc.
And return 5 versions of question and one is from translation.
Just list the question. No other words are needed.
"""
return f"""
def get_prompt(self, conv, language, query):
prompt = """
Role: A helpful assistant
Task: Generate a full user question that would follow the conversation.
Requirements & Restrictions:
- Text generated MUST be in the same language of the original user's question.
- If the user's latest question is completely, don't do anything, just return the original question.
- DON'T generate anything except a refined question.
- DON'T generate anything except a refined question."""
if language:
prompt += f"""
- Text generated MUST be in {language}"""
prompt += f"""
######################
-Examples-
######################
@ -68,7 +64,7 @@ USER: What is the name of Donald Trump's father?
ASSISTANT: Fred Trump.
USER: And his mother?
ASSISTANT: Mary Trump.
User: What's her full name?
USER: What's her full name?
###############
Output: What's the full name of Donald Trump's mother Mary Trump?
######################
@ -76,8 +72,8 @@ Output: What's the full name of Donald Trump's mother Mary Trump?
## Conversation
{conv}
###############
"""
return self.prompt
"""
return prompt
class RewriteQuestion(Generate, ABC):
@ -85,21 +81,62 @@ class RewriteQuestion(Generate, ABC):
def _run(self, history, **kwargs):
hist = self._canvas.get_history(self._param.message_history_window_size)
query = self.get_input()
query = str(query["content"][0]) if "content" in query else ""
conv = []
for m in hist:
if m["role"] not in ["user", "assistant"]:
continue
conv.append("{}: {}".format(m["role"].upper(), m["content"]))
conv = "\n".join(conv)
chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
ans = chat_mdl.chat(self._param.get_prompt(conv), [{"role": "user", "content": "Output: "}],
self._param.gen_conf())
ans = chat_mdl.chat(self._param.get_prompt(conv, self.gen_lang(self._param.language), query),
[{"role": "user", "content": "Output: "}], self._param.gen_conf())
self._canvas.history.pop()
self._canvas.history.append(("user", ans))
logging.debug(ans)
return RewriteQuestion.be_output(ans)
@staticmethod
def gen_lang(language):
# convert code lang to language word for the prompt
language_dict = {'af': 'Afrikaans', 'ak': 'Akan', 'sq': 'Albanian', 'ws': 'Samoan', 'am': 'Amharic',
'ar': 'Arabic', 'hy': 'Armenian', 'az': 'Azerbaijani', 'eu': 'Basque', 'be': 'Belarusian',
'bem': 'Bemba', 'bn': 'Bengali', 'bh': 'Bihari',
'xx-bork': 'Bork', 'bs': 'Bosnian', 'br': 'Breton', 'bg': 'Bulgarian', 'bt': 'Bhutani',
'km': 'Cambodian', 'ca': 'Catalan', 'chr': 'Cherokee', 'ny': 'Chichewa', 'zh-cn': 'Chinese',
'zh-tw': 'Chinese', 'co': 'Corsican',
'hr': 'Croatian', 'cs': 'Czech', 'da': 'Danish', 'nl': 'Dutch', 'xx-elmer': 'Elmer',
'en': 'English', 'eo': 'Esperanto', 'et': 'Estonian', 'ee': 'Ewe', 'fo': 'Faroese',
'tl': 'Filipino', 'fi': 'Finnish', 'fr': 'French',
'fy': 'Frisian', 'gaa': 'Ga', 'gl': 'Galician', 'ka': 'Georgian', 'de': 'German',
'el': 'Greek', 'kl': 'Greenlandic', 'gn': 'Guarani', 'gu': 'Gujarati', 'xx-hacker': 'Hacker',
'ht': 'Haitian Creole', 'ha': 'Hausa', 'haw': 'Hawaiian',
'iw': 'Hebrew', 'hi': 'Hindi', 'hu': 'Hungarian', 'is': 'Icelandic', 'ig': 'Igbo',
'id': 'Indonesian', 'ia': 'Interlingua', 'ga': 'Irish', 'it': 'Italian', 'ja': 'Japanese',
'jw': 'Javanese', 'kn': 'Kannada', 'kk': 'Kazakh', 'rw': 'Kinyarwanda',
'rn': 'Kirundi', 'xx-klingon': 'Klingon', 'kg': 'Kongo', 'ko': 'Korean', 'kri': 'Krio',
'ku': 'Kurdish', 'ckb': 'Kurdish (Sorani)', 'ky': 'Kyrgyz', 'lo': 'Laothian', 'la': 'Latin',
'lv': 'Latvian', 'ln': 'Lingala', 'lt': 'Lithuanian',
'loz': 'Lozi', 'lg': 'Luganda', 'ach': 'Luo', 'mk': 'Macedonian', 'mg': 'Malagasy',
'ms': 'Malay', 'ml': 'Malayalam', 'mt': 'Maltese', 'mv': 'Maldivian', 'mi': 'Maori',
'mr': 'Marathi', 'mfe': 'Mauritian Creole', 'mo': 'Moldavian', 'mn': 'Mongolian',
'sr-me': 'Montenegrin', 'my': 'Burmese', 'ne': 'Nepali', 'pcm': 'Nigerian Pidgin',
'nso': 'Northern Sotho', 'no': 'Norwegian', 'nn': 'Norwegian Nynorsk', 'oc': 'Occitan',
'or': 'Oriya', 'om': 'Oromo', 'ps': 'Pashto', 'fa': 'Persian',
'xx-pirate': 'Pirate', 'pl': 'Polish', 'pt': 'Portuguese', 'pt-br': 'Portuguese (Brazilian)',
'pt-pt': 'Portuguese (Portugal)', 'pa': 'Punjabi', 'qu': 'Quechua', 'ro': 'Romanian',
'rm': 'Romansh', 'nyn': 'Runyankole', 'ru': 'Russian', 'gd': 'Scots Gaelic',
'sr': 'Serbian', 'sh': 'Serbo-Croatian', 'st': 'Sesotho', 'tn': 'Setswana',
'crs': 'Seychellois Creole', 'sn': 'Shona', 'sd': 'Sindhi', 'si': 'Sinhalese', 'sk': 'Slovak',
'sl': 'Slovenian', 'so': 'Somali', 'es': 'Spanish', 'es-419': 'Spanish (Latin America)',
'su': 'Sundanese',
'sw': 'Swahili', 'sv': 'Swedish', 'tg': 'Tajik', 'ta': 'Tamil', 'tt': 'Tatar', 'te': 'Telugu',
'th': 'Thai', 'ti': 'Tigrinya', 'to': 'Tongan', 'lua': 'Tshiluba', 'tum': 'Tumbuka',
'tr': 'Turkish', 'tk': 'Turkmen', 'tw': 'Twi',
'ug': 'Uyghur', 'uk': 'Ukrainian', 'ur': 'Urdu', 'uz': 'Uzbek', 'vu': 'Vanuatu',
'vi': 'Vietnamese', 'cy': 'Welsh', 'wo': 'Wolof', 'xh': 'Xhosa', 'yi': 'Yiddish',
'yo': 'Yoruba', 'zu': 'Zulu'}
if language in language_dict:
return language_dict[language]
else:
return ""

View File

@ -523,6 +523,8 @@ This procedure will improve precision of retrieval by adding more information to
'It will retrieve descriptions of relevant entities,relations and community reports, which will enhance inference of multi-hop and complex question.',
keyword: 'Keyword analysis',
keywordTip: `Apply LLM to analyze user's questions, extract keywords which will be emphesize during the relevance omputation.`,
languageTip:
'Allows sentence rewriting with the specified language or defaults to the latest question if not selected.',
},
setting: {
profile: 'Profile',

View File

@ -426,6 +426,7 @@ export const initialGenerateValues = {
export const initialRewriteQuestionValues = {
...initialLlmBaseValues,
language: '',
message_history_window_size: 6,
};

View File

@ -166,6 +166,7 @@ const FormDrawer = ({
></Input>
)}
</Flex>
{needsSingleStepDebugging(operatorName) && (
<RunTooltip>
<Play

View File

@ -1,7 +1,8 @@
import LLMSelect from '@/components/llm-select';
import MessageHistoryWindowSizeItem from '@/components/message-history-window-size-item';
import { useTranslate } from '@/hooks/common-hooks';
import { Form } from 'antd';
import { Form, Select } from 'antd';
import { GoogleLanguageOptions } from '../../constant';
import { IOperatorForm } from '../../interface';
const RewriteQuestionForm = ({ onValuesChange, form }: IOperatorForm) => {
@ -23,6 +24,13 @@ const RewriteQuestionForm = ({ onValuesChange, form }: IOperatorForm) => {
>
<LLMSelect></LLMSelect>
</Form.Item>
<Form.Item
label={t('language')}
name={'language'}
tooltip={t('languageTip')}
>
<Select options={GoogleLanguageOptions} allowClear={true}></Select>
</Form.Item>
<MessageHistoryWindowSizeItem
initialValue={6}
></MessageHistoryWindowSizeItem>