# Patch summary (commentary only; everything from the first "diff --git" header on is unchanged).
#
# NOTE(review): as stored here the hunks of this patch are run together on three
# physical lines; the original line breaks and indentation presumably need to be
# restored before the patch can be applied -- confirm against the upstream commit.
#
# 1. .github/workflows/check_no_chinese_comments.py
#    Appends 'prompts.py' to the `excluded_files` list in main().
#
# 2. api/core/generator/llm_generator.py
#    - Adds `import json`.
#    - Drops `prompt.format(query=query)`; instead replaces "\n" in `query` with
#      a space and appends `query + "\n"` to `prompt`.
#    - Model kwargs: adds `temperature=1` and raises `max_tokens` from 50 to 100.
#    - Parses the model answer with `json.loads` and returns the value stored
#      under the 'Your Output' key (stripped).
#    NOTE(review): within this hunk neither `json.loads(answer)` nor the
#    'Your Output' lookup is guarded; a reply that is not valid JSON, or lacks
#    that key, would raise here -- TODO confirm whether a caller handles it.
#
# 3. api/core/prompt/prompts.py
#    Replaces the old format-string CONVERSATION_TITLE_PROMPT ("Human:{query}...")
#    with a triple-quoted few-shot prompt: instructions, six worked examples whose
#    answers are JSON objects with the keys "Language Type", "Your Reasoning" and
#    "Your Output", and a trailing "User Input:" line.
#    NOTE(review): example 4's "Language Type" value reads "The user's input
#    English-Chinese mixed" (no "is"), unlike examples 5 and 6 -- looks like a
#    typo in the prompt text; left untouched because it is runtime text.
diff --git a/.github/workflows/check_no_chinese_comments.py b/.github/workflows/check_no_chinese_comments.py index e59cfb538b..8fb51970af 100644 --- a/.github/workflows/check_no_chinese_comments.py +++ b/.github/workflows/check_no_chinese_comments.py @@ -20,7 +20,8 @@ def check_file_for_chinese_comments(file_path): def main(): has_chinese = False excluded_files = ["model_template.py", 'stopwords.py', 'commands.py', - 'indexing_runner.py', 'web_reader_tool.py', 'spark_provider.py'] + 'indexing_runner.py', 'web_reader_tool.py', 'spark_provider.py', + 'prompts.py'] for root, _, files in os.walk("."): for file in files: diff --git a/api/core/generator/llm_generator.py b/api/core/generator/llm_generator.py index 3ace061ac7..93208df960 100644 --- a/api/core/generator/llm_generator.py +++ b/api/core/generator/llm_generator.py @@ -1,3 +1,4 @@ +import json import logging from langchain.schema import OutputParserException @@ -22,18 +23,25 @@ class LLMGenerator: if len(query) > 2000: query = query[:300] + "...[TRUNCATED]..." 
+ query[-300:] - prompt = prompt.format(query=query) + query = query.replace("\n", " ") + + prompt += query + "\n" model_instance = ModelFactory.get_text_generation_model( tenant_id=tenant_id, model_kwargs=ModelKwargs( - max_tokens=50 + temperature=1, + max_tokens=100 ) ) prompts = [PromptMessage(content=prompt)] response = model_instance.run(prompts) answer = response.content + + result_dict = json.loads(answer) + answer = result_dict['Your Output'] + return answer.strip() @classmethod diff --git a/api/core/prompt/prompts.py b/api/core/prompt/prompts.py index f30d329ec2..971c822e85 100644 --- a/api/core/prompt/prompts.py +++ b/api/core/prompt/prompts.py @@ -1,10 +1,64 @@ -CONVERSATION_TITLE_PROMPT = ( - "Human:{query}\n-----\n" - "Help me summarize the intent of what the human said and provide a title, the title should not exceed 20 words.\n" - "If what the human said is conducted in English, you should only return an English title.\n" - "If what the human said is conducted in Chinese, you should only return a Chinese title.\n" - "title:" -) +# Written by YORKI MINAKO🤡 +CONVERSATION_TITLE_PROMPT = """You need to decompose the user's input into "subject" and "intention" in order to accurately figure out what the user's input language actually is. +Notice: the language type user using is abundant, can be English, Chinese, Español, Arabic, Japanese, and etc. +MAKE SURE your output is the SAME language as the user's input! +Your output is restricted only to: (Input language) Intention + Subject(short as possible) + +Tip: When the user's question is directed at you (the language model), you can add an emoji to make it more fun. + + +example 1: +User Input: hi, yesterday i had some burgers. 
+{ + "Language Type": "The user's input is pure English", + "Your Reasoning": "The language of my output must be pure English.", + "Your Output": "sharing yesterday's food" +} + +example 2: +User Input: hello +{ + "Language Type": "The user's input is written in pure English", + "Your Reasoning": "The language of my output must be pure English.", + "Your Output": "Greeting myself☺️" +} + + +example 3: +User Input: why mmap file: oom +{ + "Language Type": "The user's input is written in pure English", + "Your Reasoning": "The language of my output must be pure English.", + "Your Output": "Asking about the reason for mmap file: oom" +} + + +example 4: +User Input: www.convinceme.yesterday-you-ate-seafood.tv讲了什么? +{ + "Language Type": "The user's input English-Chinese mixed", + "Your Reasoning": "The English-part is an URL, the main intention is still written in Chinese, so the language of my output must be using Chinese.", + "Your Output": "询问网站www.convinceme.yesterday-you-ate-seafood.tv" +} + +example 5: +User Input: why小红的年龄is老than小明? +{ + "Language Type": "The user's input is English-Chinese mixed", + "Your Reasoning": "The English parts are subjective particles, the main intention is written in Chinese, besides, Chinese occupies a greater \"actual meaning\" than English, so the language of my output must be using Chinese.", + "Your Output": "询问小红和小明的年龄" +} + +example 6: +User Input: yo, 你今天咋样? +{ + "Language Type": "The user's input is English-Chinese mixed", + "Your Reasoning": "The English-part is a subjective particle, the main intention is written in Chinese, so the language of my output must be using Chinese.", + "Your Output": "查询今日我的状态☺️" +} + +User Input: +""" CONVERSATION_SUMMARY_PROMPT = ( "Please generate a short summary of the following conversation.\n"