From 31d2b3cb5afb91d32ba2cd4c6444d60be80ea721 Mon Sep 17 00:00:00 2001 From: Adrian Altermatt Date: Tue, 3 Jun 2025 13:41:59 +0200 Subject: [PATCH] Fix: Grammar and clarity improvements in prompt templates (#8023) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Fixed grammar errors and improved clarity in prompt templates throughout `rag/prompts.py`. ## Changes Made - **Fixed incomplete sentence**: `"If the user's latest question is completely, don't do anything"` → `"If the user's latest question is already complete, don't do anything"` - **Improved phrasing**: `"of like [ID:i]"` → `"such as [ID:i]"` - **Added missing articles**: `"give top 3"` → `"give the top 3"` - **Fixed prepositions**: `"in language of"` → `"in the same language as"` - **Corrected spelling**: `"Jappanese"` → `"Japanese"` - **Standardized formatting**: Consistent role descriptions and punctuation ## Impact These changes improve prompt readability and should make instructions clearer for the underlying language models. ## Test Plan - [x] Verified changes maintain original prompt functionality - [x] No breaking changes to prompt structure or expected outputs Co-authored-by: Adrian Altermatt --- rag/prompts.py | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/rag/prompts.py b/rag/prompts.py index beca2f799..551ed99f9 100644 --- a/rag/prompts.py +++ b/rag/prompts.py @@ -141,7 +141,7 @@ def citation_prompt(): # Citation requirements: -- Use a uniform citation format of like [ID:i] [ID:j], where "i" and "j" are the document ID enclosed in square brackets. Separate multiple IDs with spaces (e.g., [ID:0] [ID:1]). +- Use a uniform citation format such as [ID:i] [ID:j], where "i" and "j" are document IDs enclosed in square brackets. Separate multiple IDs with spaces (e.g., [ID:0] [ID:1]). - Citation markers must be placed at the end of a sentence, separated by a space from the final punctuation (e.g., period, question mark). A maximum of 4 citations are allowed per sentence. - DO NOT insert CITATION in the answer if the content is not from retrieved chunks. - DO NOT use standalone Document IDs (e.g., '#ID#'). @@ -184,13 +184,13 @@ Overall, while Musk enjoys Dogecoin and often promotes it, he also warns against def keyword_extraction(chat_mdl, content, topn=3): prompt = f""" -Role: You're a text analyzer. -Task: extract the most important keywords/phrases of a given piece of text content. +Role: You are a text analyzer. +Task: Extract the most important keywords/phrases of a given piece of text content. Requirements: - - Summarize the text content, and give top {topn} important keywords/phrases. - - The keywords MUST be in language of the given piece of text content. + - Summarize the text content, and give the top {topn} important keywords/phrases. + - The keywords MUST be in the same language as the given piece of text content. - The keywords are delimited by ENGLISH COMMA. - - Keywords ONLY in output. + - Output keywords ONLY. ### Text Content {content} @@ -209,15 +209,15 @@ Requirements: def question_proposal(chat_mdl, content, topn=3): prompt = f""" -Role: You're a text analyzer. -Task: propose {topn} questions about a given piece of text content. +Role: You are a text analyzer. +Task: Propose {topn} questions about a given piece of text content. Requirements: - - Understand and summarize the text content, and propose top {topn} important questions. + - Understand and summarize the text content, and propose the top {topn} important questions. - The questions SHOULD NOT have overlapping meanings. - The questions SHOULD cover the main content of the text as much as possible. - - The questions MUST be in language of the given piece of text content. + - The questions MUST be in the same language as the given piece of text content. - One question per line. - - Question ONLY in output. + - Output questions ONLY. ### Text Content {content} @@ -258,14 +258,14 @@ Task and steps: 2. If the user's question involves relative date, you need to convert it into absolute date based on the current date, which is {today}. For example: 'yesterday' would be converted to {yesterday}. Requirements & Restrictions: - - If the user's latest question is completely, don't do anything, just return the original question. + - If the user's latest question is already complete, don't do anything, just return the original question. - DON'T generate anything except a refined question.""" if language: prompt += f""" - Text generated MUST be in {language}.""" else: prompt += """ - - Text generated MUST be in the same language of the original user's question. + - Text generated MUST be in the same language as the original user's question. """ prompt += f""" @@ -342,7 +342,7 @@ Act as a streamlined multilingual translator. Strictly output translations separ Input: Hello World! Let's discuss AI safety. === -Chinese, French, Jappanese +Chinese, French, Japanese Output: 你好世界!让我们讨论人工智能安全问题。 @@ -369,20 +369,20 @@ Output: def content_tagging(chat_mdl, content, all_tags, examples, topn=3): prompt = f""" -Role: You're a text analyzer. +Role: You are a text analyzer. -Task: Tag (put on some labels) to a given piece of text content based on the examples and the entire tag set. +Task: Add tags (labels) to a given piece of text content based on the examples and the entire tag set. -Steps:: - - Comprehend the tag/label set. - - Comprehend examples which all consist of both text content and assigned tags with relevance score in format of JSON. - - Summarize the text content, and tag it with top {topn} most relevant tags from the set of tag/label and the corresponding relevance score. +Steps: + - Review the tag/label set. + - Review examples which all consist of both text content and assigned tags with relevance score in JSON format. + - Summarize the text content, and tag it with the top {topn} most relevant tags from the set of tags/labels and the corresponding relevance score. -Requirements +Requirements: - The tags MUST be from the tag set. - The output MUST be in JSON format only, the key is tag and the value is its relevance score. - - The relevance score must be range from 1 to 10. - - Keywords ONLY in output. + - The relevance score must range from 1 to 10. + - Output keywords ONLY. # TAG SET {", ".join(all_tags)} @@ -482,6 +482,6 @@ Output format (include only sections relevant to the image content): - Trends / Insights: [Analysis and interpretation] - Captions / Annotations: [Text and relevance, if available] -Ensure high accuracy, clarity, and completeness in your analysis, and includes only the information present in the image. Avoid unnecessary statements about missing elements. +Ensure high accuracy, clarity, and completeness in your analysis, and include only the information present in the image. Avoid unnecessary statements about missing elements. """ return prompt