question classifier optimize (#4147)

This commit is contained in:
Jyong 2024-05-07 16:44:27 +08:00 committed by GitHub
parent c2f0f958ef
commit e353809680
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 38 additions and 27 deletions

View File

@ -1,3 +1,4 @@
import json
import logging
from typing import Optional, Union, cast
@ -62,13 +63,20 @@ class QuestionClassifierNode(LLMNode):
prompt_messages=prompt_messages,
stop=stop
)
categories = [_class.name for _class in node_data.classes]
category_name = node_data.classes[0].name
category_id = node_data.classes[0].id
try:
result_text_json = parse_and_check_json_markdown(result_text, [])
#result_text_json = json.loads(result_text.strip('```JSON\n'))
categories_result = result_text_json.get('categories', [])
if categories_result:
categories = categories_result
# result_text_json = json.loads(result_text.strip('```JSON\n'))
if 'category_name' in result_text_json and 'category_id' in result_text_json:
category_id_result = result_text_json['category_id']
classes = node_data.classes
classes_map = {class_.id: class_.name for class_ in classes}
category_ids = [_class.id for _class in classes]
if category_id_result in category_ids:
category_name = classes_map[category_id_result]
category_id = category_id_result
except Exception:
logging.error(f"Failed to parse result text: {result_text}")
try:
@ -81,17 +89,15 @@ class QuestionClassifierNode(LLMNode):
'usage': jsonable_encoder(usage),
}
outputs = {
'class_name': categories[0] if categories else ''
'class_name': category_name
}
classes = node_data.classes
classes_map = {class_.name: class_.id for class_ in classes}
return NodeRunResult(
status=WorkflowNodeExecutionStatus.SUCCEEDED,
inputs=variables,
process_data=process_data,
outputs=outputs,
edge_source_handle=classes_map.get(categories[0], None),
edge_source_handle=category_id,
metadata={
NodeRunMetadataKey.TOTAL_TOKENS: usage.total_tokens,
NodeRunMetadataKey.TOTAL_PRICE: usage.total_price,
@ -210,8 +216,13 @@ class QuestionClassifierNode(LLMNode):
-> Union[list[ChatModelMessage], CompletionModelPromptTemplate]:
model_mode = ModelMode.value_of(node_data.model.mode)
classes = node_data.classes
class_names = [class_.name for class_ in classes]
class_names_str = ','.join(f'"{name}"' for name in class_names)
categories = []
for class_ in classes:
category = {
'category_id': class_.id,
'category_name': class_.name
}
categories.append(category)
instruction = node_data.instruction if node_data.instruction else ''
input_text = query
memory_str = ''
@ -248,7 +259,7 @@ class QuestionClassifierNode(LLMNode):
user_prompt_message_3 = ChatModelMessage(
role=PromptMessageRole.USER,
text=QUESTION_CLASSIFIER_USER_PROMPT_3.format(input_text=input_text,
categories=class_names_str,
categories=json.dumps(categories),
classification_instructions=instruction)
)
prompt_messages.append(user_prompt_message_3)
@ -257,7 +268,7 @@ class QuestionClassifierNode(LLMNode):
return CompletionModelPromptTemplate(
text=QUESTION_CLASSIFIER_COMPLETION_PROMPT.format(histories=memory_str,
input_text=input_text,
categories=class_names_str,
categories=json.dumps(categories),
classification_instructions=instruction)
)

View File

@ -6,7 +6,7 @@ QUESTION_CLASSIFIER_SYSTEM_PROMPT = """
### Task
Your task is to assign ONLY one category to the input text, and only one category may be returned in the output. Additionally, you need to extract the key words from the text that are related to the classification.
### Format
The input text is in the variable text_field.Categories are specified as a comma-separated list in the variable categories or left empty for automatic determination.Classification instructions may be included to improve the classification accuracy.
The input text is in the variable text_field.Categories are specified as a category list in the variable categories or left empty for automatic determination.Classification instructions may be included to improve the classification accuracy.
### Constraint
DO NOT include anything other than the JSON array in your response.
### Memory
@ -18,33 +18,33 @@ QUESTION_CLASSIFIER_SYSTEM_PROMPT = """
QUESTION_CLASSIFIER_USER_PROMPT_1 = """
{ "input_text": ["I recently had a great experience with your company. The service was prompt and the staff was very friendly."],
"categories": ["Customer Service", "Satisfaction", "Sales", "Product"],
"categories": [{"category_id":"f5660049-284f-41a7-b301-fd24176a711c","category_name":"Customer Service"},{"category_id":"8d007d06-f2c9-4be5-8ff6-cd4381c13c60","category_name":"Satisfaction"},{"category_id":"5fbbbb18-9843-466d-9b8e-b9bfbb9482c8","category_name":"Sales"},{"category_id":"23623c75-7184-4a2e-8226-466c2e4631e4","category_name":"Product"}],
"classification_instructions": ["classify the text based on the feedback provided by customer"]}
"""
QUESTION_CLASSIFIER_ASSISTANT_PROMPT_1 = """
```json
{"keywords": ["recently", "great experience", "company", "service", "prompt", "staff", "friendly"],
"categories": ["Customer Service"]}
{"category_id": "f5660049-284f-41a7-b301-fd24176a711c",
"category_name": "Customer Service"}
```
"""
QUESTION_CLASSIFIER_USER_PROMPT_2 = """
{"input_text": ["bad service, slow to bring the food"],
"categories": ["Food Quality", "Experience", "Price" ],
"categories": [{"category_id":"80fb86a0-4454-4bf5-924c-f253fdd83c02","category_name":"Food Quality"},{"category_id":"f6ff5bc3-aca0-4e4a-8627-e760d0aca78f","category_name":"Experience"},{"category_id":"cc771f63-74e7-4c61-882e-3eda9d8ba5d7","category_name":"Price"}],
"classification_instructions": []}
"""
QUESTION_CLASSIFIER_ASSISTANT_PROMPT_2 = """
```json
{"keywords": ["bad service", "slow", "food", "tip", "terrible", "waitresses"],
"categories": ["Experience"]}
{"category_id": "f6ff5bc3-aca0-4e4a-8627-e760d0aca78f",
"category_name": "Experience"}
```
"""
QUESTION_CLASSIFIER_USER_PROMPT_3 = """
'{{"input_text": ["{input_text}"],',
'"categories": ["{categories}" ], ',
'"categories": {categories}, ',
'"classification_instructions": ["{classification_instructions}"]}}'
"""
@ -54,16 +54,16 @@ You are a text classification engine that analyzes text data and assigns categor
### Task
Your task is to assign ONLY one category to the input text, and only one category may be returned in the output. Additionally, you need to extract the key words from the text that are related to the classification.
### Format
The input text is in the variable text_field. Categories are specified as a comma-separated list in the variable categories or left empty for automatic determination. Classification instructions may be included to improve the classification accuracy.
The input text is in the variable text_field. Categories are specified as a category list in the variable categories or left empty for automatic determination. Classification instructions may be included to improve the classification accuracy.
### Constraint
DO NOT include anything other than the JSON array in your response.
### Example
Here is the chat example between human and assistant, inside <example></example> XML tags.
<example>
User:{{"input_text": ["I recently had a great experience with your company. The service was prompt and the staff was very friendly."],"categories": ["Customer Service, Satisfaction, Sales, Product"], "classification_instructions": ["classify the text based on the feedback provided by customer"]}}
Assistant:{{"keywords": ["recently", "great experience", "company", "service", "prompt", "staff", "friendly"],"categories": ["Customer Service"]}}
User:{{"input_text": ["bad service, slow to bring the food"],"categories": ["Food Quality, Experience, Price" ], "classification_instructions": []}}
Assistant:{{"keywords": ["recently", "great experience", "company", "service", "prompt", "staff", "friendly"],"categories": ["Customer Service"]}}{{"keywords": ["bad service", "slow", "food", "tip", "terrible", "waitresses"],"categories": ["Experience""]}}
User:{{"input_text": ["I recently had a great experience with your company. The service was prompt and the staff was very friendly."], "categories": [{{"category_id":"f5660049-284f-41a7-b301-fd24176a711c","category_name":"Customer Service"}},{{"category_id":"8d007d06-f2c9-4be5-8ff6-cd4381c13c60","category_name":"Satisfaction"}},{{"category_id":"5fbbbb18-9843-466d-9b8e-b9bfbb9482c8","category_name":"Sales"}},{{"category_id":"23623c75-7184-4a2e-8226-466c2e4631e4","category_name":"Product"}}], "classification_instructions": ["classify the text based on the feedback provided by customer"]}}
Assistant:{{"category_id": "f5660049-284f-41a7-b301-fd24176a711c","category_name": "Customer Service"}}
User:{{"input_text": ["bad service, slow to bring the food"], "categories": [{{"category_id":"80fb86a0-4454-4bf5-924c-f253fdd83c02","category_name":"Food Quality"}},{{"category_id":"f6ff5bc3-aca0-4e4a-8627-e760d0aca78f","category_name":"Experience"}},{{"category_id":"cc771f63-74e7-4c61-882e-3eda9d8ba5d7","category_name":"Price"}}], "classification_instructions": []}}
Assistant:{{"category_id": "f6ff5bc3-aca0-4e4a-8627-e760d0aca78f","category_name": "Experience"}}
</example>
### Memory
Here is the chat histories between human and assistant, inside <histories></histories> XML tags.
@ -71,6 +71,6 @@ Here is the chat histories between human and assistant, inside <histories></hist
{histories}
</histories>
### User Input
{{"input_text" : ["{input_text}"], "categories" : ["{categories}"],"classification_instruction" : ["{classification_instructions}"]}}
{{"input_text" : ["{input_text}"], "categories" : {categories},"classification_instruction" : ["{classification_instructions}"]}}
### Assistant Output
"""