question classifier optimize (#4147)

2025-08-12 22:29:03 +08:00 · 2024-05-07 16:44:27 +08:00 · 2024-05-07 16:44:27 +08:00 · e353809680
commit e353809680
parent c2f0f958ef
2 changed files with 38 additions and 27 deletions
--- a/api/core/workflow/nodes/question_classifier/question_classifier_node.py
+++ b/api/core/workflow/nodes/question_classifier/question_classifier_node.py
@ -1,3 +1,4 @@
+import json
 import logging
 from typing import Optional, Union, cast

@ -62,13 +63,20 @@ class QuestionClassifierNode(LLMNode):
            prompt_messages=prompt_messages,
            stop=stop
        )
-        categories = [_class.name for _class in node_data.classes]
+        category_name = node_data.classes[0].name
+        category_id = node_data.classes[0].id
        try:
            result_text_json = parse_and_check_json_markdown(result_text, [])
-            #result_text_json = json.loads(result_text.strip('```JSON\n'))
-            categories_result = result_text_json.get('categories', [])
-            if categories_result:
-                categories = categories_result
+            # result_text_json = json.loads(result_text.strip('```JSON\n'))
+            if 'category_name' in result_text_json and 'category_id' in result_text_json:
+                category_id_result = result_text_json['category_id']
+                classes = node_data.classes
+                classes_map = {class_.id: class_.name for class_ in classes}
+                category_ids = [_class.id for _class in classes]
+                if category_id_result in category_ids:
+                    category_name = classes_map[category_id_result]
+                    category_id = category_id_result
+
        except Exception:
            logging.error(f"Failed to parse result text: {result_text}")
        try:
@ -81,17 +89,15 @@ class QuestionClassifierNode(LLMNode):
                'usage': jsonable_encoder(usage),
            }
            outputs = {
-                'class_name': categories[0] if categories else ''
+                'class_name': category_name
            }
-            classes = node_data.classes
-            classes_map = {class_.name: class_.id for class_ in classes}

            return NodeRunResult(
                status=WorkflowNodeExecutionStatus.SUCCEEDED,
                inputs=variables,
                process_data=process_data,
                outputs=outputs,
-                edge_source_handle=classes_map.get(categories[0], None),
+                edge_source_handle=category_id,
                metadata={
                    NodeRunMetadataKey.TOTAL_TOKENS: usage.total_tokens,
                    NodeRunMetadataKey.TOTAL_PRICE: usage.total_price,
@ -210,8 +216,13 @@ class QuestionClassifierNode(LLMNode):
            -> Union[list[ChatModelMessage], CompletionModelPromptTemplate]:
        model_mode = ModelMode.value_of(node_data.model.mode)
        classes = node_data.classes
-        class_names = [class_.name for class_ in classes]
-        class_names_str = ','.join(f'"{name}"' for name in class_names)
+        categories = []
+        for class_ in classes:
+            category = {
+                'category_id': class_.id,
+                'category_name': class_.name
+            }
+            categories.append(category)
        instruction = node_data.instruction if node_data.instruction else ''
        input_text = query
        memory_str = ''
@ -248,7 +259,7 @@ class QuestionClassifierNode(LLMNode):
            user_prompt_message_3 = ChatModelMessage(
                role=PromptMessageRole.USER,
                text=QUESTION_CLASSIFIER_USER_PROMPT_3.format(input_text=input_text,
-                                                              categories=class_names_str,
+                                                              categories=json.dumps(categories),
                                                              classification_instructions=instruction)
            )
            prompt_messages.append(user_prompt_message_3)
@ -257,7 +268,7 @@ class QuestionClassifierNode(LLMNode):
            return CompletionModelPromptTemplate(
                text=QUESTION_CLASSIFIER_COMPLETION_PROMPT.format(histories=memory_str,
                                                                  input_text=input_text,
-                                                                  categories=class_names_str,
+                                                                  categories=json.dumps(categories),
                                                                  classification_instructions=instruction)
            )

--- a/api/core/workflow/nodes/question_classifier/template_prompts.py
+++ b/api/core/workflow/nodes/question_classifier/template_prompts.py
@ -6,7 +6,7 @@ QUESTION_CLASSIFIER_SYSTEM_PROMPT = """
    ### Task
    Your task is to assign one categories ONLY to the input text and only one category may be assigned returned in the output.Additionally, you need to extract the key words from the text that are related to the classification.
    ### Format
-    The input text is in the variable text_field.Categories are specified as a comma-separated list in the variable categories or left empty for automatic determination.Classification instructions may be included to improve the classification accuracy.
+    The input text is in the variable text_field. Categories are specified as a category list in the variable categories or left empty for automatic determination. Classification instructions may be included to improve the classification accuracy.
    ### Constraint
    DO NOT include anything other than the JSON array in your response.
    ### Memory
@ -18,33 +18,33 @@ QUESTION_CLASSIFIER_SYSTEM_PROMPT = """

 QUESTION_CLASSIFIER_USER_PROMPT_1 = """
    { "input_text": ["I recently had a great experience with your company. The service was prompt and the staff was very friendly."],
-    "categories": ["Customer Service", "Satisfaction", "Sales", "Product"],
+    "categories": [{"category_id":"f5660049-284f-41a7-b301-fd24176a711c","category_name":"Customer Service"},{"category_id":"8d007d06-f2c9-4be5-8ff6-cd4381c13c60","category_name":"Satisfaction"},{"category_id":"5fbbbb18-9843-466d-9b8e-b9bfbb9482c8","category_name":"Sales"},{"category_id":"23623c75-7184-4a2e-8226-466c2e4631e4","category_name":"Product"}],
    "classification_instructions": ["classify the text based on the feedback provided by customer"]}
 """

 QUESTION_CLASSIFIER_ASSISTANT_PROMPT_1 = """
 ```json
-    {"keywords": ["recently", "great experience", "company", "service", "prompt", "staff", "friendly"],
-    "categories": ["Customer Service"]}
+    {"category_id": "f5660049-284f-41a7-b301-fd24176a711c",
+    "category_name": "Customer Service"}
 ```
 """

 QUESTION_CLASSIFIER_USER_PROMPT_2 = """
    {"input_text": ["bad service, slow to bring the food"],
-    "categories": ["Food Quality", "Experience", "Price" ], 
+    "categories": [{"category_id":"80fb86a0-4454-4bf5-924c-f253fdd83c02","category_name":"Food Quality"},{"category_id":"f6ff5bc3-aca0-4e4a-8627-e760d0aca78f","category_name":"Experience"},{"category_id":"cc771f63-74e7-4c61-882e-3eda9d8ba5d7","category_name":"Price"}],
    "classification_instructions": []}
 """

 QUESTION_CLASSIFIER_ASSISTANT_PROMPT_2 = """
 ```json
-    {"keywords": ["bad service", "slow", "food", "tip", "terrible", "waitresses"],
-    "categories": ["Experience"]}
+    {"category_id": "f6ff5bc3-aca0-4e4a-8627-e760d0aca78f",
+    "category_name": "Experience"}
 ```
 """

 QUESTION_CLASSIFIER_USER_PROMPT_3 = """
    '{{"input_text": ["{input_text}"],',
-    '"categories": ["{categories}" ], ',
+    '"categories": {categories}, ',
    '"classification_instructions": ["{classification_instructions}"]}}'
 """

@ -54,16 +54,16 @@ You are a text classification engine that analyzes text data and assigns categor
 ### Task
 Your task is to assign one categories ONLY to the input text and only one category may be assigned returned in the output.  Additionally, you need to extract the key words from the text that are related to the classification.
 ### Format
-The input text is in the variable text_field. Categories are specified as a comma-separated list in the variable categories or left empty for automatic determination. Classification instructions may be included to improve the classification accuracy. 
+The input text is in the variable text_field. Categories are specified as a category list in the variable categories or left empty for automatic determination. Classification instructions may be included to improve the classification accuracy. 
 ### Constraint 
 DO NOT include anything other than the JSON array in your response.
 ### Example
 Here is the chat example between human and assistant, inside <example></example> XML tags.
 <example>
-User:{{"input_text": ["I recently had a great experience with your company. The service was prompt and the staff was very friendly."],"categories": ["Customer Service, Satisfaction, Sales, Product"], "classification_instructions": ["classify the text based on the feedback provided by customer"]}}
-Assistant:{{"keywords": ["recently", "great experience", "company", "service", "prompt", "staff", "friendly"],"categories": ["Customer Service"]}}
-User:{{"input_text": ["bad service, slow to bring the food"],"categories": ["Food Quality, Experience, Price" ], "classification_instructions": []}}
-Assistant:{{"keywords": ["recently", "great experience", "company", "service", "prompt", "staff", "friendly"],"categories": ["Customer Service"]}}{{"keywords": ["bad service", "slow", "food", "tip", "terrible", "waitresses"],"categories": ["Experience""]}}
+User:{{"input_text": ["I recently had a great experience with your company. The service was prompt and the staff was very friendly."], "categories": [{{"category_id":"f5660049-284f-41a7-b301-fd24176a711c","category_name":"Customer Service"}},{{"category_id":"8d007d06-f2c9-4be5-8ff6-cd4381c13c60","category_name":"Satisfaction"}},{{"category_id":"5fbbbb18-9843-466d-9b8e-b9bfbb9482c8","category_name":"Sales"}},{{"category_id":"23623c75-7184-4a2e-8226-466c2e4631e4","category_name":"Product"}}], "classification_instructions": ["classify the text based on the feedback provided by customer"]}}
+Assistant:{{"category_id": "f5660049-284f-41a7-b301-fd24176a711c","category_name": "Customer Service"}}
+User:{{"input_text": ["bad service, slow to bring the food"], "categories": [{{"category_id":"80fb86a0-4454-4bf5-924c-f253fdd83c02","category_name":"Food Quality"}},{{"category_id":"f6ff5bc3-aca0-4e4a-8627-e760d0aca78f","category_name":"Experience"}},{{"category_id":"cc771f63-74e7-4c61-882e-3eda9d8ba5d7","category_name":"Price"}}], "classification_instructions": []}}
+Assistant:{{"category_id": "f6ff5bc3-aca0-4e4a-8627-e760d0aca78f","category_name": "Experience"}}
 </example> 
 ### Memory
 Here is the chat histories between human and assistant, inside <histories></histories> XML tags.
@ -71,6 +71,6 @@ Here is the chat histories between human and assistant, inside <histories></hist
 {histories}
 </histories>
 ### User Input
-{{"input_text" : ["{input_text}"], "categories" : ["{categories}"],"classification_instruction" : ["{classification_instructions}"]}}
+{{"input_text" : ["{input_text}"], "categories" : {categories},"classification_instruction" : ["{classification_instructions}"]}}
 ### Assistant Output
 """