From 974a467cf628e3bf7441a6e50b6a1632638e42a1 Mon Sep 17 00:00:00 2001 From: Xc1995 <47383595+MichaelXcc@users.noreply.github.com> Date: Thu, 27 Mar 2025 17:02:21 +0800 Subject: [PATCH] Fix: The rule of Categorize operator is adjusted. (#6599) ### What problem does this PR solve? When using the Categorize operator, if category keywords appear repeatedly in the model's answer, the operator does not pick the category that appears most frequently. Instead, it simply returns the first category whose name matches. This PR changes the Categorize matching rule as follows: the occurrences of each category name in the answer are counted, and the category with the highest count is returned; if no category is found, the last category is still used as the fallback. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) - [x] New Feature (non-breaking change which adds functionality) - [x] Refactoring - [x] Performance Improvement --- agent/component/categorize.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/agent/component/categorize.py b/agent/component/categorize.py index 3f7a0cba9..4d8715f09 100644 --- a/agent/component/categorize.py +++ b/agent/component/categorize.py @@ -87,10 +87,17 @@ class Categorize(Generate, ABC): chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id) ans = chat_mdl.chat(self._param.get_prompt(input), [{"role": "user", "content": "\nCategory: "}], self._param.gen_conf()) - logging.debug(f"input: {input}, answer: {str(ans)}") + logging.debug(f"input: {input}, answer: {str(ans)}") + # Count the number of times each category appears in the answer. + category_counts = {} for c in self._param.category_description.keys(): - if ans.lower().find(c.lower()) >= 0: - return Categorize.be_output(self._param.category_description[c]["to"]) + count = ans.lower().count(c.lower()) + category_counts[c] = count + + # If a category is found, return the category with the highest count. 
+ if any(category_counts.values()): + max_category = max(category_counts.items(), key=lambda x: x[1]) + return Categorize.be_output(self._param.category_description[max_category[0]]["to"]) return Categorize.be_output(list(self._param.category_description.items())[-1][1]["to"])