Fix: The rule of Categorize operator is adjusted. (#6599)

### What problem does this PR solve?

When I use the Categorize operator and the keywords I want to categorize
by appear repeatedly in the text being matched, the operator does not
pick the category that appears most frequently. Instead, it simply
returns the first category whose name matches. This PR makes the
following change to the Categorize matching rule: it counts how many
times each category name occurs and returns the category with the
highest count.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [x] Refactoring
- [x] Performance Improvement
This commit is contained in:
Xc1995 2025-03-27 17:02:21 +08:00 committed by GitHub
parent 36b62e0fab
commit 974a467cf6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -87,10 +87,17 @@ class Categorize(Generate, ABC):
chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id) chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
ans = chat_mdl.chat(self._param.get_prompt(input), [{"role": "user", "content": "\nCategory: "}], ans = chat_mdl.chat(self._param.get_prompt(input), [{"role": "user", "content": "\nCategory: "}],
self._param.gen_conf()) self._param.gen_conf())
logging.debug(f"input: {input}, answer: {str(ans)}") logging.debug(f"input: {input}, answer: {str(ans)}")
# Count the number of times each category appears in the answer.
category_counts = {}
for c in self._param.category_description.keys(): for c in self._param.category_description.keys():
if ans.lower().find(c.lower()) >= 0: count = ans.lower().count(c.lower())
return Categorize.be_output(self._param.category_description[c]["to"]) category_counts[c] = count
# If a category is found, return the category with the highest count.
if any(category_counts.values()):
max_category = max(category_counts.items(), key=lambda x: x[1])
return Categorize.be_output(self._param.category_description[max_category[0]]["to"])
return Categorize.be_output(list(self._param.category_description.items())[-1][1]["to"]) return Categorize.be_output(list(self._param.category_description.items())[-1][1]["to"])