Fix: period (".") in tag names issue. (#6436)

### What problem does this PR solve?

#6414 — periods (".") in tag names are now replaced with underscores ("_").

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Kevin Hu 2025-03-24 10:45:29 +08:00 committed by GitHub
parent a6aed0da46
commit ee5aa51d43
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 3 additions and 3 deletions

View File

@ -27,7 +27,7 @@ def beAdoc(d, q, a, eng, row_num=-1):
d["content_with_weight"] = q
d["content_ltks"] = rag_tokenizer.tokenize(q)
d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
d["tag_kwd"] = [t.strip() for t in a.split(",") if t.strip()]
d["tag_kwd"] = [t.strip().replace(".", "_") for t in a.split(",") if t.strip()]
if row_num >= 0:
d["top_int"] = [row_num]
return d

View File

@ -465,7 +465,7 @@ class Dealer:
cnt = np.sum([c for _, c in aggs])
tag_fea = sorted([(a, round(0.1*(c + 1) / (cnt + S) / max(1e-6, all_tags.get(a, 0.0001)))) for a, c in aggs],
key=lambda x: x[1] * -1)[:topn_tags]
doc[TAG_FLD] = {a: c for a, c in tag_fea if c > 0}
doc[TAG_FLD] = {a.replace(".", "_"): c for a, c in tag_fea if c > 0}
return True
def tag_query(self, question: str, tenant_ids: str | list[str], kb_ids: list[str], all_tags, topn_tags=3, S=1000):
@ -481,4 +481,4 @@ class Dealer:
cnt = np.sum([c for _, c in aggs])
tag_fea = sorted([(a, round(0.1*(c + 1) / (cnt + S) / max(1e-6, all_tags.get(a, 0.0001)))) for a, c in aggs],
key=lambda x: x[1] * -1)[:topn_tags]
return {a: max(1, c) for a, c in tag_fea}
return {a.replace(".", "_"): max(1, c) for a, c in tag_fea}