diff --git a/rag/nlp/search.py b/rag/nlp/search.py index 5d67d9da7..abb69401c 100644 --- a/rag/nlp/search.py +++ b/rag/nlp/search.py @@ -465,7 +465,7 @@ class Dealer: if not aggs: return False cnt = np.sum([c for _, c in aggs]) - tag_fea = sorted([(a, round(0.1*(c + 1) / (cnt + S) / (all_tags.get(a, 0.0001)))) for a, c in aggs], + tag_fea = sorted([(a, round(0.1*(c + 1) / (cnt + S) / max(1e-6, all_tags.get(a, 0.0001)))) for a, c in aggs], key=lambda x: x[1] * -1)[:topn_tags] doc[TAG_FLD] = {a: c for a, c in tag_fea if c > 0} return True @@ -481,6 +481,6 @@ class Dealer: if not aggs: return {} cnt = np.sum([c for _, c in aggs]) - tag_fea = sorted([(a, round(0.1*(c + 1) / (cnt + S) / (all_tags.get(a, 0.0001)))) for a, c in aggs], + tag_fea = sorted([(a, round(0.1*(c + 1) / (cnt + S) / max(1e-6, all_tags.get(a, 0.0001)))) for a, c in aggs], key=lambda x: x[1] * -1)[:topn_tags] return {a: max(1, c) for a, c in tag_fea}