From b164116277af392b718084329bc8f00a5dcc88e2 Mon Sep 17 00:00:00 2001
From: Kevin Hu
Date: Mon, 14 Oct 2024 13:33:18 +0800
Subject: [PATCH] refine token similarity (#2824)

### What problem does this PR solve?

### Type of change

- [x] Performance Improvement
---
 agent/canvas.py  | 2 +-
 rag/nlp/query.py | 7 ++-----
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/agent/canvas.py b/agent/canvas.py
index 9919fe1da..f78167532 100644
--- a/agent/canvas.py
+++ b/agent/canvas.py
@@ -262,7 +262,7 @@ class Canvas(ABC):
         convs = []
         for role, obj in self.history[(window_size + 1) * -1:]:
             convs.append({"role": role, "content": (obj if role == "user" else
-                                                    '\n'.join(pd.DataFrame(obj)['content']))})
+                                                    '\n'.join([str(s) for s in pd.DataFrame(obj)['content']]))})
         return convs
 
     def add_user_input(self, question):
diff --git a/rag/nlp/query.py b/rag/nlp/query.py
index 2bc482b95..c58c99c4c 100644
--- a/rag/nlp/query.py
+++ b/rag/nlp/query.py
@@ -186,8 +186,5 @@ class EsQueryer:
                 s += v # * dtwt[k]
         q = 1e-9
         for k, v in qtwt.items():
-            q += v # * v
-        #d = 1e-9
-        # for k, v in dtwt.items():
-        # d += v * v
-        return s / q / max(1, math.sqrt(math.log10(max(len(qtwt.keys()), len(dtwt.keys())))))# math.sqrt(q) / math.sqrt(d)
+            q += v
+        return s / q