From 6d232f1bdbea16074d0b534869df97898398608e Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 3 Sep 2024 13:37:32 +0800 Subject: [PATCH] enable fine-grained tokenization for 3-char words (#2210) ### What problem does this PR solve? Lowers the minimum token length that is eligible for fine-grained tokenization from 4 characters to 3. ### Type of change - [x] Performance Improvement --- rag/nlp/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rag/nlp/query.py b/rag/nlp/query.py index ccd1a5af2..a8d2d0b59 100644 --- a/rag/nlp/query.py +++ b/rag/nlp/query.py @@ -83,7 +83,7 @@ class EsQueryer: ), tks def need_fine_grained_tokenize(tk): - if len(tk) < 4: + if len(tk) < 3: return False if re.match(r"[0-9a-z\.\+#_\*-]+$", tk): return False