mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-06-04 11:24:00 +08:00
Make language judgement more robust (#3287)
### What problem does this PR solve?

### Type of change

- [x] Performance Improvement
This commit is contained in:
parent
a2153d61ce
commit
d88f0d43ea
@ -63,9 +63,9 @@ class EsQueryer:
|
||||
rag_tokenizer.tradi2simp(
|
||||
rag_tokenizer.strQ2B(
|
||||
txt.lower()))).strip()
|
||||
txt = EsQueryer.rmWWW(txt)
|
||||
|
||||
if not self.isChinese(txt):
|
||||
txt = EsQueryer.rmWWW(txt)
|
||||
tks = rag_tokenizer.tokenize(txt).split(" ")
|
||||
tks_w = self.tw.weights(tks)
|
||||
tks_w = [(re.sub(r"[ \\\"'^]", "", tk), w) for tk, w in tks_w]
|
||||
@ -89,6 +89,7 @@ class EsQueryer:
|
||||
return False
|
||||
return True
|
||||
|
||||
txt = EsQueryer.rmWWW(txt)
|
||||
qs, keywords = [], []
|
||||
for tt in self.tw.split(txt)[:256]: # .split(" "):
|
||||
if not tt:
|
||||
|
Loading…
x
Reference in New Issue
Block a user