diff --git a/rag/nlp/rag_tokenizer.py b/rag/nlp/rag_tokenizer.py index c262ca9b4..6f2686851 100644 --- a/rag/nlp/rag_tokenizer.py +++ b/rag/nlp/rag_tokenizer.py @@ -118,6 +118,8 @@ class RagTokenizer: def dfs_(self, chars, s, preTks, tkslist): res = s + if len(tkslist) >= 2048: + return res # if s > MAX_L or s>= len(chars): if s >= len(chars): tkslist.append(preTks)