From 998f819b04f442a1da6e48340be72ab3b96694c2 Mon Sep 17 00:00:00 2001
From: crazywoola <100913391+crazywoola@users.noreply.github.com>
Date: Wed, 28 Jun 2023 14:58:40 +0800
Subject: [PATCH] use sub to operate all (#475)

---
 api/core/indexing_runner.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py
index f4f8b7d8de..30ad5a99df 100644
--- a/api/core/indexing_runner.py
+++ b/api/core/indexing_runner.py
@@ -346,10 +346,10 @@ class IndexingRunner:
         return text_docs
 
     def filter_string(self, text):
-        text = text.replace('<|', '<')
-        text = text.replace('|>', '>')
-        pattern = re.compile('[\x00-\x08\x0B\x0C\x0E-\x1F\x7F\x80-\xFF]')
-        return pattern.sub('', text)
+        text = re.sub(r'<\|', '<', text)
+        text = re.sub(r'\|>', '>', text)
+        text = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F\x80-\xFF]', '', text)
+        return text
 
     def _get_splitter(self, processing_rule: DatasetProcessRule) -> TextSplitter:
         """