mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-14 00:05:57 +08:00
use sub to operate all (#475)
This commit is contained in:
parent
6194b82752
commit
998f819b04
@ -346,10 +346,10 @@ class IndexingRunner:
|
||||
return text_docs
|
||||
|
||||
def filter_string(self, text):
    """Sanitize a piece of text by normalizing delimiters and dropping control characters.

    Two clean-ups are applied, in this order:

    1. Every ``<|`` becomes ``<`` and every ``|>`` becomes ``>``.
       NOTE(review): presumably this keeps document text from being read
       as ``<|...|>``-style special-token markers downstream -- confirm.
    2. Control characters are removed: the C0 range except tab, line feed
       and carriage return, DEL (U+007F), and the C1 range
       (U+0080-U+009F).

    Printable Latin-1 characters (U+00A0-U+00FF, e.g. accented letters)
    are preserved. The previous character class ended in ``\\x80-\\xFF``,
    which on a ``str`` matches those code points and silently deleted
    them.

    :param text: the string to clean.
    :return: the cleaned string.
    """
    # Collapse the delimiter pairs first, left marker before right marker,
    # matching the original statement order.
    text = re.sub(r'<\|', '<', text)
    text = re.sub(r'\|>', '>', text)
    # \x09 (tab), \x0A (LF) and \x0D (CR) are deliberately absent from the
    # class so ordinary whitespace layout survives.
    text = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]', '', text)
    return text
|
||||
|
||||
def _get_splitter(self, processing_rule: DatasetProcessRule) -> TextSplitter:
|
||||
"""
|
||||
|
Loading…
x
Reference in New Issue
Block a user