mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-13 22:35:54 +08:00
fix special code (#473)
This commit is contained in:
parent
97e9ebd29a
commit
2eea114ac0
@@ -235,7 +235,8 @@ class IndexingRunner:
                 if len(preview_texts) < 5:
                     preview_texts.append(document.page_content)
 
-                tokens += TokenCalculator.get_num_tokens(self.embedding_model_name, document.page_content)
+                tokens += TokenCalculator.get_num_tokens(self.embedding_model_name,
+                                                         self.filter_string(document.page_content))
 
         return {
             "total_segments": total_segments,
@@ -345,6 +346,8 @@ class IndexingRunner:
         return text_docs
 
     def filter_string(self, text):
+        text = text.replace('<|', '<')
+        text = text.replace('|>', '>')
         pattern = re.compile('[\x00-\x08\x0B\x0C\x0E-\x1F\x7F\x80-\xFF]')
         return pattern.sub('', text)
 
Loading…
x
Reference in New Issue
Block a user