mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-12 08:28:57 +08:00
fix: split text keep separator (#7930)
This commit is contained in:
parent
7b2cf8215f
commit
571415d1a4
@ -30,15 +30,14 @@ def _split_text_with_regex(
|
|||||||
if keep_separator:
|
if keep_separator:
|
||||||
# The parentheses in the pattern keep the delimiters in the result.
|
# The parentheses in the pattern keep the delimiters in the result.
|
||||||
_splits = re.split(f"({re.escape(separator)})", text)
|
_splits = re.split(f"({re.escape(separator)})", text)
|
||||||
splits = [_splits[i] + _splits[i + 1] for i in range(1, len(_splits), 2)]
|
splits = [_splits[i - 1] + _splits[i] for i in range(1, len(_splits), 2)]
|
||||||
if len(_splits) % 2 == 0:
|
if len(_splits) % 2 != 0:
|
||||||
splits += _splits[-1:]
|
splits += _splits[-1:]
|
||||||
splits = [_splits[0]] + splits
|
|
||||||
else:
|
else:
|
||||||
splits = re.split(separator, text)
|
splits = re.split(separator, text)
|
||||||
else:
|
else:
|
||||||
splits = list(text)
|
splits = list(text)
|
||||||
return [s for s in splits if s != ""]
|
return [s for s in splits if (s != "" and s != '\n')]
|
||||||
|
|
||||||
|
|
||||||
class TextSplitter(BaseDocumentTransformer, ABC):
|
class TextSplitter(BaseDocumentTransformer, ABC):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user