diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py index 56c45bfd85..2baecedfd7 100644 --- a/api/core/indexing_runner.py +++ b/api/core/indexing_runner.py @@ -632,8 +632,8 @@ class IndexingRunner: return text def format_split_text(self, text): - regex = r"Q\d+:\s*(.*?)\s*A\d+:\s*([\s\S]*?)(?=Q|$)" - matches = re.findall(regex, text, re.MULTILINE) + regex = r"Q\d+:\s*(.*?)\s*A\d+:\s*([\s\S]*?)(?=Q\d+:|$)" + matches = re.findall(regex, text, re.UNICODE) return [ {