mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-13 23:16:04 +08:00
1506 remove duplicated code (#1511)
This commit is contained in:
parent
f3b9647bb4
commit
d0e1ea8f06
@ -89,22 +89,6 @@ class IndexingRunner:
|
|||||||
dataset_document.stopped_at = datetime.datetime.utcnow()
|
dataset_document.stopped_at = datetime.datetime.utcnow()
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
def format_split_text(self, text):
|
|
||||||
regex = r"Q\d+:\s*(.*?)\s*A\d+:\s*([\s\S]*?)(?=Q|$)"
|
|
||||||
matches = re.findall(regex, text, re.MULTILINE)
|
|
||||||
|
|
||||||
result = []
|
|
||||||
for match in matches:
|
|
||||||
q = match[0]
|
|
||||||
a = match[1]
|
|
||||||
if q and a:
|
|
||||||
result.append({
|
|
||||||
"question": q,
|
|
||||||
"answer": re.sub(r"\n\s*", "\n", a.strip())
|
|
||||||
})
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
def run_in_splitting_status(self, dataset_document: DatasetDocument):
|
def run_in_splitting_status(self, dataset_document: DatasetDocument):
|
||||||
"""Run the indexing process when the index_status is splitting."""
|
"""Run the indexing process when the index_status is splitting."""
|
||||||
try:
|
try:
|
||||||
@ -647,21 +631,16 @@ class IndexingRunner:
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
def format_split_text(self, text):
|
def format_split_text(self, text):
|
||||||
regex = r"Q\d+:\s*(.*?)\s*A\d+:\s*([\s\S]*?)(?=Q|$)" # 匹配Q和A的正则表达式
|
regex = r"Q\d+:\s*(.*?)\s*A\d+:\s*([\s\S]*?)(?=Q|$)"
|
||||||
matches = re.findall(regex, text, re.MULTILINE) # 获取所有匹配到的结果
|
matches = re.findall(regex, text, re.MULTILINE)
|
||||||
|
|
||||||
result = [] # 存储最终的结果
|
return [
|
||||||
for match in matches:
|
{
|
||||||
q = match[0]
|
|
||||||
a = match[1]
|
|
||||||
if q and a:
|
|
||||||
# 如果Q和A都存在,就将其添加到结果中
|
|
||||||
result.append({
|
|
||||||
"question": q,
|
"question": q,
|
||||||
"answer": re.sub(r"\n\s*", "\n", a.strip())
|
"answer": re.sub(r"\n\s*", "\n", a.strip())
|
||||||
})
|
}
|
||||||
|
for q, a in matches if q and a
|
||||||
return result
|
]
|
||||||
|
|
||||||
def _build_index(self, dataset: Dataset, dataset_document: DatasetDocument, documents: List[Document]) -> None:
|
def _build_index(self, dataset: Dataset, dataset_document: DatasetDocument, documents: List[Document]) -> None:
|
||||||
"""
|
"""
|
||||||
|
Loading…
x
Reference in New Issue
Block a user