deal db session in celery worker (#17549)
This commit is contained in:
parent da7f8ad936
commit c8145ce581
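The change is the same in every file touched below: Celery tasks that query the database now release their SQLAlchemy session explicitly, either right before an early return or in a `finally:` block. A minimal sketch of the shape the tasks converge on, assuming the `db` object from extensions.ext_database and a hypothetical task body (the real tasks differ in their queries and index work):

import logging

import click
from celery import shared_task  # type: ignore

from extensions.ext_database import db
from models.dataset import Dataset


@shared_task(queue="dataset")
def example_task(dataset_id: str):
    """Illustrative only: query, do the work, and always close the session."""
    try:
        dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
        if not dataset:
            # log and return instead of raising an HTTP error inside a worker
            logging.info(click.style("Dataset not found: {}".format(dataset_id), fg="red"))
            return
        ...  # index / cleanup work would go here
    except Exception:
        logging.exception("example_task failed")
    finally:
        # return the pooled connection instead of leaving it checked out
        # for the lifetime of the worker process
        db.session.close()

Because the close sits in `finally:`, it runs on the success path, on the early return, and on any exception, which is what the hunks below add case by case.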
@@ -4,7 +4,6 @@ import time

import click
from celery import shared_task  # type: ignore
from werkzeug.exceptions import NotFound

from core.rag.index_processor.constant.index_type import IndexType
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory

@@ -28,7 +27,9 @@ def add_document_to_index_task(dataset_document_id: str):

    dataset_document = db.session.query(DatasetDocument).filter(DatasetDocument.id == dataset_document_id).first()
    if not dataset_document:
        raise NotFound("Document not found")
        logging.info(click.style("Document not found: {}".format(dataset_document_id), fg="red"))
        db.session.close()
        return

    if dataset_document.indexing_status != "completed":
        return
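The first pair of hunks shows the recurring guard-clause edit: a missing row no longer raises werkzeug's NotFound (an HTTP exception that nothing in a Celery worker can turn into a response); the task logs, closes the session, and returns. A hypothetical helper, not part of this commit, that captures the same guard once, assuming the same `db` session object:

import logging
from typing import Optional, Type, TypeVar

from extensions.ext_database import db

T = TypeVar("T")


def first_or_close(model: Type[T], **filters) -> Optional[T]:
    """Return the first matching row, or close the session and return None."""
    instance = db.session.query(model).filter_by(**filters).first()
    if instance is None:
        logging.info("%s not found: %s", model.__name__, filters)
        db.session.close()
    return instance


# usage inside a task (DatasetDocument is the model queried in the hunk above):
# dataset_document = first_or_close(DatasetDocument, id=dataset_document_id)
# if dataset_document is None:
#     return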
@@ -6,6 +6,7 @@ from celery import shared_task  # type: ignore

from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.models.document import Document
from extensions.ext_database import db
from models.dataset import Dataset
from services.dataset_service import DatasetCollectionBindingService

@@ -55,3 +56,5 @@ def add_annotation_to_index_task(
        )
    except Exception:
        logging.exception("Build index for annotation failed")
    finally:
        db.session.close()
@@ -88,3 +88,5 @@ def batch_import_annotations_task(job_id: str, content_list: list[dict], app_id:
        indexing_error_msg_key = "app_annotation_batch_import_error_msg_{}".format(str(job_id))
        redis_client.setex(indexing_error_msg_key, 600, str(e))
        logging.exception("Build index for batch import annotations failed")
    finally:
        db.session.close()
@@ -5,6 +5,7 @@ import click
from celery import shared_task  # type: ignore

from core.rag.datasource.vdb.vector_factory import Vector
from extensions.ext_database import db
from models.dataset import Dataset
from services.dataset_service import DatasetCollectionBindingService

@@ -39,3 +40,5 @@ def delete_annotation_index_task(annotation_id: str, app_id: str, tenant_id: str
        )
    except Exception as e:
        logging.exception("Annotation deleted index failed")
    finally:
        db.session.close()
@@ -3,7 +3,6 @@ import time

import click
from celery import shared_task  # type: ignore
from werkzeug.exceptions import NotFound

from core.rag.datasource.vdb.vector_factory import Vector
from extensions.ext_database import db

@@ -23,14 +22,18 @@ def disable_annotation_reply_task(job_id: str, app_id: str, tenant_id: str):
    app = db.session.query(App).filter(App.id == app_id, App.tenant_id == tenant_id, App.status == "normal").first()
    annotations_count = db.session.query(MessageAnnotation).filter(MessageAnnotation.app_id == app_id).count()
    if not app:
        raise NotFound("App not found")
        logging.info(click.style("App not found: {}".format(app_id), fg="red"))
        db.session.close()
        return

    app_annotation_setting = (
        db.session.query(AppAnnotationSetting).filter(AppAnnotationSetting.app_id == app_id).first()
    )

    if not app_annotation_setting:
        raise NotFound("App annotation setting not found")
        logging.info(click.style("App annotation setting not found: {}".format(app_id), fg="red"))
        db.session.close()
        return

    disable_app_annotation_key = "disable_app_annotation_{}".format(str(app_id))
    disable_app_annotation_job_key = "disable_app_annotation_job_{}".format(str(job_id))

@@ -66,3 +69,4 @@ def disable_annotation_reply_task(job_id: str, app_id: str, tenant_id: str):
        redis_client.setex(disable_app_annotation_error_key, 600, str(e))
    finally:
        redis_client.delete(disable_app_annotation_key)
        db.session.close()
@@ -4,7 +4,6 @@ import time

import click
from celery import shared_task  # type: ignore
from werkzeug.exceptions import NotFound

from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.models.document import Document

@@ -34,7 +33,9 @@ def enable_annotation_reply_task(
    app = db.session.query(App).filter(App.id == app_id, App.tenant_id == tenant_id, App.status == "normal").first()

    if not app:
        raise NotFound("App not found")
        logging.info(click.style("App not found: {}".format(app_id), fg="red"))
        db.session.close()
        return

    annotations = db.session.query(MessageAnnotation).filter(MessageAnnotation.app_id == app_id).all()
    enable_app_annotation_key = "enable_app_annotation_{}".format(str(app_id))

@@ -121,3 +122,4 @@ def enable_annotation_reply_task(
        db.session.rollback()
    finally:
        redis_client.delete(enable_app_annotation_key)
        db.session.close()
@@ -6,6 +6,7 @@ from celery import shared_task  # type: ignore

from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.models.document import Document
from extensions.ext_database import db
from models.dataset import Dataset
from services.dataset_service import DatasetCollectionBindingService

@@ -56,3 +57,5 @@ def update_annotation_to_index_task(
        )
    except Exception:
        logging.exception("Build index for annotation failed")
    finally:
        db.session.close()
@@ -74,3 +74,5 @@ def batch_clean_document_task(document_ids: list[str], dataset_id: str, doc_form
        )
    except Exception:
        logging.exception("Cleaned documents when documents deleted failed")
    finally:
        db.session.close()
@@ -127,3 +127,5 @@ def batch_create_segment_to_index_task(
    except Exception:
        logging.exception("Segments batch created index failed")
        redis_client.setex(indexing_cache_key, 600, "error")
    finally:
        db.session.close()
@@ -121,3 +121,5 @@ def clean_dataset_task(
        )
    except Exception:
        logging.exception("Cleaned dataset when dataset deleted failed")
    finally:
        db.session.close()
@@ -82,3 +82,5 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i
        )
    except Exception:
        logging.exception("Cleaned document when document deleted failed")
    finally:
        db.session.close()
@@ -53,3 +53,5 @@ def clean_notion_document_task(document_ids: list[str], dataset_id: str):
        )
    except Exception:
        logging.exception("Cleaned document when import form notion document deleted failed")
    finally:
        db.session.close()
@ -5,7 +5,6 @@ from typing import Optional
|
||||
|
||||
import click
|
||||
from celery import shared_task # type: ignore
|
||||
from werkzeug.exceptions import NotFound
|
||||
|
||||
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
|
||||
from core.rag.models.document import Document
|
||||
@ -27,7 +26,9 @@ def create_segment_to_index_task(segment_id: str, keywords: Optional[list[str]]
|
||||
|
||||
segment = db.session.query(DocumentSegment).filter(DocumentSegment.id == segment_id).first()
|
||||
if not segment:
|
||||
raise NotFound("Segment not found")
|
||||
logging.info(click.style("Segment not found: {}".format(segment_id), fg="red"))
|
||||
db.session.close()
|
||||
return
|
||||
|
||||
if segment.status != "waiting":
|
||||
return
|
||||
@ -93,3 +94,4 @@ def create_segment_to_index_task(segment_id: str, keywords: Optional[list[str]]
|
||||
db.session.commit()
|
||||
finally:
|
||||
redis_client.delete(indexing_cache_key)
|
||||
db.session.close()
|
||||
|
@@ -167,3 +167,5 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str):
        )
    except Exception:
        logging.exception("Deal dataset vector index failed")
    finally:
        db.session.close()
@@ -41,3 +41,5 @@ def delete_segment_from_index_task(index_node_ids: list, dataset_id: str, docume
        logging.info(click.style("Segment deleted from index latency: {}".format(end_at - start_at), fg="green"))
    except Exception:
        logging.exception("delete segment from index failed")
    finally:
        db.session.close()
@@ -3,7 +3,6 @@ import time

import click
from celery import shared_task  # type: ignore
from werkzeug.exceptions import NotFound

from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
from extensions.ext_database import db

@@ -24,10 +23,14 @@ def disable_segment_from_index_task(segment_id: str):

    segment = db.session.query(DocumentSegment).filter(DocumentSegment.id == segment_id).first()
    if not segment:
        raise NotFound("Segment not found")
        logging.info(click.style("Segment not found: {}".format(segment_id), fg="red"))
        db.session.close()
        return

    if segment.status != "completed":
        raise NotFound("Segment is not completed , disable action is not allowed.")
        logging.info(click.style("Segment is not completed, disable is not allowed: {}".format(segment_id), fg="red"))
        db.session.close()
        return

    indexing_cache_key = "segment_{}_indexing".format(segment.id)

@@ -62,3 +65,4 @@ def disable_segment_from_index_task(segment_id: str):
        db.session.commit()
    finally:
        redis_client.delete(indexing_cache_key)
        db.session.close()
@@ -26,15 +26,18 @@ def disable_segments_from_index_task(segment_ids: list, dataset_id: str, documen
    dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
    if not dataset:
        logging.info(click.style("Dataset {} not found, pass.".format(dataset_id), fg="cyan"))
        db.session.close()
        return

    dataset_document = db.session.query(DatasetDocument).filter(DatasetDocument.id == document_id).first()

    if not dataset_document:
        logging.info(click.style("Document {} not found, pass.".format(document_id), fg="cyan"))
        db.session.close()
        return
    if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed":
        logging.info(click.style("Document {} status is invalid, pass.".format(document_id), fg="cyan"))
        db.session.close()
        return
    # sync index processor
    index_processor = IndexProcessorFactory(dataset_document.doc_form).init_index_processor()

@@ -50,6 +53,7 @@ def disable_segments_from_index_task(segment_ids: list, dataset_id: str, documen
    )

    if not segments:
        db.session.close()
        return

    try:

@@ -76,3 +80,4 @@ def disable_segments_from_index_task(segment_ids: list, dataset_id: str, documen
        for segment in segments:
            indexing_cache_key = "segment_{}_indexing".format(segment.id)
            redis_client.delete(indexing_cache_key)
        db.session.close()
@@ -4,7 +4,6 @@ import time

import click
from celery import shared_task  # type: ignore
from werkzeug.exceptions import NotFound

from core.indexing_runner import DocumentIsPausedError, IndexingRunner
from core.rag.extractor.notion_extractor import NotionExtractor

@@ -29,7 +28,9 @@ def document_indexing_sync_task(dataset_id: str, document_id: str):
    document = db.session.query(Document).filter(Document.id == document_id, Document.dataset_id == dataset_id).first()

    if not document:
        raise NotFound("Document not found")
        logging.info(click.style("Document not found: {}".format(document_id), fg="red"))
        db.session.close()
        return

    data_source_info = document.data_source_info_dict
    if document.data_source_type == "notion_import":
@@ -27,6 +27,7 @@ def document_indexing_task(dataset_id: str, document_ids: list):
    dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
    if not dataset:
        logging.info(click.style("Dataset is not found: {}".format(dataset_id), fg="yellow"))
        db.session.close()
        return
    # check document limit
    features = FeatureService.get_features(dataset.tenant_id)

@@ -55,6 +56,7 @@ def document_indexing_task(dataset_id: str, document_ids: list):
        document.stopped_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
        db.session.add(document)
        db.session.commit()
        db.session.close()
        return

    for document_id in document_ids:

@@ -80,3 +82,5 @@ def document_indexing_task(dataset_id: str, document_ids: list):
        logging.info(click.style(str(ex), fg="yellow"))
    except Exception:
        pass
    finally:
        db.session.close()
@@ -4,7 +4,6 @@ import time

import click
from celery import shared_task  # type: ignore
from werkzeug.exceptions import NotFound

from core.indexing_runner import DocumentIsPausedError, IndexingRunner
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory

@@ -27,7 +26,9 @@ def document_indexing_update_task(dataset_id: str, document_id: str):
    document = db.session.query(Document).filter(Document.id == document_id, Document.dataset_id == dataset_id).first()

    if not document:
        raise NotFound("Document not found")
        logging.info(click.style("Document not found: {}".format(document_id), fg="red"))
        db.session.close()
        return

    document.indexing_status = "parsing"
    document.processing_started_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)

@@ -73,3 +74,5 @@ def document_indexing_update_task(dataset_id: str, document_id: str):
        logging.info(click.style(str(ex), fg="yellow"))
    except Exception:
        pass
    finally:
        db.session.close()
@@ -27,7 +27,9 @@ def duplicate_document_indexing_task(dataset_id: str, document_ids: list):

    dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
    if dataset is None:
        raise ValueError("Dataset not found")
        logging.info(click.style("Dataset not found: {}".format(dataset_id), fg="red"))
        db.session.close()
        return

    # check document limit
    features = FeatureService.get_features(dataset.tenant_id)

@@ -57,6 +59,8 @@ def duplicate_document_indexing_task(dataset_id: str, document_ids: list):
            db.session.add(document)
            db.session.commit()
            return
        finally:
            db.session.close()

    for document_id in document_ids:
        logging.info(click.style("Start process document: {}".format(document_id), fg="green"))

@@ -96,3 +100,5 @@ def duplicate_document_indexing_task(dataset_id: str, document_ids: list):
        logging.info(click.style(str(ex), fg="yellow"))
    except Exception:
        pass
    finally:
        db.session.close()
@@ -4,7 +4,6 @@ import time

import click
from celery import shared_task  # type: ignore
from werkzeug.exceptions import NotFound

from core.rag.index_processor.constant.index_type import IndexType
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory

@@ -27,10 +26,14 @@ def enable_segment_to_index_task(segment_id: str):

    segment = db.session.query(DocumentSegment).filter(DocumentSegment.id == segment_id).first()
    if not segment:
        raise NotFound("Segment not found")
        logging.info(click.style("Segment not found: {}".format(segment_id), fg="red"))
        db.session.close()
        return

    if segment.status != "completed":
        raise NotFound("Segment is not completed, enable action is not allowed.")
        logging.info(click.style("Segment is not completed, enable is not allowed: {}".format(segment_id), fg="red"))
        db.session.close()
        return

    indexing_cache_key = "segment_{}_indexing".format(segment.id)

@@ -94,3 +97,4 @@ def enable_segment_to_index_task(segment_id: str):
        db.session.commit()
    finally:
        redis_client.delete(indexing_cache_key)
        db.session.close()
@@ -34,9 +34,11 @@ def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_i

    if not dataset_document:
        logging.info(click.style("Document {} not found, pass.".format(document_id), fg="cyan"))
        db.session.close()
        return
    if not dataset_document.enabled or dataset_document.archived or dataset_document.indexing_status != "completed":
        logging.info(click.style("Document {} status is invalid, pass.".format(document_id), fg="cyan"))
        db.session.close()
        return
    # sync index processor
    index_processor = IndexProcessorFactory(dataset_document.doc_form).init_index_processor()

@@ -51,6 +53,8 @@ def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_i
        .all()
    )
    if not segments:
        logging.info(click.style("Segments not found: {}".format(segment_ids), fg="cyan"))
        db.session.close()
        return

    try:

@@ -108,3 +112,4 @@ def enable_segments_to_index_task(segment_ids: list, dataset_id: str, document_i
        for segment in segments:
            indexing_cache_key = "segment_{}_indexing".format(segment.id)
            redis_client.delete(indexing_cache_key)
        db.session.close()
@@ -3,7 +3,6 @@ import time

import click
from celery import shared_task  # type: ignore
from werkzeug.exceptions import NotFound

from core.indexing_runner import DocumentIsPausedError, IndexingRunner
from extensions.ext_database import db

@@ -25,7 +24,9 @@ def recover_document_indexing_task(dataset_id: str, document_id: str):
    document = db.session.query(Document).filter(Document.id == document_id, Document.dataset_id == dataset_id).first()

    if not document:
        raise NotFound("Document not found")
        logging.info(click.style("Document not found: {}".format(document_id), fg="red"))
        db.session.close()
        return

    try:
        indexing_runner = IndexingRunner()

@@ -43,3 +44,5 @@ def recover_document_indexing_task(dataset_id: str, document_id: str):
        logging.info(click.style(str(ex), fg="yellow"))
    except Exception:
        pass
    finally:
        db.session.close()
@@ -4,7 +4,6 @@ import time

import click
from celery import shared_task  # type: ignore
from werkzeug.exceptions import NotFound

from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
from extensions.ext_database import db

@@ -25,9 +24,13 @@ def remove_document_from_index_task(document_id: str):

    document = db.session.query(Document).filter(Document.id == document_id).first()
    if not document:
        raise NotFound("Document not found")
        logging.info(click.style("Document not found: {}".format(document_id), fg="red"))
        db.session.close()
        return

    if document.indexing_status != "completed":
        logging.info(click.style("Document is not completed, remove is not allowed: {}".format(document_id), fg="red"))
        db.session.close()
        return

    indexing_cache_key = "document_{}_indexing".format(document.id)

@@ -71,3 +74,4 @@ def remove_document_from_index_task(document_id: str):
        db.session.commit()
    finally:
        redis_client.delete(indexing_cache_key)
        db.session.close()
@@ -27,7 +27,9 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str]):

    dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
    if not dataset:
        raise ValueError("Dataset not found")
        logging.info(click.style("Dataset not found: {}".format(dataset_id), fg="red"))
        db.session.close()
        return

    for document_id in document_ids:
        retry_indexing_cache_key = "document_{}_is_retried".format(document_id)

@@ -52,6 +54,7 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str]):
            db.session.add(document)
            db.session.commit()
            redis_client.delete(retry_indexing_cache_key)
            db.session.close()
            return

        logging.info(click.style("Start retry document: {}".format(document_id), fg="green"))

@@ -60,6 +63,7 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str]):
        )
        if not document:
            logging.info(click.style("Document not found: {}".format(document_id), fg="yellow"))
            db.session.close()
            return
        try:
            # clean old data

@@ -92,5 +96,7 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str]):
            logging.info(click.style(str(ex), fg="yellow"))
            redis_client.delete(retry_indexing_cache_key)
            pass
        finally:
            db.session.close()
    end_at = time.perf_counter()
    logging.info(click.style("Retry dataset: {} latency: {}".format(dataset_id, end_at - start_at), fg="green"))