chore: model.query change to db.session.query (#19551)
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Commit: 085bd1aa93
Parent: f1e7099541
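The change is mechanical throughout: each legacy Flask-SQLAlchemy Model.query call becomes an explicit db.session.query(Model) (or, for the paginated migration below, a 2.0-style select() statement handed to db.paginate), moving the code off the legacy query property and onto the session-centric API. A minimal, self-contained sketch of the pattern, assuming a toy Item model and an in-memory SQLite database (neither comes from the dify codebase):

    from flask import Flask
    from flask_sqlalchemy import SQLAlchemy
    from sqlalchemy import select

    app = Flask(__name__)
    app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///:memory:"
    db = SQLAlchemy(app)


    class Item(db.Model):
        id = db.Column(db.Integer, primary_key=True)
        name = db.Column(db.String(80))


    with app.app_context():
        db.create_all()

        # Legacy query-property style, which this commit moves away from:
        legacy = Item.query.filter(Item.name == "a").first()

        # Session-based style used throughout the commit:
        current = db.session.query(Item).filter(Item.name == "a").first()

        # 2.0-style statement passed to db.paginate, as in the migration hunk below:
        page = db.paginate(select(Item).order_by(Item.id), page=1, per_page=50, error_out=False)
        print(legacy, current, page.items)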
@@ -552,11 +552,12 @@ def old_metadata_migration():
     page = 1
     while True:
         try:
-            documents = (
-                DatasetDocument.query.filter(DatasetDocument.doc_metadata is not None)
+            stmt = (
+                select(DatasetDocument)
+                .filter(DatasetDocument.doc_metadata.is_not(None))
                 .order_by(DatasetDocument.created_at.desc())
-                .paginate(page=page, per_page=50)
             )
+            documents = db.paginate(select=stmt, page=page, per_page=50, max_per_page=50, error_out=False)
         except NotFound:
             break
         if not documents:
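Two details in the hunk above are easy to miss. The old filter compared the column with Python's "is not None", which tests the column object itself and is therefore always true, so no IS NOT NULL condition ever reached the database; the new DatasetDocument.doc_metadata.is_not(None) generates it properly. Separately, error_out=False makes db.paginate return an empty page for an out-of-range page number instead of aborting with a 404. A small sketch of the operator difference, using a placeholder Doc model rather than the real one:

    from sqlalchemy import JSON, Column, Integer, select
    from sqlalchemy.orm import declarative_base

    Base = declarative_base()


    class Doc(Base):
        __tablename__ = "doc"
        id = Column(Integer, primary_key=True)
        doc_metadata = Column(JSON, nullable=True)


    # A plain Python identity check sees only the column object, never the data:
    print(Doc.doc_metadata is not None)  # True, a bare bool; no SQL is produced

    # The SQL operator builds the intended condition:
    print(select(Doc).filter(Doc.doc_metadata.is_not(None)))
    # SELECT ... FROM doc WHERE doc.doc_metadata IS NOT NULL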
@@ -66,7 +66,7 @@ class InstalledAppsListApi(Resource):
         parser.add_argument("app_id", type=str, required=True, help="Invalid app_id")
         args = parser.parse_args()

-        recommended_app = RecommendedApp.query.filter(RecommendedApp.app_id == args["app_id"]).first()
+        recommended_app = db.session.query(RecommendedApp).filter(RecommendedApp.app_id == args["app_id"]).first()
         if recommended_app is None:
             raise NotFound("App not found")

@@ -79,9 +79,11 @@ class InstalledAppsListApi(Resource):
         if not app.is_public:
             raise Forbidden("You can't install a non-public app")

-        installed_app = InstalledApp.query.filter(
-            and_(InstalledApp.app_id == args["app_id"], InstalledApp.tenant_id == current_tenant_id)
-        ).first()
+        installed_app = (
+            db.session.query(InstalledApp)
+            .filter(and_(InstalledApp.app_id == args["app_id"], InstalledApp.tenant_id == current_tenant_id))
+            .first()
+        )

         if installed_app is None:
             # todo: position
@@ -1,3 +1,5 @@
+import logging
+
 from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
 from core.app.entities.app_invoke_entities import InvokeFrom
 from core.app.entities.queue_entities import QueueRetrieverResourcesEvent
@@ -7,6 +9,8 @@ from extensions.ext_database import db
 from models.dataset import ChildChunk, DatasetQuery, DocumentSegment
 from models.dataset import Document as DatasetDocument

+_logger = logging.getLogger(__name__)
+

 class DatasetIndexToolCallbackHandler:
     """Callback handler for dataset tool."""
@@ -42,9 +46,14 @@ class DatasetIndexToolCallbackHandler:
         """Handle tool end."""
         for document in documents:
             if document.metadata is not None:
-                dataset_document = DatasetDocument.query.filter(
-                    DatasetDocument.id == document.metadata["document_id"]
-                ).first()
+                document_id = document.metadata["document_id"]
+                dataset_document = db.session.query(DatasetDocument).filter(DatasetDocument.id == document_id).first()
+                if not dataset_document:
+                    _logger.warning(
+                        "Expected DatasetDocument record to exist, but none was found, document_id=%s",
+                        document_id,
+                    )
+                    continue
                 if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX:
                     child_chunk = (
                         db.session.query(ChildChunk)
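The "if not dataset_document" guard introduced here (and mirrored in the clean_messages and VectorService hunks further down) also changes failure behaviour slightly: where the old code would have raised AttributeError on a missing record, the rewritten code logs a warning with the offending id and skips the item — a behavioural note inferred from the diff rather than from the commit message.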
@@ -660,10 +660,10 @@ class IndexingRunner:
         """
         Update the document indexing status.
         """
-        count = DatasetDocument.query.filter_by(id=document_id, is_paused=True).count()
+        count = db.session.query(DatasetDocument).filter_by(id=document_id, is_paused=True).count()
         if count > 0:
             raise DocumentIsPausedError()
-        document = DatasetDocument.query.filter_by(id=document_id).first()
+        document = db.session.query(DatasetDocument).filter_by(id=document_id).first()
         if not document:
             raise DocumentIsDeletedPausedError()

@@ -672,7 +672,7 @@ class IndexingRunner:
         if extra_update_params:
             update_params.update(extra_update_params)

-        DatasetDocument.query.filter_by(id=document_id).update(update_params)
+        db.session.query(DatasetDocument).filter_by(id=document_id).update(update_params)
         db.session.commit()

     @staticmethod
@@ -317,7 +317,7 @@ class NotionExtractor(BaseExtractor):
         data_source_info["last_edited_time"] = last_edited_time
         update_params = {DocumentModel.data_source_info: json.dumps(data_source_info)}

-        DocumentModel.query.filter_by(id=document_model.id).update(update_params)
+        db.session.query(DocumentModel).filter_by(id=document_model.id).update(update_params)
         db.session.commit()

     def get_notion_last_edited_time(self) -> str:
@@ -238,11 +238,15 @@ class DatasetRetrieval:
             for record in records:
                 segment = record.segment
                 dataset = db.session.query(Dataset).filter_by(id=segment.dataset_id).first()
-                document = DatasetDocument.query.filter(
-                    DatasetDocument.id == segment.document_id,
-                    DatasetDocument.enabled == True,
-                    DatasetDocument.archived == False,
-                ).first()
+                document = (
+                    db.session.query(DatasetDocument)
+                    .filter(
+                        DatasetDocument.id == segment.document_id,
+                        DatasetDocument.enabled == True,
+                        DatasetDocument.archived == False,
+                    )
+                    .first()
+                )
                 if dataset and document:
                     source = {
                         "dataset_id": dataset.id,
@@ -506,9 +510,11 @@ class DatasetRetrieval:
         dify_documents = [document for document in documents if document.provider == "dify"]
         for document in dify_documents:
             if document.metadata is not None:
-                dataset_document = DatasetDocument.query.filter(
-                    DatasetDocument.id == document.metadata["document_id"]
-                ).first()
+                dataset_document = (
+                    db.session.query(DatasetDocument)
+                    .filter(DatasetDocument.id == document.metadata["document_id"])
+                    .first()
+                )
                 if dataset_document:
                     if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX:
                         child_chunk = (
@@ -186,11 +186,15 @@ class DatasetRetrieverTool(DatasetRetrieverBaseTool):
             for record in records:
                 segment = record.segment
                 dataset = db.session.query(Dataset).filter_by(id=segment.dataset_id).first()
-                document = DatasetDocument.query.filter(
-                    DatasetDocument.id == segment.document_id,
-                    DatasetDocument.enabled == True,
-                    DatasetDocument.archived == False,
-                ).first()
+                document = (
+                    db.session.query(DatasetDocument)  # type: ignore
+                    .filter(
+                        DatasetDocument.id == segment.document_id,
+                        DatasetDocument.enabled == True,
+                        DatasetDocument.archived == False,
+                    )
+                    .first()
+                )
                 if dataset and document:
                     source = {
                         "dataset_id": dataset.id,
@@ -1,4 +1,5 @@
 import datetime
+import logging
 import time

 import click
@@ -20,6 +21,8 @@ from models.model import (
 from models.web import SavedMessage
 from services.feature_service import FeatureService

+_logger = logging.getLogger(__name__)
+

 @app.celery.task(queue="dataset")
 def clean_messages():
@@ -46,7 +49,14 @@ def clean_messages():
             break
         for message in messages:
             plan_sandbox_clean_message_day = message.created_at
-            app = App.query.filter_by(id=message.app_id).first()
+            app = db.session.query(App).filter_by(id=message.app_id).first()
+            if not app:
+                _logger.warning(
+                    "Expected App record to exist, but none was found, app_id=%s, message_id=%s",
+                    message.app_id,
+                    message.id,
+                )
+                continue
             features_cache_key = f"features:{app.tenant_id}"
             plan_cache = redis_client.get(features_cache_key)
             if plan_cache is None:
@@ -54,7 +54,7 @@ def mail_clean_document_notify_task():
             )
             if not current_owner_join:
                 continue
-            account = Account.query.filter(Account.id == current_owner_join.account_id).first()
+            account = db.session.query(Account).filter(Account.id == current_owner_join.account_id).first()
             if not account:
                 continue

@@ -1,3 +1,4 @@
+import logging
 from typing import Optional

 from core.model_manager import ModelInstance, ModelManager
@@ -12,6 +13,8 @@ from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegment
 from models.dataset import Document as DatasetDocument
 from services.entities.knowledge_entities.knowledge_entities import ParentMode

+_logger = logging.getLogger(__name__)
+

 class VectorService:
     @classmethod
@@ -22,7 +25,14 @@ class VectorService:

         for segment in segments:
             if doc_form == IndexType.PARENT_CHILD_INDEX:
-                document = DatasetDocument.query.filter_by(id=segment.document_id).first()
+                document = db.session.query(DatasetDocument).filter_by(id=segment.document_id).first()
+                if not document:
+                    _logger.warning(
+                        "Expected DatasetDocument record to exist, but none was found, document_id=%s, segment_id=%s",
+                        segment.document_id,
+                        segment.id,
+                    )
+                    continue
                 # get the process rule
                 processing_rule = (
                     db.session.query(DatasetProcessRule)
@@ -52,7 +62,7 @@ class VectorService:
                     raise ValueError("The knowledge base index technique is not high quality!")
                 cls.generate_child_chunks(segment, document, dataset, embedding_model_instance, processing_rule, False)
             else:
-                document = Document(
+                document = Document(  # type: ignore
                     page_content=segment.content,
                     metadata={
                         "doc_id": segment.index_node_id,
@@ -64,7 +74,7 @@ class VectorService:
                 documents.append(document)
         if len(documents) > 0:
             index_processor = IndexProcessorFactory(doc_form).init_index_processor()
-            index_processor.load(dataset, documents, with_keywords=True, keywords_list=keywords_list)
+            index_processor.load(dataset, documents, with_keywords=True, keywords_list=keywords_list)  # type: ignore

     @classmethod
     def update_segment_vector(cls, keywords: Optional[list[str]], segment: DocumentSegment, dataset: Dataset):