mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-14 18:56:00 +08:00
fix parent-child retrieval count (#15119)
This commit is contained in:
parent
2a6e522a87
commit
435564f0f2
@ -1,9 +1,11 @@
|
|||||||
from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
|
from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
|
||||||
from core.app.entities.app_invoke_entities import InvokeFrom
|
from core.app.entities.app_invoke_entities import InvokeFrom
|
||||||
from core.app.entities.queue_entities import QueueRetrieverResourcesEvent
|
from core.app.entities.queue_entities import QueueRetrieverResourcesEvent
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexType
|
||||||
from core.rag.models.document import Document
|
from core.rag.models.document import Document
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
from models.dataset import DatasetQuery, DocumentSegment
|
from models.dataset import ChildChunk, DatasetQuery, DocumentSegment
|
||||||
|
from models.dataset import Document as DatasetDocument
|
||||||
from models.model import DatasetRetrieverResource
|
from models.model import DatasetRetrieverResource
|
||||||
|
|
||||||
|
|
||||||
@ -41,6 +43,20 @@ class DatasetIndexToolCallbackHandler:
|
|||||||
"""Handle tool end."""
|
"""Handle tool end."""
|
||||||
for document in documents:
|
for document in documents:
|
||||||
if document.metadata is not None:
|
if document.metadata is not None:
|
||||||
|
dataset_document = DatasetDocument.query.filter(
|
||||||
|
DatasetDocument.id == document.metadata["document_id"]
|
||||||
|
).first()
|
||||||
|
if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX:
|
||||||
|
child_chunk = ChildChunk.query.filter(
|
||||||
|
ChildChunk.index_node_id == document.metadata["doc_id"],
|
||||||
|
ChildChunk.dataset_id == dataset_document.dataset_id,
|
||||||
|
ChildChunk.document_id == dataset_document.id,
|
||||||
|
).first()
|
||||||
|
if child_chunk:
|
||||||
|
segment = DocumentSegment.query.filter(DocumentSegment.id == child_chunk.segment_id).update(
|
||||||
|
{DocumentSegment.hit_count: DocumentSegment.hit_count + 1}, synchronize_session=False
|
||||||
|
)
|
||||||
|
else:
|
||||||
query = db.session.query(DocumentSegment).filter(
|
query = db.session.query(DocumentSegment).filter(
|
||||||
DocumentSegment.index_node_id == document.metadata["doc_id"]
|
DocumentSegment.index_node_id == document.metadata["doc_id"]
|
||||||
)
|
)
|
||||||
|
@ -21,6 +21,7 @@ from core.rag.data_post_processor.data_post_processor import DataPostProcessor
|
|||||||
from core.rag.datasource.keyword.jieba.jieba_keyword_table_handler import JiebaKeywordTableHandler
|
from core.rag.datasource.keyword.jieba.jieba_keyword_table_handler import JiebaKeywordTableHandler
|
||||||
from core.rag.datasource.retrieval_service import RetrievalService
|
from core.rag.datasource.retrieval_service import RetrievalService
|
||||||
from core.rag.entities.context_entities import DocumentContext
|
from core.rag.entities.context_entities import DocumentContext
|
||||||
|
from core.rag.index_processor.constant.index_type import IndexType
|
||||||
from core.rag.models.document import Document
|
from core.rag.models.document import Document
|
||||||
from core.rag.rerank.rerank_type import RerankMode
|
from core.rag.rerank.rerank_type import RerankMode
|
||||||
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
||||||
@ -28,7 +29,7 @@ from core.rag.retrieval.router.multi_dataset_function_call_router import Functio
|
|||||||
from core.rag.retrieval.router.multi_dataset_react_route import ReactMultiDatasetRouter
|
from core.rag.retrieval.router.multi_dataset_react_route import ReactMultiDatasetRouter
|
||||||
from core.tools.utils.dataset_retriever.dataset_retriever_base_tool import DatasetRetrieverBaseTool
|
from core.tools.utils.dataset_retriever.dataset_retriever_base_tool import DatasetRetrieverBaseTool
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
from models.dataset import Dataset, DatasetQuery, DocumentSegment
|
from models.dataset import ChildChunk, Dataset, DatasetQuery, DocumentSegment
|
||||||
from models.dataset import Document as DatasetDocument
|
from models.dataset import Document as DatasetDocument
|
||||||
from services.external_knowledge_service import ExternalDatasetService
|
from services.external_knowledge_service import ExternalDatasetService
|
||||||
|
|
||||||
@ -429,6 +430,21 @@ class DatasetRetrieval:
|
|||||||
dify_documents = [document for document in documents if document.provider == "dify"]
|
dify_documents = [document for document in documents if document.provider == "dify"]
|
||||||
for document in dify_documents:
|
for document in dify_documents:
|
||||||
if document.metadata is not None:
|
if document.metadata is not None:
|
||||||
|
dataset_document = DatasetDocument.query.filter(
|
||||||
|
DatasetDocument.id == document.metadata["document_id"]
|
||||||
|
).first()
|
||||||
|
if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX:
|
||||||
|
child_chunk = ChildChunk.query.filter(
|
||||||
|
ChildChunk.index_node_id == document.metadata["doc_id"],
|
||||||
|
ChildChunk.dataset_id == dataset_document.dataset_id,
|
||||||
|
ChildChunk.document_id == dataset_document.id,
|
||||||
|
).first()
|
||||||
|
if child_chunk:
|
||||||
|
segment = DocumentSegment.query.filter(DocumentSegment.id == child_chunk.segment_id).update(
|
||||||
|
{DocumentSegment.hit_count: DocumentSegment.hit_count + 1}, synchronize_session=False
|
||||||
|
)
|
||||||
|
db.session.commit()
|
||||||
|
else:
|
||||||
query = db.session.query(DocumentSegment).filter(
|
query = db.session.query(DocumentSegment).filter(
|
||||||
DocumentSegment.index_node_id == document.metadata["doc_id"]
|
DocumentSegment.index_node_id == document.metadata["doc_id"]
|
||||||
)
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user