feat: add retrieval_top_n to config in env (#11132)

This commit is contained in:
Cling_o3 2024-11-30 11:14:45 +08:00 committed by GitHub
parent 9dd4bf5574
commit f9c2aa7689
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 20 additions and 4 deletions

View File

@ -411,4 +411,6 @@ POSITION_PROVIDER_EXCLUDES=
# Reset password token expiry minutes # Reset password token expiry minutes
RESET_PASSWORD_TOKEN_EXPIRY_MINUTES=5 RESET_PASSWORD_TOKEN_EXPIRY_MINUTES=5
CREATE_TIDB_SERVICE_JOB_ENABLED=false CREATE_TIDB_SERVICE_JOB_ENABLED=false
RETRIEVAL_TOP_N=0

View File

@ -626,6 +626,8 @@ class DataSetConfig(BaseSettings):
default=30, default=30,
) )
RETRIEVAL_TOP_N: int = Field(description="number of retrieval top_n", default=0)
class WorkspaceConfig(BaseSettings): class WorkspaceConfig(BaseSettings):
""" """

View File

@ -3,6 +3,7 @@ from typing import Optional
from flask import Flask, current_app from flask import Flask, current_app
from configs import DifyConfig
from core.rag.data_post_processor.data_post_processor import DataPostProcessor from core.rag.data_post_processor.data_post_processor import DataPostProcessor
from core.rag.datasource.keyword.keyword_factory import Keyword from core.rag.datasource.keyword.keyword_factory import Keyword
from core.rag.datasource.vdb.vector_factory import Vector from core.rag.datasource.vdb.vector_factory import Vector
@ -110,8 +111,12 @@ class RetrievalService:
str(dataset.tenant_id), reranking_mode, reranking_model, weights, False str(dataset.tenant_id), reranking_mode, reranking_model, weights, False
) )
all_documents = data_post_processor.invoke( all_documents = data_post_processor.invoke(
query=query, documents=all_documents, score_threshold=score_threshold, top_n=top_k query=query,
documents=all_documents,
score_threshold=score_threshold,
top_n=DifyConfig.RETRIEVAL_TOP_N or top_k,
) )
return all_documents return all_documents
@classmethod @classmethod
@ -178,7 +183,10 @@ class RetrievalService:
) )
all_documents.extend( all_documents.extend(
data_post_processor.invoke( data_post_processor.invoke(
query=query, documents=documents, score_threshold=score_threshold, top_n=len(documents) query=query,
documents=documents,
score_threshold=score_threshold,
top_n=DifyConfig.RETRIEVAL_TOP_N or len(documents),
) )
) )
else: else:
@ -220,7 +228,10 @@ class RetrievalService:
) )
all_documents.extend( all_documents.extend(
data_post_processor.invoke( data_post_processor.invoke(
query=query, documents=documents, score_threshold=score_threshold, top_n=len(documents) query=query,
documents=documents,
score_threshold=score_threshold,
top_n=DifyConfig.RETRIEVAL_TOP_N or len(documents),
) )
) )
else: else:

View File

@ -287,6 +287,7 @@ x-shared-env: &shared-api-worker-env
OCEANBASE_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai} OCEANBASE_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai}
OCEANBASE_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G} OCEANBASE_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G}
CREATE_TIDB_SERVICE_JOB_ENABLED: ${CREATE_TIDB_SERVICE_JOB_ENABLED:-false} CREATE_TIDB_SERVICE_JOB_ENABLED: ${CREATE_TIDB_SERVICE_JOB_ENABLED:-false}
RETRIEVAL_TOP_N: ${RETRIEVAL_TOP_N:-0}
services: services:
# API service # API service