fix(api): Some params were ignored when creating empty Datasets through API (#17932)
parent 4aecc9f090
commit 1f722cde22
@@ -89,7 +89,7 @@ class AnnotationReplyActionStatusApi(Resource):
         app_annotation_job_key = "{}_app_annotation_job_{}".format(action, str(job_id))
         cache_result = redis_client.get(app_annotation_job_key)
         if cache_result is None:
-            raise ValueError("The job is not exist.")
+            raise ValueError("The job does not exist.")
 
         job_status = cache_result.decode()
         error_msg = ""
@@ -226,7 +226,7 @@ class AnnotationBatchImportStatusApi(Resource):
         indexing_cache_key = "app_annotation_batch_import_{}".format(str(job_id))
         cache_result = redis_client.get(indexing_cache_key)
         if cache_result is None:
-            raise ValueError("The job is not exist.")
+            raise ValueError("The job does not exist.")
         job_status = cache_result.decode()
         error_msg = ""
         if job_status == "error":
@@ -398,7 +398,7 @@ class DatasetDocumentSegmentBatchImportApi(Resource):
         indexing_cache_key = "segment_batch_import_{}".format(job_id)
         cache_result = redis_client.get(indexing_cache_key)
         if cache_result is None:
-            raise ValueError("The job is not exist.")
+            raise ValueError("The job does not exist.")
 
         return {"job_id": job_id, "job_status": cache_result.decode()}, 200
 
@@ -13,6 +13,7 @@ from fields.dataset_fields import dataset_detail_fields
 from libs.login import current_user
 from models.dataset import Dataset, DatasetPermissionEnum
 from services.dataset_service import DatasetPermissionService, DatasetService
+from services.entities.knowledge_entities.knowledge_entities import RetrievalModel
 
 
 def _validate_name(name):
@@ -120,8 +121,11 @@ class DatasetListApi(DatasetApiResource):
             nullable=True,
             required=False,
         )
-        args = parser.parse_args()
+        parser.add_argument("retrieval_model", type=dict, required=False, nullable=True, location="json")
+        parser.add_argument("embedding_model", type=str, required=False, nullable=True, location="json")
+        parser.add_argument("embedding_model_provider", type=str, required=False, nullable=True, location="json")
 
+        args = parser.parse_args()
         try:
             dataset = DatasetService.create_empty_dataset(
                 tenant_id=tenant_id,
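The three new arguments are declared required=False and nullable=True, so existing clients that never send them are unaffected. A minimal standalone sketch of that behaviour using Flask-RESTful's reqparse (the Flask app and test request context below are only scaffolding for the demonstration, not part of the commit):

    # Standalone sketch: how the new optional arguments parse when they are omitted.
    from flask import Flask
    from flask_restful import reqparse

    app = Flask(__name__)

    parser = reqparse.RequestParser()
    parser.add_argument("retrieval_model", type=dict, required=False, nullable=True, location="json")
    parser.add_argument("embedding_model", type=str, required=False, nullable=True, location="json")
    parser.add_argument("embedding_model_provider", type=str, required=False, nullable=True, location="json")

    with app.test_request_context("/datasets", method="POST", json={"name": "test"}):
        args = parser.parse_args()
        # Arguments missing from the JSON body parse to None, so requests that omit
        # the new fields fall through to the service-layer defaults.
        print(args["retrieval_model"], args["embedding_model"], args["embedding_model_provider"])  # None None None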
@@ -133,6 +137,9 @@ class DatasetListApi(DatasetApiResource):
                 provider=args["provider"],
                 external_knowledge_api_id=args["external_knowledge_api_id"],
                 external_knowledge_id=args["external_knowledge_id"],
+                embedding_model_provider=args["embedding_model_provider"],
+                embedding_model_name=args["embedding_model"],
+                retrieval_model=RetrievalModel(**args["retrieval_model"]),
             )
         except services.errors.dataset.DatasetNameDuplicateError:
             raise DatasetNameDuplicateError()
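A note on the new retrieval_model keyword: the parsed value is a plain dict, which is unpacked into the RetrievalModel entity before it reaches the service layer. A hedged sketch of that unpacking with a payload shaped like the one documented later in this commit (the field names are taken from the docs hunks below, the import path is the one added in the import hunk above, and the example assumes it runs inside the Dify api package):

    from services.entities.knowledge_entities.knowledge_entities import RetrievalModel

    # Example of what args["retrieval_model"] might contain; field names follow
    # the API reference changes further down in this commit.
    retrieval_model_dict = {
        "search_method": "semantic_search",
        "reranking_enable": False,
        "reranking_model": {
            "reranking_provider_name": "",
            "reranking_model_name": "",
        },
        "top_k": 2,
        "score_threshold_enabled": False,
        "score_threshold": None,
    }

    retrieval_model = RetrievalModel(**retrieval_model_dict)  # dict -> entity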
@@ -49,7 +49,9 @@ class DocumentAddByTextApi(DatasetApiResource):
         parser.add_argument(
             "indexing_technique", type=str, choices=Dataset.INDEXING_TECHNIQUE_LIST, nullable=False, location="json"
         )
-        parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json")
+        parser.add_argument("retrieval_model", type=dict, required=False, nullable=True, location="json")
+        parser.add_argument("embedding_model", type=str, required=False, nullable=True, location="json")
+        parser.add_argument("embedding_model_provider", type=str, required=False, nullable=True, location="json")
 
         args = parser.parse_args()
         dataset_id = str(dataset_id)
@@ -57,7 +59,7 @@ class DocumentAddByTextApi(DatasetApiResource):
         dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
 
         if not dataset:
-            raise ValueError("Dataset is not exist.")
+            raise ValueError("Dataset does not exist.")
 
         if not dataset.indexing_technique and not args["indexing_technique"]:
             raise ValueError("indexing_technique is required.")
@@ -114,7 +116,7 @@ class DocumentUpdateByTextApi(DatasetApiResource):
         dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
 
         if not dataset:
-            raise ValueError("Dataset is not exist.")
+            raise ValueError("Dataset does not exist.")
 
         # indexing_technique is already set in dataset since this is an update
         args["indexing_technique"] = dataset.indexing_technique
@@ -172,7 +174,7 @@ class DocumentAddByFileApi(DatasetApiResource):
         dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
 
         if not dataset:
-            raise ValueError("Dataset is not exist.")
+            raise ValueError("Dataset does not exist.")
         if not dataset.indexing_technique and not args.get("indexing_technique"):
             raise ValueError("indexing_technique is required.")
 
@@ -239,7 +241,7 @@ class DocumentUpdateByFileApi(DatasetApiResource):
         dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
 
         if not dataset:
-            raise ValueError("Dataset is not exist.")
+            raise ValueError("Dataset does not exist.")
 
         # indexing_technique is already set in dataset since this is an update
         args["indexing_technique"] = dataset.indexing_technique
@@ -303,7 +305,7 @@ class DocumentDeleteApi(DatasetApiResource):
         dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
 
         if not dataset:
-            raise ValueError("Dataset is not exist.")
+            raise ValueError("Dataset does not exist.")
 
         document = DocumentService.get_document(dataset.id, document_id)
 
@@ -444,7 +444,7 @@ class QdrantVectorFactory(AbstractVectorFactory):
             if dataset_collection_binding:
                 collection_name = dataset_collection_binding.collection_name
             else:
-                raise ValueError("Dataset Collection Bindings is not exist!")
+                raise ValueError("Dataset Collection Bindings does not exist!")
         else:
             if dataset.index_struct_dict:
                 class_prefix: str = dataset.index_struct_dict["vector_store"]["class_prefix"]
@@ -169,6 +169,9 @@ class DatasetService:
         provider: str = "vendor",
         external_knowledge_api_id: Optional[str] = None,
         external_knowledge_id: Optional[str] = None,
+        embedding_model_provider: Optional[str] = None,
+        embedding_model_name: Optional[str] = None,
+        retrieval_model: Optional[RetrievalModel] = None,
     ):
         # check if dataset name already exists
         if Dataset.query.filter_by(name=name, tenant_id=tenant_id).first():
@@ -176,9 +179,30 @@ class DatasetService:
         embedding_model = None
         if indexing_technique == "high_quality":
             model_manager = ModelManager()
+            if embedding_model_provider and embedding_model_name:
+                # check if embedding model setting is valid
+                DatasetService.check_embedding_model_setting(tenant_id, embedding_model_provider, embedding_model_name)
+                embedding_model = model_manager.get_model_instance(
+                    tenant_id=tenant_id,
+                    provider=embedding_model_provider,
+                    model_type=ModelType.TEXT_EMBEDDING,
+                    model=embedding_model_name,
+                )
+            else:
                 embedding_model = model_manager.get_default_model_instance(
                     tenant_id=tenant_id, model_type=ModelType.TEXT_EMBEDDING
                 )
+        if retrieval_model and retrieval_model.reranking_model:
+            if (
+                retrieval_model.reranking_model.reranking_provider_name
+                and retrieval_model.reranking_model.reranking_model_name
+            ):
+                # check if reranking model setting is valid
+                DatasetService.check_embedding_model_setting(
+                    tenant_id,
+                    retrieval_model.reranking_model.reranking_provider_name,
+                    retrieval_model.reranking_model.reranking_model_name,
+                )
         dataset = Dataset(name=name, indexing_technique=indexing_technique)
         # dataset = Dataset(name=name, provider=provider, config=config)
         dataset.description = description
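The reranking check above reads retrieval_model.reranking_model.reranking_provider_name and .reranking_model_name as attributes, which implies nested entity objects rather than raw dicts. A hedged sketch of the shape those entities would need, assuming pydantic models (the real definitions live in knowledge_entities and may carry more fields; only the ones referenced by this commit are shown):

    from typing import Optional

    from pydantic import BaseModel


    class RerankingModel(BaseModel):
        # Only the two fields read by the validation branch above.
        reranking_provider_name: Optional[str] = None
        reranking_model_name: Optional[str] = None


    class RetrievalModel(BaseModel):
        # Field names follow the API reference changes later in this commit.
        search_method: str
        reranking_enable: bool = False
        reranking_model: Optional[RerankingModel] = None
        top_k: int = 2
        score_threshold_enabled: bool = False
        score_threshold: Optional[float] = None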
@@ -187,6 +211,7 @@ class DatasetService:
         dataset.tenant_id = tenant_id
         dataset.embedding_model_provider = embedding_model.provider if embedding_model else None
         dataset.embedding_model = embedding_model.model if embedding_model else None
+        dataset.retrieval_model = retrieval_model.model_dump() if retrieval_model else None
         dataset.permission = permission or DatasetPermissionEnum.ONLY_ME
         dataset.provider = provider
         db.session.add(dataset)
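The new dataset.retrieval_model assignment serializes the entity back into a plain dict before it is stored on the Dataset row; model_dump() is pydantic v2's to-dict method. Given a model shaped like the sketch above, for example:

    # Hypothetical round-trip: the dict produced here is what would be persisted
    # in the dataset's retrieval_model column (None when no entity was passed).
    stored = RetrievalModel(search_method="semantic_search", top_k=3).model_dump()
    # e.g. {"search_method": "semantic_search", "reranking_enable": False,
    #       "reranking_model": None, "top_k": 3,
    #       "score_threshold_enabled": False, "score_threshold": None}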
@@ -314,6 +314,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
     </Property>
     <Property name='indexing_technique' type='string' key='indexing_technique'>
       Index technique (optional)
+      If this is not set, embedding_model, embedding_provider_name and retrieval_model will be set to null
       - <code>high_quality</code> High quality
       - <code>economy</code> Economy
     </Property>
@@ -334,6 +335,26 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
     <Property name='external_knowledge_id' type='str' key='external_knowledge_id'>
       External knowledge ID (optional)
     </Property>
+    <Property name='embedding_model' type='str' key='embedding_model'>
+      Embedding model name (optional)
+    </Property>
+    <Property name='embedding_provider_name' type='str' key='embedding_provider_name'>
+      Embedding model provider name (optional)
+    </Property>
+    <Property name='retrieval_model' type='object' key='retrieval_model'>
+      Retrieval model (optional)
+      - <code>search_method</code> (string) Search method
+        - <code>hybrid_search</code> Hybrid search
+        - <code>semantic_search</code> Semantic search
+        - <code>full_text_search</code> Full-text search
+      - <code>reranking_enable</code> (bool) Whether to enable reranking
+      - <code>reranking_model</code> (object) Rerank model configuration
+        - <code>reranking_provider_name</code> (string) Rerank model provider
+        - <code>reranking_model_name</code> (string) Rerank model name
+      - <code>top_k</code> (int) Number of results to return
+      - <code>score_threshold_enabled</code> (bool) Whether to enable score threshold
+      - <code>score_threshold</code> (float) Score threshold
+    </Property>
   </Properties>
 </Col>
 <Col sticky>
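With these fields documented, a create-empty-dataset request can carry the embedding and retrieval settings directly. A hedged sketch of such a call against the dataset service API (the base URL, endpoint path and bearer-token header follow the usual Dify knowledge API conventions and should be adjusted to your deployment; the model name and provider are illustrative, and the body uses embedding_model_provider as accepted by the request parser above, even though this docs hunk labels the property embedding_provider_name):

    import requests

    # Hypothetical deployment values; replace with your own endpoint and API key.
    API_BASE = "https://api.dify.ai/v1"
    API_KEY = "dataset-xxxxxxxx"

    payload = {
        "name": "test-dataset",
        "indexing_technique": "high_quality",
        "permission": "only_me",
        "embedding_model": "text-embedding-3-large",  # illustrative model name
        "embedding_model_provider": "openai",         # illustrative provider
        "retrieval_model": {
            "search_method": "hybrid_search",
            "reranking_enable": False,
            "reranking_model": {
                "reranking_provider_name": "",
                "reranking_model_name": "",
            },
            "top_k": 2,
            "score_threshold_enabled": False,
            "score_threshold": None,
        },
    }

    response = requests.post(
        f"{API_BASE}/datasets",
        headers={"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"},
        json=payload,
        timeout=30,
    )
    response.raise_for_status()
    print(response.json())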
@@ -334,6 +334,26 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
     <Property name='external_knowledge_id' type='str' key='external_knowledge_id'>
       External knowledge ID (optional)
     </Property>
+    <Property name='embedding_model' type='str' key='embedding_model'>
+      Embedding model name (optional)
+    </Property>
+    <Property name='embedding_provider_name' type='str' key='embedding_provider_name'>
+      Embedding model provider name (optional)
+    </Property>
+    <Property name='retrieval_model' type='object' key='retrieval_model'>
+      Retrieval model (optional)
+      - <code>search_method</code> (string) Search method
+        - <code>hybrid_search</code> Hybrid search
+        - <code>semantic_search</code> Semantic search
+        - <code>full_text_search</code> Full-text search
+      - <code>reranking_enable</code> (bool) Whether to enable reranking
+      - <code>reranking_model</code> (object) Rerank model configuration
+        - <code>reranking_provider_name</code> (string) Rerank model provider
+        - <code>reranking_model_name</code> (string) Rerank model name
+      - <code>top_k</code> (int) Number of results returned
+      - <code>score_threshold_enabled</code> (bool) Whether to enable the score threshold
+      - <code>score_threshold</code> (float) Score threshold
+    </Property>
   </Properties>
 </Col>
 <Col sticky>
@@ -335,6 +335,26 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
     <Property name='external_knowledge_id' type='str' key='external_knowledge_id'>
       External knowledge base ID (optional)
     </Property>
+    <Property name='embedding_model' type='str' key='embedding_model'>
+      Embedding model name
+    </Property>
+    <Property name='embedding_provider_name' type='str' key='embedding_provider_name'>
+      Embedding model provider
+    </Property>
+    <Property name='retrieval_model' type='object' key='retrieval_model'>
+      Retrieval mode
+      - <code>search_method</code> (string) Search method
+        - <code>hybrid_search</code> Hybrid search
+        - <code>semantic_search</code> Semantic search
+        - <code>full_text_search</code> Full-text search
+      - <code>reranking_enable</code> (bool) Whether to enable rerank
+      - <code>reranking_model</code> (object) Rerank model configuration
+        - <code>reranking_provider_name</code> (string) Rerank model provider
+        - <code>reranking_model_name</code> (string) Rerank model name
+      - <code>top_k</code> (int) Number of results to recall
+      - <code>score_threshold_enabled</code> (bool) Whether to enable the recall score threshold
+      - <code>score_threshold</code> (float) Recall score threshold
+    </Property>
   </Properties>
 </Col>
 <Col sticky>