mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-15 05:46:24 +08:00
update knowledge base api (#20426)
This commit is contained in:
parent
55371e5abf
commit
1ea4459d9f
@ -208,6 +208,28 @@ class DatasetSegmentApi(DatasetApiResource):
|
|||||||
)
|
)
|
||||||
return {"data": marshal(updated_segment, segment_fields), "doc_form": document.doc_form}, 200
|
return {"data": marshal(updated_segment, segment_fields), "doc_form": document.doc_form}, 200
|
||||||
|
|
||||||
|
def get(self, tenant_id, dataset_id, document_id, segment_id):
    """Return a single segment of a document together with the document's ``doc_form``.

    Args:
        tenant_id: Tenant that must own the dataset (coerced to ``str``).
        dataset_id: Dataset (knowledge base) ID from the URL path.
        document_id: Document ID from the URL path.
        segment_id: Segment ID from the URL path.

    Returns:
        A ``(body, 200)`` tuple where ``body["data"]`` is the segment marshalled
        with ``segment_fields`` and ``body["doc_form"]`` is ``document.doc_form``.

    Raises:
        NotFound: If the dataset, the document, or the segment cannot be found,
            or if the segment does not belong to the requested document.
    """
    # check dataset
    dataset_id = str(dataset_id)
    tenant_id = str(tenant_id)
    dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
    if not dataset:
        raise NotFound("Dataset not found.")
    # check user's model setting
    # NOTE(review): presumably raises when the dataset's model configuration is
    # unusable -- confirm against DatasetService.
    DatasetService.check_dataset_model_setting(dataset)
    # check document
    document_id = str(document_id)
    document = DocumentService.get_document(dataset_id, document_id)
    if not document:
        raise NotFound("Document not found.")
    # check segment
    segment_id = str(segment_id)
    segment = SegmentService.get_segment_by_id(segment_id=segment_id, tenant_id=current_user.current_tenant_id)
    # The lookup above is keyed on segment id and tenant only, so also verify the
    # segment belongs to the document named in the URL. Otherwise a segment of a
    # different document would be returned with this document's doc_form.
    if not segment or str(segment.document_id) != document_id:
        raise NotFound("Segment not found.")

    return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200
|
||||||
|
|
||||||
|
|
||||||
class ChildChunkApi(DatasetApiResource):
|
class ChildChunkApi(DatasetApiResource):
|
||||||
"""Resource for child chunks."""
|
"""Resource for child chunks."""
|
||||||
|
@ -937,6 +937,9 @@ class DatasetRetrieval:
|
|||||||
return metadata_filter_document_ids, metadata_condition
|
return metadata_filter_document_ids, metadata_condition
|
||||||
|
|
||||||
def _replace_metadata_filter_value(self, text: str, inputs: dict) -> str:
|
def _replace_metadata_filter_value(self, text: str, inputs: dict) -> str:
|
||||||
|
if not inputs:
|
||||||
|
return text
|
||||||
|
|
||||||
def replacer(match):
|
def replacer(match):
|
||||||
key = match.group(1)
|
key = match.group(1)
|
||||||
return str(inputs.get(key, f"{{{{{key}}}}}"))
|
return str(inputs.get(key, f"{{{{{key}}}}}"))
|
||||||
|
@ -2,8 +2,11 @@ import logging
|
|||||||
import time
|
import time
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
from core.app.app_config.entities import ModelConfig
|
||||||
|
from core.model_runtime.entities import LLMMode
|
||||||
from core.rag.datasource.retrieval_service import RetrievalService
|
from core.rag.datasource.retrieval_service import RetrievalService
|
||||||
from core.rag.models.document import Document
|
from core.rag.models.document import Document
|
||||||
|
from core.rag.retrieval.dataset_retrieval import DatasetRetrieval
|
||||||
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
from core.rag.retrieval.retrieval_methods import RetrievalMethod
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
from models.account import Account
|
from models.account import Account
|
||||||
@ -34,7 +37,29 @@ class HitTestingService:
|
|||||||
# get retrieval model , if the model is not setting , using default
|
# get retrieval model , if the model is not setting , using default
|
||||||
if not retrieval_model:
|
if not retrieval_model:
|
||||||
retrieval_model = dataset.retrieval_model or default_retrieval_model
|
retrieval_model = dataset.retrieval_model or default_retrieval_model
|
||||||
|
document_ids_filter = None
|
||||||
|
metadata_filtering_conditions = retrieval_model.get("metadata_filtering_conditions", {})
|
||||||
|
if metadata_filtering_conditions:
|
||||||
|
dataset_retrieval = DatasetRetrieval()
|
||||||
|
|
||||||
|
from core.app.app_config.entities import MetadataFilteringCondition
|
||||||
|
|
||||||
|
metadata_filtering_conditions = MetadataFilteringCondition(**metadata_filtering_conditions)
|
||||||
|
|
||||||
|
metadata_filter_document_ids, metadata_condition = dataset_retrieval.get_metadata_filter_condition(
|
||||||
|
dataset_ids=[dataset.id],
|
||||||
|
query=query,
|
||||||
|
metadata_filtering_mode="manual",
|
||||||
|
metadata_filtering_conditions=metadata_filtering_conditions,
|
||||||
|
inputs={},
|
||||||
|
tenant_id="",
|
||||||
|
user_id="",
|
||||||
|
metadata_model_config=ModelConfig(provider="", name="", mode=LLMMode.CHAT, completion_params={}),
|
||||||
|
)
|
||||||
|
if metadata_filter_document_ids:
|
||||||
|
document_ids_filter = metadata_filter_document_ids.get(dataset.id, [])
|
||||||
|
if metadata_condition and not document_ids_filter:
|
||||||
|
return cls.compact_retrieve_response(query, [])
|
||||||
all_documents = RetrievalService.retrieve(
|
all_documents = RetrievalService.retrieve(
|
||||||
retrieval_method=retrieval_model.get("search_method", "semantic_search"),
|
retrieval_method=retrieval_model.get("search_method", "semantic_search"),
|
||||||
dataset_id=dataset.id,
|
dataset_id=dataset.id,
|
||||||
@ -48,6 +73,7 @@ class HitTestingService:
|
|||||||
else None,
|
else None,
|
||||||
reranking_mode=retrieval_model.get("reranking_mode") or "reranking_model",
|
reranking_mode=retrieval_model.get("reranking_mode") or "reranking_model",
|
||||||
weights=retrieval_model.get("weights", None),
|
weights=retrieval_model.get("weights", None),
|
||||||
|
document_ids_filter=document_ids_filter,
|
||||||
)
|
)
|
||||||
|
|
||||||
end = time.perf_counter()
|
end = time.perf_counter()
|
||||||
@ -99,7 +125,7 @@ class HitTestingService:
|
|||||||
return dict(cls.compact_external_retrieve_response(dataset, query, all_documents))
|
return dict(cls.compact_external_retrieve_response(dataset, query, all_documents))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def compact_retrieve_response(cls, query: str, documents: list[Document]):
|
def compact_retrieve_response(cls, query: str, documents: list[Document]) -> dict[Any, Any]:
|
||||||
records = RetrievalService.format_retrieval_documents(documents)
|
records = RetrievalService.format_retrieval_documents(documents)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -1298,6 +1298,76 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
|||||||
|
|
||||||
<hr className='ml-0 mr-0' />
|
<hr className='ml-0 mr-0' />
|
||||||
|
|
||||||
|
<Heading
|
||||||
|
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
|
||||||
|
method='GET'
|
||||||
|
title='Get Chunk Details in a Document'
|
||||||
|
name='#view_document_chunk'
|
||||||
|
/>
|
||||||
|
<Row>
|
||||||
|
<Col>
|
||||||
|
Get details of a specific document segment in the specified knowledge base
|
||||||
|
|
||||||
|
### Path
|
||||||
|
<Properties>
|
||||||
|
<Property name='dataset_id' type='string' key='dataset_id'>
|
||||||
|
Knowledge Base ID
|
||||||
|
</Property>
|
||||||
|
<Property name='document_id' type='string' key='document_id'>
|
||||||
|
Document ID
|
||||||
|
</Property>
|
||||||
|
<Property name='segment_id' type='string' key='segment_id'>
|
||||||
|
Segment ID
|
||||||
|
</Property>
|
||||||
|
</Properties>
|
||||||
|
</Col>
|
||||||
|
<Col sticky>
|
||||||
|
<CodeGroup
|
||||||
|
title="Request"
|
||||||
|
tag="GET"
|
||||||
|
label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
|
||||||
|
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
|
||||||
|
>
|
||||||
|
```bash {{ title: 'cURL' }}
|
||||||
|
curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
|
||||||
|
--header 'Authorization: Bearer {api_key}'
|
||||||
|
```
|
||||||
|
</CodeGroup>
|
||||||
|
<CodeGroup title="Response">
|
||||||
|
```json {{ title: 'Response' }}
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"id": "chunk_id",
|
||||||
|
"position": 2,
|
||||||
|
"document_id": "document_id",
|
||||||
|
"content": "Segment content text",
|
||||||
|
"sign_content": "Signature content text",
|
||||||
|
"answer": "Answer content (if in Q&A mode)",
|
||||||
|
"word_count": 470,
|
||||||
|
"tokens": 382,
|
||||||
|
"keywords": ["keyword1", "keyword2"],
|
||||||
|
"index_node_id": "index_node_id",
|
||||||
|
"index_node_hash": "index_node_hash",
|
||||||
|
"hit_count": 0,
|
||||||
|
"enabled": true,
|
||||||
|
"status": "completed",
|
||||||
|
"created_by": "creator_id",
|
||||||
|
"created_at": creation_timestamp,
|
||||||
|
"updated_at": update_timestamp,
|
||||||
|
"indexing_at": indexing_timestamp,
|
||||||
|
"completed_at": completion_timestamp,
|
||||||
|
"error": null,
|
||||||
|
"child_chunks": []
|
||||||
|
},
|
||||||
|
"doc_form": "text_model"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
</CodeGroup>
|
||||||
|
</Col>
|
||||||
|
</Row>
|
||||||
|
|
||||||
|
<hr className='ml-0 mr-0' />
|
||||||
|
|
||||||
<Heading
|
<Heading
|
||||||
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
|
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
|
||||||
method='DELETE'
|
method='DELETE'
|
||||||
@ -1771,20 +1841,45 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
|||||||
Query keyword
|
Query keyword
|
||||||
</Property>
|
</Property>
|
||||||
<Property name='retrieval_model' type='object' key='retrieval_model'>
|
<Property name='retrieval_model' type='object' key='retrieval_model'>
|
||||||
Retrieval model (optional, if not filled, it will be recalled according to the default method)
|
Retrieval parameters (optional, if not filled, it will be recalled according to the default method)
|
||||||
- <code>search_method</code> (text) Search method: One of the following four keywords is required
|
- <code>search_method</code> (text) Search method: One of the following four keywords is required
|
||||||
- <code>keyword_search</code> Keyword search
|
- <code>keyword_search</code> Keyword search
|
||||||
- <code>semantic_search</code> Semantic search
|
- <code>semantic_search</code> Semantic search
|
||||||
- <code>full_text_search</code> Full-text search
|
- <code>full_text_search</code> Full-text search
|
||||||
- <code>hybrid_search</code> Hybrid search
|
- <code>hybrid_search</code> Hybrid search
|
||||||
- <code>reranking_enable</code> (bool) Whether to enable reranking, required if the search mode is semantic_search or hybrid_search (optional)
|
- <code>reranking_enable</code> (bool) Whether to enable reranking, required if the search mode is semantic_search or hybrid_search (optional)
|
||||||
- <code>reranking_mode</code> (object) Rerank model configuration, required if reranking is enabled
|
- <code>reranking_mode</code> (object) Rerank model configuration, required if reranking is enabled
|
||||||
- <code>reranking_provider_name</code> (string) Rerank model provider
|
- <code>reranking_provider_name</code> (string) Rerank model provider
|
||||||
- <code>reranking_model_name</code> (string) Rerank model name
|
- <code>reranking_model_name</code> (string) Rerank model name
|
||||||
- <code>weights</code> (float) Semantic search weight setting in hybrid search mode
|
- <code>weights</code> (float) Semantic search weight setting in hybrid search mode
|
||||||
- <code>top_k</code> (integer) Number of results to return (optional)
|
- <code>top_k</code> (integer) Number of results to return (optional)
|
||||||
- <code>score_threshold_enabled</code> (bool) Whether to enable score threshold
|
- <code>score_threshold_enabled</code> (bool) Whether to enable score threshold
|
||||||
- <code>score_threshold</code> (float) Score threshold
|
- <code>score_threshold</code> (float) Score threshold
|
||||||
|
- <code>metadata_filtering_conditions</code> (object) Metadata filtering conditions
|
||||||
|
- <code>logical_operator</code> (string) Logical operator: <code>and</code> | <code>or</code>
|
||||||
|
- <code>conditions</code> (array[object]) Conditions list
|
||||||
|
- <code>name</code> (string) Metadata field name
|
||||||
|
- <code>comparison_operator</code> (string) Comparison operator, allowed values:
|
||||||
|
- String comparison:
|
||||||
|
- <code>contains</code>: Contains
|
||||||
|
- <code>not contains</code>: Does not contain
|
||||||
|
- <code>start with</code>: Starts with
|
||||||
|
- <code>end with</code>: Ends with
|
||||||
|
- <code>is</code>: Equals
|
||||||
|
- <code>is not</code>: Does not equal
|
||||||
|
- <code>empty</code>: Is empty
|
||||||
|
- <code>not empty</code>: Is not empty
|
||||||
|
- Numeric comparison:
|
||||||
|
- <code>=</code>: Equals
|
||||||
|
- <code>≠</code>: Does not equal
|
||||||
|
- <code>></code>: Greater than
|
||||||
|
- <code>< </code>: Less than
|
||||||
|
- <code>≥</code>: Greater than or equal
|
||||||
|
- <code>≤</code>: Less than or equal
|
||||||
|
- Time comparison:
|
||||||
|
- <code>before</code>: Before
|
||||||
|
- <code>after</code>: After
|
||||||
|
- <code>value</code> (string|number|null) Comparison value
|
||||||
</Property>
|
</Property>
|
||||||
<Property name='external_retrieval_model' type='object' key='external_retrieval_model'>
|
<Property name='external_retrieval_model' type='object' key='external_retrieval_model'>
|
||||||
Unused field
|
Unused field
|
||||||
@ -1809,7 +1904,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
|||||||
"weights": null,
|
"weights": null,
|
||||||
"top_k": 1,
|
"top_k": 1,
|
||||||
"score_threshold_enabled": false,
|
"score_threshold_enabled": false,
|
||||||
"score_threshold": null
|
"score_threshold": null,
|
||||||
|
"metadata_filtering_conditions": {
|
||||||
|
"logical_operator": "and",
|
||||||
|
"conditions": [
|
||||||
|
{
|
||||||
|
"name": "document_name",
|
||||||
|
"comparison_operator": "contains",
|
||||||
|
"value": "test"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}'`}
|
}'`}
|
||||||
>
|
>
|
||||||
@ -2089,9 +2194,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
|||||||
label="/datasets/{dataset_id}/documents/metadata"
|
label="/datasets/{dataset_id}/documents/metadata"
|
||||||
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/metadata' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'\\\n--data-raw '{"operation_data": [{"document_id": "document_id", "metadata_list": [{"id": "id", "value": "value", "name": "name"}]}]}'`}
|
targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/metadata' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'\\\n--data-raw '{"operation_data": [{"document_id": "document_id", "metadata_list": [{"id": "id", "value": "value", "name": "name"}]}]}'`}
|
||||||
>
|
>
|
||||||
```bash {{ title: 'cURL' }}
|
```bash {{ title: 'cURL' }}
```
</CodeGroup>
|
||||||
```
|
|
||||||
</CodeGroup>
|
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
|
|
||||||
|
@ -1057,6 +1057,75 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
|||||||
|
|
||||||
<Heading
|
<Heading
|
||||||
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
|
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
|
||||||
|
method='GET'
|
||||||
|
title='ドキュメントセグメントの詳細を表示'
|
||||||
|
name='#view_document_segment'
|
||||||
|
/>
|
||||||
|
<Row>
|
||||||
|
<Col>
|
||||||
|
指定されたナレッジベース内の特定のドキュメントセグメントの詳細を表示します
|
||||||
|
|
||||||
|
### パス
|
||||||
|
<Properties>
|
||||||
|
<Property name='dataset_id' type='string' key='dataset_id'>
|
||||||
|
ナレッジベースID
|
||||||
|
</Property>
|
||||||
|
<Property name='document_id' type='string' key='document_id'>
|
||||||
|
ドキュメントID
|
||||||
|
</Property>
|
||||||
|
<Property name='segment_id' type='string' key='segment_id'>
|
||||||
|
セグメントID
|
||||||
|
</Property>
|
||||||
|
</Properties>
|
||||||
|
</Col>
|
||||||
|
<Col sticky>
|
||||||
|
<CodeGroup
|
||||||
|
title="リクエスト"
|
||||||
|
tag="GET"
|
||||||
|
label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
|
||||||
|
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
|
||||||
|
>
|
||||||
|
```bash {{ title: 'cURL' }}
|
||||||
|
curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
|
||||||
|
--header 'Authorization: Bearer {api_key}'
|
||||||
|
```
|
||||||
|
</CodeGroup>
|
||||||
|
<CodeGroup title="レスポンス">
|
||||||
|
```json {{ title: 'Response' }}
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"id": "セグメントID",
|
||||||
|
"position": 2,
|
||||||
|
"document_id": "ドキュメントID",
|
||||||
|
"content": "セグメント内容テキスト",
|
||||||
|
"sign_content": "署名内容テキスト",
|
||||||
|
"answer": "回答内容(Q&Aモードの場合)",
|
||||||
|
"word_count": 470,
|
||||||
|
"tokens": 382,
|
||||||
|
"keywords": ["キーワード1", "キーワード2"],
|
||||||
|
"index_node_id": "インデックスノードID",
|
||||||
|
"index_node_hash": "インデックスノードハッシュ",
|
||||||
|
"hit_count": 0,
|
||||||
|
"enabled": true,
|
||||||
|
"status": "completed",
|
||||||
|
"created_by": "作成者ID",
|
||||||
|
"created_at": 作成タイムスタンプ,
|
||||||
|
"updated_at": 更新タイムスタンプ,
|
||||||
|
"indexing_at": インデックス作成タイムスタンプ,
|
||||||
|
"completed_at": 完了タイムスタンプ,
|
||||||
|
"error": null,
|
||||||
|
"child_chunks": []
|
||||||
|
},
|
||||||
|
"doc_form": "text_model"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
</CodeGroup>
|
||||||
|
</Col>
|
||||||
|
</Row>
|
||||||
|
|
||||||
|
<hr className='ml-0 mr-0' />
|
||||||
|
|
||||||
|
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
|
||||||
method='DELETE'
|
method='DELETE'
|
||||||
title='ドキュメント内のチャンクを削除'
|
title='ドキュメント内のチャンクを削除'
|
||||||
name='#delete_segment'
|
name='#delete_segment'
|
||||||
@ -1100,7 +1169,6 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
|||||||
<hr className='ml-0 mr-0' />
|
<hr className='ml-0 mr-0' />
|
||||||
|
|
||||||
<Heading
|
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
|
||||||
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
|
|
||||||
method='POST'
|
method='POST'
|
||||||
title='ドキュメント内のチャンクを更新'
|
title='ドキュメント内のチャンクを更新'
|
||||||
name='#update_segment'
|
name='#update_segment'
|
||||||
@ -1528,20 +1596,45 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
|||||||
クエリキーワード
|
クエリキーワード
|
||||||
</Property>
|
</Property>
|
||||||
<Property name='retrieval_model' type='object' key='retrieval_model'>
|
<Property name='retrieval_model' type='object' key='retrieval_model'>
|
||||||
検索モデル (オプション、入力されない場合はデフォルトの方法でリコールされます)
|
検索パラメータ(オプション、入力されない場合はデフォルトの方法でリコールされます)
|
||||||
- <code>search_method</code> (text) 検索方法: 以下の 4 つのキーワードのいずれかが必要です
|
- <code>search_method</code> (text) 検索方法: 以下の4つのキーワードのいずれかが必要です
|
||||||
- <code>keyword_search</code> キーワード検索
|
- <code>keyword_search</code> キーワード検索
|
||||||
- <code>semantic_search</code> セマンティック検索
|
- <code>semantic_search</code> セマンティック検索
|
||||||
- <code>full_text_search</code> 全文検索
|
- <code>full_text_search</code> 全文検索
|
||||||
- <code>hybrid_search</code> ハイブリッド検索
|
- <code>hybrid_search</code> ハイブリッド検索
|
||||||
- <code>reranking_enable</code> (bool) 再ランキングを有効にするかどうか、検索モードが semantic_search または hybrid_search の場合に必須 (オプション)
|
- <code>reranking_enable</code> (bool) 再ランキングを有効にするかどうか、検索モードがsemantic_searchまたはhybrid_searchの場合に必須(オプション)
|
||||||
- <code>reranking_mode</code> (object) 再ランキングモデル構成、再ランキングが有効な場合に必須
|
- <code>reranking_mode</code> (object) 再ランキングモデル構成、再ランキングが有効な場合に必須
|
||||||
- <code>reranking_provider_name</code> (string) 再ランキングモデルプロバイダー
|
- <code>reranking_provider_name</code> (string) 再ランキングモデルプロバイダー
|
||||||
- <code>reranking_model_name</code> (string) 再ランキングモデル名
|
- <code>reranking_model_name</code> (string) 再ランキングモデル名
|
||||||
- <code>weights</code> (float) ハイブリッド検索モードでのセマンティック検索の重み設定
|
- <code>weights</code> (float) ハイブリッド検索モードでのセマンティック検索の重み設定
|
||||||
- <code>top_k</code> (integer) 返される結果の数 (オプション)
|
- <code>top_k</code> (integer) 返される結果の数(オプション)
|
||||||
- <code>score_threshold_enabled</code> (bool) スコア閾値を有効にするかどうか
|
- <code>score_threshold_enabled</code> (bool) スコア閾値を有効にするかどうか
|
||||||
- <code>score_threshold</code> (float) スコア閾値
|
- <code>score_threshold</code> (float) スコア閾値
|
||||||
|
- <code>metadata_filtering_conditions</code> (object) メタデータフィルタリング条件
|
||||||
|
- <code>logical_operator</code> (string) 論理演算子: <code>and</code> | <code>or</code>
|
||||||
|
- <code>conditions</code> (array[object]) 条件リスト
|
||||||
|
- <code>name</code> (string) メタデータフィールド名
|
||||||
|
- <code>comparison_operator</code> (string) 比較演算子、許可される値:
|
||||||
|
- 文字列比較:
|
||||||
|
- <code>contains</code>: 含む
|
||||||
|
- <code>not contains</code>: 含まない
|
||||||
|
- <code>start with</code>: で始まる
|
||||||
|
- <code>end with</code>: で終わる
|
||||||
|
- <code>is</code>: 等しい
|
||||||
|
- <code>is not</code>: 等しくない
|
||||||
|
- <code>empty</code>: 空
|
||||||
|
- <code>not empty</code>: 空でない
|
||||||
|
- 数値比較:
|
||||||
|
- <code>=</code>: 等しい
|
||||||
|
- <code>≠</code>: 等しくない
|
||||||
|
- <code>></code>: より大きい
|
||||||
|
- <code>< </code>: より小さい
|
||||||
|
- <code>≥</code>: 以上
|
||||||
|
- <code>≤</code>: 以下
|
||||||
|
- 時間比較:
|
||||||
|
- <code>before</code>: より前
|
||||||
|
- <code>after</code>: より後
|
||||||
|
- <code>value</code> (string|number|null) 比較値
|
||||||
</Property>
|
</Property>
|
||||||
<Property name='external_retrieval_model' type='object' key='external_retrieval_model'>
|
<Property name='external_retrieval_model' type='object' key='external_retrieval_model'>
|
||||||
未使用フィールド
|
未使用フィールド
|
||||||
@ -1566,7 +1659,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
|||||||
"weights": null,
|
"weights": null,
|
||||||
"top_k": 1,
|
"top_k": 1,
|
||||||
"score_threshold_enabled": false,
|
"score_threshold_enabled": false,
|
||||||
"score_threshold": null
|
"score_threshold": null,
|
||||||
|
"metadata_filtering_conditions": {
|
||||||
|
"logical_operator": "and",
|
||||||
|
"conditions": [
|
||||||
|
{
|
||||||
|
"name": "document_name",
|
||||||
|
"comparison_operator": "contains",
|
||||||
|
"value": "test"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}'`}
|
}'`}
|
||||||
>
|
>
|
||||||
|
@ -1351,6 +1351,75 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
|||||||
|
|
||||||
<Heading
|
<Heading
|
||||||
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
|
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
|
||||||
|
method='GET'
|
||||||
|
title='查看文档分段详情'
|
||||||
|
name='#view_document_segment'
|
||||||
|
/>
|
||||||
|
<Row>
|
||||||
|
<Col>
|
||||||
|
查看指定知识库中特定文档的分段详情
|
||||||
|
|
||||||
|
### Path
|
||||||
|
<Properties>
|
||||||
|
<Property name='dataset_id' type='string' key='dataset_id'>
|
||||||
|
知识库 ID
|
||||||
|
</Property>
|
||||||
|
<Property name='document_id' type='string' key='document_id'>
|
||||||
|
文档 ID
|
||||||
|
</Property>
|
||||||
|
<Property name='segment_id' type='string' key='segment_id'>
|
||||||
|
分段 ID
|
||||||
|
</Property>
|
||||||
|
</Properties>
|
||||||
|
</Col>
|
||||||
|
<Col sticky>
|
||||||
|
<CodeGroup
|
||||||
|
title="Request"
|
||||||
|
tag="GET"
|
||||||
|
label="/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}"
|
||||||
|
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
|
||||||
|
>
|
||||||
|
```bash {{ title: 'cURL' }}
|
||||||
|
curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
|
||||||
|
--header 'Authorization: Bearer {api_key}'
|
||||||
|
```
|
||||||
|
</CodeGroup>
|
||||||
|
<CodeGroup title="Response">
|
||||||
|
```json {{ title: 'Response' }}
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"id": "分段唯一ID",
|
||||||
|
"position": 2,
|
||||||
|
"document_id": "所属文档ID",
|
||||||
|
"content": "分段内容文本",
|
||||||
|
"sign_content": "签名内容文本",
|
||||||
|
"answer": "答案内容(如果有)",
|
||||||
|
"word_count": 470,
|
||||||
|
"tokens": 382,
|
||||||
|
"keywords": ["关键词1", "关键词2"],
|
||||||
|
"index_node_id": "索引节点ID",
|
||||||
|
"index_node_hash": "索引节点哈希值",
|
||||||
|
"hit_count": 0,
|
||||||
|
"enabled": true,
|
||||||
|
"status": "completed",
|
||||||
|
"created_by": "创建者ID",
|
||||||
|
"created_at": 创建时间戳,
|
||||||
|
"updated_at": 更新时间戳,
|
||||||
|
"indexing_at": 索引时间戳,
|
||||||
|
"completed_at": 完成时间戳,
|
||||||
|
"error": null,
|
||||||
|
"child_chunks": []
|
||||||
|
},
|
||||||
|
"doc_form": "text_model"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
</CodeGroup>
|
||||||
|
</Col>
|
||||||
|
</Row>
|
||||||
|
|
||||||
|
<hr className='ml-0 mr-0' />
|
||||||
|
|
||||||
|
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}'
|
||||||
method='POST'
|
method='POST'
|
||||||
title='更新文档分段'
|
title='更新文档分段'
|
||||||
name='#update_segment'
|
name='#update_segment'
|
||||||
@ -1827,6 +1896,31 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
|||||||
- <code>top_k</code> (integer) 返回结果数量,非必填
|
- <code>top_k</code> (integer) 返回结果数量,非必填
|
||||||
- <code>score_threshold_enabled</code> (bool) 是否开启 score 阈值
|
- <code>score_threshold_enabled</code> (bool) 是否开启 score 阈值
|
||||||
- <code>score_threshold</code> (float) Score 阈值
|
- <code>score_threshold</code> (float) Score 阈值
|
||||||
|
- <code>metadata_filtering_conditions</code> (object) 元数据过滤条件
|
||||||
|
- <code>logical_operator</code> (string) 逻辑运算符: <code>and</code> | <code>or</code>
|
||||||
|
- <code>conditions</code> (array[object]) 条件列表
|
||||||
|
- <code>name</code> (string) 元数据字段名
|
||||||
|
- <code>comparison_operator</code> (string) 比较运算符,可选值:
|
||||||
|
- 字符串比较:
|
||||||
|
- <code>contains</code>: 包含
|
||||||
|
- <code>not contains</code>: 不包含
|
||||||
|
- <code>start with</code>: 以...开头
|
||||||
|
- <code>end with</code>: 以...结尾
|
||||||
|
- <code>is</code>: 等于
|
||||||
|
- <code>is not</code>: 不等于
|
||||||
|
- <code>empty</code>: 为空
|
||||||
|
- <code>not empty</code>: 不为空
|
||||||
|
- 数值比较:
|
||||||
|
- <code>=</code>: 等于
|
||||||
|
- <code>≠</code>: 不等于
|
||||||
|
- <code>></code>: 大于
|
||||||
|
- <code> < </code>: 小于
|
||||||
|
- <code>≥</code>: 大于等于
|
||||||
|
- <code>≤</code>: 小于等于
|
||||||
|
- 时间比较:
|
||||||
|
- <code>before</code>: 早于
|
||||||
|
- <code>after</code>: 晚于
|
||||||
|
- <code>value</code> (string|number|null) 比较值
|
||||||
</Property>
|
</Property>
|
||||||
<Property name='external_retrieval_model' type='object' key='external_retrieval_model'>
|
<Property name='external_retrieval_model' type='object' key='external_retrieval_model'>
|
||||||
未启用字段
|
未启用字段
|
||||||
@ -1851,7 +1945,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
|
|||||||
"weights": null,
|
"weights": null,
|
||||||
"top_k": 1,
|
"top_k": 1,
|
||||||
"score_threshold_enabled": false,
|
"score_threshold_enabled": false,
|
||||||
"score_threshold": null
|
"score_threshold": null,
|
||||||
|
"metadata_filtering_conditions": {
|
||||||
|
"logical_operator": "and",
|
||||||
|
"conditions": [
|
||||||
|
{
|
||||||
|
"name": "document_name",
|
||||||
|
"comparison_operator": "contains",
|
||||||
|
"value": "test"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}'`}
|
}'`}
|
||||||
>
|
>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user