diff --git a/web/app/(commonLayout)/datasets/template/template.en.mdx b/web/app/(commonLayout)/datasets/template/template.en.mdx
index 06f3489409..f2b4e604ee 100644
--- a/web/app/(commonLayout)/datasets/template/template.en.mdx
+++ b/web/app/(commonLayout)/datasets/template/template.en.mdx
@@ -144,23 +144,18 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
### Request Body
-
- Source document ID (optional)
+
+ - original_document_id Source document ID (optional)
- Used to re-upload the document or modify the document cleaning and segmentation configuration. The missing information is copied from the source document
- The source document cannot be an archived document
- When original_document_id is passed in, the document is updated. process_rule is optional; if it is not filled in, the segmentation method of the source document is used by default
- When original_document_id is not passed in, a new document is created, and process_rule is required
-
-
- Files that need to be uploaded.
-
-
- Index mode
+
+ - indexing_technique Index mode
- high_quality
High quality: embedding using embedding model, built as vector database index
- economy
Economy: Build using inverted index of Keyword Table Index
-
-
- Processing rules
+
+ - process_rule Processing rules
- mode
(string) Cleaning, segmentation mode, automatic / custom
- rules
(object) Custom rules (in automatic mode, this field is empty)
- pre_processing_rules
(array[object]) Preprocessing rules
@@ -173,6 +168,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- separator
Custom segment identifier, currently only allows one delimiter to be set. Default is \n
- max_tokens
Maximum length (token) defaults to 1000
+
+ Files that need to be uploaded.
+
@@ -180,7 +178,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
title="Request"
tag="POST"
label="/datasets/{dataset_id}/document/create_by_file"
- targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
+ targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
>
```bash {{ title: 'cURL' }}
curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \
diff --git a/web/app/(commonLayout)/datasets/template/template.zh.mdx b/web/app/(commonLayout)/datasets/template/template.zh.mdx
index 2906f9c4e7..11bcd5a760 100644
--- a/web/app/(commonLayout)/datasets/template/template.zh.mdx
+++ b/web/app/(commonLayout)/datasets/template/template.zh.mdx
@@ -142,25 +142,20 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- ### Request Body
+ ### Request Body
-
- 源文档 ID (选填)
+
+ - original_document_id 源文档 ID (选填)
- 用于重新上传文档或修改文档清洗、分段配置,缺失的信息从源文档复制
- 源文档不可为归档的文档
- 当传入 original_document_id
时,代表文档进行更新操作,process_rule
为可填项目,不填默认使用源文档的分段方式
- 未传入 original_document_id
时,代表文档进行新增操作,process_rule
为必填
-
-
- 需要上传的文件。
-
-
- 索引方式
+
+ - indexing_technique 索引方式
- high_quality
高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引
- economy
经济:使用 Keyword Table Index 的倒排索引进行构建
-
-
- 处理规则
+
+ - process_rule 处理规则
- mode
(string) 清洗、分段模式 ,automatic 自动 / custom 自定义
- rules
(object) 自定义规则(自动模式下,该字段为空)
- pre_processing_rules
(array[object]) 预处理规则
@@ -173,6 +168,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
- separator
自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n
- max_tokens
最大长度 (token) 默认为 1000
+
+ 需要上传的文件。
+
@@ -180,7 +178,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
title="Request"
tag="POST"
label="/datasets/{dataset_id}/document/create_by_file"
- targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
+ targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
>
```bash {{ title: 'cURL' }}
curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \