diff --git a/web/app/(commonLayout)/datasets/template/template.en.mdx b/web/app/(commonLayout)/datasets/template/template.en.mdx index ac736db9b7..90d5dbacc3 100644 --- a/web/app/(commonLayout)/datasets/template/template.en.mdx +++ b/web/app/(commonLayout)/datasets/template/template.en.mdx @@ -71,7 +71,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from /> - ### Path Query + ### Query Page number @@ -136,7 +136,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from This api is based on an existing dataset and creates a new document through text based on this dataset. - ### Path Params + ### Params Dataset ID @@ -153,22 +153,22 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from Index mode - - high_quality High quality: embedding using embedding model, built as vector database index - - economy Economy: Build using inverted index of Keyword Table Index + - high_quality High quality: embedding using embedding model, built as vector database index + - economy Economy: Build using inverted index of Keyword Table Index Processing rules - - mode (string) Cleaning, segmentation mode, automatic / custom - - rules (text) Custom rules (in automatic mode, this field is empty) - - pre_processing_rules (array[object]) Preprocessing rules - - id (string) Unique identifier for the preprocessing rule + - mode (string) Cleaning, segmentation mode, automatic / custom + - rules (object) Custom rules (in automatic mode, this field is empty) + - pre_processing_rules (array[object]) Preprocessing rules + - id (string) Unique identifier for the preprocessing rule - enumerate - - remove_extra_spaces Replace consecutive spaces, newlines, tabs - - remove_urls_emails Delete URL, email address - - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. 
- - segmentation (object) segmentation rules - - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n - - max_tokens Maximum length (token) defaults to 1000 + - remove_extra_spaces Replace consecutive spaces, newlines, tabs + - remove_urls_emails Delete URL, email address + - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. + - segmentation (object) segmentation rules + - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n + - max_tokens Maximum length (token) defaults to 1000 @@ -238,7 +238,8 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from This api is based on an existing dataset and creates a new document through a file based on this dataset. - ### Path Params + + ### Params Dataset ID @@ -259,22 +260,22 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from Index mode - - high_quality High quality: embedding using embedding model, built as vector database index - - economy Economy: Build using inverted index of Keyword Table Index + - high_quality High quality: embedding using embedding model, built as vector database index + - economy Economy: Build using inverted index of Keyword Table Index Processing rules - - mode (string) Cleaning, segmentation mode, automatic / custom - - rules (text) Custom rules (in automatic mode, this field is empty) - - pre_processing_rules (array[object]) Preprocessing rules - - id (string) Unique identifier for the preprocessing rule + - mode (string) Cleaning, segmentation mode, automatic / custom + - rules (object) Custom rules (in automatic mode, this field is empty) + - pre_processing_rules (array[object]) Preprocessing rules + - id (string) Unique identifier for the preprocessing rule - enumerate - - remove_extra_spaces Replace consecutive spaces, newlines, tabs - - remove_urls_emails Delete URL, email address 
- - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. - - segmentation (object) segmentation rules - - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n - - max_tokens Maximum length (token) defaults to 1000 + - remove_extra_spaces Replace consecutive spaces, newlines, tabs + - remove_urls_emails Delete URL, email address + - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. + - segmentation (object) segmentation rules + - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n + - max_tokens Maximum length (token) defaults to 1000 @@ -338,7 +339,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from This api is based on an existing dataset and updates the document through text based on this dataset. - ### Path Params + ### Params Dataset ID @@ -358,17 +359,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from Processing rules - - mode (string) Cleaning, segmentation mode, automatic / custom - - rules (text) Custom rules (in automatic mode, this field is empty) - - pre_processing_rules (array[object]) Preprocessing rules - - id (string) Unique identifier for the preprocessing rule + - mode (string) Cleaning, segmentation mode, automatic / custom + - rules (object) Custom rules (in automatic mode, this field is empty) + - pre_processing_rules (array[object]) Preprocessing rules + - id (string) Unique identifier for the preprocessing rule - enumerate - - remove_extra_spaces Replace consecutive spaces, newlines, tabs - - remove_urls_emails Delete URL, email address - - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. 
- - segmentation (object) segmentation rules - - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n - - max_tokens Maximum length (token) defaults to 1000 + - remove_extra_spaces Replace consecutive spaces, newlines, tabs + - remove_urls_emails Delete URL, email address + - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. + - segmentation (object) segmentation rules + - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n + - max_tokens Maximum length (token) defaults to 1000 @@ -435,7 +436,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from This api is based on an existing dataset, and updates documents through files based on this dataset - ### Path Params + ### Params Dataset ID @@ -455,17 +456,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from Processing rules - - mode (string) Cleaning, segmentation mode, automatic / custom - - rules (text) Custom rules (in automatic mode, this field is empty) - - pre_processing_rules (array[object]) Preprocessing rules - - id (string) Unique identifier for the preprocessing rule + - mode (string) Cleaning, segmentation mode, automatic / custom + - rules (object) Custom rules (in automatic mode, this field is empty) + - pre_processing_rules (array[object]) Preprocessing rules + - id (string) Unique identifier for the preprocessing rule - enumerate - - remove_extra_spaces Replace consecutive spaces, newlines, tabs - - remove_urls_emails Delete URL, email address - - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. - - segmentation (object) segmentation rules - - separator Custom segment identifier, currently only allows one delimiter to be set. 
Default is \n - - max_tokens Maximum length (token) defaults to 1000 + - remove_extra_spaces Replace consecutive spaces, newlines, tabs + - remove_urls_emails Delete URL, email address + - enabled (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value. + - segmentation (object) segmentation rules + - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n + - max_tokens Maximum length (token) defaults to 1000 @@ -527,7 +528,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from /> - ### Path Params + ### Params Dataset ID @@ -582,7 +583,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from /> - ### Path Params + ### Params Dataset ID @@ -624,14 +625,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from /> - ### Path Params + ### Params Dataset ID - ### Path Query + ### Query Search keywords, currently only search document names(optional) @@ -699,7 +700,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from /> - ### Path Params + ### Params Dataset ID @@ -712,10 +713,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ### Request Body - segments (object list) Segmented content - - content (text) Text content/question content, required - - answer(text) Answer content, if the mode of the data set is qa mode, pass the value(optional) - - keywords(list) Keywords(optional) + - content (text) Text content/question content, required + - answer (text) Answer content, if the mode of the data set is qa mode, pass the value(optional) + - keywords (list) Keywords(optional) @@ -778,14 +778,106 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from --- -Error message -- **document_indexing**: Document indexing failed -- **provider_not_initialize**: Embedding model is not configured -- **not_found**, 
Document does not exist -- **dataset_name_duplicate**: Duplicate dataset name -- **provider_quota_exceeded**: Model quota exceeds limit -- **dataset_not_initialized**: The dataset has not been initialized yet -- **unsupported_file_type**: Unsupported file types. - - Currently only supports, txt, markdown, md, pdf, html, htm, xlsx, docx, csv -- **too_many_files**: There are too many files. Currently, only a single file is uploaded -- **file_too_large*: The file is too large, support below 15M based on you environment configuration + + + ### Error message + + + Error code + + + + + Error status + + + + + Error message + + + + + + ```json {{ title: 'Response' }} + { + "code": "no_file_uploaded", + "message": "Please upload your file.", + "status": 400 + } + ``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| code | status | message |
| --- | --- | --- |
| no_file_uploaded | 400 | Please upload your file. |
| too_many_files | 400 | Only one file is allowed. |
| file_too_large | 413 | File size exceeded. |
| unsupported_file_type | 415 | File type not allowed. |
| high_quality_dataset_only | 400 | Current operation only supports 'high-quality' datasets. |
| dataset_not_initialized | 400 | The dataset is still being initialized or indexing. Please wait a moment. |
| archived_document_immutable | 403 | The archived document is not editable. |
| dataset_name_duplicate | 409 | The dataset name already exists. Please modify your dataset name. |
| invalid_action | 400 | Invalid action. |
| document_already_finished | 400 | The document has been processed. Please refresh the page or go to the document details. |
| document_indexing | 400 | The document is being processed and cannot be edited. |
| invalid_metadata | 400 | The metadata content is incorrect. Please check and verify. |
+
diff --git a/web/app/(commonLayout)/datasets/template/template.zh.mdx b/web/app/(commonLayout)/datasets/template/template.zh.mdx index 52b3934d4f..54dca02e84 100644 --- a/web/app/(commonLayout)/datasets/template/template.zh.mdx +++ b/web/app/(commonLayout)/datasets/template/template.zh.mdx @@ -71,7 +71,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from /> - ### Path Query + ### Query 页码 @@ -136,7 +136,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from 此接口基于已存在数据集,在此数据集的基础上通过文本创建新的文档 - ### Path Params + ### Path 数据集 ID @@ -153,22 +153,22 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from 索引方式 - - high_quality 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引 - - economy 经济:使用 Keyword Table Index 的倒排索引进行构建 + - high_quality 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引 + - economy 经济:使用 Keyword Table Index 的倒排索引进行构建 处理规则 - - mode (string) 清洗、分段模式 ,automatic 自动 / custom 自定义 - - rules (text) 自定义规则(自动模式下,该字段为空) - - pre_processing_rules (array[object]) 预处理规则 - - id (string) 预处理规则的唯一标识符 + - mode (string) 清洗、分段模式 ,automatic 自动 / custom 自定义 + - rules (object) 自定义规则(自动模式下,该字段为空) + - pre_processing_rules (array[object]) 预处理规则 + - id (string) 预处理规则的唯一标识符 - 枚举: - - remove_extra_spaces 替换连续空格、换行符、制表符 - - remove_urls_emails 删除 URL、电子邮件地址 - - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值 - - segmentation (object) 分段规则 - - separator 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n - - max_tokens 最大长度 (token) 默认为 1000 + - remove_extra_spaces 替换连续空格、换行符、制表符 + - remove_urls_emails 删除 URL、电子邮件地址 + - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值 + - segmentation (object) 分段规则 + - separator 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n + - max_tokens 最大长度 (token) 默认为 1000 @@ -239,7 +239,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from 此接口基于已存在数据集,在此数据集的基础上通过文件创建新的文档 - ### Path Params + ### Path 数据集 ID @@ -252,30 +252,30 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from 源文档 ID (选填) - 
用于重新上传文档或修改文档清洗、分段配置,缺失的信息从源文档复制 - 源文档不可为归档的文档 - - 当传入 original_document_id 时,代表文档进行更新操作,process_rule 为可填项目,不填默认使用源文档的分段方式 - - 未传入 original_document_id 时,代表文档进行新增操作,process_rule 为必填 + - 当传入 original_document_id 时,代表文档进行更新操作,process_rule 为可填项目,不填默认使用源文档的分段方式 + - 未传入 original_document_id 时,代表文档进行新增操作,process_rule 为必填 需要上传的文件。 索引方式 - - high_quality 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引 - - economy 经济:使用 Keyword Table Index 的倒排索引进行构建 + - high_quality 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引 + - economy 经济:使用 Keyword Table Index 的倒排索引进行构建 处理规则 - - mode (string) 清洗、分段模式 ,automatic 自动 / custom 自定义。 - - rules (text) 自定义规则(自动模式下,该字段为空) - - pre_processing_rules (array[object]) 预处理规则 - - id (string) 预处理规则的唯一标识符 + - mode (string) 清洗、分段模式 ,automatic 自动 / custom 自定义 + - rules (object) 自定义规则(自动模式下,该字段为空) + - pre_processing_rules (array[object]) 预处理规则 + - id (string) 预处理规则的唯一标识符 - 枚举: - - remove_extra_spaces 替换连续空格、换行符、制表符 - - remove_urls_emails 删除 URL、电子邮件地址 - - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值。 - - segmentation (object) 分段规则 - - separator 自定义分段标识符,目前仅允许设置一个分隔符,默认为 \n - - max_tokens 最大长度 (token) 默认为 1000 + - remove_extra_spaces 替换连续空格、换行符、制表符 + - remove_urls_emails 删除 URL、电子邮件地址 + - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值 + - segmentation (object) 分段规则 + - separator 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n + - max_tokens 最大长度 (token) 默认为 1000 @@ -339,7 +339,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from 此接口基于已存在数据集,在此数据集的基础上通过文本更新文档 - ### Path Params + ### Path 数据集 ID @@ -359,17 +359,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from 处理规则(选填) - - mode (string) 清洗、分段模式 ,automatic 自动 / custom 自定义。 - - rules (text) 自定义规则(自动模式下,该字段为空) - - pre_processing_rules (array[object]) 预处理规则 - - id (string) 预处理规则的唯一标识符 + - mode (string) 清洗、分段模式 ,automatic 自动 / custom 自定义 + - rules (object) 自定义规则(自动模式下,该字段为空) + - pre_processing_rules (array[object]) 预处理规则 + - id (string) 预处理规则的唯一标识符 - 枚举: - - remove_extra_spaces 替换连续空格、换行符、制表符 - - 
remove_urls_emails 删除 URL、电子邮件地址 - - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值。 - - segmentation (object) 分段规则 - - separator 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n - - max_tokens 最大长度 (token) 默认为 1000 + - remove_extra_spaces 替换连续空格、换行符、制表符 + - remove_urls_emails 删除 URL、电子邮件地址 + - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值 + - segmentation (object) 分段规则 + - separator 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n + - max_tokens 最大长度 (token) 默认为 1000 @@ -436,7 +436,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from 此接口基于已存在数据集,在此数据集的基础上通过文件更新文档的操作。 - ### Path Params + ### Path 数据集 ID @@ -456,17 +456,17 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from 处理规则(选填) - - mode (string) 清洗、分段模式 ,automatic 自动 / custom 自定义。 - - rules (text) 自定义规则(自动模式下,该字段为空) - - pre_processing_rules (array[object]) 预处理规则 - - id (string) 预处理规则的唯一标识符 + - mode (string) 清洗、分段模式 ,automatic 自动 / custom 自定义 + - rules (object) 自定义规则(自动模式下,该字段为空) + - pre_processing_rules (array[object]) 预处理规则 + - id (string) 预处理规则的唯一标识符 - 枚举: - - remove_extra_spaces 替换连续空格、换行符、制表符 - - remove_urls_emails 删除 URL、电子邮件地址 - - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值 - - segmentation (object) 分段规则 - - separator 自定义分段标识符,目前仅允许设置一个分隔符,默认为 \n - - max_tokens 最大长度 (token) 默认为 1000 + - remove_extra_spaces 替换连续空格、换行符、制表符 + - remove_urls_emails 删除 URL、电子邮件地址 + - enabled (bool) 是否选中该规则,不传入文档 ID 时代表默认值 + - segmentation (object) 分段规则 + - separator 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n + - max_tokens 最大长度 (token) 默认为 1000 @@ -528,7 +528,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from /> - ### Path Params + ### Path 数据集 ID @@ -583,7 +583,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from /> - ### Path Params + ### Path 数据集 ID @@ -625,14 +625,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from /> - ### Path Params + ### Path 数据集 ID - ### Path Query + ### Query 搜索关键词,可选,目前仅搜索文档名称 @@ -700,7 +700,7 @@ import { Row, 
Col, Properties, Property, Heading, SubProperty, Paragraph } from /> - ### Path Params + ### Path 数据集 ID @@ -713,10 +713,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ### Request Body - segments (object list) 分段内容 - - content (text) 文本内容/问题内容,必填 - - answer(text) 答案内容,非必填,如果数据集的模式为qa模式则传值 - - keywords(list) 关键字,非必填 + - content (text) 文本内容/问题内容,必填 + - answer (text) 答案内容,非必填,如果数据集的模式为qa模式则传值 + - keywords (list) 关键字,非必填 @@ -779,14 +778,106 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from --- -错误信息 -- **document_indexing**: 文档索引失败 -- **provider_not_initialize**: Embedding 模型未配置 -- **not_found**,文档不存在 -- **dataset_name_duplicate**: 数据集名称重复 -- **provider_quota_exceeded**: 模型额度超过限制 -- **dataset_not_initialized**: 数据集还未初始化 -- **unsupported_file_type**: 不支持的文件类型 - - 目前只支持:txt, markdown, md, pdf, html, htm, xlsx, docx, csv -- **too_many_files**: 文件数量过多,暂时只支持单一文件上传 -- **file_too_large*: 文件太大,默认支持15M以下, 具体需要参考环境变量配置 + + + ### 错误信息 + + + 返回的错误代码 + + + + + 返回的错误状态 + + + + + 返回的错误信息 + + + + + + ```json {{ title: 'Response' }} + { + "code": "no_file_uploaded", + "message": "Please upload your file.", + "status": 400 + } + ``` + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
| code | status | message |
| --- | --- | --- |
| no_file_uploaded | 400 | Please upload your file. |
| too_many_files | 400 | Only one file is allowed. |
| file_too_large | 413 | File size exceeded. |
| unsupported_file_type | 415 | File type not allowed. |
| high_quality_dataset_only | 400 | Current operation only supports 'high-quality' datasets. |
| dataset_not_initialized | 400 | The dataset is still being initialized or indexing. Please wait a moment. |
| archived_document_immutable | 403 | The archived document is not editable. |
| dataset_name_duplicate | 409 | The dataset name already exists. Please modify your dataset name. |
| invalid_action | 400 | Invalid action. |
| document_already_finished | 400 | The document has been processed. Please refresh the page or go to the document details. |
| document_indexing | 400 | The document is being processed and cannot be edited. |
| invalid_metadata | 400 | The metadata content is incorrect. Please check and verify. |
+
diff --git a/web/app/layout.tsx b/web/app/layout.tsx index ea5e06f586..f70551cbd2 100644 --- a/web/app/layout.tsx +++ b/web/app/layout.tsx @@ -20,7 +20,7 @@ const LocaleLayout = ({ return (