From 2e97ba5700d7a166373805cf4307ca2477f55a1b Mon Sep 17 00:00:00 2001 From: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Date: Thu, 9 Jan 2025 17:44:11 +0800 Subject: [PATCH] fix: Add datasets list access control and fix datasets config display issue (#12533) Co-authored-by: nite-knite --- web/app/(commonLayout)/datasets/Container.tsx | 21 +++++-- web/app/(commonLayout)/datasets/Datasets.tsx | 10 +++- .../datasets/template/template.en.mdx | 48 +++++++++++++++- .../datasets/template/template.zh.mdx | 50 +++++++++++++++-- .../datasets/create/step-two/index.tsx | 14 +++-- .../datasets/settings/form/index.tsx | 56 ++++++++++--------- web/app/components/develop/md.tsx | 7 +++ web/i18n/en-US/dataset.ts | 2 + web/i18n/zh-Hans/dataset.ts | 2 + web/models/datasets.ts | 11 ++++ web/service/datasets.ts | 3 +- 11 files changed, 174 insertions(+), 50 deletions(-) diff --git a/web/app/(commonLayout)/datasets/Container.tsx b/web/app/(commonLayout)/datasets/Container.tsx index a0edb1cd61..c39d9c5dbf 100644 --- a/web/app/(commonLayout)/datasets/Container.tsx +++ b/web/app/(commonLayout)/datasets/Container.tsx @@ -4,7 +4,8 @@ import { useEffect, useMemo, useRef, useState } from 'react' import { useRouter } from 'next/navigation' import { useTranslation } from 'react-i18next' -import { useDebounceFn } from 'ahooks' +import { useBoolean, useDebounceFn } from 'ahooks' +import { useQuery } from '@tanstack/react-query' // Components import ExternalAPIPanel from '../../components/datasets/external-api/external-api-panel' @@ -16,7 +17,9 @@ import TabSliderNew from '@/app/components/base/tab-slider-new' import TagManagementModal from '@/app/components/base/tag-management' import TagFilter from '@/app/components/base/tag-management/filter' import Button from '@/app/components/base/button' +import Input from '@/app/components/base/input' import { ApiConnectionMod } from '@/app/components/base/icons/src/vender/solid/development' +import CheckboxWithLabel from 
'@/app/components/datasets/create/website/base/checkbox-with-label' // Services import { fetchDatasetApiBaseUrl } from '@/service/datasets' @@ -26,16 +29,14 @@ import { useTabSearchParams } from '@/hooks/use-tab-searchparams' import { useStore as useTagStore } from '@/app/components/base/tag-management/store' import { useAppContext } from '@/context/app-context' import { useExternalApiPanel } from '@/context/external-api-panel-context' -// eslint-disable-next-line import/order -import { useQuery } from '@tanstack/react-query' -import Input from '@/app/components/base/input' const Container = () => { const { t } = useTranslation() const router = useRouter() - const { currentWorkspace } = useAppContext() + const { currentWorkspace, isCurrentWorkspaceOwner } = useAppContext() const showTagManagementModal = useTagStore(s => s.showTagManagementModal) const { showExternalApiPanel, setShowExternalApiPanel } = useExternalApiPanel() + const [includeAll, { toggle: toggleIncludeAll }] = useBoolean(false) const options = useMemo(() => { return [ @@ -90,6 +91,14 @@ const Container = () => { /> {activeTab === 'dataset' && (
+ {isCurrentWorkspaceOwner && } {
{activeTab === 'dataset' && ( <> - + {showTagManagementModal && ( diff --git a/web/app/(commonLayout)/datasets/Datasets.tsx b/web/app/(commonLayout)/datasets/Datasets.tsx index db6cb4a518..ea918a2b17 100644 --- a/web/app/(commonLayout)/datasets/Datasets.tsx +++ b/web/app/(commonLayout)/datasets/Datasets.tsx @@ -6,7 +6,7 @@ import { debounce } from 'lodash-es' import { useTranslation } from 'react-i18next' import NewDatasetCard from './NewDatasetCard' import DatasetCard from './DatasetCard' -import type { DataSetListResponse } from '@/models/datasets' +import type { DataSetListResponse, FetchDatasetsParams } from '@/models/datasets' import { fetchDatasets } from '@/service/datasets' import { useAppContext } from '@/context/app-context' @@ -15,13 +15,15 @@ const getKey = ( previousPageData: DataSetListResponse, tags: string[], keyword: string, + includeAll: boolean, ) => { if (!pageIndex || previousPageData.has_more) { - const params: any = { + const params: FetchDatasetsParams = { url: 'datasets', params: { page: pageIndex + 1, limit: 30, + include_all: includeAll, }, } if (tags.length) @@ -37,16 +39,18 @@ type Props = { containerRef: React.RefObject tags: string[] keywords: string + includeAll: boolean } const Datasets = ({ containerRef, tags, keywords, + includeAll, }: Props) => { const { isCurrentWorkspaceEditor } = useAppContext() const { data, isLoading, setSize, mutate } = useSWRInfinite( - (pageIndex: number, previousPageData: DataSetListResponse) => getKey(pageIndex, previousPageData, tags, keywords), + (pageIndex: number, previousPageData: DataSetListResponse) => getKey(pageIndex, previousPageData, tags, keywords, includeAll), fetchDatasets, { revalidateFirstPage: false, revalidateAll: true }, ) diff --git a/web/app/(commonLayout)/datasets/template/template.en.mdx b/web/app/(commonLayout)/datasets/template/template.en.mdx index b38cf38b9a..3fa22a1620 100644 --- a/web/app/(commonLayout)/datasets/template/template.en.mdx +++ 
b/web/app/(commonLayout)/datasets/template/template.en.mdx @@ -1,5 +1,5 @@ import { CodeGroup } from '@/app/components/develop/code.tsx' -import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from '@/app/components/develop/md.tsx' +import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstruction, Paragraph } from '@/app/components/develop/md.tsx' # Knowledge API @@ -80,6 +80,27 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - max_tokens The maximum length (tokens) must be validated to be shorter than the length of the parent chunk - chunk_overlap Define the overlap between adjacent chunks (optional) + When no parameters are set for the knowledge base, the first upload requires the following parameters to be provided; if not provided, the default parameters will be used. + + Retrieval model + - search_method (string) Search method + - hybrid_search Hybrid search + - semantic_search Semantic search + - full_text_search Full-text search + - reranking_enable (bool) Whether to enable reranking + - reranking_model (object) Rerank model configuration + - reranking_provider_name (string) Rerank model provider + - reranking_model_name (string) Rerank model name + - top_k (int) Number of results to return + - score_threshold_enabled (bool) Whether to enable score threshold + - score_threshold (float) Score threshold + + + Embedding model name + + + Embedding model provider + @@ -197,6 +218,27 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from Files that need to be uploaded. + When no parameters are set for the knowledge base, the first upload requires the following parameters to be provided; if not provided, the default parameters will be used. 
+ + Retrieval model + - search_method (string) Search method + - hybrid_search Hybrid search + - semantic_search Semantic search + - full_text_search Full-text search + - reranking_enable (bool) Whether to enable reranking + - reranking_model (object) Rerank model configuration + - reranking_provider_name (string) Rerank model provider + - reranking_model_name (string) Rerank model name + - top_k (int) Number of results to return + - score_threshold_enabled (bool) Whether to enable score threshold + - score_threshold (float) Score threshold + + + Embedding model name + + + Embedding model provider + @@ -1188,10 +1230,10 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - reranking_mode (object) Rerank model configuration, required if reranking is enabled - reranking_provider_name (string) Rerank model provider - reranking_model_name (string) Rerank model name - - weights (double) Semantic search weight setting in hybrid search mode + - weights (float) Semantic search weight setting in hybrid search mode - top_k (integer) Number of results to return (optional) - score_threshold_enabled (bool) Whether to enable score threshold - - score_threshold (double) Score threshold + - score_threshold (float) Score threshold Unused field diff --git a/web/app/(commonLayout)/datasets/template/template.zh.mdx b/web/app/(commonLayout)/datasets/template/template.zh.mdx index 7bb773eee9..334591743f 100644 --- a/web/app/(commonLayout)/datasets/template/template.zh.mdx +++ b/web/app/(commonLayout)/datasets/template/template.zh.mdx @@ -1,5 +1,5 @@ import { CodeGroup } from '@/app/components/develop/code.tsx' -import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from '@/app/components/develop/md.tsx' +import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstruction, Paragraph } from '@/app/components/develop/md.tsx' # 知识库 API @@ -80,6 +80,27 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph 
} from - max_tokens 最大长度 (token) 需要校验小于父级的长度 - chunk_overlap 分段重叠指的是在对数据进行分段时,段与段之间存在一定的重叠部分(选填) + 当知识库未设置任何参数的时候,首次上传需要提供以下参数,未提供则使用默认选项: + + 检索模式 + - search_method (string) 检索方法 + - hybrid_search 混合检索 + - semantic_search 语义检索 + - full_text_search 全文检索 + - reranking_enable (bool) 是否开启 rerank + - reranking_model (object) Rerank 模型配置 + - reranking_provider_name (string) Rerank 模型的提供商 + - reranking_model_name (string) Rerank 模型的名称 + - top_k (int) 召回条数 + - score_threshold_enabled (bool) 是否开启召回分数限制 + - score_threshold (float) 召回分数限制 + + + Embedding 模型名称 + + + Embedding 模型供应商 + @@ -197,6 +218,27 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from 需要上传的文件。 + 当知识库未设置任何参数的时候,首次上传需要提供以下参数,未提供则使用默认选项: + + 检索模式 + - search_method (string) 检索方法 + - hybrid_search 混合检索 + - semantic_search 语义检索 + - full_text_search 全文检索 + - reranking_enable (bool) 是否开启 rerank + - reranking_model (object) Rerank 模型配置 + - reranking_provider_name (string) Rerank 模型的提供商 + - reranking_model_name (string) Rerank 模型的名称 + - top_k (int) 召回条数 + - score_threshold_enabled (bool) 是否开启召回分数限制 + - score_threshold (float) 召回分数限制 + + + Embedding 模型名称 + + + Embedding 模型供应商 + @@ -1186,13 +1228,13 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - full_text_search 全文检索 - hybrid_search 混合检索 - reranking_enable (bool) 是否启用 Reranking,非必填,如果检索模式为 semantic_search 模式或者 hybrid_search 则传值 - - reranking_mode (object) Rerank模型配置,非必填,如果启用了 reranking 则传值 + - reranking_model (object) Rerank 模型配置,非必填,如果启用了 reranking 则传值 - reranking_provider_name (string) Rerank 模型提供商 - reranking_model_name (string) Rerank 模型名称 - - weights (double) 混合检索模式下语意检索的权重设置 + - weights (float) 混合检索模式下语义检索的权重设置 - top_k (integer) 返回结果数量,非必填 - score_threshold_enabled (bool) 是否开启 score 阈值 - - score_threshold (double) Score 阈值 + - score_threshold (float) Score 阈值 未启用字段 diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index 
27ca16579b..11984d71c6 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -575,6 +575,8 @@ const StepTwo = ({ const economyDomRef = useRef(null) const isHoveringEconomy = useHover(economyDomRef) + const isModelAndRetrievalConfigDisabled = !!datasetId && !!currentDataset?.data_source_type + return (
@@ -931,15 +933,15 @@ const StepTwo = ({
{t('datasetSettings.form.embeddingModel')}
{ setEmbeddingModel(model) }} /> - {!!datasetId && ( + {isModelAndRetrievalConfigDisabled && (
{t('datasetCreation.stepTwo.indexSettingTip')} {t('datasetCreation.stepTwo.datasetSettingLink')} @@ -950,7 +952,7 @@ const StepTwo = ({ {/* Retrieval Method Config */}
- {!datasetId + {!isModelAndRetrievalConfigDisabled ? (
{t('datasetSettings.form.retrievalSetting.title')}
@@ -971,14 +973,14 @@ const StepTwo = ({ getIndexing_technique() === IndexingType.QUALIFIED ? ( ) : ( diff --git a/web/app/components/datasets/settings/form/index.tsx b/web/app/components/datasets/settings/form/index.tsx index 760954d6cb..42ea7d637b 100644 --- a/web/app/components/datasets/settings/form/index.tsx +++ b/web/app/components/datasets/settings/form/index.tsx @@ -223,7 +223,7 @@ const Form = () => { setIndexMethod(v)} + onChange={v => setIndexMethod(v!)} docForm={currentDataset.doc_form} currentValue={currentDataset.indexing_technique} /> @@ -300,35 +300,37 @@ const Form = () => {
- : <> -
-
-
-
-
{t('datasetSettings.form.retrievalSetting.title')}
-
- {t('datasetSettings.form.retrievalSetting.learnMore')} - {t('datasetSettings.form.retrievalSetting.description')} + : indexMethod + ? <> +
+
+
+
+
{t('datasetSettings.form.retrievalSetting.title')}
+
+ {t('datasetSettings.form.retrievalSetting.learnMore')} + {t('datasetSettings.form.retrievalSetting.description')} +
+
+ {indexMethod === IndexingType.QUALIFIED + ? ( + + ) + : ( + + )} +
-
- {indexMethod === 'high_quality' - ? ( - - ) - : ( - - )} -
-
- + + : null }
diff --git a/web/app/components/develop/md.tsx b/web/app/components/develop/md.tsx index 26b4007c87..c75798fcfe 100644 --- a/web/app/components/develop/md.tsx +++ b/web/app/components/develop/md.tsx @@ -1,4 +1,5 @@ 'use client' +import type { PropsWithChildren } from 'react' import classNames from '@/utils/classnames' type IChildrenProps = { @@ -139,3 +140,9 @@ export function SubProperty({ name, type, children }: ISubProperty) { ) } + +export function PropertyInstruction({ children }: PropsWithChildren<{}>) { + return ( +
  • {children}
  • + ) +} diff --git a/web/i18n/en-US/dataset.ts b/web/i18n/en-US/dataset.ts index 6a6df700d7..4e1f2549d8 100644 --- a/web/i18n/en-US/dataset.ts +++ b/web/i18n/en-US/dataset.ts @@ -166,6 +166,8 @@ const translation = { cancel: 'Cancel', }, preprocessDocument: '{{num}} Preprocess Documents', + allKnowledge: 'All Knowledge', + allKnowledgeDescription: 'Select to display all knowledge in this workspace. Only the Workspace Owner can manage all knowledge.', } export default translation diff --git a/web/i18n/zh-Hans/dataset.ts b/web/i18n/zh-Hans/dataset.ts index d7834b4116..bedd114b73 100644 --- a/web/i18n/zh-Hans/dataset.ts +++ b/web/i18n/zh-Hans/dataset.ts @@ -166,6 +166,8 @@ const translation = { cancel: '取消', }, preprocessDocument: '{{num}} 个预处理文档', + allKnowledge: '所有知识库', + allKnowledgeDescription: '选择以显示该工作区内所有知识库。只有工作区所有者才能管理所有知识库。', } export default translation diff --git a/web/models/datasets.ts b/web/models/datasets.ts index 9d4768b67c..673fb5fb15 100644 --- a/web/models/datasets.ts +++ b/web/models/datasets.ts @@ -132,6 +132,17 @@ export type FileItem = { progress: number } +export type FetchDatasetsParams = { + url: string + params: { + page: number + tag_ids?: string[] + limit: number + include_all: boolean + keyword?: string + } +} + export type DataSetListResponse = { data: DataSet[] has_more: boolean diff --git a/web/service/datasets.ts b/web/service/datasets.ts index 87f4e3a638..f2065de382 100644 --- a/web/service/datasets.ts +++ b/web/service/datasets.ts @@ -13,6 +13,7 @@ import type { ExternalAPIUsage, ExternalKnowledgeBaseHitTestingResponse, ExternalKnowledgeItem, + FetchDatasetsParams, FileIndexingEstimateResponse, HitTestingRecordsResponse, HitTestingResponse, @@ -67,7 +68,7 @@ export const fetchDatasetRelatedApps: Fetcher = (dat return get(`/datasets/${datasetId}/related-apps`) } -export const fetchDatasets: Fetcher = ({ url, params }) => { +export const fetchDatasets: Fetcher = ({ url, params }) => { const urlParams = qs.stringify(params, { 
indices: false }) return get(`${url}?${urlParams}`) }