From 2cf0cb471ff74a2a7e9983f0107d676aa8a09920 Mon Sep 17 00:00:00 2001 From: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Date: Mon, 10 Mar 2025 15:34:40 +0800 Subject: [PATCH] fix: fix document list overlap and optimize document list fetching (#15377) --- .../datasets/documents/detail/index.tsx | 15 +- .../components/datasets/documents/index.tsx | 57 +++-- .../components/datasets/documents/list.tsx | 214 +++++++++--------- web/service/datasets.ts | 5 - web/service/knowledge/use-document.ts | 22 +- 5 files changed, 158 insertions(+), 155 deletions(-) diff --git a/web/app/components/datasets/documents/detail/index.tsx b/web/app/components/datasets/documents/detail/index.tsx index 2b65c195fb..6f5e84971e 100644 --- a/web/app/components/datasets/documents/detail/index.tsx +++ b/web/app/components/datasets/documents/detail/index.tsx @@ -23,7 +23,7 @@ import FloatRightContainer from '@/app/components/base/float-right-container' import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints' import { LayoutRight2LineMod } from '@/app/components/base/icons/src/public/knowledge' import { useCheckSegmentBatchImportProgress, useChildSegmentListKey, useSegmentBatchImport, useSegmentListKey } from '@/service/knowledge/use-segment' -import { useDocumentDetail, useDocumentMetadata } from '@/service/knowledge/use-document' +import { useDocumentDetail, useDocumentMetadata, useInvalidDocumentList } from '@/service/knowledge/use-document' import { useInvalid } from '@/service/use-base' type DocumentContextValue = { @@ -152,17 +152,22 @@ const DocumentDetail: FC = ({ datasetId, documentId }) => { const invalidChunkList = useInvalid(useSegmentListKey) const invalidChildChunkList = useInvalid(useChildSegmentListKey) + const invalidDocumentList = useInvalidDocumentList(datasetId) const handleOperate = (operateName?: string) => { + invalidDocumentList() if (operateName === 'delete') { backToPrev() } else { detailMutate() - setTimeout(() => { - invalidChunkList() - invalidChildChunkList() - }, 5000) + // If operation is not rename, refresh the chunk list after 5 seconds + if (operateName) { + setTimeout(() => { + invalidChunkList() + invalidChildChunkList() + }, 5000) + } } } diff --git a/web/app/components/datasets/documents/index.tsx b/web/app/components/datasets/documents/index.tsx index bbd1c03214..b28701d668 100644 --- a/web/app/components/datasets/documents/index.tsx +++ b/web/app/components/datasets/documents/index.tsx @@ -1,11 +1,10 @@ 'use client' import type { FC } from 'react' import React, { useCallback, useEffect, useMemo, useState } from 'react' -import useSWR from 'swr' import { useTranslation } from 'react-i18next' import { useRouter } from 'next/navigation' import { useDebounce, useDebounceFn } from 'ahooks' -import { groupBy, omit } from 'lodash-es' +import { groupBy } from 'lodash-es' import { PlusIcon } from '@heroicons/react/24/solid' import { RiExternalLinkLine } from '@remixicon/react' import AutoDisabledDocument from '../common/document-status-with-action/auto-disabled-document' @@ -15,16 +14,16 @@ import Loading from '@/app/components/base/loading' import Button from '@/app/components/base/button' import Input from '@/app/components/base/input' import { get } from '@/service/base' -import { createDocument, fetchDocuments } from '@/service/datasets' +import { createDocument } from '@/service/datasets' import { useDatasetDetailContext } from '@/context/dataset-detail' import { NotionPageSelectorModal } from '@/app/components/base/notion-page-selector' import type { NotionPage } from '@/models/common' import type { CreateDocumentReq } from '@/models/datasets' -import { DataSourceType } from '@/models/datasets' +import { DataSourceType, ProcessMode } from '@/models/datasets' import IndexFailed from '@/app/components/datasets/common/document-status-with-action/index-failed' import { useProviderContext } from '@/context/provider-context' import cn from '@/utils/classnames' -import { useInvalidDocumentDetailKey } from '@/service/knowledge/use-document' +import { useDocumentList, useInvalidDocumentDetailKey, useInvalidDocumentList } from '@/service/knowledge/use-document' import { useInvalid } from '@/service/use-base' import { useChildSegmentListKey, useSegmentListKey } from '@/service/knowledge/use-segment' @@ -73,12 +72,12 @@ const EmptyElement: FC<{ canAdd: boolean; onClick: () => void; type?: 'upload' | } -interface IDocumentsProps { +type IDocumentsProps = { datasetId: string } export const fetcher = (url: string) => get(url, {}, {}) -const DEFAULT_LIMIT = 15 +const DEFAULT_LIMIT = 10 const Documents: FC = ({ datasetId }) => { const { t } = useTranslation() @@ -99,33 +98,33 @@ const Documents: FC = ({ datasetId }) => { const debouncedSearchValue = useDebounce(searchValue, { wait: 500 }) - const query = useMemo(() => { - return { page: currPage + 1, limit, keyword: debouncedSearchValue, fetch: isDataSourceNotion ? true : '' } - }, [currPage, debouncedSearchValue, isDataSourceNotion, limit]) - - const { data: documentsRes, mutate, isLoading: isListLoading } = useSWR( - { - action: 'fetchDocuments', - datasetId, - params: query, + const { data: documentsRes, isFetching: isListLoading } = useDocumentList({ + datasetId, + query: { + page: currPage + 1, + limit, + keyword: debouncedSearchValue, }, - apiParams => fetchDocuments(omit(apiParams, 'action')), - { refreshInterval: (isDataSourceNotion && timerCanRun) ? 2500 : 0 }, - ) + refetchInterval: (isDataSourceNotion && timerCanRun) ? 2500 : 0, + }) + + const invalidDocumentList = useInvalidDocumentList(datasetId) - const [isMuting, setIsMuting] = useState(false) useEffect(() => { - if (!isListLoading && isMuting) - setIsMuting(false) - }, [isListLoading, isMuting]) + if (documentsRes) { + const totalPages = Math.ceil(documentsRes.total / limit) + if (totalPages < currPage + 1) + setCurrPage(totalPages === 0 ? 0 : totalPages - 1) + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [documentsRes]) const invalidDocumentDetail = useInvalidDocumentDetailKey() const invalidChunkList = useInvalid(useSegmentListKey) const invalidChildChunkList = useInvalid(useChildSegmentListKey) const handleUpdate = useCallback(() => { - setIsMuting(true) - mutate() + invalidDocumentList() invalidDocumentDetail() setTimeout(() => { invalidChunkList() @@ -175,8 +174,6 @@ const Documents: FC = ({ datasetId }) => { router.push(`/datasets/${datasetId}/documents/create`) } - const isLoading = isListLoading // !documentsRes && !error - const handleSaveNotionPageSelected = async (selectedPages: NotionPage[]) => { const workspacesMap = groupBy(selectedPages, 'workspace_id') const workspaces = Object.keys(workspacesMap).map((workspaceId) => { @@ -209,7 +206,7 @@ const Documents: FC = ({ datasetId }) => { indexing_technique: dataset?.indexing_technique, process_rule: { rules: {}, - mode: 'automatic', + mode: ProcessMode.general, }, } as CreateDocumentReq @@ -217,7 +214,7 @@ const Documents: FC = ({ datasetId }) => { datasetId, body: params, }) - mutate() + invalidDocumentList() setTimerCanRun(true) // mutateDatasetIndexingStatus(undefined, { revalidate: true }) setNotionPageSelectorModalVisible(false) @@ -272,7 +269,7 @@ const Documents: FC = ({ datasetId }) => { )} - {(isLoading && !isMuting) + {isListLoading ? : total > 0 ? = ({ } return ( -
- - - - - - - - - - - - - - - {localDocs.map((doc, index) => { - const isFile = doc.data_source_type === DataSourceType.FILE - const fileType = isFile ? doc.data_source_detail_dict?.upload_file?.extension : '' - return { - router.push(`/datasets/${datasetId}/documents/${doc.id}`) - }}> - + {localDocs.map((doc, index) => { + const isFile = doc.data_source_type === DataSourceType.FILE + const fileType = isFile ? doc.data_source_detail_dict?.upload_file?.extension : '' + return { + router.push(`/datasets/${datasetId}/documents/${doc.id}`) + }}> + + + + + + + + + + })} + +
-
e.stopPropagation()}> - - # -
-
-
- {t('datasetDocuments.list.table.header.fileName')} -
-
{t('datasetDocuments.list.table.header.chunkingMode')}{t('datasetDocuments.list.table.header.words')}{t('datasetDocuments.list.table.header.hitCount')} -
- {t('datasetDocuments.list.table.header.uploadTime')} - -
-
{t('datasetDocuments.list.table.header.status')}{t('datasetDocuments.list.table.header.action')}
+
+
+ + + + - - - - - - + + + + + - })} - -
e.stopPropagation()}> { - onSelectedIdChange( - selectedIds.includes(doc.id) - ? selectedIds.filter(id => id !== doc.id) - : [...selectedIds, doc.id], - ) - }} + checked={isAllSelected} + mixed={!isAllSelected && isSomeSelected} + onCheck={onSelectedAll} /> - {/* {doc.position} */} - {index + 1} + #
-
-
- {doc?.data_source_type === DataSourceType.NOTION && } - {doc?.data_source_type === DataSourceType.FILE && } - {doc?.data_source_type === DataSourceType.WEB && } -
- {doc.name} -
- -
{ - e.stopPropagation() - handleShowRenameModal(doc) - }} - > - -
-
-
+
+ {t('datasetDocuments.list.table.header.fileName')}
- - {renderCount(doc.word_count)}{renderCount(doc.hit_count)} - {formatTime(doc.created_at, t('datasetHitTesting.dateTimeFormat') as string)} - - { - (['indexing', 'splitting', 'parsing', 'cleaning'].includes(doc.indexing_status) && doc?.data_source_type === DataSourceType.NOTION) - ? - : - } - - + {t('datasetDocuments.list.table.header.chunkingMode')}{t('datasetDocuments.list.table.header.words')}{t('datasetDocuments.list.table.header.hitCount')} +
+ {t('datasetDocuments.list.table.header.uploadTime')} + +
{t('datasetDocuments.list.table.header.status')}{t('datasetDocuments.list.table.header.action')}
+ +
+
e.stopPropagation()}> + { + onSelectedIdChange( + selectedIds.includes(doc.id) + ? selectedIds.filter(id => id !== doc.id) + : [...selectedIds, doc.id], + ) + }} + /> + {/* {doc.position} */} + {index + 1} +
+
+
+
+ {doc?.data_source_type === DataSourceType.NOTION && } + {doc?.data_source_type === DataSourceType.FILE && } + {doc?.data_source_type === DataSourceType.WEB && } +
+ {doc.name} +
+ +
{ + e.stopPropagation() + handleShowRenameModal(doc) + }} + > + +
+
+
+
+
+ + {renderCount(doc.word_count)}{renderCount(doc.hit_count)} + {formatTime(doc.created_at, t('datasetHitTesting.dateTimeFormat') as string)} + + { + (['indexing', 'splitting', 'parsing', 'cleaning'].includes(doc.indexing_status) && doc?.data_source_type === DataSourceType.NOTION) + ? + : + } + + +
+
{(selectedIds.length > 0) && ( = ({ /> )} {/* Show Pagination only if the total is more than the limit */} - {pagination.total && pagination.total > (pagination.limit || 10) && ( + {pagination.total && ( )} diff --git a/web/service/datasets.ts b/web/service/datasets.ts index f2065de382..53b55b375b 100644 --- a/web/service/datasets.ts +++ b/web/service/datasets.ts @@ -5,7 +5,6 @@ import type { CreateDocumentReq, DataSet, DataSetListResponse, - DocumentListResponse, ErrorDocsResponse, ExternalAPIDeleteResponse, ExternalAPIItem, @@ -122,10 +121,6 @@ export const fetchProcessRule: Fetcher('/datasets/process-rule', { params: { document_id: documentId } }) } -export const fetchDocuments: Fetcher = ({ datasetId, params }) => { - return get(`/datasets/${datasetId}/documents`, { params }) -} - export const createFirstDocument: Fetcher = ({ body }) => { return post('/datasets/init', { body }) } diff --git a/web/service/knowledge/use-document.ts b/web/service/knowledge/use-document.ts index 02e523bd90..5bced9286e 100644 --- a/web/service/knowledge/use-document.ts +++ b/web/service/knowledge/use-document.ts @@ -4,8 +4,8 @@ import { } from '@tanstack/react-query' import { del, get, patch } from '../base' import { useInvalid } from '../use-base' -import type { MetadataType } from '../datasets' -import type { DocumentDetailResponse, SimpleDocumentDetail, UpdateDocumentBatchParams } from '@/models/datasets' +import type { MetadataType, SortType } from '../datasets' +import type { DocumentDetailResponse, DocumentListResponse, UpdateDocumentBatchParams } from '@/models/datasets' import { DocumentActionType } from '@/models/datasets' import type { CommonResponse } from '@/models/common' @@ -18,19 +18,23 @@ export const useDocumentList = (payload: { keyword: string page: number limit: number - } + sort?: SortType + }, + refetchInterval?: number | false }) => { - const { query, datasetId } = payload - return useQuery<{ data: SimpleDocumentDetail[] }>({ - queryKey: [...useDocumentListKey, datasetId, query], - queryFn: () => get<{ data: SimpleDocumentDetail[] }>(`/datasets/${datasetId}/documents`, { + const { query, datasetId, refetchInterval } = payload + const { keyword, page, limit, sort } = query + return useQuery({ + queryKey: [...useDocumentListKey, datasetId, keyword, page, limit, sort], + queryFn: () => get(`/datasets/${datasetId}/documents`, { params: query, }), + refetchInterval, }) } -export const useInvalidDocumentList = () => { - return useInvalid(useDocumentListKey) +export const useInvalidDocumentList = (datasetId?: string) => { + return useInvalid(datasetId ? [...useDocumentListKey, datasetId] : useDocumentListKey) } const useAutoDisabledDocumentKey = [NAME_SPACE, 'autoDisabledDocument']