fix: fix document list overlap and optimize document list fetching (#15377)

This commit is contained in:
Wu Tianwei 2025-03-10 15:34:40 +08:00 committed by GitHub
parent 269ba6add9
commit 2cf0cb471f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 158 additions and 155 deletions

View File

@ -23,7 +23,7 @@ import FloatRightContainer from '@/app/components/base/float-right-container'
import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
import { LayoutRight2LineMod } from '@/app/components/base/icons/src/public/knowledge'
import { useCheckSegmentBatchImportProgress, useChildSegmentListKey, useSegmentBatchImport, useSegmentListKey } from '@/service/knowledge/use-segment'
import { useDocumentDetail, useDocumentMetadata } from '@/service/knowledge/use-document'
import { useDocumentDetail, useDocumentMetadata, useInvalidDocumentList } from '@/service/knowledge/use-document'
import { useInvalid } from '@/service/use-base'
type DocumentContextValue = {
@ -152,17 +152,22 @@ const DocumentDetail: FC<Props> = ({ datasetId, documentId }) => {
const invalidChunkList = useInvalid(useSegmentListKey)
const invalidChildChunkList = useInvalid(useChildSegmentListKey)
const invalidDocumentList = useInvalidDocumentList(datasetId)
// Callback run after an operation on the current document; `operateName` names the
// operation and may be undefined.
// The document list is always invalidated first. For 'delete' it calls backToPrev();
// for anything else it calls detailMutate() and schedules invalidation of the chunk
// and child-chunk lists 5 seconds later.
// NOTE(review): this span shows both an unconditional timer and an `if (operateName)`
// guarded timer; they look like the before/after sides of the same change — confirm
// that only the guarded one remains in the actual file.
const handleOperate = (operateName?: string) => {
invalidDocumentList()
if (operateName === 'delete') {
backToPrev()
}
else {
detailMutate()
setTimeout(() => {
invalidChunkList()
invalidChildChunkList()
}, 5000)
// If operation is not rename, refresh the chunk list after 5 seconds
// (presumably rename is the caller that passes no operateName — verify against callers)
if (operateName) {
setTimeout(() => {
invalidChunkList()
invalidChildChunkList()
}, 5000)
}
}
}

View File

@ -1,11 +1,10 @@
'use client'
import type { FC } from 'react'
import React, { useCallback, useEffect, useMemo, useState } from 'react'
import useSWR from 'swr'
import { useTranslation } from 'react-i18next'
import { useRouter } from 'next/navigation'
import { useDebounce, useDebounceFn } from 'ahooks'
import { groupBy, omit } from 'lodash-es'
import { groupBy } from 'lodash-es'
import { PlusIcon } from '@heroicons/react/24/solid'
import { RiExternalLinkLine } from '@remixicon/react'
import AutoDisabledDocument from '../common/document-status-with-action/auto-disabled-document'
@ -15,16 +14,16 @@ import Loading from '@/app/components/base/loading'
import Button from '@/app/components/base/button'
import Input from '@/app/components/base/input'
import { get } from '@/service/base'
import { createDocument, fetchDocuments } from '@/service/datasets'
import { createDocument } from '@/service/datasets'
import { useDatasetDetailContext } from '@/context/dataset-detail'
import { NotionPageSelectorModal } from '@/app/components/base/notion-page-selector'
import type { NotionPage } from '@/models/common'
import type { CreateDocumentReq } from '@/models/datasets'
import { DataSourceType } from '@/models/datasets'
import { DataSourceType, ProcessMode } from '@/models/datasets'
import IndexFailed from '@/app/components/datasets/common/document-status-with-action/index-failed'
import { useProviderContext } from '@/context/provider-context'
import cn from '@/utils/classnames'
import { useInvalidDocumentDetailKey } from '@/service/knowledge/use-document'
import { useDocumentList, useInvalidDocumentDetailKey, useInvalidDocumentList } from '@/service/knowledge/use-document'
import { useInvalid } from '@/service/use-base'
import { useChildSegmentListKey, useSegmentListKey } from '@/service/knowledge/use-segment'
@ -73,12 +72,12 @@ const EmptyElement: FC<{ canAdd: boolean; onClick: () => void; type?: 'upload' |
</div>
}
interface IDocumentsProps {
type IDocumentsProps = {
datasetId: string
}
export const fetcher = (url: string) => get(url, {}, {})
const DEFAULT_LIMIT = 15
const DEFAULT_LIMIT = 10
const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
const { t } = useTranslation()
@ -99,33 +98,33 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
const debouncedSearchValue = useDebounce(searchValue, { wait: 500 })
const query = useMemo(() => {
return { page: currPage + 1, limit, keyword: debouncedSearchValue, fetch: isDataSourceNotion ? true : '' }
}, [currPage, debouncedSearchValue, isDataSourceNotion, limit])
const { data: documentsRes, mutate, isLoading: isListLoading } = useSWR(
{
action: 'fetchDocuments',
datasetId,
params: query,
const { data: documentsRes, isFetching: isListLoading } = useDocumentList({
datasetId,
query: {
page: currPage + 1,
limit,
keyword: debouncedSearchValue,
},
apiParams => fetchDocuments(omit(apiParams, 'action')),
{ refreshInterval: (isDataSourceNotion && timerCanRun) ? 2500 : 0 },
)
refetchInterval: (isDataSourceNotion && timerCanRun) ? 2500 : 0,
})
const invalidDocumentList = useInvalidDocumentList(datasetId)
const [isMuting, setIsMuting] = useState(false)
useEffect(() => {
if (!isListLoading && isMuting)
setIsMuting(false)
}, [isListLoading, isMuting])
if (documentsRes) {
const totalPages = Math.ceil(documentsRes.total / limit)
if (totalPages < currPage + 1)
setCurrPage(totalPages === 0 ? 0 : totalPages - 1)
}
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [documentsRes])
const invalidDocumentDetail = useInvalidDocumentDetailKey()
const invalidChunkList = useInvalid(useSegmentListKey)
const invalidChildChunkList = useInvalid(useChildSegmentListKey)
const handleUpdate = useCallback(() => {
setIsMuting(true)
mutate()
invalidDocumentList()
invalidDocumentDetail()
setTimeout(() => {
invalidChunkList()
@ -175,8 +174,6 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
router.push(`/datasets/${datasetId}/documents/create`)
}
const isLoading = isListLoading // !documentsRes && !error
const handleSaveNotionPageSelected = async (selectedPages: NotionPage[]) => {
const workspacesMap = groupBy(selectedPages, 'workspace_id')
const workspaces = Object.keys(workspacesMap).map((workspaceId) => {
@ -209,7 +206,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
indexing_technique: dataset?.indexing_technique,
process_rule: {
rules: {},
mode: 'automatic',
mode: ProcessMode.general,
},
} as CreateDocumentReq
@ -217,7 +214,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
datasetId,
body: params,
})
mutate()
invalidDocumentList()
setTimerCanRun(true)
// mutateDatasetIndexingStatus(undefined, { revalidate: true })
setNotionPageSelectorModalVisible(false)
@ -272,7 +269,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
)}
</div>
</div>
{(isLoading && !isMuting)
{isListLoading
? <Loading type='app' />
: total > 0
? <List

View File

@ -500,121 +500,123 @@ const DocumentList: FC<IDocumentListProps> = ({
}
return (
<div className='relative w-full h-full overflow-x-auto'>
<table className={`min-w-[700px] max-w-full w-full border-collapse border-0 text-sm mt-3 ${s.documentTable}`}>
<thead className="h-8 leading-8 border-b border-divider-subtle text-text-tertiary font-medium text-xs uppercase">
<tr>
<td className='w-12'>
<div className='flex items-center' onClick={e => e.stopPropagation()}>
<Checkbox
className='shrink-0 mr-2'
checked={isAllSelected}
mixed={!isAllSelected && isSomeSelected}
onCheck={onSelectedAll}
/>
#
</div>
</td>
<td>
<div className='flex'>
{t('datasetDocuments.list.table.header.fileName')}
</div>
</td>
<td className='w-[130px]'>{t('datasetDocuments.list.table.header.chunkingMode')}</td>
<td className='w-24'>{t('datasetDocuments.list.table.header.words')}</td>
<td className='w-44'>{t('datasetDocuments.list.table.header.hitCount')}</td>
<td className='w-44'>
<div className='flex items-center' onClick={onClickSort}>
{t('datasetDocuments.list.table.header.uploadTime')}
<ArrowDownIcon className={cn('ml-0.5 h-3 w-3 stroke-current stroke-2 cursor-pointer', enableSort ? 'text-text-tertiary' : 'text-text-disabled')} />
</div>
</td>
<td className='w-40'>{t('datasetDocuments.list.table.header.status')}</td>
<td className='w-20'>{t('datasetDocuments.list.table.header.action')}</td>
</tr>
</thead>
<tbody className="text-text-secondary">
{localDocs.map((doc, index) => {
const isFile = doc.data_source_type === DataSourceType.FILE
const fileType = isFile ? doc.data_source_detail_dict?.upload_file?.extension : ''
return <tr
key={doc.id}
className={'border-b border-divider-subtle h-8 hover:bg-background-default-hover cursor-pointer'}
onClick={() => {
router.push(`/datasets/${datasetId}/documents/${doc.id}`)
}}>
<td className='text-left align-middle text-text-tertiary text-xs'>
<div className='flex flex-col relative w-full h-full'>
<div className='grow overflow-x-auto'>
<table className={`min-w-[700px] max-w-full w-full border-collapse border-0 text-sm mt-3 ${s.documentTable}`}>
<thead className="h-8 leading-8 border-b border-divider-subtle text-text-tertiary font-medium text-xs uppercase">
<tr>
<td className='w-12'>
<div className='flex items-center' onClick={e => e.stopPropagation()}>
<Checkbox
className='shrink-0 mr-2'
checked={selectedIds.includes(doc.id)}
onCheck={() => {
onSelectedIdChange(
selectedIds.includes(doc.id)
? selectedIds.filter(id => id !== doc.id)
: [...selectedIds, doc.id],
)
}}
checked={isAllSelected}
mixed={!isAllSelected && isSomeSelected}
onCheck={onSelectedAll}
/>
{/* {doc.position} */}
{index + 1}
#
</div>
</td>
<td>
<div className={'group flex items-center mr-6 hover:mr-0 max-w-[460px]'}>
<div className='shrink-0'>
{doc?.data_source_type === DataSourceType.NOTION && <NotionIcon className='inline-flex -mt-[3px] mr-1.5 align-middle' type='page' src={doc.data_source_info.notion_page_icon} />}
{doc?.data_source_type === DataSourceType.FILE && <FileTypeIcon type={extensionToFileType(doc?.data_source_info?.upload_file?.extension ?? fileType)} className='mr-1.5' />}
{doc?.data_source_type === DataSourceType.WEB && <Globe01 className='inline-flex -mt-[3px] mr-1.5 align-middle' />}
</div>
<span className='text-sm truncate grow-1'>{doc.name}</span>
<div className='group-hover:flex group-hover:ml-auto hidden shrink-0'>
<Tooltip
popupContent={t('datasetDocuments.list.table.rename')}
>
<div
className='p-1 rounded-md cursor-pointer hover:bg-state-base-hover'
onClick={(e) => {
e.stopPropagation()
handleShowRenameModal(doc)
}}
>
<Edit03 className='w-4 h-4 text-text-tertiary' />
</div>
</Tooltip>
</div>
<div className='flex'>
{t('datasetDocuments.list.table.header.fileName')}
</div>
</td>
<td>
<ChunkingModeLabel
isGeneralMode={isGeneralMode}
isQAMode={isQAMode}
/>
</td>
<td>{renderCount(doc.word_count)}</td>
<td>{renderCount(doc.hit_count)}</td>
<td className='text-text-secondary text-[13px]'>
{formatTime(doc.created_at, t('datasetHitTesting.dateTimeFormat') as string)}
</td>
<td>
{
(['indexing', 'splitting', 'parsing', 'cleaning'].includes(doc.indexing_status) && doc?.data_source_type === DataSourceType.NOTION)
? <ProgressBar percent={doc.percent || 0} />
: <StatusItem status={doc.display_status} />
}
</td>
<td>
<OperationAction
embeddingAvailable={embeddingAvailable}
datasetId={datasetId}
detail={pick(doc, ['name', 'enabled', 'archived', 'id', 'data_source_type', 'doc_form'])}
onUpdate={onUpdate}
/>
<td className='w-[130px]'>{t('datasetDocuments.list.table.header.chunkingMode')}</td>
<td className='w-24'>{t('datasetDocuments.list.table.header.words')}</td>
<td className='w-44'>{t('datasetDocuments.list.table.header.hitCount')}</td>
<td className='w-44'>
<div className='flex items-center' onClick={onClickSort}>
{t('datasetDocuments.list.table.header.uploadTime')}
<ArrowDownIcon className={cn('ml-0.5 h-3 w-3 stroke-current stroke-2 cursor-pointer', enableSort ? 'text-text-tertiary' : 'text-text-disabled')} />
</div>
</td>
<td className='w-40'>{t('datasetDocuments.list.table.header.status')}</td>
<td className='w-20'>{t('datasetDocuments.list.table.header.action')}</td>
</tr>
})}
</tbody>
</table>
</thead>
<tbody className="text-text-secondary">
{localDocs.map((doc, index) => {
const isFile = doc.data_source_type === DataSourceType.FILE
const fileType = isFile ? doc.data_source_detail_dict?.upload_file?.extension : ''
return <tr
key={doc.id}
className={'border-b border-divider-subtle h-8 hover:bg-background-default-hover cursor-pointer'}
onClick={() => {
router.push(`/datasets/${datasetId}/documents/${doc.id}`)
}}>
<td className='text-left align-middle text-text-tertiary text-xs'>
<div className='flex items-center' onClick={e => e.stopPropagation()}>
<Checkbox
className='shrink-0 mr-2'
checked={selectedIds.includes(doc.id)}
onCheck={() => {
onSelectedIdChange(
selectedIds.includes(doc.id)
? selectedIds.filter(id => id !== doc.id)
: [...selectedIds, doc.id],
)
}}
/>
{/* {doc.position} */}
{index + 1}
</div>
</td>
<td>
<div className={'group flex items-center mr-6 hover:mr-0 max-w-[460px]'}>
<div className='shrink-0'>
{doc?.data_source_type === DataSourceType.NOTION && <NotionIcon className='inline-flex mt-[-3px] mr-1.5 align-middle' type='page' src={doc.data_source_info.notion_page_icon} />}
{doc?.data_source_type === DataSourceType.FILE && <FileTypeIcon type={extensionToFileType(doc?.data_source_info?.upload_file?.extension ?? fileType)} className='mr-1.5' />}
{doc?.data_source_type === DataSourceType.WEB && <Globe01 className='inline-flex mt-[-3px] mr-1.5 align-middle' />}
</div>
<span className='text-sm truncate grow-1'>{doc.name}</span>
<div className='group-hover:flex group-hover:ml-auto hidden shrink-0'>
<Tooltip
popupContent={t('datasetDocuments.list.table.rename')}
>
<div
className='p-1 rounded-md cursor-pointer hover:bg-state-base-hover'
onClick={(e) => {
e.stopPropagation()
handleShowRenameModal(doc)
}}
>
<Edit03 className='w-4 h-4 text-text-tertiary' />
</div>
</Tooltip>
</div>
</div>
</td>
<td>
<ChunkingModeLabel
isGeneralMode={isGeneralMode}
isQAMode={isQAMode}
/>
</td>
<td>{renderCount(doc.word_count)}</td>
<td>{renderCount(doc.hit_count)}</td>
<td className='text-text-secondary text-[13px]'>
{formatTime(doc.created_at, t('datasetHitTesting.dateTimeFormat') as string)}
</td>
<td>
{
(['indexing', 'splitting', 'parsing', 'cleaning'].includes(doc.indexing_status) && doc?.data_source_type === DataSourceType.NOTION)
? <ProgressBar percent={doc.percent || 0} />
: <StatusItem status={doc.display_status} />
}
</td>
<td>
<OperationAction
embeddingAvailable={embeddingAvailable}
datasetId={datasetId}
detail={pick(doc, ['name', 'enabled', 'archived', 'id', 'data_source_type', 'doc_form'])}
onUpdate={onUpdate}
/>
</td>
</tr>
})}
</tbody>
</table>
</div>
{(selectedIds.length > 0) && (
<BatchAction
className='absolute left-0 bottom-16 z-20'
@ -629,10 +631,10 @@ const DocumentList: FC<IDocumentListProps> = ({
/>
)}
{/* Show Pagination whenever the total is non-zero (it is no longer compared against the limit) */}
{pagination.total && pagination.total > (pagination.limit || 10) && (
{pagination.total && (
<Pagination
{...pagination}
className='absolute bottom-0 left-0 w-full px-0 pb-0'
className='shrink-0 w-full px-0 pb-0'
/>
)}

View File

@ -5,7 +5,6 @@ import type {
CreateDocumentReq,
DataSet,
DataSetListResponse,
DocumentListResponse,
ErrorDocsResponse,
ExternalAPIDeleteResponse,
ExternalAPIItem,
@ -122,10 +121,6 @@ export const fetchProcessRule: Fetcher<ProcessRuleResponse, { params: { document
return get<ProcessRuleResponse>('/datasets/process-rule', { params: { document_id: documentId } })
}
export const fetchDocuments: Fetcher<DocumentListResponse, { datasetId: string; params: { keyword: string; page: number; limit: number; sort?: SortType } }> = ({ datasetId, params }) => {
return get<DocumentListResponse>(`/datasets/${datasetId}/documents`, { params })
}
/**
 * Issues a POST request to `/datasets/init`, forwarding `body` as the request body.
 *
 * @param body - The `CreateDocumentReq` payload to send.
 * @returns Whatever `post<createDocumentResponse>` returns for this request.
 */
export const createFirstDocument: Fetcher<createDocumentResponse, { body: CreateDocumentReq }> = ({ body }) => {
return post<createDocumentResponse>('/datasets/init', { body })
}

View File

@ -4,8 +4,8 @@ import {
} from '@tanstack/react-query'
import { del, get, patch } from '../base'
import { useInvalid } from '../use-base'
import type { MetadataType } from '../datasets'
import type { DocumentDetailResponse, SimpleDocumentDetail, UpdateDocumentBatchParams } from '@/models/datasets'
import type { MetadataType, SortType } from '../datasets'
import type { DocumentDetailResponse, DocumentListResponse, UpdateDocumentBatchParams } from '@/models/datasets'
import { DocumentActionType } from '@/models/datasets'
import type { CommonResponse } from '@/models/common'
@ -18,19 +18,23 @@ export const useDocumentList = (payload: {
keyword: string
page: number
limit: number
}
sort?: SortType
},
refetchInterval?: number | false
}) => {
const { query, datasetId } = payload
return useQuery<{ data: SimpleDocumentDetail[] }>({
queryKey: [...useDocumentListKey, datasetId, query],
queryFn: () => get<{ data: SimpleDocumentDetail[] }>(`/datasets/${datasetId}/documents`, {
const { query, datasetId, refetchInterval } = payload
const { keyword, page, limit, sort } = query
return useQuery<DocumentListResponse>({
queryKey: [...useDocumentListKey, datasetId, keyword, page, limit, sort],
queryFn: () => get<DocumentListResponse>(`/datasets/${datasetId}/documents`, {
params: query,
}),
refetchInterval,
})
}
export const useInvalidDocumentList = () => {
return useInvalid(useDocumentListKey)
/**
 * Returns the result of `useInvalid` for the document-list query key.
 *
 * @param datasetId - Optional dataset id. When provided, the key handed to `useInvalid`
 * is `[...useDocumentListKey, datasetId]`; when omitted, the bare `useDocumentListKey`
 * is used.
 *
 * NOTE(review): presumably `useInvalid` invalidates every query whose key starts with
 * the given prefix, so passing `datasetId` limits invalidation to that dataset's
 * document lists — confirm against the `useInvalid` implementation.
 */
export const useInvalidDocumentList = (datasetId?: string) => {
return useInvalid(datasetId ? [...useDocumentListKey, datasetId] : useDocumentListKey)
}
const useAutoDisabledDocumentKey = [NAME_SPACE, 'autoDisabledDocument']