feat: allow updating chunk settings for the existing documents (#12833)

kurokobo 2025-01-21 10:25:40 +09:00 committed by GitHub
parent 9d86147d20
commit 3defd24087
4 changed files with 24 additions and 18 deletions

File 1 of 4

@@ -859,7 +859,7 @@ class DocumentService:
         position = DocumentService.get_documents_position(dataset.id)
         document_ids = []
         duplicate_document_ids = []
-        if knowledge_config.data_source.info_list.data_source_type == "upload_file":
+        if knowledge_config.data_source.info_list.data_source_type == "upload_file":  # type: ignore
             upload_file_list = knowledge_config.data_source.info_list.file_info_list.file_ids  # type: ignore
             for file_id in upload_file_list:
                 file = (
@@ -901,7 +901,7 @@ class DocumentService:
                 document = DocumentService.build_document(
                     dataset,
                     dataset_process_rule.id,  # type: ignore
-                    knowledge_config.data_source.info_list.data_source_type,
+                    knowledge_config.data_source.info_list.data_source_type,  # type: ignore
                     knowledge_config.doc_form,
                     knowledge_config.doc_language,
                     data_source_info,
@@ -916,8 +916,8 @@ class DocumentService:
                 document_ids.append(document.id)
                 documents.append(document)
                 position += 1
-        elif knowledge_config.data_source.info_list.data_source_type == "notion_import":
-            notion_info_list = knowledge_config.data_source.info_list.notion_info_list
+        elif knowledge_config.data_source.info_list.data_source_type == "notion_import":  # type: ignore
+            notion_info_list = knowledge_config.data_source.info_list.notion_info_list  # type: ignore
             if not notion_info_list:
                 raise ValueError("No notion info list found.")
             exist_page_ids = []
@@ -956,7 +956,7 @@ class DocumentService:
                     document = DocumentService.build_document(
                         dataset,
                         dataset_process_rule.id,  # type: ignore
-                        knowledge_config.data_source.info_list.data_source_type,
+                        knowledge_config.data_source.info_list.data_source_type,  # type: ignore
                         knowledge_config.doc_form,
                         knowledge_config.doc_language,
                         data_source_info,
@@ -976,8 +976,8 @@ class DocumentService:
             # delete not selected documents
             if len(exist_document) > 0:
                 clean_notion_document_task.delay(list(exist_document.values()), dataset.id)
-        elif knowledge_config.data_source.info_list.data_source_type == "website_crawl":
-            website_info = knowledge_config.data_source.info_list.website_info_list
+        elif knowledge_config.data_source.info_list.data_source_type == "website_crawl":  # type: ignore
+            website_info = knowledge_config.data_source.info_list.website_info_list  # type: ignore
             if not website_info:
                 raise ValueError("No website info list found.")
             urls = website_info.urls
@@ -996,7 +996,7 @@ class DocumentService:
                 document = DocumentService.build_document(
                     dataset,
                     dataset_process_rule.id,  # type: ignore
-                    knowledge_config.data_source.info_list.data_source_type,
+                    knowledge_config.data_source.info_list.data_source_type,  # type: ignore
                     knowledge_config.doc_form,
                     knowledge_config.doc_language,
                     data_source_info,
@@ -1195,20 +1195,20 @@ class DocumentService:
         if features.billing.enabled:
             count = 0
-            if knowledge_config.data_source.info_list.data_source_type == "upload_file":
+            if knowledge_config.data_source.info_list.data_source_type == "upload_file":  # type: ignore
                 upload_file_list = (
-                    knowledge_config.data_source.info_list.file_info_list.file_ids
-                    if knowledge_config.data_source.info_list.file_info_list
+                    knowledge_config.data_source.info_list.file_info_list.file_ids  # type: ignore
+                    if knowledge_config.data_source.info_list.file_info_list  # type: ignore
                     else []
                 )
                 count = len(upload_file_list)
-            elif knowledge_config.data_source.info_list.data_source_type == "notion_import":
-                notion_info_list = knowledge_config.data_source.info_list.notion_info_list
+            elif knowledge_config.data_source.info_list.data_source_type == "notion_import":  # type: ignore
+                notion_info_list = knowledge_config.data_source.info_list.notion_info_list  # type: ignore
                 if notion_info_list:
                     for notion_info in notion_info_list:
                         count = count + len(notion_info.pages)
-            elif knowledge_config.data_source.info_list.data_source_type == "website_crawl":
-                website_info = knowledge_config.data_source.info_list.website_info_list
+            elif knowledge_config.data_source.info_list.data_source_type == "website_crawl":  # type: ignore
+                website_info = knowledge_config.data_source.info_list.website_info_list  # type: ignore
                 if website_info:
                     count = len(website_info.urls)
             batch_upload_limit = int(dify_config.BATCH_UPLOAD_LIMIT)
@@ -1239,7 +1239,7 @@ class DocumentService:
         dataset = Dataset(
             tenant_id=tenant_id,
             name="",
-            data_source_type=knowledge_config.data_source.info_list.data_source_type,
+            data_source_type=knowledge_config.data_source.info_list.data_source_type,  # type: ignore
             indexing_technique=knowledge_config.indexing_technique,
             created_by=account.id,
             embedding_model=knowledge_config.embedding_model,
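The `# type: ignore` comments added throughout this file all trace back to the `KnowledgeConfig` change in the next file: once `data_source` becomes `Optional[DataSource]`, every `knowledge_config.data_source.info_list...` expression is, to a static checker such as mypy, a possible attribute access on `None`. A minimal sketch of the trade-off, using simplified stand-in models rather than the real Dify classes:

from typing import Optional
from pydantic import BaseModel

class InfoList(BaseModel):
    data_source_type: str

class DataSource(BaseModel):
    info_list: InfoList

class KnowledgeConfig(BaseModel):
    data_source: Optional[DataSource] = None  # newly optional, as in the diff below

def kind_suppressed(config: KnowledgeConfig) -> str:
    # What this commit does: keep the direct access and silence the checker.
    # At runtime this still raises AttributeError if data_source is None.
    return config.data_source.info_list.data_source_type  # type: ignore

def kind_narrowed(config: KnowledgeConfig) -> str:
    # The ignore-free alternative: narrow the Optional explicitly first.
    if config.data_source is None:
        raise ValueError("data_source is required for this operation.")
    return config.data_source.info_list.data_source_type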

File 2 of 4

@@ -97,7 +97,7 @@ class KnowledgeConfig(BaseModel):
     original_document_id: Optional[str] = None
     duplicate: bool = True
     indexing_technique: Literal["high_quality", "economy"]
-    data_source: DataSource
+    data_source: Optional[DataSource] = None
     process_rule: Optional[ProcessRule] = None
     retrieval_model: Optional[RetrievalModel] = None
     doc_form: str = "text_model"
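Relaxing `data_source` to `Optional[...] = None` is what lets a chunk-settings update for an existing document submit a `KnowledgeConfig` with no data source at all. A quick illustration of the validation difference, assuming Pydantic v2 and a pared-down model (the required sibling fields are omitted here):

from typing import Optional
from pydantic import BaseModel, ValidationError

class DataSource(BaseModel):
    info_list: dict  # simplified stand-in for the nested info-list model

class Before(BaseModel):
    data_source: DataSource  # required, as on the left of the diff

class After(BaseModel):
    data_source: Optional[DataSource] = None  # optional, as on the right

try:
    Before.model_validate({})
except ValidationError:
    print("before: a payload without data_source is rejected")

config = After.model_validate({})
print("after: data_source defaults to", config.data_source)  # None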

File 3 of 4

@@ -1001,7 +1001,7 @@ const StepTwo = ({
                 )
                 : (
                   <div className='flex items-center mt-8 py-2'>
-                    {!datasetId && <Button loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.save')}</Button>}
+                    <Button loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.save')}</Button>
                     <Button className='ml-2' onClick={onCancel}>{t('datasetCreation.stepTwo.cancel')}</Button>
                   </div>
                 )}

File 4 of 4

@@ -4,6 +4,7 @@ import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react'
 import { useDebounceFn } from 'ahooks'
 import { useTranslation } from 'react-i18next'
 import { createContext, useContext, useContextSelector } from 'use-context-selector'
+import { usePathname } from 'next/navigation'
 import { useDocumentContext } from '../index'
 import { ProcessStatus } from '../segment-add'
 import s from './style.module.css'
@@ -99,6 +100,7 @@ const Completed: FC<ICompletedProps> = ({
 }) => {
   const { t } = useTranslation()
   const { notify } = useContext(ToastContext)
+  const pathname = usePathname()
   const datasetId = useDocumentContext(s => s.datasetId) || ''
   const documentId = useDocumentContext(s => s.documentId) || ''
   const docForm = useDocumentContext(s => s.docForm)
@@ -374,6 +376,10 @@ const Completed: FC<ICompletedProps> = ({
     // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [segments, datasetId, documentId])
 
+  useEffect(() => {
+    resetList()
+  }, [pathname])
+
   useEffect(() => {
     if (importStatus === ProcessStatus.COMPLETED)
       resetList()