diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py
index dac0a6a772..c55555451b 100644
--- a/api/services/dataset_service.py
+++ b/api/services/dataset_service.py
@@ -859,7 +859,7 @@ class DocumentService:
             position = DocumentService.get_documents_position(dataset.id)
             document_ids = []
             duplicate_document_ids = []
-            if knowledge_config.data_source.info_list.data_source_type == "upload_file":
+            if knowledge_config.data_source.info_list.data_source_type == "upload_file":  # type: ignore
                 upload_file_list = knowledge_config.data_source.info_list.file_info_list.file_ids  # type: ignore
                 for file_id in upload_file_list:
                     file = (
@@ -901,7 +901,7 @@ class DocumentService:
                     document = DocumentService.build_document(
                         dataset,
                         dataset_process_rule.id,  # type: ignore
-                        knowledge_config.data_source.info_list.data_source_type,
+                        knowledge_config.data_source.info_list.data_source_type,  # type: ignore
                         knowledge_config.doc_form,
                         knowledge_config.doc_language,
                         data_source_info,
@@ -916,8 +916,8 @@ class DocumentService:
                     document_ids.append(document.id)
                     documents.append(document)
                     position += 1
-            elif knowledge_config.data_source.info_list.data_source_type == "notion_import":
-                notion_info_list = knowledge_config.data_source.info_list.notion_info_list
+            elif knowledge_config.data_source.info_list.data_source_type == "notion_import":  # type: ignore
+                notion_info_list = knowledge_config.data_source.info_list.notion_info_list  # type: ignore
                 if not notion_info_list:
                     raise ValueError("No notion info list found.")
                 exist_page_ids = []
@@ -956,7 +956,7 @@ class DocumentService:
                             document = DocumentService.build_document(
                                 dataset,
                                 dataset_process_rule.id,  # type: ignore
-                                knowledge_config.data_source.info_list.data_source_type,
+                                knowledge_config.data_source.info_list.data_source_type,  # type: ignore
                                 knowledge_config.doc_form,
                                 knowledge_config.doc_language,
                                 data_source_info,
@@ -976,8 +976,8 @@ class DocumentService:
                 # delete not selected documents
                 if len(exist_document) > 0:
                     clean_notion_document_task.delay(list(exist_document.values()), dataset.id)
-            elif knowledge_config.data_source.info_list.data_source_type == "website_crawl":
-                website_info = knowledge_config.data_source.info_list.website_info_list
+            elif knowledge_config.data_source.info_list.data_source_type == "website_crawl":  # type: ignore
+                website_info = knowledge_config.data_source.info_list.website_info_list  # type: ignore
                 if not website_info:
                     raise ValueError("No website info list found.")
                 urls = website_info.urls
@@ -996,7 +996,7 @@ class DocumentService:
                     document = DocumentService.build_document(
                         dataset,
                         dataset_process_rule.id,  # type: ignore
-                        knowledge_config.data_source.info_list.data_source_type,
+                        knowledge_config.data_source.info_list.data_source_type,  # type: ignore
                         knowledge_config.doc_form,
                         knowledge_config.doc_language,
                         data_source_info,
@@ -1195,20 +1195,20 @@ class DocumentService:

         if features.billing.enabled:
             count = 0
-            if knowledge_config.data_source.info_list.data_source_type == "upload_file":
+            if knowledge_config.data_source.info_list.data_source_type == "upload_file":  # type: ignore
                 upload_file_list = (
-                    knowledge_config.data_source.info_list.file_info_list.file_ids
-                    if knowledge_config.data_source.info_list.file_info_list
+                    knowledge_config.data_source.info_list.file_info_list.file_ids  # type: ignore
+                    if knowledge_config.data_source.info_list.file_info_list  # type: ignore
                     else []
                 )
                 count = len(upload_file_list)
-            elif knowledge_config.data_source.info_list.data_source_type == "notion_import":
-                notion_info_list = knowledge_config.data_source.info_list.notion_info_list
+            elif knowledge_config.data_source.info_list.data_source_type == "notion_import":  # type: ignore
+                notion_info_list = knowledge_config.data_source.info_list.notion_info_list  # type: ignore
                 if notion_info_list:
                     for notion_info in notion_info_list:
                         count = count + len(notion_info.pages)
-            elif knowledge_config.data_source.info_list.data_source_type == "website_crawl":
-                website_info = knowledge_config.data_source.info_list.website_info_list
+            elif knowledge_config.data_source.info_list.data_source_type == "website_crawl":  # type: ignore
+                website_info = knowledge_config.data_source.info_list.website_info_list  # type: ignore
                 if website_info:
                     count = len(website_info.urls)
             batch_upload_limit = int(dify_config.BATCH_UPLOAD_LIMIT)
@@ -1239,7 +1239,7 @@ class DocumentService:
         dataset = Dataset(
             tenant_id=tenant_id,
             name="",
-            data_source_type=knowledge_config.data_source.info_list.data_source_type,
+            data_source_type=knowledge_config.data_source.info_list.data_source_type,  # type: ignore
             indexing_technique=knowledge_config.indexing_technique,
             created_by=account.id,
             embedding_model=knowledge_config.embedding_model,
diff --git a/api/services/entities/knowledge_entities/knowledge_entities.py b/api/services/entities/knowledge_entities/knowledge_entities.py
index 76d9c28812..8d6a246b64 100644
--- a/api/services/entities/knowledge_entities/knowledge_entities.py
+++ b/api/services/entities/knowledge_entities/knowledge_entities.py
@@ -97,7 +97,7 @@ class KnowledgeConfig(BaseModel):
     original_document_id: Optional[str] = None
     duplicate: bool = True
     indexing_technique: Literal["high_quality", "economy"]
-    data_source: DataSource
+    data_source: Optional[DataSource] = None
     process_rule: Optional[ProcessRule] = None
     retrieval_model: Optional[RetrievalModel] = None
     doc_form: str = "text_model"
diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx
index 11984d71c6..d6fa45b4fb 100644
--- a/web/app/components/datasets/create/step-two/index.tsx
+++ b/web/app/components/datasets/create/step-two/index.tsx
@@ -1001,7 +1001,7 @@ const StepTwo = ({
             ) : (
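The substantive change in this diff is `KnowledgeConfig.data_source` becoming `Optional[DataSource] = None`; the scattered `# type: ignore` comments silence the warnings mypy then raises on the unguarded `knowledge_config.data_source.info_list...` attribute chains. A minimal sketch of that behavior, using simplified stand-in models (`InfoList` and the literal values here are illustrative, not the full Dify entities):

```python
from typing import Literal, Optional

from pydantic import BaseModel


class InfoList(BaseModel):
    data_source_type: Literal["upload_file", "notion_import", "website_crawl"]


class DataSource(BaseModel):
    info_list: InfoList


class KnowledgeConfig(BaseModel):
    # Previously `data_source: DataSource` (required); now optional with a
    # None default, so a config without a data source validates successfully.
    data_source: Optional[DataSource] = None


config = KnowledgeConfig()  # valid only after the change above
assert config.data_source is None

# mypy flags `config.data_source.info_list` because `data_source` may be None.
# The service code silences this per access with `# type: ignore`; an explicit
# guard would instead narrow the Optional type:
if config.data_source is not None:
    print(config.data_source.info_list.data_source_type)
```

An `is not None` guard (or an early `raise` in code paths that require the field) would let mypy narrow the Optional and avoid the per-line ignores, at the cost of a little extra code in each branch.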