From 155a4733f6e6f7bcd41d010622138c404646d95a Mon Sep 17 00:00:00 2001
From: Matri
Date: Wed, 16 Aug 2023 23:14:27 +0800
Subject: [PATCH] Feat/customizable file upload config (#818)
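
Make the upload limits configurable instead of hard-coding them in the API
controllers and the web uploader. Two settings are added to api/config.py and
can be overridden via environment variables, e.g. in the API service's
environment or .env (the values shown are the shipped defaults):

    UPLOAD_FILE_SIZE_LIMIT=15   # maximum size per uploaded file, in MB
    UPLOAD_FILE_BATCH_LIMIT=5   # maximum number of files uploaded per batch

The web app fetches both values from the new GET /files/upload endpoint via
SWR and falls back to the same defaults while the request is in flight; the
size limit is also interpolated into the uploader tip and validation
messages ({{size}} placeholders in the i18n files).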
---
 api/config.py                                 |   8 +-
 .../console/datasets/data_source.py           |   4 -
 api/controllers/console/datasets/file.py      |  23 +++-
 .../datasets/create/file-preview/index.tsx    |   4 +-
 .../datasets/create/file-uploader/index.tsx   | 106 +++++++++---------
 web/app/components/datasets/create/index.tsx  |  10 +-
 .../datasets/create/step-one/index.tsx        |   8 +-
 .../datasets/create/step-two/index.tsx        |   4 +-
 web/i18n/lang/dataset-creation.en.ts          |   4 +-
 web/i18n/lang/dataset-creation.zh.ts          |   4 +-
 web/models/common.ts                          |   5 +
 web/models/datasets.ts                        |  20 ++--
 web/service/common.ts                         |   5 +
 13 files changed, 122 insertions(+), 83 deletions(-)

diff --git a/api/config.py b/api/config.py
index 51e24cd825..aaa45dfe26 100644
--- a/api/config.py
+++ b/api/config.py
@@ -62,7 +62,9 @@ DEFAULTS = {
     'HOSTED_ANTHROPIC_PAID_ENABLED': 'False',
     'HOSTED_ANTHROPIC_PAID_INCREASE_QUOTA': 1,
     'TENANT_DOCUMENT_COUNT': 100,
-    'CLEAN_DAY_SETTING': 30
+    'CLEAN_DAY_SETTING': 30,
+    'UPLOAD_FILE_SIZE_LIMIT': 15,
+    'UPLOAD_FILE_BATCH_LIMIT': 5,
 }
 
 
@@ -244,6 +246,10 @@ class Config:
         self.TENANT_DOCUMENT_COUNT = get_env('TENANT_DOCUMENT_COUNT')
         self.CLEAN_DAY_SETTING = get_env('CLEAN_DAY_SETTING')
 
+        # uploading settings
+        self.UPLOAD_FILE_SIZE_LIMIT = int(get_env('UPLOAD_FILE_SIZE_LIMIT'))
+        self.UPLOAD_FILE_BATCH_LIMIT = int(get_env('UPLOAD_FILE_BATCH_LIMIT'))
+
 
 class CloudEditionConfig(Config):
 
diff --git a/api/controllers/console/datasets/data_source.py b/api/controllers/console/datasets/data_source.py
index 65f8225f12..d65ef851be 100644
--- a/api/controllers/console/datasets/data_source.py
+++ b/api/controllers/console/datasets/data_source.py
@@ -21,10 +21,6 @@ from tasks.document_indexing_sync_task import document_indexing_sync_task
 
 cache = TTLCache(maxsize=None, ttl=30)
 
-FILE_SIZE_LIMIT = 15 * 1024 * 1024  # 15MB
-ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm']
-PREVIEW_WORDS_LIMIT = 3000
-
 
 class DataSourceApi(Resource):
     integrate_icon_fields = {
diff --git a/api/controllers/console/datasets/file.py b/api/controllers/console/datasets/file.py
index 2d6a25e91b..d208d7a25c 100644
--- a/api/controllers/console/datasets/file.py
+++ b/api/controllers/console/datasets/file.py
@@ -25,12 +25,28 @@ from models.model import UploadFile
 
 cache = TTLCache(maxsize=None, ttl=30)
 
-FILE_SIZE_LIMIT = 15 * 1024 * 1024  # 15MB
 ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx']
 PREVIEW_WORDS_LIMIT = 3000
 
 
 class FileApi(Resource):
+    upload_config_fields = {
+        'file_size_limit': fields.Integer,
+        'batch_count_limit': fields.Integer
+    }
+
+    @setup_required
+    @login_required
+    @account_initialization_required
+    @marshal_with(upload_config_fields)
+    def get(self):
+        file_size_limit = current_app.config.get("UPLOAD_FILE_SIZE_LIMIT")
+        batch_count_limit = current_app.config.get("UPLOAD_FILE_BATCH_LIMIT")
+        return {
+            'file_size_limit': file_size_limit,
+            'batch_count_limit': batch_count_limit
+        }, 200
+
     file_fields = {
         'id': fields.String,
         'name': fields.String,
@@ -60,8 +76,9 @@ class FileApi(Resource):
         file_content = file.read()
         file_size = len(file_content)
 
-        if file_size > FILE_SIZE_LIMIT:
-            message = "({file_size} > {FILE_SIZE_LIMIT})"
+        file_size_limit = current_app.config.get("UPLOAD_FILE_SIZE_LIMIT") * 1024 * 1024
+        if file_size > file_size_limit:
+            message = f"({file_size} > {file_size_limit})"
             raise FileTooLargeError(message)
 
         extension = file.filename.split('.')[-1]
diff --git a/web/app/components/datasets/create/file-preview/index.tsx b/web/app/components/datasets/create/file-preview/index.tsx
index b51f21c41a..cd3dcd2e45 100644
--- a/web/app/components/datasets/create/file-preview/index.tsx
+++ b/web/app/components/datasets/create/file-preview/index.tsx
@@ -4,7 +4,7 @@ import { useTranslation } from 'react-i18next'
 import cn from 'classnames'
 import { XMarkIcon } from '@heroicons/react/20/solid'
 import s from './index.module.css'
-import type { File } from '@/models/datasets'
+import type { CustomFile as File } from '@/models/datasets'
 import { fetchFilePreview } from '@/service/common'
 
 type IProps = {
@@ -37,7 +37,7 @@ const FilePreview = ({
   }
 
   useEffect(() => {
-    if (file) {
+    if (file?.id) {
       setLoading(true)
       getPreviewContent(file.id)
     }
diff --git a/web/app/components/datasets/create/file-uploader/index.tsx b/web/app/components/datasets/create/file-uploader/index.tsx
index eb5b007e6a..32926c733c 100644
--- a/web/app/components/datasets/create/file-uploader/index.tsx
+++ b/web/app/components/datasets/create/file-uploader/index.tsx
@@ -1,21 +1,23 @@
 'use client'
 
-import React, { useEffect, useRef, useState } from 'react'
+import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react'
 import { useTranslation } from 'react-i18next'
 import { useContext } from 'use-context-selector'
 import cn from 'classnames'
+import useSWR from 'swr'
 import s from './index.module.css'
-import type { File as FileEntity } from '@/models/datasets'
+import type { CustomFile as File, FileItem } from '@/models/datasets'
 import { ToastContext } from '@/app/components/base/toast'
 import { upload } from '@/service/base'
+import { fetchFileUploadConfig } from '@/service/common'
 
 type IFileUploaderProps = {
-  fileList: any[]
+  fileList: FileItem[]
   titleClassName?: string
-  prepareFileList: (files: any[]) => void
-  onFileUpdate: (fileItem: any, progress: number, list: any[]) => void
+  prepareFileList: (files: FileItem[]) => void
+  onFileUpdate: (fileItem: FileItem, progress: number, list: FileItem[]) => void
   onFileListUpdate?: (files: any) => void
-  onPreview: (file: FileEntity) => void
+  onPreview: (file: File) => void
 }
 
 const ACCEPTS = [
@@ -22,16 +24,13 @@ type IFileUploaderProps = {
   '.txt',
   '.markdown',
   '.md',
   '.pdf',
   '.html',
   '.htm',
   '.xlsx',
   '.csv',
 ]
 
-const MAX_SIZE = 15 * 1024 * 1024
-const BATCH_COUNT = 5
-
 const FileUploader = ({
   fileList,
   titleClassName,
@@ -48,7 +47,13 @@ const FileUploader = ({
   const dragRef = useRef<HTMLDivElement>(null)
   const fileUploader = useRef<HTMLInputElement>(null)
 
-  const fileListRef = useRef([])
+  const { data: fileUploadConfigResponse } = useSWR({ url: '/files/upload' }, fetchFileUploadConfig)
+  const fileUploadConfig = useMemo(() => fileUploadConfigResponse ?? {
+    file_size_limit: 15,
+    batch_count_limit: 5,
+  }, [fileUploadConfigResponse])
+
+  const fileListRef = useRef<FileItem[]>([])
 
   // utils
   const getFileType = (currentFile: File) => {
@@ -66,21 +71,21 @@ const FileUploader = ({
     return `${(size / 1024 / 1024).toFixed(2)}MB`
   }
 
-  const isValid = (file: File) => {
+  const isValid = useCallback((file: File) => {
     const { size } = file
     const ext = `.${getFileType(file)}`
     const isValidType = ACCEPTS.includes(ext)
     if (!isValidType)
       notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.typeError') })
 
-    const isValidSize = size <= MAX_SIZE
+    const isValidSize = size <= fileUploadConfig.file_size_limit * 1024 * 1024
     if (!isValidSize)
-      notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.size') })
+      notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.size', { size: fileUploadConfig.file_size_limit }) })
 
     return isValidType && isValidSize
-  }
+  }, [fileUploadConfig, notify, t])
 
-  const fileUpload = async (fileItem: any) => {
+  const fileUpload = useCallback(async (fileItem: FileItem): Promise<FileItem> => {
     const formData = new FormData()
     formData.append('file', fileItem.file)
     const onProgress = (e: ProgressEvent) => {
@@ -90,65 +95,67 @@ const FileUploader = ({
       }
     }
 
+    const fileListCopy = fileListRef.current
     return upload({
       xhr: new XMLHttpRequest(),
       data: formData,
       onprogress: onProgress,
     })
-      .then((res: FileEntity) => {
-        const fileListCopy = fileListRef.current
+      .then((res: File) => {
         const completeFile = {
           fileID: fileItem.fileID,
           file: res,
+          progress: -1,
         }
-        const index = fileListCopy.findIndex((item: any) => item.fileID === fileItem.fileID)
+        const index = fileListCopy.findIndex(item => item.fileID === fileItem.fileID)
         fileListCopy[index] = completeFile
         onFileUpdate(completeFile, 100, fileListCopy)
         return Promise.resolve({ ...completeFile })
       })
      .catch(() => {
         notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.failed') })
         return Promise.resolve({ ...fileItem })
       })
       .finally()
-  }
-  const uploadBatchFiles = (bFiles: any) => {
-    bFiles.forEach((bf: any) => (bf.progress = 0))
-    return Promise.all(bFiles.map((bFile: any) => fileUpload(bFile)))
-  }
-  const uploadMultipleFiles = async (files: any) => {
+  }, [fileListRef, notify, onFileUpdate, t])
+
+  const uploadBatchFiles = useCallback((bFiles: FileItem[]) => {
+    bFiles.forEach(bf => (bf.progress = 0))
+    return Promise.all(bFiles.map(fileUpload))
+  }, [fileUpload])
+
+  const uploadMultipleFiles = useCallback(async (files: FileItem[]) => {
+    const batchCountLimit = fileUploadConfig.batch_count_limit
     const length = files.length
     let start = 0
    let end = 0
 
     while (start < length) {
-      if (start + BATCH_COUNT > length)
+      if (start + batchCountLimit > length)
         end = length
       else
-        end = start + BATCH_COUNT
+        end = start + batchCountLimit
       const bFiles = files.slice(start, end)
       await uploadBatchFiles(bFiles)
       start = end
     }
-  }
-  const initialUpload = (files: any) => {
+  }, [fileUploadConfig, uploadBatchFiles])
+
+  const initialUpload = useCallback((files: File[]) => {
     if (!files.length)
       return false
 
-    const preparedFiles = files.map((file: any, index: number) => {
-      const fileItem = {
-        fileID: `file${index}-${Date.now()}`,
-        file,
-        progress: -1,
-      }
-      return fileItem
-    })
+    const preparedFiles = files.map((file, index) => ({
+      fileID: `file${index}-${Date.now()}`,
+      file,
+      progress: -1,
+    }))
     const newFiles = [...fileListRef.current, ...preparedFiles]
     prepareFileList(newFiles)
     fileListRef.current = newFiles
     uploadMultipleFiles(preparedFiles)
-  }
+  }, [prepareFileList, uploadMultipleFiles])
 
   const handleDragEnter = (e: DragEvent) => {
     e.preventDefault()
     e.stopPropagation()
@@ -164,18 +171,17 @@ const FileUploader = ({
     e.target === dragRef.current && setDragging(false)
   }
 
-  const handleDrop = (e: DragEvent) => {
+  const handleDrop = useCallback((e: DragEvent) => {
     e.preventDefault()
     e.stopPropagation()
     setDragging(false)
 
     if (!e.dataTransfer)
       return
 
-    const files = [...e.dataTransfer.files]
-    const validFiles = files.filter(file => isValid(file))
-    // fileUpload(files[0])
+    const files = [...e.dataTransfer.files] as File[]
+    const validFiles = files.filter(isValid)
     initialUpload(validFiles)
-  }
+  }, [initialUpload, isValid])
 
   const selectHandle = () => {
@@ -186,26 +192,26 @@ const removeFile = (fileID: string) => {
     if (fileUploader.current)
       fileUploader.current.value = ''
 
-    fileListRef.current = fileListRef.current.filter((item: any) => item.fileID !== fileID)
+    fileListRef.current = fileListRef.current.filter(item => item.fileID !== fileID)
     onFileListUpdate?.([...fileListRef.current])
   }
 
-  const fileChangeHandle = (e: React.ChangeEvent<HTMLInputElement>) => {
-    const files = [...(e.target.files ?? [])].filter(file => isValid(file))
-    initialUpload(files)
-  }
+  const fileChangeHandle = useCallback((e: React.ChangeEvent<HTMLInputElement>) => {
+    const files = [...(e.target.files ?? [])] as File[]
+    initialUpload(files.filter(isValid))
+  }, [isValid, initialUpload])
 
   useEffect(() => {
     dropRef.current?.addEventListener('dragenter', handleDragEnter)
     dropRef.current?.addEventListener('dragover', handleDragOver)
     dropRef.current?.addEventListener('dragleave', handleDragLeave)
     dropRef.current?.addEventListener('drop', handleDrop)
     return () => {
       dropRef.current?.removeEventListener('dragenter', handleDragEnter)
       dropRef.current?.removeEventListener('dragover', handleDragOver)
       dropRef.current?.removeEventListener('dragleave', handleDragLeave)
       dropRef.current?.removeEventListener('drop', handleDrop)
     }
-  }, [])
+  }, [handleDrop])
 
   return (
@@ -225,7 +231,7 @@ const FileUploader = ({
           {t('datasetCreation.stepOne.uploader.button')}
         </div>
       </div>
-      <div className={s.tip}>{t('datasetCreation.stepOne.uploader.tip')}</div>
+      <div className={s.tip}>{t('datasetCreation.stepOne.uploader.tip', { size: fileUploadConfig.file_size_limit })}</div>
       {dragging && <div ref={dragRef} className={s.draggingCover}/>}
     </div>
     <div className={s.fileList}>
diff --git a/web/app/components/datasets/create/index.tsx b/web/app/components/datasets/create/index.tsx
index dd33ae7b66..8a4ae5e8fe 100644
--- a/web/app/components/datasets/create/index.tsx
+++ b/web/app/components/datasets/create/index.tsx
@@ -8,7 +8,7 @@ import StepOne from './step-one'
 import StepTwo from './step-two'
 import StepThree from './step-three'
 import { DataSourceType } from '@/models/datasets'
-import type { DataSet, createDocumentResponse } from '@/models/datasets'
+import type { DataSet, FileItem, createDocumentResponse } from '@/models/datasets'
 import { fetchDataSource, fetchTenantInfo } from '@/service/common'
 import { fetchDataDetail } from '@/service/datasets'
 import type { DataSourceNotionPage } from '@/models/common'
@@ -30,7 +30,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
   const [dataSourceType, setDataSourceType] = useState<DataSourceType>(DataSourceType.FILE)
   const [step, setStep] = useState(1)
   const [indexingTypeCache, setIndexTypeCache] = useState('')
-  const [fileList, setFiles] = useState([])
+  const [fileList, setFiles] = useState<FileItem[]>([])
   const [result, setResult] = useState<createDocumentResponse | undefined>()
   const [hasError, setHasError] = useState(false)
 
@@ -39,12 +39,12 @@ const updateNotionPages = (value: DataSourceNotionPage[]) => {
     setNotionPages(value)
   }
 
-  const updateFileList = (preparedFiles: any) => {
+  const updateFileList = (preparedFiles: FileItem[]) => {
     setFiles(preparedFiles)
   }
 
-  const updateFile = (fileItem: any, progress: number, list: any[]) => {
-    const targetIndex = list.findIndex((file: any) => file.fileID === fileItem.fileID)
+  const updateFile = (fileItem: FileItem, progress: number, list: FileItem[]) => {
+    const targetIndex = list.findIndex(file => file.fileID === fileItem.fileID)
     list[targetIndex] = {
       ...list[targetIndex],
       progress,
diff --git a/web/app/components/datasets/create/step-one/index.tsx b/web/app/components/datasets/create/step-one/index.tsx
index f865be6573..7a9bf694ec 100644
--- a/web/app/components/datasets/create/step-one/index.tsx
+++ b/web/app/components/datasets/create/step-one/index.tsx
@@ -7,7 +7,7 @@ import FileUploader from '../file-uploader'
 import NotionPagePreview from '../notion-page-preview'
 import EmptyDatasetCreationModal from '../empty-dataset-creation-modal'
 import s from './index.module.css'
-import type { File } from '@/models/datasets'
+import type { FileItem } from '@/models/datasets'
 import type { DataSourceNotionPage } from '@/models/common'
 import { DataSourceType } from '@/models/datasets'
 import Button from '@/app/components/base/button'
@@ -20,9 +20,9 @@ type IStepOneProps = {
   dataSourceTypeDisable: Boolean
   hasConnection: boolean
   onSetting: () => void
-  files: any[]
-  updateFileList: (files: any[]) => void
-  updateFile: (fileItem: any, progress: number, list: any[]) => void
+  files: FileItem[]
+  updateFileList: (files: FileItem[]) => void
+  updateFile: (fileItem: FileItem, progress: number, list: FileItem[]) => void
   notionPages?: any[]
   updateNotionPages: (value: any[]) => void
   onStepChange: () => void
diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx
index 45d0b4d74c..f72fe9ef6d 100644
--- a/web/app/components/datasets/create/step-two/index.tsx
+++ b/web/app/components/datasets/create/step-two/index.tsx
@@ -9,7 +9,7 @@ import Link from 'next/link'
 import { groupBy } from 'lodash-es'
 import PreviewItem, { PreviewType } from './preview-item'
 import s from './index.module.css'
-import type { CreateDocumentReq, File, FullDocumentDetail, FileIndexingEstimateResponse as IndexingEstimateResponse, NotionInfo, PreProcessingRule, Rules, createDocumentResponse } from '@/models/datasets'
+import type { CreateDocumentReq, CustomFile, FullDocumentDetail, FileIndexingEstimateResponse as IndexingEstimateResponse, NotionInfo, PreProcessingRule, Rules, createDocumentResponse } from '@/models/datasets'
 import {
   createDocument,
   createFirstDocument,
@@ -39,7 +39,7 @@ type StepTwoProps = {
   datasetId?: string
   indexingType?: string
   dataSourceType: DataSourceType
-  files: File[]
+  files: CustomFile[]
   notionPages?: Page[]
   onStepChange?: (delta: number) => void
   updateIndexingTypeCache?: (type: string) => void
diff --git a/web/i18n/lang/dataset-creation.en.ts b/web/i18n/lang/dataset-creation.en.ts
index 2b1bd104a4..d218536e32 100644
--- a/web/i18n/lang/dataset-creation.en.ts
+++ b/web/i18n/lang/dataset-creation.en.ts
@@ -23,10 +23,10 @@ const translation = {
       title: 'Upload text file',
       button: 'Drag and drop file, or',
       browse: 'Browse',
-      tip: 'Supports txt, html, markdown, xlsx, and pdf. Max 15MB each.',
+      tip: 'Supports txt, html, markdown, xlsx, and pdf. Max {{size}}MB each.',
       validation: {
         typeError: 'File type not supported',
-        size: 'File too large. Maximum is 15MB',
+        size: 'File too large. Maximum is {{size}}MB',
         count: 'Multiple files not supported',
       },
       cancel: 'Cancel',
diff --git a/web/i18n/lang/dataset-creation.zh.ts b/web/i18n/lang/dataset-creation.zh.ts
index 8c15c51011..65b6d13fe0 100644
--- a/web/i18n/lang/dataset-creation.zh.ts
+++ b/web/i18n/lang/dataset-creation.zh.ts
@@ -23,10 +23,10 @@ const translation = {
       title: '上传文本文件',
       button: '拖拽文件至此,或者',
       browse: '选择文件',
-      tip: '已支持 TXT、 HTML、 Markdown、 PDF、 XLSX,每个文件不超过 15 MB。',
+      tip: '已支持 TXT、 HTML、 Markdown、 PDF、 XLSX,每个文件不超过 {{size}}MB。',
       validation: {
         typeError: '文件类型不支持',
-        size: '文件太大了,不能超过 15MB',
+        size: '文件太大了,不能超过 {{size}}MB',
         count: '暂不支持多个文件',
       },
       cancel: '取消',
diff --git a/web/models/common.ts b/web/models/common.ts
index 7482668e7b..ec00b90ddd 100644
--- a/web/models/common.ts
+++ b/web/models/common.ts
@@ -168,3 +168,8 @@ export type PluginProvider = {
     api_key: string
   } | null
 }
+
+export type FileUploadConfigResponse = {
+  file_size_limit: number
+  batch_count_limit: number
+}
diff --git a/web/models/datasets.ts b/web/models/datasets.ts
index 7f8398a8fc..d5ff8dd973 100644
--- a/web/models/datasets.ts
+++ b/web/models/datasets.ts
@@ -24,14 +24,18 @@ export type DataSet = {
   word_count: number
 }
 
-export type File = {
-  id: string
-  name: string
-  size: number
-  extension: string
-  mime_type: string
-  created_by: string
-  created_at: number
+export type CustomFile = File & {
+  id?: string
+  extension?: string
+  mime_type?: string
+  created_by?: string
+  created_at?: number
+}
+
+export type FileItem = {
+  fileID: string
+  file: CustomFile
+  progress: number
 }
 
 export type DataSetListResponse = {
diff --git a/web/service/common.ts b/web/service/common.ts
index 4b98ea003d..efcb38a502 100644
--- a/web/service/common.ts
+++ b/web/service/common.ts
@@ -2,6 +2,7 @@ import type { Fetcher } from 'swr'
 import { del, get, patch, post, put } from './base'
 import type {
   AccountIntegrate, CommonResponse, DataSourceNotion,
+  FileUploadConfigResponse,
   ICurrentWorkspace,
   IWorkspace, LangGeniusVersionResponse, Member,
   OauthResponse, PluginProvider, Provider, ProviderAnthropicToken, ProviderAzureToken,
@@ -178,3 +179,7 @@
 export const updateDefaultModel: Fetcher = (url) => {
   return post(url) as Promise<{ type: string; redirect_url?: string; result?: string }>
 }
+
+export const fetchFileUploadConfig: Fetcher<FileUploadConfigResponse, { url: string }> = ({ url }) => {
+  return get(url) as Promise<FileUploadConfigResponse>
+}
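
-- 
Testing note: the new config endpoint can be sanity-checked with a plain GET
once the API is running. The host/port and the /console/api prefix below are
assumptions based on a default deployment, and <console-access-token> is a
placeholder; adjust both to your setup:

    curl -s -H 'Authorization: Bearer <console-access-token>' \
      http://localhost:5001/console/api/files/upload
    # expected: {"file_size_limit": 15, "batch_count_limit": 5}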