Fix/upload limit (#2521)

Co-authored-by: jyong <jyong@dify.ai>
Co-authored-by: StyleZhang <jasonapring2015@outlook.com>

commit 97fe817186 (parent 52b12ed7eb)
@@ -130,3 +130,5 @@ UNSTRUCTURED_API_URL=
 
 SSRF_PROXY_HTTP_URL=
 SSRF_PROXY_HTTPS_URL=
+
+BATCH_UPLOAD_LIMIT=10
@@ -56,6 +56,7 @@ DEFAULTS = {
     'BILLING_ENABLED': 'False',
     'CAN_REPLACE_LOGO': 'False',
     'ETL_TYPE': 'dify',
+    'BATCH_UPLOAD_LIMIT': 20
 }
 
 
@@ -285,6 +286,8 @@ class Config:
         self.BILLING_ENABLED = get_bool_env('BILLING_ENABLED')
         self.CAN_REPLACE_LOGO = get_bool_env('CAN_REPLACE_LOGO')
 
+        self.BATCH_UPLOAD_LIMIT = get_env('BATCH_UPLOAD_LIMIT')
+
 
 class CloudEditionConfig(Config):
 
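Taken together, the two config hunks above wire the new limit through the existing settings machinery: DEFAULTS supplies 20 when nothing else is set, .env can override it (the example ships BATCH_UPLOAD_LIMIT=10), and Config.__init__ exposes it via get_env. Below is a minimal sketch of how that resolution presumably behaves; this get_env is a stand-in with assumed env-var-with-fallback semantics, not the project's actual helper.

import os

# Assumed fallback behaviour: an environment variable wins, otherwise DEFAULTS applies.
DEFAULTS = {'BATCH_UPLOAD_LIMIT': 20}

def get_env(key):
    return os.environ.get(key, DEFAULTS.get(key))

# Call sites cast to int, so both the int default and a string from .env work.
batch_upload_limit = int(get_env('BATCH_UPLOAD_LIMIT'))
print(batch_upload_limit)  # 20, or 10 when BATCH_UPLOAD_LIMIT=10 is set in the environment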
@@ -32,6 +32,7 @@ from models.dataset import Dataset, DatasetProcessRule, DocumentSegment
 from models.dataset import Document as DatasetDocument
 from models.model import UploadFile
 from models.source import DataSourceBinding
+from services.feature_service import FeatureService
 
 
 class IndexingRunner:
@@ -244,6 +245,14 @@ class IndexingRunner:
         """
         Estimate the indexing for the document.
         """
+        # check document limit
+        features = FeatureService.get_features(tenant_id)
+        if features.billing.enabled:
+            count = len(file_details)
+            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+            if count > batch_upload_limit:
+                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
+
         embedding_model_instance = None
         if dataset_id:
             dataset = Dataset.query.filter_by(
@@ -361,6 +370,14 @@ class IndexingRunner:
         """
         Estimate the indexing for the document.
         """
+        # check document limit
+        features = FeatureService.get_features(tenant_id)
+        if features.billing.enabled:
+            count = len(notion_info_list)
+            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+            if count > batch_upload_limit:
+                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
+
         embedding_model_instance = None
         if dataset_id:
             dataset = Dataset.query.filter_by(
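Both estimate paths now apply the same guard before doing any work: when billing is enabled, a single batch may not contain more items (file details or Notion pages) than BATCH_UPLOAD_LIMIT. An isolated sketch of that check follows; the function name and sample numbers are illustrative only, not part of the diff.

def ensure_batch_within_limit(count: int, batch_upload_limit: int) -> None:
    # Mirrors the guard added above: reject the whole batch when it is too large.
    if count > batch_upload_limit:
        raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")

ensure_batch_within_limit(count=5, batch_upload_limit=20)    # passes
# ensure_batch_within_limit(count=25, batch_upload_limit=20) # raises ValueError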
@@ -10,6 +10,7 @@ from werkzeug.exceptions import NotFound
 from extensions.ext_database import db
 from extensions.ext_redis import redis_client
 from models.model import App, AppAnnotationHitHistory, AppAnnotationSetting, Message, MessageAnnotation
+from services.feature_service import FeatureService
 from tasks.annotation.add_annotation_to_index_task import add_annotation_to_index_task
 from tasks.annotation.batch_import_annotations_task import batch_import_annotations_task
 from tasks.annotation.delete_annotation_index_task import delete_annotation_index_task
@@ -284,6 +285,12 @@ class AppAnnotationService:
                 result.append(content)
             if len(result) == 0:
                 raise ValueError("The CSV file is empty.")
+            # check annotation limit
+            features = FeatureService.get_features(current_user.current_tenant_id)
+            if features.billing.enabled:
+                annotation_quota_limit = features.annotation_quota_limit
+                if annotation_quota_limit.limit < len(result) + annotation_quota_limit.size:
+                    raise ValueError("The number of annotations exceeds the limit of your subscription.")
             # async job
             job_id = str(uuid.uuid4())
             indexing_cache_key = 'app_annotation_batch_import_{}'.format(str(job_id))
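The annotation check compares the rows about to be imported against the remaining quota. Under the assumption that annotation_quota_limit.size is the number of annotations already stored and .limit the plan ceiling, a worked example of the inequality used above:

# Illustrative numbers only; FeatureService supplies the real values.
limit = 100    # plan ceiling (assumed meaning of annotation_quota_limit.limit)
size = 95      # annotations already stored (assumed meaning of annotation_quota_limit.size)
new_rows = 10  # rows parsed from the uploaded CSV

over_quota = limit < new_rows + size
print(over_quota)  # True: 100 < 105, so the import is rejected before the async job is queued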
@@ -36,6 +36,7 @@ from services.errors.account import NoPermissionError
 from services.errors.dataset import DatasetNameDuplicateError
 from services.errors.document import DocumentIndexingError
 from services.errors.file import FileNotExistsError
+from services.feature_service import FeatureService
 from services.vector_service import VectorService
 from tasks.clean_notion_document_task import clean_notion_document_task
 from tasks.deal_dataset_vector_index_task import deal_dataset_vector_index_task
@@ -452,7 +453,9 @@ class DocumentService:
                                       created_from: str = 'web'):
 
         # check document limit
-        if current_app.config['EDITION'] == 'CLOUD':
+        features = FeatureService.get_features(current_user.current_tenant_id)
+
+        if features.billing.enabled:
             if 'original_document_id' not in document_data or not document_data['original_document_id']:
                 count = 0
                 if document_data["data_source"]["type"] == "upload_file":
@@ -462,6 +465,9 @@ class DocumentService:
                     notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
                     for notion_info in notion_info_list:
                         count = count + len(notion_info['pages'])
+                batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+                if count > batch_upload_limit:
+                    raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
         # if dataset is empty, update dataset data_source_type
         if not dataset.data_source_type:
             dataset.data_source_type = document_data["data_source"]["type"]
@@ -741,14 +747,20 @@ class DocumentService:
 
     @staticmethod
     def save_document_without_dataset_id(tenant_id: str, document_data: dict, account: Account):
-        count = 0
-        if document_data["data_source"]["type"] == "upload_file":
-            upload_file_list = document_data["data_source"]["info_list"]['file_info_list']['file_ids']
-            count = len(upload_file_list)
-        elif document_data["data_source"]["type"] == "notion_import":
-            notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
-            for notion_info in notion_info_list:
-                count = count + len(notion_info['pages'])
+        features = FeatureService.get_features(current_user.current_tenant_id)
+
+        if features.billing.enabled:
+            count = 0
+            if document_data["data_source"]["type"] == "upload_file":
+                upload_file_list = document_data["data_source"]["info_list"]['file_info_list']['file_ids']
+                count = len(upload_file_list)
+            elif document_data["data_source"]["type"] == "notion_import":
+                notion_info_list = document_data["data_source"]['info_list']['notion_info_list']
+                for notion_info in notion_info_list:
+                    count = count + len(notion_info['pages'])
+            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+            if count > batch_upload_limit:
+                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
 
         embedding_model = None
         dataset_collection_binding_id = None
@@ -1139,7 +1151,7 @@ class SegmentService:
             segment.answer = args['answer']
             if 'keywords' in args and args['keywords']:
                 segment.keywords = args['keywords']
-            if'enabled' in args and args['enabled'] is not None:
+            if 'enabled' in args and args['enabled'] is not None:
                 segment.enabled = args['enabled']
             db.session.add(segment)
             db.session.commit()
@@ -20,9 +20,9 @@ from services.errors.file import FileTooLargeError, UnsupportedFileTypeError
 IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'webp', 'gif', 'svg']
 IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS])
 
-ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv'] + IMAGE_EXTENSIONS
+ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx', 'docx', 'csv']
 UNSTRUSTURED_ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm', 'xlsx',
-                                   'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml'] + IMAGE_EXTENSIONS
+                                   'docx', 'csv', 'eml', 'msg', 'pptx', 'ppt', 'xml']
 PREVIEW_WORDS_LIMIT = 3000
 
 
@@ -4,10 +4,12 @@ import time
 
 import click
 from celery import shared_task
+from flask import current_app
 
 from core.indexing_runner import DocumentIsPausedException, IndexingRunner
 from extensions.ext_database import db
-from models.dataset import Document
+from models.dataset import Dataset, Document
+from services.feature_service import FeatureService
 
 
 @shared_task(queue='dataset')
@@ -21,6 +23,35 @@ def document_indexing_task(dataset_id: str, document_ids: list):
     """
     documents = []
     start_at = time.perf_counter()
+
+    dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
+
+    # check document limit
+    features = FeatureService.get_features(dataset.tenant_id)
+    try:
+        if features.billing.enabled:
+            vector_space = features.vector_space
+            count = len(document_ids)
+            batch_upload_limit = int(current_app.config['BATCH_UPLOAD_LIMIT'])
+            if count > batch_upload_limit:
+                raise ValueError(f"You have reached the batch upload limit of {batch_upload_limit}.")
+            if 0 < vector_space.limit <= vector_space.size:
+                raise ValueError("Your total number of documents plus the number of uploads have over the limit of "
+                                 "your subscription.")
+    except Exception as e:
+        for document_id in document_ids:
+            document = db.session.query(Document).filter(
+                Document.id == document_id,
+                Document.dataset_id == dataset_id
+            ).first()
+            if document:
+                document.indexing_status = 'error'
+                document.error = str(e)
+                document.stopped_at = datetime.datetime.utcnow()
+                db.session.add(document)
+        db.session.commit()
+        return
+
     for document_id in document_ids:
         logging.info(click.style('Start process document: {}'.format(document_id), fg='green'))
 
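One detail worth noting in the task above is the chained comparison 0 < vector_space.limit <= vector_space.size: it only trips when a positive limit exists and current usage has already reached it, so a limit of 0 never blocks indexing (read here as "no cap", an assumption about the billing semantics rather than something stated in the diff). A quick sketch:

def over_vector_space_limit(limit: int, size: int) -> bool:
    # True only when a positive limit exists and usage meets or exceeds it.
    return 0 < limit <= size

print(over_vector_space_limit(0, 10_000))  # False - 0 treated as "no cap" (assumed semantics)
print(over_vector_space_limit(200, 150))   # False - still under the cap
print(over_vector_space_limit(200, 200))   # True  - cap reached, the task flags documents as 'error'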
@@ -14,6 +14,8 @@ import { fetchSupportFileTypes } from '@/service/datasets'
 import I18n from '@/context/i18n'
 import { LanguagesSupportedUnderscore, getModelRuntimeSupported } from '@/utils/language'
 
+const FILES_NUMBER_LIMIT = 20
+
 type IFileUploaderProps = {
   fileList: FileItem[]
   titleClassName?: string
@@ -176,6 +178,11 @@ const FileUploader = ({
     if (!files.length)
       return false
 
+    if (files.length + fileList.length > FILES_NUMBER_LIMIT) {
+      notify({ type: 'error', message: t('datasetCreation.stepOne.uploader.validation.filesNumber', { filesNumber: FILES_NUMBER_LIMIT }) })
+      return false
+    }
+
     const preparedFiles = files.map((file, index) => ({
       fileID: `file${index}-${Date.now()}`,
       file,
@@ -185,7 +192,7 @@ const FileUploader = ({
     prepareFileList(newFiles)
     fileListRef.current = newFiles
     uploadMultipleFiles(preparedFiles)
-  }, [prepareFileList, uploadMultipleFiles])
+  }, [prepareFileList, uploadMultipleFiles, notify, t, fileList])
 
   const handleDragEnter = (e: DragEvent) => {
     e.preventDefault()
@@ -28,6 +28,7 @@ const translation = {
         typeError: 'File type not supported',
         size: 'File too large. Maximum is {{size}}MB',
         count: 'Multiple files not supported',
+        filesNumber: 'You have reached the batch upload limit of {{filesNumber}}.',
       },
       cancel: 'Cancel',
       change: 'Change',
@@ -28,6 +28,7 @@ const translation = {
         typeError: 'Tipo de arquivo não suportado',
         size: 'Arquivo muito grande. Máximo é {{size}}MB',
         count: 'Vários arquivos não suportados',
+        filesNumber: 'Limite de upload em massa {{filesNumber}}.',
       },
       cancel: 'Cancelar',
       change: 'Alterar',
@@ -28,6 +28,7 @@ const translation = {
         typeError: 'Тип файлу не підтримується',
         size: 'Файл занадто великий. Максимум – {{size}} МБ',
         count: 'Не підтримується завантаження кількох файлів',
+        filesNumber: 'Ліміт масового завантаження {{filesNumber}}.',
       },
       cancel: 'Скасувати',
       change: 'Змінити',
@@ -28,6 +28,7 @@ const translation = {
         typeError: '文件类型不支持',
         size: '文件太大了,不能超过 {{size}}MB',
         count: '暂不支持多个文件',
+        filesNumber: '批量上传限制 {{filesNumber}}。',
       },
       cancel: '取消',
       change: '更改文件',