diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py index a13a5997a7..46ded0244f 100644 --- a/api/configs/feature/__init__.py +++ b/api/configs/feature/__init__.py @@ -61,6 +61,10 @@ class AppExecutionConfig(BaseSettings): description="Maximum number of concurrent active requests per app (0 for unlimited)", default=0, ) + APP_DAILY_RATE_LIMIT: NonNegativeInt = Field( + description="Maximum number of requests per app per day", + default=5000, + ) class CodeExecutionSandboxConfig(BaseSettings): diff --git a/api/controllers/console/app/workflow.py b/api/controllers/console/app/workflow.py index d8b7225aae..2e077d2095 100644 --- a/api/controllers/console/app/workflow.py +++ b/api/controllers/console/app/workflow.py @@ -10,9 +10,14 @@ from werkzeug.exceptions import Forbidden, InternalServerError, NotFound import services from configs import dify_config from controllers.console import api -from controllers.console.app.error import ConversationCompletedError, DraftWorkflowNotExist, DraftWorkflowNotSync +from controllers.console.app.error import ( + ConversationCompletedError, + DraftWorkflowNotExist, + DraftWorkflowNotSync, +) from controllers.console.app.wraps import get_app_model from controllers.console.wraps import account_initialization_required, setup_required +from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError from core.app.apps.base_app_queue_manager import AppQueueManager from core.app.entities.app_invoke_entities import InvokeFrom from extensions.ext_database import db @@ -27,6 +32,7 @@ from models.account import Account from models.model import AppMode from services.app_generate_service import AppGenerateService from services.errors.app import WorkflowHashNotEqualError +from services.errors.llm import InvokeRateLimitError from services.workflow_service import DraftWorkflowDeletionError, WorkflowInUseError, WorkflowService logger = logging.getLogger(__name__) @@ -168,6 +174,8 @@ class AdvancedChatDraftWorkflowRunApi(Resource): raise NotFound("Conversation Not Exists.") except services.errors.conversation.ConversationCompletedError: raise ConversationCompletedError() + except InvokeRateLimitError as ex: + raise InvokeRateLimitHttpError(ex.description) except ValueError as e: raise e except Exception: @@ -344,15 +352,18 @@ class DraftWorkflowRunApi(Resource): parser.add_argument("files", type=list, required=False, location="json") args = parser.parse_args() - response = AppGenerateService.generate( - app_model=app_model, - user=current_user, - args=args, - invoke_from=InvokeFrom.DEBUGGER, - streaming=True, - ) + try: + response = AppGenerateService.generate( + app_model=app_model, + user=current_user, + args=args, + invoke_from=InvokeFrom.DEBUGGER, + streaming=True, + ) - return helper.compact_generate_response(response) + return helper.compact_generate_response(response) + except InvokeRateLimitError as ex: + raise InvokeRateLimitHttpError(ex.description) class WorkflowTaskStopApi(Resource): diff --git a/api/controllers/console/explore/completion.py b/api/controllers/console/explore/completion.py index 1af3cf21f7..e693a5a71b 100644 --- a/api/controllers/console/explore/completion.py +++ b/api/controllers/console/explore/completion.py @@ -16,6 +16,7 @@ from controllers.console.app.error import ( ) from controllers.console.explore.error import NotChatAppError, NotCompletionAppError from controllers.console.explore.wraps import InstalledAppResource +from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError from core.app.apps.base_app_queue_manager import AppQueueManager from core.app.entities.app_invoke_entities import InvokeFrom from core.errors.error import ( @@ -29,6 +30,7 @@ from libs import helper from libs.helper import uuid_value from models.model import AppMode from services.app_generate_service import AppGenerateService +from services.errors.llm import InvokeRateLimitError # define completion api for user @@ -75,7 +77,7 @@ class CompletionApi(InstalledAppResource): raise CompletionRequestError(e.description) except ValueError as e: raise e - except Exception as e: + except Exception: logging.exception("internal server error.") raise InternalServerError() @@ -133,9 +135,11 @@ class ChatApi(InstalledAppResource): raise ProviderModelCurrentlyNotSupportError() except InvokeError as e: raise CompletionRequestError(e.description) + except InvokeRateLimitError as ex: + raise InvokeRateLimitHttpError(ex.description) except ValueError as e: raise e - except Exception as e: + except Exception: logging.exception("internal server error.") raise InternalServerError() diff --git a/api/controllers/console/explore/workflow.py b/api/controllers/console/explore/workflow.py index bca837d66e..a2653a94f6 100644 --- a/api/controllers/console/explore/workflow.py +++ b/api/controllers/console/explore/workflow.py @@ -11,6 +11,7 @@ from controllers.console.app.error import ( ) from controllers.console.explore.error import NotWorkflowAppError from controllers.console.explore.wraps import InstalledAppResource +from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError from core.app.apps.base_app_queue_manager import AppQueueManager from core.app.entities.app_invoke_entities import InvokeFrom from core.errors.error import ( @@ -23,6 +24,7 @@ from libs import helper from libs.login import current_user from models.model import AppMode, InstalledApp from services.app_generate_service import AppGenerateService +from services.errors.llm import InvokeRateLimitError logger = logging.getLogger(__name__) @@ -56,9 +58,11 @@ class InstalledAppWorkflowRunApi(InstalledAppResource): raise ProviderModelCurrentlyNotSupportError() except InvokeError as e: raise CompletionRequestError(e.description) + except InvokeRateLimitError as ex: + raise InvokeRateLimitHttpError(ex.description) except ValueError as e: raise e - except Exception as e: + except Exception: logging.exception("internal server error.") raise InternalServerError() diff --git a/api/controllers/service_api/app/completion.py b/api/controllers/service_api/app/completion.py index 647efc8149..38a65b7a90 100644 --- a/api/controllers/service_api/app/completion.py +++ b/api/controllers/service_api/app/completion.py @@ -15,6 +15,7 @@ from controllers.service_api.app.error import ( ProviderQuotaExceededError, ) from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token +from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError from core.app.apps.base_app_queue_manager import AppQueueManager from core.app.entities.app_invoke_entities import InvokeFrom from core.errors.error import ( @@ -27,6 +28,7 @@ from libs import helper from libs.helper import uuid_value from models.model import App, AppMode, EndUser from services.app_generate_service import AppGenerateService +from services.errors.llm import InvokeRateLimitError class CompletionApi(Resource): @@ -75,7 +77,7 @@ class CompletionApi(Resource): raise CompletionRequestError(e.description) except ValueError as e: raise e - except Exception as e: + except Exception: logging.exception("internal server error.") raise InternalServerError() @@ -130,11 +132,13 @@ class ChatApi(Resource): raise ProviderQuotaExceededError() except ModelCurrentlyNotSupportError: raise ProviderModelCurrentlyNotSupportError() + except InvokeRateLimitError as ex: + raise InvokeRateLimitHttpError(ex.description) except InvokeError as e: raise CompletionRequestError(e.description) except ValueError as e: raise e - except Exception as e: + except Exception: logging.exception("internal server error.") raise InternalServerError() diff --git a/api/controllers/service_api/app/workflow.py b/api/controllers/service_api/app/workflow.py index db8f031547..0cf852cfba 100644 --- a/api/controllers/service_api/app/workflow.py +++ b/api/controllers/service_api/app/workflow.py @@ -15,6 +15,7 @@ from controllers.service_api.app.error import ( ProviderQuotaExceededError, ) from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token +from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError from core.app.apps.base_app_queue_manager import AppQueueManager from core.app.entities.app_invoke_entities import InvokeFrom from core.errors.error import ( @@ -29,6 +30,7 @@ from libs import helper from models.model import App, AppMode, EndUser from models.workflow import WorkflowRun, WorkflowRunStatus from services.app_generate_service import AppGenerateService +from services.errors.llm import InvokeRateLimitError from services.workflow_app_service import WorkflowAppService logger = logging.getLogger(__name__) @@ -93,11 +95,13 @@ class WorkflowRunApi(Resource): raise ProviderQuotaExceededError() except ModelCurrentlyNotSupportError: raise ProviderModelCurrentlyNotSupportError() + except InvokeRateLimitError as ex: + raise InvokeRateLimitHttpError(ex.description) except InvokeError as e: raise CompletionRequestError(e.description) except ValueError as e: raise e - except Exception as e: + except Exception: logging.exception("internal server error.") raise InternalServerError() diff --git a/api/controllers/web/workflow.py b/api/controllers/web/workflow.py index 59c5193b58..d2e183be78 100644 --- a/api/controllers/web/workflow.py +++ b/api/controllers/web/workflow.py @@ -11,6 +11,7 @@ from controllers.web.error import ( ProviderNotInitializeError, ProviderQuotaExceededError, ) +from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError from controllers.web.wraps import WebApiResource from core.app.apps.base_app_queue_manager import AppQueueManager from core.app.entities.app_invoke_entities import InvokeFrom @@ -23,6 +24,7 @@ from core.model_runtime.errors.invoke import InvokeError from libs import helper from models.model import App, AppMode, EndUser from services.app_generate_service import AppGenerateService +from services.errors.llm import InvokeRateLimitError logger = logging.getLogger(__name__) @@ -55,9 +57,11 @@ class WorkflowRunApi(WebApiResource): raise ProviderModelCurrentlyNotSupportError() except InvokeError as e: raise CompletionRequestError(e.description) + except InvokeRateLimitError as ex: + raise InvokeRateLimitHttpError(ex.description) except ValueError as e: raise e - except Exception as e: + except Exception: logging.exception("internal server error.") raise InternalServerError() diff --git a/api/services/app_generate_service.py b/api/services/app_generate_service.py index cbea47f0b4..245c123a04 100644 --- a/api/services/app_generate_service.py +++ b/api/services/app_generate_service.py @@ -11,13 +11,17 @@ from core.app.apps.completion.app_generator import CompletionAppGenerator from core.app.apps.workflow.app_generator import WorkflowAppGenerator from core.app.entities.app_invoke_entities import InvokeFrom from core.app.features.rate_limiting import RateLimit +from libs.helper import RateLimiter from models.model import Account, App, AppMode, EndUser from models.workflow import Workflow +from services.billing_service import BillingService from services.errors.llm import InvokeRateLimitError from services.workflow_service import WorkflowService class AppGenerateService: + system_rate_limiter = RateLimiter("app_daily_rate_limiter", dify_config.APP_DAILY_RATE_LIMIT, 86400) + @classmethod def generate( cls, @@ -36,6 +40,19 @@ class AppGenerateService: :param streaming: streaming :return: """ + # system level rate limiter + if dify_config.BILLING_ENABLED: + # check if it's free plan + limit_info = BillingService.get_info(app_model.tenant_id) + if limit_info["subscription"]["plan"] == "sandbox": + if cls.system_rate_limiter.is_rate_limited(app_model.tenant_id): + raise InvokeRateLimitError( + "Rate limit exceeded, please upgrade your plan " + f"or your RPD was {dify_config.APP_DAILY_RATE_LIMIT} requests/day" + ) + cls.system_rate_limiter.increment_rate_limit(app_model.tenant_id) + + # app level rate limiter max_active_request = AppGenerateService._get_max_active_requests(app_model) rate_limit = RateLimit(app_model.id, max_active_request) request_id = RateLimit.gen_request_key()