feat: tenant app invocations limiter (#16221)

This commit is contained in:
Yeuoly 2025-03-19 17:24:02 +08:00 committed by GitHub
parent c3c957bb80
commit c07af5a1a3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 68 additions and 16 deletions

View File

@ -61,6 +61,10 @@ class AppExecutionConfig(BaseSettings):
description="Maximum number of concurrent active requests per app (0 for unlimited)", description="Maximum number of concurrent active requests per app (0 for unlimited)",
default=0, default=0,
) )
APP_DAILY_RATE_LIMIT: NonNegativeInt = Field(
description="Maximum number of requests per app per day",
default=5000,
)
class CodeExecutionSandboxConfig(BaseSettings): class CodeExecutionSandboxConfig(BaseSettings):

View File

@ -10,9 +10,14 @@ from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
import services import services
from configs import dify_config from configs import dify_config
from controllers.console import api from controllers.console import api
from controllers.console.app.error import ConversationCompletedError, DraftWorkflowNotExist, DraftWorkflowNotSync from controllers.console.app.error import (
ConversationCompletedError,
DraftWorkflowNotExist,
DraftWorkflowNotSync,
)
from controllers.console.app.wraps import get_app_model from controllers.console.app.wraps import get_app_model
from controllers.console.wraps import account_initialization_required, setup_required from controllers.console.wraps import account_initialization_required, setup_required
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
from core.app.apps.base_app_queue_manager import AppQueueManager from core.app.apps.base_app_queue_manager import AppQueueManager
from core.app.entities.app_invoke_entities import InvokeFrom from core.app.entities.app_invoke_entities import InvokeFrom
from extensions.ext_database import db from extensions.ext_database import db
@ -27,6 +32,7 @@ from models.account import Account
from models.model import AppMode from models.model import AppMode
from services.app_generate_service import AppGenerateService from services.app_generate_service import AppGenerateService
from services.errors.app import WorkflowHashNotEqualError from services.errors.app import WorkflowHashNotEqualError
from services.errors.llm import InvokeRateLimitError
from services.workflow_service import DraftWorkflowDeletionError, WorkflowInUseError, WorkflowService from services.workflow_service import DraftWorkflowDeletionError, WorkflowInUseError, WorkflowService
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -168,6 +174,8 @@ class AdvancedChatDraftWorkflowRunApi(Resource):
raise NotFound("Conversation Not Exists.") raise NotFound("Conversation Not Exists.")
except services.errors.conversation.ConversationCompletedError: except services.errors.conversation.ConversationCompletedError:
raise ConversationCompletedError() raise ConversationCompletedError()
except InvokeRateLimitError as ex:
raise InvokeRateLimitHttpError(ex.description)
except ValueError as e: except ValueError as e:
raise e raise e
except Exception: except Exception:
@ -344,15 +352,18 @@ class DraftWorkflowRunApi(Resource):
parser.add_argument("files", type=list, required=False, location="json") parser.add_argument("files", type=list, required=False, location="json")
args = parser.parse_args() args = parser.parse_args()
response = AppGenerateService.generate( try:
app_model=app_model, response = AppGenerateService.generate(
user=current_user, app_model=app_model,
args=args, user=current_user,
invoke_from=InvokeFrom.DEBUGGER, args=args,
streaming=True, invoke_from=InvokeFrom.DEBUGGER,
) streaming=True,
)
return helper.compact_generate_response(response) return helper.compact_generate_response(response)
except InvokeRateLimitError as ex:
raise InvokeRateLimitHttpError(ex.description)
class WorkflowTaskStopApi(Resource): class WorkflowTaskStopApi(Resource):

View File

@ -16,6 +16,7 @@ from controllers.console.app.error import (
) )
from controllers.console.explore.error import NotChatAppError, NotCompletionAppError from controllers.console.explore.error import NotChatAppError, NotCompletionAppError
from controllers.console.explore.wraps import InstalledAppResource from controllers.console.explore.wraps import InstalledAppResource
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
from core.app.apps.base_app_queue_manager import AppQueueManager from core.app.apps.base_app_queue_manager import AppQueueManager
from core.app.entities.app_invoke_entities import InvokeFrom from core.app.entities.app_invoke_entities import InvokeFrom
from core.errors.error import ( from core.errors.error import (
@ -29,6 +30,7 @@ from libs import helper
from libs.helper import uuid_value from libs.helper import uuid_value
from models.model import AppMode from models.model import AppMode
from services.app_generate_service import AppGenerateService from services.app_generate_service import AppGenerateService
from services.errors.llm import InvokeRateLimitError
# define completion api for user # define completion api for user
@ -75,7 +77,7 @@ class CompletionApi(InstalledAppResource):
raise CompletionRequestError(e.description) raise CompletionRequestError(e.description)
except ValueError as e: except ValueError as e:
raise e raise e
except Exception as e: except Exception:
logging.exception("internal server error.") logging.exception("internal server error.")
raise InternalServerError() raise InternalServerError()
@ -133,9 +135,11 @@ class ChatApi(InstalledAppResource):
raise ProviderModelCurrentlyNotSupportError() raise ProviderModelCurrentlyNotSupportError()
except InvokeError as e: except InvokeError as e:
raise CompletionRequestError(e.description) raise CompletionRequestError(e.description)
except InvokeRateLimitError as ex:
raise InvokeRateLimitHttpError(ex.description)
except ValueError as e: except ValueError as e:
raise e raise e
except Exception as e: except Exception:
logging.exception("internal server error.") logging.exception("internal server error.")
raise InternalServerError() raise InternalServerError()

View File

@ -11,6 +11,7 @@ from controllers.console.app.error import (
) )
from controllers.console.explore.error import NotWorkflowAppError from controllers.console.explore.error import NotWorkflowAppError
from controllers.console.explore.wraps import InstalledAppResource from controllers.console.explore.wraps import InstalledAppResource
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
from core.app.apps.base_app_queue_manager import AppQueueManager from core.app.apps.base_app_queue_manager import AppQueueManager
from core.app.entities.app_invoke_entities import InvokeFrom from core.app.entities.app_invoke_entities import InvokeFrom
from core.errors.error import ( from core.errors.error import (
@ -23,6 +24,7 @@ from libs import helper
from libs.login import current_user from libs.login import current_user
from models.model import AppMode, InstalledApp from models.model import AppMode, InstalledApp
from services.app_generate_service import AppGenerateService from services.app_generate_service import AppGenerateService
from services.errors.llm import InvokeRateLimitError
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -56,9 +58,11 @@ class InstalledAppWorkflowRunApi(InstalledAppResource):
raise ProviderModelCurrentlyNotSupportError() raise ProviderModelCurrentlyNotSupportError()
except InvokeError as e: except InvokeError as e:
raise CompletionRequestError(e.description) raise CompletionRequestError(e.description)
except InvokeRateLimitError as ex:
raise InvokeRateLimitHttpError(ex.description)
except ValueError as e: except ValueError as e:
raise e raise e
except Exception as e: except Exception:
logging.exception("internal server error.") logging.exception("internal server error.")
raise InternalServerError() raise InternalServerError()

View File

@ -15,6 +15,7 @@ from controllers.service_api.app.error import (
ProviderQuotaExceededError, ProviderQuotaExceededError,
) )
from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
from core.app.apps.base_app_queue_manager import AppQueueManager from core.app.apps.base_app_queue_manager import AppQueueManager
from core.app.entities.app_invoke_entities import InvokeFrom from core.app.entities.app_invoke_entities import InvokeFrom
from core.errors.error import ( from core.errors.error import (
@ -27,6 +28,7 @@ from libs import helper
from libs.helper import uuid_value from libs.helper import uuid_value
from models.model import App, AppMode, EndUser from models.model import App, AppMode, EndUser
from services.app_generate_service import AppGenerateService from services.app_generate_service import AppGenerateService
from services.errors.llm import InvokeRateLimitError
class CompletionApi(Resource): class CompletionApi(Resource):
@ -75,7 +77,7 @@ class CompletionApi(Resource):
raise CompletionRequestError(e.description) raise CompletionRequestError(e.description)
except ValueError as e: except ValueError as e:
raise e raise e
except Exception as e: except Exception:
logging.exception("internal server error.") logging.exception("internal server error.")
raise InternalServerError() raise InternalServerError()
@ -130,11 +132,13 @@ class ChatApi(Resource):
raise ProviderQuotaExceededError() raise ProviderQuotaExceededError()
except ModelCurrentlyNotSupportError: except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError() raise ProviderModelCurrentlyNotSupportError()
except InvokeRateLimitError as ex:
raise InvokeRateLimitHttpError(ex.description)
except InvokeError as e: except InvokeError as e:
raise CompletionRequestError(e.description) raise CompletionRequestError(e.description)
except ValueError as e: except ValueError as e:
raise e raise e
except Exception as e: except Exception:
logging.exception("internal server error.") logging.exception("internal server error.")
raise InternalServerError() raise InternalServerError()

View File

@ -15,6 +15,7 @@ from controllers.service_api.app.error import (
ProviderQuotaExceededError, ProviderQuotaExceededError,
) )
from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
from core.app.apps.base_app_queue_manager import AppQueueManager from core.app.apps.base_app_queue_manager import AppQueueManager
from core.app.entities.app_invoke_entities import InvokeFrom from core.app.entities.app_invoke_entities import InvokeFrom
from core.errors.error import ( from core.errors.error import (
@ -29,6 +30,7 @@ from libs import helper
from models.model import App, AppMode, EndUser from models.model import App, AppMode, EndUser
from models.workflow import WorkflowRun, WorkflowRunStatus from models.workflow import WorkflowRun, WorkflowRunStatus
from services.app_generate_service import AppGenerateService from services.app_generate_service import AppGenerateService
from services.errors.llm import InvokeRateLimitError
from services.workflow_app_service import WorkflowAppService from services.workflow_app_service import WorkflowAppService
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -93,11 +95,13 @@ class WorkflowRunApi(Resource):
raise ProviderQuotaExceededError() raise ProviderQuotaExceededError()
except ModelCurrentlyNotSupportError: except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError() raise ProviderModelCurrentlyNotSupportError()
except InvokeRateLimitError as ex:
raise InvokeRateLimitHttpError(ex.description)
except InvokeError as e: except InvokeError as e:
raise CompletionRequestError(e.description) raise CompletionRequestError(e.description)
except ValueError as e: except ValueError as e:
raise e raise e
except Exception as e: except Exception:
logging.exception("internal server error.") logging.exception("internal server error.")
raise InternalServerError() raise InternalServerError()

View File

@ -11,6 +11,7 @@ from controllers.web.error import (
ProviderNotInitializeError, ProviderNotInitializeError,
ProviderQuotaExceededError, ProviderQuotaExceededError,
) )
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
from controllers.web.wraps import WebApiResource from controllers.web.wraps import WebApiResource
from core.app.apps.base_app_queue_manager import AppQueueManager from core.app.apps.base_app_queue_manager import AppQueueManager
from core.app.entities.app_invoke_entities import InvokeFrom from core.app.entities.app_invoke_entities import InvokeFrom
@ -23,6 +24,7 @@ from core.model_runtime.errors.invoke import InvokeError
from libs import helper from libs import helper
from models.model import App, AppMode, EndUser from models.model import App, AppMode, EndUser
from services.app_generate_service import AppGenerateService from services.app_generate_service import AppGenerateService
from services.errors.llm import InvokeRateLimitError
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -55,9 +57,11 @@ class WorkflowRunApi(WebApiResource):
raise ProviderModelCurrentlyNotSupportError() raise ProviderModelCurrentlyNotSupportError()
except InvokeError as e: except InvokeError as e:
raise CompletionRequestError(e.description) raise CompletionRequestError(e.description)
except InvokeRateLimitError as ex:
raise InvokeRateLimitHttpError(ex.description)
except ValueError as e: except ValueError as e:
raise e raise e
except Exception as e: except Exception:
logging.exception("internal server error.") logging.exception("internal server error.")
raise InternalServerError() raise InternalServerError()

View File

@ -11,13 +11,17 @@ from core.app.apps.completion.app_generator import CompletionAppGenerator
from core.app.apps.workflow.app_generator import WorkflowAppGenerator from core.app.apps.workflow.app_generator import WorkflowAppGenerator
from core.app.entities.app_invoke_entities import InvokeFrom from core.app.entities.app_invoke_entities import InvokeFrom
from core.app.features.rate_limiting import RateLimit from core.app.features.rate_limiting import RateLimit
from libs.helper import RateLimiter
from models.model import Account, App, AppMode, EndUser from models.model import Account, App, AppMode, EndUser
from models.workflow import Workflow from models.workflow import Workflow
from services.billing_service import BillingService
from services.errors.llm import InvokeRateLimitError from services.errors.llm import InvokeRateLimitError
from services.workflow_service import WorkflowService from services.workflow_service import WorkflowService
class AppGenerateService: class AppGenerateService:
system_rate_limiter = RateLimiter("app_daily_rate_limiter", dify_config.APP_DAILY_RATE_LIMIT, 86400)
@classmethod @classmethod
def generate( def generate(
cls, cls,
@ -36,6 +40,19 @@ class AppGenerateService:
:param streaming: streaming :param streaming: streaming
:return: :return:
""" """
# system level rate limiter
if dify_config.BILLING_ENABLED:
# check if it's free plan
limit_info = BillingService.get_info(app_model.tenant_id)
if limit_info["subscription"]["plan"] == "sandbox":
if cls.system_rate_limiter.is_rate_limited(app_model.tenant_id):
raise InvokeRateLimitError(
"Rate limit exceeded, please upgrade your plan "
f"or your RPD was {dify_config.APP_DAILY_RATE_LIMIT} requests/day"
)
cls.system_rate_limiter.increment_rate_limit(app_model.tenant_id)
# app level rate limiter
max_active_request = AppGenerateService._get_max_active_requests(app_model) max_active_request = AppGenerateService._get_max_active_requests(app_model)
rate_limit = RateLimit(app_model.id, max_active_request) rate_limit = RateLimit(app_model.id, max_active_request)
request_id = RateLimit.gen_request_key() request_id = RateLimit.gen_request_key()