feat: tenant app invocations limiter (#16221)

This commit is contained in:
Yeuoly 2025-03-19 17:24:02 +08:00 committed by GitHub
parent c3c957bb80
commit c07af5a1a3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 68 additions and 16 deletions

View File

@ -61,6 +61,10 @@ class AppExecutionConfig(BaseSettings):
description="Maximum number of concurrent active requests per app (0 for unlimited)",
default=0,
)
APP_DAILY_RATE_LIMIT: NonNegativeInt = Field(
description="Maximum number of requests per app per day",
default=5000,
)
class CodeExecutionSandboxConfig(BaseSettings):

View File

@ -10,9 +10,14 @@ from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
import services
from configs import dify_config
from controllers.console import api
from controllers.console.app.error import ConversationCompletedError, DraftWorkflowNotExist, DraftWorkflowNotSync
from controllers.console.app.error import (
ConversationCompletedError,
DraftWorkflowNotExist,
DraftWorkflowNotSync,
)
from controllers.console.app.wraps import get_app_model
from controllers.console.wraps import account_initialization_required, setup_required
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
from core.app.apps.base_app_queue_manager import AppQueueManager
from core.app.entities.app_invoke_entities import InvokeFrom
from extensions.ext_database import db
@ -27,6 +32,7 @@ from models.account import Account
from models.model import AppMode
from services.app_generate_service import AppGenerateService
from services.errors.app import WorkflowHashNotEqualError
from services.errors.llm import InvokeRateLimitError
from services.workflow_service import DraftWorkflowDeletionError, WorkflowInUseError, WorkflowService
logger = logging.getLogger(__name__)
@ -168,6 +174,8 @@ class AdvancedChatDraftWorkflowRunApi(Resource):
raise NotFound("Conversation Not Exists.")
except services.errors.conversation.ConversationCompletedError:
raise ConversationCompletedError()
except InvokeRateLimitError as ex:
raise InvokeRateLimitHttpError(ex.description)
except ValueError as e:
raise e
except Exception:
@ -344,15 +352,18 @@ class DraftWorkflowRunApi(Resource):
parser.add_argument("files", type=list, required=False, location="json")
args = parser.parse_args()
response = AppGenerateService.generate(
app_model=app_model,
user=current_user,
args=args,
invoke_from=InvokeFrom.DEBUGGER,
streaming=True,
)
try:
response = AppGenerateService.generate(
app_model=app_model,
user=current_user,
args=args,
invoke_from=InvokeFrom.DEBUGGER,
streaming=True,
)
return helper.compact_generate_response(response)
return helper.compact_generate_response(response)
except InvokeRateLimitError as ex:
raise InvokeRateLimitHttpError(ex.description)
class WorkflowTaskStopApi(Resource):

View File

@ -16,6 +16,7 @@ from controllers.console.app.error import (
)
from controllers.console.explore.error import NotChatAppError, NotCompletionAppError
from controllers.console.explore.wraps import InstalledAppResource
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
from core.app.apps.base_app_queue_manager import AppQueueManager
from core.app.entities.app_invoke_entities import InvokeFrom
from core.errors.error import (
@ -29,6 +30,7 @@ from libs import helper
from libs.helper import uuid_value
from models.model import AppMode
from services.app_generate_service import AppGenerateService
from services.errors.llm import InvokeRateLimitError
# define completion api for user
@ -75,7 +77,7 @@ class CompletionApi(InstalledAppResource):
raise CompletionRequestError(e.description)
except ValueError as e:
raise e
except Exception as e:
except Exception:
logging.exception("internal server error.")
raise InternalServerError()
@ -133,9 +135,11 @@ class ChatApi(InstalledAppResource):
raise ProviderModelCurrentlyNotSupportError()
except InvokeError as e:
raise CompletionRequestError(e.description)
except InvokeRateLimitError as ex:
raise InvokeRateLimitHttpError(ex.description)
except ValueError as e:
raise e
except Exception as e:
except Exception:
logging.exception("internal server error.")
raise InternalServerError()

View File

@ -11,6 +11,7 @@ from controllers.console.app.error import (
)
from controllers.console.explore.error import NotWorkflowAppError
from controllers.console.explore.wraps import InstalledAppResource
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
from core.app.apps.base_app_queue_manager import AppQueueManager
from core.app.entities.app_invoke_entities import InvokeFrom
from core.errors.error import (
@ -23,6 +24,7 @@ from libs import helper
from libs.login import current_user
from models.model import AppMode, InstalledApp
from services.app_generate_service import AppGenerateService
from services.errors.llm import InvokeRateLimitError
logger = logging.getLogger(__name__)
@ -56,9 +58,11 @@ class InstalledAppWorkflowRunApi(InstalledAppResource):
raise ProviderModelCurrentlyNotSupportError()
except InvokeError as e:
raise CompletionRequestError(e.description)
except InvokeRateLimitError as ex:
raise InvokeRateLimitHttpError(ex.description)
except ValueError as e:
raise e
except Exception as e:
except Exception:
logging.exception("internal server error.")
raise InternalServerError()

View File

@ -15,6 +15,7 @@ from controllers.service_api.app.error import (
ProviderQuotaExceededError,
)
from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
from core.app.apps.base_app_queue_manager import AppQueueManager
from core.app.entities.app_invoke_entities import InvokeFrom
from core.errors.error import (
@ -27,6 +28,7 @@ from libs import helper
from libs.helper import uuid_value
from models.model import App, AppMode, EndUser
from services.app_generate_service import AppGenerateService
from services.errors.llm import InvokeRateLimitError
class CompletionApi(Resource):
@ -75,7 +77,7 @@ class CompletionApi(Resource):
raise CompletionRequestError(e.description)
except ValueError as e:
raise e
except Exception as e:
except Exception:
logging.exception("internal server error.")
raise InternalServerError()
@ -130,11 +132,13 @@ class ChatApi(Resource):
raise ProviderQuotaExceededError()
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
except InvokeRateLimitError as ex:
raise InvokeRateLimitHttpError(ex.description)
except InvokeError as e:
raise CompletionRequestError(e.description)
except ValueError as e:
raise e
except Exception as e:
except Exception:
logging.exception("internal server error.")
raise InternalServerError()

View File

@ -15,6 +15,7 @@ from controllers.service_api.app.error import (
ProviderQuotaExceededError,
)
from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
from core.app.apps.base_app_queue_manager import AppQueueManager
from core.app.entities.app_invoke_entities import InvokeFrom
from core.errors.error import (
@ -29,6 +30,7 @@ from libs import helper
from models.model import App, AppMode, EndUser
from models.workflow import WorkflowRun, WorkflowRunStatus
from services.app_generate_service import AppGenerateService
from services.errors.llm import InvokeRateLimitError
from services.workflow_app_service import WorkflowAppService
logger = logging.getLogger(__name__)
@ -93,11 +95,13 @@ class WorkflowRunApi(Resource):
raise ProviderQuotaExceededError()
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
except InvokeRateLimitError as ex:
raise InvokeRateLimitHttpError(ex.description)
except InvokeError as e:
raise CompletionRequestError(e.description)
except ValueError as e:
raise e
except Exception as e:
except Exception:
logging.exception("internal server error.")
raise InternalServerError()

View File

@ -11,6 +11,7 @@ from controllers.web.error import (
ProviderNotInitializeError,
ProviderQuotaExceededError,
)
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
from controllers.web.wraps import WebApiResource
from core.app.apps.base_app_queue_manager import AppQueueManager
from core.app.entities.app_invoke_entities import InvokeFrom
@ -23,6 +24,7 @@ from core.model_runtime.errors.invoke import InvokeError
from libs import helper
from models.model import App, AppMode, EndUser
from services.app_generate_service import AppGenerateService
from services.errors.llm import InvokeRateLimitError
logger = logging.getLogger(__name__)
@ -55,9 +57,11 @@ class WorkflowRunApi(WebApiResource):
raise ProviderModelCurrentlyNotSupportError()
except InvokeError as e:
raise CompletionRequestError(e.description)
except InvokeRateLimitError as ex:
raise InvokeRateLimitHttpError(ex.description)
except ValueError as e:
raise e
except Exception as e:
except Exception:
logging.exception("internal server error.")
raise InternalServerError()

View File

@ -11,13 +11,17 @@ from core.app.apps.completion.app_generator import CompletionAppGenerator
from core.app.apps.workflow.app_generator import WorkflowAppGenerator
from core.app.entities.app_invoke_entities import InvokeFrom
from core.app.features.rate_limiting import RateLimit
from libs.helper import RateLimiter
from models.model import Account, App, AppMode, EndUser
from models.workflow import Workflow
from services.billing_service import BillingService
from services.errors.llm import InvokeRateLimitError
from services.workflow_service import WorkflowService
class AppGenerateService:
system_rate_limiter = RateLimiter("app_daily_rate_limiter", dify_config.APP_DAILY_RATE_LIMIT, 86400)
@classmethod
def generate(
cls,
@ -36,6 +40,19 @@ class AppGenerateService:
:param streaming: streaming
:return:
"""
# system level rate limiter
if dify_config.BILLING_ENABLED:
# check if it's free plan
limit_info = BillingService.get_info(app_model.tenant_id)
if limit_info["subscription"]["plan"] == "sandbox":
if cls.system_rate_limiter.is_rate_limited(app_model.tenant_id):
raise InvokeRateLimitError(
"Rate limit exceeded, please upgrade your plan "
f"or your RPD was {dify_config.APP_DAILY_RATE_LIMIT} requests/day"
)
cls.system_rate_limiter.increment_rate_limit(app_model.tenant_id)
# app level rate limiter
max_active_request = AppGenerateService._get_max_active_requests(app_model)
rate_limit = RateLimit(app_model.id, max_active_request)
request_id = RateLimit.gen_request_key()