mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-06-04 11:14:10 +08:00
feat: tenant app invocations limiter (#16221)
This commit is contained in:
parent
c3c957bb80
commit
c07af5a1a3
@ -61,6 +61,10 @@ class AppExecutionConfig(BaseSettings):
|
|||||||
description="Maximum number of concurrent active requests per app (0 for unlimited)",
|
description="Maximum number of concurrent active requests per app (0 for unlimited)",
|
||||||
default=0,
|
default=0,
|
||||||
)
|
)
|
||||||
|
APP_DAILY_RATE_LIMIT: NonNegativeInt = Field(
|
||||||
|
description="Maximum number of requests per app per day",
|
||||||
|
default=5000,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class CodeExecutionSandboxConfig(BaseSettings):
|
class CodeExecutionSandboxConfig(BaseSettings):
|
||||||
|
@ -10,9 +10,14 @@ from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
|
|||||||
import services
|
import services
|
||||||
from configs import dify_config
|
from configs import dify_config
|
||||||
from controllers.console import api
|
from controllers.console import api
|
||||||
from controllers.console.app.error import ConversationCompletedError, DraftWorkflowNotExist, DraftWorkflowNotSync
|
from controllers.console.app.error import (
|
||||||
|
ConversationCompletedError,
|
||||||
|
DraftWorkflowNotExist,
|
||||||
|
DraftWorkflowNotSync,
|
||||||
|
)
|
||||||
from controllers.console.app.wraps import get_app_model
|
from controllers.console.app.wraps import get_app_model
|
||||||
from controllers.console.wraps import account_initialization_required, setup_required
|
from controllers.console.wraps import account_initialization_required, setup_required
|
||||||
|
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
|
||||||
from core.app.apps.base_app_queue_manager import AppQueueManager
|
from core.app.apps.base_app_queue_manager import AppQueueManager
|
||||||
from core.app.entities.app_invoke_entities import InvokeFrom
|
from core.app.entities.app_invoke_entities import InvokeFrom
|
||||||
from extensions.ext_database import db
|
from extensions.ext_database import db
|
||||||
@ -27,6 +32,7 @@ from models.account import Account
|
|||||||
from models.model import AppMode
|
from models.model import AppMode
|
||||||
from services.app_generate_service import AppGenerateService
|
from services.app_generate_service import AppGenerateService
|
||||||
from services.errors.app import WorkflowHashNotEqualError
|
from services.errors.app import WorkflowHashNotEqualError
|
||||||
|
from services.errors.llm import InvokeRateLimitError
|
||||||
from services.workflow_service import DraftWorkflowDeletionError, WorkflowInUseError, WorkflowService
|
from services.workflow_service import DraftWorkflowDeletionError, WorkflowInUseError, WorkflowService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@ -168,6 +174,8 @@ class AdvancedChatDraftWorkflowRunApi(Resource):
|
|||||||
raise NotFound("Conversation Not Exists.")
|
raise NotFound("Conversation Not Exists.")
|
||||||
except services.errors.conversation.ConversationCompletedError:
|
except services.errors.conversation.ConversationCompletedError:
|
||||||
raise ConversationCompletedError()
|
raise ConversationCompletedError()
|
||||||
|
except InvokeRateLimitError as ex:
|
||||||
|
raise InvokeRateLimitHttpError(ex.description)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
raise e
|
raise e
|
||||||
except Exception:
|
except Exception:
|
||||||
@ -344,15 +352,18 @@ class DraftWorkflowRunApi(Resource):
|
|||||||
parser.add_argument("files", type=list, required=False, location="json")
|
parser.add_argument("files", type=list, required=False, location="json")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
response = AppGenerateService.generate(
|
try:
|
||||||
app_model=app_model,
|
response = AppGenerateService.generate(
|
||||||
user=current_user,
|
app_model=app_model,
|
||||||
args=args,
|
user=current_user,
|
||||||
invoke_from=InvokeFrom.DEBUGGER,
|
args=args,
|
||||||
streaming=True,
|
invoke_from=InvokeFrom.DEBUGGER,
|
||||||
)
|
streaming=True,
|
||||||
|
)
|
||||||
|
|
||||||
return helper.compact_generate_response(response)
|
return helper.compact_generate_response(response)
|
||||||
|
except InvokeRateLimitError as ex:
|
||||||
|
raise InvokeRateLimitHttpError(ex.description)
|
||||||
|
|
||||||
|
|
||||||
class WorkflowTaskStopApi(Resource):
|
class WorkflowTaskStopApi(Resource):
|
||||||
|
@ -16,6 +16,7 @@ from controllers.console.app.error import (
|
|||||||
)
|
)
|
||||||
from controllers.console.explore.error import NotChatAppError, NotCompletionAppError
|
from controllers.console.explore.error import NotChatAppError, NotCompletionAppError
|
||||||
from controllers.console.explore.wraps import InstalledAppResource
|
from controllers.console.explore.wraps import InstalledAppResource
|
||||||
|
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
|
||||||
from core.app.apps.base_app_queue_manager import AppQueueManager
|
from core.app.apps.base_app_queue_manager import AppQueueManager
|
||||||
from core.app.entities.app_invoke_entities import InvokeFrom
|
from core.app.entities.app_invoke_entities import InvokeFrom
|
||||||
from core.errors.error import (
|
from core.errors.error import (
|
||||||
@ -29,6 +30,7 @@ from libs import helper
|
|||||||
from libs.helper import uuid_value
|
from libs.helper import uuid_value
|
||||||
from models.model import AppMode
|
from models.model import AppMode
|
||||||
from services.app_generate_service import AppGenerateService
|
from services.app_generate_service import AppGenerateService
|
||||||
|
from services.errors.llm import InvokeRateLimitError
|
||||||
|
|
||||||
|
|
||||||
# define completion api for user
|
# define completion api for user
|
||||||
@ -75,7 +77,7 @@ class CompletionApi(InstalledAppResource):
|
|||||||
raise CompletionRequestError(e.description)
|
raise CompletionRequestError(e.description)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
raise e
|
raise e
|
||||||
except Exception as e:
|
except Exception:
|
||||||
logging.exception("internal server error.")
|
logging.exception("internal server error.")
|
||||||
raise InternalServerError()
|
raise InternalServerError()
|
||||||
|
|
||||||
@ -133,9 +135,11 @@ class ChatApi(InstalledAppResource):
|
|||||||
raise ProviderModelCurrentlyNotSupportError()
|
raise ProviderModelCurrentlyNotSupportError()
|
||||||
except InvokeError as e:
|
except InvokeError as e:
|
||||||
raise CompletionRequestError(e.description)
|
raise CompletionRequestError(e.description)
|
||||||
|
except InvokeRateLimitError as ex:
|
||||||
|
raise InvokeRateLimitHttpError(ex.description)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
raise e
|
raise e
|
||||||
except Exception as e:
|
except Exception:
|
||||||
logging.exception("internal server error.")
|
logging.exception("internal server error.")
|
||||||
raise InternalServerError()
|
raise InternalServerError()
|
||||||
|
|
||||||
|
@ -11,6 +11,7 @@ from controllers.console.app.error import (
|
|||||||
)
|
)
|
||||||
from controllers.console.explore.error import NotWorkflowAppError
|
from controllers.console.explore.error import NotWorkflowAppError
|
||||||
from controllers.console.explore.wraps import InstalledAppResource
|
from controllers.console.explore.wraps import InstalledAppResource
|
||||||
|
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
|
||||||
from core.app.apps.base_app_queue_manager import AppQueueManager
|
from core.app.apps.base_app_queue_manager import AppQueueManager
|
||||||
from core.app.entities.app_invoke_entities import InvokeFrom
|
from core.app.entities.app_invoke_entities import InvokeFrom
|
||||||
from core.errors.error import (
|
from core.errors.error import (
|
||||||
@ -23,6 +24,7 @@ from libs import helper
|
|||||||
from libs.login import current_user
|
from libs.login import current_user
|
||||||
from models.model import AppMode, InstalledApp
|
from models.model import AppMode, InstalledApp
|
||||||
from services.app_generate_service import AppGenerateService
|
from services.app_generate_service import AppGenerateService
|
||||||
|
from services.errors.llm import InvokeRateLimitError
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -56,9 +58,11 @@ class InstalledAppWorkflowRunApi(InstalledAppResource):
|
|||||||
raise ProviderModelCurrentlyNotSupportError()
|
raise ProviderModelCurrentlyNotSupportError()
|
||||||
except InvokeError as e:
|
except InvokeError as e:
|
||||||
raise CompletionRequestError(e.description)
|
raise CompletionRequestError(e.description)
|
||||||
|
except InvokeRateLimitError as ex:
|
||||||
|
raise InvokeRateLimitHttpError(ex.description)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
raise e
|
raise e
|
||||||
except Exception as e:
|
except Exception:
|
||||||
logging.exception("internal server error.")
|
logging.exception("internal server error.")
|
||||||
raise InternalServerError()
|
raise InternalServerError()
|
||||||
|
|
||||||
|
@ -15,6 +15,7 @@ from controllers.service_api.app.error import (
|
|||||||
ProviderQuotaExceededError,
|
ProviderQuotaExceededError,
|
||||||
)
|
)
|
||||||
from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
|
from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
|
||||||
|
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
|
||||||
from core.app.apps.base_app_queue_manager import AppQueueManager
|
from core.app.apps.base_app_queue_manager import AppQueueManager
|
||||||
from core.app.entities.app_invoke_entities import InvokeFrom
|
from core.app.entities.app_invoke_entities import InvokeFrom
|
||||||
from core.errors.error import (
|
from core.errors.error import (
|
||||||
@ -27,6 +28,7 @@ from libs import helper
|
|||||||
from libs.helper import uuid_value
|
from libs.helper import uuid_value
|
||||||
from models.model import App, AppMode, EndUser
|
from models.model import App, AppMode, EndUser
|
||||||
from services.app_generate_service import AppGenerateService
|
from services.app_generate_service import AppGenerateService
|
||||||
|
from services.errors.llm import InvokeRateLimitError
|
||||||
|
|
||||||
|
|
||||||
class CompletionApi(Resource):
|
class CompletionApi(Resource):
|
||||||
@ -75,7 +77,7 @@ class CompletionApi(Resource):
|
|||||||
raise CompletionRequestError(e.description)
|
raise CompletionRequestError(e.description)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
raise e
|
raise e
|
||||||
except Exception as e:
|
except Exception:
|
||||||
logging.exception("internal server error.")
|
logging.exception("internal server error.")
|
||||||
raise InternalServerError()
|
raise InternalServerError()
|
||||||
|
|
||||||
@ -130,11 +132,13 @@ class ChatApi(Resource):
|
|||||||
raise ProviderQuotaExceededError()
|
raise ProviderQuotaExceededError()
|
||||||
except ModelCurrentlyNotSupportError:
|
except ModelCurrentlyNotSupportError:
|
||||||
raise ProviderModelCurrentlyNotSupportError()
|
raise ProviderModelCurrentlyNotSupportError()
|
||||||
|
except InvokeRateLimitError as ex:
|
||||||
|
raise InvokeRateLimitHttpError(ex.description)
|
||||||
except InvokeError as e:
|
except InvokeError as e:
|
||||||
raise CompletionRequestError(e.description)
|
raise CompletionRequestError(e.description)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
raise e
|
raise e
|
||||||
except Exception as e:
|
except Exception:
|
||||||
logging.exception("internal server error.")
|
logging.exception("internal server error.")
|
||||||
raise InternalServerError()
|
raise InternalServerError()
|
||||||
|
|
||||||
|
@ -15,6 +15,7 @@ from controllers.service_api.app.error import (
|
|||||||
ProviderQuotaExceededError,
|
ProviderQuotaExceededError,
|
||||||
)
|
)
|
||||||
from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
|
from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
|
||||||
|
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
|
||||||
from core.app.apps.base_app_queue_manager import AppQueueManager
|
from core.app.apps.base_app_queue_manager import AppQueueManager
|
||||||
from core.app.entities.app_invoke_entities import InvokeFrom
|
from core.app.entities.app_invoke_entities import InvokeFrom
|
||||||
from core.errors.error import (
|
from core.errors.error import (
|
||||||
@ -29,6 +30,7 @@ from libs import helper
|
|||||||
from models.model import App, AppMode, EndUser
|
from models.model import App, AppMode, EndUser
|
||||||
from models.workflow import WorkflowRun, WorkflowRunStatus
|
from models.workflow import WorkflowRun, WorkflowRunStatus
|
||||||
from services.app_generate_service import AppGenerateService
|
from services.app_generate_service import AppGenerateService
|
||||||
|
from services.errors.llm import InvokeRateLimitError
|
||||||
from services.workflow_app_service import WorkflowAppService
|
from services.workflow_app_service import WorkflowAppService
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@ -93,11 +95,13 @@ class WorkflowRunApi(Resource):
|
|||||||
raise ProviderQuotaExceededError()
|
raise ProviderQuotaExceededError()
|
||||||
except ModelCurrentlyNotSupportError:
|
except ModelCurrentlyNotSupportError:
|
||||||
raise ProviderModelCurrentlyNotSupportError()
|
raise ProviderModelCurrentlyNotSupportError()
|
||||||
|
except InvokeRateLimitError as ex:
|
||||||
|
raise InvokeRateLimitHttpError(ex.description)
|
||||||
except InvokeError as e:
|
except InvokeError as e:
|
||||||
raise CompletionRequestError(e.description)
|
raise CompletionRequestError(e.description)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
raise e
|
raise e
|
||||||
except Exception as e:
|
except Exception:
|
||||||
logging.exception("internal server error.")
|
logging.exception("internal server error.")
|
||||||
raise InternalServerError()
|
raise InternalServerError()
|
||||||
|
|
||||||
|
@ -11,6 +11,7 @@ from controllers.web.error import (
|
|||||||
ProviderNotInitializeError,
|
ProviderNotInitializeError,
|
||||||
ProviderQuotaExceededError,
|
ProviderQuotaExceededError,
|
||||||
)
|
)
|
||||||
|
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
|
||||||
from controllers.web.wraps import WebApiResource
|
from controllers.web.wraps import WebApiResource
|
||||||
from core.app.apps.base_app_queue_manager import AppQueueManager
|
from core.app.apps.base_app_queue_manager import AppQueueManager
|
||||||
from core.app.entities.app_invoke_entities import InvokeFrom
|
from core.app.entities.app_invoke_entities import InvokeFrom
|
||||||
@ -23,6 +24,7 @@ from core.model_runtime.errors.invoke import InvokeError
|
|||||||
from libs import helper
|
from libs import helper
|
||||||
from models.model import App, AppMode, EndUser
|
from models.model import App, AppMode, EndUser
|
||||||
from services.app_generate_service import AppGenerateService
|
from services.app_generate_service import AppGenerateService
|
||||||
|
from services.errors.llm import InvokeRateLimitError
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -55,9 +57,11 @@ class WorkflowRunApi(WebApiResource):
|
|||||||
raise ProviderModelCurrentlyNotSupportError()
|
raise ProviderModelCurrentlyNotSupportError()
|
||||||
except InvokeError as e:
|
except InvokeError as e:
|
||||||
raise CompletionRequestError(e.description)
|
raise CompletionRequestError(e.description)
|
||||||
|
except InvokeRateLimitError as ex:
|
||||||
|
raise InvokeRateLimitHttpError(ex.description)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
raise e
|
raise e
|
||||||
except Exception as e:
|
except Exception:
|
||||||
logging.exception("internal server error.")
|
logging.exception("internal server error.")
|
||||||
raise InternalServerError()
|
raise InternalServerError()
|
||||||
|
|
||||||
|
@ -11,13 +11,17 @@ from core.app.apps.completion.app_generator import CompletionAppGenerator
|
|||||||
from core.app.apps.workflow.app_generator import WorkflowAppGenerator
|
from core.app.apps.workflow.app_generator import WorkflowAppGenerator
|
||||||
from core.app.entities.app_invoke_entities import InvokeFrom
|
from core.app.entities.app_invoke_entities import InvokeFrom
|
||||||
from core.app.features.rate_limiting import RateLimit
|
from core.app.features.rate_limiting import RateLimit
|
||||||
|
from libs.helper import RateLimiter
|
||||||
from models.model import Account, App, AppMode, EndUser
|
from models.model import Account, App, AppMode, EndUser
|
||||||
from models.workflow import Workflow
|
from models.workflow import Workflow
|
||||||
|
from services.billing_service import BillingService
|
||||||
from services.errors.llm import InvokeRateLimitError
|
from services.errors.llm import InvokeRateLimitError
|
||||||
from services.workflow_service import WorkflowService
|
from services.workflow_service import WorkflowService
|
||||||
|
|
||||||
|
|
||||||
class AppGenerateService:
|
class AppGenerateService:
|
||||||
|
system_rate_limiter = RateLimiter("app_daily_rate_limiter", dify_config.APP_DAILY_RATE_LIMIT, 86400)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def generate(
|
def generate(
|
||||||
cls,
|
cls,
|
||||||
@ -36,6 +40,19 @@ class AppGenerateService:
|
|||||||
:param streaming: streaming
|
:param streaming: streaming
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
|
# system level rate limiter
|
||||||
|
if dify_config.BILLING_ENABLED:
|
||||||
|
# check if it's free plan
|
||||||
|
limit_info = BillingService.get_info(app_model.tenant_id)
|
||||||
|
if limit_info["subscription"]["plan"] == "sandbox":
|
||||||
|
if cls.system_rate_limiter.is_rate_limited(app_model.tenant_id):
|
||||||
|
raise InvokeRateLimitError(
|
||||||
|
"Rate limit exceeded, please upgrade your plan "
|
||||||
|
f"or your RPD was {dify_config.APP_DAILY_RATE_LIMIT} requests/day"
|
||||||
|
)
|
||||||
|
cls.system_rate_limiter.increment_rate_limit(app_model.tenant_id)
|
||||||
|
|
||||||
|
# app level rate limiter
|
||||||
max_active_request = AppGenerateService._get_max_active_requests(app_model)
|
max_active_request = AppGenerateService._get_max_active_requests(app_model)
|
||||||
rate_limit = RateLimit(app_model.id, max_active_request)
|
rate_limit = RateLimit(app_model.id, max_active_request)
|
||||||
request_id = RateLimit.gen_request_key()
|
request_id = RateLimit.gen_request_key()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user