Add gpt-3.5-turbo-16k (16,384-token context) to the model tables, pricing, Azure
validation message and the web model picker; update gpt-3.5-turbo prompt and
text-embedding-ada-002 usage prices.
Also adds the missing comma after 'text-davinci-002' in models_by_mode['completion'],
which previously fused with the next entry via implicit string concatenation.
NOTE(review): indentation and blank context lines were reconstructed from the hunk
headers; if context does not match byte-for-byte, apply with
`git apply --ignore-whitespace`. The post-image hashes on the `index` lines are
informational and were not recomputed.

diff --git a/api/core/constant/llm_constant.py b/api/core/constant/llm_constant.py
index 6879ec5b06..397a3d4c8f 100644
--- a/api/core/constant/llm_constant.py
+++ b/api/core/constant/llm_constant.py
@@ -4,6 +4,7 @@ models = {
     'gpt-4': 'openai',  # 8,192 tokens
     'gpt-4-32k': 'openai',  # 32,768 tokens
     'gpt-3.5-turbo': 'openai',  # 4,096 tokens
+    'gpt-3.5-turbo-16k': 'openai',  # 16,384 tokens
     'text-davinci-003': 'openai',  # 4,097 tokens
     'text-davinci-002': 'openai',  # 4,097 tokens
     'text-curie-001': 'openai',  # 2,049 tokens
@@ -16,6 +17,7 @@ max_context_token_length = {
     'gpt-4': 8192,
     'gpt-4-32k': 32768,
     'gpt-3.5-turbo': 4096,
+    'gpt-3.5-turbo-16k': 16384,
     'text-davinci-003': 4097,
     'text-davinci-002': 4097,
     'text-curie-001': 2049,
@@ -29,11 +31,13 @@ models_by_mode = {
         'gpt-4',  # 8,192 tokens
         'gpt-4-32k',  # 32,768 tokens
         'gpt-3.5-turbo',  # 4,096 tokens
+        'gpt-3.5-turbo-16k',  # 16,384 tokens
     ],
     'completion': [
         'gpt-4',  # 8,192 tokens
         'gpt-4-32k',  # 32,768 tokens
         'gpt-3.5-turbo',  # 4,096 tokens
+        'gpt-3.5-turbo-16k',  # 16,384 tokens
         'text-davinci-003',  # 4,097 tokens
-        'text-davinci-002'  # 4,097 tokens
+        'text-davinci-002',  # 4,097 tokens
         'text-curie-001',  # 2,049 tokens
@@ -57,9 +61,13 @@ model_prices = {
         'completion': Decimal('0.12')
     },
     'gpt-3.5-turbo': {
-        'prompt': Decimal('0.002'),
+        'prompt': Decimal('0.0015'),
         'completion': Decimal('0.002')
     },
+    'gpt-3.5-turbo-16k': {
+        'prompt': Decimal('0.003'),
+        'completion': Decimal('0.004')
+    },
     'text-davinci-003': {
         'prompt': Decimal('0.02'),
         'completion': Decimal('0.02')
@@ -77,7 +85,7 @@ model_prices = {
         'completion': Decimal('0.0004')
     },
     'text-embedding-ada-002': {
-        'usage': Decimal('0.0004'),
+        'usage': Decimal('0.0001'),
     }
 }
 
diff --git a/api/core/llm/provider/azure_provider.py b/api/core/llm/provider/azure_provider.py
index d87ab761d3..c64e785215 100644
--- a/api/core/llm/provider/azure_provider.py
+++ b/api/core/llm/provider/azure_provider.py
@@ -95,7 +95,8 @@ class AzureProvider(BaseProvider):
 
             if not models:
                 raise ValidateFailedError("Please add deployments for 'text-davinci-003', "
-                                          "'gpt-3.5-turbo', 'text-embedding-ada-002'.")
+                                          "'gpt-35-turbo', 'text-embedding-ada-002' (required) "
+                                          "and 'gpt-4', 'gpt-35-turbo-16k' (optional).")
 
             fixed_model_ids = [
                 'text-davinci-003',
diff --git a/web/app/components/app/configuration/config-model/index.tsx b/web/app/components/app/configuration/config-model/index.tsx
index 7a9601e39b..15a3754e1d 100644
--- a/web/app/components/app/configuration/config-model/index.tsx
+++ b/web/app/components/app/configuration/config-model/index.tsx
@@ -26,8 +26,10 @@ export type IConifgModelProps = {
 
 const options = [
   { id: 'gpt-3.5-turbo', name: 'gpt-3.5-turbo', type: AppType.chat },
+  { id: 'gpt-3.5-turbo-16k', name: 'gpt-3.5-turbo-16k', type: AppType.chat },
   { id: 'gpt-4', name: 'gpt-4', type: AppType.chat }, // 8k version
   { id: 'gpt-3.5-turbo', name: 'gpt-3.5-turbo', type: AppType.completion },
+  { id: 'gpt-3.5-turbo-16k', name: 'gpt-3.5-turbo-16k', type: AppType.completion },
   { id: 'text-davinci-003', name: 'text-davinci-003', type: AppType.completion },
   { id: 'gpt-4', name: 'gpt-4', type: AppType.completion }, // 8k version
 ]