From e90d3c29ab9abd86611396af0dde0b427796e5a5 Mon Sep 17 00:00:00 2001
From: takatost
Date: Fri, 13 Sep 2024 02:15:19 +0800
Subject: [PATCH] feat: add OpenAI o1 series models support (#8328)

---
 .../model_providers/openai/llm/_position.yaml |  4 ++
 .../model_providers/openai/llm/llm.py         | 45 +++++++++++++++++--
 .../openai/llm/o1-mini-2024-09-12.yaml        | 33 ++++++++++++++
 .../model_providers/openai/llm/o1-mini.yaml   | 33 ++++++++++++++
 .../openai/llm/o1-preview-2024-09-12.yaml     | 33 ++++++++++++++
 .../openai/llm/o1-preview.yaml                | 33 ++++++++++++++
 api/pyproject.toml                            |  3 +-
 7 files changed, 180 insertions(+), 4 deletions(-)
 create mode 100644 api/core/model_runtime/model_providers/openai/llm/o1-mini-2024-09-12.yaml
 create mode 100644 api/core/model_runtime/model_providers/openai/llm/o1-mini.yaml
 create mode 100644 api/core/model_runtime/model_providers/openai/llm/o1-preview-2024-09-12.yaml
 create mode 100644 api/core/model_runtime/model_providers/openai/llm/o1-preview.yaml

diff --git a/api/core/model_runtime/model_providers/openai/llm/_position.yaml b/api/core/model_runtime/model_providers/openai/llm/_position.yaml
index ac7313aaa1..7501bc1164 100644
--- a/api/core/model_runtime/model_providers/openai/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/openai/llm/_position.yaml
@@ -5,6 +5,10 @@
 - chatgpt-4o-latest
 - gpt-4o-mini
 - gpt-4o-mini-2024-07-18
+- o1-preview
+- o1-preview-2024-09-12
+- o1-mini
+- o1-mini-2024-09-12
 - gpt-4-turbo
 - gpt-4-turbo-2024-04-09
 - gpt-4-turbo-preview
diff --git a/api/core/model_runtime/model_providers/openai/llm/llm.py b/api/core/model_runtime/model_providers/openai/llm/llm.py
index c4b381df41..048f27f305 100644
--- a/api/core/model_runtime/model_providers/openai/llm/llm.py
+++ b/api/core/model_runtime/model_providers/openai/llm/llm.py
@@ -613,6 +613,13 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel):
         # clear illegal prompt messages
         prompt_messages = self._clear_illegal_prompt_messages(model, prompt_messages)
 
+        block_as_stream = False
+        if model.startswith("o1"):  # o1 models do not support streaming yet
+            block_as_stream = True
+            stream = False
+            if "stream_options" in extra_model_kwargs:
+                del extra_model_kwargs["stream_options"]
+
         # chat model
         response = client.chat.completions.create(
             messages=[self._convert_prompt_message_to_dict(m) for m in prompt_messages],
@@ -625,7 +632,39 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel):
         if stream:
             return self._handle_chat_generate_stream_response(model, credentials, response, prompt_messages, tools)
 
-        return self._handle_chat_generate_response(model, credentials, response, prompt_messages, tools)
+        block_result = self._handle_chat_generate_response(model, credentials, response, prompt_messages, tools)
+
+        if block_as_stream:
+            return self._handle_chat_block_as_stream_response(block_result, prompt_messages)
+
+        return block_result
+
+    def _handle_chat_block_as_stream_response(
+        self,
+        block_result: LLMResult,
+        prompt_messages: list[PromptMessage],
+    ) -> Generator[LLMResultChunk, None, None]:
+        """
+        Handle a blocking chat response as a stream.
+
+        Used for models (e.g. the o1 series) that do not support
+        streaming: the full result is yielded as a single chunk.
+
+        :param block_result: blocking LLM result to wrap
+        :param prompt_messages: prompt messages for the request
+        :return: llm response chunk generator yielding one chunk
+        """
+        yield LLMResultChunk(
+            model=block_result.model,
+            prompt_messages=prompt_messages,
+            system_fingerprint=block_result.system_fingerprint,
+            delta=LLMResultChunkDelta(
+                index=0,
+                message=block_result.message,
+                finish_reason="stop",
+                usage=block_result.usage,
+            ),
+        )
 
     def _handle_chat_generate_response(
         self,
@@ -960,7 +999,7 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel):
             model = model.split(":")[1]
 
         # Currently, we can use gpt4o to calculate chatgpt-4o-latest's token.
-        if model == "chatgpt-4o-latest":
+        if model == "chatgpt-4o-latest" or model.startswith("o1"):
             model = "gpt-4o"
 
         try:
@@ -975,7 +1014,7 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel):
             tokens_per_message = 4
             # if there's a name, the role is omitted
             tokens_per_name = -1
-        elif model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4"):
+        elif model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4") or model.startswith("o1"):
             tokens_per_message = 3
             tokens_per_name = 1
         else:
diff --git a/api/core/model_runtime/model_providers/openai/llm/o1-mini-2024-09-12.yaml b/api/core/model_runtime/model_providers/openai/llm/o1-mini-2024-09-12.yaml
new file mode 100644
index 0000000000..07a3bc9a7a
--- /dev/null
+++ b/api/core/model_runtime/model_providers/openai/llm/o1-mini-2024-09-12.yaml
@@ -0,0 +1,33 @@
+model: o1-mini-2024-09-12
+label:
+  zh_Hans: o1-mini-2024-09-12
+  en_US: o1-mini-2024-09-12
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 128000
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    default: 65536
+    min: 1
+    max: 65536
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: response_format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '3.00'
+  output: '12.00'
+  unit: '0.000001'
+  currency: USD
diff --git a/api/core/model_runtime/model_providers/openai/llm/o1-mini.yaml b/api/core/model_runtime/model_providers/openai/llm/o1-mini.yaml
new file mode 100644
index 0000000000..3e83529201
--- /dev/null
+++ b/api/core/model_runtime/model_providers/openai/llm/o1-mini.yaml
@@ -0,0 +1,33 @@
+model: o1-mini
+label:
+  zh_Hans: o1-mini
+  en_US: o1-mini
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 128000
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    default: 65536
+    min: 1
+    max: 65536
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: response_format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '3.00'
+  output: '12.00'
+  unit: '0.000001'
+  currency: USD
diff --git a/api/core/model_runtime/model_providers/openai/llm/o1-preview-2024-09-12.yaml b/api/core/model_runtime/model_providers/openai/llm/o1-preview-2024-09-12.yaml
new file mode 100644
index 0000000000..c9da96f611
--- /dev/null
+++ b/api/core/model_runtime/model_providers/openai/llm/o1-preview-2024-09-12.yaml
@@ -0,0 +1,33 @@
+model: o1-preview-2024-09-12
+label:
+  zh_Hans: o1-preview-2024-09-12
+  en_US: o1-preview-2024-09-12
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 128000
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    default: 32768
+    min: 1
+    max: 32768
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: response_format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '15.00'
+  output: '60.00'
+  unit: '0.000001'
+  currency: USD
diff --git a/api/core/model_runtime/model_providers/openai/llm/o1-preview.yaml b/api/core/model_runtime/model_providers/openai/llm/o1-preview.yaml
new file mode 100644
index 0000000000..c83874b765
--- /dev/null
+++ b/api/core/model_runtime/model_providers/openai/llm/o1-preview.yaml
@@ -0,0 +1,33 @@
+model: o1-preview
+label:
+  zh_Hans: o1-preview
+  en_US: o1-preview
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 128000
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    default: 32768
+    min: 1
+    max: 32768
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: response_format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '15.00'
+  output: '60.00'
+  unit: '0.000001'
+  currency: USD
diff --git a/api/pyproject.toml b/api/pyproject.toml
index eb930329e1..823dbb6512 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -60,7 +60,8 @@ ignore = [
     "SIM113", # eumerate-for-loop
     "SIM117", # multiple-with-statements
     "SIM210", # if-expr-with-true-false
-    "SIM300", # yoda-conditions
+    "SIM300", # yoda-conditions,
+    "PT004", # pytest-no-assert
 ]
 
 [tool.ruff.lint.per-file-ignores]
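
Note for reviewers: a minimal standalone sketch of the block-as-stream pattern this patch introduces. The names here (FakeResult, as_single_chunk_stream) are hypothetical stand-ins, not Dify APIs; the real logic lives in OpenAILargeLanguageModel above.

from collections.abc import Generator
from dataclasses import dataclass


@dataclass
class FakeResult:
    """Hypothetical stand-in for Dify's LLMResult."""
    model: str
    text: str


def as_single_chunk_stream(result: FakeResult) -> Generator[FakeResult, None, None]:
    # o1 models reject stream=True, so the provider is called in blocking
    # mode and the finished result is re-emitted as a one-chunk generator,
    # keeping callers' streaming code paths unchanged.
    yield result


# usage: downstream code iterates exactly as it would over a real stream
for chunk in as_single_chunk_stream(FakeResult(model="o1-mini", text="hi")):
    print(chunk.text)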
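
The token-counting change works the same way: o1 models borrow gpt-4o's encoding. A sketch of that fallback, following the well-known OpenAI cookbook recipe (tiktoken is the only dependency; treating o1 like gpt-4o is an approximation, as the patch itself does):

import tiktoken


def num_tokens_from_messages(messages: list[dict[str, str]], model: str) -> int:
    # o1 models have no dedicated tokenizer mapping, so fall back to
    # gpt-4o's encoding as an approximation, mirroring the patch above.
    if model == "chatgpt-4o-latest" or model.startswith("o1"):
        model = "gpt-4o"
    encoding = tiktoken.encoding_for_model(model)
    tokens_per_message = 3  # <|start|>{role}\n{content}<|end|>\n framing
    tokens_per_name = 1
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
    return num_tokens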