Mirror of https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git (synced 2025-08-12 16:59:04 +08:00)
fix: azure openai stream response usage missing (#1998)
parent c9e4147b11
commit 5e97eb1840
@@ -257,6 +257,9 @@ class AppRunner:
         if not usage and result.delta.usage:
             usage = result.delta.usage
 
+        if not usage:
+            usage = LLMUsage.empty_usage()
+
         llm_result = LLMResult(
             model=model,
             prompt_messages=prompt_messages,
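The guard added to AppRunner means downstream accounting never sees usage=None when a provider's stream finishes without reporting token usage. A minimal sketch of the same fallback pattern, assuming a stand-in dataclass in place of dify's real LLMUsage entity:

from dataclasses import dataclass
from typing import Optional


@dataclass
class Usage:
    # illustrative stand-in for dify's LLMUsage; the real entity has more fields
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0

    @classmethod
    def empty_usage(cls) -> "Usage":
        # all-zero usage object, mirroring LLMUsage.empty_usage()
        return cls()


def finalize_usage(usage: Optional[Usage]) -> Usage:
    # same shape as the guard above: fall back to zeros, never return None
    return usage if usage is not None else Usage.empty_usage()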
@@ -322,8 +322,11 @@ class AzureOpenAILargeLanguageModel(_CommonAzureOpenAI, LargeLanguageModel):
                                               response: Stream[ChatCompletionChunk],
                                               prompt_messages: list[PromptMessage],
                                               tools: Optional[list[PromptMessageTool]] = None) -> Generator:
         index = 0
         full_assistant_content = ''
+        real_model = model
+        system_fingerprint = None
+        completion = ''
         for chunk in response:
             if len(chunk.choices) == 0:
                 continue
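The three new variables are initialized before the loop so they remain in scope after the stream is exhausted: real_model and system_fingerprint are refreshed from each chunk (last one wins), while completion accumulates the full text needed for the post-stream token count. A condensed sketch of that accumulator pattern, with a hypothetical stream of chunk objects standing in for the SDK's Stream[ChatCompletionChunk]:

def drain_stream(chunks):
    # initialize before the loop so the values survive stream exhaustion
    real_model = None
    system_fingerprint = None
    completion = ''
    for chunk in chunks:
        real_model = chunk.model                       # last chunk wins
        system_fingerprint = chunk.system_fingerprint
        delta = chunk.choices[0].delta
        completion += delta.content or ''              # accumulate full text
    return real_model, system_fingerprint, completion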
@@ -349,40 +352,44 @@ class AzureOpenAILargeLanguageModel(_CommonAzureOpenAI, LargeLanguageModel):
 
             full_assistant_content += delta.delta.content if delta.delta.content else ''
 
-            if delta.finish_reason is not None:
-                # calculate num tokens
-                prompt_tokens = self._num_tokens_from_messages(credentials, prompt_messages, tools)
-
-                full_assistant_prompt_message = AssistantPromptMessage(
-                    content=full_assistant_content,
-                    tool_calls=tool_calls
-                )
-                completion_tokens = self._num_tokens_from_messages(credentials, [full_assistant_prompt_message])
-
-                # transform usage
-                usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
-
-                yield LLMResultChunk(
-                    model=chunk.model,
-                    prompt_messages=prompt_messages,
-                    system_fingerprint=chunk.system_fingerprint,
-                    delta=LLMResultChunkDelta(
-                        index=delta.index,
-                        message=assistant_prompt_message,
-                        finish_reason=delta.finish_reason,
-                        usage=usage
-                    )
-                )
-            else:
-                yield LLMResultChunk(
-                    model=chunk.model,
-                    prompt_messages=prompt_messages,
-                    system_fingerprint=chunk.system_fingerprint,
-                    delta=LLMResultChunkDelta(
-                        index=delta.index,
-                        message=assistant_prompt_message,
-                    )
-                )
+            real_model = chunk.model
+            system_fingerprint = chunk.system_fingerprint
+            completion += delta.delta.content if delta.delta.content else ''
+
+            yield LLMResultChunk(
+                model=real_model,
+                prompt_messages=prompt_messages,
+                system_fingerprint=system_fingerprint,
+                delta=LLMResultChunkDelta(
+                    index=index,
+                    message=assistant_prompt_message,
+                )
+            )
+
+            index += 0
+
+        # calculate num tokens
+        prompt_tokens = self._num_tokens_from_messages(credentials, prompt_messages, tools)
+
+        full_assistant_prompt_message = AssistantPromptMessage(
+            content=completion
+        )
+        completion_tokens = self._num_tokens_from_messages(credentials, [full_assistant_prompt_message])
+
+        # transform usage
+        usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
+
+        yield LLMResultChunk(
+            model=real_model,
+            prompt_messages=prompt_messages,
+            system_fingerprint=system_fingerprint,
+            delta=LLMResultChunkDelta(
+                index=index,
+                message=AssistantPromptMessage(content=''),
+                finish_reason='stop',
+                usage=usage
+            )
+        )
 
     @staticmethod
     def _extract_response_tool_calls(response_tool_calls: list[ChatCompletionMessageToolCall | ChoiceDeltaToolCall]) \
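This hunk is the core of the fix. Previously, usage was computed only inside the branch that handled a chunk carrying a finish_reason, so if the Azure stream never surfaced usage there, the caller got none. The restructured handler yields every content chunk immediately, and once the stream is drained it always emits one final synthetic chunk with finish_reason='stop' carrying usage computed from the accumulated completion text. A hedged consumer sketch against the LLMResultChunk shape used above (field access is as in the diff; the helper name is illustrative):

def collect_stream(chunks):
    # consume LLMResultChunk-style objects; usage arrives on the final chunk
    text = ''
    usage = None
    for c in chunks:
        text += c.delta.message.content or ''
        if c.delta.finish_reason is not None:
            usage = c.delta.usage   # set exactly once, on the closing chunk
    return text, usage

With this contract, a consumer no longer needs to guess whether a given provider reports usage mid-stream: the closing chunk is guaranteed to carry it.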
@@ -190,7 +190,6 @@ def test_invoke_stream_chat_model(setup_openai_mock):
         assert isinstance(chunk, LLMResultChunk)
         assert isinstance(chunk.delta, LLMResultChunkDelta)
         assert isinstance(chunk.delta.message, AssistantPromptMessage)
-        assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
         if chunk.delta.finish_reason is not None:
             assert chunk.delta.usage is not None
             assert chunk.delta.usage.completion_tokens > 0
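The dropped assertion tied non-empty message content to the absence of a finish_reason on every chunk, a per-chunk guarantee the restructured handler no longer makes. What the test still enforces is that the finishing chunk reports real usage. A sketch of that surviving expectation as a standalone helper (the helper name and fake-stream framing are illustrative, not from the test file):

def assert_stream_contract(chunks):
    # the expectation the test retains: the closing chunk carries real usage
    saw_final = False
    for chunk in chunks:
        if chunk.delta.finish_reason is not None:
            saw_final = True
            assert chunk.delta.usage is not None
            assert chunk.delta.usage.completion_tokens > 0
    assert saw_final, 'stream ended without a finish_reason chunk'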