mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-12 10:48:59 +08:00
fix: array oob in azure openai embeddings (#1905)
This commit is contained in:
parent
025b859c7e
commit
18af84e193
@ -54,7 +54,7 @@ class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
|
|||||||
_iter = range(0, len(tokens), max_chunks)
|
_iter = range(0, len(tokens), max_chunks)
|
||||||
|
|
||||||
for i in _iter:
|
for i in _iter:
|
||||||
embeddings, embedding_used_tokens = self._embedding_invoke(
|
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
|
||||||
model=model,
|
model=model,
|
||||||
client=client,
|
client=client,
|
||||||
texts=tokens[i: i + max_chunks],
|
texts=tokens[i: i + max_chunks],
|
||||||
@ -62,7 +62,7 @@ class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
|
|||||||
)
|
)
|
||||||
|
|
||||||
used_tokens += embedding_used_tokens
|
used_tokens += embedding_used_tokens
|
||||||
batched_embeddings += [data for data in embeddings]
|
batched_embeddings += embeddings_batch
|
||||||
|
|
||||||
results: list[list[list[float]]] = [[] for _ in range(len(texts))]
|
results: list[list[list[float]]] = [[] for _ in range(len(texts))]
|
||||||
num_tokens_in_batch: list[list[int]] = [[] for _ in range(len(texts))]
|
num_tokens_in_batch: list[list[int]] = [[] for _ in range(len(texts))]
|
||||||
@ -73,7 +73,7 @@ class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
|
|||||||
for i in range(len(texts)):
|
for i in range(len(texts)):
|
||||||
_result = results[i]
|
_result = results[i]
|
||||||
if len(_result) == 0:
|
if len(_result) == 0:
|
||||||
embeddings, embedding_used_tokens = self._embedding_invoke(
|
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
|
||||||
model=model,
|
model=model,
|
||||||
client=client,
|
client=client,
|
||||||
texts=[""],
|
texts=[""],
|
||||||
@ -81,7 +81,7 @@ class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
|
|||||||
)
|
)
|
||||||
|
|
||||||
used_tokens += embedding_used_tokens
|
used_tokens += embedding_used_tokens
|
||||||
average = embeddings[0]
|
average = embeddings_batch[0]
|
||||||
else:
|
else:
|
||||||
average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
|
average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
|
||||||
embeddings[i] = (average / np.linalg.norm(average)).tolist()
|
embeddings[i] = (average / np.linalg.norm(average)).tolist()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user