mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-12 09:49:03 +08:00
Fix: Wrong cutoff length leads to empty input in OpenAI-compatible embedding model. (#7133)
This commit is contained in:
parent
2c188a45c8
commit
4cbeb6815b
@ -72,7 +72,7 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
|
|||||||
num_tokens = self._get_num_tokens_by_gpt2(text)
|
num_tokens = self._get_num_tokens_by_gpt2(text)
|
||||||
|
|
||||||
if num_tokens >= context_size:
|
if num_tokens >= context_size:
|
||||||
cutoff = int(len(text) * (np.floor(context_size / num_tokens)))
|
cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
|
||||||
# if num tokens is larger than context length, only use the start
|
# if num tokens is larger than context length, only use the start
|
||||||
inputs.append(text[0: cutoff])
|
inputs.append(text[0: cutoff])
|
||||||
else:
|
else:
|
||||||
|
@ -76,7 +76,7 @@ class OAICompatEmbeddingModel(_CommonOAI_API_Compat, TextEmbeddingModel):
|
|||||||
num_tokens = self._get_num_tokens_by_gpt2(text)
|
num_tokens = self._get_num_tokens_by_gpt2(text)
|
||||||
|
|
||||||
if num_tokens >= context_size:
|
if num_tokens >= context_size:
|
||||||
cutoff = int(len(text) * (np.floor(context_size / num_tokens)))
|
cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
|
||||||
# if num tokens is larger than context length, only use the start
|
# if num tokens is larger than context length, only use the start
|
||||||
inputs.append(text[0: cutoff])
|
inputs.append(text[0: cutoff])
|
||||||
else:
|
else:
|
||||||
|
@ -79,7 +79,7 @@ class OAICompatEmbeddingModel(_CommonOAI_API_Compat, TextEmbeddingModel):
|
|||||||
num_tokens = self._get_num_tokens_by_gpt2(text)
|
num_tokens = self._get_num_tokens_by_gpt2(text)
|
||||||
|
|
||||||
if num_tokens >= context_size:
|
if num_tokens >= context_size:
|
||||||
cutoff = int(len(text) * (np.floor(context_size / num_tokens)))
|
cutoff = int(np.floor(len(text) * (context_size / num_tokens)))
|
||||||
# if num tokens is larger than context length, only use the start
|
# if num tokens is larger than context length, only use the start
|
||||||
inputs.append(text[0: cutoff])
|
inputs.append(text[0: cutoff])
|
||||||
else:
|
else:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user