mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-13 04:28:58 +08:00
perf: change Ollama embedding API request (#6876)
This commit is contained in:
parent
f8617db012
commit
56af1a0adf
@ -59,7 +59,7 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
|
|||||||
if not endpoint_url.endswith('/'):
|
if not endpoint_url.endswith('/'):
|
||||||
endpoint_url += '/'
|
endpoint_url += '/'
|
||||||
|
|
||||||
endpoint_url = urljoin(endpoint_url, 'api/embeddings')
|
endpoint_url = urljoin(endpoint_url, 'api/embed')
|
||||||
|
|
||||||
# get model properties
|
# get model properties
|
||||||
context_size = self._get_context_size(model, credentials)
|
context_size = self._get_context_size(model, credentials)
|
||||||
@ -78,32 +78,28 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
|
|||||||
else:
|
else:
|
||||||
inputs.append(text)
|
inputs.append(text)
|
||||||
|
|
||||||
batched_embeddings = []
|
# Prepare the payload for the request
|
||||||
|
payload = {
|
||||||
|
'input': inputs,
|
||||||
|
'model': model,
|
||||||
|
}
|
||||||
|
|
||||||
for text in inputs:
|
# Make the request to the Ollama API
|
||||||
# Prepare the payload for the request
|
response = requests.post(
|
||||||
payload = {
|
endpoint_url,
|
||||||
'prompt': text,
|
headers=headers,
|
||||||
'model': model,
|
data=json.dumps(payload),
|
||||||
}
|
timeout=(10, 300)
|
||||||
|
)
|
||||||
|
|
||||||
# Make the request to the Ollama API
|
response.raise_for_status() # Raise an exception for HTTP errors
|
||||||
response = requests.post(
|
response_data = response.json()
|
||||||
endpoint_url,
|
|
||||||
headers=headers,
|
|
||||||
data=json.dumps(payload),
|
|
||||||
timeout=(10, 300)
|
|
||||||
)
|
|
||||||
|
|
||||||
response.raise_for_status() # Raise an exception for HTTP errors
|
# Extract embeddings and used tokens from the response
|
||||||
response_data = response.json()
|
embeddings = response_data['embeddings']
|
||||||
|
embedding_used_tokens = self.get_num_tokens(model, credentials, inputs)
|
||||||
|
|
||||||
# Extract embeddings and used tokens from the response
|
used_tokens += embedding_used_tokens
|
||||||
embeddings = response_data['embedding']
|
|
||||||
embedding_used_tokens = self.get_num_tokens(model, credentials, [text])
|
|
||||||
|
|
||||||
used_tokens += embedding_used_tokens
|
|
||||||
batched_embeddings.append(embeddings)
|
|
||||||
|
|
||||||
# calc usage
|
# calc usage
|
||||||
usage = self._calc_response_usage(
|
usage = self._calc_response_usage(
|
||||||
@ -113,7 +109,7 @@ class OllamaEmbeddingModel(TextEmbeddingModel):
|
|||||||
)
|
)
|
||||||
|
|
||||||
return TextEmbeddingResult(
|
return TextEmbeddingResult(
|
||||||
embeddings=batched_embeddings,
|
embeddings=embeddings,
|
||||||
usage=usage,
|
usage=usage,
|
||||||
model=model
|
model=model
|
||||||
)
|
)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user