Added OpenAI-like completion api (#5351)
### What problem does this PR solve?

Added an OpenAI-like completion API; related to #4672, #4705.

This function allows users to interact with a model to get responses based on a series of messages. If `stream` is set to True, the response will be streamed in chunks, mimicking the OpenAI-style API.

#### Example usage:

```bash
curl -X POST https://ragflow_address.com/api/v1/chats_openai/<chat_id>/chat/completions \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer $RAGFLOW_API_KEY" \
    -d '{
        "model": "model",
        "messages": [{"role": "user", "content": "Say this is a test!"}],
        "stream": true
    }'
```

Alternatively, you can use Python's `OpenAI` client:

```python
from openai import OpenAI

model = "model"
client = OpenAI(api_key="ragflow-api-key", base_url=f"http://ragflow_address/api/v1/chats_openai/<chat_id>")

completion = client.chat.completions.create(
    model=model,
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Who are you?"},
        {"role": "assistant", "content": "I am an AI assistant named..."},
        {"role": "user", "content": "Can you tell me how to install neovim?"},
    ],
    stream=True
)

stream = True
if stream:
    for chunk in completion:
        print(chunk)
else:
    print(completion.choices[0].message.content)
```

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

### Related Issues

Related to #4672, #4705
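For clients that do not use the OpenAI SDK, the stream can also be consumed as plain server-sent events. Below is a minimal sketch using Python's `requests`; the address, chat id, and API key are the same placeholders as in the examples above, and the `data:{json}` framing matches what this endpoint emits.

```python
import json
import requests

# Placeholders matching the examples above.
url = "http://ragflow_address/api/v1/chats_openai/<chat_id>/chat/completions"
headers = {"Authorization": "Bearer ragflow-api-key"}
payload = {
    "model": "model",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "stream": True,
}

answer = ""
with requests.post(url, headers=headers, json=payload, stream=True) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        # Each SSE event is a single "data:{json}" line followed by a blank line.
        if not line or not line.startswith("data:"):
            continue
        chunk = json.loads(line[len("data:"):])
        delta = chunk["choices"][0]["delta"].get("content") if chunk["choices"] else None
        if delta:  # the final chunk carries content None plus the usage totals
            answer += delta

print(answer)
```

Accumulating `delta.content` until a chunk arrives with `finish_reason` set reassembles the full answer; that final chunk is also the only one whose `usage` field is non-null.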
parent 4e2afcd3b8
commit 5c6a7cb4b8
@@ -15,13 +15,13 @@
 #
 import re
 import json
-from api.db import LLMType
-from flask import request, Response
+import time
 
+from api.db import LLMType
 from api.db.services.conversation_service import ConversationService, iframe_completion
 from api.db.services.conversation_service import completion as rag_completion
 from api.db.services.canvas_service import completion as agent_completion
-from api.db.services.dialog_service import ask
+from api.db.services.dialog_service import ask, chat
 from agent.canvas import Canvas
 from api.db import StatusEnum
 from api.db.db_models import APIToken
@@ -30,11 +30,12 @@ from api.db.services.canvas_service import UserCanvasService
 from api.db.services.dialog_service import DialogService
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.utils import get_uuid
-from api.utils.api_utils import get_error_data_result
+from api.utils.api_utils import get_error_data_result, validate_request
 from api.utils.api_utils import get_result, token_required
 from api.db.services.llm_service import LLMBundle
 from api.db.services.file_service import FileService
 
+from flask import jsonify, request, Response
 
 @manager.route('/chats/<chat_id>/sessions', methods=['POST']) # noqa: F821
 @token_required
@@ -184,6 +185,160 @@ def chat_completion(tenant_id, chat_id):
     return get_result(data=answer)
 
 
+@manager.route('chats_openai/<chat_id>/chat/completions', methods=['POST']) # noqa: F821
+@validate_request("model", "messages") # noqa: F821
+@token_required
+def chat_completion_openai_like(tenant_id, chat_id):
+    """
+    OpenAI-like chat completion API that simulates the behavior of OpenAI's completions endpoint.
+
+    This function allows users to interact with a model and receive responses based on a series of historical messages.
+    If `stream` is set to True (the default), the response is streamed in chunks, mimicking the OpenAI-style API.
+    If `stream` is explicitly set to False, the response is returned as a single complete answer.
+
+    Example usage:
+
+    curl -X POST https://ragflow_address.com/api/v1/chats_openai/<chat_id>/chat/completions \
+        -H "Content-Type: application/json" \
+        -H "Authorization: Bearer $RAGFLOW_API_KEY" \
+        -d '{
+            "model": "model",
+            "messages": [{"role": "user", "content": "Say this is a test!"}],
+            "stream": true
+        }'
+
+    Alternatively, you can use Python's `OpenAI` client:
+
+    from openai import OpenAI
+
+    model = "model"
+    client = OpenAI(api_key="ragflow-api-key", base_url=f"http://ragflow_address/api/v1/chats_openai/<chat_id>")
+
+    completion = client.chat.completions.create(
+        model=model,
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "Who are you?"},
+            {"role": "assistant", "content": "I am an AI assistant named..."},
+            {"role": "user", "content": "Can you tell me how to install neovim?"},
+        ],
+        stream=True
+    )
+
+    stream = True
+    if stream:
+        for chunk in completion:
+            print(chunk)
+    else:
+        print(completion.choices[0].message.content)
+    """
+    req = request.json
+
+    messages = req.get("messages", [])
+    # To prevent an empty [] input
+    if len(messages) < 1:
+        return get_error_data_result("You have to provide messages")
+
+    dia = DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value)
+    if not dia:
+        return get_error_data_result(f"You don't own the chat {chat_id}")
+    dia = dia[0]
+
+    # Drop system messages, and drop assistant messages until at least one user message has been kept
+    msg = []
+    for m in messages:
+        if m["role"] == "system":
+            continue
+        if m["role"] == "assistant" and not msg:
+            continue
+        msg.append(m)
+
+    if req.get("stream", True):
+        # The value of the usage field on all chunks except the last one will be null.
+        # The usage field on the last chunk contains token usage statistics for the entire request.
+        # The choices field on the last chunk will always be an empty array [].
+        def streamed_response_generator(chat_id, dia, msg):
+            token_used = 0
+            response = {
+                "id": f"chatcmpl-{chat_id}",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": "",
+                            "role": "assistant",
+                            "function_call": None,
+                            "tool_calls": None
+                        },
+                        "finish_reason": None,
+                        "index": 0,
+                        "logprobs": None
+                    }
+                ],
+                "created": int(time.time()),
+                "model": "model",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "",
+                "usage": None
+            }
+
+            try:
+                for ans in chat(dia, msg, True):
+                    # chat() yields the cumulative answer so far; slice off what was
+                    # already sent so each chunk carries only the new suffix.
+                    answer = ans["answer"]
+                    incremental = answer[token_used:]
+                    token_used += len(incremental)
+                    response["choices"][0]["delta"]["content"] = incremental
+                    yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n".encode("utf-8")
+            except Exception as e:
+                response["choices"][0]["delta"]["content"] = "**ERROR**: " + str(e)
+                yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n".encode("utf-8")
+
+            # The last chunk
+            response["choices"][0]["delta"]["content"] = None
+            response["choices"][0]["finish_reason"] = "stop"
+            # Rough approximation: counts messages and characters rather than model tokens.
+            response["usage"] = {
+                "prompt_tokens": len(msg),
+                "completion_tokens": token_used,
+                "total_tokens": len(msg) + token_used
+            }
+            yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n".encode("utf-8")
+
+        resp = Response(streamed_response_generator(chat_id, dia, msg), mimetype="text/event-stream")
+        resp.headers.add_header("Cache-control", "no-cache")
+        resp.headers.add_header("Connection", "keep-alive")
+        resp.headers.add_header("X-Accel-Buffering", "no")
+        resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8")
+        return resp
+    else:
+        answer = None
+        for ans in chat(dia, msg, False):
+            # Non-streaming chat() yields a single complete answer; take it and stop.
+            answer = ans
+            break
+
+        content = answer["answer"]
+        response = {
+            "id": f"chatcmpl-{chat_id}",
+            "object": "chat.completion",
+            "created": int(time.time()),
+            "model": req.get("model", ""),
+            # Rough approximation: counts messages and characters rather than model tokens.
+            "usage": {
+                "prompt_tokens": len(messages),
+                "completion_tokens": len(content),
+                "total_tokens": len(messages) + len(content),
+                "completion_tokens_details": {
+                    "reasoning_tokens": len(content),
+                    "accepted_prediction_tokens": len(content),
+                    "rejected_prediction_tokens": len(content)
+                }
+            },
+            "choices": [
+                {
+                    "message": {
+                        "role": "assistant",
+                        "content": content
+                    },
+                    "logprobs": None,
+                    "finish_reason": "stop",
+                    "index": 0
+                }
+            ]
+        }
+        return jsonify(response)
+
+
 @manager.route('/agents/<agent_id>/completions', methods=['POST']) # noqa: F821
 @token_required
 def agent_completions(tenant_id, agent_id):
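One detail worth noting in the streaming branch above: `chat()` yields the *cumulative* answer on every iteration, so the generator slices off the prefix it has already emitted and sends only the new suffix in each chunk. Below is a minimal, self-contained sketch of that slicing logic; the `cumulative_answers` list is a hypothetical stand-in for `chat()`'s output.

```python
# Hypothetical stand-in for the cumulative answers chat() would yield.
cumulative_answers = ["Hel", "Hello, wor", "Hello, world!"]

sent = 0      # mirrors token_used in the generator above
deltas = []
for answer in cumulative_answers:
    incremental = answer[sent:]   # the suffix not yet emitted
    sent += len(incremental)
    deltas.append(incremental)

print(deltas)                     # ['Hel', 'lo, wor', 'ld!']
assert "".join(deltas) == cumulative_answers[-1]
```

The same counter is reused as `completion_tokens` in the final chunk's `usage` block, which is why those figures count characters rather than true model tokens.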