Added OpenAI-like completion api (#5351)

### What problem does this PR solve? Added OpenAI-like completion api, related to #4672, #4705 This function allows users to interact with a model to get responses based on a series of messages. If `stream` is set to True, the response will be streamed in chunks, mimicking the OpenAI-style API. #### Example usage: ```bash curl -X POST https://ragflow_address.com/api/v1/chats_openai/<chat_id>/chat/completions \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $RAGFLOW_API_KEY" \ -d '{ "model": "model", "messages": [{"role": "user", "content": "Say this is a test!"}], "stream": true }' ``` Alternatively, you can use Python's `OpenAI` client: ```python from openai import OpenAI model = "model" client = OpenAI(api_key="ragflow-api-key", base_url=f"http://ragflow_address/api/v1/chats_openai/<chat_id>") completion = client.chat.completions.create( model=model, messages=[ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Who you are?"}, {"role": "assistant", "content": "I am an AI assistant named..."}, {"role": "user", "content": "Can you tell me how to install neovim"}, ], stream=True ) stream = True if stream: for chunk in completion: print(chunk) else: print(completion.choices[0].message.content) ``` ### Type of change - [x] New Feature (non-breaking change which adds functionality) ### Related Issues Related to #4672, #4705
2025-08-14 05:45:53 +08:00 · 2025-02-26 11:37:29 +08:00 · 2025-02-26 11:37:29 +08:00 · 5c6a7cb4b8
commit 5c6a7cb4b8
parent 4e2afcd3b8
1 changed files with 159 additions and 4 deletions
--- a/api/apps/sdk/session.py
+++ b/api/apps/sdk/session.py
@ -15,13 +15,13 @@
 #
 import re
 import json
-from api.db import LLMType
-from flask import request, Response
+import time

+from api.db import LLMType
 from api.db.services.conversation_service import ConversationService, iframe_completion
 from api.db.services.conversation_service import completion as rag_completion
 from api.db.services.canvas_service import completion as agent_completion
-from api.db.services.dialog_service import ask
+from api.db.services.dialog_service import ask, chat
 from agent.canvas import Canvas
 from api.db import StatusEnum
 from api.db.db_models import APIToken
@ -30,11 +30,12 @@ from api.db.services.canvas_service import UserCanvasService
 from api.db.services.dialog_service import DialogService
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.utils import get_uuid
-from api.utils.api_utils import get_error_data_result
+from api.utils.api_utils import get_error_data_result, validate_request
 from api.utils.api_utils import get_result, token_required
 from api.db.services.llm_service import LLMBundle
 from api.db.services.file_service import FileService

+from flask import jsonify, request, Response

@manager.route('/chats/<chat_id>/sessions', methods=['POST'])  # noqa: F821
@token_required
@ -184,6 +185,160 @@ def chat_completion(tenant_id, chat_id):
        return get_result(data=answer)


+@manager.route('chats_openai/<chat_id>/chat/completions', methods=['POST'])  # noqa: F821
+@validate_request("model", "messages")  # noqa: F821
+@token_required
+def chat_completion_openai_like(tenant_id, chat_id):
+    """
+    OpenAI-like chat completion API that simulates the behavior of OpenAI's completions endpoint.
+    
+    This function allows users to interact with a model and receive responses based on a series of historical messages.
+    If `stream` is set to True (by default), the response will be streamed in chunks, mimicking the OpenAI-style API.
+    Set `stream` to False explicitly, the response will be returned in a single complete answer.
+    Example usage:
+
+    curl -X POST https://ragflow_address.com/api/v1/chats_openai/<chat_id>/chat/completions \
+        -H "Content-Type: application/json" \
+        -H "Authorization: Bearer $RAGFLOW_API_KEY" \
+        -d '{
+            "model": "model",
+            "messages": [{"role": "user", "content": "Say this is a test!"}],
+            "stream": true
+        }'
+
+    Alternatively, you can use Python's `OpenAI` client:
+
+    from openai import OpenAI
+
+    model = "model"
+    client = OpenAI(api_key="ragflow-api-key", base_url=f"http://ragflow_address/api/v1/chats_openai/<chat_id>")
+    
+    completion = client.chat.completions.create(
+        model=model,
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "Who you are?"},
+            {"role": "assistant", "content": "I am an AI assistant named..."},
+            {"role": "user", "content": "Can you tell me how to install neovim"},
+        ],
+        stream=True
+    )
+    
+    stream = True
+    if stream:
+        for chunk in completion:
+            print(chunk)
+    else:
+        print(completion.choices[0].message.content)
+    """
+    req = request.json
+
+    messages = req.get("messages", [])
+    # To prevent empty [] input
+    if len(messages) < 1:
+        return get_error_data_result("You have to provide messages")
+
+    dia = DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value)
+    if not dia:
+        return get_error_data_result(f"You don't own the chat {chat_id}")
+    dia = dia[0]
+
+    # Filter system and assistant messages
+    msg = None
+    msg = [m for m in messages if m["role"] != "system" and (m["role"] != "assistant" or msg)]
+
+    if req.get("stream", True):
+        # The value for the usage field on all chunks except for the last one will be null.
+        # The usage field on the last chunk contains token usage statistics for the entire request.
+        # The choices field on the last chunk will always be an empty array [].
+        def streamed_respose_generator(chat_id, dia, msg):
+            token_used = 0
+            response = {
+                "id": f"chatcmpl-{chat_id}",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": "",
+                            "role": "assistant",
+                            "function_call": None,
+                            "tool_calls": None
+                        },
+                        "finish_reason": None,
+                        "index": 0,
+                        "logprobs": None
+                    }
+                ],
+                "created": int(time.time()),
+                "model": "model",
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "",
+                "usage": None
+            }
+
+            try:
+                for ans in chat(dia, msg, True):
+                    answer = ans["answer"]
+                    incremental = answer[token_used:]
+                    token_used += len(incremental)
+                    response["choices"][0]["delta"]["content"] = incremental
+                    yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n".encode("utf-8")
+            except Exception as e:
+                response["choices"][0]["delta"]["content"] = "**ERROR**: " + str(e)
+                yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n".encode("utf-8")
+
+            # The last chunck
+            response["choices"][0]["delta"]["content"] = None
+            response["choices"][0]["finish_reason"] = "stop"
+            response["usage"] = {
+                "prompt_tokens": len(msg),
+                "completion_tokens": token_used,
+                "total_tokens": len(msg) + token_used
+            }
+            yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n".encode("utf-8")
+
+        resp = Response(streamed_respose_generator(chat_id, dia, msg), mimetype="text/event-stream")
+        resp.headers.add_header("Cache-control", "no-cache")
+        resp.headers.add_header("Connection", "keep-alive")
+        resp.headers.add_header("X-Accel-Buffering", "no")
+        resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8")
+        return resp
+    else:
+        answer = None
+        for ans in chat(dia, msg, False):
+            # focus answer content only
+            answer = ans
+            break
+
+        response  = {
+            "id": f"chatcmpl-{chat_id}",
+            "object": "chat.completion",
+            "created": int(time.time()),
+            "model": req.get("model", ""),
+            "usage": {
+                "prompt_tokens": len(messages),
+                "completion_tokens": len(answer),
+                "total_tokens": len(messages) + len(answer),
+                "completion_tokens_details": {
+                    "reasoning_tokens": len(answer),
+                    "accepted_prediction_tokens": len(answer),
+                    "rejected_prediction_tokens": len(answer)
+                }
+            },
+            "choices": [
+                {
+                    "message": {
+                        "role": "assistant",
+                        "content": answer["answer"]
+                    },
+                    "logprobs": None,
+                    "finish_reason": "stop",
+                    "index": 0
+                }
+            ]
+        }
+        return jsonify(response)
+
+
@manager.route('/agents/<agent_id>/completions', methods=['POST'])  # noqa: F821
@token_required
 def agent_completions(tenant_id, agent_id):