diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py
index f2fd6ab66..486dd5afe 100644
--- a/api/apps/sdk/session.py
+++ b/api/apps/sdk/session.py
@@ -15,13 +15,13 @@
 #
 import re
 import json
-from api.db import LLMType
-from flask import request, Response
+import time
+from api.db import LLMType
 from api.db.services.conversation_service import ConversationService, iframe_completion
 from api.db.services.conversation_service import completion as rag_completion
 from api.db.services.canvas_service import completion as agent_completion
-from api.db.services.dialog_service import ask
+from api.db.services.dialog_service import ask, chat
 from agent.canvas import Canvas
 from api.db import StatusEnum
 from api.db.db_models import APIToken
@@ -30,11 +30,12 @@ from api.db.services.canvas_service import UserCanvasService
 from api.db.services.dialog_service import DialogService
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.utils import get_uuid
-from api.utils.api_utils import get_error_data_result
+from api.utils.api_utils import get_error_data_result, validate_request
 from api.utils.api_utils import get_result, token_required
 from api.db.services.llm_service import LLMBundle
 from api.db.services.file_service import FileService
+from flask import jsonify, request, Response


 @manager.route('/chats/<chat_id>/sessions', methods=['POST'])  # noqa: F821
 @token_required
@@ -184,6 +185,160 @@ def chat_completion(tenant_id, chat_id):
     return get_result(data=answer)


+@manager.route('/chats_openai/<chat_id>/chat/completions', methods=['POST'])  # noqa: F821
+@validate_request("model", "messages")
+@token_required
+def chat_completion_openai_like(tenant_id, chat_id):
+    """
+    OpenAI-like chat completion API that simulates the behavior of OpenAI's completions endpoint.
+
+    This function allows users to interact with a model and receive responses based on a series of historical messages.
+    If `stream` is set to True (the default), the response is streamed back in chunks, mimicking the OpenAI streaming API.
+    If `stream` is set to False explicitly, the response is returned as a single complete answer.
+
+    Example usage:
+
+    curl -X POST https://ragflow_address.com/api/v1/chats_openai/<chat_id>/chat/completions \
+    -H "Content-Type: application/json" \
+    -H "Authorization: Bearer $RAGFLOW_API_KEY" \
+    -d '{
+        "model": "model",
+        "messages": [{"role": "user", "content": "Say this is a test!"}],
+        "stream": true
+    }'
+
+    Alternatively, you can use Python's `OpenAI` client:
+
+    from openai import OpenAI
+
+    model = "model"
+    client = OpenAI(api_key="ragflow-api-key", base_url=f"http://ragflow_address/api/v1/chats_openai/{chat_id}")
+
+    completion = client.chat.completions.create(
+        model=model,
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "Who are you?"},
+            {"role": "assistant", "content": "I am an AI assistant named..."},
+            {"role": "user", "content": "Can you tell me how to install neovim?"},
+        ],
+        stream=True
+    )
+
+    stream = True
+    if stream:
+        for chunk in completion:
+            print(chunk)
+    else:
+        print(completion.choices[0].message.content)
+    """
+    req = request.json
+
+    messages = req.get("messages", [])
+    # Reject an empty message list early.
+    if len(messages) < 1:
+        return get_error_data_result("You have to provide messages")
+
+    dia = DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value)
+    if not dia:
+        return get_error_data_result(f"You don't own the chat {chat_id}")
+    dia = dia[0]
+
+    # Drop system messages, and drop assistant messages that appear before the
+    # first user message; keep the rest of the history in order.
+    msg = []
+    for m in messages:
+        if m["role"] == "system":
+            continue
+        if m["role"] == "assistant" and not msg:
+            continue
+        msg.append(m)
+
+    if req.get("stream", True):
+        # OpenAI-style streaming: the usage field is null on every chunk except
+        # the last one, which carries token usage statistics for the entire
+        # request. Here the final chunk keeps a single choice whose delta
+        # content is null and whose finish_reason is "stop".
+        def streamed_response_generator(chat_id, dia, msg):
+            # Character counts are used as a rough stand-in for token counts.
+            token_used = 0
+            response = {
+                "id": f"chatcmpl-{chat_id}",
+                "choices": [
+                    {
+                        "delta": {
+                            "content": "",
+                            "role": "assistant",
+                            "function_call": None,
+                            "tool_calls": None
+                        },
+                        "finish_reason": None,
+                        "index": 0,
+                        "logprobs": None
+                    }
+                ],
+                "created": int(time.time()),
+                "model": req.get("model", ""),
+                "object": "chat.completion.chunk",
+                "system_fingerprint": "",
+                "usage": None
+            }
+
+            try:
+                for ans in chat(dia, msg, True):
+                    # chat() yields the full answer accumulated so far; slice
+                    # off the prefix that has already been sent.
+                    answer = ans["answer"]
+                    incremental = answer[token_used:]
+                    token_used += len(incremental)
+                    response["choices"][0]["delta"]["content"] = incremental
+                    yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n".encode("utf-8")
+            except Exception as e:
+                response["choices"][0]["delta"]["content"] = "**ERROR**: " + str(e)
+                yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n".encode("utf-8")
+
+            # The last chunk: null delta content, finish_reason "stop", usage stats.
+            response["choices"][0]["delta"]["content"] = None
+            response["choices"][0]["finish_reason"] = "stop"
+            response["usage"] = {
+                "prompt_tokens": len(msg),
+                "completion_tokens": token_used,
+                "total_tokens": len(msg) + token_used
+            }
+            yield f"data:{json.dumps(response, ensure_ascii=False)}\n\n".encode("utf-8")
+
+        resp = Response(streamed_response_generator(chat_id, dia, msg), mimetype="text/event-stream")
+        resp.headers.add_header("Cache-control", "no-cache")
+        resp.headers.add_header("Connection", "keep-alive")
+        resp.headers.add_header("X-Accel-Buffering", "no")
+        resp.headers.add_header("Content-Type", "text/event-stream; charset=utf-8")
+        return resp
+    else:
+        answer = None
+        for ans in chat(dia, msg, False):
+            # Non-streaming mode: take the first (complete) answer only.
+            answer = ans
+            break
+
+        content = answer["answer"] if answer else ""
+        response = {
+            "id": f"chatcmpl-{chat_id}",
+            "object": "chat.completion",
+            "created": int(time.time()),
+            "model": req.get("model", ""),
+            # Message and character counts are used as a rough stand-in for
+            # real token counts.
+            "usage": {
+                "prompt_tokens": len(messages),
+                "completion_tokens": len(content),
+                "total_tokens": len(messages) + len(content),
+                "completion_tokens_details": {
+                    "reasoning_tokens": len(content),
+                    "accepted_prediction_tokens": len(content),
+                    "rejected_prediction_tokens": len(content)
+                }
+            },
+            "choices": [
+                {
+                    "message": {
+                        "role": "assistant",
+                        "content": content
+                    },
+                    "logprobs": None,
+                    "finish_reason": "stop",
+                    "index": 0
+                }
+            ]
+        }
+        return jsonify(response)
+
+
 @manager.route('/agents/<agent_id>/completions', methods=['POST'])  # noqa: F821
 @token_required
 def agent_completions(tenant_id, agent_id):
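For quick manual verification of the streamed endpoint without the OpenAI SDK, a minimal client sketch is shown below. It assumes the third-party `requests` package; the server address, `<chat_id>`, and `<RAGFLOW_API_KEY>` are placeholders to substitute:

    import json
    import requests

    url = "http://ragflow_address/api/v1/chats_openai/<chat_id>/chat/completions"
    headers = {"Authorization": "Bearer <RAGFLOW_API_KEY>"}
    payload = {
        "model": "model",
        "messages": [{"role": "user", "content": "Say this is a test!"}],
        "stream": True,
    }

    with requests.post(url, headers=headers, json=payload, stream=True) as resp:
        for line in resp.iter_lines():
            # Each event is emitted by the server as a single "data:<json>\n\n" frame.
            if not line or not line.startswith(b"data:"):
                continue
            chunk = json.loads(line[len(b"data:"):])
            choice = chunk["choices"][0]
            if choice["finish_reason"] == "stop":
                print()
                print("usage:", chunk["usage"])  # only the final chunk carries usage
            elif choice["delta"]["content"]:
                print(choice["delta"]["content"], end="", flush=True)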