From 722545e5e043b83f9a956909cdaa4878915166f8 Mon Sep 17 00:00:00 2001 From: Jin Hai Date: Thu, 26 Dec 2024 16:08:17 +0800 Subject: [PATCH] Fix bugs (#4241) ### What problem does this PR solve? 1. Refactor the authentication error messages. 2. Fix the document chunk fetch error that occurs when a knowledge base was created on ES and its table therefore can't be found in Infinity. ### Type of change - [x] Fix bug - [x] Refactoring --------- Signed-off-by: jinhai --- api/apps/api_app.py | 24 ++++++++++++------------ api/apps/canvas_app.py | 2 +- api/apps/conversation_app.py | 2 +- api/apps/sdk/session.py | 4 ++-- api/utils/api_utils.py | 2 +- rag/utils/infinity_conn.py | 20 ++++++++++++++------ 6 files changed, 31 insertions(+), 23 deletions(-) diff --git a/api/apps/api_app.py b/api/apps/api_app.py index b38c62f88..095c5b231 100644 --- a/api/apps/api_app.py +++ b/api/apps/api_app.py @@ -141,7 +141,7 @@ def set_conversation(): objs = APIToken.query(token=token) if not objs: return get_json_result( - data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR) + data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR) try: if objs[0].source == "agent": e, cvs = UserCanvasService.get_by_id(objs[0].dialog_id) @@ -182,7 +182,7 @@ def completion(): objs = APIToken.query(token=token) if not objs: return get_json_result( - data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR) + data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR) req = request.json e, conv = API4ConversationService.get_by_id(req["conversation_id"]) if not e: @@ -348,7 +348,7 @@ def get(conversation_id): objs = APIToken.query(token=token) if not objs: return get_json_result( - data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR) + data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR) try: e, conv = 
API4ConversationService.get_by_id(conversation_id) @@ -357,7 +357,7 @@ def get(conversation_id): conv = conv.to_dict() if token != APIToken.query(dialog_id=conv['dialog_id'])[0].token: - return get_json_result(data=False, message='Token is not valid for this conversation_id!"', + return get_json_result(data=False, message='Authentication error: API key is invalid for this conversation_id!"', code=settings.RetCode.AUTHENTICATION_ERROR) for referenct_i in conv['reference']: @@ -379,7 +379,7 @@ def upload(): objs = APIToken.query(token=token) if not objs: return get_json_result( - data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR) + data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR) kb_name = request.form.get("kb_name").strip() tenant_id = objs[0].tenant_id @@ -491,7 +491,7 @@ def upload_parse(): objs = APIToken.query(token=token) if not objs: return get_json_result( - data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR) + data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR) if 'file' not in request.files: return get_json_result( @@ -514,7 +514,7 @@ def list_chunks(): objs = APIToken.query(token=token) if not objs: return get_json_result( - data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR) + data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR) req = request.json @@ -554,7 +554,7 @@ def list_kb_docs(): objs = APIToken.query(token=token) if not objs: return get_json_result( - data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR) + data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR) req = request.json tenant_id = objs[0].tenant_id @@ -594,7 +594,7 @@ def docinfos(): objs = APIToken.query(token=token) if 
not objs: return get_json_result( - data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR) + data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR) req = request.json doc_ids = req["doc_ids"] docs = DocumentService.get_by_ids(doc_ids) @@ -608,7 +608,7 @@ def document_rm(): objs = APIToken.query(token=token) if not objs: return get_json_result( - data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR) + data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR) tenant_id = objs[0].tenant_id req = request.json @@ -670,7 +670,7 @@ def completion_faq(): objs = APIToken.query(token=token) if not objs: return get_json_result( - data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR) + data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR) e, conv = API4ConversationService.get_by_id(req["conversation_id"]) if not e: @@ -809,7 +809,7 @@ def retrieval(): objs = APIToken.query(token=token) if not objs: return get_json_result( - data=False, message='Token is not valid!"', code=settings.RetCode.AUTHENTICATION_ERROR) + data=False, message='Authentication error: API key is invalid!"', code=settings.RetCode.AUTHENTICATION_ERROR) req = request.json kb_ids = req.get("kb_id", []) diff --git a/api/apps/canvas_app.py b/api/apps/canvas_app.py index 335570c12..bd4ae2b77 100644 --- a/api/apps/canvas_app.py +++ b/api/apps/canvas_app.py @@ -94,7 +94,7 @@ def getsse(canvas_id): token = token[1] objs = APIToken.query(beta=token) if not objs: - return get_data_error_result(message='Token is not valid!"') + return get_data_error_result(message='Authentication error: API key is invalid!"') e, c = UserCanvasService.get_by_id(canvas_id) if not e: return get_data_error_result(message="canvas not found.") diff --git 
a/api/apps/conversation_app.py b/api/apps/conversation_app.py index dd7090e22..222a3179b 100644 --- a/api/apps/conversation_app.py +++ b/api/apps/conversation_app.py @@ -126,7 +126,7 @@ def getsse(dialog_id): token = token[1] objs = APIToken.query(beta=token) if not objs: - return get_data_error_result(message='Token is not valid!"') + return get_data_error_result(message='Authentication error: API key is invalid!"') try: e, conv = DialogService.get_by_id(dialog_id) if not e: diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py index db00d95b9..6c89dc99d 100644 --- a/api/apps/sdk/session.py +++ b/api/apps/sdk/session.py @@ -405,7 +405,7 @@ def chatbot_completions(dialog_id): token = token[1] objs = APIToken.query(beta=token) if not objs: - return get_error_data_result(message='Token is not valid!"') + return get_error_data_result(message='Authentication error: API key is invalid!"') if "quote" not in req: req["quote"] = False @@ -432,7 +432,7 @@ def agent_bot_completions(agent_id): token = token[1] objs = APIToken.query(beta=token) if not objs: - return get_error_data_result(message='Token is not valid!"') + return get_error_data_result(message='Authentication error: API key is invalid!"') if "quote" not in req: req["quote"] = False diff --git a/api/utils/api_utils.py b/api/utils/api_utils.py index 2b9fcff48..062cf9e5a 100644 --- a/api/utils/api_utils.py +++ b/api/utils/api_utils.py @@ -293,7 +293,7 @@ def token_required(func): objs = APIToken.query(token=token) if not objs: return get_json_result( - data=False, message='Token is not valid!', code=settings.RetCode.AUTHENTICATION_ERROR + data=False, message='Authentication error: API key is invalid!', code=settings.RetCode.AUTHENTICATION_ERROR ) kwargs['tenant_id'] = objs[0].tenant_id return func(*args, **kwargs) diff --git a/rag/utils/infinity_conn.py b/rag/utils/infinity_conn.py index d5b175290..034e53785 100644 --- a/rag/utils/infinity_conn.py +++ b/rag/utils/infinity_conn.py @@ -26,7 +26,8 @@ from 
rag.utils.doc_store_conn import ( logger = logging.getLogger('ragflow.infinity_conn') -def equivalent_condition_to_str(condition: dict) -> str|None: + +def equivalent_condition_to_str(condition: dict) -> str | None: assert "_id" not in condition cond = list() for k, v in condition.items(): @@ -59,12 +60,13 @@ def concat_dataframes(df_list: list[pl.DataFrame], selectFields: list[str]) -> p return pl.concat(df_list) schema = dict() for field_name in selectFields: - if field_name == 'score()': # Workaround: fix schema is changed to score() + if field_name == 'score()': # Workaround: fix schema is changed to score() schema['SCORE'] = str else: schema[field_name] = str return pl.DataFrame(schema=schema) + @singleton class InfinityConnection(DocStoreConnection): def __init__(self): @@ -80,7 +82,7 @@ class InfinityConnection(DocStoreConnection): connPool = ConnectionPool(infinity_uri) inf_conn = connPool.get_conn() res = inf_conn.show_current_node() - if res.error_code == ErrorCode.OK and res.server_status=="started": + if res.error_code == ErrorCode.OK and res.server_status == "started": self._migrate_db(inf_conn) self.connPool = connPool connPool.release_conn(inf_conn) @@ -360,7 +362,13 @@ class InfinityConnection(DocStoreConnection): for knowledgebaseId in knowledgebaseIds: table_name = f"{indexName}_{knowledgebaseId}" table_list.append(table_name) - table_instance = db_instance.get_table(table_name) + table_instance = None + try: + table_instance = db_instance.get_table(table_name) + except Exception: + logger.warning( + f"Table not found: {table_name}, this knowledge base isn't created in Infinity. 
Maybe it is created in other document engine.") + continue kb_res, _ = table_instance.output(["*"]).filter(f"id = '{chunkId}'").to_pl() logger.debug(f"INFINITY get table: {str(table_list)}, result: {str(kb_res)}") df_list.append(kb_res) @@ -403,7 +411,7 @@ class InfinityConnection(DocStoreConnection): d[k] = "###".join(v) elif k == 'kb_id': if isinstance(d[k], list): - d[k] = d[k][0] # since d[k] is a list, but we need a str + d[k] = d[k][0] # since d[k] is a list, but we need a str elif k == "position_int": assert isinstance(v, list) arr = [num for row in v for num in row] @@ -440,7 +448,7 @@ class InfinityConnection(DocStoreConnection): newValue[k] = " ".join(v) elif k == 'kb_id': if isinstance(newValue[k], list): - newValue[k] = newValue[k][0] # since d[k] is a list, but we need a str + newValue[k] = newValue[k][0] # since d[k] is a list, but we need a str elif k == "position_int": assert isinstance(v, list) arr = [num for row in v for num in row]