From 6307adfba1048c01a9954723f8d16b02fe984470 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Mon, 25 Mar 2024 23:47:08 -0700 Subject: [PATCH] feat: better error handling --- backend/apps/rag/main.py | 93 +++++++++++++++++++++++----------------- backend/constants.py | 2 + 2 files changed, 55 insertions(+), 40 deletions(-) diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py index 1e50ef20b..d87f7bc73 100644 --- a/backend/apps/rag/main.py +++ b/backend/apps/rag/main.py @@ -115,6 +115,7 @@ class CollectionNameForm(BaseModel): class StoreWebForm(CollectionNameForm): url: str + @app.get("/") async def get_status(): return { @@ -297,13 +298,18 @@ def store_web(form_data: StoreWebForm, user=Depends(get_current_user)): def store_data_in_vector_db(data, collection_name, overwrite: bool = False) -> bool: + text_splitter = RecursiveCharacterTextSplitter( chunk_size=app.state.CHUNK_SIZE, chunk_overlap=app.state.CHUNK_OVERLAP, add_start_index=True, ) docs = text_splitter.split_documents(data) - return store_docs_in_vector_db(docs, collection_name, overwrite) + + if len(docs) > 0: + return store_docs_in_vector_db(docs, collection_name, overwrite), None + else: + raise ValueError(ERROR_MESSAGES.EMPTY_CONTENT) def store_text_in_vector_db( @@ -319,6 +325,7 @@ def store_text_in_vector_db( def store_docs_in_vector_db(docs, collection_name, overwrite: bool = False) -> bool: + texts = [doc.page_content for doc in docs] metadatas = [doc.metadata for doc in docs] @@ -455,19 +462,21 @@ def store_doc( loader, known_type = get_loader(file.filename, file.content_type, file_path) data = loader.load() - result = store_data_in_vector_db(data, collection_name) - if result: - return { - "status": True, - "collection_name": collection_name, - "filename": filename, - "known_type": known_type, - } - else: + try: + result = store_data_in_vector_db(data, collection_name) + + if result: + return { + "status": True, + "collection_name": collection_name, + "filename": filename, + "known_type": known_type, + } + except Exception as e: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=ERROR_MESSAGES.DEFAULT(), + detail=e, ) except Exception as e: log.exception(e) @@ -532,38 +541,42 @@ def scan_docs_dir(user=Depends(get_admin_user)): ) data = loader.load() - result = store_data_in_vector_db(data, collection_name) + try: + result = store_data_in_vector_db(data, collection_name) - if result: - sanitized_filename = sanitize_filename(filename) - doc = Documents.get_doc_by_name(sanitized_filename) + if result: + sanitized_filename = sanitize_filename(filename) + doc = Documents.get_doc_by_name(sanitized_filename) - if doc == None: - doc = Documents.insert_new_doc( - user.id, - DocumentForm( - **{ - "name": sanitized_filename, - "title": filename, - "collection_name": collection_name, - "filename": filename, - "content": ( - json.dumps( - { - "tags": list( - map( - lambda name: {"name": name}, - tags, + if doc == None: + doc = Documents.insert_new_doc( + user.id, + DocumentForm( + **{ + "name": sanitized_filename, + "title": filename, + "collection_name": collection_name, + "filename": filename, + "content": ( + json.dumps( + { + "tags": list( + map( + lambda name: {"name": name}, + tags, + ) ) - ) - } - ) - if len(tags) - else "{}" - ), - } - ), - ) + } + ) + if len(tags) + else "{}" + ), + } + ), + ) + except Exception as e: + print(e) + pass except Exception as e: log.exception(e) diff --git a/backend/constants.py b/backend/constants.py index 42c5c85eb..8bcdd0789 100644 --- a/backend/constants.py +++ b/backend/constants.py @@ -60,3 +60,5 @@ class ERROR_MESSAGES(str, Enum): MODEL_NOT_FOUND = lambda name="": f"Model '{name}' was not found" OPENAI_NOT_FOUND = lambda name="": f"OpenAI API was not found" OLLAMA_NOT_FOUND = "WebUI could not connect to Ollama" + + EMPTY_CONTENT = "The content provided is empty. Please ensure that there is text or data present before proceeding."