From bc7622c0fe33c08014e77d83002e2afda33e8d4a Mon Sep 17 00:00:00 2001 From: execgit Date: Tue, 29 Oct 2024 14:31:47 +0200 Subject: [PATCH 1/2] Avoid logging file contents at level INFO I had problems with document handling in rootless containers. Long documents caused the container to hang. Reducing the verbosity of logging from retrieval.main seemed to fix the issues I was experiencing. The log line now lists only document labels (first non-empty of the name/title/source metadata, de-duplicated and sorted) instead of the document contents. --- backend/open_webui/apps/retrieval/main.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index e67d1df23..fe99c566e 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -636,6 +636,25 @@ async def update_query_settings( #################################### +def _get_docs_info( + docs: list[Document] +) -> str: + docs_info = set() + + # Label each document by its first non-empty name/title/source metadata. + for doc in docs: + metadata = getattr(doc, 'metadata', None) or {} + doc_name = metadata.get('name', '') + if not doc_name: + doc_name = metadata.get('title', '') + if not doc_name: + doc_name = metadata.get('source', '') + if doc_name: + docs_info.add(doc_name) + + return ', '.join(sorted(docs_info)) + + def save_docs_to_vector_db( docs, collection_name, @@ -644,7 +663,7 @@ def save_docs_to_vector_db( split: bool = True, add: bool = False, ) -> bool: - log.info(f"save_docs_to_vector_db {docs} {collection_name}") + log.info(f"save_docs_to_vector_db: document {_get_docs_info(docs)} {collection_name}") # Check if entries with the same hash (metadata.hash) already exist if metadata and "hash" in metadata: From 03e9add96d58c4cc5253e9981933d5b65c4820b6 Mon Sep 17 00:00:00 2001 From: execgit Date: Tue, 29 Oct 2024 14:33:37 +0200 Subject: [PATCH 2/2] retrieval.utils: omit logging file contents at level INFO --- backend/open_webui/apps/retrieval/utils.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git 
a/backend/open_webui/apps/retrieval/utils.py b/backend/open_webui/apps/retrieval/utils.py index 153bd804f..1866d6d2f 100644 --- a/backend/open_webui/apps/retrieval/utils.py +++ b/backend/open_webui/apps/retrieval/utils.py @@ -76,7 +76,7 @@ def query_doc( limit=k, ) - log.info(f"query_doc:result {result}") + log.info(f"query_doc:result {result.ids} {result.metadatas}") return result except Exception as e: print(e) @@ -127,7 +127,10 @@ def query_doc_with_hybrid_search( "metadatas": [[d.metadata for d in result]], } - log.info(f"query_doc_with_hybrid_search:result {result}") + log.info( + "query_doc_with_hybrid_search:result " + + f"{result['metadatas']} {result['distances']}" + ) return result except Exception as e: raise e