def format_qa_document(self, flask_app: Flask, tenant_id: str, document_node, all_qa_documents):
    """Build question/answer documents for one node and add them to a shared list.

    The caller starts this method as a ``threading.Thread`` target, so the
    Flask application object is passed in explicitly and its application
    context is pushed here before any generation work is done.

    :param flask_app: Flask application whose ``app_context()`` wraps the work.
    :param tenant_id: forwarded to ``LLMGenerator.generate_qa_document``.
    :param document_node: source node; its ``page_content`` is sent to the
        generator and its ``metadata`` is copied onto every produced document.
    :param all_qa_documents: list that is extended in place with the results.
    :return: ``None``; results are delivered through ``all_qa_documents``.

    Nodes whose ``page_content`` is ``None`` or whitespace-only are skipped
    without touching ``all_qa_documents``. Any exception raised while
    generating or converting is logged with ``logging.exception`` and
    results in nothing being added for this node.
    """
    source_text = document_node.page_content
    # Nothing to generate from a missing or blank segment.
    if source_text is None or not source_text.strip():
        return

    with flask_app.app_context():
        try:
            # Ask the QA model for question/answer text, then split it into pairs.
            llm_output = LLMGenerator.generate_qa_document(tenant_id, source_text)
            qa_pairs = self.format_split_text(llm_output)

            produced = []
            for pair in qa_pairs:
                question = pair['question']
                # The question becomes the page content; the answer and
                # identifiers travel in a private copy of the node's metadata.
                qa_doc = Document(page_content=question, metadata=document_node.metadata.copy())
                new_doc_id = str(uuid.uuid4())
                question_hash = helper.generate_text_hash(question)
                qa_doc.metadata['answer'] = pair['answer']
                qa_doc.metadata['doc_id'] = new_doc_id
                qa_doc.metadata['doc_hash'] = question_hash
                produced.append(qa_doc)
        except Exception as e:
            logging.exception(e)
            # A failure part-way through discards everything built for this node.
            produced = []

        all_qa_documents.extend(produced)