diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py index cd882c48c..16482c88c 100644 --- a/api/db/services/document_service.py +++ b/api/db/services/document_service.py @@ -29,7 +29,6 @@ from peewee import fn from api.db.db_utils import bulk_insert_into_db from api import settings from api.utils import current_timestamp, get_format_time, get_uuid -from graphrag.general.mind_map_extractor import MindMapExtractor from rag.settings import SVR_QUEUE_NAME from rag.utils.storage_factory import STORAGE_IMPL from rag.nlp import search, rag_tokenizer @@ -584,6 +583,7 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id): cks = [c for c in docs if c["doc_id"] == doc_id] if parser_ids[doc_id] != ParserType.PICTURE.value: + from graphrag.general.mind_map_extractor import MindMapExtractor mindmap = MindMapExtractor(llm_bdl) try: mind_map = trio.run(mindmap, [c["content_with_weight"] for c in docs if c["doc_id"] == doc_id]) diff --git a/graphrag/general/extractor.py b/graphrag/general/extractor.py index c2a9bf127..81e597e4a 100644 --- a/graphrag/general/extractor.py +++ b/graphrag/general/extractor.py @@ -24,6 +24,7 @@ from graphrag.general.graph_prompt import SUMMARIZE_DESCRIPTIONS_PROMPT from graphrag.utils import get_llm_cache, set_llm_cache, handle_single_entity_extraction, \ handle_single_relationship_extraction, split_string_by_multi_markers, flat_uniq_list, chat_limiter from rag.llm.chat_model import Base as CompletionLLM +from rag.prompts import message_fit_in from rag.utils import truncate GRAPH_FIELD_SEP = "" @@ -58,7 +59,8 @@ class Extractor: response = get_llm_cache(self._llm.llm_name, system, hist, conf) if response: return response - response = self._llm.chat(system, hist, conf) + _, system_msg = message_fit_in([{"role": "system", "content": system}], int(self._llm.max_length * 0.97)) + response = self._llm.chat(system_msg[0]["content"], hist, conf) response = re.sub(r".*", "", response, flags=re.DOTALL) if response.find("**ERROR**") >= 0: raise Exception(response)