From e7dd487779795c9ec9c01585264df2a2bb3d6f25 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Wed, 18 Sep 2024 09:22:14 +0800 Subject: [PATCH] fix ppt file from filemanager error (#2470) ### What problem does this PR solve? #2467 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- api/apps/file2document_app.py | 1 + api/db/services/file_service.py | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/api/apps/file2document_app.py b/api/apps/file2document_app.py index 0e7301bac..124f9c57e 100644 --- a/api/apps/file2document_app.py +++ b/api/apps/file2document_app.py @@ -85,6 +85,7 @@ def convert(): "location": file.location, "size": file.size }) + FileService.set_constant_parser(doc, file.name) file2document = File2DocumentService.insert({ "id": get_uuid(), "file_id": id, diff --git a/api/db/services/file_service.py b/api/db/services/file_service.py index 3e8a80420..a81f0c6d7 100644 --- a/api/db/services/file_service.py +++ b/api/db/services/file_service.py @@ -366,14 +366,7 @@ class FileService(CommonService): "size": len(blob), "thumbnail": thumbnail(filename, blob) } - if doc["type"] == FileType.VISUAL: - doc["parser_id"] = ParserType.PICTURE.value - if doc["type"] == FileType.AURAL: - doc["parser_id"] = ParserType.AUDIO.value - if re.search(r"\.(ppt|pptx|pages)$", filename): - doc["parser_id"] = ParserType.PRESENTATION.value - if re.search(r"\.(eml)$", filename): - doc["parser_id"] = ParserType.EMAIL.value + self.set_constant_parser(doc, filename) DocumentService.insert(doc) FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id) @@ -381,4 +374,15 @@ class FileService(CommonService): except Exception as e: err.append(file.filename + ": " + str(e)) - return err, files \ No newline at end of file + return err, files + + @staticmethod + def set_constant_parser(doc, filename): + if doc["type"] == FileType.VISUAL: + doc["parser_id"] = ParserType.PICTURE.value + if doc["type"] == FileType.AURAL: + doc["parser_id"] = ParserType.AUDIO.value + if re.search(r"\.(ppt|pptx|pages)$", filename): + doc["parser_id"] = ParserType.PRESENTATION.value + if re.search(r"\.(eml)$", filename): + doc["parser_id"] = ParserType.EMAIL.value \ No newline at end of file