mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-11 16:48:58 +08:00
Fix parser selection for pptx files added from the file manager (#2482)
### What problem does this PR solve?

#2467

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
2b0dc01a88
commit
2324b88579
@ -77,7 +77,7 @@ def convert():
|
||||
doc = DocumentService.insert({
|
||||
"id": get_uuid(),
|
||||
"kb_id": kb.id,
|
||||
"parser_id": kb.parser_id,
|
||||
"parser_id": FileService.get_parser(file.type, file.name, kb.parser_id),
|
||||
"parser_config": kb.parser_config,
|
||||
"created_by": current_user.id,
|
||||
"type": file.type,
|
||||
@ -85,7 +85,6 @@ def convert():
|
||||
"location": file.location,
|
||||
"size": file.size
|
||||
})
|
||||
FileService.set_constant_parser(doc, file.name)
|
||||
file2document = File2DocumentService.insert({
|
||||
"id": get_uuid(),
|
||||
"file_id": id,
|
||||
|
@ -357,7 +357,7 @@ class FileService(CommonService):
|
||||
doc = {
|
||||
"id": get_uuid(),
|
||||
"kb_id": kb.id,
|
||||
"parser_id": kb.parser_id,
|
||||
"parser_id": self.get_parser(filetype, filename, kb.parser_id),
|
||||
"parser_config": kb.parser_config,
|
||||
"created_by": user_id,
|
||||
"type": filetype,
|
||||
@ -366,7 +366,6 @@ class FileService(CommonService):
|
||||
"size": len(blob),
|
||||
"thumbnail": thumbnail(filename, blob)
|
||||
}
|
||||
self.set_constant_parser(doc, filename)
|
||||
DocumentService.insert(doc)
|
||||
|
||||
FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
|
||||
@ -377,12 +376,13 @@ class FileService(CommonService):
|
||||
return err, files
|
||||
|
||||
@staticmethod
|
||||
def set_constant_parser(doc, filename):
|
||||
if doc["type"] == FileType.VISUAL:
|
||||
doc["parser_id"] = ParserType.PICTURE.value
|
||||
if doc["type"] == FileType.AURAL:
|
||||
doc["parser_id"] = ParserType.AUDIO.value
|
||||
def get_parser(doc_type, filename, default):
|
||||
if doc_type == FileType.VISUAL:
|
||||
return ParserType.PICTURE.value
|
||||
if doc_type == FileType.AURAL:
|
||||
return ParserType.AUDIO.value
|
||||
if re.search(r"\.(ppt|pptx|pages)$", filename):
|
||||
doc["parser_id"] = ParserType.PRESENTATION.value
|
||||
return ParserType.PRESENTATION.value
|
||||
if re.search(r"\.(eml)$", filename):
|
||||
doc["parser_id"] = ParserType.EMAIL.value
|
||||
return ParserType.EMAIL.value
|
||||
return default
|
Loading…
x
Reference in New Issue
Block a user