Force .eml files to be parsed by the EMAIL parser (#2615)

### What problem does this PR solve?
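#2613 — files whose names end in `.eml` are now assigned `ParserType.EMAIL` as their `parser_id` in both `upload_documents` and `web_crawl`.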
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Kevin Hu 2024-09-27 10:29:30 +08:00 committed by GitHub
parent b16f16e19e
commit 297b2d0ac9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 4 additions and 0 deletions

View File

@@ -381,6 +381,8 @@ def upload_documents(dataset_id):
doc["parser_id"] = ParserType.AUDIO.value
if re.search(r"\.(ppt|pptx|pages)$", filename):
doc["parser_id"] = ParserType.PRESENTATION.value
if re.search(r"\.(eml)$", filename):
doc["parser_id"] = ParserType.EMAIL.value
DocumentService.insert(doc)
FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)

View File

@@ -139,6 +139,8 @@ def web_crawl():
doc["parser_id"] = ParserType.AUDIO.value
if re.search(r"\.(ppt|pptx|pages)$", filename):
doc["parser_id"] = ParserType.PRESENTATION.value
if re.search(r"\.(eml)$", filename):
doc["parser_id"] = ParserType.EMAIL.value
DocumentService.insert(doc)
FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
except Exception as e: