mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-10 21:39:02 +08:00
Fix: exception raised when extracting images from docx files. (#7636)
### What problem does this PR solve?

Close #7631

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
e7a6a9e47e
commit
bfe97d896d
@ -60,6 +60,9 @@ class Docx(DocxParser):
|
|||||||
except InvalidImageStreamError:
|
except InvalidImageStreamError:
|
||||||
logging.info("The recognized image stream appears to be corrupted. Skipping image.")
|
logging.info("The recognized image stream appears to be corrupted. Skipping image.")
|
||||||
return None
|
return None
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
logging.info("The recognized image stream appears to be corrupted. Skipping image.")
|
||||||
|
return None
|
||||||
try:
|
try:
|
||||||
image = Image.open(BytesIO(image_blob)).convert('RGB')
|
image = Image.open(BytesIO(image_blob)).convert('RGB')
|
||||||
return image
|
return image
|
||||||
|
@ -44,6 +44,7 @@ def chunks_format(reference):
|
|||||||
"similarity": chunk.get("similarity"),
|
"similarity": chunk.get("similarity"),
|
||||||
"vector_similarity": chunk.get("vector_similarity"),
|
"vector_similarity": chunk.get("vector_similarity"),
|
||||||
"term_similarity": chunk.get("term_similarity"),
|
"term_similarity": chunk.get("term_similarity"),
|
||||||
|
"doc_type": chunk.get("doc_type_kwd"),
|
||||||
}
|
}
|
||||||
for chunk in reference.get("chunks", [])
|
for chunk in reference.get("chunks", [])
|
||||||
]
|
]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user