diff --git a/rag/app/naive.py b/rag/app/naive.py index e2e4a6a45..28e3bbbcc 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -60,6 +60,9 @@ class Docx(DocxParser): except InvalidImageStreamError: logging.info("The recognized image stream appears to be corrupted. Skipping image.") return None + except UnicodeDecodeError: + logging.info("The recognized image stream appears to be corrupted. Skipping image.") + return None try: image = Image.open(BytesIO(image_blob)).convert('RGB') return image diff --git a/rag/prompts.py b/rag/prompts.py index 4a61de557..cb1e1108b 100644 --- a/rag/prompts.py +++ b/rag/prompts.py @@ -44,6 +44,7 @@ def chunks_format(reference): "similarity": chunk.get("similarity"), "vector_similarity": chunk.get("vector_similarity"), "term_similarity": chunk.get("term_similarity"), + "doc_type": chunk.get("doc_type_kwd"), } for chunk in reference.get("chunks", []) ]