From e05cdc2f9c6d7e3041f6f91e56c747cff8ec5497 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Thu, 13 Mar 2025 10:47:58 +0800 Subject: [PATCH] Fix: encoding detection error. (#6006) ### What problem does this PR solve? #5967 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- rag/nlp/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rag/nlp/__init__.py b/rag/nlp/__init__.py index eb14adeb0..bf6b25c04 100644 --- a/rag/nlp/__init__.py +++ b/rag/nlp/__init__.py @@ -53,7 +53,8 @@ all_codecs = [ def find_codec(blob): detected = chardet.detect(blob[:1024]) if detected['confidence'] > 0.5: - return detected['encoding'] + if detected['encoding'] == "ascii": + return "utf-8" for c in all_codecs: try: