From 78856703c4465fd36dcef57d923f0cd2ea6bfae2 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Fri, 20 Sep 2024 15:33:38 +0800 Subject: [PATCH] make excel parsing configurable (#2517) ### What problem does this PR solve? #2516 ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- rag/app/naive.py | 5 ++++- rag/llm/chat_model.py | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/rag/app/naive.py b/rag/app/naive.py index 9e0724de5..101adca43 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -221,7 +221,10 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, elif re.search(r"\.xlsx?$", filename, re.IGNORECASE): callback(0.1, "Start to parse.") excel_parser = ExcelParser() - sections = [(l, "") for l in excel_parser.html(binary) if l] + if parser_config.get("html4excel"): + sections = [(l, "") for l in excel_parser.html(binary, 12) if l] + else: + sections = [(l, "") for l in excel_parser(binary) if l] elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|sql)$", filename, re.IGNORECASE): callback(0.1, "Start to parse.") diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index 4126ca773..2d626d1af 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -689,6 +689,7 @@ class BedrockChat(Base): yield num_tokens_from_string(ans) + class GeminiChat(Base): def __init__(self, key, model_name,base_url=None):