Make Excel parsing configurable (#2517)

### What problem does this PR solve?

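Issue #2516: Excel parsing is now configurable. When the `html4excel` parser-config option is set, the Excel parser's HTML output is used; otherwise the parser's default output is used.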

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Kevin Hu 2024-09-20 15:33:38 +08:00 committed by GitHub
parent 099c37ba95
commit 78856703c4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 5 additions and 1 deletion

View File

@@ -221,7 +221,10 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
elif re.search(r"\.xlsx?$", filename, re.IGNORECASE): elif re.search(r"\.xlsx?$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.") callback(0.1, "Start to parse.")
excel_parser = ExcelParser() excel_parser = ExcelParser()
sections = [(l, "") for l in excel_parser.html(binary) if l] if parser_config.get("html4excel"):
sections = [(l, "") for l in excel_parser.html(binary, 12) if l]
else:
sections = [(l, "") for l in excel_parser(binary) if l]
elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|sql)$", filename, re.IGNORECASE): elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|sql)$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.") callback(0.1, "Start to parse.")

View File

@@ -689,6 +689,7 @@ class BedrockChat(Base):
yield num_tokens_from_string(ans) yield num_tokens_from_string(ans)
class GeminiChat(Base): class GeminiChat(Base):
def __init__(self, key, model_name,base_url=None): def __init__(self, key, model_name,base_url=None):