Set DLA (document layout analysis) active for KG (knowledge graph) (#3386)

### What problem does this PR solve?

### Type of change


- [x] Refactoring
This commit is contained in:
Kevin Hu 2024-11-13 16:59:19 +08:00 committed by GitHub
parent 7ab9715b0e
commit 83c6b1f308
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 3 additions and 3 deletions

View File

@@ -537,7 +537,7 @@ def parse():
options.add_argument('--disable-dev-shm-usage')
driver = Chrome(options=options)
driver.get(url)
-sections = RAGFlowHtmlParser()(driver.page_source)
+sections = RAGFlowHtmlParser()("", binary=driver.page_source)
return get_json_result(data="\n".join(sections))
if 'file' not in request.files:

View File

@@ -9,10 +9,10 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
lang="Chinese", callback=None, **kwargs):
parser_config = kwargs.get(
"parser_config", {
-"chunk_token_num": 512, "delimiter": "\n!?。;!?", "layout_recognize": False})
+"chunk_token_num": 512, "delimiter": "\n!?。;!?", "layout_recognize": True})
eng = lang.lower() == "english"
-parser_config["layout_recognize"] = False
+parser_config["layout_recognize"] = True
sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True,
parser_config=parser_config, callback=callback)
chunks = build_knowledge_graph_chunks(tenant_id, sections, callback,