mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-13 11:49:05 +08:00
Set DLA (layout recognition) active for KG (knowledge graph) chunking (#3386)
### What problem does this PR solve?

### Type of change

- [x] Refactoring
This commit is contained in:
parent
7ab9715b0e
commit
83c6b1f308
@@ -537,7 +537,7 @@ def parse():
|
|||||||
options.add_argument('--disable-dev-shm-usage')
|
options.add_argument('--disable-dev-shm-usage')
|
||||||
driver = Chrome(options=options)
|
driver = Chrome(options=options)
|
||||||
driver.get(url)
|
driver.get(url)
|
||||||
sections = RAGFlowHtmlParser()(driver.page_source)
|
sections = RAGFlowHtmlParser()("", binary=driver.page_source)
|
||||||
return get_json_result(data="\n".join(sections))
|
return get_json_result(data="\n".join(sections))
|
||||||
|
|
||||||
if 'file' not in request.files:
|
if 'file' not in request.files:
|
||||||
|
@@ -9,10 +9,10 @@ def chunk(filename, binary, tenant_id, from_page=0, to_page=100000,
|
|||||||
lang="Chinese", callback=None, **kwargs):
|
lang="Chinese", callback=None, **kwargs):
|
||||||
parser_config = kwargs.get(
|
parser_config = kwargs.get(
|
||||||
"parser_config", {
|
"parser_config", {
|
||||||
"chunk_token_num": 512, "delimiter": "\n!?。;!?", "layout_recognize": False})
|
"chunk_token_num": 512, "delimiter": "\n!?。;!?", "layout_recognize": True})
|
||||||
eng = lang.lower() == "english"
|
eng = lang.lower() == "english"
|
||||||
|
|
||||||
parser_config["layout_recognize"] = False
|
parser_config["layout_recognize"] = True
|
||||||
sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True,
|
sections = naive.chunk(filename, binary, from_page=from_page, to_page=to_page, section_only=True,
|
||||||
parser_config=parser_config, callback=callback)
|
parser_config=parser_config, callback=callback)
|
||||||
chunks = build_knowledge_graph_chunks(tenant_id, sections, callback,
|
chunks = build_knowledge_graph_chunks(tenant_id, sections, callback,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user