mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-06-30 09:15:10 +08:00
Fix: Improve First Chunk Size (#7806)
### What problem does this PR solve?

https://github.com/infiniflow/ragflow/issues/7790

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
e6cd799d8a
commit
db4371c745
@ -524,7 +524,7 @@ def naive_merge(sections, chunk_token_num=128, delimiter="\n。;!?"):
|
|||||||
if tnum < 8:
|
if tnum < 8:
|
||||||
pos = ""
|
pos = ""
|
||||||
# Ensure that the length of the merged chunk does not exceed chunk_token_num
|
# Ensure that the length of the merged chunk does not exceed chunk_token_num
|
||||||
if tk_nums[-1] > chunk_token_num:
|
if cks[-1] == "" or tk_nums[-1] > chunk_token_num:
|
||||||
|
|
||||||
if t.find(pos) < 0:
|
if t.find(pos) < 0:
|
||||||
t += pos
|
t += pos
|
||||||
@ -560,7 +560,7 @@ def naive_merge_with_images(texts, images, chunk_token_num=128, delimiter="\n。
|
|||||||
if tnum < 8:
|
if tnum < 8:
|
||||||
pos = ""
|
pos = ""
|
||||||
# Ensure that the length of the merged chunk does not exceed chunk_token_num
|
# Ensure that the length of the merged chunk does not exceed chunk_token_num
|
||||||
if tk_nums[-1] > chunk_token_num:
|
if cks[-1] == "" or tk_nums[-1] > chunk_token_num:
|
||||||
if t.find(pos) < 0:
|
if t.find(pos) < 0:
|
||||||
t += pos
|
t += pos
|
||||||
cks.append(t)
|
cks.append(t)
|
||||||
@ -627,7 +627,7 @@ def naive_merge_docx(sections, chunk_token_num=128, delimiter="\n。;!?"):
|
|||||||
tnum = num_tokens_from_string(t)
|
tnum = num_tokens_from_string(t)
|
||||||
if tnum < 8:
|
if tnum < 8:
|
||||||
pos = ""
|
pos = ""
|
||||||
if tk_nums[-1] > chunk_token_num:
|
if cks[-1] == "" or tk_nums[-1] > chunk_token_num:
|
||||||
if t.find(pos) < 0:
|
if t.find(pos) < 0:
|
||||||
t += pos
|
t += pos
|
||||||
cks.append(t)
|
cks.append(t)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user