mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-06-20 19:48:35 +08:00
fix bug [ERROR][Exception]: 8 vs. 9 (#6955)
### What problem does this PR solve?

Sometimes the **s** in a **chunks** entry **(s, a)** is an empty string. Such entries do not satisfy the condition **if s and len(a) > 0** in the line **chunks = [(s, a) for s, a in chunks if s and len(a) > 0]**, so they are filtered out and the new chunks list is shorter than the original. Because **layers = [(0, len(chunks))]** and **start, end = 0, len(chunks)** were computed before this filtering, **end** no longer matches the filtered list. As a result, the final assertion **assert len(chunks) - end == n_clusters, "{} vs. {}".format(len(chunks) - end, n_clusters)** fails and raises a confusing error like "7 vs. 8". This PR moves the two initializations after the filter so they use the filtered length.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):
This commit is contained in:
parent
056ea68e52
commit
e54c0e39b5
@ -77,11 +77,11 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
|
|||||||
return optimal_clusters
|
return optimal_clusters
|
||||||
|
|
||||||
async def __call__(self, chunks, random_state, callback=None):
|
async def __call__(self, chunks, random_state, callback=None):
|
||||||
layers = [(0, len(chunks))]
|
|
||||||
start, end = 0, len(chunks)
|
|
||||||
if len(chunks) <= 1:
|
if len(chunks) <= 1:
|
||||||
return []
|
return []
|
||||||
chunks = [(s, a) for s, a in chunks if s and len(a) > 0]
|
chunks = [(s, a) for s, a in chunks if s and len(a) > 0]
|
||||||
|
layers = [(0, len(chunks))]
|
||||||
|
start, end = 0, len(chunks)
|
||||||
|
|
||||||
async def summarize(ck_idx: list[int]):
|
async def summarize(ck_idx: list[int]):
|
||||||
nonlocal chunks
|
nonlocal chunks
|
||||||
|
Loading…
x
Reference in New Issue
Block a user