From e54c0e39b58ebdc15cbf6ad4db88c2f4394ca315 Mon Sep 17 00:00:00 2001 From: dylan Date: Fri, 11 Apr 2025 17:01:49 +0800 Subject: [PATCH] fix bug [ERROR][Exception]: 8 vs. 9 (#6955) ### What problem does this PR solve? Sometimes the text **s** in a chunk tuple **(s, a)** is an empty string. Such chunks are dropped by the filter **chunks = [(s, a) for s, a in chunks if s and len(a) > 0]**, so the filtered list is shorter than the original one. Because **layers**, **start** and **end** were computed from the unfiltered length before this filter ran, **end** no longer matched the real number of chunks. As a result, the final assertion **assert len(chunks) - end == n_clusters, "{} vs. {}".format(len(chunks) - end, n_clusters)** failed and raised a confusing error such as `7 vs. 8`. This PR moves the initialization of **layers**, **start** and **end** after the filter so that they reflect the filtered list. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) - [ ] New Feature (non-breaking change which adds functionality) - [ ] Documentation Update - [ ] Refactoring - [ ] Performance Improvement - [ ] Other (please describe): --- rag/raptor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rag/raptor.py b/rag/raptor.py index 99155e19..d09ea571 100644 --- a/rag/raptor.py +++ b/rag/raptor.py @@ -77,11 +77,11 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval: return optimal_clusters async def __call__(self, chunks, random_state, callback=None): - layers = [(0, len(chunks))] - start, end = 0, len(chunks) if len(chunks) <= 1: return [] chunks = [(s, a) for s, a in chunks if s and len(a) > 0] + layers = [(0, len(chunks))] + start, end = 0, len(chunks) async def summarize(ck_idx: list[int]): nonlocal chunks