mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-04-20 05:00:01 +08:00
### Related Issue:
https://github.com/infiniflow/ragflow/issues/6741
### Environment:
Using nightly version
Commit version:
[6051abb](6051abb4a3)
### Bug Description:
The retrieval function in rag/nlp/search.py returns the original total number of chunks
even after the chunks have been filtered by similarity_threshold.
This creates an inconsistency between the chunks actually returned and the reported total.
### Changes Made:
Added code to count how many search results actually meet or exceed the configured similarity threshold.
Positioned the calculation after the doc_ids conditional logic so that special cases are handled correctly.
Updated the ranks["total"] value to store this filtered count instead of the raw search result count.
Used NumPy so that the count is computed with optimized C-level batch operations, which keeps it fast.
This commit is contained in:
parent
6051abb4a3
commit
d9266ed65a
@ -363,7 +363,6 @@ class Dealer:
|
|||||||
|
|
||||||
sres = self.search(req, [index_name(tid) for tid in tenant_ids],
|
sres = self.search(req, [index_name(tid) for tid in tenant_ids],
|
||||||
kb_ids, embd_mdl, highlight, rank_feature=rank_feature)
|
kb_ids, embd_mdl, highlight, rank_feature=rank_feature)
|
||||||
ranks["total"] = sres.total
|
|
||||||
|
|
||||||
if rerank_mdl and sres.total > 0:
|
if rerank_mdl and sres.total > 0:
|
||||||
sim, tsim, vsim = self.rerank_by_model(rerank_mdl,
|
sim, tsim, vsim = self.rerank_by_model(rerank_mdl,
|
||||||
@ -383,6 +382,9 @@ class Dealer:
|
|||||||
if doc_ids:
|
if doc_ids:
|
||||||
similarity_threshold = 0
|
similarity_threshold = 0
|
||||||
page_size = 30
|
page_size = 30
|
||||||
|
sim_np = np.array(sim)
|
||||||
|
filtered_count = (sim_np >= similarity_threshold).sum()
|
||||||
|
ranks["total"] = int(filtered_count) # Convert from np.int64 to Python int otherwise JSON serializable error
|
||||||
for i in idx:
|
for i in idx:
|
||||||
if sim[i] < similarity_threshold:
|
if sim[i] < similarity_threshold:
|
||||||
break
|
break
|
||||||
|
Loading…
x
Reference in New Issue
Block a user