mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-12 19:49:02 +08:00
Fetch chunk by batches. (#4177)
### What problem does this PR solve?

#4173

### Type of change

- [x] Performance Improvement
This commit is contained in:
parent
2cbe064080
commit
31d67c850e
@ -70,7 +70,7 @@ class Dealer:
|
|||||||
pg = int(req.get("page", 1)) - 1
|
pg = int(req.get("page", 1)) - 1
|
||||||
topk = int(req.get("topk", 1024))
|
topk = int(req.get("topk", 1024))
|
||||||
ps = int(req.get("size", topk))
|
ps = int(req.get("size", topk))
|
||||||
offset, limit = pg * ps, (pg + 1) * ps
|
offset, limit = pg * ps, ps
|
||||||
|
|
||||||
src = req.get("fields", ["docnm_kwd", "content_ltks", "kb_id", "img_id", "title_tks", "important_kwd", "position_int",
|
src = req.get("fields", ["docnm_kwd", "content_ltks", "kb_id", "img_id", "title_tks", "important_kwd", "position_int",
|
||||||
"doc_id", "page_num_int", "top_int", "create_timestamp_flt", "knowledge_graph_kwd", "question_kwd", "question_tks",
|
"doc_id", "page_num_int", "top_int", "create_timestamp_flt", "knowledge_graph_kwd", "question_kwd", "question_tks",
|
||||||
@ -380,6 +380,13 @@ class Dealer:
|
|||||||
|
|
||||||
def chunk_list(self, doc_id: str, tenant_id: str, kb_ids: list[str], max_count=1024, fields=["docnm_kwd", "content_with_weight", "img_id"]):
    """Fetch the chunks of one document from the doc store in batches.

    The store is queried page by page (128 rows per request) instead of
    in a single ``max_count``-sized request, and the pages are merged
    into one list.

    Args:
        doc_id: Identifier of the document whose chunks are wanted; used
            as the only filter condition (``{"doc_id": doc_id}``).
        tenant_id: Passed to ``index_name`` to select the index to query.
        kb_ids: Knowledge-base ids forwarded unchanged to the doc store.
        max_count: Upper bound on the number of chunks returned.
        fields: Field names requested from the doc store for every chunk.
            NOTE(review): the default is a shared list; it is not mutated
            here, but confirm that ``dataStore.search`` / ``getFields``
            do not mutate it either.

    Returns:
        A list of the per-chunk values yielded by ``dataStore.getFields``,
        in the order the pages were fetched, at most ``max_count`` long.
    """
    condition = {"doc_id": doc_id}
    chunks = []
    bs = 128  # rows requested per round-trip
    for p in range(0, max_count, bs):
        # Never ask for more rows than are still allowed by max_count, so
        # a max_count that is not a multiple of the batch size is honoured.
        limit = min(bs, max_count - p)
        # Keep the raw search response in its own variable: reusing the
        # accumulator's name here would discard the chunks gathered so far
        # and make the later extend() operate on the search response.
        page = self.dataStore.search(fields, [], condition, [], OrderByExpr(), p, limit, index_name(tenant_id), kb_ids)
        dict_chunks = self.dataStore.getFields(page, fields)
        if not dict_chunks:
            # Nothing came back for this page: the document is exhausted.
            break
        chunks.extend(dict_chunks.values())
        if len(dict_chunks) < limit:
            # A short page means there are no further rows to fetch.
            break
    return chunks
|
||||||
|
@ -196,7 +196,7 @@ class ESConnection(DocStoreConnection):
|
|||||||
s = s.sort(*orders)
|
s = s.sort(*orders)
|
||||||
|
|
||||||
if limit > 0:
|
if limit > 0:
|
||||||
s = s[offset:limit]
|
s = s[offset:offset+limit]
|
||||||
q = s.to_dict()
|
q = s.to_dict()
|
||||||
logger.debug(f"ESConnection.search {str(indexNames)} query: " + json.dumps(q))
|
logger.debug(f"ESConnection.search {str(indexNames)} query: " + json.dumps(q))
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user