Fix: improve retrieval API (#6744)

### What problem does this PR solve?

Get the highlight parameter from the request to keep consistency with
the document

> 
- Method: POST
- URL: `/api/v1/retrieval`
- Headers:
  - `'content-Type: application/json'`
  - `'Authorization: Bearer <YOUR_API_KEY>'`
- Body:
  - `"question"`: `string`  
  - `"dataset_ids"`: `list[string]`  
  - `"document_ids"`: `list[string]`
  - `"page"`: `integer`  
  - `"page_size"`: `integer`  
  - `"similarity_threshold"`: `float`  
  - `"vector_similarity_weight"`: `float`  
  - `"top_k"`: `integer`  
  - `"rerank_id"`: `string`  
  - `"keyword"`: `boolean`  
  - `"highlight"`: `boolean`
>

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Stephen Hu 2025-04-24 09:29:00 +08:00 committed by GitHub
parent 4e31eea55f
commit 0b460a9a12
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -822,6 +822,7 @@ def retrieval():
similarity_threshold = float(req.get("similarity_threshold", 0.2))
vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
top = int(req.get("top_k", 1024))
highlight = bool(req.get("highlight", False))
try:
kbs = KnowledgebaseService.get_by_ids(kb_ids)
@ -842,7 +843,7 @@ def retrieval():
question += keyword_extraction(chat_mdl, question)
ranks = settings.retrievaler.retrieval(question, embd_mdl, kbs[0].tenant_id, kb_ids, page, size,
similarity_threshold, vector_similarity_weight, top,
doc_ids, rerank_mdl=rerank_mdl,
doc_ids, rerank_mdl=rerank_mdl, highlight= highlight,
rank_feature=label_question(question, kbs))
for c in ranks["chunks"]:
c.pop("vector", None)