diff --git a/api/db/services/dialog_service.py b/api/db/services/dialog_service.py
index fb9ed5bd0..0a4ab0f0e 100644
--- a/api/db/services/dialog_service.py
+++ b/api/db/services/dialog_service.py
@@ -859,7 +859,7 @@ Output:
 def reasoning(chunk_info: dict, question: str, chat_mdl: LLMBundle, embd_mdl: LLMBundle,
-              tenant_ids: list[str], kb_ids: list[str], prompt_config, MAX_SEARCH_LIMIT: int = 3,
+              tenant_ids: list[str], kb_ids: list[str], prompt_config, MAX_SEARCH_LIMIT: int = 6,
               top_n: int = 5, similarity_threshold: float = 0.4, vector_similarity_weight: float = 0.3):
     BEGIN_SEARCH_QUERY = "<|begin_search_query|>"
     END_SEARCH_QUERY = "<|end_search_query|>"
diff --git a/rag/llm/rerank_model.py b/rag/llm/rerank_model.py
index 6caa28b0c..443075153 100644
--- a/rag/llm/rerank_model.py
+++ b/rag/llm/rerank_model.py
@@ -130,12 +130,12 @@ class DefaultRerank(Base):
             self._dynamic_batch_size = old_dynamic_batch_size
         return np.array(res)
-
     def _compute_batch_scores(self, batch_pairs, max_length=None):
         if max_length is None:
-            max_length = self._model.max_length
-        scores = self._model.compute_score(batch_pairs, max_length=max_length)
+            scores = self._model.compute_score(batch_pairs)
+        else:
+            scores = self._model.compute_score(batch_pairs, max_length=max_length)
         scores = sigmoid(np.array(scores)).tolist()
         return scores
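
Note on the rag/llm/rerank_model.py hunk: after this change, max_length is only forwarded to compute_score() when the caller actually supplies one; otherwise the call lets the underlying model apply its own default instead of substituting self._model.max_length. Below is a minimal, self-contained sketch of that branching with a stub scorer. StubReranker, compute_batch_scores, and the toy overlap score are hypothetical stand-ins for illustration, not RAGFlow code. (The api/db/services/dialog_service.py hunk only raises the default MAX_SEARCH_LIMIT of the reasoning loop from 3 to 6.)

import numpy as np


def sigmoid(x):
    # Same squashing applied to the raw relevance scores in rerank_model.py.
    return 1.0 / (1.0 + np.exp(-x))


class StubReranker:
    """Hypothetical stand-in for the cross-encoder behind DefaultRerank."""

    def compute_score(self, pairs, max_length=None):
        # Toy relevance: number of shared tokens between query and text.
        return [float(len(set(q.split()) & set(t.split()))) for q, t in pairs]


def compute_batch_scores(model, batch_pairs, max_length=None):
    # Mirrors the patched branching: only pass max_length when provided,
    # otherwise let the model use its own default.
    if max_length is None:
        scores = model.compute_score(batch_pairs)
    else:
        scores = model.compute_score(batch_pairs, max_length=max_length)
    return sigmoid(np.array(scores)).tolist()


if __name__ == "__main__":
    pairs = [
        ("what is retrieval augmented generation", "retrieval augmented generation combines search and LLMs"),
        ("what is retrieval augmented generation", "unrelated text about cooking"),
    ]
    print(compute_batch_scores(StubReranker(), pairs))                  # model default length
    print(compute_batch_scores(StubReranker(), pairs, max_length=512))  # explicit cap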