From e5f7733b31d0bb624861ddafa34ff5499abcca59 Mon Sep 17 00:00:00 2001
From: Ziyu Huang
Date: Mon, 21 Oct 2024 10:06:29 +0800
Subject: [PATCH] Resolves #2905: OpenAI-compatible model provider adds
 llama.cpp rerank support (#2906)

### What problem does this PR solve?

Resolves #2905.

Because the usable token size differs between server setups and RAGFlow has no
config parameter to control it, the code truncates each document to a safe
fixed limit of 500 tokens. My llama.cpp server is started with `-ub 1024`:

```
${llama_path}/bin/llama-server --host 0.0.0.0 --port 9901 -ub 1024 -ngl 99 -m $gguf_file --reranking "$@"
```

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Here is my test of RAGFlow using llama.cpp; a short usage sketch of the new
class follows the diff.

```
slot update_slots: id 0 | task 458 | prompt done, n_past = 416, n_tokens = 416
slot release: id 0 | task 458 | stop processing: n_past = 416, truncated = 0
slot launch_slot_: id 0 | task 459 | processing task
slot update_slots: id 0 | task 459 | tokenizing prompt, len = 2
slot update_slots: id 0 | task 459 | prompt tokenized, n_ctx_slot = 8192, n_keep = 0, n_prompt_tokens = 111
slot update_slots: id 0 | task 459 | kv cache rm [0, end)
slot update_slots: id 0 | task 459 | prompt processing progress, n_past = 111, n_tokens = 111, progress = 1.000000
slot update_slots: id 0 | task 459 | prompt done, n_past = 111, n_tokens = 111
slot release: id 0 | task 459 | stop processing: n_past = 111, truncated = 0
srv update_slots: all slots are idle
request: POST /rerank 172.23.0.4 200
```
---
 rag/llm/rerank_model.py | 40 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/rag/llm/rerank_model.py b/rag/llm/rerank_model.py
index 6b9cbae7b..7ef8bcf5d 100644
--- a/rag/llm/rerank_model.py
+++ b/rag/llm/rerank_model.py
@@ -242,10 +242,46 @@ class LmStudioRerank(Base):
 
 class OpenAI_APIRerank(Base):
     def __init__(self, key, model_name, base_url):
-        pass
+        if base_url.find("/rerank") == -1:
+            self.base_url = urljoin(base_url, "/rerank")
+        else:
+            self.base_url = base_url
+        self.headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {key}"
+        }
+        self.model_name = model_name
 
     def similarity(self, query: str, texts: list):
-        raise NotImplementedError("The api has not been implement")
+        # RAGFlow has no config option for the rerank token limit, so use a fixed cap
+        texts = [truncate(t, 500) for t in texts]
+        data = {
+            "model": self.model_name,
+            "query": query,
+            "documents": texts,
+            "top_n": len(texts),
+        }
+        token_count = 0
+        for t in texts:
+            token_count += num_tokens_from_string(t)
+        res = requests.post(self.base_url, headers=self.headers, json=data).json()
+        rank = np.zeros(len(texts), dtype=float)
+        if 'results' not in res:
+            raise ValueError("Response does not contain 'results':\n" + str(res))
+        for d in res["results"]:
+            rank[d["index"]] = d["relevance_score"]
+
+        # Normalize the rank values to the range 0 to 1
+        min_rank = np.min(rank)
+        max_rank = np.max(rank)
+
+        # Avoid division by zero if all ranks are identical
+        if max_rank - min_rank != 0:
+            rank = (rank - min_rank) / (max_rank - min_rank)
+        else:
+            rank = np.zeros_like(rank)
+
+        return rank, token_count
 
 
 class CoHereRerank(Base):
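
For anyone reproducing the test above, here is a minimal usage sketch (not part of the patch) of the new `OpenAI_APIRerank` class against a llama-server started with the `--reranking` command shown earlier. The host/port, key, and model name are placeholders for illustration; the import path simply mirrors the patched file `rag/llm/rerank_model.py`.

```python
# Usage sketch only, assuming RAGFlow is on the Python path and a llama.cpp
# server with --reranking is listening on the port from the command above.
from rag.llm.rerank_model import OpenAI_APIRerank

reranker = OpenAI_APIRerank(
    key="dummy-key",                   # sent as a Bearer token by the class; use a real key if your server requires one
    model_name="gguf-reranker",        # placeholder; adjust to whatever your server expects
    base_url="http://127.0.0.1:9901",  # __init__ appends /rerank when the path is missing
)

scores, token_count = reranker.similarity(
    query="How does RAGFlow rerank chunks with llama.cpp?",
    texts=[
        "llama.cpp can expose a reranking model through its /rerank endpoint.",
        "The reranker scores each candidate document against the query.",
    ],
)

# `scores` is a numpy array min-max normalized to [0, 1]; `token_count` sums the
# tokens of the (truncated) documents via num_tokens_from_string.
print(scores, token_count)
```

Note that the scores come back min-max normalized by `similarity`, so if all raw relevance scores are identical the method returns zeros rather than dividing by zero.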