Resolves #2905: add llama.cpp rerank support to the OpenAI-compatible model provider (#2906)

### What problem does this PR solve?

Resolves #2905.

Because token sizes are inconsistent, the code truncates each text to a safe limit of 500, since there is no config parameter to control this.

My llama.cpp run sets `-ub` to 1024:

```
${llama_path}/bin/llama-server --host 0.0.0.0 --port 9901 -ub 1024 -ngl 99 -m $gguf_file --reranking "$@"
```

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Here is my test of Ragflow using llama.cpp:

```
slot update_slots: id 0 | task 458 | prompt done, n_past = 416, n_tokens = 416
slot release: id 0 | task 458 | stop processing: n_past = 416, truncated = 0
slot launch_slot_: id 0 | task 459 | processing task
slot update_slots: id 0 | task 459 | tokenizing prompt, len = 2
slot update_slots: id 0 | task 459 | prompt tokenized, n_ctx_slot = 8192, n_keep = 0, n_prompt_tokens = 111
slot update_slots: id 0 | task 459 | kv cache rm [0, end)
slot update_slots: id 0 | task 459 | prompt processing progress, n_past = 111, n_tokens = 111, progress = 1.000000
slot update_slots: id 0 | task 459 | prompt done, n_past = 111, n_tokens = 111
slot release: id 0 | task 459 | stop processing: n_past = 111, truncated = 0
srv update_slots: all slots are idle
request: POST /rerank 172.23.0.4 200
```
2025-08-12 07:19:04 +08:00 · 2024-10-21 10:06:29 +08:00 · 2024-10-21 10:06:29 +08:00 · e5f7733b31
commit e5f7733b31
parent 5aec1e3e17
1 changed files with 38 additions and 2 deletions
--- a/rag/llm/rerank_model.py
+++ b/rag/llm/rerank_model.py
@ -242,10 +242,46 @@ class LmStudioRerank(Base):

 class OpenAI_APIRerank(Base):
    def __init__(self, key, model_name, base_url):
-        pass
+        if base_url.find("/rerank") == -1:
+            self.base_url = urljoin(base_url, "/rerank")
+        else:
+            self.base_url = base_url
+        self.headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {key}"
+        }
+        self.model_name = model_name

    def similarity(self, query: str, texts: list):
-        raise NotImplementedError("The api has not been implement")
+        # Ragflow offers no way to configure this limit, so use a fixed setting
+        texts = [truncate(t, 500) for t in texts]
+        data = {
+            "model": self.model_name,
+            "query": query,
+            "documents": texts,
+            "top_n": len(texts),
+        }
+        token_count = 0
+        for t in texts:
+            token_count += num_tokens_from_string(t)
+        res = requests.post(self.base_url, headers=self.headers, json=data).json()
+        rank = np.zeros(len(texts), dtype=float)
+        if 'results' not in res:
+            raise ValueError("response not contains results\n" + str(res))
+        for d in res["results"]:
+            rank[d["index"]] = d["relevance_score"]
+
+        # Normalize the rank values to the range 0 to 1
+        min_rank = np.min(rank)
+        max_rank = np.max(rank)
+
+        # Avoid division by zero if all ranks are identical
+        if max_rank - min_rank != 0:
+            rank = (rank - min_rank) / (max_rank - min_rank)
+        else:
+            rank = np.zeros_like(rank)
+
+        return rank, token_count


 class CoHereRerank(Base):