Added LocalAI support for rerank models (#3446)

### What problem does this PR solve?

Hi there!
LocalAI added support of rerank models
https://localai.io/features/reranker/

I've implemented LocalAIRerank class (typically copied it from
OpenAI_APIRerank class).
Also, LocalAI model response with 500 error code if len of "documents"
is less than 2 in similarity check.
So I've added the second "document" on RERANK model connection check in
`api/apps/llm_app.py`.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
This commit is contained in:
shizzgar 2024-11-18 07:05:52 +03:00 committed by GitHub
parent 70cd5c1599
commit 4b3eeaa6ef
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 39 additions and 3 deletions

View File

@ -238,7 +238,7 @@ def add_llm():
base_url=llm["api_base"]
)
try:
arr, tc = mdl.similarity("Hello~ Ragflower!", ["Hi, there!"])
arr, tc = mdl.similarity("Hello~ Ragflower!", ["Hi, there!", "Ohh, my friend!"])
if len(arr) == 0 or tc == 0:
raise Exception("Not known.")
except Exception as e:

View File

@ -110,6 +110,7 @@ ChatModel = {
}
RerankModel = {
"LocalAI":LocalAIRerank,
"BAAI": DefaultRerank,
"Jina": JinaRerank,
"Youdao": YoudaoRerank,

View File

@ -185,11 +185,46 @@ class XInferenceRerank(Base):
class LocalAIRerank(Base):
def __init__(self, key, model_name, base_url):
pass
if base_url.find("/rerank") == -1:
self.base_url = urljoin(base_url, "/rerank")
else:
self.base_url = base_url
self.headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {key}"
}
self.model_name = model_name.replace("___LocalAI","")
def similarity(self, query: str, texts: list):
raise NotImplementedError("The LocalAIRerank has not been implement")
# noway to config Ragflow , use fix setting
texts = [truncate(t, 500) for t in texts]
data = {
"model": self.model_name,
"query": query,
"documents": texts,
"top_n": len(texts),
}
token_count = 0
for t in texts:
token_count += num_tokens_from_string(t)
res = requests.post(self.base_url, headers=self.headers, json=data).json()
rank = np.zeros(len(texts), dtype=float)
if 'results' not in res:
raise ValueError("response not contains results\n" + str(res))
for d in res["results"]:
rank[d["index"]] = d["relevance_score"]
# Normalize the rank values to the range 0 to 1
min_rank = np.min(rank)
max_rank = np.max(rank)
# Avoid division by zero if all ranks are identical
if max_rank - min_rank != 0:
rank = (rank - min_rank) / (max_rank - min_rank)
else:
rank = np.zeros_like(rank)
return rank, token_count
class NvidiaRerank(Base):
def __init__(