fix: synonym bug (#3423)

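### What problem does this PR solve?

Fixes the full-text query builder in `FulltextQueryer`: the per-token clause was built with `" %s)".format()`, which emits a literal `%s` instead of the token's synonyms; it now uses `" {})".format(syn)` so the weighted synonym string is actually included. This PR also skips `Generate` parameters that have no `component_id`, and makes the benchmark wrap its inputs in ranx `Qrels`/`Run` objects (reporting `mrr@10` instead of `mrr`) before calling `evaluate`.
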
### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Kevin Hu 2024-11-15 10:14:51 +08:00 committed by GitHub
parent 6878d23a57
commit 220aaddc62
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 6 additions and 4 deletions

View File

@ -104,6 +104,7 @@ class Generate(ComponentBase):
retrieval_res = [] retrieval_res = []
self._param.inputs = [] self._param.inputs = []
for para in self._param.parameters: for para in self._param.parameters:
if not para.get("component_id"): continue
if para["component_id"].split("@")[0].lower().find("begin") > 0: if para["component_id"].split("@")[0].lower().find("begin") > 0:
cpn_id, key = para["component_id"].split("@") cpn_id, key = para["component_id"].split("@")
for p in self._canvas.get_component(cpn_id)["obj"]._param.query: for p in self._canvas.get_component(cpn_id)["obj"]._param.query:

View File

@ -27,6 +27,7 @@ from api.settings import retrievaler, docStoreConn
from api.utils import get_uuid from api.utils import get_uuid
from rag.nlp import tokenize, search from rag.nlp import tokenize, search
from ranx import evaluate from ranx import evaluate
from ranx import Qrels, Run
import pandas as pd import pandas as pd
from tqdm import tqdm from tqdm import tqdm
@ -247,14 +248,14 @@ class Benchmark:
self.index_name = search.index_name(self.tenant_id) self.index_name = search.index_name(self.tenant_id)
qrels, texts = self.ms_marco_index(file_path, "benchmark_ms_marco_v1.1") qrels, texts = self.ms_marco_index(file_path, "benchmark_ms_marco_v1.1")
run = self._get_retrieval(qrels) run = self._get_retrieval(qrels)
print(dataset, evaluate(qrels, run, ["ndcg@10", "map@5", "mrr"])) print(dataset, evaluate(Qrels(qrels), Run(run), ["ndcg@10", "map@5", "mrr@10"]))
self.save_results(qrels, run, texts, dataset, file_path) self.save_results(qrels, run, texts, dataset, file_path)
if dataset == "trivia_qa": if dataset == "trivia_qa":
self.tenant_id = "benchmark_trivia_qa" self.tenant_id = "benchmark_trivia_qa"
self.index_name = search.index_name(self.tenant_id) self.index_name = search.index_name(self.tenant_id)
qrels, texts = self.trivia_qa_index(file_path, "benchmark_trivia_qa") qrels, texts = self.trivia_qa_index(file_path, "benchmark_trivia_qa")
run = self._get_retrieval(qrels) run = self._get_retrieval(qrels)
print(dataset, evaluate(qrels, run, ["ndcg@10", "map@5", "mrr"])) print(dataset, evaluate(Qrels(qrels), Run(run), ["ndcg@10", "map@5", "mrr@10"]))
self.save_results(qrels, run, texts, dataset, file_path) self.save_results(qrels, run, texts, dataset, file_path)
if dataset == "miracl": if dataset == "miracl":
for lang in ['ar', 'bn', 'de', 'en', 'es', 'fa', 'fi', 'fr', 'hi', 'id', 'ja', 'ko', 'ru', 'sw', 'te', 'th', for lang in ['ar', 'bn', 'de', 'en', 'es', 'fa', 'fi', 'fr', 'hi', 'id', 'ja', 'ko', 'ru', 'sw', 'te', 'th',
@ -278,7 +279,7 @@ class Benchmark:
os.path.join(miracl_corpus, 'miracl-corpus-v1.0-' + lang), os.path.join(miracl_corpus, 'miracl-corpus-v1.0-' + lang),
"benchmark_miracl_" + lang) "benchmark_miracl_" + lang)
run = self._get_retrieval(qrels) run = self._get_retrieval(qrels)
print(dataset, evaluate(qrels, run, ["ndcg@10", "map@5", "mrr"])) print(dataset, evaluate(Qrels(qrels), Run(run), ["ndcg@10", "map@5", "mrr@10"]))
self.save_results(qrels, run, texts, dataset, file_path) self.save_results(qrels, run, texts, dataset, file_path)

View File

@ -88,7 +88,7 @@ class FulltextQueryer:
syn = ["\"{}\"^{:.4f}".format(s, w / 4.) for s in syn] syn = ["\"{}\"^{:.4f}".format(s, w / 4.) for s in syn]
syns.append(" ".join(syn)) syns.append(" ".join(syn))
q = ["({}^{:.4f}".format(tk, w) + " %s)".format() for (tk, w), syn in zip(tks_w, syns)] q = ["({}^{:.4f}".format(tk, w) + " {})".format(syn) for (tk, w), syn in zip(tks_w, syns)]
for i in range(1, len(tks_w)): for i in range(1, len(tks_w)):
q.append( q.append(
'"%s %s"^%.4f' '"%s %s"^%.4f'