mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-06-04 11:24:00 +08:00
Reduce the number of Elasticsearch retry attempts (#3550)
### What problem does this PR solve?

#3541

### Type of change

- [x] Refactoring
- [x] Performance Improvement
This commit is contained in:
parent
58a2200b80
commit
0ac6dc8f8c
@ -16,13 +16,15 @@ from rag.utils.doc_store_conn import DocStoreConnection, MatchExpr, OrderByExpr,
|
|||||||
FusionExpr
|
FusionExpr
|
||||||
from rag.nlp import is_english, rag_tokenizer
|
from rag.nlp import is_english, rag_tokenizer
|
||||||
|
|
||||||
|
ATTEMPT_TIME = 2
|
||||||
|
|
||||||
|
|
||||||
@singleton
|
@singleton
|
||||||
class ESConnection(DocStoreConnection):
|
class ESConnection(DocStoreConnection):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.info = {}
|
self.info = {}
|
||||||
logging.info(f"Use Elasticsearch {settings.ES['hosts']} as the doc engine.")
|
logging.info(f"Use Elasticsearch {settings.ES['hosts']} as the doc engine.")
|
||||||
for _ in range(24):
|
for _ in range(ATTEMPT_TIME):
|
||||||
try:
|
try:
|
||||||
self.es = Elasticsearch(
|
self.es = Elasticsearch(
|
||||||
settings.ES["hosts"].split(","),
|
settings.ES["hosts"].split(","),
|
||||||
@ -92,7 +94,7 @@ class ESConnection(DocStoreConnection):
|
|||||||
|
|
||||||
def indexExist(self, indexName: str, knowledgebaseId: str) -> bool:
|
def indexExist(self, indexName: str, knowledgebaseId: str) -> bool:
|
||||||
s = Index(indexName, self.es)
|
s = Index(indexName, self.es)
|
||||||
for i in range(3):
|
for i in range(ATTEMPT_TIME):
|
||||||
try:
|
try:
|
||||||
return s.exists()
|
return s.exists()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@ -144,9 +146,9 @@ class ESConnection(DocStoreConnection):
|
|||||||
if "minimum_should_match" in m.extra_options:
|
if "minimum_should_match" in m.extra_options:
|
||||||
minimum_should_match = str(int(m.extra_options["minimum_should_match"] * 100)) + "%"
|
minimum_should_match = str(int(m.extra_options["minimum_should_match"] * 100)) + "%"
|
||||||
bqry.must.append(Q("query_string", fields=m.fields,
|
bqry.must.append(Q("query_string", fields=m.fields,
|
||||||
type="best_fields", query=m.matching_text,
|
type="best_fields", query=m.matching_text,
|
||||||
minimum_should_match=minimum_should_match,
|
minimum_should_match=minimum_should_match,
|
||||||
boost=1))
|
boost=1))
|
||||||
bqry.boost = 1.0 - vector_similarity_weight
|
bqry.boost = 1.0 - vector_similarity_weight
|
||||||
|
|
||||||
elif isinstance(m, MatchDenseExpr):
|
elif isinstance(m, MatchDenseExpr):
|
||||||
@ -180,7 +182,7 @@ class ESConnection(DocStoreConnection):
|
|||||||
q = s.to_dict()
|
q = s.to_dict()
|
||||||
logging.debug(f"ESConnection.search {str(indexNames)} query: " + json.dumps(q))
|
logging.debug(f"ESConnection.search {str(indexNames)} query: " + json.dumps(q))
|
||||||
|
|
||||||
for i in range(3):
|
for i in range(ATTEMPT_TIME):
|
||||||
try:
|
try:
|
||||||
res = self.es.search(index=indexNames,
|
res = self.es.search(index=indexNames,
|
||||||
body=q,
|
body=q,
|
||||||
@ -201,7 +203,7 @@ class ESConnection(DocStoreConnection):
|
|||||||
raise Exception("ESConnection.search timeout.")
|
raise Exception("ESConnection.search timeout.")
|
||||||
|
|
||||||
def get(self, chunkId: str, indexName: str, knowledgebaseIds: list[str]) -> dict | None:
|
def get(self, chunkId: str, indexName: str, knowledgebaseIds: list[str]) -> dict | None:
|
||||||
for i in range(3):
|
for i in range(ATTEMPT_TIME):
|
||||||
try:
|
try:
|
||||||
res = self.es.get(index=(indexName),
|
res = self.es.get(index=(indexName),
|
||||||
id=chunkId, source=True, )
|
id=chunkId, source=True, )
|
||||||
@ -233,7 +235,7 @@ class ESConnection(DocStoreConnection):
|
|||||||
operations.append(d_copy)
|
operations.append(d_copy)
|
||||||
|
|
||||||
res = []
|
res = []
|
||||||
for _ in range(100):
|
for _ in range(ATTEMPT_TIME):
|
||||||
try:
|
try:
|
||||||
r = self.es.bulk(index=(indexName), operations=operations,
|
r = self.es.bulk(index=(indexName), operations=operations,
|
||||||
refresh=False, timeout="600s")
|
refresh=False, timeout="600s")
|
||||||
@ -258,7 +260,7 @@ class ESConnection(DocStoreConnection):
|
|||||||
if "id" in condition and isinstance(condition["id"], str):
|
if "id" in condition and isinstance(condition["id"], str):
|
||||||
# update specific single document
|
# update specific single document
|
||||||
chunkId = condition["id"]
|
chunkId = condition["id"]
|
||||||
for i in range(3):
|
for i in range(ATTEMPT_TIME):
|
||||||
try:
|
try:
|
||||||
self.es.update(index=indexName, id=chunkId, doc=doc)
|
self.es.update(index=indexName, id=chunkId, doc=doc)
|
||||||
return True
|
return True
|
||||||
@ -326,7 +328,7 @@ class ESConnection(DocStoreConnection):
|
|||||||
else:
|
else:
|
||||||
raise Exception("Condition value must be int, str or list.")
|
raise Exception("Condition value must be int, str or list.")
|
||||||
logging.debug("ESConnection.delete query: " + json.dumps(qry.to_dict()))
|
logging.debug("ESConnection.delete query: " + json.dumps(qry.to_dict()))
|
||||||
for _ in range(10):
|
for _ in range(ATTEMPT_TIME):
|
||||||
try:
|
try:
|
||||||
res = self.es.delete_by_query(
|
res = self.es.delete_by_query(
|
||||||
index=indexName,
|
index=indexName,
|
||||||
@ -437,7 +439,7 @@ class ESConnection(DocStoreConnection):
|
|||||||
sql = sql.replace(p, r, 1)
|
sql = sql.replace(p, r, 1)
|
||||||
logging.debug(f"ESConnection.sql to es: {sql}")
|
logging.debug(f"ESConnection.sql to es: {sql}")
|
||||||
|
|
||||||
for i in range(3):
|
for i in range(ATTEMPT_TIME):
|
||||||
try:
|
try:
|
||||||
res = self.es.sql.query(body={"query": sql, "fetch_size": fetch_size}, format=format,
|
res = self.es.sql.query(body={"query": sql, "fetch_size": fetch_size}, format=format,
|
||||||
request_timeout="2s")
|
request_timeout="2s")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user