mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-04-19 20:50:00 +08:00
Cut down the attempt times of ES (#3550)

### What problem does this PR solve?

#3541

### Type of change

- [x] Refactoring
- [x] Performance Improvement
This commit is contained in:
parent
58a2200b80
commit
0ac6dc8f8c
@ -16,13 +16,15 @@ from rag.utils.doc_store_conn import DocStoreConnection, MatchExpr, OrderByExpr,
|
||||
FusionExpr
|
||||
from rag.nlp import is_english, rag_tokenizer
|
||||
|
||||
ATTEMPT_TIME = 2
|
||||
|
||||
|
||||
@singleton
|
||||
class ESConnection(DocStoreConnection):
|
||||
def __init__(self):
|
||||
self.info = {}
|
||||
logging.info(f"Use Elasticsearch {settings.ES['hosts']} as the doc engine.")
|
||||
for _ in range(24):
|
||||
for _ in range(ATTEMPT_TIME):
|
||||
try:
|
||||
self.es = Elasticsearch(
|
||||
settings.ES["hosts"].split(","),
|
||||
@ -92,7 +94,7 @@ class ESConnection(DocStoreConnection):
|
||||
|
||||
def indexExist(self, indexName: str, knowledgebaseId: str) -> bool:
|
||||
s = Index(indexName, self.es)
|
||||
for i in range(3):
|
||||
for i in range(ATTEMPT_TIME):
|
||||
try:
|
||||
return s.exists()
|
||||
except Exception as e:
|
||||
@ -144,9 +146,9 @@ class ESConnection(DocStoreConnection):
|
||||
if "minimum_should_match" in m.extra_options:
|
||||
minimum_should_match = str(int(m.extra_options["minimum_should_match"] * 100)) + "%"
|
||||
bqry.must.append(Q("query_string", fields=m.fields,
|
||||
type="best_fields", query=m.matching_text,
|
||||
minimum_should_match=minimum_should_match,
|
||||
boost=1))
|
||||
type="best_fields", query=m.matching_text,
|
||||
minimum_should_match=minimum_should_match,
|
||||
boost=1))
|
||||
bqry.boost = 1.0 - vector_similarity_weight
|
||||
|
||||
elif isinstance(m, MatchDenseExpr):
|
||||
@ -180,7 +182,7 @@ class ESConnection(DocStoreConnection):
|
||||
q = s.to_dict()
|
||||
logging.debug(f"ESConnection.search {str(indexNames)} query: " + json.dumps(q))
|
||||
|
||||
for i in range(3):
|
||||
for i in range(ATTEMPT_TIME):
|
||||
try:
|
||||
res = self.es.search(index=indexNames,
|
||||
body=q,
|
||||
@ -201,7 +203,7 @@ class ESConnection(DocStoreConnection):
|
||||
raise Exception("ESConnection.search timeout.")
|
||||
|
||||
def get(self, chunkId: str, indexName: str, knowledgebaseIds: list[str]) -> dict | None:
|
||||
for i in range(3):
|
||||
for i in range(ATTEMPT_TIME):
|
||||
try:
|
||||
res = self.es.get(index=(indexName),
|
||||
id=chunkId, source=True, )
|
||||
@ -233,7 +235,7 @@ class ESConnection(DocStoreConnection):
|
||||
operations.append(d_copy)
|
||||
|
||||
res = []
|
||||
for _ in range(100):
|
||||
for _ in range(ATTEMPT_TIME):
|
||||
try:
|
||||
r = self.es.bulk(index=(indexName), operations=operations,
|
||||
refresh=False, timeout="600s")
|
||||
@ -258,7 +260,7 @@ class ESConnection(DocStoreConnection):
|
||||
if "id" in condition and isinstance(condition["id"], str):
|
||||
# update specific single document
|
||||
chunkId = condition["id"]
|
||||
for i in range(3):
|
||||
for i in range(ATTEMPT_TIME):
|
||||
try:
|
||||
self.es.update(index=indexName, id=chunkId, doc=doc)
|
||||
return True
|
||||
@ -326,7 +328,7 @@ class ESConnection(DocStoreConnection):
|
||||
else:
|
||||
raise Exception("Condition value must be int, str or list.")
|
||||
logging.debug("ESConnection.delete query: " + json.dumps(qry.to_dict()))
|
||||
for _ in range(10):
|
||||
for _ in range(ATTEMPT_TIME):
|
||||
try:
|
||||
res = self.es.delete_by_query(
|
||||
index=indexName,
|
||||
@ -437,7 +439,7 @@ class ESConnection(DocStoreConnection):
|
||||
sql = sql.replace(p, r, 1)
|
||||
logging.debug(f"ESConnection.sql to es: {sql}")
|
||||
|
||||
for i in range(3):
|
||||
for i in range(ATTEMPT_TIME):
|
||||
try:
|
||||
res = self.es.sql.query(body={"query": sql, "fetch_size": fetch_size}, format=format,
|
||||
request_timeout="2s")
|
||||
|
Loading…
x
Reference in New Issue
Block a user