Make Infinity adapt to the "exists" condition (#4657)

### What problem does this PR solve?

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

This commit is contained in:
parent b4303f6010
commit c354239b79
@@ -24,6 +24,7 @@ from api.db.services.document_service import DocumentService
 from api.db.services.file2document_service import File2DocumentService
 from api.db.services.file_service import FileService
 from api.db.services.user_service import TenantService, UserTenantService
+from api.settings import DOC_ENGINE
 from api.utils.api_utils import server_error_response, get_data_error_result, validate_request, not_allowed_parameters
 from api.utils import get_uuid
 from api.db import StatusEnum, FileSource
@@ -96,6 +97,13 @@ def update():
             return get_data_error_result(
                 message="Can't find this knowledgebase!")

+        if req.get("parser_id", "") == "tag" and DOC_ENGINE == "infinity":
+            return get_json_result(
+                data=False,
+                message='The chunk method Tag has not been supported by Infinity yet.',
+                code=settings.RetCode.OPERATING_ERROR
+            )
+
         if req["name"].lower() != kb.name.lower() \
                 and len(
             KnowledgebaseService.query(name=req["name"], tenant_id=current_user.id, status=StatusEnum.VALID.value)) > 1:
@@ -112,7 +120,7 @@ def update():
                                              search.index_name(kb.tenant_id), kb.id)
             else:
                 # Elasticsearch requires PAGERANK_FLD be non-zero!
-                settings.docStoreConn.update({"exist": PAGERANK_FLD}, {"remove": PAGERANK_FLD},
+                settings.docStoreConn.update({"exists": PAGERANK_FLD}, {"remove": PAGERANK_FLD},
                                              search.index_name(kb.tenant_id), kb.id)

         e, kb = KnowledgebaseService.get_by_id(kb.id)
@@ -71,7 +71,7 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
         start, end = 0, len(chunks)
         if len(chunks) <= 1:
             return
-        chunks = [(s, a) for s, a in chunks if len(a) > 0]
+        chunks = [(s, a) for s, a in chunks if s and len(a) > 0]

         def summarize(ck_idx, lock):
             nonlocal chunks
@@ -125,6 +125,8 @@ class RecursiveAbstractiveProcessing4TreeOrganizedRetrieval:
                 threads = []
                 for c in range(n_clusters):
                     ck_idx = [i + start for i in range(len(lbls)) if lbls[i] == c]
+                    if not ck_idx:
+                        continue
                     threads.append(executor.submit(summarize, ck_idx, lock))
                 wait(threads, return_when=ALL_COMPLETED)
                 for th in threads:
@@ -336,7 +336,7 @@ class ESConnection(DocStoreConnection):
         for k, v in condition.items():
             if not isinstance(k, str) or not v:
                 continue
-            if k == "exist":
+            if k == "exists":
                 bqry.filter.append(Q("exists", field=v))
                 continue
             if isinstance(v, list):
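For reference, a minimal sketch (not part of this commit) of what the renamed "exists" condition maps to in the Elasticsearch query DSL, assuming the `elasticsearch_dsl` `Q` helper already used in `ESConnection`; the field name is only illustrative:

```python
from elasticsearch_dsl import Q

# An "exists" condition becomes an Elasticsearch `exists` query on the given field.
# "pagerank_fea" is an illustrative field name, not taken from this diff's context.
q = Q("exists", field="pagerank_fea")
print(q.to_dict())  # {'exists': {'field': 'pagerank_fea'}}
```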
@@ -44,8 +44,23 @@ from rag.utils.doc_store_conn import (
 logger = logging.getLogger('ragflow.infinity_conn')


-def equivalent_condition_to_str(condition: dict) -> str | None:
+def equivalent_condition_to_str(condition: dict, table_instance=None) -> str | None:
     assert "_id" not in condition
+    clmns = {}
+    if table_instance:
+        for n, ty, de, _ in table_instance.show_columns().rows():
+            clmns[n] = (ty, de)
+
+    def exists(cln):
+        nonlocal clmns
+        assert cln in clmns, f"'{cln}' should be in '{clmns}'."
+        ty, de = clmns[cln]
+        if ty.lower().find("cha"):
+            if not de:
+                de = ""
+            return f" {cln}!='{de}' "
+        return f"{cln}!={de}"
+
     cond = list()
     for k, v in condition.items():
         if not isinstance(k, str) or k in ["kb_id"] or not v:
@@ -61,8 +76,15 @@ def equivalent_condition_to_str(condition: dict) -> str | None:
             strInCond = ", ".join(inCond)
             strInCond = f"{k} IN ({strInCond})"
             cond.append(strInCond)
+        elif k == "must_not":
+            if isinstance(v, dict):
+                for kk, vv in v.items():
+                    if kk == "exists":
+                        cond.append("NOT (%s)" % exists(vv))
         elif isinstance(v, str):
             cond.append(f"{k}='{v}'")
+        elif k == "exists":
+            cond.append(exists(v))
         else:
             cond.append(f"{k}={str(v)}")
     return " AND ".join(cond) if cond else "1=1"
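A rough sketch, not taken from the repository, of the condition shapes that `equivalent_condition_to_str` now accepts and the approximate filter fragments it produces when the referenced column is a varchar with an empty default; the key and column names below are assumptions for illustration:

```python
# Illustrative condition dicts for equivalent_condition_to_str (names are assumptions):
cond_exists = {"exists": "tag_kwd"}                  # -> roughly " tag_kwd!='' "
cond_missing = {"must_not": {"exists": "tag_kwd"}}   # -> roughly "NOT ( tag_kwd!='' )"
cond_scalar = {"doc_id": "doc-123"}                  # -> "doc_id='doc-123'"
cond_skipped = {"kb_id": "kb-1"}                     # "kb_id" is ignored, so the fallback "1=1" is returned
```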
@@ -294,7 +316,11 @@ class InfinityConnection(DocStoreConnection):
         filter_cond = None
         filter_fulltext = ""
         if condition:
-            filter_cond = equivalent_condition_to_str(condition)
+            for indexName in indexNames:
+                table_name = f"{indexName}_{knowledgebaseIds[0]}"
+                filter_cond = equivalent_condition_to_str(condition, db_instance.get_table(table_name))
+                break
+
         for matchExpr in matchExprs:
             if isinstance(matchExpr, MatchTextExpr):
                 if filter_cond and "filter" not in matchExpr.extra_options:
@@ -434,12 +460,21 @@ class InfinityConnection(DocStoreConnection):
             self.createIdx(indexName, knowledgebaseId, vector_size)
             table_instance = db_instance.get_table(table_name)

+        # embedding fields can't have a default value....
+        embedding_clmns = []
+        clmns = table_instance.show_columns().rows()
+        for n, ty, _, _ in clmns:
+            r = re.search(r"Embedding\([a-z]+,([0-9]+)\)", ty)
+            if not r:
+                continue
+            embedding_clmns.append((n, int(r.group(1))))
+
         docs = copy.deepcopy(documents)
         for d in docs:
             assert "_id" not in d
             assert "id" in d
             for k, v in d.items():
-                if k in ["important_kwd", "question_kwd", "entities_kwd", "tag_kwd"]:
+                if k in ["important_kwd", "question_kwd", "entities_kwd", "tag_kwd", "source_id"]:
                     assert isinstance(v, list)
                     d[k] = "###".join(v)
                 elif re.search(r"_feas$", k):
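As a side note, a small standalone sketch of the regular expression used above to pull the embedding dimension out of a column type string; the exact type string reported by Infinity's `show_columns()` is an assumption here:

```python
import re

# Assumed column-type string of an embedding column, e.g. "Embedding(float,1024)".
ty = "Embedding(float,1024)"
m = re.search(r"Embedding\([a-z]+,([0-9]+)\)", ty)
if m:
    dim = int(m.group(1))
    print(dim)  # 1024 -- used above to pad missing vectors with [0] * dim
```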
@@ -454,6 +489,11 @@ class InfinityConnection(DocStoreConnection):
                 elif k in ["page_num_int", "top_int"]:
                     assert isinstance(v, list)
                     d[k] = "_".join(f"{num:08x}" for num in v)
+
+            for n, vs in embedding_clmns:
+                if n in d:
+                    continue
+                d[n] = [0] * vs
         ids = ["'{}'".format(d["id"]) for d in docs]
         str_ids = ", ".join(ids)
         str_filter = f"id IN ({str_ids})"
@@ -475,11 +515,11 @@ class InfinityConnection(DocStoreConnection):
         db_instance = inf_conn.get_database(self.dbName)
         table_name = f"{indexName}_{knowledgebaseId}"
         table_instance = db_instance.get_table(table_name)
-        if "exist" in condition:
-            del condition["exist"]
-        filter = equivalent_condition_to_str(condition)
+        #if "exists" in condition:
+        #    del condition["exists"]
+        filter = equivalent_condition_to_str(condition, table_instance)
         for k, v in list(newValue.items()):
-            if k in ["important_kwd", "question_kwd", "entities_kwd", "tag_kwd"]:
+            if k in ["important_kwd", "question_kwd", "entities_kwd", "tag_kwd", "source_id"]:
                 assert isinstance(v, list)
                 newValue[k] = "###".join(v)
             elif re.search(r"_feas$", k):
@@ -496,9 +536,11 @@ class InfinityConnection(DocStoreConnection):
             elif k in ["page_num_int", "top_int"]:
                 assert isinstance(v, list)
                 newValue[k] = "_".join(f"{num:08x}" for num in v)
-            elif k == "remove" and v in [PAGERANK_FLD]:
+            elif k == "remove":
                 del newValue[k]
-                newValue[v] = 0
+                if v in [PAGERANK_FLD]:
+                    newValue[v] = 0
+
         logger.debug(f"INFINITY update table {table_name}, filter {filter}, newValue {newValue}.")
         table_instance.update(filter, newValue)
         self.connPool.release_conn(inf_conn)
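A minimal sketch of how the reworked "remove" branch rewrites an update payload; the value of PAGERANK_FLD below is an assumption for illustration, and any other removed field is simply dropped from the payload:

```python
PAGERANK_FLD = "pagerank_fea"  # assumption for illustration; the real constant lives in rag settings

newValue = {"remove": PAGERANK_FLD}
for k, v in list(newValue.items()):
    if k == "remove":
        del newValue[k]
        if v in [PAGERANK_FLD]:
            newValue[v] = 0  # pagerank is reset to 0 instead of the field being dropped

print(newValue)  # {'pagerank_fea': 0}
```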
@@ -508,14 +550,14 @@ class InfinityConnection(DocStoreConnection):
         inf_conn = self.connPool.get_conn()
         db_instance = inf_conn.get_database(self.dbName)
         table_name = f"{indexName}_{knowledgebaseId}"
-        filter = equivalent_condition_to_str(condition)
         try:
             table_instance = db_instance.get_table(table_name)
         except Exception:
             logger.warning(
-                f"Skipped deleting `{filter}` from table {table_name} since the table doesn't exist."
+                f"Skipped deleting from table {table_name} since the table doesn't exist."
             )
             return 0
+        filter = equivalent_condition_to_str(condition, table_instance)
         logger.debug(f"INFINITY delete table {table_name}, filter {filter}.")
         res = table_instance.delete(filter)
         self.connPool.release_conn(inf_conn)
@@ -553,7 +595,7 @@ class InfinityConnection(DocStoreConnection):
                 v = res[fieldnm][i]
                 if isinstance(v, Series):
                     v = list(v)
-                elif fieldnm in ["important_kwd", "question_kwd", "entities_kwd", "tag_kwd"]:
+                elif fieldnm in ["important_kwd", "question_kwd", "entities_kwd", "tag_kwd", "source_id"]:
                     assert isinstance(v, str)
                     v = [kwd for kwd in v.split("###") if kwd]
                 elif fieldnm == "position_int":
@@ -584,6 +626,8 @@ class InfinityConnection(DocStoreConnection):
         ans = {}
         num_rows = len(res)
         column_id = res["id"]
+        if fieldnm not in res:
+            return {}
         for i in range(num_rows):
             id = column_id[i]
             txt = res[fieldnm][i]