Fix logs. Use dict.pop instead of del. Close #3473 (#3484)

### What problem does this PR solve?

Fix log messages (consistent `ClassName.method ... got exception` wording) and replace `del d[key]` with `d.pop(key, None)` so that removing a possibly missing key no longer raises `KeyError`.
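For reviewers, a minimal standalone illustration of why `dict.pop` is safer here (illustrative Python, not code from this PR):

```python
# del raises KeyError when the key is absent; pop with a default does not.
req = {"tenant_id": "t1", "name": "demo"}

tid = req.pop("tenant_id")  # removes the key and returns its value
req.pop("tenant_id", None)  # calling again is a safe no-op
try:
    del req["tenant_id"]    # the same second call with del would raise
except KeyError:
    print("del raises KeyError on a missing key")
```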

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
Zhichang Yu 2024-11-19 14:15:25 +08:00 committed by GitHub
parent d0f94a42ff · commit dec9b3e540
13 changed files with 93 additions and 79 deletions

View File

@@ -839,8 +839,7 @@ def retrieval():
             similarity_threshold, vector_similarity_weight, top,
             doc_ids, rerank_mdl=rerank_mdl)
         for c in ranks["chunks"]:
-            if "vector" in c:
-                del c["vector"]
+            c.pop("vector", None)
         return get_json_result(data=ranks)
     except Exception as e:
         if str(e).find("not_found") > 0:

View File

@@ -287,8 +287,7 @@ def retrieval_test():
             similarity_threshold, vector_similarity_weight, top,
             doc_ids, rerank_mdl=rerank_mdl, highlight=req.get("highlight"))
         for c in ranks["chunks"]:
-            if "vector" in c:
-                del c["vector"]
+            c.pop("vector", None)
         return get_json_result(data=ranks)
     except Exception as e:

View File

@@ -58,8 +58,7 @@ def retrieval(tenant_id):
         )
         records = []
         for c in ranks["chunks"]:
-            if "vector" in c:
-                del c["vector"]
+            c.pop("vector", None)
             records.append({
                 "content": c["content_ltks"],
                 "score": c["similarity"],

View File

@@ -37,7 +37,6 @@ from api.db.services.document_service import DocumentService
 from api.db.services.file2document_service import File2DocumentService
 from api.db.services.file_service import FileService
 from api.db.services.knowledgebase_service import KnowledgebaseService
-from api import settings
 from api.utils.api_utils import construct_json_result, get_parser_config
 from rag.nlp import search
 from rag.utils import rmSpace
@@ -1342,8 +1341,7 @@ def retrieval_test(tenant_id):
             highlight=highlight,
         )
         for c in ranks["chunks"]:
-            if "vector" in c:
-                del c["vector"]
+            c.pop("vector", None)

         ##rename keys
         renamed_chunks = []

View File

@@ -696,8 +696,7 @@ def set_tenant_info():
     """
     req = request.json
     try:
-        tid = req["tenant_id"]
-        del req["tenant_id"]
+        tid = req.pop("tenant_id")
         TenantService.update_by_id(tid, req)
         return get_json_result(data=True)
     except Exception as e:

View File

@@ -500,7 +500,7 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
                 STORAGE_IMPL.put(kb.id, d["id"], output_buffer.getvalue())
                 d["img_id"] = "{}-{}".format(kb.id, d["id"])
-                del d["image"]
+                d.pop("image", None)
                 docs.append(d)

     parser_ids = {d["id"]: d["parser_id"] for d, _ in files}

View File

@@ -49,5 +49,6 @@ def initRootLogger(logfile_basename: str, log_level: int = logging.INFO, log_for
     handler2.setFormatter(formatter)
     logger.addHandler(handler2)
+    logging.captureWarnings(True)
     msg = f"{logfile_basename} log path: {log_path}"
     logger.info(msg)
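The added `logging.captureWarnings(True)` routes `warnings.warn(...)` output through the standard `py.warnings` logger, so warnings land in the same log files as the rest of the output. A standalone sketch of that stdlib behavior:

```python
import logging
import warnings

logging.basicConfig(level=logging.INFO)
logging.captureWarnings(True)  # send the warnings module through logging handlers

warnings.warn("deprecated option")  # now emitted via the "py.warnings" logger
```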

View File

@@ -9,7 +9,7 @@ import logging
 import numbers
 import re
 import traceback
-from typing import Any, Callable
+from typing import Any, Callable, Mapping
 from dataclasses import dataclass
 import tiktoken
 from graphrag.graph_prompt import GRAPH_EXTRACTION_PROMPT, CONTINUE_PROMPT, LOOP_PROMPT

View File

@@ -59,8 +59,7 @@ class Benchmark:
                 del qrels[query]
                 continue
             for c in ranks["chunks"]:
-                if "vector" in c:
-                    del c["vector"]
+                c.pop("vector", None)
                 run[query][c["chunk_id"]] = c["similarity"]
         return run

View File

@@ -106,8 +106,7 @@ class Dealer:
             # If result is empty, try again with lower min_match
             if total == 0:
                 matchText, _ = self.qryr.question(qst, min_match=0.1)
-                if "doc_ids" in filters:
-                    del filters["doc_ids"]
+                filters.pop("doc_ids", None)
                 matchDense.extra_options["similarity"] = 0.17
                 res = self.dataStore.search(src, highlightFields, filters, [matchText, matchDense, fusionExpr], orderBy, offset, limit, idx_names, kb_ids)
                 total=self.dataStore.getTotal(res)
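For context, the surrounding `Dealer` code retries an empty search with a relaxed `min_match` and without the `doc_ids` filter. A simplified sketch of that fallback pattern; the `store`/`query_builder` interfaces here are hypothetical stand-ins, not the project's API:

```python
def search_with_fallback(store, query_builder, qst, filters):
    # First pass: strict text matching.
    match_text, _ = query_builder.question(qst, min_match=0.3)
    res, total = store.search(match_text, filters)
    if total == 0:
        # Fallback: relax the match threshold and drop the restrictive
        # doc_ids filter, so an over-narrow request can still return results.
        match_text, _ = query_builder.question(qst, min_match=0.1)
        filters.pop("doc_ids", None)
        res, total = store.search(match_text, filters)
    return res, total
```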

View File

@@ -5,7 +5,7 @@ import time
 import os
 import copy
-from elasticsearch import Elasticsearch
+from elasticsearch import Elasticsearch, NotFoundError
 from elasticsearch_dsl import UpdateByQuery, Q, Search, Index
 from elastic_transport import ConnectionTimeout
 from rag import settings
@@ -82,7 +82,9 @@ class ESConnection(DocStoreConnection):
     def deleteIdx(self, indexName: str, knowledgebaseId: str):
         try:
-            return self.es.indices.delete(indexName, allow_no_indices=True)
+            self.es.indices.delete(index=indexName, allow_no_indices=True)
+        except NotFoundError:
+            pass
         except Exception:
             logging.exception("ES delete index error %s" % (indexName))
@@ -146,6 +148,7 @@ class ESConnection(DocStoreConnection):
                 similarity=similarity,
             )

+        condition["kb_id"] = knowledgebaseIds
         if condition:
             if not bqry:
                 bqry = Q("bool", must=[])
@@ -226,8 +229,7 @@
             assert "_id" not in d
             assert "id" in d
             d_copy = copy.deepcopy(d)
-            meta_id = d_copy["id"]
-            del d_copy["id"]
+            meta_id = d_copy.pop("id", "")
             operations.append(
                 {"index": {"_index": indexName, "_id": meta_id}})
             operations.append(d_copy)
@@ -254,7 +256,7 @@
     def update(self, condition: dict, newValue: dict, indexName: str, knowledgebaseId: str) -> bool:
         doc = copy.deepcopy(newValue)
-        del doc['id']
+        doc.pop("id", None)
         if "id" in condition and isinstance(condition["id"], str):
             # update specific single document
             chunkId = condition["id"]
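`deleteIdx` now treats a missing index as a no-op instead of logging a spurious error. A standalone sketch of the idiom with the official `elasticsearch` client, assuming a local dev cluster (an illustrative helper, not the project's wrapper):

```python
from elasticsearch import Elasticsearch, NotFoundError

es = Elasticsearch("http://localhost:9200")  # assumed local dev cluster

def delete_index_quietly(index_name: str) -> None:
    try:
        es.indices.delete(index=index_name, allow_no_indices=True)
    except NotFoundError:
        pass  # index is already gone: deleting it is a no-op, not an error
```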

View File

@@ -78,10 +78,13 @@ class RAGFlowMinio(object):
     def obj_exist(self, bucket, fnm):
         try:
-            if self.conn.stat_object(bucket, fnm):return True
+            if not self.conn.bucket_exists(bucket):
+                return False
+            if self.conn.stat_object(bucket, fnm):
+                return True
             return False
         except Exception:
-            logging.exception(f"Fail put {bucket}/{fnm}:")
+            logging.exception(f"RAGFlowMinio.obj_exist {bucket}/{fnm} got exception:")
             return False
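`obj_exist` now checks the bucket first, because `stat_object` raises when the bucket itself is missing. A standalone sketch with the `minio` client; the endpoint and credentials are placeholder dev defaults:

```python
from minio import Minio

client = Minio("localhost:9000", access_key="minioadmin",
               secret_key="minioadmin", secure=False)  # assumed dev defaults

def obj_exist(bucket: str, fnm: str) -> bool:
    if not client.bucket_exists(bucket):  # a missing bucket can hold no object
        return False
    try:
        client.stat_object(bucket, fnm)   # raises S3Error if the object is absent
        return True
    except Exception:
        return False
```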

View File

@@ -12,7 +12,7 @@ class Payload:
         self.__queue_name = queue_name
         self.__group_name = group_name
         self.__msg_id = msg_id
-        self.__message = json.loads(message['message'])
+        self.__message = json.loads(message["message"])

     def ack(self):
         try:
@@ -35,19 +35,20 @@ class RedisDB:
     def __open__(self):
         try:
-            self.REDIS = redis.StrictRedis(host=self.config["host"].split(":")[0],
-                                           port=int(self.config.get("host", ":6379").split(":")[1]),
-                                           db=int(self.config.get("db", 1)),
-                                           password=self.config.get("password"),
-                                           decode_responses=True)
+            self.REDIS = redis.StrictRedis(
+                host=self.config["host"].split(":")[0],
+                port=int(self.config.get("host", ":6379").split(":")[1]),
+                db=int(self.config.get("db", 1)),
+                password=self.config.get("password"),
+                decode_responses=True,
+            )
         except Exception:
             logging.warning("Redis can't be connected.")
         return self.REDIS

     def health(self):
         self.REDIS.ping()
-        a, b = 'xx', 'yy'
+        a, b = "xx", "yy"
         self.REDIS.set(a, b, 3)
         if self.REDIS.get(a) == b:
@@ -57,19 +58,21 @@ class RedisDB:
         return self.REDIS is not None

     def exist(self, k):
-        if not self.REDIS: return
+        if not self.REDIS:
+            return
         try:
             return self.REDIS.exists(k)
         except Exception as e:
-            logging.warning("[EXCEPTION]exist" + str(k) + "||" + str(e))
+            logging.warning("RedisDB.exist " + str(k) + " got exception: " + str(e))
             self.__open__()

     def get(self, k):
-        if not self.REDIS: return
+        if not self.REDIS:
+            return
         try:
             return self.REDIS.get(k)
         except Exception as e:
-            logging.warning("[EXCEPTION]get" + str(k) + "||" + str(e))
+            logging.warning("RedisDB.get " + str(k) + " got exception: " + str(e))
             self.__open__()

     def set_obj(self, k, obj, exp=3600):
@@ -77,7 +80,7 @@ class RedisDB:
             self.REDIS.set(k, json.dumps(obj, ensure_ascii=False), exp)
             return True
         except Exception as e:
-            logging.warning("[EXCEPTION]set_obj" + str(k) + "||" + str(e))
+            logging.warning("RedisDB.set_obj " + str(k) + " got exception: " + str(e))
             self.__open__()
         return False
@@ -86,7 +89,7 @@ class RedisDB:
             self.REDIS.set(k, v, exp)
             return True
         except Exception as e:
-            logging.warning("[EXCEPTION]set" + str(k) + "||" + str(e))
+            logging.warning("RedisDB.set " + str(k) + " got exception: " + str(e))
             self.__open__()
         return False
@@ -95,7 +98,7 @@ class RedisDB:
             self.REDIS.sadd(key, member)
             return True
         except Exception as e:
-            logging.warning("[EXCEPTION]sadd" + str(key) + "||" + str(e))
+            logging.warning("RedisDB.sadd " + str(key) + " got exception: " + str(e))
             self.__open__()
         return False
@@ -104,7 +107,7 @@ class RedisDB:
             self.REDIS.srem(key, member)
             return True
         except Exception as e:
-            logging.warning("[EXCEPTION]srem" + str(key) + "||" + str(e))
+            logging.warning("RedisDB.srem " + str(key) + " got exception: " + str(e))
             self.__open__()
         return False
@@ -113,7 +116,9 @@ class RedisDB:
             res = self.REDIS.smembers(key)
             return res
         except Exception as e:
-            logging.warning("[EXCEPTION]smembers" + str(key) + "||" + str(e))
+            logging.warning(
+                "RedisDB.smembers " + str(key) + " got exception: " + str(e)
+            )
             self.__open__()
         return None
@@ -122,7 +127,7 @@ class RedisDB:
             self.REDIS.zadd(key, {member: score})
             return True
         except Exception as e:
-            logging.warning("[EXCEPTION]zadd" + str(key) + "||" + str(e))
+            logging.warning("RedisDB.zadd " + str(key) + " got exception: " + str(e))
             self.__open__()
         return False
@@ -131,7 +136,7 @@ class RedisDB:
             res = self.REDIS.zcount(key, min, max)
             return res
         except Exception as e:
-            logging.warning("[EXCEPTION]spopmin" + str(key) + "||" + str(e))
+            logging.warning("RedisDB.zcount " + str(key) + " got exception: " + str(e))
             self.__open__()
         return 0
@@ -140,7 +145,7 @@ class RedisDB:
             res = self.REDIS.zpopmin(key, count)
             return res
         except Exception as e:
-            logging.warning("[EXCEPTION]spopmin" + str(key) + "||" + str(e))
+            logging.warning("RedisDB.zpopmin " + str(key) + " got exception: " + str(e))
             self.__open__()
         return None
@@ -149,7 +154,9 @@ class RedisDB:
             res = self.REDIS.zrangebyscore(key, min, max)
             return res
         except Exception as e:
-            logging.warning("[EXCEPTION]srangebyscore" + str(key) + "||" + str(e))
+            logging.warning(
+                "RedisDB.zrangebyscore " + str(key) + " got exception: " + str(e)
+            )
             self.__open__()
         return None
@@ -160,7 +167,9 @@ class RedisDB:
             pipeline.execute()
             return True
         except Exception as e:
-            logging.warning("[EXCEPTION]set" + str(key) + "||" + str(e))
+            logging.warning(
+                "RedisDB.transaction " + str(key) + " got exception: " + str(e)
+            )
             self.__open__()
         return False
@@ -170,23 +179,22 @@ class RedisDB:
             payload = {"message": json.dumps(message)}
             pipeline = self.REDIS.pipeline()
             pipeline.xadd(queue, payload)
-            #pipeline.expire(queue, exp)
+            # pipeline.expire(queue, exp)
             pipeline.execute()
             return True
-        except Exception:
-            logging.exception("producer" + str(queue) + " got exception")
+        except Exception as e:
+            logging.exception(
+                "RedisDB.queue_product " + str(queue) + " got exception: " + str(e)
+            )
         return False

-    def queue_consumer(self, queue_name, group_name, consumer_name, msg_id=b">") -> Payload:
+    def queue_consumer(
+        self, queue_name, group_name, consumer_name, msg_id=b">"
+    ) -> Payload:
         try:
             group_info = self.REDIS.xinfo_groups(queue_name)
             if not any(e["name"] == group_name for e in group_info):
-                self.REDIS.xgroup_create(
-                    queue_name,
-                    group_name,
-                    id="0",
-                    mkstream=True
-                )
+                self.REDIS.xgroup_create(queue_name, group_name, id="0", mkstream=True)
             args = {
                 "groupname": group_name,
                 "consumername": consumer_name,
@@ -202,10 +210,15 @@ class RedisDB:
             res = Payload(self.REDIS, queue_name, group_name, msg_id, payload)
             return res
         except Exception as e:
-            if 'key' in str(e):
+            if "key" in str(e):
                 pass
             else:
-                logging.exception("consumer: " + str(queue_name) + " got exception")
+                logging.exception(
+                    "RedisDB.queue_consumer "
+                    + str(queue_name)
+                    + " got exception: "
+                    + str(e)
+                )
         return None

     def get_unacked_for(self, consumer_name, queue_name, group_name):
@@ -213,36 +226,39 @@ class RedisDB:
             group_info = self.REDIS.xinfo_groups(queue_name)
             if not any(e["name"] == group_name for e in group_info):
                 return
-            pendings = self.REDIS.xpending_range(queue_name, group_name, min=0, max=10000000000000, count=1, consumername=consumer_name)
-            if not pendings: return
+            pendings = self.REDIS.xpending_range(
+                queue_name,
+                group_name,
+                min=0,
+                max=10000000000000,
+                count=1,
+                consumername=consumer_name,
+            )
+            if not pendings:
+                return
             msg_id = pendings[0]["message_id"]
             msg = self.REDIS.xrange(queue_name, min=msg_id, count=1)
             _, payload = msg[0]
             return Payload(self.REDIS, queue_name, group_name, msg_id, payload)
         except Exception as e:
-            if 'key' in str(e):
+            if "key" in str(e):
                 return
-            logging.exception("xpending_range: " + consumer_name + " got exception")
+            logging.exception(
+                "RedisDB.get_unacked_for " + consumer_name + " got exception: " + str(e)
+            )
             self.__open__()

-    def queue_info(self, queue, group_name) -> dict:
-        for _ in range(3):
-            try:
-                groups = self.REDIS.xinfo_groups(queue)
-                for group in groups:
-                    if group["name"] == group_name:
-                        return group
-            except Exception:
-                logging.exception("queue_length" + str(queue) + " got exception")
+    def queue_info(self, queue, group_name) -> dict | None:
+        try:
+            groups = self.REDIS.xinfo_groups(queue)
+            for group in groups:
+                if group["name"] == group_name:
+                    return group
+        except Exception as e:
+            logging.warning(
+                "RedisDB.queue_info " + str(queue) + " got exception: " + str(e)
+            )
         return None

-    def queue_head(self, queue) -> int:
-        for _ in range(3):
-            try:
-                ent = self.REDIS.xrange(queue, count=1)
-                return ent[0]
-            except Exception:
-                logging.exception("queue_head" + str(queue) + " got exception")
-        return 0

 REDIS_CONN = RedisDB()
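The renamed Redis log messages all follow one pattern: each operation catches failures, logs `ClassName.method <key> got exception: <err>`, and calls `__open__()` so the next call gets a fresh connection. A condensed standalone sketch of that reconnect-on-failure pattern (class and method names here are simplified, not the project's exact API):

```python
import logging
import redis

class ReconnectingRedis:
    def __init__(self, host="localhost", port=6379):
        self.host, self.port = host, port
        self.conn = None
        self._open()

    def _open(self):
        try:
            self.conn = redis.StrictRedis(
                host=self.host, port=self.port, decode_responses=True
            )
        except Exception:
            logging.warning("Redis can't be connected.")
        return self.conn

    def get(self, k):
        try:
            return self.conn.get(k)
        except Exception as e:
            # Uniform log style, then reconnect for the next caller.
            logging.warning("ReconnectingRedis.get " + str(k) + " got exception: " + str(e))
            self._open()
```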