From dec9b3e5400464a6dab3d9a3052f4e7e0428a321 Mon Sep 17 00:00:00 2001
From: Zhichang Yu
Date: Tue, 19 Nov 2024 14:15:25 +0800
Subject: [PATCH] Fix logs. Use dict.pop instead of del. Close #3473 (#3484)

### What problem does this PR solve?

Fix logs. Use dict.pop instead of del.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
---
 api/apps/api_app.py                 |   3 +-
 api/apps/chunk_app.py               |   3 +-
 api/apps/sdk/dify_retrieval.py      |   3 +-
 api/apps/sdk/doc.py                 |   4 +-
 api/apps/user_app.py                |   3 +-
 api/db/services/document_service.py |   2 +-
 api/utils/log_utils.py              |   1 +
 graphrag/graph_extractor.py         |   2 +-
 rag/benchmark.py                    |   3 +-
 rag/nlp/search.py                   |   3 +-
 rag/utils/es_conn.py                |  12 +--
 rag/utils/minio_conn.py             |   7 +-
 rag/utils/redis_conn.py             | 126 ++++++++++++++++------
 13 files changed, 93 insertions(+), 79 deletions(-)

diff --git a/api/apps/api_app.py b/api/apps/api_app.py
index d77a437aa..3e8520a68 100644
--- a/api/apps/api_app.py
+++ b/api/apps/api_app.py
@@ -839,8 +839,7 @@ def retrieval():
                                               similarity_threshold, vector_similarity_weight, top,
                                               doc_ids, rerank_mdl=rerank_mdl)
         for c in ranks["chunks"]:
-            if "vector" in c:
-                del c["vector"]
+            c.pop("vector", None)
         return get_json_result(data=ranks)
     except Exception as e:
         if str(e).find("not_found") > 0:
diff --git a/api/apps/chunk_app.py b/api/apps/chunk_app.py
index 252f0fa40..572a41dfc 100644
--- a/api/apps/chunk_app.py
+++ b/api/apps/chunk_app.py
@@ -287,8 +287,7 @@ def retrieval_test():
                                               similarity_threshold, vector_similarity_weight, top,
                                               doc_ids, rerank_mdl=rerank_mdl, highlight=req.get("highlight"))
         for c in ranks["chunks"]:
-            if "vector" in c:
-                del c["vector"]
+            c.pop("vector", None)
 
         return get_json_result(data=ranks)
     except Exception as e:
diff --git a/api/apps/sdk/dify_retrieval.py b/api/apps/sdk/dify_retrieval.py
index 8e13f5c6d..388a9d1c8 100644
--- a/api/apps/sdk/dify_retrieval.py
+++ b/api/apps/sdk/dify_retrieval.py
@@ -58,8 +58,7 @@ def retrieval(tenant_id):
         )
         records = []
         for c in ranks["chunks"]:
-            if "vector" in c:
-                del c["vector"]
+            c.pop("vector", None)
             records.append({
                 "content": c["content_ltks"],
                 "score": c["similarity"],
diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py
index faae6333c..80bd8d9cb 100644
--- a/api/apps/sdk/doc.py
+++ b/api/apps/sdk/doc.py
@@ -37,7 +37,6 @@ from api.db.services.document_service import DocumentService
 from api.db.services.file2document_service import File2DocumentService
 from api.db.services.file_service import FileService
 from api.db.services.knowledgebase_service import KnowledgebaseService
-from api import settings
 from api.utils.api_utils import construct_json_result, get_parser_config
 from rag.nlp import search
 from rag.utils import rmSpace
@@ -1342,8 +1341,7 @@ def retrieval_test(tenant_id):
             highlight=highlight,
         )
         for c in ranks["chunks"]:
-            if "vector" in c:
-                del c["vector"]
+            c.pop("vector", None)
 
         ##rename keys
         renamed_chunks = []
diff --git a/api/apps/user_app.py b/api/apps/user_app.py
index e6b576860..9d10986a9 100644
--- a/api/apps/user_app.py
+++ b/api/apps/user_app.py
@@ -696,8 +696,7 @@ def set_tenant_info():
     """
     req = request.json
     try:
-        tid = req["tenant_id"]
-        del req["tenant_id"]
+        tid = req.pop("tenant_id")
         TenantService.update_by_id(tid, req)
         return get_json_result(data=True)
     except Exception as e:
diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py
index b5c758d57..9feb69da0 100644
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@@ -500,7 +500,7 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
                 STORAGE_IMPL.put(kb.id, d["id"], output_buffer.getvalue())
                 d["img_id"] = "{}-{}".format(kb.id, d["id"])
-                del d["image"]
+                d.pop("image", None)
                 docs.append(d)
 
     parser_ids = {d["id"]: d["parser_id"] for d, _ in files}
diff --git a/api/utils/log_utils.py b/api/utils/log_utils.py
index b8ed722e2..56aeeae9b 100644
--- a/api/utils/log_utils.py
+++ b/api/utils/log_utils.py
@@ -49,5 +49,6 @@ def initRootLogger(logfile_basename: str, log_level: int = logging.INFO, log_for
     handler2.setFormatter(formatter)
     logger.addHandler(handler2)
 
+    logging.captureWarnings(True)
     msg = f"{logfile_basename} log path: {log_path}"
     logger.info(msg)
\ No newline at end of file
diff --git a/graphrag/graph_extractor.py b/graphrag/graph_extractor.py
index 2a9132cc6..0a8345402 100644
--- a/graphrag/graph_extractor.py
+++ b/graphrag/graph_extractor.py
@@ -9,7 +9,7 @@ import logging
 import numbers
 import re
 import traceback
-from typing import Any, Callable
+from typing import Any, Callable, Mapping
 from dataclasses import dataclass
 import tiktoken
 from graphrag.graph_prompt import GRAPH_EXTRACTION_PROMPT, CONTINUE_PROMPT, LOOP_PROMPT
diff --git a/rag/benchmark.py b/rag/benchmark.py
index 2688d3630..dc48bed3e 100644
--- a/rag/benchmark.py
+++ b/rag/benchmark.py
@@ -59,8 +59,7 @@ class Benchmark:
                 del qrels[query]
                 continue
             for c in ranks["chunks"]:
-                if "vector" in c:
-                    del c["vector"]
+                c.pop("vector", None)
                 run[query][c["chunk_id"]] = c["similarity"]
         return run
 
diff --git a/rag/nlp/search.py b/rag/nlp/search.py
index 0aeee4ad9..92c6ccf27 100644
--- a/rag/nlp/search.py
+++ b/rag/nlp/search.py
@@ -106,8 +106,7 @@ class Dealer:
             # If result is empty, try again with lower min_match
             if total == 0:
                 matchText, _ = self.qryr.question(qst, min_match=0.1)
-                if "doc_ids" in filters:
-                    del filters["doc_ids"]
+                filters.pop("doc_ids", None)
                 matchDense.extra_options["similarity"] = 0.17
                 res = self.dataStore.search(src, highlightFields, filters, [matchText, matchDense, fusionExpr], orderBy, offset, limit, idx_names, kb_ids)
                 total=self.dataStore.getTotal(res)
diff --git a/rag/utils/es_conn.py b/rag/utils/es_conn.py
index d8541eddc..104b53ab1 100644
--- a/rag/utils/es_conn.py
+++ b/rag/utils/es_conn.py
@@ -5,7 +5,7 @@
 import time
 import os
 import copy
-from elasticsearch import Elasticsearch
+from elasticsearch import Elasticsearch, NotFoundError
 from elasticsearch_dsl import UpdateByQuery, Q, Search, Index
 from elastic_transport import ConnectionTimeout
 from rag import settings
@@ -82,7 +82,9 @@ class ESConnection(DocStoreConnection):
 
     def deleteIdx(self, indexName: str, knowledgebaseId: str):
         try:
-            return self.es.indices.delete(indexName, allow_no_indices=True)
+            self.es.indices.delete(index=indexName, allow_no_indices=True)
+        except NotFoundError:
+            pass
         except Exception:
             logging.exception("ES delete index error %s" % (indexName))
 
@@ -146,6 +148,7 @@ class ESConnection(DocStoreConnection):
                         similarity=similarity,
                     )
 
+        condition["kb_id"] = knowledgebaseIds
         if condition:
             if not bqry:
                 bqry = Q("bool", must=[])
@@ -226,8 +229,7 @@ class ESConnection(DocStoreConnection):
             assert "_id" not in d
             assert "id" in d
             d_copy = copy.deepcopy(d)
-            meta_id = d_copy["id"]
-            del d_copy["id"]
+            meta_id = d_copy.pop("id", "")
             operations.append(
                 {"index": {"_index": indexName, "_id": meta_id}})
             operations.append(d_copy)
@@ -254,7 +256,7 @@ class ESConnection(DocStoreConnection):
 
     def update(self, condition: dict, newValue: dict, indexName: str, knowledgebaseId: str) -> bool:
         doc = copy.deepcopy(newValue)
-        del doc['id']
+        doc.pop("id", None)
         if "id" in condition and isinstance(condition["id"], str):
             # update specific single document
             chunkId = condition["id"]
diff --git a/rag/utils/minio_conn.py b/rag/utils/minio_conn.py
index b9c30386b..5952482fe 100644
--- a/rag/utils/minio_conn.py
+++ b/rag/utils/minio_conn.py
@@ -78,10 +78,13 @@ class RAGFlowMinio(object):
 
     def obj_exist(self, bucket, fnm):
         try:
-            if self.conn.stat_object(bucket, fnm):return True
+            if not self.conn.bucket_exists(bucket):
+                return False
+            if self.conn.stat_object(bucket, fnm):
+                return True
             return False
         except Exception:
-            logging.exception(f"Fail put {bucket}/{fnm}:")
+            logging.exception(f"RAGFlowMinio.obj_exist {bucket}/{fnm} got exception:")
             return False
 
 
diff --git a/rag/utils/redis_conn.py b/rag/utils/redis_conn.py
index 7013a227d..660e51873 100644
--- a/rag/utils/redis_conn.py
+++ b/rag/utils/redis_conn.py
@@ -12,7 +12,7 @@ class Payload:
         self.__queue_name = queue_name
         self.__group_name = group_name
         self.__msg_id = msg_id
-        self.__message = json.loads(message['message'])
+        self.__message = json.loads(message["message"])
 
     def ack(self):
         try:
@@ -35,19 +35,20 @@ class RedisDB:
 
     def __open__(self):
         try:
-            self.REDIS = redis.StrictRedis(host=self.config["host"].split(":")[0],
-                                           port=int(self.config.get("host", ":6379").split(":")[1]),
-                                           db=int(self.config.get("db", 1)),
-                                           password=self.config.get("password"),
-                                           decode_responses=True)
+            self.REDIS = redis.StrictRedis(
+                host=self.config["host"].split(":")[0],
+                port=int(self.config.get("host", ":6379").split(":")[1]),
+                db=int(self.config.get("db", 1)),
+                password=self.config.get("password"),
+                decode_responses=True,
+            )
         except Exception:
             logging.warning("Redis can't be connected.")
         return self.REDIS
 
     def health(self):
-        self.REDIS.ping()
-        a, b = 'xx', 'yy'
+        a, b = "xx", "yy"
        self.REDIS.set(a, b, 3)
 
        if self.REDIS.get(a) == b:
            return True
@@ -57,19 +58,21 @@ class RedisDB:
     def is_alive(self):
         return self.REDIS is not None
 
     def exist(self, k):
-        if not self.REDIS: return
+        if not self.REDIS:
+            return
         try:
             return self.REDIS.exists(k)
         except Exception as e:
-            logging.warning("[EXCEPTION]exist" + str(k) + "||" + str(e))
+            logging.warning("RedisDB.exist " + str(k) + " got exception: " + str(e))
             self.__open__()
 
     def get(self, k):
-        if not self.REDIS: return
+        if not self.REDIS:
+            return
         try:
             return self.REDIS.get(k)
         except Exception as e:
-            logging.warning("[EXCEPTION]get" + str(k) + "||" + str(e))
+            logging.warning("RedisDB.get " + str(k) + " got exception: " + str(e))
             self.__open__()
 
@@ -77,7 +80,7 @@ class RedisDB:
             self.REDIS.set(k, json.dumps(obj, ensure_ascii=False), exp)
             return True
         except Exception as e:
-            logging.warning("[EXCEPTION]set_obj" + str(k) + "||" + str(e))
+            logging.warning("RedisDB.set_obj " + str(k) + " got exception: " + str(e))
             self.__open__()
         return False
 
@@ -86,7 +89,7 @@ class RedisDB:
             self.REDIS.set(k, v, exp)
             return True
         except Exception as e:
-            logging.warning("[EXCEPTION]set" + str(k) + "||" + str(e))
+            logging.warning("RedisDB.set " + str(k) + " got exception: " + str(e))
             self.__open__()
         return False
 
@@ -95,7 +98,7 @@ class RedisDB:
             self.REDIS.sadd(key, member)
             return True
         except Exception as e:
-            logging.warning("[EXCEPTION]sadd" + str(key) + "||" + str(e))
+            logging.warning("RedisDB.sadd " + str(key) + " got exception: " + str(e))
             self.__open__()
         return False
 
@@ -104,7 +107,7 @@ class RedisDB:
             self.REDIS.srem(key, member)
             return True
         except Exception as e:
-            logging.warning("[EXCEPTION]srem" + str(key) + "||" + str(e))
+            logging.warning("RedisDB.srem " + str(key) + " got exception: " + str(e))
             self.__open__()
         return False
 
@@ -113,7 +116,9 @@ class RedisDB:
             res = self.REDIS.smembers(key)
             return res
         except Exception as e:
-            logging.warning("[EXCEPTION]smembers" + str(key) + "||" + str(e))
+            logging.warning(
+                "RedisDB.smembers " + str(key) + " got exception: " + str(e)
+            )
             self.__open__()
         return None
 
@@ -122,7 +127,7 @@ class RedisDB:
             self.REDIS.zadd(key, {member: score})
             return True
         except Exception as e:
-            logging.warning("[EXCEPTION]zadd" + str(key) + "||" + str(e))
+            logging.warning("RedisDB.zadd " + str(key) + " got exception: " + str(e))
             self.__open__()
         return False
 
@@ -131,7 +136,7 @@ class RedisDB:
             res = self.REDIS.zcount(key, min, max)
             return res
         except Exception as e:
-            logging.warning("[EXCEPTION]spopmin" + str(key) + "||" + str(e))
+            logging.warning("RedisDB.zcount " + str(key) + " got exception: " + str(e))
             self.__open__()
         return 0
 
@@ -140,7 +145,7 @@ class RedisDB:
             res = self.REDIS.zpopmin(key, count)
             return res
         except Exception as e:
-            logging.warning("[EXCEPTION]spopmin" + str(key) + "||" + str(e))
+            logging.warning("RedisDB.zpopmin " + str(key) + " got exception: " + str(e))
             self.__open__()
         return None
 
@@ -149,7 +154,9 @@ class RedisDB:
             res = self.REDIS.zrangebyscore(key, min, max)
             return res
         except Exception as e:
-            logging.warning("[EXCEPTION]srangebyscore" + str(key) + "||" + str(e))
+            logging.warning(
+                "RedisDB.zrangebyscore " + str(key) + " got exception: " + str(e)
+            )
             self.__open__()
         return None
 
@@ -160,7 +167,9 @@ class RedisDB:
             pipeline.execute()
             return True
         except Exception as e:
-            logging.warning("[EXCEPTION]set" + str(key) + "||" + str(e))
+            logging.warning(
+                "RedisDB.transaction " + str(key) + " got exception: " + str(e)
+            )
             self.__open__()
         return False
 
@@ -170,23 +179,22 @@ class RedisDB:
                 payload = {"message": json.dumps(message)}
                 pipeline = self.REDIS.pipeline()
                 pipeline.xadd(queue, payload)
-                #pipeline.expire(queue, exp)
+                # pipeline.expire(queue, exp)
                 pipeline.execute()
                 return True
-            except Exception:
-                logging.exception("producer" + str(queue) + " got exception")
+            except Exception as e:
+                logging.exception(
+                    "RedisDB.queue_product " + str(queue) + " got exception: " + str(e)
+                )
         return False
 
-    def queue_consumer(self, queue_name, group_name, consumer_name, msg_id=b">") -> Payload:
+    def queue_consumer(
+        self, queue_name, group_name, consumer_name, msg_id=b">"
+    ) -> Payload:
         try:
             group_info = self.REDIS.xinfo_groups(queue_name)
             if not any(e["name"] == group_name for e in group_info):
-                self.REDIS.xgroup_create(
-                    queue_name,
-                    group_name,
-                    id="0",
-                    mkstream=True
-                )
+                self.REDIS.xgroup_create(queue_name, group_name, id="0", mkstream=True)
             args = {
                 "groupname": group_name,
                 "consumername": consumer_name,
@@ -202,10 +210,15 @@ class RedisDB:
             res = Payload(self.REDIS, queue_name, group_name, msg_id, payload)
             return res
         except Exception as e:
-            if 'key' in str(e):
+            if "key" in str(e):
                 pass
             else:
-                logging.exception("consumer: " + str(queue_name) + " got exception")
+                logging.exception(
+                    "RedisDB.queue_consumer "
+                    + str(queue_name)
+                    + " got exception: "
+                    + str(e)
+                )
         return None
 
     def get_unacked_for(self, consumer_name, queue_name, group_name):
@@ -213,36 +226,39 @@ class RedisDB:
         try:
             group_info = self.REDIS.xinfo_groups(queue_name)
             if not any(e["name"] == group_name for e in group_info):
                 return
-            pendings = self.REDIS.xpending_range(queue_name, group_name, min=0, max=10000000000000, count=1, consumername=consumer_name)
-            if not pendings: return
+            pendings = self.REDIS.xpending_range(
+                queue_name,
+                group_name,
+                min=0,
+                max=10000000000000,
+                count=1,
+                consumername=consumer_name,
+            )
+            if not pendings:
+                return
             msg_id = pendings[0]["message_id"]
             msg = self.REDIS.xrange(queue_name, min=msg_id, count=1)
             _, payload = msg[0]
             return Payload(self.REDIS, queue_name, group_name, msg_id, payload)
         except Exception as e:
-            if 'key' in str(e):
+            if "key" in str(e):
                 return
-            logging.exception("xpending_range: " + consumer_name + " got exception")
+            logging.exception(
+                "RedisDB.get_unacked_for " + consumer_name + " got exception: " + str(e)
+            )
             self.__open__()
 
-    def queue_info(self, queue, group_name) -> dict:
-        for _ in range(3):
-            try:
-                groups = self.REDIS.xinfo_groups(queue)
-                for group in groups:
-                    if group["name"] == group_name:
-                        return group
-            except Exception:
-                logging.exception("queue_length" + str(queue) + " got exception")
+    def queue_info(self, queue, group_name) -> dict | None:
+        try:
+            groups = self.REDIS.xinfo_groups(queue)
+            for group in groups:
+                if group["name"] == group_name:
+                    return group
+        except Exception as e:
+            logging.warning(
+                "RedisDB.queue_info " + str(queue) + " got exception: " + str(e)
+            )
         return None
 
-    def queue_head(self, queue) -> int:
-        for _ in range(3):
-            try:
-                ent = self.REDIS.xrange(queue, count=1)
-                return ent[0]
-            except Exception:
-                logging.exception("queue_head" + str(queue) + " got exception")
-        return 0
 
 
 REDIS_CONN = RedisDB()
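As a quick reference for reviewers, here is a minimal standalone sketch of the `dict.pop` idiom this patch applies across the retrieval endpoints; the `chunk` and `req` dicts below are illustrative stand-ins, not code from the repository:

```python
chunk = {"content_ltks": "hello world", "similarity": 0.42}

# Before this patch: a membership guard was required, because a bare `del`
# raises KeyError when the key is absent.
if "vector" in chunk:
    del chunk["vector"]

# After this patch: pop with a default removes the key if present and is a
# no-op otherwise, so the check-then-delete two-step collapses to one call.
chunk.pop("vector", None)

# pop also returns the removed value, which set_tenant_info() in
# api/apps/user_app.py relies on; called without a default it still raises
# KeyError on a missing key, preserving the old `del` behavior there.
req = {"tenant_id": "t1", "name": "demo"}
tid = req.pop("tenant_id")
assert tid == "t1" and "tenant_id" not in req
```

Note the one deliberate variation in rag/utils/es_conn.py: `meta_id = d_copy.pop("id", "")` uses an empty-string default, so a document without an id indexes under `""` rather than `None`.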