diff --git a/api/db/services/llm_service.py b/api/db/services/llm_service.py
index 98d8fd21d..90dfb0263 100644
--- a/api/db/services/llm_service.py
+++ b/api/db/services/llm_service.py
@@ -179,7 +179,7 @@ class TenantLLMService(CommonService):
                 .where(cls.model.tenant_id == tenant_id, cls.model.llm_factory == tenant_llm.llm_factory, cls.model.llm_name == llm_name)\
                 .execute()
             else:
-                llm_factory = llm_name.split("/")[0] if "/" in llm_name else llm_name
+                llm_factory = mdlnm.split("@")[1] if "@" in mdlnm else mdlnm
                 num = cls.model.create(tenant_id=tenant_id, llm_factory=llm_factory, llm_name=llm_name, used_tokens=used_tokens)
         except Exception:
             logging.exception("TenantLLMService.increase_usage got exception")
diff --git a/api/ragflow_server.py b/api/ragflow_server.py
index e28347383..0d7cd6a36 100644
--- a/api/ragflow_server.py
+++ b/api/ragflow_server.py
@@ -19,15 +19,10 @@
 # beartype_all(conf=BeartypeConf(violation_type=UserWarning))  # <-- emit warnings from all code
 
 import logging
+import os
 from api.utils.log_utils import initRootLogger
-initRootLogger("ragflow_server")
-for module in ["pdfminer"]:
-    module_logger = logging.getLogger(module)
-    module_logger.setLevel(logging.WARNING)
-for module in ["peewee"]:
-    module_logger = logging.getLogger(module)
-    module_logger.handlers.clear()
-    module_logger.propagate = True
+LOG_LEVELS = os.environ.get("LOG_LEVELS", "")
+initRootLogger("ragflow_server", LOG_LEVELS)
 
 import os
 import signal
diff --git a/api/utils/log_utils.py b/api/utils/log_utils.py
index 56aeeae9b..99654d20b 100644
--- a/api/utils/log_utils.py
+++ b/api/utils/log_utils.py
@@ -28,7 +28,7 @@ def get_project_base_directory():
         )
     return PROJECT_BASE
 
-def initRootLogger(logfile_basename: str, log_level: int = logging.INFO, log_format: str = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"):
+def initRootLogger(logfile_basename: str, log_levels: str, log_format: str = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"):
     logger = logging.getLogger()
     if logger.hasHandlers():
         return
@@ -36,19 +36,39 @@ def initRootLogger(logfile_basename: str, log_level: int = logging.INFO, log_for
     log_path = os.path.abspath(os.path.join(get_project_base_directory(), "logs", f"{logfile_basename}.log"))
 
     os.makedirs(os.path.dirname(log_path), exist_ok=True)
-    logger.setLevel(log_level)
     formatter = logging.Formatter(log_format)
 
     handler1 = RotatingFileHandler(log_path, maxBytes=10*1024*1024, backupCount=5)
-    handler1.setLevel(log_level)
     handler1.setFormatter(formatter)
     logger.addHandler(handler1)
 
     handler2 = logging.StreamHandler()
-    handler2.setLevel(log_level)
     handler2.setFormatter(formatter)
     logger.addHandler(handler2)
 
     logging.captureWarnings(True)
-    msg = f"{logfile_basename} log path: {log_path}"
+
+    pkg_levels = {}
+    for pkg_name_level in log_levels.split(","):
+        terms = pkg_name_level.split("=")
+        if len(terms) != 2:
+            continue
+        pkg_name, pkg_level = terms[0], terms[1]
+        pkg_name = pkg_name.strip()
+        pkg_level = logging.getLevelName(pkg_level.strip().upper())
+        if not isinstance(pkg_level, int):
+            pkg_level = logging.INFO
+        pkg_levels[pkg_name] = logging.getLevelName(pkg_level)
+
+    for pkg_name in ['peewee', 'pdfminer']:
+        if pkg_name not in pkg_levels:
+            pkg_levels[pkg_name] = logging.getLevelName(logging.WARNING)
+    if 'root' not in pkg_levels:
+        pkg_levels['root'] = logging.getLevelName(logging.INFO)
+
+    for pkg_name, pkg_level in pkg_levels.items():
+        pkg_logger = logging.getLogger(pkg_name)
+        pkg_logger.setLevel(pkg_level)
+
+    msg = f"{logfile_basename} log path: {log_path}, log levels: {pkg_levels}"
     logger.info(msg)
\ No newline at end of file
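A minimal standalone sketch of the parsing rules introduced above (illustrative only; `parse_log_levels` is a hypothetical helper, not RAGFlow code): entries are comma-separated `name=LEVEL` pairs, malformed pairs are skipped, unrecognized level names fall back to `INFO`, and `peewee`, `pdfminer`, and `root` receive defaults unless overridden:

    import logging

    def parse_log_levels(spec: str) -> dict:
        # Mirrors initRootLogger: "pkg=LEVEL,pkg2=LEVEL2" -> {"pkg": "LEVEL", ...}
        pkg_levels = {}
        for pair in spec.split(","):
            terms = pair.split("=")
            if len(terms) != 2:
                continue  # malformed entries are ignored
            name = terms[0].strip()
            level = logging.getLevelName(terms[1].strip().upper())
            if not isinstance(level, int):  # unknown level name -> INFO
                level = logging.INFO
            pkg_levels[name] = logging.getLevelName(level)
        for name, default in (("peewee", "WARNING"), ("pdfminer", "WARNING"), ("root", "INFO")):
            pkg_levels.setdefault(name, default)
        return pkg_levels

    print(parse_log_levels("ragflow.es_conn=DEBUG, peewee=info, bogus"))
    # -> {'ragflow.es_conn': 'DEBUG', 'peewee': 'INFO', 'pdfminer': 'WARNING', 'root': 'INFO'}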
diff --git a/docker/.env b/docker/.env
index d64cd2be7..19d2cf4dd 100644
--- a/docker/.env
+++ b/docker/.env
@@ -129,3 +129,11 @@ TIMEZONE='Asia/Shanghai'
 
 # You can uncomment this line and update the value if you wish to change the 128M file size limit
 # MAX_CONTENT_LENGTH=134217728
+# The log levels for RAGFlow's own packages and imported packages.
+# Available levels:
+# - `DEBUG`
+# - `INFO` (default)
+# - `WARNING`
+# - `ERROR`
+# For example, the following line changes the log level of `ragflow.es_conn` to `DEBUG`:
+# LOG_LEVELS=ragflow.es_conn=DEBUG
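Multiple entries combine into one comma-separated list. For instance (the values are illustrative), to keep `peewee` quiet except for errors while debugging both doc-engine connectors:

    LOG_LEVELS=peewee=ERROR,ragflow.es_conn=DEBUG,ragflow.infinity_conn=DEBUG

Per the parsing in `initRootLogger` above, malformed entries are ignored and unrecognized level names fall back to `INFO`.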
diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py
index b4d86f476..03f5659c6 100644
--- a/rag/svr/task_executor.py
+++ b/rag/svr/task_executor.py
@@ -19,19 +19,14 @@
 
 import logging
 import sys
+import os
 from api.utils.log_utils import initRootLogger
 
 CONSUMER_NO = "0" if len(sys.argv) < 2 else sys.argv[1]
 CONSUMER_NAME = "task_executor_" + CONSUMER_NO
-initRootLogger(CONSUMER_NAME)
-for module in ["pdfminer"]:
-    module_logger = logging.getLogger(module)
-    module_logger.setLevel(logging.WARNING)
-for module in ["peewee"]:
-    module_logger = logging.getLogger(module)
-    module_logger.handlers.clear()
-    module_logger.propagate = True
+LOG_LEVELS = os.environ.get("LOG_LEVELS", "")
+initRootLogger(CONSUMER_NAME, LOG_LEVELS)
 
 from datetime import datetime
 import json
logging.warning("ESConnection.insert got exception: " + str(e)) + logger.warning("ESConnection.insert got exception: " + str(e)) res = [] if re.search(r"(Timeout|time out)", str(e), re.IGNORECASE): res.append(str(e)) @@ -278,7 +279,7 @@ class ESConnection(DocStoreConnection): self.es.update(index=indexName, id=chunkId, doc=doc) return True except Exception as e: - logging.exception( + logger.exception( f"ESConnection.update(index={indexName}, id={id}, doc={json.dumps(condition, ensure_ascii=False)}) got exception") if str(e).find("Timeout") > 0: continue @@ -318,7 +319,7 @@ class ESConnection(DocStoreConnection): _ = ubq.execute() return True except Exception as e: - logging.error("ESConnection.update got exception: " + str(e)) + logger.error("ESConnection.update got exception: " + str(e)) if str(e).find("Timeout") > 0 or str(e).find("Conflict") > 0: continue return False @@ -340,7 +341,7 @@ class ESConnection(DocStoreConnection): qry.must.append(Q("term", **{k: v})) else: raise Exception("Condition value must be int, str or list.") - logging.debug("ESConnection.delete query: " + json.dumps(qry.to_dict())) + logger.debug("ESConnection.delete query: " + json.dumps(qry.to_dict())) for _ in range(ATTEMPT_TIME): try: res = self.es.delete_by_query( @@ -349,7 +350,7 @@ class ESConnection(DocStoreConnection): refresh=True) return res["deleted"] except Exception as e: - logging.warning("ESConnection.delete got exception: " + str(e)) + logger.warning("ESConnection.delete got exception: " + str(e)) if re.search(r"(Timeout|time out)", str(e), re.IGNORECASE): time.sleep(3) continue @@ -433,7 +434,7 @@ class ESConnection(DocStoreConnection): """ def sql(self, sql: str, fetch_size: int, format: str): - logging.debug(f"ESConnection.sql get sql: {sql}") + logger.debug(f"ESConnection.sql get sql: {sql}") sql = re.sub(r"[ `]+", " ", sql) sql = sql.replace("%", "") replaces = [] @@ -450,7 +451,7 @@ class ESConnection(DocStoreConnection): for p, r in replaces: sql = sql.replace(p, r, 1) - logging.debug(f"ESConnection.sql to es: {sql}") + logger.debug(f"ESConnection.sql to es: {sql}") for i in range(ATTEMPT_TIME): try: @@ -458,10 +459,10 @@ class ESConnection(DocStoreConnection): request_timeout="2s") return res except ConnectionTimeout: - logging.exception("ESConnection.sql timeout") + logger.exception("ESConnection.sql timeout") continue except Exception: - logging.exception("ESConnection.sql got exception") + logger.exception("ESConnection.sql got exception") return None - logging.error("ESConnection.sql timeout for 3 times!") + logger.error("ESConnection.sql timeout for 3 times!") return None diff --git a/rag/utils/infinity_conn.py b/rag/utils/infinity_conn.py index f6345d455..5c0e2ef69 100644 --- a/rag/utils/infinity_conn.py +++ b/rag/utils/infinity_conn.py @@ -23,6 +23,7 @@ from rag.utils.doc_store_conn import ( OrderByExpr, ) +logger = logging.getLogger('ragflow.infinity_conn') def equivalent_condition_to_str(condition: dict) -> str: assert "_id" not in condition @@ -68,7 +69,7 @@ class InfinityConnection(DocStoreConnection): host, port = infinity_uri.split(":") infinity_uri = infinity.common.NetworkAddress(host, int(port)) self.connPool = None - logging.info(f"Use Infinity {infinity_uri} as the doc engine.") + logger.info(f"Use Infinity {infinity_uri} as the doc engine.") for _ in range(24): try: connPool = ConnectionPool(infinity_uri) @@ -78,16 +79,16 @@ class InfinityConnection(DocStoreConnection): self.connPool = connPool if res.error_code == ErrorCode.OK and res.server_status=="started": break - 
logging.warn(f"Infinity status: {res.server_status}. Waiting Infinity {infinity_uri} to be healthy.") + logger.warn(f"Infinity status: {res.server_status}. Waiting Infinity {infinity_uri} to be healthy.") time.sleep(5) except Exception as e: - logging.warning(f"{str(e)}. Waiting Infinity {infinity_uri} to be healthy.") + logger.warning(f"{str(e)}. Waiting Infinity {infinity_uri} to be healthy.") time.sleep(5) if self.connPool is None: msg = f"Infinity {infinity_uri} didn't become healthy in 120s." - logging.error(msg) + logger.error(msg) raise Exception(msg) - logging.info(f"Infinity {infinity_uri} is healthy.") + logger.info(f"Infinity {infinity_uri} is healthy.") """ Database operations @@ -162,7 +163,7 @@ class InfinityConnection(DocStoreConnection): ) break self.connPool.release_conn(inf_conn) - logging.info( + logger.info( f"INFINITY created table {table_name}, vector size {vectorSize}" ) @@ -172,7 +173,7 @@ class InfinityConnection(DocStoreConnection): db_instance = inf_conn.get_database(self.dbName) db_instance.drop_table(table_name, ConflictType.Ignore) self.connPool.release_conn(inf_conn) - logging.info(f"INFINITY dropped table {table_name}") + logger.info(f"INFINITY dropped table {table_name}") def indexExist(self, indexName: str, knowledgebaseId: str) -> bool: table_name = f"{indexName}_{knowledgebaseId}" @@ -183,7 +184,7 @@ class InfinityConnection(DocStoreConnection): self.connPool.release_conn(inf_conn) return True except Exception as e: - logging.warning(f"INFINITY indexExist {str(e)}") + logger.warning(f"INFINITY indexExist {str(e)}") return False """ @@ -230,7 +231,7 @@ class InfinityConnection(DocStoreConnection): ) if len(filter_cond) != 0: filter_fulltext = f"({filter_cond}) AND {filter_fulltext}" - logging.debug(f"filter_fulltext: {filter_fulltext}") + logger.debug(f"filter_fulltext: {filter_fulltext}") minimum_should_match = matchExpr.extra_options.get("minimum_should_match", 0.0) if isinstance(minimum_should_match, float): str_minimum_should_match = str(int(minimum_should_match * 100)) + "%" @@ -296,7 +297,7 @@ class InfinityConnection(DocStoreConnection): df_list.append(kb_res) self.connPool.release_conn(inf_conn) res = concat_dataframes(df_list, selectFields) - logging.debug("INFINITY search tables: " + str(table_list)) + logger.debug("INFINITY search tables: " + str(table_list)) return res def get( @@ -356,18 +357,18 @@ class InfinityConnection(DocStoreConnection): str_filter = f"id IN ({str_ids})" table_instance.delete(str_filter) # for doc in documents: - # logging.info(f"insert position_list: {doc['position_list']}") - # logging.info(f"InfinityConnection.insert {json.dumps(documents)}") + # logger.info(f"insert position_list: {doc['position_list']}") + # logger.info(f"InfinityConnection.insert {json.dumps(documents)}") table_instance.insert(documents) self.connPool.release_conn(inf_conn) - logging.debug(f"inserted into {table_name} {str_ids}.") + logger.debug(f"inserted into {table_name} {str_ids}.") return [] def update( self, condition: dict, newValue: dict, indexName: str, knowledgebaseId: str ) -> bool: # if 'position_list' in newValue: - # logging.info(f"upsert position_list: {newValue['position_list']}") + # logger.info(f"upsert position_list: {newValue['position_list']}") inf_conn = self.connPool.get_conn() db_instance = inf_conn.get_database(self.dbName) table_name = f"{indexName}_{knowledgebaseId}" @@ -388,7 +389,7 @@ class InfinityConnection(DocStoreConnection): try: table_instance = db_instance.get_table(table_name) except Exception: - 
logging.warning( + logger.warning( f"Skipped deleting `{filter}` from table {table_name} since the table doesn't exist." ) return 0