Mirror of https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git (synced 2025-06-04 11:24:00 +08:00)
Reuse loaded modules if possible (#5231)
### What problem does this PR solve?

Reuse loaded modules if possible.

### Type of change

- [x] Refactoring
This commit is contained in:
parent 392f28882f
commit 0151d42156
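For orientation before the diff: the pattern this commit introduces is a module-level cache keyed by the resolved model file path, so repeated constructions of the OCR and recognizer classes reuse one ONNX Runtime session instead of loading the same `.onnx` file again. A minimal sketch of the idea follows; it is not the project's code verbatim, and the CPU-only branch stands in for the full CUDA/CPU handling in the real `load_model`:

```python
import logging
import os

import onnxruntime as ort  # assumed installed, as in the real module

loaded_models = {}  # model file path -> (InferenceSession, RunOptions)


def load_model(model_dir, nm):
    """Create an ONNX Runtime session once per model file and reuse it afterwards."""
    model_file_path = os.path.join(model_dir, nm + ".onnx")
    cached = loaded_models.get(model_file_path)
    if cached:
        logging.info("load_model %s reuses cached model", model_file_path)
        return cached
    # Only the CPU provider is shown here; the real function also configures
    # CUDAExecutionProvider and memory-arena shrinkage when a GPU is available.
    sess = ort.InferenceSession(model_file_path, providers=["CPUExecutionProvider"])
    run_options = ort.RunOptions()
    loaded_models[model_file_path] = (sess, run_options)
    return sess, run_options
```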
@@ -31,6 +31,7 @@ import onnxruntime as ort
 
 from .postprocess import build_post_process
 
+loaded_models = {}
 
 def transform(data, ops=None):
     """ transform """
@@ -67,6 +68,12 @@ def create_operators(op_param_list, global_config=None):
 
 def load_model(model_dir, nm):
     model_file_path = os.path.join(model_dir, nm + ".onnx")
+    global loaded_models
+    loaded_model = loaded_models.get(model_file_path)
+    if loaded_model:
+        logging.info(f"load_model {model_file_path} reuses cached model")
+        return loaded_model
+
     if not os.path.exists(model_file_path):
         raise ValueError("not find model file path {}".format(
             model_file_path))
@@ -102,15 +109,17 @@ def load_model(model_dir, nm):
             provider_options=[cuda_provider_options]
         )
         run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
-        logging.info(f"TextRecognizer {nm} uses GPU")
+        logging.info(f"load_model {model_file_path} uses GPU")
     else:
         sess = ort.InferenceSession(
             model_file_path,
             options=options,
             providers=['CPUExecutionProvider'])
         run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")
-        logging.info(f"TextRecognizer {nm} uses CPU")
-    return sess, sess.get_inputs()[0], run_options
+        logging.info(f"load_model {model_file_path} uses CPU")
+    loaded_model = (sess, run_options)
+    loaded_models[model_file_path] = loaded_model
+    return loaded_model
 
 
 class TextRecognizer(object):
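Note the signature change in the hunk above: `load_model` previously returned `(session, input_tensor, run_options)` and now returns only the `(session, run_options)` pair that it caches, so each caller looks the input tensor up from the session itself. A hedged usage fragment, with an illustrative model directory:

```python
# Illustrative caller; "rec" matches the task name used by TextRecognizer below.
predictor, run_options = load_model("/path/to/rag/res/deepdoc", "rec")
input_tensor = predictor.get_inputs()[0]   # NodeArg: exposes .name and .shape
print(input_tensor.name, input_tensor.shape)
```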
@@ -123,7 +132,8 @@ class TextRecognizer(object):
             "use_space_char": True
         }
         self.postprocess_op = build_post_process(postprocess_params)
-        self.predictor, self.input_tensor, self.run_options = load_model(model_dir, 'rec')
+        self.predictor, self.run_options = load_model(model_dir, 'rec')
+        self.input_tensor = self.predictor.get_inputs()[0]
 
     def resize_norm_img(self, img, max_wh_ratio):
         imgC, imgH, imgW = self.rec_image_shape
@@ -408,7 +418,8 @@ class TextDetector(object):
             "unclip_ratio": 1.5, "use_dilation": False, "score_mode": "fast", "box_type": "quad"}
 
         self.postprocess_op = build_post_process(postprocess_params)
-        self.predictor, self.input_tensor, self.run_options = load_model(model_dir, 'det')
+        self.predictor, self.run_options = load_model(model_dir, 'det')
+        self.input_tensor = self.predictor.get_inputs()[0]
 
         img_h, img_w = self.input_tensor.shape[2:]
         if isinstance(img_h, str) or isinstance(img_w, str):
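The detector context lines above also show why the input tensor is inspected rather than assumed: ONNX inputs with dynamic spatial axes report symbolic dimension names (strings) instead of integers, which is what the existing `isinstance` check guards against. A small illustrative helper, hypothetical and not part of the diff:

```python
def static_hw(input_node):
    """Return (height, width) if the ONNX input has fixed spatial dims, else None."""
    img_h, img_w = input_node.shape[2:4]
    if isinstance(img_h, str) or isinstance(img_w, str):
        return None  # dynamic axes, e.g. a shape like [1, 3, "h", "w"]
    return img_h, img_w
```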
@@ -21,14 +21,12 @@ import numpy as np
 import cv2
 from functools import cmp_to_key
 
-import onnxruntime as ort
-from huggingface_hub import snapshot_download
-
 from api.utils.file_utils import get_project_base_directory
 from .operators import * # noqa: F403
 from .operators import preprocess
 from . import operators
+from .ocr import load_model
 
 
 class Recognizer(object):
     def __init__(self, label_list, task_name, model_dir=None):
@@ -47,51 +45,7 @@ class Recognizer(object):
             model_dir = os.path.join(
                 get_project_base_directory(),
                 "rag/res/deepdoc")
-            model_file_path = os.path.join(model_dir, task_name + ".onnx")
-            if not os.path.exists(model_file_path):
-                model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc",
-                                              local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"),
-                                              local_dir_use_symlinks=False)
-                model_file_path = os.path.join(model_dir, task_name + ".onnx")
-        else:
-            model_file_path = os.path.join(model_dir, task_name + ".onnx")
-
-        if not os.path.exists(model_file_path):
-            raise ValueError("not find model file path {}".format(
-                model_file_path))
-
-        def cuda_is_available():
-            try:
-                import torch
-                if torch.cuda.is_available():
-                    return True
-            except Exception:
-                return False
-            return False
-
-        # https://github.com/microsoft/onnxruntime/issues/9509#issuecomment-951546580
-        # Shrink GPU memory after execution
-        self.run_options = ort.RunOptions()
-
-        if cuda_is_available():
-            options = ort.SessionOptions()
-            options.enable_cpu_mem_arena = False
-            cuda_provider_options = {
-                "device_id": 0, # Use specific GPU
-                "gpu_mem_limit": 512 * 1024 * 1024, # Limit gpu memory
-                "arena_extend_strategy": "kNextPowerOfTwo", # gpu memory allocation strategy
-            }
-            self.ort_sess = ort.InferenceSession(
-                model_file_path, options=options,
-                providers=['CUDAExecutionProvider'],
-                provider_options=[cuda_provider_options]
-            )
-            self.run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
-            logging.info(f"Recognizer {task_name} uses GPU")
-        else:
-            self.ort_sess = ort.InferenceSession(model_file_path, providers=['CPUExecutionProvider'])
-            self.run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")
-            logging.info(f"Recognizer {task_name} uses CPU")
+        self.ort_sess, self.run_options = load_model(model_dir, task_name)
         self.input_names = [node.name for node in self.ort_sess.get_inputs()]
         self.output_names = [node.name for node in self.ort_sess.get_outputs()]
         self.input_shape = self.ort_sess.get_inputs()[0].shape[2:4]
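The last hunk removes the duplicated CUDA detection, session options, and arena-shrinkage setup from `Recognizer.__init__` (along with the `snapshot_download` fallback for missing models) and delegates everything to the shared `load_model`. One practical detail carried by the cached tuple is that the `RunOptions` is intended to be passed back into `run()`, which is what activates the memory-arena shrinkage the loader configures. A hedged usage sketch; the task name, input name, and shape are illustrative:

```python
import numpy as np

# Cached after the first call for the same model path.
sess, run_options = load_model("/path/to/rag/res/deepdoc", "layout")
feed = {sess.get_inputs()[0].name: np.zeros((1, 3, 224, 224), dtype=np.float32)}
# Passing run_options applies the memory-arena shrinkage configured by the loader.
outputs = sess.run(None, feed, run_options=run_options)
```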