deepdoc use GPU if possible (#4618)
### What problem does this PR solve?

deepdoc use GPU if possible

### Type of change

- [x] Refactoring
commit 4230402fbb (parent e14d6ae441)
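The diff below routes deepdoc's onnxruntime sessions to the CUDA execution provider whenever onnxruntime reports a GPU, caps the CUDA memory arena, and shrinks it after every run. As a minimal, standalone sketch of that pattern (assuming an onnxruntime install; the helper name `build_session` and the model path are illustrative, not taken from the repo):

```python
import logging
import onnxruntime as ort

def build_session(model_file_path: str):
    """Illustrative helper mirroring the PR's GPU-if-possible pattern."""
    options = ort.SessionOptions()
    run_options = ort.RunOptions()
    if ort.get_device() == "GPU":
        cuda_provider_options = {
            "device_id": 0,                      # pin to one GPU
            "gpu_mem_limit": 512 * 1024 * 1024,  # cap the CUDA arena at 512 MiB
            "arena_extend_strategy": "kNextPowerOfTwo",
        }
        sess = ort.InferenceSession(
            model_file_path,
            sess_options=options,
            providers=["CUDAExecutionProvider"],
            provider_options=[cuda_provider_options],
        )
        # Ask ORT to shrink the GPU arena after each run() call.
        run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
    else:
        sess = ort.InferenceSession(
            model_file_path,
            sess_options=options,
            providers=["CPUExecutionProvider"],
        )
        run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")
    logging.info("providers in use: %s", sess.get_providers())
    return sess, run_options
```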
@@ -14,6 +14,7 @@
 # limitations under the License.
 #
 
+import logging
 import copy
 import time
 import os
@@ -75,17 +76,32 @@ def load_model(model_dir, nm):
     options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
     options.intra_op_num_threads = 2
     options.inter_op_num_threads = 2
-    if False and ort.get_device() == "GPU":
+
+    # https://github.com/microsoft/onnxruntime/issues/9509#issuecomment-951546580
+    # Shrink GPU memory after execution
+    run_options = ort.RunOptions()
+    if ort.get_device() == "GPU":
+        cuda_provider_options = {
+            "device_id": 0,  # Use specific GPU
+            "gpu_mem_limit": 512 * 1024 * 1024,  # Limit gpu memory
+            "arena_extend_strategy": "kNextPowerOfTwo",  # gpu memory allocation strategy
+        }
         sess = ort.InferenceSession(
             model_file_path,
             options=options,
-            providers=['CUDAExecutionProvider'])
+            providers=['CUDAExecutionProvider'],
+            provider_options=[cuda_provider_options]
+        )
+        run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
+        logging.info(f"TextRecognizer {nm} uses GPU")
     else:
         sess = ort.InferenceSession(
             model_file_path,
             options=options,
             providers=['CPUExecutionProvider'])
-    return sess, sess.get_inputs()[0]
+        run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")
+        logging.info(f"TextRecognizer {nm} uses CPU")
+    return sess, sess.get_inputs()[0], run_options
 
 
 class TextRecognizer(object):
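load_model now returns the RunOptions alongside the session and its first input, and the callers in the hunks below thread it into every predictor.run() so the arena-shrinkage setting applies per inference. A hedged usage sketch of that hand-off (the 'rec' task, model directory, and the 1x3x48x320 input shape are illustrative, not the repo's actual values):

```python
import numpy as np

model_dir = "/path/to/models"  # placeholder, not a repo path
# Uses the load_model shown in the hunk above.
sess, input_tensor, run_options = load_model(model_dir, 'rec')
dummy = np.zeros((1, 3, 48, 320), dtype=np.float32)  # illustrative shape only
outputs = sess.run(None, {input_tensor.name: dummy}, run_options)
```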
@@ -98,7 +114,7 @@ class TextRecognizer(object):
             "use_space_char": True
         }
         self.postprocess_op = build_post_process(postprocess_params)
-        self.predictor, self.input_tensor = load_model(model_dir, 'rec')
+        self.predictor, self.input_tensor, self.run_options = load_model(model_dir, 'rec')
 
     def resize_norm_img(self, img, max_wh_ratio):
         imgC, imgH, imgW = self.rec_image_shape
@@ -344,7 +360,7 @@ class TextRecognizer(object):
             input_dict[self.input_tensor.name] = norm_img_batch
             for i in range(100000):
                 try:
-                    outputs = self.predictor.run(None, input_dict)
+                    outputs = self.predictor.run(None, input_dict, self.run_options)
                     break
                 except Exception as e:
                     if i >= 3:
@@ -383,7 +399,7 @@ class TextDetector(object):
             "unclip_ratio": 1.5, "use_dilation": False, "score_mode": "fast", "box_type": "quad"}
 
         self.postprocess_op = build_post_process(postprocess_params)
-        self.predictor, self.input_tensor = load_model(model_dir, 'det')
+        self.predictor, self.input_tensor, self.run_options = load_model(model_dir, 'det')
 
         img_h, img_w = self.input_tensor.shape[2:]
         if isinstance(img_h, str) or isinstance(img_w, str):
@@ -456,7 +472,7 @@ class TextDetector(object):
         input_dict[self.input_tensor.name] = img
         for i in range(100000):
             try:
-                outputs = self.predictor.run(None, input_dict)
+                outputs = self.predictor.run(None, input_dict, self.run_options)
                 break
             except Exception as e:
                 if i >= 3:
@@ -60,12 +60,29 @@ class Recognizer(object):
         if not os.path.exists(model_file_path):
             raise ValueError("not find model file path {}".format(
                 model_file_path))
-        if False and ort.get_device() == "GPU":
+
+        # https://github.com/microsoft/onnxruntime/issues/9509#issuecomment-951546580
+        # Shrink GPU memory after execution
+        self.run_options = ort.RunOptions()
+
+        if ort.get_device() == "GPU":
             options = ort.SessionOptions()
             options.enable_cpu_mem_arena = False
-            self.ort_sess = ort.InferenceSession(model_file_path, options=options, providers=[('CUDAExecutionProvider')])
+            cuda_provider_options = {
+                "device_id": 0,  # Use specific GPU
+                "gpu_mem_limit": 512 * 1024 * 1024,  # Limit gpu memory
+                "arena_extend_strategy": "kNextPowerOfTwo",  # gpu memory allocation strategy
+            }
+            self.ort_sess = ort.InferenceSession(
+                model_file_path, options=options,
+                providers=['CUDAExecutionProvider'],
+                provider_options=[cuda_provider_options]
+            )
+            self.run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
+            logging.info(f"Recognizer {task_name} uses GPU")
         else:
             self.ort_sess = ort.InferenceSession(model_file_path, providers=['CPUExecutionProvider'])
+            self.run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")
+            logging.info(f"Recognizer {task_name} uses CPU")
         self.input_names = [node.name for node in self.ort_sess.get_inputs()]
         self.output_names = [node.name for node in self.ort_sess.get_outputs()]
         self.input_shape = self.ort_sess.get_inputs()[0].shape[2:4]
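Not part of the PR, but a small check that can help when debugging this path: onnxruntime's behaviour when the requested CUDA provider fails to load differs across versions (some builds fall back to CPU, others raise), so logging the providers the session actually enabled makes the outcome explicit:

```python
# Optional sanity check after constructing self.ort_sess (not in the PR):
logging.info("Recognizer active providers: %s", self.ort_sess.get_providers())
```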
@@ -454,7 +471,7 @@ class Recognizer(object):
         inputs = self.preprocess(batch_image_list)
         logging.debug("preprocess")
         for ins in inputs:
-            bb = self.postprocess(self.ort_sess.run(None, {k:v for k,v in ins.items() if k in self.input_names})[0], ins, thr)
+            bb = self.postprocess(self.ort_sess.run(None, {k:v for k,v in ins.items() if k in self.input_names}, self.run_options)[0], ins, thr)
             res.append(bb)
 
         #seeit.save_results(image_list, res, self.label_list, threshold=thr)