mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-13 23:15:59 +08:00
Accelerate titles' embeddings. (#4492)
### What problem does this PR solve?

### Type of change

- [x] Performance Improvement
This commit is contained in:
parent
b4614e9517
commit
c852a6dfbf
@@ -78,7 +78,7 @@ class LayoutRecognizer(Recognizer):
|
|||||||
"x0": b["bbox"][0] / scale_factor, "x1": b["bbox"][2] / scale_factor,
|
"x0": b["bbox"][0] / scale_factor, "x1": b["bbox"][2] / scale_factor,
|
||||||
"top": b["bbox"][1] / scale_factor, "bottom": b["bbox"][-1] / scale_factor,
|
"top": b["bbox"][1] / scale_factor, "bottom": b["bbox"][-1] / scale_factor,
|
||||||
"page_number": pn,
|
"page_number": pn,
|
||||||
} for b in lts if float(b["score"]) >= 0.8 or b["type"] not in self.garbage_layouts]
|
} for b in lts if float(b["score"]) >= 0.4 or b["type"] not in self.garbage_layouts]
|
||||||
lts = self.sort_Y_firstly(lts, np.mean(
|
lts = self.sort_Y_firstly(lts, np.mean(
|
||||||
[lt["bottom"] - lt["top"] for lt in lts]) / 2)
|
[lt["bottom"] - lt["top"] for lt in lts]) / 2)
|
||||||
lts = self.layouts_cleanup(bxs, lts)
|
lts = self.layouts_cleanup(bxs, lts)
|
||||||
|
@@ -354,16 +354,9 @@ def embedding(docs, mdl, parser_config=None, callback=None):
|
|||||||
|
|
||||||
tk_count = 0
|
tk_count = 0
|
||||||
if len(tts) == len(cnts):
|
if len(tts) == len(cnts):
|
||||||
tts_ = np.array([])
|
vts, c = mdl.encode(tts[0: 1])
|
||||||
for i in range(0, len(tts), batch_size):
|
tts = np.concatenate([vts for _ in range(len(tts))], axis=0)
|
||||||
vts, c = mdl.encode(tts[i: i + batch_size])
|
|
||||||
if len(tts_) == 0:
|
|
||||||
tts_ = vts
|
|
||||||
else:
|
|
||||||
tts_ = np.concatenate((tts_, vts), axis=0)
|
|
||||||
tk_count += c
|
tk_count += c
|
||||||
callback(prog=0.6 + 0.1 * (i + 1) / len(tts), msg="")
|
|
||||||
tts = tts_
|
|
||||||
|
|
||||||
cnts_ = np.array([])
|
cnts_ = np.array([])
|
||||||
for i in range(0, len(cnts), batch_size):
|
for i in range(0, len(cnts), batch_size):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user