mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-12 07:19:04 +08:00
Accelerate titles' embeddings. (#4492)
### What problem does this PR solve?

### Type of change

- [x] Performance Improvement
This commit is contained in:
parent
b4614e9517
commit
c852a6dfbf
@ -78,7 +78,7 @@ class LayoutRecognizer(Recognizer):
|
||||
"x0": b["bbox"][0] / scale_factor, "x1": b["bbox"][2] / scale_factor,
|
||||
"top": b["bbox"][1] / scale_factor, "bottom": b["bbox"][-1] / scale_factor,
|
||||
"page_number": pn,
|
||||
} for b in lts if float(b["score"]) >= 0.8 or b["type"] not in self.garbage_layouts]
|
||||
} for b in lts if float(b["score"]) >= 0.4 or b["type"] not in self.garbage_layouts]
|
||||
lts = self.sort_Y_firstly(lts, np.mean(
|
||||
[lt["bottom"] - lt["top"] for lt in lts]) / 2)
|
||||
lts = self.layouts_cleanup(bxs, lts)
|
||||
|
@ -354,16 +354,9 @@ def embedding(docs, mdl, parser_config=None, callback=None):
|
||||
|
||||
tk_count = 0
|
||||
if len(tts) == len(cnts):
|
||||
tts_ = np.array([])
|
||||
for i in range(0, len(tts), batch_size):
|
||||
vts, c = mdl.encode(tts[i: i + batch_size])
|
||||
if len(tts_) == 0:
|
||||
tts_ = vts
|
||||
else:
|
||||
tts_ = np.concatenate((tts_, vts), axis=0)
|
||||
tk_count += c
|
||||
callback(prog=0.6 + 0.1 * (i + 1) / len(tts), msg="")
|
||||
tts = tts_
|
||||
vts, c = mdl.encode(tts[0: 1])
|
||||
tts = np.concatenate([vts for _ in range(len(tts))], axis=0)
|
||||
tk_count += c
|
||||
|
||||
cnts_ = np.array([])
|
||||
for i in range(0, len(cnts), batch_size):
|
||||
|
Loading…
x
Reference in New Issue
Block a user