mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-07-29 01:52:03 +08:00
Fix: empty query issue. (#7551)
### What problem does this PR solve? #5214 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
parent
d66c17ab5c
commit
a14865e6bb
@ -309,7 +309,7 @@ class RAGFlowPdfParser:
|
||||
"bottom": b[-1][1] / ZM,
|
||||
"chars": [],
|
||||
"page_number": pagenum} for b, t in bxs if b[0][0] <= b[1][0] and b[0][1] <= b[-1][1]],
|
||||
self.mean_height[-1] / 3
|
||||
self.mean_height[pagenum-1] / 3
|
||||
)
|
||||
|
||||
# merge chars in the same rect
|
||||
@ -355,8 +355,8 @@ class RAGFlowPdfParser:
|
||||
del boxes_to_reg[i]["box_image"]
|
||||
logging.info(f"__ocr recognize {len(bxs)} boxes cost {timer() - start}s")
|
||||
bxs = [b for b in bxs if b["text"]]
|
||||
if self.mean_height[-1] == 0:
|
||||
self.mean_height[-1] = np.median([b["bottom"] - b["top"]
|
||||
if self.mean_height[pagenum-1] == 0:
|
||||
self.mean_height[pagenum-1] = np.median([b["bottom"] - b["top"]
|
||||
for b in bxs])
|
||||
self.boxes.append(bxs)
|
||||
|
||||
|
@ -77,6 +77,7 @@ class FulltextQueryer:
|
||||
" ",
|
||||
rag_tokenizer.tradi2simp(rag_tokenizer.strQ2B(txt.lower())),
|
||||
).strip()
|
||||
otxt = txt
|
||||
txt = FulltextQueryer.rmWWW(txt)
|
||||
|
||||
if not self.isChinese(txt):
|
||||
@ -196,6 +197,8 @@ class FulltextQueryer:
|
||||
|
||||
if qs:
|
||||
query = " OR ".join([f"({t})" for t in qs if t])
|
||||
if not query:
|
||||
query = otxt
|
||||
return MatchTextExpr(
|
||||
self.query_fields, query, 100, {"minimum_should_match": min_match}
|
||||
), keywords
|
||||
|
Loading…
x
Reference in New Issue
Block a user