diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py
index 3973ce03a..361a88872 100644
--- a/deepdoc/parser/pdf_parser.py
+++ b/deepdoc/parser/pdf_parser.py
@@ -307,13 +307,13 @@ class RAGFlowPdfParser:
             [{"x0": b[0][0] / ZM, "x1": b[1][0] / ZM,
               "top": b[0][1] / ZM, "text": "", "txt": t,
               "bottom": b[-1][1] / ZM,
+              "chars": [],
               "page_number": pagenum} for b, t in bxs if b[0][0] <= b[1][0] and b[0][1] <= b[-1][1]],
             self.mean_height[-1] / 3
         )
 
         # merge chars in the same rect
-        for c in Recognizer.sort_Y_firstly(
-                chars, self.mean_height[pagenum - 1] // 4):
+        for c in chars:
             ii = Recognizer.find_overlapped(c, bxs)
             if ii is None:
                 self.lefted_chars.append(c)
@@ -323,11 +323,20 @@ class RAGFlowPdfParser:
             if abs(ch - bh) / max(ch, bh) >= 0.7 and c["text"] != ' ':
                 self.lefted_chars.append(c)
                 continue
-            if c["text"] == " " and bxs[ii]["text"]:
-                if re.match(r"[0-9a-zA-Zа-яА-Я,.?;:!%%]", bxs[ii]["text"][-1]):
-                    bxs[ii]["text"] += " "
-            else:
-                bxs[ii]["text"] += c["text"]
+            bxs[ii]["chars"].append(c)
+
+        for b in bxs:
+            if not b["chars"]:
+                del b["chars"]
+                continue
+            m_ht = np.mean([c["height"] for c in b["chars"]])
+            for c in Recognizer.sort_Y_firstly(b["chars"], m_ht):
+                if c["text"] == " " and b["text"]:
+                    if re.match(r"[0-9a-zA-Zа-яА-Я,.?;:!%%]", b["text"][-1]):
+                        b["text"] += " "
+                else:
+                    b["text"] += c["text"]
+            del b["chars"]
 
         logging.info(f"__ocr sorting {len(chars)} chars cost {timer() - start}s")
         start = timer()