From fe797bcc6672bf6432d5bc16c9e25bd242ec05ed Mon Sep 17 00:00:00 2001
From: Kevin Hu <kevinhu.sh@gmail.com>
Date: Mon, 5 Aug 2024 16:21:52 +0800
Subject: [PATCH] Build better chunks before graphrag (#1811)

### What problem does this PR solve?

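Addresses #1594. When `section_only` is set, `chunk()` in `rag/app/naive.py`
now returns the chunks produced by `naive_merge` instead of the raw section
texts.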

### Type of change

- [x] Refactoring
---
 rag/app/naive.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/rag/app/naive.py b/rag/app/naive.py
index 6c39954c5..ab824bfab 100644
--- a/rag/app/naive.py
+++ b/rag/app/naive.py
@@ -273,14 +273,13 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
         raise NotImplementedError(
             "file type not supported yet(pdf, xlsx, doc, docx, txt supported)")
 
-    if kwargs.get("section_only", False):
-        return [t for t, _ in sections]
-
     st = timer()
     chunks = naive_merge(
         sections, int(parser_config.get(
             "chunk_token_num", 128)), parser_config.get(
             "delimiter", "\n!?。；！？"))
+    if kwargs.get("section_only", False):
+        return chunks
 
     res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser))
     cron_logger.info("naive_merge({}): {}".format(filename, timer() - st))