Support parsing of code files (#789)

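### What problem does this PR solve?

_Briefly describe what this PR aims to solve. Include background context that will help reviewers understand the purpose of the PR._

Adds common source-code extensions (`py`, `js`, `java`, `c`, `cpp`, `h`, `php`, `go`, `ts`, `sh`, `cs`, `kt`) to the document-type pattern in `filename_type` and to the plain-text (`txt`/`md`) parsing branch of `chunk` in `rag/app/naive.py`.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)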
2025-08-12 19:29:01 +08:00 · 2024-05-15 16:34:28 +08:00 · 2024-05-15 16:34:28 +08:00 · 6ff63ee2ba
commit 6ff63ee2ba
parent 12b4c5668c
2 changed files with 2 additions and 2 deletions
--- a/api/utils/file_utils.py
+++ b/api/utils/file_utils.py
@@ -156,7 +156,7 @@ def filename_type(filename):
        return FileType.PDF.value

    if re.match(
-            r".*\.(doc|docx|ppt|pptx|yml|xml|htm|json|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md)$", filename):
+            r".*\.(doc|docx|ppt|pptx|yml|xml|htm|json|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt)$", filename):
        return FileType.DOC.value

    if re.match(
--- a/rag/app/naive.py
+++ b/rag/app/naive.py
@@ -136,7 +136,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
        excel_parser = ExcelParser()
        sections = [(excel_parser.html(binary), "")]

-    elif re.search(r"\.(txt|md)$", filename, re.IGNORECASE):
+    elif re.search(r"\.(txt|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt)$", filename, re.IGNORECASE):
        callback(0.1, "Start to parse.")
        txt = ""
        if binary: