From baa108f5cca5d7d7914ff2a1d9f25ab9366a105a Mon Sep 17 00:00:00 2001 From: alkscr <72345450+alkscr@users.noreply.github.com> Date: Mon, 12 May 2025 17:16:55 +0800 Subject: [PATCH] Fix: markdown table conversion error (#7570) ### What problem does this PR solve? Since `import markdown.markdown` has been changed to `import markdown` in `rag/app/naive.py`, the previous code for converting markdown tables would call the `markdown` module instead of a callable function. This causes an error. ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) - [ ] New Feature (non-breaking change which adds functionality) - [ ] Documentation Update - [ ] Refactoring - [ ] Performance Improvement - [ ] Other (please describe): --- rag/app/naive.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/rag/app/naive.py b/rag/app/naive.py index dcaea240c..e2e4a6a45 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -22,7 +22,7 @@ from timeit import default_timer as timer from docx import Document from docx.image.exceptions import InvalidImageStreamError, UnexpectedEndOfFileError, UnrecognizedImageError -import markdown +from markdown import markdown from PIL import Image from tika import parser @@ -298,8 +298,7 @@ class Markdown(MarkdownParser): return [] from bs4 import BeautifulSoup - md = markdown.Markdown() - html_content = md.convert(text) + html_content = markdown(text) soup = BeautifulSoup(html_content, 'html.parser') html_images = [img.get('src') for img in soup.find_all('img') if img.get('src')] return html_images @@ -344,7 +343,6 @@ class Markdown(MarkdownParser): sections.append((sec_ + "\n" + sec, "")) else: sections.append((sec, "")) - for table in tables: tbls.append(((None, markdown(table, extensions=['markdown.extensions.tables'])), "")) return sections, tbls