Fix: markdown table conversion error (#7570)

### What problem does this PR solve?

Since `import markdown.markdown` was changed to `import markdown`
in `rag/app/naive.py`, the previous code for converting markdown tables
calls the `markdown` module instead of a callable function. This causes
an error.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):
This commit is contained in:
alkscr 2025-05-12 17:16:55 +08:00 committed by GitHub
parent 4a891f2d67
commit baa108f5cc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -22,7 +22,7 @@ from timeit import default_timer as timer
from docx import Document from docx import Document
from docx.image.exceptions import InvalidImageStreamError, UnexpectedEndOfFileError, UnrecognizedImageError from docx.image.exceptions import InvalidImageStreamError, UnexpectedEndOfFileError, UnrecognizedImageError
import markdown from markdown import markdown
from PIL import Image from PIL import Image
from tika import parser from tika import parser
@ -298,8 +298,7 @@ class Markdown(MarkdownParser):
return [] return []
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
md = markdown.Markdown() html_content = markdown(text)
html_content = md.convert(text)
soup = BeautifulSoup(html_content, 'html.parser') soup = BeautifulSoup(html_content, 'html.parser')
html_images = [img.get('src') for img in soup.find_all('img') if img.get('src')] html_images = [img.get('src') for img in soup.find_all('img') if img.get('src')]
return html_images return html_images
@ -344,7 +343,6 @@ class Markdown(MarkdownParser):
sections.append((sec_ + "\n" + sec, "")) sections.append((sec_ + "\n" + sec, ""))
else: else:
sections.append((sec, "")) sections.append((sec, ""))
for table in tables: for table in tables:
tbls.append(((None, markdown(table, extensions=['markdown.extensions.tables'])), "")) tbls.append(((None, markdown(table, extensions=['markdown.extensions.tables'])), ""))
return sections, tbls return sections, tbls