diff --git a/rag/app/naive.py b/rag/app/naive.py index dcaea240c..e2e4a6a45 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -22,7 +22,7 @@ from timeit import default_timer as timer from docx import Document from docx.image.exceptions import InvalidImageStreamError, UnexpectedEndOfFileError, UnrecognizedImageError -import markdown +from markdown import markdown from PIL import Image from tika import parser @@ -298,8 +298,7 @@ class Markdown(MarkdownParser): return [] from bs4 import BeautifulSoup - md = markdown.Markdown() - html_content = md.convert(text) + html_content = markdown(text) soup = BeautifulSoup(html_content, 'html.parser') html_images = [img.get('src') for img in soup.find_all('img') if img.get('src')] return html_images @@ -344,7 +343,6 @@ class Markdown(MarkdownParser): sections.append((sec_ + "\n" + sec, "")) else: sections.append((sec, "")) - for table in tables: tbls.append(((None, markdown(table, extensions=['markdown.extensions.tables'])), "")) return sections, tbls