mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-12 06:19:03 +08:00
fix: keep image url (#17430)
This commit is contained in:
parent
296e2ef90f
commit
fc3f14c0ee
@ -27,9 +27,26 @@ class CleanProcessor:
|
||||
pattern = r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)"
|
||||
text = re.sub(pattern, "", text)
|
||||
|
||||
# Remove URL
|
||||
pattern = r"https?://[^\s]+"
|
||||
text = re.sub(pattern, "", text)
|
||||
# Remove URL but keep Markdown image URLs
|
||||
# First, temporarily replace Markdown image URLs with a placeholder
|
||||
markdown_image_pattern = r"!\[.*?\]\((https?://[^\s)]+)\)"
|
||||
placeholders: list[str] = []
|
||||
|
||||
def replace_with_placeholder(match, placeholders=placeholders):
|
||||
url = match.group(1)
|
||||
placeholder = f"__MARKDOWN_IMAGE_URL_{len(placeholders)}__"
|
||||
placeholders.append(url)
|
||||
return f""
|
||||
|
||||
text = re.sub(markdown_image_pattern, replace_with_placeholder, text)
|
||||
|
||||
# Now remove all remaining URLs
|
||||
url_pattern = r"https?://[^\s)]+"
|
||||
text = re.sub(url_pattern, "", text)
|
||||
|
||||
# Finally, restore the Markdown image URLs
|
||||
for i, url in enumerate(placeholders):
|
||||
text = text.replace(f"__MARKDOWN_IMAGE_URL_{i}__", url)
|
||||
return text
|
||||
|
||||
def filter_string(self, text):
|
||||
|
Loading…
x
Reference in New Issue
Block a user