mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-12 22:59:02 +08:00
Fix potential SSRF attack vulnerability (#4334)
### What problem does this PR solve?

Fix potential SSRF attack vulnerability

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>
This commit is contained in:
parent
5083d92998
commit
8674156d1c
@ -41,7 +41,7 @@ class Crawler(ComponentBase, ABC):
|
||||
ans = self.get_input()
|
||||
ans = " - ".join(ans["content"]) if "content" in ans else ""
|
||||
if not is_valid_url(ans):
|
||||
return Crawler.be_output("")
|
||||
return Crawler.be_output("URL not valid")
|
||||
try:
|
||||
result = asyncio.run(self.get_web(ans))
|
||||
|
||||
|
@ -1,4 +1,7 @@
|
||||
import re
|
||||
import socket
|
||||
from urllib.parse import urlparse
|
||||
import ipaddress
|
||||
import json
|
||||
import base64
|
||||
|
||||
@ -76,5 +79,25 @@ def __get_pdf_from_html(
|
||||
return base64.b64decode(result["data"])
|
||||
|
||||
|
||||
def is_private_ip(ip: str) -> bool:
    """Return True when *ip* is an IP literal that ``ipaddress`` flags as private.

    Args:
        ip: Textual IPv4 or IPv6 address.

    Returns:
        The ``is_private`` attribute of the parsed address, or ``False`` when
        *ip* cannot be parsed as an IP address at all.
    """
    try:
        ip_obj = ipaddress.ip_address(ip)
    except ValueError:
        # Not an IP literal: report "not private" rather than raising.
        return False
    return ip_obj.is_private


def is_valid_url(url: str) -> bool:
    """Check that *url* is an http(s) URL whose host resolves only to public IPs.

    This is a guard against server-side request forgery: a URL is rejected
    when its shape is wrong, when its host cannot be resolved, or when any
    address the host resolves to is private according to ``is_private_ip``.

    Args:
        url: Candidate URL supplied by the caller.

    Returns:
        ``True`` if the URL passes every check, ``False`` otherwise. Parsing
        and resolution failures are reported as ``False`` instead of raising.
    """
    if not re.match(r"(https?)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]", url):
        return False

    # The regex only anchors the prefix, so urlparse can still see malformed
    # input (e.g. an unbalanced "[" in the netloc) and raise ValueError.
    try:
        hostname = urlparse(url).hostname
    except ValueError:
        return False
    if not hostname:
        return False

    # getaddrinfo returns every IPv4 and IPv6 address for the host, whereas
    # gethostbyname yields a single IPv4 address and would miss the others.
    try:
        addr_infos = socket.getaddrinfo(hostname, None, proto=socket.IPPROTO_TCP)
    except (OSError, ValueError):
        # OSError covers socket.gaierror; ValueError covers the UnicodeError
        # raised when the hostname cannot be IDNA-encoded (e.g. "a..b").
        return False
    if not addr_infos:
        return False

    for info in addr_infos:
        # sockaddr[0] is the textual address; drop any IPv6 "%scope" suffix
        # so the address always parses.
        address = info[4][0].split("%", 1)[0]
        if is_private_ip(address):
            return False
    return True
|
Loading…
x
Reference in New Issue
Block a user