mirror of
https://git.mirrors.martin98.com/https://github.com/infiniflow/ragflow.git
synced 2025-08-14 05:55:59 +08:00
Fix potential SSRF attack vulnerability (#4334)
### What problem does this PR solve?

Fix potential SSRF attack vulnerability

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>
This commit is contained in:
parent
5083d92998
commit
8674156d1c
@ -41,7 +41,7 @@ class Crawler(ComponentBase, ABC):
|
|||||||
ans = self.get_input()
|
ans = self.get_input()
|
||||||
ans = " - ".join(ans["content"]) if "content" in ans else ""
|
ans = " - ".join(ans["content"]) if "content" in ans else ""
|
||||||
if not is_valid_url(ans):
|
if not is_valid_url(ans):
|
||||||
return Crawler.be_output("")
|
return Crawler.be_output("URL not valid")
|
||||||
try:
|
try:
|
||||||
result = asyncio.run(self.get_web(ans))
|
result = asyncio.run(self.get_web(ans))
|
||||||
|
|
||||||
|
@ -1,4 +1,7 @@
|
|||||||
import re
|
import re
|
||||||
|
import socket
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
import ipaddress
|
||||||
import json
|
import json
|
||||||
import base64
|
import base64
|
||||||
|
|
||||||
@ -76,5 +79,25 @@ def __get_pdf_from_html(
|
|||||||
return base64.b64decode(result["data"])
|
return base64.b64decode(result["data"])
|
||||||
|
|
||||||
|
|
||||||
|
def is_private_ip(ip: str) -> bool:
    """Return True when *ip* is an address that must not be fetched from.

    Despite the name, this covers every non-public range that matters for an
    SSRF guard, not only the RFC 1918 "private" blocks: loopback, link-local,
    multicast, reserved, unspecified and shared (carrier-grade NAT) addresses
    are all reported as private.

    Args:
        ip: Textual IPv4 or IPv6 address.

    Returns:
        True if the address is not a publicly routable unicast address.
        False if it is public, or if *ip* cannot be parsed as an IP address
        (the historical behaviour of this helper, kept for compatibility).
    """
    try:
        address = ipaddress.ip_address(ip)
    except ValueError:
        return False

    # An IPv4-mapped IPv6 address (::ffff:a.b.c.d) reaches the embedded IPv4
    # host, so classify the embedded address instead of the IPv6 wrapper.
    embedded_v4 = getattr(address, "ipv4_mapped", None)
    if embedded_v4 is not None:
        address = embedded_v4

    return (
        address.is_private
        or address.is_loopback
        or address.is_link_local
        or address.is_multicast
        or address.is_reserved
        or address.is_unspecified
        # Catches ranges that are neither "private" nor global, such as the
        # 100.64.0.0/10 shared address space.
        or not address.is_global
    )
|
||||||
|
|
||||||
def is_valid_url(url: str) -> bool:
    """Return True when *url* is an http(s) URL that resolves only to public hosts.

    The check is an SSRF guard: a URL is rejected when it is not
    syntactically an http/https URL, has no hostname, cannot be resolved, or
    when ANY address its hostname resolves to is reported by
    ``is_private_ip`` as private.

    Args:
        url: Candidate URL.

    Returns:
        True if the URL passes every check, False otherwise. The function
        does not raise for malformed input.
    """
    if not isinstance(url, str):
        return False
    if not re.match(r"(https?)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]", url):
        return False

    # The pattern above is not anchored at the end, so text such as
    # "http://ab[c" gets this far; urlparse raises ValueError on it.
    try:
        hostname = urlparse(url).hostname
    except ValueError:
        return False
    if not hostname:
        return False

    # getaddrinfo returns every IPv4 and IPv6 address of the host, whereas
    # gethostbyname yields a single IPv4 address and would let a host with
    # one public and one internal record through.
    try:
        addr_infos = socket.getaddrinfo(hostname, None)
    except (socket.gaierror, ValueError):
        # ValueError also covers UnicodeError from IDNA encoding of labels
        # that are empty or too long (e.g. "a..b").
        return False

    # sockaddr[0] is the textual address; drop any IPv6 zone suffix
    # ("fe80::1%eth0") so the address always parses.
    resolved = {info[4][0].split("%", 1)[0] for info in addr_infos}
    if not resolved:
        return False

    # NOTE(review): the client that later fetches the URL resolves the name
    # again, so this check alone does not stop DNS rebinding.
    return not any(is_private_ip(address) for address in resolved)
|
Loading…
x
Reference in New Issue
Block a user