mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-12 18:29:04 +08:00
refactor(file_factory): improve filename and mime type determination (#9784)
This commit is contained in:
parent
fc63841169
commit
e54b7cda3d
@ -179,27 +179,19 @@ def _build_from_remote_url(
|
|||||||
if not url:
|
if not url:
|
||||||
raise ValueError("Invalid file url")
|
raise ValueError("Invalid file url")
|
||||||
|
|
||||||
|
mime_type = mimetypes.guess_type(url)[0] or ""
|
||||||
|
file_size = -1
|
||||||
|
filename = url.split("/")[-1].split("?")[0] or "unknown_file"
|
||||||
|
|
||||||
resp = ssrf_proxy.head(url, follow_redirects=True)
|
resp = ssrf_proxy.head(url, follow_redirects=True)
|
||||||
if resp.status_code == httpx.codes.OK:
|
if resp.status_code == httpx.codes.OK:
|
||||||
# Try to extract filename from response headers or URL
|
if content_disposition := resp.headers.get("Content-Disposition"):
|
||||||
content_disposition = resp.headers.get("Content-Disposition")
|
|
||||||
if content_disposition:
|
|
||||||
filename = content_disposition.split("filename=")[-1].strip('"')
|
filename = content_disposition.split("filename=")[-1].strip('"')
|
||||||
else:
|
file_size = int(resp.headers.get("Content-Length", file_size))
|
||||||
filename = url.split("/")[-1].split("?")[0]
|
mime_type = mime_type or str(resp.headers.get("Content-Type", ""))
|
||||||
# Create the File object
|
|
||||||
file_size = int(resp.headers.get("Content-Length", -1))
|
|
||||||
mime_type = str(resp.headers.get("Content-Type", ""))
|
|
||||||
else:
|
|
||||||
filename = ""
|
|
||||||
file_size = -1
|
|
||||||
mime_type = ""
|
|
||||||
|
|
||||||
# If filename is empty, set a default one
|
|
||||||
if not filename:
|
|
||||||
filename = "unknown_file"
|
|
||||||
# Determine file extension
|
# Determine file extension
|
||||||
extension = "." + filename.split(".")[-1] if "." in filename else ".bin"
|
extension = mimetypes.guess_extension(mime_type) or "." + filename.split(".")[-1] if "." in filename else ".bin"
|
||||||
|
|
||||||
if not mime_type:
|
if not mime_type:
|
||||||
mime_type, _ = mimetypes.guess_type(url)
|
mime_type, _ = mimetypes.guess_type(url)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user