mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-04-23 14:19:42 +08:00
59 lines
1.7 KiB
Python
59 lines
1.7 KiB
Python
import mimetypes
|
|
import os
|
|
import re
|
|
import urllib.parse
|
|
from uuid import uuid4
|
|
|
|
import httpx
|
|
from pydantic import BaseModel
|
|
|
|
|
|
class FileInfo(BaseModel):
    """File metadata record, as produced by ``guess_file_info_from_response``."""

    # File name; the producer appends an extension when the name has none.
    filename: str
    # Extension including the leading dot (e.g. ".png").
    extension: str
    # MIME type guessed from the name/URL or taken from the response headers.
    mimetype: str
    # Value of the Content-Length header; -1 when the header is absent.
    size: int
|
|
|
|
|
|
# RFC 5987 extended parameter: filename*=charset'language'percent-encoded
_CD_EXTENDED_FILENAME_RE = re.compile(r"filename\*\s*=\s*([^;]+)", re.IGNORECASE)
# Plain parameter: either a quoted string or a bare token ending at ';'.
_CD_PLAIN_FILENAME_RE = re.compile(r'filename\s*=\s*(?:"([^"]*)"|([^;]+))', re.IGNORECASE)
_DEFAULT_MIMETYPE = "application/octet-stream"


def _filename_from_content_disposition(header_value: str) -> str:
    """Return the file name carried by a Content-Disposition value, or ""."""
    name = ""

    extended = _CD_EXTENDED_FILENAME_RE.search(header_value)
    if extended:
        parts = extended.group(1).strip().strip('"').split("'", 2)
        if len(parts) == 3:
            charset, _language, encoded = parts
            try:
                name = urllib.parse.unquote(encoded, encoding=charset or "utf-8", errors="replace")
            except LookupError:
                # Unknown charset label: fall back to the default UTF-8 decoding.
                name = urllib.parse.unquote(encoded)

    if not name:
        plain = _CD_PLAIN_FILENAME_RE.search(header_value)
        if plain:
            quoted, bare = plain.groups()
            name = quoted if quoted is not None else bare.strip()

    # The header is untrusted input: keep only the final path component so a
    # value such as "../../etc/passwd" cannot smuggle directory parts through.
    return os.path.basename(name.replace("\\", "/"))


def _parse_content_length(raw_value) -> int:
    """Convert a Content-Length header value to int; -1 if missing or malformed."""
    try:
        return int(raw_value)
    except (TypeError, ValueError):
        return -1


def guess_file_info_from_response(response: httpx.Response) -> "FileInfo":
    """Derive file name, extension, MIME type and size from an HTTP response.

    The file name is taken from the last segment of the response URL path. When
    that segment is empty, the ``Content-Disposition`` header is consulted, and
    if that yields nothing either a random UUID is used as the name.

    The MIME type is guessed from the file name, then from the full URL, and
    finally read from the ``Content-Type`` header (parameters such as
    ``charset`` are stripped), defaulting to ``application/octet-stream``.

    If the file name has no extension, one is derived from the MIME type
    (``.bin`` when none is known) and appended to the name.

    Args:
        response: The HTTP response to inspect; only ``url`` and ``headers``
            are read.

    Returns:
        A ``FileInfo`` whose ``size`` is the ``Content-Length`` value, or -1
        when that header is missing or not an integer.
    """
    url = str(response.url)

    # Try to extract filename from URL
    filename = os.path.basename(urllib.parse.urlparse(url).path)

    # If filename couldn't be extracted, use Content-Disposition header
    if not filename:
        content_disposition = response.headers.get("Content-Disposition")
        if content_disposition:
            filename = _filename_from_content_disposition(content_disposition)

    # If still no filename, generate a unique one
    if not filename:
        filename = str(uuid4())

    # Guess MIME type from filename first, then URL
    mimetype, _ = mimetypes.guess_type(filename)
    if mimetype is None:
        mimetype, _ = mimetypes.guess_type(url)
    if mimetype is None:
        # If guessing fails, use Content-Type from response headers. Drop any
        # parameters ("; charset=utf-8") so the value is a bare media type that
        # mimetypes.guess_extension can look up.
        content_type = response.headers.get("Content-Type", "")
        mimetype = content_type.split(";", 1)[0].strip() or _DEFAULT_MIMETYPE

    extension = os.path.splitext(filename)[1]

    # Ensure filename has an extension
    if not extension:
        extension = mimetypes.guess_extension(mimetype) or ".bin"
        filename = f"{filename}{extension}"

    return FileInfo(
        filename=filename,
        extension=extension,
        mimetype=mimetype,
        size=_parse_content_length(response.headers.get("Content-Length", -1)),
    )
|