mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-05-26 08:08:17 +08:00

Signed-off-by: yihong0618 <zouzou0208@gmail.com> Signed-off-by: -LAN- <laipz8200@outlook.com> Signed-off-by: xhe <xw897002528@gmail.com> Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: takatost <takatost@gmail.com> Co-authored-by: kurokobo <kuro664@gmail.com> Co-authored-by: Novice Lee <novicelee@NoviPro.local> Co-authored-by: zxhlyh <jasonapring2015@outlook.com> Co-authored-by: AkaraChen <akarachen@outlook.com> Co-authored-by: Yi <yxiaoisme@gmail.com> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: JzoNg <jzongcode@gmail.com> Co-authored-by: twwu <twwu@dify.ai> Co-authored-by: Hiroshi Fujita <fujita-h@users.noreply.github.com> Co-authored-by: AkaraChen <85140972+AkaraChen@users.noreply.github.com> Co-authored-by: NFish <douxc512@gmail.com> Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Co-authored-by: 非法操作 <hjlarry@163.com> Co-authored-by: Novice <857526207@qq.com> Co-authored-by: Hiroki Nagai <82458324+nagaihiroki-git@users.noreply.github.com> Co-authored-by: Gen Sato <52241300+halogen22@users.noreply.github.com> Co-authored-by: eux <euxuuu@gmail.com> Co-authored-by: huangzhuo1949 <167434202+huangzhuo1949@users.noreply.github.com> Co-authored-by: huangzhuo <huangzhuo1@xiaomi.com> Co-authored-by: lotsik <lotsik@mail.ru> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: gakkiyomi <gakkiyomi@aliyun.com> Co-authored-by: CN-P5 <heibai2006@gmail.com> Co-authored-by: CN-P5 <heibai2006@qq.com> Co-authored-by: Chuehnone <1897025+chuehnone@users.noreply.github.com> Co-authored-by: yihong <zouzou0208@gmail.com> Co-authored-by: Kevin9703 <51311316+Kevin9703@users.noreply.github.com> Co-authored-by: -LAN- <laipz8200@outlook.com> Co-authored-by: Boris Feld 
<lothiraldan@gmail.com> Co-authored-by: mbo <himabo@gmail.com> Co-authored-by: mabo <mabo@aeyes.ai> Co-authored-by: Warren Chen <warren.chen830@gmail.com> Co-authored-by: JzoNgKVO <27049666+JzoNgKVO@users.noreply.github.com> Co-authored-by: jiandanfeng <chenjh3@wangsu.com> Co-authored-by: zhu-an <70234959+xhdd123321@users.noreply.github.com> Co-authored-by: zhaoqingyu.1075 <zhaoqingyu.1075@bytedance.com> Co-authored-by: 海狸大師 <86974027+yenslife@users.noreply.github.com> Co-authored-by: Xu Song <xusong.vip@gmail.com> Co-authored-by: rayshaw001 <396301947@163.com> Co-authored-by: Ding Jiatong <dingjiatong@gmail.com> Co-authored-by: Bowen Liang <liangbowen@gf.com.cn> Co-authored-by: JasonVV <jasonwangiii@outlook.com> Co-authored-by: le0zh <newlight@qq.com> Co-authored-by: zhuxinliang <zhuxinliang@didiglobal.com> Co-authored-by: k-zaku <zaku99@outlook.jp> Co-authored-by: luckylhb90 <luckylhb90@gmail.com> Co-authored-by: hobo.l <hobo.l@binance.com> Co-authored-by: jiangbo721 <365065261@qq.com> Co-authored-by: 刘江波 <jiangbo721@163.com> Co-authored-by: Shun Miyazawa <34241526+miya@users.noreply.github.com> Co-authored-by: EricPan <30651140+Egfly@users.noreply.github.com> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: sino <sino2322@gmail.com> Co-authored-by: Jhvcc <37662342+Jhvcc@users.noreply.github.com> Co-authored-by: lowell <lowell.hu@zkteco.in> Co-authored-by: Boris Polonsky <BorisPolonsky@users.noreply.github.com> Co-authored-by: Ademílson Tonato <ademilsonft@outlook.com> Co-authored-by: Ademílson Tonato <ademilson.tonato@refurbed.com> Co-authored-by: IWAI, Masaharu <iwaim.sub@gmail.com> Co-authored-by: Yueh-Po Peng (Yabi) <94939112+y10ab1@users.noreply.github.com> Co-authored-by: Jason <ggbbddjm@gmail.com> Co-authored-by: Xin Zhang <sjhpzx@gmail.com> Co-authored-by: yjc980121 <3898524+yjc980121@users.noreply.github.com> Co-authored-by: heyszt <36215648+hieheihei@users.noreply.github.com> Co-authored-by: Abdullah AlOsaimi <osaimiacc@gmail.com> 
Co-authored-by: Abdullah AlOsaimi <189027247+osaimi@users.noreply.github.com> Co-authored-by: Yingchun Lai <laiyingchun@apache.org> Co-authored-by: Hash Brown <hi@xzd.me> Co-authored-by: zuodongxu <192560071+zuodongxu@users.noreply.github.com> Co-authored-by: Masashi Tomooka <tmokmss@users.noreply.github.com> Co-authored-by: aplio <ryo.091219@gmail.com> Co-authored-by: Obada Khalili <54270856+obadakhalili@users.noreply.github.com> Co-authored-by: Nam Vu <zuzoovn@gmail.com> Co-authored-by: Kei YAMAZAKI <1715090+kei-yamazaki@users.noreply.github.com> Co-authored-by: TechnoHouse <13776377+deephbz@users.noreply.github.com> Co-authored-by: Riddhimaan-Senapati <114703025+Riddhimaan-Senapati@users.noreply.github.com> Co-authored-by: MaFee921 <31881301+2284730142@users.noreply.github.com> Co-authored-by: te-chan <t-nakanome@sakura-is.co.jp> Co-authored-by: HQidea <HQidea@users.noreply.github.com> Co-authored-by: Joshbly <36315710+Joshbly@users.noreply.github.com> Co-authored-by: xhe <xw897002528@gmail.com> Co-authored-by: weiwenyan-dev <154779315+weiwenyan-dev@users.noreply.github.com> Co-authored-by: ex_wenyan.wei <ex_wenyan.wei@tcl.com> Co-authored-by: engchina <12236799+engchina@users.noreply.github.com> Co-authored-by: engchina <atjapan2015@gmail.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: 呆萌闷油瓶 <253605712@qq.com> Co-authored-by: Kemal <kemalmeler@outlook.com> Co-authored-by: Lazy_Frog <4590648+lazyFrogLOL@users.noreply.github.com> Co-authored-by: Yi Xiao <54782454+YIXIAO0@users.noreply.github.com> Co-authored-by: Steven sun <98230804+Tuyohai@users.noreply.github.com> Co-authored-by: steven <sunzwj@digitalchina.com> Co-authored-by: Kalo Chin <91766386+fdb02983rhy@users.noreply.github.com> Co-authored-by: Katy Tao <34019945+KatyTao@users.noreply.github.com> Co-authored-by: depy <42985524+h4ckdepy@users.noreply.github.com> Co-authored-by: 胡春东 <gycm520@gmail.com> Co-authored-by: Junjie.M <118170653@qq.com> 
Co-authored-by: MuYu <mr.muzea@gmail.com> Co-authored-by: Naoki Takashima <39912547+takatea@users.noreply.github.com> Co-authored-by: Summer-Gu <37869445+gubinjie@users.noreply.github.com> Co-authored-by: Fei He <droxer.he@gmail.com> Co-authored-by: ybalbert001 <120714773+ybalbert001@users.noreply.github.com> Co-authored-by: Yuanbo Li <ybalbert@amazon.com> Co-authored-by: douxc <7553076+douxc@users.noreply.github.com> Co-authored-by: liuzhenghua <1090179900@qq.com> Co-authored-by: Wu Jiayang <62842862+Wu-Jiayang@users.noreply.github.com> Co-authored-by: Your Name <you@example.com> Co-authored-by: kimjion <45935338+kimjion@users.noreply.github.com> Co-authored-by: AugNSo <song.tiankai@icloud.com> Co-authored-by: llinvokerl <38915183+llinvokerl@users.noreply.github.com> Co-authored-by: liusurong.lsr <liusurong.lsr@alibaba-inc.com> Co-authored-by: Vasu Negi <vasu-negi@users.noreply.github.com> Co-authored-by: Hundredwz <1808096180@qq.com> Co-authored-by: Xiyuan Chen <52963600+GareArc@users.noreply.github.com>
309 lines
9.3 KiB
Python
309 lines
9.3 KiB
Python
import mimetypes
|
|
import uuid
|
|
from collections.abc import Callable, Mapping, Sequence
|
|
from typing import Any, cast
|
|
|
|
import httpx
|
|
from sqlalchemy import select
|
|
|
|
from constants import AUDIO_EXTENSIONS, DOCUMENT_EXTENSIONS, IMAGE_EXTENSIONS, VIDEO_EXTENSIONS
|
|
from core.file import File, FileBelongsTo, FileTransferMethod, FileType, FileUploadConfig
|
|
from core.helper import ssrf_proxy
|
|
from extensions.ext_database import db
|
|
from models import MessageFile, ToolFile, UploadFile
|
|
|
|
|
|
def build_from_message_files(
    *,
    message_files: Sequence["MessageFile"],
    tenant_id: str,
    config: FileUploadConfig,
) -> Sequence[File]:
    """Build a ``File`` for each message file that does not belong to the assistant.

    Entries whose ``belongs_to`` equals ``FileBelongsTo.ASSISTANT`` are skipped.
    The remaining ones are converted, in order, with ``build_from_message_file``.
    """
    built_files = []
    for message_file in message_files:
        if message_file.belongs_to != FileBelongsTo.ASSISTANT:
            built_files.append(
                build_from_message_file(message_file=message_file, tenant_id=tenant_id, config=config)
            )
    return built_files
|
|
|
|
|
|
def build_from_message_file(
    *,
    message_file: "MessageFile",
    tenant_id: str,
    config: FileUploadConfig,
) -> File:
    """Build a ``File`` from a single ``MessageFile`` record.

    The record's fields are copied into a mapping and passed to
    ``build_from_mapping``, which picks the builder for the transfer method and
    validates the result against ``config``.

    :param message_file: record to convert.
    :param tenant_id: tenant the lookups are scoped to.
    :param config: upload configuration used for validation.
    :return: the constructed ``File``.
    :raises ValueError: propagated from ``build_from_mapping``.
    """
    mapping: dict[str, Any] = {
        "transfer_method": message_file.transfer_method,
        "url": message_file.url,
        "id": message_file.id,
        "type": message_file.type,
        "upload_file_id": message_file.upload_file_id,
    }
    # The tool-file builder looks its record up via the "tool_file_id" key only.
    # Without it, a tool-file message could never be resolved.
    # NOTE(review): assumes MessageFile keeps the ToolFile id in `upload_file_id`
    # (no dedicated column is visible here) - confirm against the model.
    if message_file.transfer_method == FileTransferMethod.TOOL_FILE.value:
        mapping["tool_file_id"] = message_file.upload_file_id

    return build_from_mapping(
        mapping=mapping,
        tenant_id=tenant_id,
        config=config,
    )
|
|
|
|
|
|
def build_from_mapping(
    *,
    mapping: Mapping[str, Any],
    tenant_id: str,
    config: FileUploadConfig | None = None,
) -> File:
    """Build a ``File`` from a mapping, dispatching on its ``transfer_method``.

    The transfer method selects one of the private builders (local file,
    remote URL, tool file). When ``config`` is given, the built file is also
    checked with ``_is_file_valid_with_config``.

    Raises ``ValueError`` when the transfer method has no builder or when the
    file fails validation.
    """
    transfer_method = FileTransferMethod.value_of(mapping.get("transfer_method"))

    builder: Callable
    if transfer_method == FileTransferMethod.LOCAL_FILE:
        builder = _build_from_local_file
    elif transfer_method == FileTransferMethod.REMOTE_URL:
        builder = _build_from_remote_url
    elif transfer_method == FileTransferMethod.TOOL_FILE:
        builder = _build_from_tool_file
    else:
        raise ValueError(f"Invalid file transfer method: {transfer_method}")

    built: File = builder(mapping=mapping, tenant_id=tenant_id, transfer_method=transfer_method)

    # Nothing to validate against when no upload configuration is supplied.
    if not config:
        return built

    passes_validation = _is_file_valid_with_config(
        input_file_type=mapping.get("type", FileType.CUSTOM),
        file_extension=built.extension or "",
        file_transfer_method=built.transfer_method,
        config=config,
    )
    if not passes_validation:
        raise ValueError(f"File validation failed for file: {built.filename}")

    return built
|
|
|
|
|
|
def build_from_mappings(
    *,
    mappings: Sequence[Mapping[str, Any]],
    config: FileUploadConfig | None = None,
    tenant_id: str,
) -> Sequence[File]:
    """Build a ``File`` for every mapping and enforce the configured count limits.

    All files are built first (each one validated by ``build_from_mapping``).
    Then, if ``config`` is given, the number of image files and the total number
    of files are compared with their limits.

    Raises ``ValueError`` when either limit is exceeded.
    """
    files = []
    for item in mappings:
        files.append(build_from_mapping(mapping=item, tenant_id=tenant_id, config=config))

    # Limits only exist on the upload configuration.
    if not config:
        return files

    if config.image_config:
        image_count = len([candidate for candidate in files if candidate.type == FileType.IMAGE])
        if image_count > config.image_config.number_limits:
            raise ValueError(f"Number of image files exceeds the maximum limit {config.image_config.number_limits}")

    if config.number_limits and len(files) > config.number_limits:
        raise ValueError(f"Number of files exceeds the maximum limit {config.number_limits}")

    return files
|
|
|
|
|
|
def _build_from_local_file(
    *,
    mapping: Mapping[str, Any],
    tenant_id: str,
    transfer_method: FileTransferMethod,
) -> File:
    """Build a ``File`` from an ``UploadFile`` row referenced by the mapping.

    :param mapping: must contain ``upload_file_id``; ``type`` and ``id`` are optional.
    :param tenant_id: the row must belong to this tenant.
    :param transfer_method: stored on the resulting ``File``.
    :return: a ``File`` populated from the ``UploadFile`` row.
    :raises ValueError: if the id is missing, is not a valid UUID string, or no
        matching row exists for the tenant.
    """
    upload_file_id = mapping.get("upload_file_id")
    if not upload_file_id:
        raise ValueError("Invalid upload file id")

    # The mapping is caller-supplied, so the id may not be a string at all.
    # uuid.UUID() raises AttributeError/TypeError for such values, not
    # ValueError, so report every malformed id with the same ValueError.
    try:
        uuid.UUID(upload_file_id)
    except (AttributeError, TypeError, ValueError) as exc:
        raise ValueError("Invalid upload file id format") from exc

    stmt = select(UploadFile).where(
        UploadFile.id == upload_file_id,
        UploadFile.tenant_id == tenant_id,
    )

    row = db.session.scalar(stmt)
    if row is None:
        raise ValueError("Invalid upload file")

    # The row's extension is prefixed with a dot before use.
    extension = "." + row.extension
    file_type = FileType(mapping.get("type", "custom"))
    # A "custom" type is refined from the extension / MIME type when possible.
    file_type = _standardize_file_type(file_type, extension=extension, mime_type=row.mime_type)

    return File(
        id=mapping.get("id"),
        filename=row.name,
        extension=extension,
        mime_type=row.mime_type,
        tenant_id=tenant_id,
        type=file_type,
        transfer_method=transfer_method,
        remote_url=row.source_url,
        related_id=mapping.get("upload_file_id"),
        size=row.size,
        storage_key=row.key,
    )
|
|
|
|
|
|
def _build_from_remote_url(
    *,
    mapping: Mapping[str, Any],
    tenant_id: str,
    transfer_method: FileTransferMethod,
) -> File:
    """Build a ``File`` describing a remote URL.

    :param mapping: must contain ``url`` or ``remote_url``; ``type`` and ``id`` are optional.
    :param tenant_id: stored on the resulting ``File``.
    :param transfer_method: stored on the resulting ``File``.
    :return: a ``File`` whose MIME type, filename and size come from
        ``_get_remote_file_info``; its ``storage_key`` is empty.
    :raises ValueError: if no URL is present in the mapping.
    """
    url = mapping.get("url") or mapping.get("remote_url")
    if not url:
        raise ValueError("Invalid file url")

    mime_type, filename, file_size = _get_remote_file_info(url)

    # Extension preference: MIME type first, then the filename suffix, then ".bin".
    # The previous one-line expression parsed as
    # `(guess or "." + suffix) if "." in filename else ".bin"`, which discarded a
    # valid MIME-derived extension whenever the filename had no dot.
    extension = mimetypes.guess_extension(mime_type)
    if not extension:
        extension = "." + filename.split(".")[-1] if "." in filename else ".bin"

    file_type = FileType(mapping.get("type", "custom"))
    # A "custom" type is refined from the extension / MIME type when possible.
    file_type = _standardize_file_type(file_type, extension=extension, mime_type=mime_type)

    return File(
        id=mapping.get("id"),
        filename=filename,
        tenant_id=tenant_id,
        type=file_type,
        transfer_method=transfer_method,
        remote_url=url,
        mime_type=mime_type,
        extension=extension,
        size=file_size,
        storage_key="",
    )
|
|
|
|
|
|
def _get_remote_file_info(url: str) -> tuple[str, str, int]:
    """Probe a remote file with a HEAD request.

    Defaults are derived from the URL: the last path segment without its query
    string is the filename (``"unknown_file"`` if empty), the MIME type is
    guessed from that name, and the size is ``-1``. On a 200 response the
    headers refine those values.

    :param url: address of the remote file.
    :return: ``(mime_type, filename, file_size)``; ``mime_type`` may be ``""``
        and ``file_size`` is ``-1`` when unknown.
    """
    file_size = -1
    filename = url.split("/")[-1].split("?")[0] or "unknown_file"
    mime_type = mimetypes.guess_type(filename)[0] or ""

    resp = ssrf_proxy.head(url, follow_redirects=True)
    resp = cast(httpx.Response, resp)
    if resp.status_code == httpx.codes.OK:
        content_disposition = resp.headers.get("Content-Disposition")
        # Only trust the header when it carries a filename parameter. A bare
        # "inline" / "attachment" must not replace the URL-derived name.
        if content_disposition and "filename=" in content_disposition:
            header_filename = str(content_disposition.split("filename=")[-1]).strip().strip('"')
            if header_filename:
                filename = header_filename

        # A malformed Content-Length leaves the size unknown instead of raising.
        try:
            file_size = int(resp.headers.get("Content-Length", file_size))
        except (TypeError, ValueError):
            file_size = -1

        if not mime_type:
            # Drop parameters such as "; charset=utf-8" so the result is a bare
            # media type that mimetypes.guess_extension() can recognise.
            mime_type = str(resp.headers.get("Content-Type", "")).split(";")[0].strip()

    return mime_type, filename, file_size
|
|
|
|
|
|
def _build_from_tool_file(
    *,
    mapping: Mapping[str, Any],
    tenant_id: str,
    transfer_method: FileTransferMethod,
) -> File:
    """Build a ``File`` from the ``ToolFile`` row named by ``mapping["tool_file_id"]``.

    The row must belong to ``tenant_id``. The extension is the text after the
    last dot of the row's ``file_key``, or ``".bin"`` when the key has no dot.

    Raises ``ValueError`` when no matching row exists.
    """
    lookup = db.session.query(ToolFile).filter(
        ToolFile.id == mapping.get("tool_file_id"),
        ToolFile.tenant_id == tenant_id,
    )
    tool_file = lookup.first()

    if tool_file is None:
        raise ValueError(f"ToolFile {mapping.get('tool_file_id')} not found")

    # rpartition yields an empty separator when the key contains no dot.
    _, dot, suffix = tool_file.file_key.rpartition(".")
    if dot:
        extension = "." + suffix
    else:
        extension = ".bin"

    declared_type = FileType(mapping.get("type", "custom"))
    resolved_type = _standardize_file_type(declared_type, extension=extension, mime_type=tool_file.mimetype)

    return File(
        id=mapping.get("id"),
        tenant_id=tenant_id,
        filename=tool_file.name,
        type=resolved_type,
        transfer_method=transfer_method,
        remote_url=tool_file.original_url,
        related_id=tool_file.id,
        extension=extension,
        mime_type=tool_file.mimetype,
        size=tool_file.size,
        storage_key=tool_file.file_key,
    )
|
|
|
|
|
|
def _is_file_valid_with_config(
    *,
    input_file_type: str,
    file_extension: str,
    file_transfer_method: FileTransferMethod,
    config: FileUploadConfig,
) -> bool:
    """Return whether a file satisfies the upload configuration.

    A file is rejected when:
    - its type is not custom, ``allowed_file_types`` is non-empty, and the type
      is not listed there;
    - its type is custom, ``allowed_file_extensions`` is set, and the extension
      is not listed there;
    - it is an image, the image config lists transfer methods, and the file's
      transfer method is not among them.
    """
    if input_file_type == FileType.CUSTOM:
        # Custom files are checked by extension rather than by type.
        permitted_extensions = config.allowed_file_extensions
        if permitted_extensions is not None and file_extension not in permitted_extensions:
            return False
    elif config.allowed_file_types and input_file_type not in config.allowed_file_types:
        return False

    if input_file_type == FileType.IMAGE and config.image_config:
        permitted_methods = config.image_config.transfer_methods
        if permitted_methods and file_transfer_method not in permitted_methods:
            return False

    return True
|
|
|
|
|
|
def _standardize_file_type(file_type: FileType, /, *, extension: str = "", mime_type: str = "") -> FileType:
    """
    Resolve a custom file type to a concrete one when possible.

    Non-custom types are returned unchanged (coerced to ``FileType``). For a
    custom type the extension is tried first, then the MIME type. If neither
    gives a result the type stays ``FileType.CUSTOM``.
    """
    if file_type != FileType.CUSTOM:
        return FileType(file_type)

    candidate = _get_file_type_by_extension(extension) if extension else None
    if candidate is None and mime_type:
        candidate = _get_file_type_by_mimetype(mime_type)

    if candidate:
        return candidate
    return FileType.CUSTOM
|
|
|
|
|
|
def _get_file_type_by_extension(extension: str) -> FileType | None:
    """Map a file extension (leading dots ignored) to a ``FileType``.

    The extension collections are checked in the order image, video, audio,
    document. Returns ``None`` when the extension is in none of them.
    """
    bare_extension = extension.lstrip(".")
    extension_groups = (
        (IMAGE_EXTENSIONS, FileType.IMAGE),
        (VIDEO_EXTENSIONS, FileType.VIDEO),
        (AUDIO_EXTENSIONS, FileType.AUDIO),
        (DOCUMENT_EXTENSIONS, FileType.DOCUMENT),
    )
    for known_extensions, matched_type in extension_groups:
        if bare_extension in known_extensions:
            return matched_type
    return None
|
|
|
|
|
|
def _get_file_type_by_mimetype(mime_type: str) -> FileType | None:
    """Classify a MIME type by substring match.

    The keywords are tested in order and the first one found in ``mime_type``
    decides the type. ``FileType.CUSTOM`` is returned when none matches.
    """
    keyword_types = (
        ("image", FileType.IMAGE),
        ("video", FileType.VIDEO),
        ("audio", FileType.AUDIO),
        ("text", FileType.DOCUMENT),
        ("pdf", FileType.DOCUMENT),
    )
    for keyword, matched_type in keyword_types:
        if keyword in mime_type:
            return matched_type
    return FileType.CUSTOM
|
|
|
|
|
|
def get_file_type_by_mime_type(mime_type: str) -> FileType:
    """Return the ``FileType`` for a MIME type, falling back to ``FileType.CUSTOM``."""
    detected = _get_file_type_by_mimetype(mime_type)
    if detected:
        return detected
    return FileType.CUSTOM
|