Fix HTTP Request node to give priority to file extension of content-disposition (#12653)

This commit is contained in:
Gen Sato 2025-03-18 12:00:20 +09:00 committed by GitHub
parent 963b6f628a
commit 475b8d731e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 46 additions and 27 deletions

View File

@ -109,14 +109,12 @@ class Response:
3. MIME type analysis
"""
content_type = self.content_type.split(";")[0].strip().lower()
content_disposition = self.response.headers.get("content-disposition", "")
parsed_content_disposition = self.parsed_content_disposition
# Check if it's explicitly marked as an attachment
if content_disposition:
msg = Message()
msg["content-disposition"] = content_disposition
disp_type = msg.get_content_disposition() # Returns 'attachment', 'inline', or None
filename = msg.get_filename() # Returns filename if present, None otherwise
if parsed_content_disposition:
disp_type = parsed_content_disposition.get_content_disposition() # Returns 'attachment', 'inline', or None
filename = parsed_content_disposition.get_filename() # Returns filename if present, None otherwise
if disp_type == "attachment" or filename is not None:
return True
@ -182,3 +180,12 @@ class Response:
return f"{(self.size / 1024):.2f} KB"
else:
return f"{(self.size / 1024 / 1024):.2f} MB"
@property
def parsed_content_disposition(self) -> Optional[Message]:
    """Return the Content-Disposition header wrapped in an email ``Message``.

    The returned ``Message`` carries a single ``content-disposition``
    header, so callers can use ``get_content_disposition()`` and
    ``get_filename()`` on it instead of parsing the raw value themselves.

    Returns:
        A ``Message`` holding the header value, or ``None`` when the
        header is missing or empty.
    """
    headers = self.headers
    content_disposition = headers.get("content-disposition", "")
    if not content_disposition:
        # HTTP field names are case-insensitive. If ``self.headers`` is a
        # plain dict that kept the server's original casing (for example
        # "Content-Disposition"), the exact-key lookup above misses it, so
        # fall back to a case-insensitive scan.
        content_disposition = next(
            (value for name, value in headers.items() if name.lower() == "content-disposition"),
            "",
        )
    if not content_disposition:
        return None

    msg = Message()
    msg["content-disposition"] = content_disposition
    return msg

View File

@ -169,32 +169,44 @@ class HttpRequestNode(BaseNode[HttpRequestNodeData]):
"""
Extract files from response by checking both Content-Type header and URL
"""
files = []
files: list[File] = []
is_file = response.is_file
content_type = response.content_type
content = response.content
parsed_content_disposition = response.parsed_content_disposition
content_disposition_type = None
if is_file:
# Guess file extension from URL or Content-Type header
filename = url.split("?")[0].split("/")[-1] or ""
mime_type = content_type or mimetypes.guess_type(filename)[0] or "application/octet-stream"
if not is_file:
return files
tool_file = ToolFileManager.create_file_by_raw(
user_id=self.user_id,
tenant_id=self.tenant_id,
conversation_id=None,
file_binary=content,
mimetype=mime_type,
)
if parsed_content_disposition:
content_disposition_filename = parsed_content_disposition.get_filename()
if content_disposition_filename:
# If filename is available from content-disposition, use it to guess the content type
content_disposition_type = mimetypes.guess_type(content_disposition_filename)[0]
mapping = {
"tool_file_id": tool_file.id,
"transfer_method": FileTransferMethod.TOOL_FILE.value,
}
file = file_factory.build_from_mapping(
mapping=mapping,
tenant_id=self.tenant_id,
)
files.append(file)
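# Pick the MIME type: the type guessed from the Content-Disposition filename takes priority, then the Content-Type header, then a guess from the URL's file name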
filename = url.split("?")[0].split("/")[-1] or ""
mime_type = (
content_disposition_type or content_type or mimetypes.guess_type(filename)[0] or "application/octet-stream"
)
tool_file = ToolFileManager.create_file_by_raw(
user_id=self.user_id,
tenant_id=self.tenant_id,
conversation_id=None,
file_binary=content,
mimetype=mime_type,
)
mapping = {
"tool_file_id": tool_file.id,
"transfer_method": FileTransferMethod.TOOL_FILE.value,
}
file = file_factory.build_from_mapping(
mapping=mapping,
tenant_id=self.tenant_id,
)
files.append(file)
return files