mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-12 09:29:02 +08:00
fix(http_request): allow content type application/x-javascript
(#10862)
This commit is contained in:
parent
f3af7b5f35
commit
25fda7adc5
@ -1,4 +1,6 @@
|
|||||||
|
import mimetypes
|
||||||
from collections.abc import Sequence
|
from collections.abc import Sequence
|
||||||
|
from email.message import Message
|
||||||
from typing import Any, Literal, Optional
|
from typing import Any, Literal, Optional
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
@ -7,14 +9,6 @@ from pydantic import BaseModel, Field, ValidationInfo, field_validator
|
|||||||
from configs import dify_config
|
from configs import dify_config
|
||||||
from core.workflow.nodes.base import BaseNodeData
|
from core.workflow.nodes.base import BaseNodeData
|
||||||
|
|
||||||
NON_FILE_CONTENT_TYPES = (
|
|
||||||
"application/json",
|
|
||||||
"application/xml",
|
|
||||||
"text/html",
|
|
||||||
"text/plain",
|
|
||||||
"application/x-www-form-urlencoded",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class HttpRequestNodeAuthorizationConfig(BaseModel):
|
class HttpRequestNodeAuthorizationConfig(BaseModel):
|
||||||
type: Literal["basic", "bearer", "custom"]
|
type: Literal["basic", "bearer", "custom"]
|
||||||
@ -93,13 +87,53 @@ class Response:
|
|||||||
|
|
||||||
@property
def is_file(self):
    """
    Determine whether the response body should be treated as a file.

    The checks run in order and the first conclusive one wins:
    1. Content-Disposition header (RFC 6266): an ``attachment`` disposition
       or any ``filename`` parameter means the response is a file.
    2. ``application/*`` types: well-known text formats are never files;
       for anything else the first bytes of the body are sniffed.
    3. MIME type analysis through the ``mimetypes`` module.

    Returns:
        bool: True when the response looks like a file, False otherwise.
    """
    import codecs  # local import: only needed for the body sniffing below

    sample_size = 1024

    content_type = self.content_type.split(";")[0].strip().lower()
    content_disposition = self.response.headers.get("content-disposition", "")

    # Check if it's explicitly marked as an attachment
    if content_disposition:
        msg = Message()
        msg["content-disposition"] = content_disposition
        disp_type = msg.get_content_disposition()  # Returns 'attachment', 'inline', or None
        filename = msg.get_filename()  # Returns filename if present, None otherwise
        if disp_type == "attachment" or filename is not None:
            return True

    # For application types, try to detect if it's a text-based format
    if content_type.startswith("application/"):
        # Compare whole subtype tokens instead of raw substrings: a substring
        # test for "xml" also matches binary Office types such as
        # "vnd.openxmlformats-officedocument.wordprocessingml.document".
        subtype = content_type.partition("/")[2]
        tokens = subtype.replace("+", ".").replace("-", ".").split(".")
        text_tokens = {"json", "xml", "javascript", "yaml", "graphql", "ndjson", "jsonl"}
        if subtype == "x-www-form-urlencoded" or not text_tokens.isdisjoint(tokens):
            return False

        # Try to detect if content is text-based by sampling first few bytes
        body = self.response.content
        content_sample = body[:sample_size]
        try:
            # An incremental decoder tolerates a multi-byte character that was
            # cut in half by the slice; it is only strict about the tail when
            # the sample already is the complete body.
            decoder = codecs.getincrementaldecoder("utf-8")()
            decoder.decode(content_sample, final=len(body) <= sample_size)
        except UnicodeDecodeError:
            # If we can't decode as UTF-8, likely a binary file
            return True

        # Decodable as UTF-8 and containing common text patterns: likely not a file
        text_markers = (b"{", b"[", b"<", b"function", b"var ", b"const ", b"let ")
        if any(marker in content_sample for marker in text_markers):
            return False

    # For other types, use MIME type analysis
    main_type, _ = mimetypes.guess_type("dummy" + (mimetypes.guess_extension(content_type) or ""))
    if main_type:
        return main_type.split("/")[0] in ("application", "image", "audio", "video")

    # For unknown types, check if it's a media type
    return any(media_type in content_type for media_type in ("image/", "audio/", "video/"))
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def content_type(self) -> str:
|
def content_type(self) -> str:
|
||||||
|
@ -0,0 +1,140 @@
|
|||||||
|
from unittest.mock import Mock, PropertyMock, patch
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from core.workflow.nodes.http_request.entities import Response
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_response():
    """Provide an ``httpx.Response``-shaped mock that starts with no headers."""
    fake = Mock(spec=httpx.Response)
    fake.headers = {}
    return fake
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_file_with_attachment_disposition(mock_response):
    """An ``attachment`` content-disposition must mark the response as a file."""
    headers = {"content-disposition": "attachment; filename=test.pdf", "content-type": "application/pdf"}
    mock_response.headers = headers
    wrapped = Response(mock_response)
    assert wrapped.is_file
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_file_with_filename_disposition(mock_response):
    """A ``filename`` parameter marks the response as a file even when inline."""
    headers = {"content-disposition": "inline; filename=test.pdf", "content-type": "application/pdf"}
    mock_response.headers = headers
    wrapped = Response(mock_response)
    assert wrapped.is_file
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("content_type", ["application/pdf", "image/jpeg", "audio/mp3", "video/mp4"])
def test_is_file_with_file_content_types(mock_response, content_type):
    """Typical file content types with a binary body are reported as files."""
    # Bytes that are not valid UTF-8, standing in for binary content
    binary_body = bytes([0x00, 0xFF] * 512)
    mock_response.headers = {"content-type": content_type}
    type(mock_response).content = PropertyMock(return_value=binary_body)
    wrapped = Response(mock_response)
    assert wrapped.is_file, f"Content type {content_type} should be identified as a file"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "content_type",
    [
        "application/json",
        "application/xml",
        "application/javascript",
        "application/x-www-form-urlencoded",
        "application/yaml",
        "application/graphql",
    ],
)
def test_text_based_application_types(mock_response, content_type):
    """Well-known text-based ``application/*`` types are not reported as files."""
    mock_response.headers = {"content-type": content_type}
    wrapped = Response(mock_response)
    assert not wrapped.is_file, f"Content type {content_type} should not be identified as a file"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    ("content", "content_type"),
    [
        (b'{"key": "value"}', "application/octet-stream"),
        (b"[1, 2, 3]", "application/unknown"),
        (b"function test() {}", "application/x-unknown"),
        (b"<root>test</root>", "application/binary"),
        (b"var x = 1;", "application/data"),
    ],
)
def test_content_based_detection(mock_response, content, content_type):
    """Text-looking bodies are not files even under generic application types."""
    body_property = PropertyMock(return_value=content)
    mock_response.headers = {"content-type": content_type}
    type(mock_response).content = body_property
    wrapped = Response(mock_response)
    assert not wrapped.is_file, f"Content {content} with type {content_type} should not be identified as a file"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    ("content", "content_type"),
    [
        (bytes([0x00, 0xFF] * 512), "application/octet-stream"),
        (bytes([0x89, 0x50, 0x4E, 0x47]), "application/unknown"),  # PNG magic numbers
        (bytes([0xFF, 0xD8, 0xFF]), "application/binary"),  # JPEG magic numbers
    ],
)
def test_binary_content_detection(mock_response, content, content_type):
    """Bodies that cannot be decoded as UTF-8 are reported as files."""
    body_property = PropertyMock(return_value=content)
    mock_response.headers = {"content-type": content_type}
    type(mock_response).content = body_property
    wrapped = Response(mock_response)
    assert wrapped.is_file, f"Binary content with type {content_type} should be identified as a file"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    ("content_type", "expected_main_type"),
    [
        ("x-world/x-vrml", "model"),  # VRML 3D model
        ("font/ttf", "application"),  # TrueType font
        ("text/csv", "text"),  # CSV text file
        ("unknown/xyz", None),  # Unknown type
    ],
)
def test_mimetype_based_detection(mock_response, content_type, expected_main_type):
    """Non-application content types are classified through ``mimetypes.guess_type``."""
    mock_response.headers = {"content-type": content_type}
    type(mock_response).content = PropertyMock(return_value=bytes([0x00]))  # Dummy content

    # What the patched guess_type should report for this parameter set
    if expected_main_type:
        guessed = (f"{expected_main_type}/subtype", None)
    else:
        guessed = (None, None)
    should_be_file = expected_main_type in ("application", "image", "audio", "video")

    target = "core.workflow.nodes.http_request.entities.mimetypes.guess_type"
    with patch(target) as mock_guess_type:
        mock_guess_type.return_value = guessed

        wrapped = Response(mock_response)

        # The outcome must follow the main type reported by guess_type
        if should_be_file:
            assert wrapped.is_file, f"Content type {content_type} should be identified as a file"
        else:
            assert not wrapped.is_file, f"Content type {content_type} should not be identified as a file"

        # The classification has to go through guess_type exactly once
        mock_guess_type.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_file_with_inline_disposition(mock_response):
    """A bare ``inline`` disposition still yields a file for a binary PDF body."""
    # Bytes that are not valid UTF-8, standing in for binary content
    binary_body = bytes([0x00, 0xFF] * 512)
    mock_response.headers = {"content-disposition": "inline", "content-type": "application/pdf"}
    type(mock_response).content = PropertyMock(return_value=binary_body)
    wrapped = Response(mock_response)
    assert wrapped.is_file
|
||||||
|
|
||||||
|
|
||||||
|
def test_is_file_with_no_content_disposition(mock_response):
    """Without a content-disposition header a binary PDF body is still a file."""
    # Bytes that are not valid UTF-8, standing in for binary content
    binary_body = bytes([0x00, 0xFF] * 512)
    mock_response.headers = {"content-type": "application/pdf"}
    type(mock_response).content = PropertyMock(return_value=binary_body)
    wrapped = Response(mock_response)
    assert wrapped.is_file
|
Loading…
x
Reference in New Issue
Block a user