fix(file-retrieval): improve error handling for HTTP HEAD requests

- Fall back to a GET request if the HEAD response status is not 200.
- Apply the change to RemoteFileInfoApi and the file factory functions.
- Enhance reliability by ensuring accurate content retrieval.
2025-08-20 12:59:06 +08:00 · 2024-11-08 19:19:50 +08:00 · 2024-11-08 19:19:50 +08:00 · 17ba978d1a
commit 17ba978d1a
parent fa6d2874b3
3 changed files with 16 additions and 16 deletions
--- a/api/controllers/console/datasets/file.py
+++ b/api/controllers/console/datasets/file.py
@@ -89,14 +89,13 @@ class RemoteFileInfoApi(Resource):
    @marshal_with(remote_file_info_fields)
    def get(self, url):
        decoded_url = urllib.parse.unquote(url)
-        try:
-            response = ssrf_proxy.head(decoded_url)
-            return {
-                "file_type": response.headers.get("Content-Type", "application/octet-stream"),
-                "file_length": int(response.headers.get("Content-Length", 0)),
-            }
-        except Exception as e:
-            return {"error": str(e)}, 400
+        resp = ssrf_proxy.head(decoded_url)
+        if resp.status_code != 200:
+            resp = ssrf_proxy.get(decoded_url, timeout=3)
+        return {
+            "file_type": resp.headers.get("Content-Type", "application/octet-stream"),
+            "file_length": int(resp.headers.get("Content-Length", 0)),
+        }


 api.add_resource(FileApi, "/files/upload")
--- a/api/controllers/web/file.py
+++ b/api/controllers/web/file.py
@@ -42,14 +42,13 @@ class RemoteFileInfoApi(WebApiResource):
    @marshal_with(remote_file_info_fields)
    def get(self, url):
        decoded_url = urllib.parse.unquote(url)
-        try:
-            response = ssrf_proxy.head(decoded_url)
-            return {
-                "file_type": response.headers.get("Content-Type", "application/octet-stream"),
-                "file_length": int(response.headers.get("Content-Length", -1)),
-            }
-        except Exception as e:
-            return {"error": str(e)}, 400
+        resp = ssrf_proxy.head(decoded_url)
+        if resp.status_code != 200:
+            resp = ssrf_proxy.get(decoded_url, timeout=3)
+        return {
+            "file_type": resp.headers.get("Content-Type", "application/octet-stream"),
+            "file_length": int(resp.headers.get("Content-Length", -1)),
+        }


 api.add_resource(FileApi, "/files/upload")
--- a/api/factories/file_factory.py
+++ b/api/factories/file_factory.py
@@ -184,6 +184,8 @@ def _build_from_remote_url(
    filename = url.split("/")[-1].split("?")[0] or "unknown_file"

    resp = ssrf_proxy.head(url, follow_redirects=True)
+    if resp.status_code != httpx.codes.OK:
+        resp = ssrf_proxy.get(url, follow_redirects=True, timeout=3)
    if resp.status_code == httpx.codes.OK:
        if content_disposition := resp.headers.get("Content-Disposition"):
            filename = content_disposition.split("filename=")[-1].strip('"')