fix(api): resolve external knowledge API error due to excessive URL validation (#19003)

The `validators.url` function in `validators==0.21.0` rejects URLs that are
90 characters or longer, which caused external knowledge API requests to fail
when the endpoint URL was long.

This PR addresses the issue by replacing `validators.url` with
`urllib.parse.urlparse` and checking only that the endpoint has a scheme and a
network location, which removes the restrictive URL length check.

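Additionally, the now-unused direct `validators` dependency has been removed
(it remains in the lockfile as a transitive dependency of `weaviate-client`),
and `weaviate-client` is bumped from `~=3.21.0` to `~=3.24.0`.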

Fixes #18981.

Signed-off-by: kenwoodjw <blackxin55+@gmail.com>
This commit is contained in:
kenwoodjw 2025-04-29 22:32:38 +08:00 committed by GitHub
parent a9f7bcb12f
commit 8bf3f5ea78
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 12 additions and 14 deletions

View File

@ -81,7 +81,6 @@ dependencies = [
"tokenizers~=0.15.0", "tokenizers~=0.15.0",
"transformers~=4.35.0", "transformers~=4.35.0",
"unstructured[docx,epub,md,ppt,pptx]~=0.16.1", "unstructured[docx,epub,md,ppt,pptx]~=0.16.1",
"validators==0.21.0",
"weave~=0.51.34", "weave~=0.51.34",
"yarl~=1.18.3", "yarl~=1.18.3",
"webvtt-py~=0.5.1", "webvtt-py~=0.5.1",
@ -196,6 +195,6 @@ vdb = [
"tidb-vector==0.0.9", "tidb-vector==0.0.9",
"upstash-vector==0.6.0", "upstash-vector==0.6.0",
"volcengine-compat~=1.0.156", "volcengine-compat~=1.0.156",
"weaviate-client~=3.21.0", "weaviate-client~=3.24.0",
"xinference-client~=1.2.2", "xinference-client~=1.2.2",
] ]

View File

@ -2,9 +2,9 @@ import json
from copy import deepcopy from copy import deepcopy
from datetime import UTC, datetime from datetime import UTC, datetime
from typing import Any, Optional, Union, cast from typing import Any, Optional, Union, cast
from urllib.parse import urlparse
import httpx import httpx
import validators
from constants import HIDDEN_VALUE from constants import HIDDEN_VALUE
from core.helper import ssrf_proxy from core.helper import ssrf_proxy
@ -72,7 +72,9 @@ class ExternalDatasetService:
endpoint = f"{settings['endpoint']}/retrieval" endpoint = f"{settings['endpoint']}/retrieval"
api_key = settings["api_key"] api_key = settings["api_key"]
if not validators.url(endpoint, simple_host=True):
parsed_url = urlparse(endpoint)
if not all([parsed_url.scheme, parsed_url.netloc]):
if not endpoint.startswith("http://") and not endpoint.startswith("https://"): if not endpoint.startswith("http://") and not endpoint.startswith("https://"):
raise ValueError(f"invalid endpoint: {endpoint} must start with http:// or https://") raise ValueError(f"invalid endpoint: {endpoint} must start with http:// or https://")
else: else:

17
api/uv.lock generated
View File

@ -1232,7 +1232,6 @@ dependencies = [
{ name = "tokenizers" }, { name = "tokenizers" },
{ name = "transformers" }, { name = "transformers" },
{ name = "unstructured", extra = ["docx", "epub", "md", "ppt", "pptx"] }, { name = "unstructured", extra = ["docx", "epub", "md", "ppt", "pptx"] },
{ name = "validators" },
{ name = "weave" }, { name = "weave" },
{ name = "webvtt-py" }, { name = "webvtt-py" },
{ name = "yarl" }, { name = "yarl" },
@ -1403,7 +1402,6 @@ requires-dist = [
{ name = "tokenizers", specifier = "~=0.15.0" }, { name = "tokenizers", specifier = "~=0.15.0" },
{ name = "transformers", specifier = "~=4.35.0" }, { name = "transformers", specifier = "~=4.35.0" },
{ name = "unstructured", extras = ["docx", "epub", "md", "ppt", "pptx"], specifier = "~=0.16.1" }, { name = "unstructured", extras = ["docx", "epub", "md", "ppt", "pptx"], specifier = "~=0.16.1" },
{ name = "validators", specifier = "==0.21.0" },
{ name = "weave", specifier = "~=0.51.34" }, { name = "weave", specifier = "~=0.51.34" },
{ name = "webvtt-py", specifier = "~=0.5.1" }, { name = "webvtt-py", specifier = "~=0.5.1" },
{ name = "yarl", specifier = "~=1.18.3" }, { name = "yarl", specifier = "~=1.18.3" },
@ -1493,7 +1491,7 @@ vdb = [
{ name = "tidb-vector", specifier = "==0.0.9" }, { name = "tidb-vector", specifier = "==0.0.9" },
{ name = "upstash-vector", specifier = "==0.6.0" }, { name = "upstash-vector", specifier = "==0.6.0" },
{ name = "volcengine-compat", specifier = "~=1.0.156" }, { name = "volcengine-compat", specifier = "~=1.0.156" },
{ name = "weaviate-client", specifier = "~=3.21.0" }, { name = "weaviate-client", specifier = "~=3.24.0" },
{ name = "xinference-client", specifier = "~=1.2.2" }, { name = "xinference-client", specifier = "~=1.2.2" },
] ]
@ -6087,11 +6085,11 @@ wheels = [
[[package]] [[package]]
name = "validators" name = "validators"
version = "0.21.0" version = "0.34.0"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/1f/c5/4095e7a5a6fecc2eca953ad058a3609135d833f986f84951f7e26790d651/validators-0.21.0.tar.gz", hash = "sha256:245b98ab778ed9352a7269c6a8f6c2a839bed5b2a7e3e60273ce399d247dd4b3", size = 20937 } sdist = { url = "https://files.pythonhosted.org/packages/64/07/91582d69320f6f6daaf2d8072608a4ad8884683d4840e7e4f3a9dbdcc639/validators-0.34.0.tar.gz", hash = "sha256:647fe407b45af9a74d245b943b18e6a816acf4926974278f6dd617778e1e781f", size = 70955 }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/ad/50/18dbf2ac594234ee6249bfe3425fa424c18eeb96f29dcd47f199ed6c51bc/validators-0.21.0-py3-none-any.whl", hash = "sha256:3470db6f2384c49727ee319afa2e97aec3f8fad736faa6067e0fd7f9eaf2c551", size = 27686 }, { url = "https://files.pythonhosted.org/packages/6e/78/36828a4d857b25896f9774c875714ba4e9b3bc8a92d2debe3f4df3a83d4f/validators-0.34.0-py3-none-any.whl", hash = "sha256:c804b476e3e6d3786fa07a30073a4ef694e617805eb1946ceee3fe5a9b8b1321", size = 43536 },
] ]
[[package]] [[package]]
@ -6221,17 +6219,16 @@ wheels = [
[[package]] [[package]]
name = "weaviate-client" name = "weaviate-client"
version = "3.21.0" version = "3.24.2"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
dependencies = [ dependencies = [
{ name = "authlib" }, { name = "authlib" },
{ name = "requests" }, { name = "requests" },
{ name = "tqdm" },
{ name = "validators" }, { name = "validators" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/b4/a5/c6777a8507249d7a63f4f5d9696eb5f45beac87db0eddfa4438d408cc3b4/weaviate-client-3.21.0.tar.gz", hash = "sha256:ec94ac554883c765e94da8b2947c4f0fa4a0378ed3bbe9f3653df3a5b1745a6d", size = 186970 } sdist = { url = "https://files.pythonhosted.org/packages/1f/c1/3285a21d8885f2b09aabb65edb9a8e062a35c2d7175e1bb024fa096582ab/weaviate-client-3.24.2.tar.gz", hash = "sha256:6914c48c9a7e5ad0be9399271f9cb85d6f59ab77476c6d4e56a3925bf149edaa", size = 199332 }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/df/5b/57b55ad36eb071b57e79f1ea7fba5bfe6a2fe49702607f56726569665d60/weaviate_client-3.21.0-py3-none-any.whl", hash = "sha256:420444ded7106fb000f4f8b2321b5f5fa2387825aa7a303d702accf61026f9d2", size = 99944 }, { url = "https://files.pythonhosted.org/packages/ab/98/3136d05f93e30cf29e1db280eaadf766df18d812dfe7994bcced653b2340/weaviate_client-3.24.2-py3-none-any.whl", hash = "sha256:bc50ca5fcebcd48de0d00f66700b0cf7c31a97c4cd3d29b4036d77c5d1d9479b", size = 107968 },
] ]
[[package]] [[package]]