Fix: Workflow "start" paste URL does not support S3 pre-signed URLs (#6855)

Co-authored-by: Yuanbo Li <ybalbert@amazon.com>
This commit is contained in:
ybalbert001 2024-08-11 16:45:15 +08:00 committed by GitHub
parent ac60182c91
commit f2cb1fb09f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 1109 additions and 1043 deletions

View File

@ -1,5 +1,7 @@
import re
from collections.abc import Mapping, Sequence
from typing import Any, Union
from urllib.parse import parse_qs, urlparse
import requests
@ -186,6 +188,30 @@ class MessageFileParser:
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}
def is_s3_presigned_url(url):
    """Return True when ``url`` looks like an Amazon S3 pre-signed URL.

    Both query-string signing schemes are recognised:

    * Signature Version 2: ``Signature`` (base64) and ``Expires`` (epoch
      seconds).
    * Signature Version 4: ``X-Amz-Signature`` (hex) and ``X-Amz-Expires``
      (lifetime in seconds).

    The check is purely syntactic: no network request is made and the
    signature itself is not verified.

    Args:
        url: The URL to inspect. Non-string values are treated as "not a
            pre-signed URL" instead of raising.

    Returns:
        bool: True if the host is an ``amazonaws.com`` (or
        ``amazonaws.com.cn``) endpoint and the query string carries a
        well-formed signature/expiry pair, False otherwise.
    """
    if not isinstance(url, str):
        return False

    try:
        parsed_url = urlparse(url)
        # ``hostname`` is lower-cased and has userinfo/port stripped, unlike
        # ``netloc``.
        host = parsed_url.hostname or ""
        query_params = parse_qs(parsed_url.query)
    except ValueError:
        # Malformed URL (e.g. unbalanced IPv6 brackets): not a pre-signed URL.
        return False

    # Match on the domain suffix; a substring test would also accept hosts
    # such as "amazonaws.com.evil.example".
    if not host.endswith((".amazonaws.com", ".amazonaws.com.cn")):
        return False

    # parse_qs drops blank values, so a missing or empty parameter both end
    # up as "" here and fail the checks below.
    v2_signature = query_params.get("Signature", [""])[0]
    v2_expires = query_params.get("Expires", [""])[0]
    if v2_expires.isdigit() and re.fullmatch(r"[A-Za-z0-9+/]+={0,2}", v2_signature):
        return True

    v4_signature = query_params.get("X-Amz-Signature", [""])[0]
    v4_expires = query_params.get("X-Amz-Expires", [""])[0]
    return bool(v4_expires.isdigit() and re.fullmatch(r"[0-9A-Fa-f]+", v4_signature))
if is_s3_presigned_url(url):
response = requests.get(url, headers=headers, allow_redirects=True)
if response.status_code in {200, 304}:
return True, ""
response = requests.head(url, headers=headers, allow_redirects=True)
if response.status_code in {200, 304}:
return True, ""

View File

@ -379,8 +379,12 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
if not message_content.data.startswith("data:"):
# fetch image data from url
try:
image_content = requests.get(message_content.data).content
mime_type, _ = mimetypes.guess_type(message_content.data)
url = message_content.data
image_content = requests.get(url).content
if '?' in url:
url = url.split('?')[0]
mime_type, _ = mimetypes.guess_type(url)
base64_data = base64.b64encode(image_content).decode('utf-8')
except Exception as ex:
raise ValueError(f"Failed to fetch image data from url {message_content.data}, {ex}")
else:

2116
api/poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -110,7 +110,7 @@ authlib = "1.3.1"
azure-identity = "1.16.1"
azure-storage-blob = "12.13.0"
beautifulsoup4 = "4.12.2"
boto3 = "1.34.136"
boto3 = "1.34.148"
bs4 = "~0.0.1"
cachetools = "~5.3.0"
celery = "~5.3.6"