mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-08-14 03:25:57 +08:00
refactor: update load_stream method to directly yield file chunks (#9806)
This commit is contained in:
parent
dd17506078
commit
5bf31e7a86
@ -36,13 +36,10 @@ class AliyunOssStorage(BaseStorage):
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
def load_stream(self, filename: str) -> Generator:
|
def load_stream(self, filename: str) -> Generator:
|
||||||
def generate(filename: str = filename) -> Generator:
|
|
||||||
obj = self.client.get_object(self.__wrapper_folder_filename(filename))
|
obj = self.client.get_object(self.__wrapper_folder_filename(filename))
|
||||||
while chunk := obj.read(4096):
|
while chunk := obj.read(4096):
|
||||||
yield chunk
|
yield chunk
|
||||||
|
|
||||||
return generate()
|
|
||||||
|
|
||||||
def download(self, filename, target_filepath):
|
def download(self, filename, target_filepath):
|
||||||
self.client.get_object_to_file(self.__wrapper_folder_filename(filename), target_filepath)
|
self.client.get_object_to_file(self.__wrapper_folder_filename(filename), target_filepath)
|
||||||
|
|
||||||
|
@ -62,7 +62,6 @@ class AwsS3Storage(BaseStorage):
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
def load_stream(self, filename: str) -> Generator:
|
def load_stream(self, filename: str) -> Generator:
|
||||||
def generate(filename: str = filename) -> Generator:
|
|
||||||
try:
|
try:
|
||||||
response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
|
response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
|
||||||
yield from response["Body"].iter_chunks()
|
yield from response["Body"].iter_chunks()
|
||||||
@ -72,8 +71,6 @@ class AwsS3Storage(BaseStorage):
|
|||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
return generate()
|
|
||||||
|
|
||||||
def download(self, filename, target_filepath):
|
def download(self, filename, target_filepath):
|
||||||
self.client.download_file(self.bucket_name, filename, target_filepath)
|
self.client.download_file(self.bucket_name, filename, target_filepath)
|
||||||
|
|
||||||
|
@ -32,14 +32,10 @@ class AzureBlobStorage(BaseStorage):
|
|||||||
|
|
||||||
def load_stream(self, filename: str) -> Generator:
|
def load_stream(self, filename: str) -> Generator:
|
||||||
client = self._sync_client()
|
client = self._sync_client()
|
||||||
|
|
||||||
def generate(filename: str = filename) -> Generator:
|
|
||||||
blob = client.get_blob_client(container=self.bucket_name, blob=filename)
|
blob = client.get_blob_client(container=self.bucket_name, blob=filename)
|
||||||
blob_data = blob.download_blob()
|
blob_data = blob.download_blob()
|
||||||
yield from blob_data.chunks()
|
yield from blob_data.chunks()
|
||||||
|
|
||||||
return generate(filename)
|
|
||||||
|
|
||||||
def download(self, filename, target_filepath):
|
def download(self, filename, target_filepath):
|
||||||
client = self._sync_client()
|
client = self._sync_client()
|
||||||
|
|
||||||
|
@ -39,13 +39,10 @@ class BaiduObsStorage(BaseStorage):
|
|||||||
return response.data.read()
|
return response.data.read()
|
||||||
|
|
||||||
def load_stream(self, filename: str) -> Generator:
|
def load_stream(self, filename: str) -> Generator:
|
||||||
def generate(filename: str = filename) -> Generator:
|
|
||||||
response = self.client.get_object(bucket_name=self.bucket_name, key=filename).data
|
response = self.client.get_object(bucket_name=self.bucket_name, key=filename).data
|
||||||
while chunk := response.read(4096):
|
while chunk := response.read(4096):
|
||||||
yield chunk
|
yield chunk
|
||||||
|
|
||||||
return generate()
|
|
||||||
|
|
||||||
def download(self, filename, target_filepath):
|
def download(self, filename, target_filepath):
|
||||||
self.client.get_object_to_file(bucket_name=self.bucket_name, key=filename, file_name=target_filepath)
|
self.client.get_object_to_file(bucket_name=self.bucket_name, key=filename, file_name=target_filepath)
|
||||||
|
|
||||||
|
@ -39,15 +39,12 @@ class GoogleCloudStorage(BaseStorage):
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
def load_stream(self, filename: str) -> Generator:
|
def load_stream(self, filename: str) -> Generator:
|
||||||
def generate(filename: str = filename) -> Generator:
|
|
||||||
bucket = self.client.get_bucket(self.bucket_name)
|
bucket = self.client.get_bucket(self.bucket_name)
|
||||||
blob = bucket.get_blob(filename)
|
blob = bucket.get_blob(filename)
|
||||||
with blob.open(mode="rb") as blob_stream:
|
with blob.open(mode="rb") as blob_stream:
|
||||||
while chunk := blob_stream.read(4096):
|
while chunk := blob_stream.read(4096):
|
||||||
yield chunk
|
yield chunk
|
||||||
|
|
||||||
return generate()
|
|
||||||
|
|
||||||
def download(self, filename, target_filepath):
|
def download(self, filename, target_filepath):
|
||||||
bucket = self.client.get_bucket(self.bucket_name)
|
bucket = self.client.get_bucket(self.bucket_name)
|
||||||
blob = bucket.get_blob(filename)
|
blob = bucket.get_blob(filename)
|
||||||
|
@ -27,13 +27,10 @@ class HuaweiObsStorage(BaseStorage):
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
def load_stream(self, filename: str) -> Generator:
|
def load_stream(self, filename: str) -> Generator:
|
||||||
def generate(filename: str = filename) -> Generator:
|
|
||||||
response = self.client.getObject(bucketName=self.bucket_name, objectKey=filename)["body"].response
|
response = self.client.getObject(bucketName=self.bucket_name, objectKey=filename)["body"].response
|
||||||
while chunk := response.read(4096):
|
while chunk := response.read(4096):
|
||||||
yield chunk
|
yield chunk
|
||||||
|
|
||||||
return generate()
|
|
||||||
|
|
||||||
def download(self, filename, target_filepath):
|
def download(self, filename, target_filepath):
|
||||||
self.client.getObject(bucketName=self.bucket_name, objectKey=filename, downloadPath=target_filepath)
|
self.client.getObject(bucketName=self.bucket_name, objectKey=filename, downloadPath=target_filepath)
|
||||||
|
|
||||||
|
@ -40,16 +40,12 @@ class LocalFsStorage(BaseStorage):
|
|||||||
|
|
||||||
def load_stream(self, filename: str) -> Generator:
|
def load_stream(self, filename: str) -> Generator:
|
||||||
filepath = self._build_filepath(filename)
|
filepath = self._build_filepath(filename)
|
||||||
|
|
||||||
def generate() -> Generator:
|
|
||||||
if not os.path.exists(filepath):
|
if not os.path.exists(filepath):
|
||||||
raise FileNotFoundError("File not found")
|
raise FileNotFoundError("File not found")
|
||||||
with open(filepath, "rb") as f:
|
with open(filepath, "rb") as f:
|
||||||
while chunk := f.read(4096): # Read in chunks of 4KB
|
while chunk := f.read(4096): # Read in chunks of 4KB
|
||||||
yield chunk
|
yield chunk
|
||||||
|
|
||||||
return generate()
|
|
||||||
|
|
||||||
def download(self, filename, target_filepath):
|
def download(self, filename, target_filepath):
|
||||||
filepath = self._build_filepath(filename)
|
filepath = self._build_filepath(filename)
|
||||||
if not os.path.exists(filepath):
|
if not os.path.exists(filepath):
|
||||||
|
@ -36,7 +36,6 @@ class OracleOCIStorage(BaseStorage):
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
def load_stream(self, filename: str) -> Generator:
|
def load_stream(self, filename: str) -> Generator:
|
||||||
def generate(filename: str = filename) -> Generator:
|
|
||||||
try:
|
try:
|
||||||
response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
|
response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
|
||||||
yield from response["Body"].iter_chunks()
|
yield from response["Body"].iter_chunks()
|
||||||
@ -46,8 +45,6 @@ class OracleOCIStorage(BaseStorage):
|
|||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
return generate()
|
|
||||||
|
|
||||||
def download(self, filename, target_filepath):
|
def download(self, filename, target_filepath):
|
||||||
self.client.download_file(self.bucket_name, filename, target_filepath)
|
self.client.download_file(self.bucket_name, filename, target_filepath)
|
||||||
|
|
||||||
|
@ -36,17 +36,14 @@ class SupabaseStorage(BaseStorage):
|
|||||||
return content
|
return content
|
||||||
|
|
||||||
def load_stream(self, filename: str) -> Generator:
|
def load_stream(self, filename: str) -> Generator:
|
||||||
def generate(filename: str = filename) -> Generator:
|
|
||||||
result = self.client.storage.from_(self.bucket_name).download(filename)
|
result = self.client.storage.from_(self.bucket_name).download(filename)
|
||||||
byte_stream = io.BytesIO(result)
|
byte_stream = io.BytesIO(result)
|
||||||
while chunk := byte_stream.read(4096): # Read in chunks of 4KB
|
while chunk := byte_stream.read(4096): # Read in chunks of 4KB
|
||||||
yield chunk
|
yield chunk
|
||||||
|
|
||||||
return generate()
|
|
||||||
|
|
||||||
def download(self, filename, target_filepath):
|
def download(self, filename, target_filepath):
|
||||||
result = self.client.storage.from_(self.bucket_name).download(filename)
|
result = self.client.storage.from_(self.bucket_name).download(filename)
|
||||||
Path(result).write_bytes(result)
|
Path(target_filepath).write_bytes(result)
|
||||||
|
|
||||||
def exists(self, filename):
|
def exists(self, filename):
|
||||||
result = self.client.storage.from_(self.bucket_name).list(filename)
|
result = self.client.storage.from_(self.bucket_name).list(filename)
|
||||||
|
@ -29,12 +29,9 @@ class TencentCosStorage(BaseStorage):
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
def load_stream(self, filename: str) -> Generator:
|
def load_stream(self, filename: str) -> Generator:
|
||||||
def generate(filename: str = filename) -> Generator:
|
|
||||||
response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
|
response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
|
||||||
yield from response["Body"].get_stream(chunk_size=4096)
|
yield from response["Body"].get_stream(chunk_size=4096)
|
||||||
|
|
||||||
return generate()
|
|
||||||
|
|
||||||
def download(self, filename, target_filepath):
|
def download(self, filename, target_filepath):
|
||||||
response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
|
response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
|
||||||
response["Body"].get_stream_to_file(target_filepath)
|
response["Body"].get_stream_to_file(target_filepath)
|
||||||
|
@ -27,13 +27,10 @@ class VolcengineTosStorage(BaseStorage):
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
def load_stream(self, filename: str) -> Generator:
|
def load_stream(self, filename: str) -> Generator:
|
||||||
def generate(filename: str = filename) -> Generator:
|
|
||||||
response = self.client.get_object(bucket=self.bucket_name, key=filename)
|
response = self.client.get_object(bucket=self.bucket_name, key=filename)
|
||||||
while chunk := response.read(4096):
|
while chunk := response.read(4096):
|
||||||
yield chunk
|
yield chunk
|
||||||
|
|
||||||
return generate()
|
|
||||||
|
|
||||||
def download(self, filename, target_filepath):
|
def download(self, filename, target_filepath):
|
||||||
self.client.get_object_to_file(bucket=self.bucket_name, key=filename, file_path=target_filepath)
|
self.client.get_object_to_file(bucket=self.bucket_name, key=filename, file_path=target_filepath)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user