Merge branch 'main' into fix/chore-fix
This change is contained in commit 75fe785d88.
@@ -7,5 +7,6 @@ echo 'alias start-api="cd /workspaces/dify/api && poetry run python -m flask run
echo 'alias start-worker="cd /workspaces/dify/api && poetry run python -m celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace,app_deletion"' >> ~/.bashrc
echo 'alias start-web="cd /workspaces/dify/web && npm run dev"' >> ~/.bashrc
echo 'alias start-containers="cd /workspaces/dify/docker && docker-compose -f docker-compose.middleware.yaml -p dify up -d"' >> ~/.bashrc
echo 'alias stop-containers="cd /workspaces/dify/docker && docker-compose -f docker-compose.middleware.yaml -p dify down"' >> ~/.bashrc

source /home/vscode/.bashrc
1 .github/workflows/style.yml vendored
@@ -37,6 +37,7 @@ jobs:
      - name: Ruff check
        if: steps.changed-files.outputs.any_changed == 'true'
        run: |
          poetry run -C api ruff --version
          poetry run -C api ruff check ./api
          poetry run -C api ruff format --check ./api
3 .github/workflows/vdb-tests.yml vendored
@@ -51,7 +51,7 @@ jobs:
      - name: Expose Service Ports
        run: sh .github/workflows/expose_service_ports.sh

      - name: Set up Vector Stores (Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase)
      - name: Set up Vector Stores (TiDB, Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase)
        uses: hoverkraft-tech/compose-action@v2.0.2
        with:
          compose-file: |
@@ -67,6 +67,7 @@ jobs:
            pgvector
            chroma
            elasticsearch
            tidb

      - name: Test Vector Stores
        run: poetry run -C api bash dev/pytest/pytest_vdb.sh
@@ -56,20 +56,36 @@ DB_DATABASE=dify

# Storage configuration
# used to store uploaded files, private keys, ...
# storage type: local, s3, aliyun-oss, azure-blob, baidu-obs, google-storage, huawei-obs, oci-storage, tencent-cos, volcengine-tos, supabase
STORAGE_TYPE=local
STORAGE_LOCAL_PATH=storage
# storage type: opendal, s3, aliyun-oss, azure-blob, baidu-obs, google-storage, huawei-obs, oci-storage, tencent-cos, volcengine-tos, supabase
STORAGE_TYPE=opendal

# Apache OpenDAL storage configuration, refer to https://github.com/apache/opendal
STORAGE_OPENDAL_SCHEME=fs
# OpenDAL FS
OPENDAL_FS_ROOT=storage
# OpenDAL S3
OPENDAL_S3_ROOT=/
OPENDAL_S3_BUCKET=your-bucket-name
OPENDAL_S3_ENDPOINT=https://s3.amazonaws.com
OPENDAL_S3_ACCESS_KEY_ID=your-access-key
OPENDAL_S3_SECRET_ACCESS_KEY=your-secret-key
OPENDAL_S3_REGION=your-region
OPENDAL_S3_SERVER_SIDE_ENCRYPTION=

# S3 Storage configuration
S3_USE_AWS_MANAGED_IAM=false
S3_ENDPOINT=https://your-bucket-name.storage.s3.cloudflare.com
S3_BUCKET_NAME=your-bucket-name
S3_ACCESS_KEY=your-access-key
S3_SECRET_KEY=your-secret-key
S3_REGION=your-region

# Azure Blob Storage configuration
AZURE_BLOB_ACCOUNT_NAME=your-account-name
AZURE_BLOB_ACCOUNT_KEY=your-account-key
AZURE_BLOB_CONTAINER_NAME=your-container-name
AZURE_BLOB_ACCOUNT_URL=https://<your_account_name>.blob.core.windows.net

# Aliyun OSS Storage configuration
ALIYUN_OSS_BUCKET_NAME=your-bucket-name
ALIYUN_OSS_ACCESS_KEY=your-access-key
@@ -79,6 +95,7 @@ ALIYUN_OSS_AUTH_VERSION=v1
ALIYUN_OSS_REGION=your-region
# Don't start with '/'. OSS doesn't support leading slash in object names.
ALIYUN_OSS_PATH=your-path

# Google Storage configuration
GOOGLE_STORAGE_BUCKET_NAME=your-bucket-name
GOOGLE_STORAGE_SERVICE_ACCOUNT_JSON_BASE64=your-google-service-account-json-base64-string
@@ -125,8 +142,8 @@ SUPABASE_URL=your-server-url
WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
CONSOLE_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*


# Vector database configuration, support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, chroma, opensearch, tidb_vector, couchbase, vikingdb, upstash, lindorm, oceanbase
# Vector database configuration
# support: weaviate, qdrant, milvus, myscale, relyt, pgvecto_rs, pgvector, chroma, opensearch, tidb_vector, couchbase, vikingdb, upstash, lindorm, oceanbase
VECTOR_STORE=weaviate

# Weaviate configuration
@@ -277,6 +294,7 @@ VIKINGDB_SOCKET_TIMEOUT=30
LINDORM_URL=http://ld-*******************-proxy-search-pub.lindorm.aliyuncs.com:30070
LINDORM_USERNAME=admin
LINDORM_PASSWORD=admin
USING_UGC_INDEX=False

# OceanBase Vector configuration
OCEANBASE_VECTOR_HOST=127.0.0.1
@@ -381,6 +399,8 @@ LOG_FILE_BACKUP_COUNT=5
LOG_DATEFORMAT=%Y-%m-%d %H:%M:%S
# Log Timezone
LOG_TZ=UTC
# Log format
LOG_FORMAT=%(asctime)s,%(msecs)d %(levelname)-2s [%(filename)s:%(lineno)d] %(req_id)s %(message)s

# Indexing configuration
INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH=4000
@@ -424,3 +444,5 @@ RESET_PASSWORD_TOKEN_EXPIRY_MINUTES=5

CREATE_TIDB_SERVICE_JOB_ENABLED=false

# Maximum number of threads submitted to the ThreadPool for parallel node execution
MAX_SUBMIT_COUNT=100
@@ -1,11 +1,51 @@
from pydantic_settings import SettingsConfigDict
import logging
from typing import Any

from configs.deploy import DeploymentConfig
from configs.enterprise import EnterpriseFeatureConfig
from configs.extra import ExtraServiceConfig
from configs.feature import FeatureConfig
from configs.middleware import MiddlewareConfig
from configs.packaging import PackagingInfo
from pydantic.fields import FieldInfo
from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict

from .deploy import DeploymentConfig
from .enterprise import EnterpriseFeatureConfig
from .extra import ExtraServiceConfig
from .feature import FeatureConfig
from .middleware import MiddlewareConfig
from .packaging import PackagingInfo
from .remote_settings_sources import RemoteSettingsSource, RemoteSettingsSourceConfig, RemoteSettingsSourceName
from .remote_settings_sources.apollo import ApolloSettingsSource

logger = logging.getLogger(__name__)


class RemoteSettingsSourceFactory(PydanticBaseSettingsSource):
    def __init__(self, settings_cls: type[BaseSettings]):
        super().__init__(settings_cls)

    def get_field_value(self, field: FieldInfo, field_name: str) -> tuple[Any, str, bool]:
        raise NotImplementedError

    def __call__(self) -> dict[str, Any]:
        current_state = self.current_state
        remote_source_name = current_state.get("REMOTE_SETTINGS_SOURCE_NAME")
        if not remote_source_name:
            return {}

        remote_source: RemoteSettingsSource | None = None
        match remote_source_name:
            case RemoteSettingsSourceName.APOLLO:
                remote_source = ApolloSettingsSource(current_state)
            case _:
                logger.warning(f"Unsupported remote source: {remote_source_name}")
                return {}

        d: dict[str, Any] = {}

        for field_name, field in self.settings_cls.model_fields.items():
            field_value, field_key, value_is_complex = remote_source.get_field_value(field, field_name)
            field_value = remote_source.prepare_field_value(field_name, field, field_value, value_is_complex)
            if field_value is not None:
                d[field_key] = field_value

        return d


class DifyConfig(
@@ -19,6 +59,8 @@ class DifyConfig(
    MiddlewareConfig,
    # Extra service configs
    ExtraServiceConfig,
    # Remote source configs
    RemoteSettingsSourceConfig,
    # Enterprise feature configs
    # **Before using, please contact business@dify.ai by email to inquire about licensing matters.**
    EnterpriseFeatureConfig,
@@ -35,3 +77,20 @@ class DifyConfig(
    # please consider arranging it in the proper existing config group (or a new one)
    # for better readability and maintainability.
    # Thanks for your attention and consideration.

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        return (
            init_settings,
            env_settings,
            RemoteSettingsSourceFactory(settings_cls),
            dotenv_settings,
            file_secret_settings,
        )
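The factory above plugs into pydantic-settings' source chain: settings_customise_sources returns the sources in priority order (earlier entries win), and each source's __call__ contributes a dict of field values. A minimal, self-contained sketch of the same pattern, with illustrative class and field names:

from typing import Any

from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict


class StaticRemoteSource(PydanticBaseSettingsSource):
    # Stand-in for a remote store that always supplies one value.
    def get_field_value(self, field, field_name) -> tuple[Any, str, bool]:
        raise NotImplementedError

    def __call__(self) -> dict[str, Any]:
        return {"APP_NAME": "from-remote"}


class DemoConfig(BaseSettings):
    model_config = SettingsConfigDict()

    APP_NAME: str = "default"

    @classmethod
    def settings_customise_sources(cls, settings_cls, init_settings, env_settings, dotenv_settings, file_secret_settings):
        # Same ordering as the diff: env vars beat the remote source, which beats .env files.
        return (init_settings, env_settings, StaticRemoteSource(settings_cls), dotenv_settings, file_secret_settings)


print(DemoConfig().APP_NAME)  # "from-remote" unless APP_NAME is set via init kwargs or the environment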
@@ -497,6 +497,17 @@ class WorkflowConfig(BaseSettings):
    )


class WorkflowNodeExecutionConfig(BaseSettings):
    """
    Configuration for workflow node execution
    """

    MAX_SUBMIT_COUNT: PositiveInt = Field(
        description="Maximum number of tasks submitted to the ThreadPool for parallel node execution",
        default=100,
    )


class AuthConfig(BaseSettings):
    """
    Configuration for authentication and OAuth
@@ -835,6 +846,7 @@ class FeatureConfig(
    ToolConfig,
    UpdateConfig,
    WorkflowConfig,
    WorkflowNodeExecutionConfig,
    WorkspaceConfig,
    LoginConfig,
    # hosted services config
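MAX_SUBMIT_COUNT caps how many node tasks a single run may hand to the executor. A rough sketch of how such a cap can be enforced (the executor wiring below is illustrative, not Dify's actual implementation):

from concurrent.futures import ThreadPoolExecutor

MAX_SUBMIT_COUNT = 100  # mirrors the default above


def run_parallel_nodes(nodes, run_node):
    # Refuse to enqueue more work than the configured submission budget allows.
    if len(nodes) > MAX_SUBMIT_COUNT:
        raise ValueError(f"too many parallel nodes: {len(nodes)} > {MAX_SUBMIT_COUNT}")
    with ThreadPoolExecutor(max_workers=max(1, min(len(nodes), 10))) as pool:
        futures = [pool.submit(run_node, node) for node in nodes]
        return [future.result() for future in futures]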
@@ -1,54 +1,69 @@
from typing import Any, Optional
from typing import Any, Literal, Optional
from urllib.parse import quote_plus

from pydantic import Field, NonNegativeInt, PositiveFloat, PositiveInt, computed_field
from pydantic_settings import BaseSettings

from configs.middleware.cache.redis_config import RedisConfig
from configs.middleware.storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
from configs.middleware.storage.amazon_s3_storage_config import S3StorageConfig
from configs.middleware.storage.azure_blob_storage_config import AzureBlobStorageConfig
from configs.middleware.storage.baidu_obs_storage_config import BaiduOBSStorageConfig
from configs.middleware.storage.google_cloud_storage_config import GoogleCloudStorageConfig
from configs.middleware.storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
from configs.middleware.storage.oci_storage_config import OCIStorageConfig
from configs.middleware.storage.supabase_storage_config import SupabaseStorageConfig
from configs.middleware.storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig
from configs.middleware.storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
from configs.middleware.vdb.analyticdb_config import AnalyticdbConfig
from configs.middleware.vdb.baidu_vector_config import BaiduVectorDBConfig
from configs.middleware.vdb.chroma_config import ChromaConfig
from configs.middleware.vdb.couchbase_config import CouchbaseConfig
from configs.middleware.vdb.elasticsearch_config import ElasticsearchConfig
from configs.middleware.vdb.lindorm_config import LindormConfig
from configs.middleware.vdb.milvus_config import MilvusConfig
from configs.middleware.vdb.myscale_config import MyScaleConfig
from configs.middleware.vdb.oceanbase_config import OceanBaseVectorConfig
from configs.middleware.vdb.opensearch_config import OpenSearchConfig
from configs.middleware.vdb.oracle_config import OracleConfig
from configs.middleware.vdb.pgvector_config import PGVectorConfig
from configs.middleware.vdb.pgvectors_config import PGVectoRSConfig
from configs.middleware.vdb.qdrant_config import QdrantConfig
from configs.middleware.vdb.relyt_config import RelytConfig
from configs.middleware.vdb.tencent_vector_config import TencentVectorDBConfig
from configs.middleware.vdb.tidb_on_qdrant_config import TidbOnQdrantConfig
from configs.middleware.vdb.tidb_vector_config import TiDBVectorConfig
from configs.middleware.vdb.upstash_config import UpstashConfig
from configs.middleware.vdb.vikingdb_config import VikingDBConfig
from configs.middleware.vdb.weaviate_config import WeaviateConfig
from .cache.redis_config import RedisConfig
from .storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
from .storage.amazon_s3_storage_config import S3StorageConfig
from .storage.azure_blob_storage_config import AzureBlobStorageConfig
from .storage.baidu_obs_storage_config import BaiduOBSStorageConfig
from .storage.google_cloud_storage_config import GoogleCloudStorageConfig
from .storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
from .storage.oci_storage_config import OCIStorageConfig
from .storage.opendal_storage_config import OpenDALStorageConfig
from .storage.supabase_storage_config import SupabaseStorageConfig
from .storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig
from .storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
from .vdb.analyticdb_config import AnalyticdbConfig
from .vdb.baidu_vector_config import BaiduVectorDBConfig
from .vdb.chroma_config import ChromaConfig
from .vdb.couchbase_config import CouchbaseConfig
from .vdb.elasticsearch_config import ElasticsearchConfig
from .vdb.lindorm_config import LindormConfig
from .vdb.milvus_config import MilvusConfig
from .vdb.myscale_config import MyScaleConfig
from .vdb.oceanbase_config import OceanBaseVectorConfig
from .vdb.opensearch_config import OpenSearchConfig
from .vdb.oracle_config import OracleConfig
from .vdb.pgvector_config import PGVectorConfig
from .vdb.pgvectors_config import PGVectoRSConfig
from .vdb.qdrant_config import QdrantConfig
from .vdb.relyt_config import RelytConfig
from .vdb.tencent_vector_config import TencentVectorDBConfig
from .vdb.tidb_on_qdrant_config import TidbOnQdrantConfig
from .vdb.tidb_vector_config import TiDBVectorConfig
from .vdb.upstash_config import UpstashConfig
from .vdb.vikingdb_config import VikingDBConfig
from .vdb.weaviate_config import WeaviateConfig


class StorageConfig(BaseSettings):
    STORAGE_TYPE: str = Field(
    STORAGE_TYPE: Literal[
        "opendal",
        "s3",
        "aliyun-oss",
        "azure-blob",
        "baidu-obs",
        "google-storage",
        "huawei-obs",
        "oci-storage",
        "tencent-cos",
        "volcengine-tos",
        "supabase",
        "local",
    ] = Field(
        description="Type of storage to use."
        " Options: 'local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', 'huawei-obs', "
        "'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'local'.",
        default="local",
        " Options: 'opendal', '(deprecated) local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', "
        "'huawei-obs', 'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'opendal'.",
        default="opendal",
    )

    STORAGE_LOCAL_PATH: str = Field(
        description="Path for local storage when STORAGE_TYPE is set to 'local'.",
        default="storage",
        deprecated=True,
    )


@@ -73,7 +88,7 @@ class KeywordStoreConfig(BaseSettings):
    )


class DatabaseConfig:
class DatabaseConfig(BaseSettings):
    DB_HOST: str = Field(
        description="Hostname or IP address of the database server.",
        default="localhost",
@@ -235,6 +250,7 @@ class MiddlewareConfig(
    GoogleCloudStorageConfig,
    HuaweiCloudOBSStorageConfig,
    OCIStorageConfig,
    OpenDALStorageConfig,
    S3StorageConfig,
    SupabaseStorageConfig,
    TencentCloudCOSStorageConfig,
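Moving STORAGE_TYPE from str to Literal[...] means an unsupported value now fails validation when the settings object is built instead of surfacing later at runtime. A small sketch of that behaviour (pydantic v2 assumed, field values illustrative):

from typing import Literal

from pydantic import ValidationError
from pydantic_settings import BaseSettings


class DemoStorageConfig(BaseSettings):
    STORAGE_TYPE: Literal["opendal", "s3", "local"] = "opendal"


try:
    DemoStorageConfig(STORAGE_TYPE="ftp")  # not one of the allowed literals
except ValidationError as exc:
    print(exc.errors()[0]["type"])  # "literal_error"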
@@ -1,9 +1,10 @@
from typing import Optional

from pydantic import BaseModel, Field
from pydantic import Field
from pydantic_settings import BaseSettings


class BaiduOBSStorageConfig(BaseModel):
class BaiduOBSStorageConfig(BaseSettings):
    """
    Configuration settings for Baidu Object Storage Service (OBS)
    """
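The practical effect of switching these config classes from BaseModel to BaseSettings is that their fields are populated from environment variables as well as constructor arguments. A toy comparison (the field name is reused here for illustration only):

import os

from pydantic import BaseModel
from pydantic_settings import BaseSettings

os.environ["BAIDU_OBS_BUCKET_NAME"] = "my-bucket"


class AsModel(BaseModel):
    BAIDU_OBS_BUCKET_NAME: str = ""


class AsSettings(BaseSettings):
    BAIDU_OBS_BUCKET_NAME: str = ""


print(AsModel().BAIDU_OBS_BUCKET_NAME)     # "" -- plain models ignore the environment
print(AsSettings().BAIDU_OBS_BUCKET_NAME)  # "my-bucket" -- settings read it from the environment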
@@ -1,9 +1,10 @@
from typing import Optional

from pydantic import BaseModel, Field
from pydantic import Field
from pydantic_settings import BaseSettings


class HuaweiCloudOBSStorageConfig(BaseModel):
class HuaweiCloudOBSStorageConfig(BaseSettings):
    """
    Configuration settings for Huawei Cloud Object Storage Service (OBS)
    """
51 api/configs/middleware/storage/opendal_storage_config.py Normal file
@@ -0,0 +1,51 @@
from enum import StrEnum
from typing import Literal

from pydantic import Field
from pydantic_settings import BaseSettings


class OpenDALScheme(StrEnum):
    FS = "fs"
    S3 = "s3"


class OpenDALStorageConfig(BaseSettings):
    STORAGE_OPENDAL_SCHEME: str = Field(
        default=OpenDALScheme.FS.value,
        description="OpenDAL scheme.",
    )
    # FS
    OPENDAL_FS_ROOT: str = Field(
        default="storage",
        description="Root path for local storage.",
    )
    # S3
    OPENDAL_S3_ROOT: str = Field(
        default="/",
        description="Root path for S3 storage.",
    )
    OPENDAL_S3_BUCKET: str = Field(
        default="",
        description="S3 bucket name.",
    )
    OPENDAL_S3_ENDPOINT: str = Field(
        default="https://s3.amazonaws.com",
        description="S3 endpoint URL.",
    )
    OPENDAL_S3_ACCESS_KEY_ID: str = Field(
        default="",
        description="S3 access key ID.",
    )
    OPENDAL_S3_SECRET_ACCESS_KEY: str = Field(
        default="",
        description="S3 secret access key.",
    )
    OPENDAL_S3_REGION: str = Field(
        default="",
        description="S3 region.",
    )
    OPENDAL_S3_SERVER_SIDE_ENCRYPTION: Literal["aws:kms", ""] = Field(
        default="",
        description="S3 server-side encryption.",
    )
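This file only defines the settings; how they are handed to OpenDAL is not part of this diff. A hedged sketch of one way the values could be mapped onto the Apache OpenDAL Python binding (the opendal.Operator(scheme, **kwargs) call and the dify_config import path are assumptions):

import opendal  # Apache OpenDAL Python binding

from configs import dify_config  # assumed import path


def build_operator() -> opendal.Operator:
    scheme = dify_config.STORAGE_OPENDAL_SCHEME
    if scheme == "fs":
        return opendal.Operator("fs", root=dify_config.OPENDAL_FS_ROOT)
    if scheme == "s3":
        return opendal.Operator(
            "s3",
            root=dify_config.OPENDAL_S3_ROOT,
            bucket=dify_config.OPENDAL_S3_BUCKET,
            endpoint=dify_config.OPENDAL_S3_ENDPOINT,
            access_key_id=dify_config.OPENDAL_S3_ACCESS_KEY_ID,
            secret_access_key=dify_config.OPENDAL_S3_SECRET_ACCESS_KEY,
            region=dify_config.OPENDAL_S3_REGION,
        )
    raise ValueError(f"unsupported OpenDAL scheme: {scheme}")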
@@ -1,9 +1,10 @@
from typing import Optional

from pydantic import BaseModel, Field
from pydantic import Field
from pydantic_settings import BaseSettings


class SupabaseStorageConfig(BaseModel):
class SupabaseStorageConfig(BaseSettings):
    """
    Configuration settings for Supabase Object Storage Service
    """
@@ -1,9 +1,10 @@
from typing import Optional

from pydantic import BaseModel, Field
from pydantic import Field
from pydantic_settings import BaseSettings


class VolcengineTOSStorageConfig(BaseModel):
class VolcengineTOSStorageConfig(BaseSettings):
    """
    Configuration settings for Volcengine Tinder Object Storage (TOS)
    """
@@ -1,9 +1,10 @@
from typing import Optional

from pydantic import BaseModel, Field, PositiveInt
from pydantic import Field, PositiveInt
from pydantic_settings import BaseSettings


class AnalyticdbConfig(BaseModel):
class AnalyticdbConfig(BaseSettings):
    """
    Configuration for connecting to Alibaba Cloud AnalyticDB for PostgreSQL.
    Refer to the following documentation for details on obtaining credentials:
@@ -1,9 +1,10 @@
from typing import Optional

from pydantic import BaseModel, Field
from pydantic import Field
from pydantic_settings import BaseSettings


class CouchbaseConfig(BaseModel):
class CouchbaseConfig(BaseSettings):
    """
    Couchbase configs
    """
@@ -21,3 +21,14 @@ class LindormConfig(BaseSettings):
        description="Lindorm password",
        default=None,
    )
    DEFAULT_INDEX_TYPE: Optional[str] = Field(
        description="Lindorm Vector Index Type, hnsw or flat is available in dify",
        default="hnsw",
    )
    DEFAULT_DISTANCE_TYPE: Optional[str] = Field(
        description="Vector Distance Type, support l2, cosinesimil, innerproduct", default="l2"
    )
    USING_UGC_INDEX: Optional[bool] = Field(
        description="Using UGC index will store the same type of Index in a single index but can retrieve separately.",
        default=False,
    )
@@ -1,7 +1,8 @@
from pydantic import BaseModel, Field, PositiveInt
from pydantic import Field, PositiveInt
from pydantic_settings import BaseSettings


class MyScaleConfig(BaseModel):
class MyScaleConfig(BaseSettings):
    """
    Configuration settings for MyScale vector database
    """
@@ -1,9 +1,10 @@
from typing import Optional

from pydantic import BaseModel, Field
from pydantic import Field
from pydantic_settings import BaseSettings


class VikingDBConfig(BaseModel):
class VikingDBConfig(BaseSettings):
    """
    Configuration for connecting to Volcengine VikingDB.
    Refer to the following documentation for details on obtaining credentials:
@@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):

    CURRENT_VERSION: str = Field(
        description="Dify version",
        default="0.13.1",
        default="0.13.2",
    )

    COMMIT_SHA: str = Field(
17 api/configs/remote_settings_sources/__init__.py Normal file
@@ -0,0 +1,17 @@
from typing import Optional

from pydantic import Field

from .apollo import ApolloSettingsSourceInfo
from .base import RemoteSettingsSource
from .enums import RemoteSettingsSourceName


class RemoteSettingsSourceConfig(ApolloSettingsSourceInfo):
    REMOTE_SETTINGS_SOURCE_NAME: RemoteSettingsSourceName | str = Field(
        description="name of remote config source",
        default="",
    )


__all__ = ["RemoteSettingsSource", "RemoteSettingsSourceConfig", "RemoteSettingsSourceName"]
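Because RemoteSettingsSourceName is a StrEnum (see enums.py below), members compare equal to their plain string values, which is why the match statement in configs/__init__.py can match a string loaded from the environment against RemoteSettingsSourceName.APOLLO. A quick sketch:

from enum import StrEnum


class RemoteSettingsSourceName(StrEnum):
    APOLLO = "apollo"


print(RemoteSettingsSourceName.APOLLO == "apollo")  # True: StrEnum members equal their string value
print(RemoteSettingsSourceName("apollo") is RemoteSettingsSourceName.APOLLO)  # True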
55 api/configs/remote_settings_sources/apollo/__init__.py Normal file
@@ -0,0 +1,55 @@
from collections.abc import Mapping
from typing import Any, Optional

from pydantic import Field
from pydantic.fields import FieldInfo
from pydantic_settings import BaseSettings

from configs.remote_settings_sources.base import RemoteSettingsSource

from .client import ApolloClient


class ApolloSettingsSourceInfo(BaseSettings):
    """
    Apollo remote settings source connection information
    """

    APOLLO_APP_ID: Optional[str] = Field(
        description="apollo app_id",
        default=None,
    )

    APOLLO_CLUSTER: Optional[str] = Field(
        description="apollo cluster",
        default=None,
    )

    APOLLO_CONFIG_URL: Optional[str] = Field(
        description="apollo config url",
        default=None,
    )

    APOLLO_NAMESPACE: Optional[str] = Field(
        description="apollo namespace",
        default=None,
    )


class ApolloSettingsSource(RemoteSettingsSource):
    def __init__(self, configs: Mapping[str, Any]):
        self.client = ApolloClient(
            app_id=configs["APOLLO_APP_ID"],
            cluster=configs["APOLLO_CLUSTER"],
            config_url=configs["APOLLO_CONFIG_URL"],
            start_hot_update=False,
            _notification_map={configs["APOLLO_NAMESPACE"]: -1},
        )
        self.namespace = configs["APOLLO_NAMESPACE"]
        self.remote_configs = self.client.get_all_dicts(self.namespace)

    def get_field_value(self, field: FieldInfo, field_name: str) -> tuple[Any, str, bool]:
        if not isinstance(self.remote_configs, dict):
            raise ValueError(f"remote configs is not dict, but {type(self.remote_configs)}")
        field_value = self.remote_configs.get(field_name)
        return field_value, field_name, False
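The mapping handed to ApolloSettingsSource is the settings state gathered so far, so it must carry the four APOLLO_* keys defined above. A hypothetical invocation (all values are placeholders):

configs = {
    "APOLLO_APP_ID": "dify",
    "APOLLO_CLUSTER": "default",
    "APOLLO_CONFIG_URL": "http://apollo-config:8080",
    "APOLLO_NAMESPACE": "application",
}
source = ApolloSettingsSource(configs)  # fetches the namespace once; hot update stays disabled
value, key, is_complex = source.get_field_value(field=None, field_name="LOG_LEVEL")  # the field object is unused by this source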
303 api/configs/remote_settings_sources/apollo/client.py Normal file
@@ -0,0 +1,303 @@
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from .python_3x import http_request, makedirs_wrapper
|
||||
from .utils import (
|
||||
CONFIGURATIONS,
|
||||
NAMESPACE_NAME,
|
||||
NOTIFICATION_ID,
|
||||
get_value_from_dict,
|
||||
init_ip,
|
||||
no_key_cache_key,
|
||||
signature,
|
||||
url_encode_wrapper,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ApolloClient:
|
||||
def __init__(
|
||||
self,
|
||||
config_url,
|
||||
app_id,
|
||||
cluster="default",
|
||||
secret="",
|
||||
start_hot_update=True,
|
||||
change_listener=None,
|
||||
_notification_map=None,
|
||||
):
|
||||
# Core routing parameters
|
||||
self.config_url = config_url
|
||||
self.cluster = cluster
|
||||
self.app_id = app_id
|
||||
|
||||
# Non-core parameters
|
||||
self.ip = init_ip()
|
||||
self.secret = secret
|
||||
|
||||
# Check the parameter variables
|
||||
|
||||
# Private control variables
|
||||
self._cycle_time = 5
|
||||
self._stopping = False
|
||||
self._cache = {}
|
||||
self._no_key = {}
|
||||
self._hash = {}
|
||||
self._pull_timeout = 75
|
||||
self._cache_file_path = os.path.expanduser("~") + "/.dify/config/remote-settings/apollo/cache/"
|
||||
self._long_poll_thread = None
|
||||
self._change_listener = change_listener # "add" "delete" "update"
|
||||
if _notification_map is None:
|
||||
_notification_map = {"application": -1}
|
||||
self._notification_map = _notification_map
|
||||
self.last_release_key = None
|
||||
# Private startup method
|
||||
self._path_checker()
|
||||
if start_hot_update:
|
||||
self._start_hot_update()
|
||||
|
||||
# start the heartbeat thread
|
||||
heartbeat = threading.Thread(target=self._heart_beat)
|
||||
heartbeat.daemon = True
|
||||
heartbeat.start()
|
||||
|
||||
def get_json_from_net(self, namespace="application"):
|
||||
url = "{}/configs/{}/{}/{}?releaseKey={}&ip={}".format(
|
||||
self.config_url, self.app_id, self.cluster, namespace, "", self.ip
|
||||
)
|
||||
try:
|
||||
code, body = http_request(url, timeout=3, headers=self._sign_headers(url))
|
||||
if code == 200:
|
||||
if not body:
|
||||
logger.error(f"get_json_from_net load configs failed, body is {body}")
|
||||
return None
|
||||
data = json.loads(body)
|
||||
data = data["configurations"]
|
||||
return_data = {CONFIGURATIONS: data}
|
||||
return return_data
|
||||
else:
|
||||
return None
|
||||
except Exception:
|
||||
logger.exception("an error occurred in get_json_from_net")
|
||||
return None
|
||||
|
||||
def get_value(self, key, default_val=None, namespace="application"):
|
||||
try:
|
||||
# read memory configuration
|
||||
namespace_cache = self._cache.get(namespace)
|
||||
val = get_value_from_dict(namespace_cache, key)
|
||||
if val is not None:
|
||||
return val
|
||||
|
||||
no_key = no_key_cache_key(namespace, key)
|
||||
if no_key in self._no_key:
|
||||
return default_val
|
||||
|
||||
# read the network configuration
|
||||
namespace_data = self.get_json_from_net(namespace)
|
||||
val = get_value_from_dict(namespace_data, key)
|
||||
if val is not None:
|
||||
self._update_cache_and_file(namespace_data, namespace)
|
||||
return val
|
||||
|
||||
# read the file configuration
|
||||
namespace_cache = self._get_local_cache(namespace)
|
||||
val = get_value_from_dict(namespace_cache, key)
|
||||
if val is not None:
|
||||
self._update_cache_and_file(namespace_cache, namespace)
|
||||
return val
|
||||
|
||||
# If all of them are not obtained, the default value is returned
|
||||
# and the local cache is set to None
|
||||
self._set_local_cache_none(namespace, key)
|
||||
return default_val
|
||||
except Exception:
|
||||
logger.exception("get_value has error, [key is %s], [namespace is %s]", key, namespace)
|
||||
return default_val
|
||||
|
||||
# Record the key as missing for this namespace and do not cache the default value,
# so later calls still reflect the real remote state. If the default value were cached
# here and the caller passed a different default next time, a stale result would be returned.
|
||||
def _set_local_cache_none(self, namespace, key):
|
||||
no_key = no_key_cache_key(namespace, key)
|
||||
self._no_key[no_key] = key
|
||||
|
||||
def _start_hot_update(self):
|
||||
self._long_poll_thread = threading.Thread(target=self._listener)
|
||||
# Run the long-poll thread as a daemon so it exits automatically when the main thread exits.
|
||||
self._long_poll_thread.daemon = True
|
||||
self._long_poll_thread.start()
|
||||
|
||||
def stop(self):
|
||||
self._stopping = True
|
||||
logger.info("Stopping listener...")
|
||||
|
||||
# Invoke the registered change listener; log and swallow any exception it raises.
|
||||
def _call_listener(self, namespace, old_kv, new_kv):
|
||||
if self._change_listener is None:
|
||||
return
|
||||
if old_kv is None:
|
||||
old_kv = {}
|
||||
if new_kv is None:
|
||||
new_kv = {}
|
||||
try:
|
||||
for key in old_kv:
|
||||
new_value = new_kv.get(key)
|
||||
old_value = old_kv.get(key)
|
||||
if new_value is None:
|
||||
# If new_value is None, the key (and its value) was deleted.
|
||||
self._change_listener("delete", namespace, key, old_value)
|
||||
continue
|
||||
if new_value != old_value:
|
||||
self._change_listener("update", namespace, key, new_value)
|
||||
continue
|
||||
for key in new_kv:
|
||||
new_value = new_kv.get(key)
|
||||
old_value = old_kv.get(key)
|
||||
if old_value is None:
|
||||
self._change_listener("add", namespace, key, new_value)
|
||||
except BaseException as e:
|
||||
logger.warning(str(e))
|
||||
|
||||
def _path_checker(self):
|
||||
if not os.path.isdir(self._cache_file_path):
|
||||
makedirs_wrapper(self._cache_file_path)
|
||||
|
||||
# update the local cache and file cache
|
||||
def _update_cache_and_file(self, namespace_data, namespace="application"):
|
||||
# update the local cache
|
||||
self._cache[namespace] = namespace_data
|
||||
# update the file cache
|
||||
new_string = json.dumps(namespace_data)
|
||||
new_hash = hashlib.md5(new_string.encode("utf-8")).hexdigest()
|
||||
if self._hash.get(namespace) == new_hash:
|
||||
pass
|
||||
else:
|
||||
file_path = Path(self._cache_file_path) / f"{self.app_id}_configuration_{namespace}.txt"
|
||||
file_path.write_text(new_string)
|
||||
self._hash[namespace] = new_hash
|
||||
|
||||
# get the configuration from the local file
|
||||
def _get_local_cache(self, namespace="application"):
|
||||
cache_file_path = os.path.join(self._cache_file_path, f"{self.app_id}_configuration_{namespace}.txt")
|
||||
if os.path.isfile(cache_file_path):
|
||||
with open(cache_file_path) as f:
|
||||
result = json.loads(f.readline())
|
||||
return result
|
||||
return {}
|
||||
|
||||
def _long_poll(self):
|
||||
notifications = []
|
||||
for key in self._cache:
|
||||
namespace_data = self._cache[key]
|
||||
notification_id = -1
|
||||
if NOTIFICATION_ID in namespace_data:
|
||||
notification_id = self._cache[key][NOTIFICATION_ID]
|
||||
notifications.append({NAMESPACE_NAME: key, NOTIFICATION_ID: notification_id})
|
||||
try:
|
||||
# if the length is 0 it is returned directly
|
||||
if len(notifications) == 0:
|
||||
return
|
||||
url = "{}/notifications/v2".format(self.config_url)
|
||||
params = {
|
||||
"appId": self.app_id,
|
||||
"cluster": self.cluster,
|
||||
"notifications": json.dumps(notifications, ensure_ascii=False),
|
||||
}
|
||||
param_str = url_encode_wrapper(params)
|
||||
url = url + "?" + param_str
|
||||
code, body = http_request(url, self._pull_timeout, headers=self._sign_headers(url))
|
||||
http_code = code
|
||||
if http_code == 304:
|
||||
logger.debug("No change, loop...")
|
||||
return
|
||||
if http_code == 200:
|
||||
if not body:
|
||||
logger.error(f"_long_poll load configs failed,body is {body}")
|
||||
return
|
||||
data = json.loads(body)
|
||||
for entry in data:
|
||||
namespace = entry[NAMESPACE_NAME]
|
||||
n_id = entry[NOTIFICATION_ID]
|
||||
logger.info("%s has changes: notificationId=%d", namespace, n_id)
|
||||
self._get_net_and_set_local(namespace, n_id, call_change=True)
|
||||
return
|
||||
else:
|
||||
logger.warning("Sleep...")
|
||||
except Exception as e:
|
||||
logger.warning(str(e))
|
||||
|
||||
def _get_net_and_set_local(self, namespace, n_id, call_change=False):
|
||||
namespace_data = self.get_json_from_net(namespace)
|
||||
if not namespace_data:
|
||||
return
|
||||
namespace_data[NOTIFICATION_ID] = n_id
|
||||
old_namespace = self._cache.get(namespace)
|
||||
self._update_cache_and_file(namespace_data, namespace)
|
||||
if self._change_listener is not None and call_change and old_namespace:
|
||||
old_kv = old_namespace.get(CONFIGURATIONS)
|
||||
new_kv = namespace_data.get(CONFIGURATIONS)
|
||||
self._call_listener(namespace, old_kv, new_kv)
|
||||
|
||||
def _listener(self):
|
||||
logger.info("start long_poll")
|
||||
while not self._stopping:
|
||||
self._long_poll()
|
||||
time.sleep(self._cycle_time)
|
||||
logger.info("stopped, long_poll")
|
||||
|
||||
# Add the authentication signature headers to the request when a secret is configured.
|
||||
def _sign_headers(self, url):
|
||||
headers = {}
|
||||
if self.secret == "":
|
||||
return headers
|
||||
uri = url[len(self.config_url) : len(url)]
|
||||
time_unix_now = str(int(round(time.time() * 1000)))
|
||||
headers["Authorization"] = "Apollo " + self.app_id + ":" + signature(time_unix_now, uri, self.secret)
|
||||
headers["Timestamp"] = time_unix_now
|
||||
return headers
|
||||
|
||||
def _heart_beat(self):
|
||||
while not self._stopping:
|
||||
for namespace in self._notification_map:
|
||||
self._do_heart_beat(namespace)
|
||||
time.sleep(60 * 10)  # 10 minutes
|
||||
|
||||
def _do_heart_beat(self, namespace):
|
||||
url = "{}/configs/{}/{}/{}?ip={}".format(self.config_url, self.app_id, self.cluster, namespace, self.ip)
|
||||
try:
|
||||
code, body = http_request(url, timeout=3, headers=self._sign_headers(url))
|
||||
if code == 200:
|
||||
if not body:
|
||||
logger.error(f"_do_heart_beat load configs failed,body is {body}")
|
||||
return None
|
||||
data = json.loads(body)
|
||||
if self.last_release_key == data["releaseKey"]:
|
||||
return None
|
||||
self.last_release_key = data["releaseKey"]
|
||||
data = data["configurations"]
|
||||
self._update_cache_and_file(data, namespace)
|
||||
else:
|
||||
return None
|
||||
except Exception:
|
||||
logger.exception("an error occurred in _do_heart_beat")
|
||||
return None
|
||||
|
||||
def get_all_dicts(self, namespace):
|
||||
namespace_data = self._cache.get(namespace)
|
||||
if namespace_data is None:
|
||||
net_namespace_data = self.get_json_from_net(namespace)
|
||||
if not net_namespace_data:
|
||||
return namespace_data
|
||||
namespace_data = net_namespace_data.get(CONFIGURATIONS)
|
||||
if namespace_data:
|
||||
self._update_cache_and_file(namespace_data, namespace)
|
||||
return namespace_data
|
41 api/configs/remote_settings_sources/apollo/python_3x.py Normal file
@@ -0,0 +1,41 @@
import logging
import os
import ssl
import urllib.request
from urllib import parse
from urllib.error import HTTPError

# Create an SSL context that allows for a lower level of security
ssl_context = ssl.create_default_context()
ssl_context.set_ciphers("HIGH:!DH:!aNULL")
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE

# Create an opener object and pass in a custom SSL context
opener = urllib.request.build_opener(urllib.request.HTTPSHandler(context=ssl_context))

urllib.request.install_opener(opener)

logger = logging.getLogger(__name__)


def http_request(url, timeout, headers={}):
    try:
        request = urllib.request.Request(url, headers=headers)
        res = urllib.request.urlopen(request, timeout=timeout)
        body = res.read().decode("utf-8")
        return res.code, body
    except HTTPError as e:
        if e.code == 304:
            logger.warning("http_request error, code is 304, maybe you should check the secret")
            return 304, None
        logger.warning("http_request error, code is %d, msg is %s", e.code, e.msg)
        raise e


def url_encode(params):
    return parse.urlencode(params)


def makedirs_wrapper(path):
    os.makedirs(path, exist_ok=True)
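A short usage sketch for http_request (the URL is a placeholder): it returns the HTTP status code and the decoded body, and maps a 304 response to (304, None) instead of raising:

code, body = http_request("http://apollo-config:8080/configs/dify/default/application", timeout=3)
if code == 200 and body:
    print(body)  # JSON document whose "configurations" key holds the namespace's key/value pairs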
51 api/configs/remote_settings_sources/apollo/utils.py Normal file
@@ -0,0 +1,51 @@
import hashlib
import socket

from .python_3x import url_encode

# define constants
CONFIGURATIONS = "configurations"
NOTIFICATION_ID = "notificationId"
NAMESPACE_NAME = "namespaceName"


# Sign the request: HMAC-SHA1 over "<timestamp>\n<uri>" with the secret, base64-encoded
def signature(timestamp, uri, secret):
    import base64
    import hmac

    string_to_sign = "" + timestamp + "\n" + uri
    hmac_code = hmac.new(secret.encode(), string_to_sign.encode(), hashlib.sha1).digest()
    return base64.b64encode(hmac_code).decode()


def url_encode_wrapper(params):
    return url_encode(params)


def no_key_cache_key(namespace, key):
    return "{}{}{}".format(namespace, len(namespace), key)


# Return the value for the key from the namespace cache, or None if it is absent
def get_value_from_dict(namespace_cache, key):
    if namespace_cache:
        kv_data = namespace_cache.get(CONFIGURATIONS)
        if kv_data is None:
            return None
        if key in kv_data:
            return kv_data[key]
    return None


def init_ip():
    ip = ""
    s = None
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        s.connect(("8.8.8.8", 53))
        ip = s.getsockname()[0]
    finally:
        if s:
            s.close()
    return ip
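The signature helper is what _sign_headers in client.py uses when a secret is configured: the HMAC-SHA1 digest of the timestamp and URI is base64-encoded and sent in an Apollo authorization header. Illustrative values:

import time

timestamp = str(int(round(time.time() * 1000)))
uri = "/configs/dify/default/application"
sig = signature(timestamp, uri, secret="my-apollo-secret")
headers = {
    "Authorization": "Apollo " + "dify" + ":" + sig,  # app_id, then the signature
    "Timestamp": timestamp,
}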
15 api/configs/remote_settings_sources/base.py Normal file
@@ -0,0 +1,15 @@
from collections.abc import Mapping
from typing import Any

from pydantic.fields import FieldInfo


class RemoteSettingsSource:
    def __init__(self, configs: Mapping[str, Any]):
        pass

    def get_field_value(self, field: FieldInfo, field_name: str) -> tuple[Any, str, bool]:
        raise NotImplementedError

    def prepare_field_value(self, field_name: str, field: FieldInfo, value: Any, value_is_complex: bool) -> Any:
        return value
5 api/configs/remote_settings_sources/enums.py Normal file
@@ -0,0 +1,5 @@
from enum import StrEnum


class RemoteSettingsSourceName(StrEnum):
    APOLLO = "apollo"
@@ -14,11 +14,11 @@ AUDIO_EXTENSIONS.extend([ext.upper() for ext in AUDIO_EXTENSIONS])


if dify_config.ETL_TYPE == "Unstructured":
    DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "pdf", "html", "htm", "xlsx", "xls"]
    DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls"]
    DOCUMENT_EXTENSIONS.extend(("docx", "csv", "eml", "msg", "pptx", "xml", "epub"))
    if dify_config.UNSTRUCTURED_API_URL:
        DOCUMENT_EXTENSIONS.append("ppt")
    DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS])
else:
    DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "pdf", "html", "htm", "xlsx", "xls", "docx", "csv"]
    DOCUMENT_EXTENSIONS = ["txt", "markdown", "md", "mdx", "pdf", "html", "htm", "xlsx", "xls", "docx", "csv"]
    DOCUMENT_EXTENSIONS.extend([ext.upper() for ext in DOCUMENT_EXTENSIONS])
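The only behavioural change here is that ".mdx" (and its upper-case variant) now passes the document-extension check in both ETL branches, for example:

extension = "notes.MDX".rsplit(".", 1)[-1]
print(extension in DOCUMENT_EXTENSIONS)  # True after this change; upper-case variants are appended above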
@@ -42,10 +42,10 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
        app_model: App,
        workflow: Workflow,
        user: Union[Account, EndUser],
        args: Mapping,
        args: Mapping[str, Any],
        invoke_from: InvokeFrom,
        streaming: Literal[True] = True,
    ) -> Generator[Mapping | str, None, None]: ...
        streaming: Literal[False],
    ) -> Mapping[str, Any]: ...

    @overload
    def generate(
@@ -55,8 +55,8 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
        user: Union[Account, EndUser],
        args: Mapping,
        invoke_from: InvokeFrom,
        streaming: Literal[False] = False,
    ) -> Mapping: ...
        streaming: Literal[True],
    ) -> Generator[Mapping | str, None, None]: ...

    @overload
    def generate(
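The pattern applied across these generator signatures: @overload plus Literal lets type checkers derive the return type from the streaming flag, while a single runtime implementation serves both cases; a trailing plain-bool overload covers callers whose flag is only known at runtime. A standalone sketch:

from collections.abc import Generator
from typing import Literal, Union, overload


@overload
def generate(streaming: Literal[True]) -> Generator[str, None, None]: ...
@overload
def generate(streaming: Literal[False]) -> str: ...
@overload
def generate(streaming: bool = True) -> Union[str, Generator[str, None, None]]: ...


def generate(streaming: bool = True) -> Union[str, Generator[str, None, None]]:
    if streaming:
        return (chunk for chunk in ("a", "b"))
    return "ab"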
@@ -20,6 +20,7 @@ from core.app.entities.queue_entities import (
    QueueIterationNextEvent,
    QueueIterationStartEvent,
    QueueMessageReplaceEvent,
    QueueNodeExceptionEvent,
    QueueNodeFailedEvent,
    QueueNodeInIterationFailedEvent,
    QueueNodeStartedEvent,
@@ -32,6 +33,7 @@ from core.app.entities.queue_entities import (
    QueueStopEvent,
    QueueTextChunkEvent,
    QueueWorkflowFailedEvent,
    QueueWorkflowPartialSuccessEvent,
    QueueWorkflowStartedEvent,
    QueueWorkflowSucceededEvent,
)
@@ -129,7 +131,6 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc

        self._conversation_name_generate_thread = None
        self._recorded_files: list[Mapping[str, Any]] = []
        self.total_tokens: int = 0

    def process(self):
        """
@@ -320,7 +321,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc

                if response:
                    yield response
            elif isinstance(event, QueueNodeFailedEvent | QueueNodeInIterationFailedEvent):
            elif isinstance(event, QueueNodeFailedEvent | QueueNodeInIterationFailedEvent | QueueNodeExceptionEvent):
                workflow_node_execution = self._handle_workflow_node_execution_failed(event)

                response = self._workflow_node_finish_to_stream_response(
@@ -363,8 +364,6 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
                if not workflow_run:
                    raise Exception("Workflow run not initialized.")

                # FIXME for issue #11221 quick fix maybe have a better solution
                self.total_tokens += event.metadata.get("total_tokens", 0) if event.metadata else 0
                yield self._workflow_iteration_completed_to_stream_response(
                    task_id=self._application_generate_entity.task_id, workflow_run=workflow_run, event=event
                )
@@ -378,7 +377,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
                workflow_run = self._handle_workflow_run_success(
                    workflow_run=workflow_run,
                    start_at=graph_runtime_state.start_at,
                    total_tokens=graph_runtime_state.total_tokens or self.total_tokens,
                    total_tokens=graph_runtime_state.total_tokens,
                    total_steps=graph_runtime_state.node_run_steps,
                    outputs=event.outputs,
                    conversation_id=self._conversation.id,
@@ -389,6 +388,29 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
                    task_id=self._application_generate_entity.task_id, workflow_run=workflow_run
                )

                self._queue_manager.publish(QueueAdvancedChatMessageEndEvent(), PublishFrom.TASK_PIPELINE)
            elif isinstance(event, QueueWorkflowPartialSuccessEvent):
                if not workflow_run:
                    raise Exception("Workflow run not initialized.")

                if not graph_runtime_state:
                    raise Exception("Graph runtime state not initialized.")

                workflow_run = self._handle_workflow_run_partial_success(
                    workflow_run=workflow_run,
                    start_at=graph_runtime_state.start_at,
                    total_tokens=graph_runtime_state.total_tokens,
                    total_steps=graph_runtime_state.node_run_steps,
                    outputs=event.outputs,
                    exceptions_count=event.exceptions_count,
                    conversation_id=None,
                    trace_manager=trace_manager,
                )

                yield self._workflow_finish_to_stream_response(
                    task_id=self._application_generate_entity.task_id, workflow_run=workflow_run
                )

                self._queue_manager.publish(QueueAdvancedChatMessageEndEvent(), PublishFrom.TASK_PIPELINE)
            elif isinstance(event, QueueWorkflowFailedEvent):
                if not workflow_run:
@@ -406,6 +428,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCyc
                    error=event.error,
                    conversation_id=self._conversation.id,
                    trace_manager=trace_manager,
                    exceptions_count=event.exceptions_count,
                )

                yield self._workflow_finish_to_stream_response(
@@ -37,31 +37,39 @@ class AgentChatAppGenerator(MessageBasedAppGenerator):
        user: Union[Account, EndUser],
        args: Mapping[str, Any],
        invoke_from: InvokeFrom,
        streaming: Literal[True] = True,
        streaming: Literal[False],
    ) -> Mapping[str, Any]: ...

    @overload
    def generate(
        self,
        *,
        app_model: App,
        user: Union[Account, EndUser],
        args: Mapping[str, Any],
        invoke_from: InvokeFrom,
        streaming: Literal[True],
    ) -> Generator[Mapping | str, None, None]: ...

    @overload
    def generate(
        self,
        *,
        app_model: App,
        user: Union[Account, EndUser],
        args: Mapping,
        invoke_from: InvokeFrom,
        streaming: Literal[False] = False,
    ) -> Mapping: ...

    @overload
    def generate(
        self,
        app_model: App,
        user: Union[Account, EndUser],
        args: Mapping,
        args: Mapping[str, Any],
        invoke_from: InvokeFrom,
        streaming: bool,
    ) -> Union[Mapping, Generator[Mapping | str, None, None]]: ...

    def generate(
        self, app_model: App, user: Union[Account, EndUser], args: Any, invoke_from: InvokeFrom, streaming: bool = True
        self,
        *,
        app_model: App,
        user: Union[Account, EndUser],
        args: Mapping[str, Any],
        invoke_from: InvokeFrom,
        streaming: bool = True,
    ) -> Union[Mapping, Generator[Mapping | str, None, None]]:
        """
        Generate App response.
@@ -90,7 +98,7 @@ class AgentChatAppGenerator(MessageBasedAppGenerator):
        # get conversation
        conversation = None
        if args.get("conversation_id"):
            conversation = self._get_conversation_by_user(app_model, args.get("conversation_id"), user)
            conversation = self._get_conversation_by_user(app_model, args.get("conversation_id", ""), user)

        # get app model config
        app_model_config = self._get_app_model_config(app_model=app_model, conversation=conversation)
@@ -1,7 +1,7 @@
import logging
import threading
import uuid
from collections.abc import Generator
from collections.abc import Generator, Mapping
from typing import Any, Literal, Union, overload

from flask import Flask, current_app
@@ -34,39 +34,39 @@ class ChatAppGenerator(MessageBasedAppGenerator):
        self,
        app_model: App,
        user: Union[Account, EndUser],
        args: Any,
        args: Mapping[str, Any],
        invoke_from: InvokeFrom,
        streaming: Literal[True] = True,
    ) -> Generator[dict | str, None, None]: ...
        streaming: Literal[True],
    ) -> Generator[Mapping | str, None, None]: ...

    @overload
    def generate(
        self,
        app_model: App,
        user: Union[Account, EndUser],
        args: Any,
        args: Mapping[str, Any],
        invoke_from: InvokeFrom,
        streaming: Literal[False] = False,
    ) -> dict: ...
        streaming: Literal[False],
    ) -> Mapping[str, Any]: ...

    @overload
    def generate(
        self,
        app_model: App,
        user: Union[Account, EndUser],
        args: Any,
        args: Mapping[str, Any],
        invoke_from: InvokeFrom,
        streaming: bool = False,
    ) -> Union[dict, Generator[dict | str, None, None]]: ...
        streaming: bool,
    ) -> Union[Mapping[str, Any], Generator[Mapping[str, Any] | str, None, None]]: ...

    def generate(
        self,
        app_model: App,
        user: Union[Account, EndUser],
        args: Any,
        args: Mapping[str, Any],
        invoke_from: InvokeFrom,
        streaming: bool = True,
    ) -> Union[dict, Generator[dict | str, None, None]]:
    ) -> Union[Mapping[str, Any], Generator[Mapping[str, Any] | str, None, None]]:
        """
        Generate App response.

@@ -91,7 +91,7 @@ class ChatAppGenerator(MessageBasedAppGenerator):
        # get conversation
        conversation = None
        if args.get("conversation_id"):
            conversation = self._get_conversation_by_user(app_model, args.get("conversation_id"), user)
            conversation = self._get_conversation_by_user(app_model, args.get("conversation_id", ""), user)

        # get app model config
        app_model_config = self._get_app_model_config(app_model=app_model, conversation=conversation)
@@ -104,7 +104,7 @@ class ChatAppGenerator(MessageBasedAppGenerator):

        # validate config
        override_model_config_dict = ChatAppConfigManager.config_validate(
            tenant_id=app_model.tenant_id, config=args.get("model_config")
            tenant_id=app_model.tenant_id, config=args.get("model_config", {})
        )

        # always enable retriever resource in debugger mode
@@ -1,8 +1,8 @@
import logging
import threading
import uuid
from collections.abc import Generator
from typing import Any, Literal, Mapping, Union, overload
from collections.abc import Generator, Mapping
from typing import Any, Literal, Union, overload

from flask import Flask, current_app
from pydantic import ValidationError
@@ -34,9 +34,9 @@ class CompletionAppGenerator(MessageBasedAppGenerator):
        self,
        app_model: App,
        user: Union[Account, EndUser],
        args: Mapping,
        args: Mapping[str, Any],
        invoke_from: InvokeFrom,
        streaming: Literal[True] = True,
        streaming: Literal[True],
    ) -> Generator[str, None, None]: ...

    @overload
@@ -44,24 +44,29 @@ class CompletionAppGenerator(MessageBasedAppGenerator):
        self,
        app_model: App,
        user: Union[Account, EndUser],
        args: Mapping,
        args: Mapping[str, Any],
        invoke_from: InvokeFrom,
        streaming: Literal[False] = False,
    ) -> Mapping: ...
        streaming: Literal[False],
    ) -> Mapping[str, Any]: ...

    @overload
    def generate(
        self,
        app_model: App,
        user: Union[Account, EndUser],
        args: Mapping,
        args: Mapping[str, Any],
        invoke_from: InvokeFrom,
        streaming: bool = False,
    ) -> Mapping | Generator[str, None, None]: ...
    ) -> Union[Mapping[str, Any], Generator[str, None, None]]: ...

    def generate(
        self, app_model: App, user: Union[Account, EndUser], args: Any, invoke_from: InvokeFrom, streaming: bool = True
    ) -> Union[Mapping, Generator[str, None, None]]:
        self,
        app_model: App,
        user: Union[Account, EndUser],
        args: Mapping[str, Any],
        invoke_from: InvokeFrom,
        streaming: bool = True,
    ) -> Union[Mapping[str, Any], Generator[str, None, None]]:
        """
        Generate App response.
@@ -37,11 +37,11 @@ class WorkflowAppGenerator(BaseAppGenerator):
        app_model: App,
        workflow: Workflow,
        user: Union[Account, EndUser],
        args: Mapping,
        args: Mapping[str, Any],
        invoke_from: InvokeFrom,
        streaming: Literal[True] = True,
        call_depth: int = 0,
        workflow_thread_pool_id: Optional[str] = None,
        streaming: Literal[True],
        call_depth: int,
        workflow_thread_pool_id: Optional[str],
    ) -> Generator[Mapping | str, None, None]: ...

    @overload
@@ -50,12 +50,12 @@ class WorkflowAppGenerator(BaseAppGenerator):
        app_model: App,
        workflow: Workflow,
        user: Union[Account, EndUser],
        args: Mapping,
        args: Mapping[str, Any],
        invoke_from: InvokeFrom,
        streaming: Literal[False] = False,
        call_depth: int = 0,
        workflow_thread_pool_id: Optional[str] = None,
    ) -> Mapping: ...
        streaming: Literal[False],
        call_depth: int,
        workflow_thread_pool_id: Optional[str],
    ) -> Mapping[str, Any]: ...

    @overload
    def generate(
@@ -63,12 +63,12 @@ class WorkflowAppGenerator(BaseAppGenerator):
        app_model: App,
        workflow: Workflow,
        user: Union[Account, EndUser],
        args: Mapping,
        args: Mapping[str, Any],
        invoke_from: InvokeFrom,
        streaming: bool,
        call_depth: int = 0,
        workflow_thread_pool_id: Optional[str] = None,
    ) -> Mapping | Generator[Mapping | str, None, None]: ...
        call_depth: int,
        workflow_thread_pool_id: Optional[str],
    ) -> Union[Mapping[str, Any], Generator[Mapping | str, None, None]]: ...

    def generate(
        self,
@@ -80,7 +80,7 @@ class WorkflowAppGenerator(BaseAppGenerator):
        streaming: bool = True,
        call_depth: int = 0,
        workflow_thread_pool_id: Optional[str] = None,
    ) -> Mapping | Generator[Mapping | str, None, None]:
    ) -> Union[Mapping[str, Any], Generator[Mapping | str, None, None]]:
        files: Sequence[Mapping[str, Any]] = args.get("files") or []

        # parse files
@@ -6,6 +6,7 @@ from core.app.entities.queue_entities import (
    QueueMessageEndEvent,
    QueueStopEvent,
    QueueWorkflowFailedEvent,
    QueueWorkflowPartialSuccessEvent,
    QueueWorkflowSucceededEvent,
    WorkflowQueueMessage,
)
@@ -34,7 +35,8 @@ class WorkflowAppQueueManager(AppQueueManager):
            | QueueErrorEvent
            | QueueMessageEndEvent
            | QueueWorkflowSucceededEvent
            | QueueWorkflowFailedEvent,
            | QueueWorkflowFailedEvent
            | QueueWorkflowPartialSuccessEvent,
        ):
            self.stop_listen()
@@ -16,6 +16,7 @@ from core.app.entities.queue_entities import (
    QueueIterationCompletedEvent,
    QueueIterationNextEvent,
    QueueIterationStartEvent,
    QueueNodeExceptionEvent,
    QueueNodeFailedEvent,
    QueueNodeInIterationFailedEvent,
    QueueNodeStartedEvent,
@@ -27,6 +28,7 @@ from core.app.entities.queue_entities import (
    QueueStopEvent,
    QueueTextChunkEvent,
    QueueWorkflowFailedEvent,
    QueueWorkflowPartialSuccessEvent,
    QueueWorkflowStartedEvent,
    QueueWorkflowSucceededEvent,
)
@@ -260,36 +262,36 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa

                workflow_node_execution = self._handle_node_execution_start(workflow_run=workflow_run, event=event)

                response = self._workflow_node_start_to_stream_response(
                node_start_response = self._workflow_node_start_to_stream_response(
                    event=event,
                    task_id=self._application_generate_entity.task_id,
                    workflow_node_execution=workflow_node_execution,
                )

                if response:
                    yield response
                if node_start_response:
                    yield node_start_response
            elif isinstance(event, QueueNodeSucceededEvent):
                workflow_node_execution = self._handle_workflow_node_execution_success(event)

                response = self._workflow_node_finish_to_stream_response(
                node_success_response = self._workflow_node_finish_to_stream_response(
                    event=event,
                    task_id=self._application_generate_entity.task_id,
                    workflow_node_execution=workflow_node_execution,
                )

                if response:
                    yield response
            elif isinstance(event, QueueNodeFailedEvent | QueueNodeInIterationFailedEvent):
                if node_success_response:
                    yield node_success_response
            elif isinstance(event, QueueNodeFailedEvent | QueueNodeInIterationFailedEvent | QueueNodeExceptionEvent):
                workflow_node_execution = self._handle_workflow_node_execution_failed(event)

                response = self._workflow_node_finish_to_stream_response(
                node_failed_response = self._workflow_node_finish_to_stream_response(
                    event=event,
                    task_id=self._application_generate_entity.task_id,
                    workflow_node_execution=workflow_node_execution,
                )

                if response:
                    yield response
                if node_failed_response:
                    yield node_failed_response
            elif isinstance(event, QueueParallelBranchRunStartedEvent):
                if not workflow_run:
                    raise Exception("Workflow run not initialized.")
@@ -322,8 +324,6 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
                if not workflow_run:
                    raise Exception("Workflow run not initialized.")

                # FIXME for issue #11221 quick fix maybe have a better solution
                self.total_tokens += event.metadata.get("total_tokens", 0) if event.metadata else 0
                yield self._workflow_iteration_completed_to_stream_response(
                    task_id=self._application_generate_entity.task_id, workflow_run=workflow_run, event=event
                )
@@ -337,7 +337,7 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
                workflow_run = self._handle_workflow_run_success(
                    workflow_run=workflow_run,
                    start_at=graph_runtime_state.start_at,
                    total_tokens=graph_runtime_state.total_tokens or self.total_tokens,
                    total_tokens=graph_runtime_state.total_tokens,
                    total_steps=graph_runtime_state.node_run_steps,
                    outputs=event.outputs,
                    conversation_id=None,
@@ -347,6 +347,30 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
                # save workflow app log
                self._save_workflow_app_log(workflow_run)

                yield self._workflow_finish_to_stream_response(
                    task_id=self._application_generate_entity.task_id, workflow_run=workflow_run
                )
            elif isinstance(event, QueueWorkflowPartialSuccessEvent):
                if not workflow_run:
                    raise Exception("Workflow run not initialized.")

                if not graph_runtime_state:
                    raise Exception("Graph runtime state not initialized.")

                workflow_run = self._handle_workflow_run_partial_success(
                    workflow_run=workflow_run,
                    start_at=graph_runtime_state.start_at,
                    total_tokens=graph_runtime_state.total_tokens,
                    total_steps=graph_runtime_state.node_run_steps,
                    outputs=event.outputs,
                    exceptions_count=event.exceptions_count,
                    conversation_id=None,
                    trace_manager=trace_manager,
                )

                # save workflow app log
                self._save_workflow_app_log(workflow_run)
|
||||
|
||||
yield self._workflow_finish_to_stream_response(
|
||||
task_id=self._application_generate_entity.task_id, workflow_run=workflow_run
|
||||
)
|
||||
@ -356,7 +380,6 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
|
||||
|
||||
if not graph_runtime_state:
|
||||
raise Exception("Graph runtime state not initialized.")
|
||||
|
||||
workflow_run = self._handle_workflow_run_failed(
|
||||
workflow_run=workflow_run,
|
||||
start_at=graph_runtime_state.start_at,
|
||||
@ -368,6 +391,7 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleMa
|
||||
error=event.error if isinstance(event, QueueWorkflowFailedEvent) else event.get_stop_reason(),
|
||||
conversation_id=None,
|
||||
trace_manager=trace_manager,
|
||||
exceptions_count=event.exceptions_count if isinstance(event, QueueWorkflowFailedEvent) else 0,
|
||||
)
|
||||
|
||||
# save workflow app log
|
||||
|
@ -9,6 +9,7 @@ from core.app.entities.queue_entities import (
|
||||
QueueIterationCompletedEvent,
|
||||
QueueIterationNextEvent,
|
||||
QueueIterationStartEvent,
|
||||
QueueNodeExceptionEvent,
|
||||
QueueNodeFailedEvent,
|
||||
QueueNodeInIterationFailedEvent,
|
||||
QueueNodeStartedEvent,
|
||||
@ -19,6 +20,7 @@ from core.app.entities.queue_entities import (
|
||||
QueueRetrieverResourcesEvent,
|
||||
QueueTextChunkEvent,
|
||||
QueueWorkflowFailedEvent,
|
||||
QueueWorkflowPartialSuccessEvent,
|
||||
QueueWorkflowStartedEvent,
|
||||
QueueWorkflowSucceededEvent,
|
||||
)
|
||||
@ -27,6 +29,7 @@ from core.workflow.graph_engine.entities.event import (
|
||||
AgentLogEvent,
|
||||
GraphEngineEvent,
|
||||
GraphRunFailedEvent,
|
||||
GraphRunPartialSucceededEvent,
|
||||
GraphRunStartedEvent,
|
||||
GraphRunSucceededEvent,
|
||||
IterationRunFailedEvent,
|
||||
@ -34,6 +37,7 @@ from core.workflow.graph_engine.entities.event import (
|
||||
IterationRunStartedEvent,
|
||||
IterationRunSucceededEvent,
|
||||
NodeInIterationFailedEvent,
|
||||
NodeRunExceptionEvent,
|
||||
NodeRunFailedEvent,
|
||||
NodeRunRetrieverResourceEvent,
|
||||
NodeRunStartedEvent,
|
||||
@ -178,8 +182,12 @@ class WorkflowBasedAppRunner(AppRunner):
|
||||
)
|
||||
elif isinstance(event, GraphRunSucceededEvent):
|
||||
self._publish_event(QueueWorkflowSucceededEvent(outputs=event.outputs))
|
||||
elif isinstance(event, GraphRunPartialSucceededEvent):
|
||||
self._publish_event(
|
||||
QueueWorkflowPartialSuccessEvent(outputs=event.outputs, exceptions_count=event.exceptions_count)
|
||||
)
|
||||
elif isinstance(event, GraphRunFailedEvent):
|
||||
self._publish_event(QueueWorkflowFailedEvent(error=event.error))
|
||||
self._publish_event(QueueWorkflowFailedEvent(error=event.error, exceptions_count=event.exceptions_count))
|
||||
elif isinstance(event, NodeRunStartedEvent):
|
||||
self._publish_event(
|
||||
QueueNodeStartedEvent(
|
||||
@ -255,6 +263,36 @@ class WorkflowBasedAppRunner(AppRunner):
|
||||
in_iteration_id=event.in_iteration_id,
|
||||
)
|
||||
)
|
||||
elif isinstance(event, NodeRunExceptionEvent):
|
||||
self._publish_event(
|
||||
QueueNodeExceptionEvent(
|
||||
node_execution_id=event.id,
|
||||
node_id=event.node_id,
|
||||
node_type=event.node_type,
|
||||
node_data=event.node_data,
|
||||
parallel_id=event.parallel_id,
|
||||
parallel_start_node_id=event.parallel_start_node_id,
|
||||
parent_parallel_id=event.parent_parallel_id,
|
||||
parent_parallel_start_node_id=event.parent_parallel_start_node_id,
|
||||
start_at=event.route_node_state.start_at,
|
||||
inputs=event.route_node_state.node_run_result.inputs
|
||||
if event.route_node_state.node_run_result
|
||||
else {},
|
||||
process_data=event.route_node_state.node_run_result.process_data
|
||||
if event.route_node_state.node_run_result
|
||||
else {},
|
||||
outputs=event.route_node_state.node_run_result.outputs
|
||||
if event.route_node_state.node_run_result
|
||||
else {},
|
||||
error=event.route_node_state.node_run_result.error
|
||||
if event.route_node_state.node_run_result and event.route_node_state.node_run_result.error
|
||||
else "Unknown error",
|
||||
execution_metadata=event.route_node_state.node_run_result.metadata
|
||||
if event.route_node_state.node_run_result
|
||||
else {},
|
||||
in_iteration_id=event.in_iteration_id,
|
||||
)
|
||||
)
|
||||
elif isinstance(event, NodeInIterationFailedEvent):
|
||||
self._publish_event(
|
||||
QueueNodeInIterationFailedEvent(
|
||||
|
@ -25,12 +25,14 @@ class QueueEvent(StrEnum):
|
||||
WORKFLOW_STARTED = "workflow_started"
|
||||
WORKFLOW_SUCCEEDED = "workflow_succeeded"
|
||||
WORKFLOW_FAILED = "workflow_failed"
|
||||
WORKFLOW_PARTIAL_SUCCEEDED = "workflow_partial_succeeded"
|
||||
ITERATION_START = "iteration_start"
|
||||
ITERATION_NEXT = "iteration_next"
|
||||
ITERATION_COMPLETED = "iteration_completed"
|
||||
NODE_STARTED = "node_started"
|
||||
NODE_SUCCEEDED = "node_succeeded"
|
||||
NODE_FAILED = "node_failed"
|
||||
NODE_EXCEPTION = "node_exception"
|
||||
RETRIEVER_RESOURCES = "retriever_resources"
|
||||
ANNOTATION_REPLY = "annotation_reply"
|
||||
AGENT_THOUGHT = "agent_thought"
|
||||
@ -238,6 +240,17 @@ class QueueWorkflowFailedEvent(AppQueueEvent):
|
||||
|
||||
event: QueueEvent = QueueEvent.WORKFLOW_FAILED
|
||||
error: str
|
||||
exceptions_count: int
|
||||
|
||||
|
||||
class QueueWorkflowPartialSuccessEvent(AppQueueEvent):
|
||||
"""
|
||||
QueueWorkflowFailedEvent entity
|
||||
"""
|
||||
|
||||
event: QueueEvent = QueueEvent.WORKFLOW_PARTIAL_SUCCEEDED
|
||||
exceptions_count: int
|
||||
outputs: Optional[dict[str, Any]] = None
|
||||
|
||||
|
||||
class QueueNodeStartedEvent(AppQueueEvent):
|
||||
@ -347,6 +360,37 @@ class QueueNodeInIterationFailedEvent(AppQueueEvent):
|
||||
error: str
|
||||
|
||||
|
||||
class QueueNodeExceptionEvent(AppQueueEvent):
|
||||
"""
|
||||
QueueNodeExceptionEvent entity
|
||||
"""
|
||||
|
||||
event: QueueEvent = QueueEvent.NODE_EXCEPTION
|
||||
|
||||
node_execution_id: str
|
||||
node_id: str
|
||||
node_type: NodeType
|
||||
node_data: BaseNodeData
|
||||
parallel_id: Optional[str] = None
|
||||
"""parallel id if node is in parallel"""
|
||||
parallel_start_node_id: Optional[str] = None
|
||||
"""parallel start node id if node is in parallel"""
|
||||
parent_parallel_id: Optional[str] = None
|
||||
"""parent parallel id if node is in parallel"""
|
||||
parent_parallel_start_node_id: Optional[str] = None
|
||||
"""parent parallel start node id if node is in parallel"""
|
||||
in_iteration_id: Optional[str] = None
|
||||
"""iteration id if node is in iteration"""
|
||||
start_at: datetime
|
||||
|
||||
inputs: Optional[dict[str, Any]] = None
|
||||
process_data: Optional[dict[str, Any]] = None
|
||||
outputs: Optional[dict[str, Any]] = None
|
||||
execution_metadata: Optional[dict[NodeRunMetadataKey, Any]] = None
|
||||
|
||||
error: str
|
||||
|
||||
|
||||
class QueueNodeFailedEvent(AppQueueEvent):
|
||||
"""
|
||||
QueueNodeFailedEvent entity
|
||||
|
@ -214,6 +214,7 @@ class WorkflowFinishStreamResponse(StreamResponse):
|
||||
created_by: Optional[dict] = None
|
||||
created_at: int
|
||||
finished_at: int
|
||||
exceptions_count: Optional[int] = 0
|
||||
files: Optional[Sequence[Mapping[str, Any]]] = []
|
||||
|
||||
event: StreamEvent = StreamEvent.WORKFLOW_FINISHED
|
||||
|
@ -110,7 +110,7 @@ class RateLimitGenerator:
|
||||
raise StopIteration
|
||||
try:
|
||||
return next(self.generator)
|
||||
except StopIteration:
|
||||
except Exception:
|
||||
self.close()
|
||||
raise
|
||||
|
||||
|
@ -13,6 +13,7 @@ from core.app.entities.queue_entities import (
|
||||
QueueIterationCompletedEvent,
|
||||
QueueIterationNextEvent,
|
||||
QueueIterationStartEvent,
|
||||
QueueNodeExceptionEvent,
|
||||
QueueNodeFailedEvent,
|
||||
QueueNodeInIterationFailedEvent,
|
||||
QueueNodeStartedEvent,
|
||||
@ -167,6 +168,55 @@ class WorkflowCycleManage:
|
||||
|
||||
return workflow_run
|
||||
|
||||
def _handle_workflow_run_partial_success(
|
||||
self,
|
||||
workflow_run: WorkflowRun,
|
||||
start_at: float,
|
||||
total_tokens: int,
|
||||
total_steps: int,
|
||||
outputs: Mapping[str, Any] | None = None,
|
||||
exceptions_count: int = 0,
|
||||
conversation_id: Optional[str] = None,
|
||||
trace_manager: Optional[TraceQueueManager] = None,
|
||||
) -> WorkflowRun:
|
||||
"""
|
||||
Workflow run success
|
||||
:param workflow_run: workflow run
|
||||
:param start_at: start time
|
||||
:param total_tokens: total tokens
|
||||
:param total_steps: total steps
|
||||
:param outputs: outputs
|
||||
:param conversation_id: conversation id
|
||||
:return:
|
||||
"""
|
||||
workflow_run = self._refetch_workflow_run(workflow_run.id)
|
||||
|
||||
outputs = WorkflowEntry.handle_special_values(outputs)
|
||||
|
||||
workflow_run.status = WorkflowRunStatus.PARTIAL_SUCCESSED.value
|
||||
workflow_run.outputs = json.dumps(outputs or {})
|
||||
workflow_run.elapsed_time = time.perf_counter() - start_at
|
||||
workflow_run.total_tokens = total_tokens
|
||||
workflow_run.total_steps = total_steps
|
||||
workflow_run.finished_at = datetime.now(UTC).replace(tzinfo=None)
|
||||
workflow_run.exceptions_count = exceptions_count
|
||||
db.session.commit()
|
||||
db.session.refresh(workflow_run)
|
||||
|
||||
if trace_manager:
|
||||
trace_manager.add_trace_task(
|
||||
TraceTask(
|
||||
TraceTaskName.WORKFLOW_TRACE,
|
||||
workflow_run=workflow_run,
|
||||
conversation_id=conversation_id,
|
||||
user_id=trace_manager.user_id,
|
||||
)
|
||||
)
|
||||
|
||||
db.session.close()
|
||||
|
||||
return workflow_run
|
||||
|
||||
def _handle_workflow_run_failed(
|
||||
self,
|
||||
workflow_run: WorkflowRun,
|
||||
@ -177,6 +227,7 @@ class WorkflowCycleManage:
|
||||
error: str,
|
||||
conversation_id: Optional[str] = None,
|
||||
trace_manager: Optional[TraceQueueManager] = None,
|
||||
exceptions_count: int = 0,
|
||||
) -> WorkflowRun:
|
||||
"""
|
||||
Workflow run failed
|
||||
@ -196,7 +247,7 @@ class WorkflowCycleManage:
|
||||
workflow_run.total_tokens = total_tokens
|
||||
workflow_run.total_steps = total_steps
|
||||
workflow_run.finished_at = datetime.now(UTC).replace(tzinfo=None)
|
||||
|
||||
workflow_run.exceptions_count = exceptions_count
|
||||
db.session.commit()
|
||||
|
||||
running_workflow_node_executions = (
|
||||
@ -328,7 +379,7 @@ class WorkflowCycleManage:
|
||||
return workflow_node_execution
|
||||
|
||||
def _handle_workflow_node_execution_failed(
|
||||
self, event: QueueNodeFailedEvent | QueueNodeInIterationFailedEvent
|
||||
self, event: QueueNodeFailedEvent | QueueNodeInIterationFailedEvent | QueueNodeExceptionEvent
|
||||
) -> WorkflowNodeExecution:
|
||||
"""
|
||||
Workflow node execution failed
|
||||
@ -354,7 +405,11 @@ class WorkflowCycleManage:
|
||||
execution_metadata = json.dumps(jsonable_encoder(execution_metadata_dict)) if execution_metadata_dict else None
|
||||
db.session.query(WorkflowNodeExecution).filter(WorkflowNodeExecution.id == workflow_node_execution.id).update(
|
||||
{
|
||||
WorkflowNodeExecution.status: WorkflowNodeExecutionStatus.FAILED.value,
|
||||
WorkflowNodeExecution.status: (
|
||||
WorkflowNodeExecutionStatus.FAILED.value
|
||||
if not isinstance(event, QueueNodeExceptionEvent)
|
||||
else WorkflowNodeExecutionStatus.EXCEPTION.value
|
||||
),
|
||||
WorkflowNodeExecution.error: event.error,
|
||||
WorkflowNodeExecution.inputs: json.dumps(inputs) if inputs else None,
|
||||
WorkflowNodeExecution.process_data: json.dumps(process_data) if process_data else None,
|
||||
@ -368,8 +423,11 @@ class WorkflowCycleManage:
|
||||
db.session.commit()
|
||||
db.session.close()
|
||||
process_data = WorkflowEntry.handle_special_values(event.process_data)
|
||||
|
||||
workflow_node_execution.status = WorkflowNodeExecutionStatus.FAILED.value
|
||||
workflow_node_execution.status = (
|
||||
WorkflowNodeExecutionStatus.FAILED.value
|
||||
if not isinstance(event, QueueNodeExceptionEvent)
|
||||
else WorkflowNodeExecutionStatus.EXCEPTION.value
|
||||
)
|
||||
workflow_node_execution.error = event.error
|
||||
workflow_node_execution.inputs = json.dumps(inputs) if inputs else None
|
||||
workflow_node_execution.process_data = json.dumps(process_data) if process_data else None
|
||||
@ -450,6 +508,7 @@ class WorkflowCycleManage:
|
||||
created_at=int(workflow_run.created_at.timestamp()),
|
||||
finished_at=int(workflow_run.finished_at.timestamp()),
|
||||
files=self._fetch_files_from_node_outputs(dict(workflow_run.outputs_dict)),
|
||||
exceptions_count=workflow_run.exceptions_count,
|
||||
),
|
||||
)
|
||||
|
||||
@ -500,7 +559,10 @@ class WorkflowCycleManage:
|
||||
|
||||
def _workflow_node_finish_to_stream_response(
|
||||
self,
|
||||
event: QueueNodeSucceededEvent | QueueNodeFailedEvent | QueueNodeInIterationFailedEvent,
|
||||
event: QueueNodeSucceededEvent
|
||||
| QueueNodeFailedEvent
|
||||
| QueueNodeInIterationFailedEvent
|
||||
| QueueNodeExceptionEvent,
|
||||
task_id: str,
|
||||
workflow_node_execution: WorkflowNodeExecution,
|
||||
) -> Optional[NodeFinishStreamResponse]:
|
||||
|
@ -141,7 +141,7 @@ def _to_url(f: File, /):
|
||||
elif f.transfer_method == FileTransferMethod.LOCAL_FILE:
|
||||
if f.related_id is None:
|
||||
raise ValueError("Missing file related_id")
|
||||
return helpers.get_signed_file_url(upload_file_id=f.related_id)
|
||||
return f.remote_url or helpers.get_signed_file_url(upload_file_id=f.related_id)
|
||||
elif f.transfer_method == FileTransferMethod.TOOL_FILE:
|
||||
# add sign url
|
||||
if f.related_id is None or f.extension is None:
|
||||
|
@ -24,6 +24,12 @@ BACKOFF_FACTOR = 0.5
|
||||
STATUS_FORCELIST = [429, 500, 502, 503, 504]
|
||||
|
||||
|
||||
class MaxRetriesExceededError(Exception):
|
||||
"""Raised when the maximum number of retries is exceeded."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
|
||||
if "allow_redirects" in kwargs:
|
||||
allow_redirects = kwargs.pop("allow_redirects")
|
||||
@ -64,7 +70,7 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
|
||||
if retries <= max_retries:
|
||||
time.sleep(BACKOFF_FACTOR * (2 ** (retries - 1)))
|
||||
|
||||
raise Exception(f"Reached maximum retries ({max_retries}) for URL {url}")
|
||||
raise MaxRetriesExceededError(f"Reached maximum retries ({max_retries}) for URL {url}")
|
||||
|
||||
|
||||
def get(url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
|
||||
|
@ -0,0 +1,39 @@
|
||||
model: gemini-2.0-flash-exp
|
||||
label:
|
||||
en_US: Gemini 2.0 Flash Exp
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
@ -0,0 +1,39 @@
|
||||
model: gemini-2.0-flash-exp
|
||||
label:
|
||||
en_US: Gemini 2.0 Flash Exp
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
- vision
|
||||
- tool-call
|
||||
- stream-tool-call
|
||||
- document
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 1048576
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: max_output_tokens
|
||||
use_template: max_tokens
|
||||
default: 8192
|
||||
min: 1
|
||||
max: 8192
|
||||
- name: json_schema
|
||||
use_template: json_schema
|
||||
pricing:
|
||||
input: '0.00'
|
||||
output: '0.00'
|
||||
unit: '0.000001'
|
||||
currency: USD
|
@ -4,7 +4,7 @@ import os
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional
|
||||
|
||||
from langfuse import Langfuse
|
||||
from langfuse import Langfuse # type: ignore
|
||||
|
||||
from core.ops.base_trace_instance import BaseTraceInstance
|
||||
from core.ops.entities.config_entity import LangfuseConfig
|
||||
@ -65,8 +65,11 @@ class LangFuseDataTrace(BaseTraceInstance):
|
||||
self.generate_name_trace(trace_info)
|
||||
|
||||
def workflow_trace(self, trace_info: WorkflowTraceInfo):
|
||||
trace_id = trace_info.workflow_app_log_id or trace_info.workflow_run_id
|
||||
trace_id = trace_info.workflow_run_id
|
||||
user_id = trace_info.metadata.get("user_id")
|
||||
metadata = trace_info.metadata
|
||||
metadata["workflow_app_log_id"] = trace_info.workflow_app_log_id
|
||||
|
||||
if trace_info.message_id:
|
||||
trace_id = trace_info.message_id
|
||||
name = TraceTaskName.MESSAGE_TRACE.value
|
||||
@ -76,22 +79,20 @@ class LangFuseDataTrace(BaseTraceInstance):
|
||||
name=name,
|
||||
input=trace_info.workflow_run_inputs,
|
||||
output=trace_info.workflow_run_outputs,
|
||||
metadata=trace_info.metadata,
|
||||
metadata=metadata,
|
||||
session_id=trace_info.conversation_id,
|
||||
tags=["message", "workflow"],
|
||||
created_at=trace_info.start_time,
|
||||
updated_at=trace_info.end_time,
|
||||
)
|
||||
self.add_trace(langfuse_trace_data=trace_data)
|
||||
workflow_span_data = LangfuseSpan(
|
||||
id=(trace_info.workflow_app_log_id or trace_info.workflow_run_id),
|
||||
id=trace_info.workflow_run_id,
|
||||
name=TraceTaskName.WORKFLOW_TRACE.value,
|
||||
input=trace_info.workflow_run_inputs,
|
||||
output=trace_info.workflow_run_outputs,
|
||||
trace_id=trace_id,
|
||||
start_time=trace_info.start_time,
|
||||
end_time=trace_info.end_time,
|
||||
metadata=trace_info.metadata,
|
||||
metadata=metadata,
|
||||
level=LevelEnum.DEFAULT if trace_info.error == "" else LevelEnum.ERROR,
|
||||
status_message=trace_info.error or "",
|
||||
)
|
||||
@ -103,7 +104,7 @@ class LangFuseDataTrace(BaseTraceInstance):
|
||||
name=TraceTaskName.WORKFLOW_TRACE.value,
|
||||
input=trace_info.workflow_run_inputs,
|
||||
output=trace_info.workflow_run_outputs,
|
||||
metadata=trace_info.metadata,
|
||||
metadata=metadata,
|
||||
session_id=trace_info.conversation_id,
|
||||
tags=["workflow"],
|
||||
)
|
||||
@ -192,7 +193,7 @@ class LangFuseDataTrace(BaseTraceInstance):
|
||||
metadata=metadata,
|
||||
level=(LevelEnum.DEFAULT if status == "succeeded" else LevelEnum.ERROR),
|
||||
status_message=trace_info.error or "",
|
||||
parent_observation_id=(trace_info.workflow_app_log_id or trace_info.workflow_run_id),
|
||||
parent_observation_id=trace_info.workflow_run_id,
|
||||
)
|
||||
else:
|
||||
span_data = LangfuseSpan(
|
||||
@ -239,11 +240,13 @@ class LangFuseDataTrace(BaseTraceInstance):
|
||||
file_list = trace_info.file_list
|
||||
metadata = trace_info.metadata
|
||||
message_data = trace_info.message_data
|
||||
if message_data is None:
|
||||
return
|
||||
message_id = message_data.id
|
||||
|
||||
user_id = message_data.from_account_id
|
||||
if message_data.from_end_user_id:
|
||||
end_user_data: EndUser = (
|
||||
end_user_data: Optional[EndUser] = (
|
||||
db.session.query(EndUser).filter(EndUser.id == message_data.from_end_user_id).first()
|
||||
)
|
||||
if end_user_data is not None:
|
||||
@ -300,6 +303,8 @@ class LangFuseDataTrace(BaseTraceInstance):
|
||||
self.add_generation(langfuse_generation_data)
|
||||
|
||||
def moderation_trace(self, trace_info: ModerationTraceInfo):
|
||||
if trace_info.message_data is None:
|
||||
return
|
||||
span_data = LangfuseSpan(
|
||||
name=TraceTaskName.MODERATION_TRACE.value,
|
||||
input=trace_info.inputs,
|
||||
@ -319,9 +324,11 @@ class LangFuseDataTrace(BaseTraceInstance):
|
||||
|
||||
def suggested_question_trace(self, trace_info: SuggestedQuestionTraceInfo):
|
||||
message_data = trace_info.message_data
|
||||
if message_data is None:
|
||||
return
|
||||
generation_usage = GenerationUsage(
|
||||
total=len(str(trace_info.suggested_question)),
|
||||
input=len(trace_info.inputs),
|
||||
input=len(trace_info.inputs) if trace_info.inputs else 0,
|
||||
output=len(trace_info.suggested_question),
|
||||
unit=UnitEnum.CHARACTERS,
|
||||
)
|
||||
@ -342,6 +349,8 @@ class LangFuseDataTrace(BaseTraceInstance):
|
||||
self.add_generation(langfuse_generation_data=generation_data)
|
||||
|
||||
def dataset_retrieval_trace(self, trace_info: DatasetRetrievalTraceInfo):
|
||||
if trace_info.message_data is None:
|
||||
return
|
||||
dataset_retrieval_span_data = LangfuseSpan(
|
||||
name=TraceTaskName.DATASET_RETRIEVAL_TRACE.value,
|
||||
input=trace_info.inputs,
|
||||
|
@ -62,15 +62,17 @@ class LangSmithDataTrace(BaseTraceInstance):
|
||||
self.generate_name_trace(trace_info)
|
||||
|
||||
def workflow_trace(self, trace_info: WorkflowTraceInfo):
|
||||
trace_id = trace_info.message_id or trace_info.workflow_app_log_id or trace_info.workflow_run_id
|
||||
trace_id = trace_info.message_id or trace_info.workflow_run_id
|
||||
message_dotted_order = (
|
||||
generate_dotted_order(trace_info.message_id, trace_info.start_time) if trace_info.message_id else None
|
||||
)
|
||||
workflow_dotted_order = generate_dotted_order(
|
||||
trace_info.workflow_app_log_id or trace_info.workflow_run_id,
|
||||
trace_info.workflow_run_id,
|
||||
trace_info.workflow_data.created_at,
|
||||
message_dotted_order,
|
||||
)
|
||||
metadata = trace_info.metadata
|
||||
metadata["workflow_app_log_id"] = trace_info.workflow_app_log_id
|
||||
|
||||
if trace_info.message_id:
|
||||
message_run = LangSmithRunModel(
|
||||
@ -82,7 +84,7 @@ class LangSmithDataTrace(BaseTraceInstance):
|
||||
start_time=trace_info.start_time,
|
||||
end_time=trace_info.end_time,
|
||||
extra={
|
||||
"metadata": trace_info.metadata,
|
||||
"metadata": metadata,
|
||||
},
|
||||
tags=["message", "workflow"],
|
||||
error=trace_info.error,
|
||||
@ -94,7 +96,7 @@ class LangSmithDataTrace(BaseTraceInstance):
|
||||
langsmith_run = LangSmithRunModel(
|
||||
file_list=trace_info.file_list,
|
||||
total_tokens=trace_info.total_tokens,
|
||||
id=trace_info.workflow_app_log_id or trace_info.workflow_run_id,
|
||||
id=trace_info.workflow_run_id,
|
||||
name=TraceTaskName.WORKFLOW_TRACE.value,
|
||||
inputs=trace_info.workflow_run_inputs,
|
||||
run_type=LangSmithRunType.tool,
|
||||
@ -102,7 +104,7 @@ class LangSmithDataTrace(BaseTraceInstance):
|
||||
end_time=trace_info.workflow_data.finished_at,
|
||||
outputs=trace_info.workflow_run_outputs,
|
||||
extra={
|
||||
"metadata": trace_info.metadata,
|
||||
"metadata": metadata,
|
||||
},
|
||||
error=trace_info.error,
|
||||
tags=["workflow"],
|
||||
@ -204,7 +206,7 @@ class LangSmithDataTrace(BaseTraceInstance):
|
||||
extra={
|
||||
"metadata": metadata,
|
||||
},
|
||||
parent_run_id=trace_info.workflow_app_log_id or trace_info.workflow_run_id,
|
||||
parent_run_id=trace_info.workflow_run_id,
|
||||
tags=["node_execution"],
|
||||
id=node_execution_id,
|
||||
trace_id=trace_id,
|
||||
|
@ -1,13 +1,10 @@
|
||||
import copy
|
||||
import json
|
||||
import logging
|
||||
from collections.abc import Iterable
|
||||
from typing import Any, Optional
|
||||
|
||||
from opensearchpy import OpenSearch
|
||||
from opensearchpy.helpers import bulk
|
||||
from pydantic import BaseModel, model_validator
|
||||
from tenacity import retry, stop_after_attempt, wait_fixed
|
||||
|
||||
from configs import dify_config
|
||||
from core.rag.datasource.vdb.field import Field
|
||||
@ -23,11 +20,15 @@ logger = logging.getLogger(__name__)
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
||||
logging.getLogger("lindorm").setLevel(logging.WARN)
|
||||
|
||||
ROUTING_FIELD = "routing_field"
|
||||
UGC_INDEX_PREFIX = "ugc_index"
|
||||
|
||||
|
||||
class LindormVectorStoreConfig(BaseModel):
|
||||
hosts: str
|
||||
username: Optional[str] = None
|
||||
password: Optional[str] = None
|
||||
using_ugc: Optional[bool] = False
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
@ -41,9 +42,7 @@ class LindormVectorStoreConfig(BaseModel):
|
||||
return values
|
||||
|
||||
def to_opensearch_params(self) -> dict[str, Any]:
|
||||
params = {
|
||||
"hosts": self.hosts,
|
||||
}
|
||||
params = {"hosts": self.hosts}
|
||||
if self.username and self.password:
|
||||
params["http_auth"] = (self.username, self.password)
|
||||
return params
|
||||
@ -51,9 +50,21 @@ class LindormVectorStoreConfig(BaseModel):
|
||||
|
||||
class LindormVectorStore(BaseVector):
|
||||
def __init__(self, collection_name: str, config: LindormVectorStoreConfig, **kwargs):
|
||||
super().__init__(collection_name.lower())
|
||||
self._routing = None
|
||||
self._routing_field = None
|
||||
if config.using_ugc:
|
||||
routing_value: str = kwargs.get("routing_value")
|
||||
if routing_value is None:
|
||||
raise ValueError("UGC index should init vector with valid 'routing_value' parameter value")
|
||||
self._routing = routing_value.lower()
|
||||
self._routing_field = ROUTING_FIELD
|
||||
ugc_index_name = collection_name
|
||||
super().__init__(ugc_index_name.lower())
|
||||
else:
|
||||
super().__init__(collection_name.lower())
|
||||
self._client_config = config
|
||||
self._client = OpenSearch(**config.to_opensearch_params())
|
||||
self._using_ugc = config.using_ugc
|
||||
self.kwargs = kwargs
|
||||
|
||||
def get_type(self) -> str:
|
||||
@ -66,89 +77,37 @@ class LindormVectorStore(BaseVector):
|
||||
def refresh(self):
|
||||
self._client.indices.refresh(index=self._collection_name)
|
||||
|
||||
def __filter_existed_ids(
|
||||
self,
|
||||
texts: list[str],
|
||||
metadatas: list[dict],
|
||||
ids: list[str],
|
||||
bulk_size: int = 1024,
|
||||
) -> tuple[Iterable[str], Optional[list[dict]], Optional[list[str]]]:
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_fixed(60))
|
||||
def __fetch_existing_ids(batch_ids: list[str]) -> set[str]:
|
||||
try:
|
||||
existing_docs = self._client.mget(index=self._collection_name, body={"ids": batch_ids}, _source=False)
|
||||
return {doc["_id"] for doc in existing_docs["docs"] if doc["found"]}
|
||||
except Exception as e:
|
||||
logger.exception(f"Error fetching batch {batch_ids}")
|
||||
return set()
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_fixed(60))
|
||||
def __fetch_existing_routing_ids(batch_ids: list[str], route_ids: list[str]) -> set[str]:
|
||||
try:
|
||||
existing_docs = self._client.mget(
|
||||
body={
|
||||
"docs": [
|
||||
{"_index": self._collection_name, "_id": id, "routing": routing}
|
||||
for id, routing in zip(batch_ids, route_ids)
|
||||
]
|
||||
},
|
||||
_source=False,
|
||||
)
|
||||
return {doc["_id"] for doc in existing_docs["docs"] if doc["found"]}
|
||||
except Exception as e:
|
||||
logger.exception(f"Error fetching batch ids: {batch_ids}")
|
||||
return set()
|
||||
|
||||
if ids is None:
|
||||
return texts, metadatas, ids
|
||||
|
||||
if len(texts) != len(ids):
|
||||
raise RuntimeError(f"texts {len(texts)} != {ids}")
|
||||
|
||||
filtered_texts = []
|
||||
filtered_metadatas = []
|
||||
filtered_ids = []
|
||||
|
||||
def batch(iterable, n):
|
||||
length = len(iterable)
|
||||
for idx in range(0, length, n):
|
||||
yield iterable[idx : min(idx + n, length)]
|
||||
|
||||
for ids_batch, texts_batch, metadatas_batch in zip(
|
||||
batch(ids, bulk_size),
|
||||
batch(texts, bulk_size),
|
||||
batch(metadatas, bulk_size) if metadatas is not None else batch([None] * len(ids), bulk_size),
|
||||
):
|
||||
existing_ids_set = __fetch_existing_ids(ids_batch)
|
||||
for text, metadata, doc_id in zip(texts_batch, metadatas_batch, ids_batch):
|
||||
if doc_id not in existing_ids_set:
|
||||
filtered_texts.append(text)
|
||||
filtered_ids.append(doc_id)
|
||||
if metadatas is not None:
|
||||
filtered_metadatas.append(metadata)
|
||||
|
||||
return filtered_texts, metadatas if metadatas is None else filtered_metadatas, filtered_ids
|
||||
|
||||
def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
|
||||
actions = []
|
||||
uuids = self._get_uuids(documents)
|
||||
for i in range(len(documents)):
|
||||
action = {
|
||||
"_op_type": "index",
|
||||
"_index": self._collection_name.lower(),
|
||||
"_id": uuids[i],
|
||||
"_source": {
|
||||
Field.CONTENT_KEY.value: documents[i].page_content,
|
||||
Field.VECTOR.value: embeddings[i], # Make sure you pass an array here
|
||||
Field.METADATA_KEY.value: documents[i].metadata,
|
||||
},
|
||||
action_header = {
|
||||
"index": {
|
||||
"_index": self.collection_name.lower(),
|
||||
"_id": uuids[i],
|
||||
}
|
||||
}
|
||||
actions.append(action)
|
||||
bulk(self._client, actions)
|
||||
self.refresh()
|
||||
action_values = {
|
||||
Field.CONTENT_KEY.value: documents[i].page_content,
|
||||
Field.VECTOR.value: embeddings[i], # Make sure you pass an array here
|
||||
Field.METADATA_KEY.value: documents[i].metadata,
|
||||
}
|
||||
if self._using_ugc:
|
||||
action_header["index"]["routing"] = self._routing
|
||||
action_values[self._routing_field] = self._routing
|
||||
actions.append(action_header)
|
||||
actions.append(action_values)
|
||||
response = self._client.bulk(actions)
|
||||
if response["errors"]:
|
||||
for item in response["items"]:
|
||||
print(f"{item['index']['status']}: {item['index']['error']['type']}")
|
||||
else:
|
||||
self.refresh()
|
||||
|
||||
def get_ids_by_metadata_field(self, key: str, value: str):
|
||||
query = {"query": {"term": {f"{Field.METADATA_KEY.value}.{key}.keyword": value}}}
|
||||
query = {"query": {"bool": {"must": [{"term": {f"{Field.METADATA_KEY.value}.{key}.keyword": value}}]}}}
|
||||
if self._using_ugc:
|
||||
query["query"]["bool"]["must"].append({"term": {f"{self._routing_field}.keyword": self._routing}})
|
||||
response = self._client.search(index=self._collection_name, body=query)
|
||||
if response["hits"]["hits"]:
|
||||
return [hit["_id"] for hit in response["hits"]["hits"]]
|
||||
@ -156,50 +115,62 @@ class LindormVectorStore(BaseVector):
|
||||
return None
|
||||
|
||||
def delete_by_metadata_field(self, key: str, value: str):
|
||||
query_str = {"query": {"match": {f"metadata.{key}": f"{value}"}}}
|
||||
results = self._client.search(index=self._collection_name, body=query_str)
|
||||
ids = [hit["_id"] for hit in results["hits"]["hits"]]
|
||||
ids = self.get_ids_by_metadata_field(key, value)
|
||||
if ids:
|
||||
self.delete_by_ids(ids)
|
||||
|
||||
def delete_by_ids(self, ids: list[str]) -> None:
|
||||
params = {}
|
||||
if self._using_ugc:
|
||||
params["routing"] = self._routing
|
||||
for id in ids:
|
||||
if self._client.exists(index=self._collection_name, id=id):
|
||||
self._client.delete(index=self._collection_name, id=id)
|
||||
if self._client.exists(index=self._collection_name, id=id, params=params):
|
||||
params = {}
|
||||
if self._using_ugc:
|
||||
params["routing"] = self._routing
|
||||
self._client.delete(index=self._collection_name, id=id, params=params)
|
||||
self.refresh()
|
||||
else:
|
||||
logger.warning(f"DELETE BY ID: ID {id} does not exist in the index.")
|
||||
|
||||
def delete(self) -> None:
|
||||
try:
|
||||
if self._using_ugc:
|
||||
routing_filter_query = {
|
||||
"query": {"bool": {"must": [{"term": {f"{self._routing_field}.keyword": self._routing}}]}}
|
||||
}
|
||||
self._client.delete_by_query(self._collection_name, body=routing_filter_query)
|
||||
self.refresh()
|
||||
else:
|
||||
if self._client.indices.exists(index=self._collection_name):
|
||||
self._client.indices.delete(index=self._collection_name, params={"timeout": 60})
|
||||
logger.info("Delete index success")
|
||||
else:
|
||||
logger.warning(f"Index '{self._collection_name}' does not exist. No deletion performed.")
|
||||
except Exception as e:
|
||||
logger.exception(f"Error occurred while deleting the index: {self._collection_name}")
|
||||
raise e
|
||||
|
||||
def text_exists(self, id: str) -> bool:
|
||||
try:
|
||||
self._client.get(index=self._collection_name, id=id)
|
||||
params = {}
|
||||
if self._using_ugc:
|
||||
params["routing"] = self._routing
|
||||
self._client.get(index=self._collection_name, id=id, params=params)
|
||||
return True
|
||||
except:
|
||||
return False
|
||||
|
||||
def search_by_vector(self, query_vector: list[float], **kwargs: Any) -> list[Document]:
|
||||
# Make sure query_vector is a list
|
||||
if not isinstance(query_vector, list):
|
||||
raise ValueError("query_vector should be a list of floats")
|
||||
|
||||
# Check whether query_vector is a floating-point number list
|
||||
if not all(isinstance(x, float) for x in query_vector):
|
||||
raise ValueError("All elements in query_vector should be floats")
|
||||
|
||||
top_k = kwargs.get("top_k", 10)
|
||||
query = default_vector_search_query(query_vector=query_vector, k=top_k, **kwargs)
|
||||
try:
|
||||
response = self._client.search(index=self._collection_name, body=query)
|
||||
params = {}
|
||||
if self._using_ugc:
|
||||
params["routing"] = self._routing
|
||||
response = self._client.search(index=self._collection_name, body=query, params=params)
|
||||
except Exception as e:
|
||||
logger.exception(f"Error executing vector search, query: {query}")
|
||||
raise
|
||||
@ -232,7 +203,7 @@ class LindormVectorStore(BaseVector):
|
||||
minimum_should_match = kwargs.get("minimum_should_match", 0)
|
||||
top_k = kwargs.get("top_k", 10)
|
||||
filters = kwargs.get("filter")
|
||||
routing = kwargs.get("routing")
|
||||
routing = self._routing
|
||||
full_text_query = default_text_search_query(
|
||||
query_text=query,
|
||||
k=top_k,
|
||||
@ -243,6 +214,7 @@ class LindormVectorStore(BaseVector):
|
||||
minimum_should_match=minimum_should_match,
|
||||
filters=filters,
|
||||
routing=routing,
|
||||
routing_field=self._routing_field,
|
||||
)
|
||||
response = self._client.search(index=self._collection_name, body=full_text_query)
|
||||
docs = []
|
||||
@ -265,17 +237,18 @@ class LindormVectorStore(BaseVector):
|
||||
logger.info(f"Collection {self._collection_name} already exists.")
|
||||
return
|
||||
if self._client.indices.exists(index=self._collection_name):
|
||||
logger.info("{self._collection_name.lower()} already exists.")
|
||||
logger.info(f"{self._collection_name.lower()} already exists.")
|
||||
redis_client.set(collection_exist_cache_key, 1, ex=3600)
|
||||
return
|
||||
if len(self.kwargs) == 0 and len(kwargs) != 0:
|
||||
self.kwargs = copy.deepcopy(kwargs)
|
||||
vector_field = kwargs.pop("vector_field", Field.VECTOR.value)
|
||||
shards = kwargs.pop("shards", 2)
|
||||
shards = kwargs.pop("shards", 4)
|
||||
|
||||
engine = kwargs.pop("engine", "lvector")
|
||||
method_name = kwargs.pop("method_name", "hnsw")
|
||||
method_name = kwargs.pop("method_name", dify_config.DEFAULT_INDEX_TYPE)
|
||||
space_type = kwargs.pop("space_type", dify_config.DEFAULT_DISTANCE_TYPE)
|
||||
data_type = kwargs.pop("data_type", "float")
|
||||
space_type = kwargs.pop("space_type", "cosinesimil")
|
||||
|
||||
hnsw_m = kwargs.pop("hnsw_m", 24)
|
||||
hnsw_ef_construction = kwargs.pop("hnsw_ef_construction", 500)
|
||||
@ -288,10 +261,10 @@ class LindormVectorStore(BaseVector):
|
||||
mapping = default_text_mapping(
|
||||
dimension,
|
||||
method_name,
|
||||
space_type=space_type,
|
||||
shards=shards,
|
||||
engine=engine,
|
||||
data_type=data_type,
|
||||
space_type=space_type,
|
||||
vector_field=vector_field,
|
||||
hnsw_m=hnsw_m,
|
||||
hnsw_ef_construction=hnsw_ef_construction,
|
||||
@ -301,6 +274,7 @@ class LindormVectorStore(BaseVector):
|
||||
centroids_hnsw_m=centroids_hnsw_m,
|
||||
centroids_hnsw_ef_construct=centroids_hnsw_ef_construct,
|
||||
centroids_hnsw_ef_search=centroids_hnsw_ef_search,
|
||||
using_ugc=self._using_ugc,
|
||||
**kwargs,
|
||||
)
|
||||
self._client.indices.create(index=self._collection_name.lower(), body=mapping)
|
||||
@ -309,15 +283,20 @@ class LindormVectorStore(BaseVector):
|
||||
|
||||
|
||||
def default_text_mapping(dimension: int, method_name: str, **kwargs: Any) -> dict:
|
||||
routing_field = kwargs.get("routing_field")
|
||||
excludes_from_source = kwargs.get("excludes_from_source")
|
||||
analyzer = kwargs.get("analyzer", "ik_max_word")
|
||||
text_field = kwargs.get("text_field", Field.CONTENT_KEY.value)
|
||||
engine = kwargs["engine"]
|
||||
shard = kwargs["shards"]
|
||||
space_type = kwargs["space_type"]
|
||||
space_type = kwargs.get("space_type")
|
||||
if space_type is None:
|
||||
if method_name == "hnsw":
|
||||
space_type = "l2"
|
||||
else:
|
||||
space_type = "cosine"
|
||||
data_type = kwargs["data_type"]
|
||||
vector_field = kwargs.get("vector_field", Field.VECTOR.value)
|
||||
using_ugc = kwargs.get("using_ugc", False)
|
||||
|
||||
if method_name == "ivfpq":
|
||||
ivfpq_m = kwargs["ivfpq_m"]
|
||||
@ -366,13 +345,11 @@ def default_text_mapping(dimension: int, method_name: str, **kwargs: Any) -> dic
|
||||
if excludes_from_source:
|
||||
mapping["mappings"]["_source"] = {"excludes": excludes_from_source} # e.g. {"excludes": ["vector_field"]}
|
||||
|
||||
if method_name == "ivfpq" and routing_field is not None:
|
||||
if using_ugc and method_name == "ivfpq":
|
||||
mapping["settings"]["index"]["knn_routing"] = True
|
||||
mapping["settings"]["index"]["knn.offline.construction"] = True
|
||||
|
||||
if method_name == "flat" and routing_field is not None:
|
||||
elif using_ugc and method_name == "hnsw" or using_ugc and method_name == "flat":
|
||||
mapping["settings"]["index"]["knn_routing"] = True
|
||||
|
||||
return mapping
|
||||
|
||||
|
||||
@ -386,14 +363,12 @@ def default_text_search_query(
|
||||
minimum_should_match: int = 0,
|
||||
filters: Optional[list[dict]] = None,
|
||||
routing: Optional[str] = None,
|
||||
routing_field: Optional[str] = None,
|
||||
**kwargs,
|
||||
) -> dict:
|
||||
if routing is not None:
|
||||
routing_field = kwargs.get("routing_field", "routing_field")
|
||||
query_clause = {
|
||||
"bool": {
|
||||
"must": [{"match": {text_field: query_text}}, {"term": {f"metadata.{routing_field}.keyword": routing}}]
|
||||
}
|
||||
"bool": {"must": [{"match": {text_field: query_text}}, {"term": {f"{routing_field}.keyword": routing}}]}
|
||||
}
|
||||
else:
|
||||
query_clause = {"match": {text_field: query_text}}
|
||||
@ -449,7 +424,7 @@ def default_vector_search_query(
|
||||
) -> dict:
|
||||
if filters is not None:
|
||||
filter_type = "post_filter" if filter_type is None else filter_type
|
||||
if not isinstance(filter, list):
|
||||
if not isinstance(filters, list):
|
||||
raise RuntimeError(f"unexpected filter with {type(filters)}")
|
||||
final_ext = {"lvector": {}}
|
||||
if min_score != "0.0":
|
||||
@ -483,16 +458,40 @@ def default_vector_search_query(
|
||||
|
||||
class LindormVectorStoreFactory(AbstractVectorFactory):
|
||||
def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> LindormVectorStore:
|
||||
if dataset.index_struct_dict:
|
||||
class_prefix: str = dataset.index_struct_dict["vector_store"]["class_prefix"]
|
||||
collection_name = class_prefix
|
||||
else:
|
||||
dataset_id = dataset.id
|
||||
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
|
||||
dataset.index_struct = json.dumps(self.gen_index_struct_dict(VectorType.LINDORM, collection_name))
|
||||
lindorm_config = LindormVectorStoreConfig(
|
||||
hosts=dify_config.LINDORM_URL,
|
||||
username=dify_config.LINDORM_USERNAME,
|
||||
password=dify_config.LINDORM_PASSWORD,
|
||||
using_ugc=dify_config.USING_UGC_INDEX,
|
||||
)
|
||||
return LindormVectorStore(collection_name, lindorm_config)
|
||||
using_ugc = dify_config.USING_UGC_INDEX
|
||||
routing_value = None
|
||||
if dataset.index_struct:
|
||||
if using_ugc:
|
||||
dimension = dataset.index_struct_dict["dimension"]
|
||||
index_type = dataset.index_struct_dict["index_type"]
|
||||
distance_type = dataset.index_struct_dict["distance_type"]
|
||||
index_name = f"{UGC_INDEX_PREFIX}_{dimension}_{index_type}_{distance_type}"
|
||||
routing_value = dataset.index_struct_dict["vector_store"]["class_prefix"]
|
||||
else:
|
||||
index_name = dataset.index_struct_dict["vector_store"]["class_prefix"]
|
||||
else:
|
||||
embedding_vector = embeddings.embed_query("hello word")
|
||||
dimension = len(embedding_vector)
|
||||
index_type = dify_config.DEFAULT_INDEX_TYPE
|
||||
distance_type = dify_config.DEFAULT_DISTANCE_TYPE
|
||||
class_prefix = Dataset.gen_collection_name_by_id(dataset.id)
|
||||
index_struct_dict = {
|
||||
"type": VectorType.LINDORM,
|
||||
"vector_store": {"class_prefix": class_prefix},
|
||||
"index_type": index_type,
|
||||
"dimension": dimension,
|
||||
"distance_type": distance_type,
|
||||
}
|
||||
dataset.index_struct = json.dumps(index_struct_dict)
|
||||
if using_ugc:
|
||||
index_name = f"{UGC_INDEX_PREFIX}_{dimension}_{index_type}_{distance_type}"
|
||||
routing_value = class_prefix
|
||||
else:
|
||||
index_name = class_prefix
|
||||
return LindormVectorStore(index_name, lindorm_config, routing_value=routing_value)
|
||||
|
@ -37,8 +37,6 @@ class TiDBVectorConfig(BaseModel):
|
||||
raise ValueError("config TIDB_VECTOR_PORT is required")
|
||||
if not values["user"]:
|
||||
raise ValueError("config TIDB_VECTOR_USER is required")
|
||||
if not values["password"]:
|
||||
raise ValueError("config TIDB_VECTOR_PASSWORD is required")
|
||||
if not values["database"]:
|
||||
raise ValueError("config TIDB_VECTOR_DATABASE is required")
|
||||
if not values["program_name"]:
|
||||
|
@ -103,7 +103,7 @@ class ExtractProcessor:
|
||||
extractor = ExcelExtractor(file_path)
|
||||
elif file_extension == ".pdf":
|
||||
extractor = PdfExtractor(file_path)
|
||||
elif file_extension in {".md", ".markdown"}:
|
||||
elif file_extension in {".md", ".markdown", ".mdx"}:
|
||||
extractor = (
|
||||
UnstructuredMarkdownExtractor(file_path, unstructured_api_url, unstructured_api_key)
|
||||
if is_automatic
|
||||
@ -141,7 +141,7 @@ class ExtractProcessor:
|
||||
extractor = ExcelExtractor(file_path)
|
||||
elif file_extension == ".pdf":
|
||||
extractor = PdfExtractor(file_path)
|
||||
elif file_extension in {".md", ".markdown"}:
|
||||
elif file_extension in {".md", ".markdown", ".mdx"}:
|
||||
extractor = MarkdownExtractor(file_path, autodetect_encoding=True)
|
||||
elif file_extension in {".htm", ".html"}:
|
||||
extractor = HtmlExtractor(file_path)
|
||||
|
@ -280,9 +280,6 @@ class ApiTool(Tool):
|
||||
elif property["type"] == "object" or property["type"] == "array":
|
||||
if isinstance(value, str):
|
||||
try:
|
||||
# an array str like '[1,2]' also can convert to list [1,2] through json.loads
|
||||
# json not support single quote, but we can support it
|
||||
value = value.replace("'", '"')
|
||||
return json.loads(value)
|
||||
except ValueError:
|
||||
return value
|
||||
|
@ -4,6 +4,7 @@ from core.model_runtime.utils.encoders import jsonable_encoder
|
||||
from core.workflow.graph_engine.entities.event import (
|
||||
GraphEngineEvent,
|
||||
GraphRunFailedEvent,
|
||||
GraphRunPartialSucceededEvent,
|
||||
GraphRunStartedEvent,
|
||||
GraphRunSucceededEvent,
|
||||
IterationRunFailedEvent,
|
||||
@ -39,6 +40,8 @@ class WorkflowLoggingCallback(WorkflowCallback):
|
||||
self.print_text("\n[GraphRunStartedEvent]", color="pink")
|
||||
elif isinstance(event, GraphRunSucceededEvent):
|
||||
self.print_text("\n[GraphRunSucceededEvent]", color="green")
|
||||
elif isinstance(event, GraphRunPartialSucceededEvent):
|
||||
self.print_text("\n[GraphRunPartialSucceededEvent]", color="pink")
|
||||
elif isinstance(event, GraphRunFailedEvent):
|
||||
self.print_text(f"\n[GraphRunFailedEvent] reason: {event.error}", color="red")
|
||||
elif isinstance(event, NodeRunStartedEvent):
|
||||
|
@ -26,6 +26,7 @@ class NodeRunMetadataKey(StrEnum):
|
||||
PARENT_PARALLEL_START_NODE_ID = "parent_parallel_start_node_id"
|
||||
PARALLEL_MODE_RUN_ID = "parallel_mode_run_id"
|
||||
ITERATION_DURATION_MAP = "iteration_duration_map" # single iteration duration if iteration node runs
|
||||
ERROR_STRATEGY = "error_strategy" # node in continue on error mode return the field
|
||||
|
||||
|
||||
class NodeRunResult(BaseModel):
|
||||
@ -44,3 +45,4 @@ class NodeRunResult(BaseModel):
|
||||
edge_source_handle: Optional[str] = None # source handle id of node with multiple branches
|
||||
|
||||
error: Optional[str] = None # error message if status is failed
|
||||
error_type: Optional[str] = None # error type if status is failed
|
||||
|
@ -33,6 +33,12 @@ class GraphRunSucceededEvent(BaseGraphEvent):
|
||||
|
||||
class GraphRunFailedEvent(BaseGraphEvent):
|
||||
error: str = Field(..., description="failed reason")
|
||||
exceptions_count: Optional[int] = Field(description="exception count", default=0)
|
||||
|
||||
|
||||
class GraphRunPartialSucceededEvent(BaseGraphEvent):
|
||||
exceptions_count: int = Field(..., description="exception count")
|
||||
outputs: Optional[dict[str, Any]] = None
|
||||
|
||||
|
||||
###########################################
|
||||
@ -83,6 +89,10 @@ class NodeRunFailedEvent(BaseNodeEvent):
|
||||
error: str = Field(..., description="error")
|
||||
|
||||
|
||||
class NodeRunExceptionEvent(BaseNodeEvent):
|
||||
error: str = Field(..., description="error")
|
||||
|
||||
|
||||
class NodeInIterationFailedEvent(BaseNodeEvent):
|
||||
error: str = Field(..., description="error")
|
||||
|
||||
|
@ -64,13 +64,21 @@ class Graph(BaseModel):
|
||||
edge_configs = graph_config.get("edges")
|
||||
if edge_configs is None:
|
||||
edge_configs = []
|
||||
# node configs
|
||||
node_configs = graph_config.get("nodes")
|
||||
if not node_configs:
|
||||
raise ValueError("Graph must have at least one node")
|
||||
|
||||
edge_configs = cast(list, edge_configs)
|
||||
node_configs = cast(list, node_configs)
|
||||
|
||||
# reorganize edges mapping
|
||||
edge_mapping: dict[str, list[GraphEdge]] = {}
|
||||
reverse_edge_mapping: dict[str, list[GraphEdge]] = {}
|
||||
target_edge_ids = set()
|
||||
fail_branch_source_node_id = [
|
||||
node["id"] for node in node_configs if node["data"].get("error_strategy") == "fail-branch"
|
||||
]
|
||||
for edge_config in edge_configs:
|
||||
source_node_id = edge_config.get("source")
|
||||
if not source_node_id:
|
||||
@ -90,8 +98,16 @@ class Graph(BaseModel):
|
||||
|
||||
# parse run condition
|
||||
run_condition = None
|
||||
if edge_config.get("sourceHandle") and edge_config.get("sourceHandle") != "source":
|
||||
run_condition = RunCondition(type="branch_identify", branch_identify=edge_config.get("sourceHandle"))
|
||||
if edge_config.get("sourceHandle"):
|
||||
if (
|
||||
edge_config.get("source") in fail_branch_source_node_id
|
||||
and edge_config.get("sourceHandle") != "fail-branch"
|
||||
):
|
||||
run_condition = RunCondition(type="branch_identify", branch_identify="success-branch")
|
||||
elif edge_config.get("sourceHandle") != "source":
|
||||
run_condition = RunCondition(
|
||||
type="branch_identify", branch_identify=edge_config.get("sourceHandle")
|
||||
)
|
||||
|
||||
graph_edge = GraphEdge(
|
||||
source_node_id=source_node_id, target_node_id=target_node_id, run_condition=run_condition
|
||||
@ -100,13 +116,6 @@ class Graph(BaseModel):
|
||||
edge_mapping[source_node_id].append(graph_edge)
|
||||
reverse_edge_mapping[target_node_id].append(graph_edge)
|
||||
|
||||
# node configs
|
||||
node_configs = graph_config.get("nodes")
|
||||
if not node_configs:
|
||||
raise ValueError("Graph must have at least one node")
|
||||
|
||||
node_configs = cast(list, node_configs)
|
||||
|
||||
# fetch nodes that have no predecessor node
|
||||
root_node_configs = []
|
||||
all_node_id_config_mapping: dict[str, dict] = {}
|
||||
|
@ -15,6 +15,7 @@ class RouteNodeState(BaseModel):
|
||||
SUCCESS = "success"
|
||||
FAILED = "failed"
|
||||
PAUSED = "paused"
|
||||
EXCEPTION = "exception"
|
||||
|
||||
id: str = Field(default_factory=lambda: str(uuid.uuid4()))
|
||||
"""node state id"""
|
||||
@ -51,7 +52,11 @@ class RouteNodeState(BaseModel):
|
||||
|
||||
:param run_result: run result
|
||||
"""
|
||||
if self.status in {RouteNodeState.Status.SUCCESS, RouteNodeState.Status.FAILED}:
|
||||
if self.status in {
|
||||
RouteNodeState.Status.SUCCESS,
|
||||
RouteNodeState.Status.FAILED,
|
||||
RouteNodeState.Status.EXCEPTION,
|
||||
}:
|
||||
raise Exception(f"Route state {self.id} already finished")
|
||||
|
||||
if run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED:
|
||||
@ -59,6 +64,9 @@ class RouteNodeState(BaseModel):
|
||||
elif run_result.status == WorkflowNodeExecutionStatus.FAILED:
|
||||
self.status = RouteNodeState.Status.FAILED
|
||||
self.failed_reason = run_result.error
|
||||
elif run_result.status == WorkflowNodeExecutionStatus.EXCEPTION:
|
||||
self.status = RouteNodeState.Status.EXCEPTION
|
||||
self.failed_reason = run_result.error
|
||||
else:
|
||||
raise Exception(f"Invalid route status {run_result.status}")
|
||||
|
||||
|
@ -6,21 +6,24 @@ import uuid
|
||||
from collections.abc import Generator, Mapping
|
||||
from concurrent.futures import ThreadPoolExecutor, wait
|
||||
from copy import copy, deepcopy
|
||||
from typing import Any, Optional
|
||||
from typing import Any, Optional, cast
|
||||
|
||||
from flask import Flask, current_app
|
||||
|
||||
from configs import dify_config
|
||||
from core.app.apps.base_app_queue_manager import GenerateTaskStoppedError
|
||||
from core.app.entities.app_invoke_entities import InvokeFrom
|
||||
from core.workflow.entities.node_entities import NodeRunMetadataKey
|
||||
from core.workflow.entities.node_entities import NodeRunMetadataKey, NodeRunResult
|
||||
from core.workflow.entities.variable_pool import VariablePool, VariableValue
|
||||
from core.workflow.graph_engine.condition_handlers.condition_manager import ConditionManager
|
||||
from core.workflow.graph_engine.entities.event import (
|
||||
BaseIterationEvent,
|
||||
GraphEngineEvent,
|
||||
GraphRunFailedEvent,
|
||||
GraphRunPartialSucceededEvent,
|
||||
GraphRunStartedEvent,
|
||||
GraphRunSucceededEvent,
|
||||
NodeRunExceptionEvent,
|
||||
NodeRunFailedEvent,
|
||||
NodeRunRetrieverResourceEvent,
|
||||
NodeRunStartedEvent,
|
||||
@ -37,7 +40,9 @@ from core.workflow.graph_engine.entities.runtime_route_state import RouteNodeSta
|
||||
from core.workflow.nodes import NodeType
|
||||
from core.workflow.nodes.answer.answer_stream_processor import AnswerStreamProcessor
|
||||
from core.workflow.nodes.base import BaseNode
|
||||
from core.workflow.nodes.base.entities import BaseNodeData
|
||||
from core.workflow.nodes.end.end_stream_processor import EndStreamProcessor
|
||||
from core.workflow.nodes.enums import ErrorStrategy, FailBranchSourceHandle
|
||||
from core.workflow.nodes.event import RunCompletedEvent, RunRetrieverResourceEvent, RunStreamChunkEvent
|
||||
from core.workflow.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING
|
||||
from extensions.ext_database import db
|
||||
@ -49,7 +54,12 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
class GraphEngineThreadPool(ThreadPoolExecutor):
|
||||
def __init__(
|
||||
self, max_workers=None, thread_name_prefix="", initializer=None, initargs=(), max_submit_count=100
|
||||
self,
|
||||
max_workers=None,
|
||||
thread_name_prefix="",
|
||||
initializer=None,
|
||||
initargs=(),
|
||||
max_submit_count=dify_config.MAX_SUBMIT_COUNT,
|
||||
) -> None:
|
||||
super().__init__(max_workers, thread_name_prefix, initializer, initargs)
|
||||
self.max_submit_count = max_submit_count
|
||||
@ -89,7 +99,7 @@ class GraphEngine:
|
||||
max_execution_time: int,
|
||||
thread_pool_id: Optional[str] = None,
|
||||
) -> None:
|
||||
thread_pool_max_submit_count = 100
|
||||
thread_pool_max_submit_count = dify_config.MAX_SUBMIT_COUNT
|
||||
thread_pool_max_workers = 10
|
||||
|
||||
# init thread pool
|
||||
@ -129,6 +139,7 @@ class GraphEngine:
|
||||
def run(self) -> Generator[GraphEngineEvent, None, None]:
|
||||
# trigger graph run start event
|
||||
yield GraphRunStartedEvent()
|
||||
handle_exceptions = []
|
||||
|
||||
try:
|
||||
if self.init_params.workflow_type == WorkflowType.CHAT:
|
||||
@ -141,13 +152,17 @@ class GraphEngine:
|
||||
)
|
||||
|
||||
# run graph
|
||||
generator = stream_processor.process(self._run(start_node_id=self.graph.root_node_id))
|
||||
|
||||
generator = stream_processor.process(
|
||||
self._run(start_node_id=self.graph.root_node_id, handle_exceptions=handle_exceptions)
|
||||
)
|
||||
for item in generator:
|
||||
try:
|
||||
yield item
|
||||
if isinstance(item, NodeRunFailedEvent):
|
||||
yield GraphRunFailedEvent(error=item.route_node_state.failed_reason or "Unknown error.")
|
||||
yield GraphRunFailedEvent(
|
||||
error=item.route_node_state.failed_reason or "Unknown error.",
|
||||
exceptions_count=len(handle_exceptions),
|
||||
)
|
||||
return
|
||||
elif isinstance(item, NodeRunSucceededEvent):
|
||||
if item.node_type == NodeType.END:
|
||||
@ -173,19 +188,24 @@ class GraphEngine:
|
||||
].strip()
|
||||
except Exception as e:
|
||||
logger.exception("Graph run failed")
|
||||
yield GraphRunFailedEvent(error=str(e))
|
||||
yield GraphRunFailedEvent(error=str(e), exceptions_count=len(handle_exceptions))
|
||||
return
|
||||
|
||||
# trigger graph run success event
|
||||
yield GraphRunSucceededEvent(outputs=self.graph_runtime_state.outputs)
|
||||
# count exceptions to determine partial success
if len(handle_exceptions) > 0:
yield GraphRunPartialSucceededEvent(
exceptions_count=len(handle_exceptions), outputs=self.graph_runtime_state.outputs
)
else:
# trigger graph run success event
yield GraphRunSucceededEvent(outputs=self.graph_runtime_state.outputs)
self._release_thread()
except GraphRunFailedError as e:
|
||||
yield GraphRunFailedEvent(error=e.error)
|
||||
yield GraphRunFailedEvent(error=e.error, exceptions_count=len(handle_exceptions))
|
||||
self._release_thread()
|
||||
return
|
||||
except Exception as e:
|
||||
logger.exception("Unknown Error when graph running")
|
||||
yield GraphRunFailedEvent(error=str(e))
|
||||
yield GraphRunFailedEvent(error=str(e), exceptions_count=len(handle_exceptions))
|
||||
self._release_thread()
|
||||
raise e
|
||||
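With the changes above, run() now ends in one of three terminal events: GraphRunSucceededEvent, GraphRunPartialSucceededEvent (when handle_exceptions is non-empty), or GraphRunFailedEvent carrying an exceptions_count. A hedged consumer sketch, assuming an already-initialized engine instance:

# Hedged usage sketch: how a caller might interpret the new event types.
# Constructing a real GraphEngine needs graph/init params not shown here,
# so `engine` is assumed to be an already-initialized instance.
from core.workflow.graph_engine.entities.event import (
    GraphRunFailedEvent,
    GraphRunPartialSucceededEvent,
    GraphRunSucceededEvent,
)


def summarize_run(engine) -> str:
    for event in engine.run():
        if isinstance(event, GraphRunPartialSucceededEvent):
            return f"partial success with {event.exceptions_count} handled exception(s)"
        if isinstance(event, GraphRunSucceededEvent):
            return "success"
        if isinstance(event, GraphRunFailedEvent):
            return f"failed: {event.error}"
    return "no terminal event received"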
|
||||
@ -199,6 +219,7 @@ class GraphEngine:
|
||||
in_parallel_id: Optional[str] = None,
|
||||
parent_parallel_id: Optional[str] = None,
|
||||
parent_parallel_start_node_id: Optional[str] = None,
|
||||
handle_exceptions: list[str] = [],
|
||||
) -> Generator[GraphEngineEvent, None, None]:
|
||||
parallel_start_node_id = None
|
||||
if in_parallel_id:
|
||||
@ -243,7 +264,7 @@ class GraphEngine:
|
||||
previous_node_id=previous_node_id,
|
||||
thread_pool_id=self.thread_pool_id,
|
||||
)
|
||||
|
||||
node_instance = cast(BaseNode[BaseNodeData], node_instance)
|
||||
try:
|
||||
# run node
|
||||
generator = self._run_node(
|
||||
@ -253,6 +274,7 @@ class GraphEngine:
|
||||
parallel_start_node_id=parallel_start_node_id,
|
||||
parent_parallel_id=parent_parallel_id,
|
||||
parent_parallel_start_node_id=parent_parallel_start_node_id,
|
||||
handle_exceptions=handle_exceptions,
|
||||
)
|
||||
|
||||
for item in generator:
|
||||
@ -302,7 +324,12 @@ class GraphEngine:
|
||||
|
||||
if len(edge_mappings) == 1:
|
||||
edge = edge_mappings[0]
|
||||
|
||||
if (
|
||||
previous_route_node_state.status == RouteNodeState.Status.EXCEPTION
|
||||
and node_instance.node_data.error_strategy == ErrorStrategy.FAIL_BRANCH
|
||||
and edge.run_condition is None
|
||||
):
|
||||
break
|
||||
if edge.run_condition:
|
||||
result = ConditionManager.get_condition_handler(
|
||||
init_params=self.init_params,
|
||||
@ -335,7 +362,7 @@ class GraphEngine:
|
||||
if len(sub_edge_mappings) == 0:
|
||||
continue
|
||||
|
||||
edge = sub_edge_mappings[0]
|
||||
edge = cast(GraphEdge, sub_edge_mappings[0])
|
||||
|
||||
result = ConditionManager.get_condition_handler(
|
||||
init_params=self.init_params,
|
||||
@ -356,6 +383,7 @@ class GraphEngine:
|
||||
edge_mappings=sub_edge_mappings,
|
||||
in_parallel_id=in_parallel_id,
|
||||
parallel_start_node_id=parallel_start_node_id,
|
||||
handle_exceptions=handle_exceptions,
|
||||
)
|
||||
|
||||
for item in parallel_generator:
|
||||
@ -370,11 +398,18 @@ class GraphEngine:
|
||||
break
|
||||
|
||||
next_node_id = final_node_id
|
||||
elif (
|
||||
node_instance.node_data.error_strategy == ErrorStrategy.FAIL_BRANCH
|
||||
and node_instance.should_continue_on_error
|
||||
and previous_route_node_state.status == RouteNodeState.Status.EXCEPTION
|
||||
):
|
||||
break
|
||||
else:
|
||||
parallel_generator = self._run_parallel_branches(
|
||||
edge_mappings=edge_mappings,
|
||||
in_parallel_id=in_parallel_id,
|
||||
parallel_start_node_id=parallel_start_node_id,
|
||||
handle_exceptions=handle_exceptions,
|
||||
)
|
||||
|
||||
for item in parallel_generator:
|
||||
@ -396,6 +431,7 @@ class GraphEngine:
|
||||
edge_mappings: list[GraphEdge],
|
||||
in_parallel_id: Optional[str] = None,
|
||||
parallel_start_node_id: Optional[str] = None,
|
||||
handle_exceptions: list[str] = [],
|
||||
) -> Generator[GraphEngineEvent | str, None, None]:
|
||||
# if the nodes have no run conditions, run them all in parallel
|
||||
parallel_id = self.graph.node_parallel_mapping.get(edge_mappings[0].target_node_id)
|
||||
@ -440,6 +476,7 @@ class GraphEngine:
|
||||
"parallel_start_node_id": edge.target_node_id,
|
||||
"parent_parallel_id": in_parallel_id,
|
||||
"parent_parallel_start_node_id": parallel_start_node_id,
|
||||
"handle_exceptions": handle_exceptions,
|
||||
},
|
||||
)
|
||||
|
||||
@ -484,6 +521,7 @@ class GraphEngine:
|
||||
parallel_start_node_id: str,
|
||||
parent_parallel_id: Optional[str] = None,
|
||||
parent_parallel_start_node_id: Optional[str] = None,
|
||||
handle_exceptions: list[str] = [],
|
||||
) -> None:
|
||||
"""
|
||||
Run parallel nodes
|
||||
@ -508,6 +546,7 @@ class GraphEngine:
|
||||
in_parallel_id=parallel_id,
|
||||
parent_parallel_id=parent_parallel_id,
|
||||
parent_parallel_start_node_id=parent_parallel_start_node_id,
|
||||
handle_exceptions=handle_exceptions,
|
||||
)
|
||||
|
||||
for item in generator:
|
||||
@ -554,6 +593,7 @@ class GraphEngine:
|
||||
parallel_start_node_id: Optional[str] = None,
|
||||
parent_parallel_id: Optional[str] = None,
|
||||
parent_parallel_start_node_id: Optional[str] = None,
|
||||
handle_exceptions: list[str] = [],
|
||||
) -> Generator[GraphEngineEvent, None, None]:
|
||||
"""
|
||||
Run node
|
||||
@ -593,19 +633,55 @@ class GraphEngine:
|
||||
route_node_state.set_finished(run_result=run_result)
|
||||
|
||||
if run_result.status == WorkflowNodeExecutionStatus.FAILED:
|
||||
yield NodeRunFailedEvent(
|
||||
error=route_node_state.failed_reason or "Unknown error.",
|
||||
id=node_instance.id,
|
||||
node_id=node_instance.node_id,
|
||||
node_type=node_instance.node_type,
|
||||
node_data=node_instance.node_data,
|
||||
route_node_state=route_node_state,
|
||||
parallel_id=parallel_id,
|
||||
parallel_start_node_id=parallel_start_node_id,
|
||||
parent_parallel_id=parent_parallel_id,
|
||||
parent_parallel_start_node_id=parent_parallel_start_node_id,
|
||||
)
|
||||
if node_instance.should_continue_on_error:
|
||||
# if run failed, handle error
|
||||
run_result = self._handle_continue_on_error(
|
||||
node_instance,
|
||||
item.run_result,
|
||||
self.graph_runtime_state.variable_pool,
|
||||
handle_exceptions=handle_exceptions,
|
||||
)
|
||||
route_node_state.node_run_result = run_result
|
||||
route_node_state.status = RouteNodeState.Status.EXCEPTION
|
||||
if run_result.outputs:
|
||||
for variable_key, variable_value in run_result.outputs.items():
|
||||
# append variables to variable pool recursively
|
||||
self._append_variables_recursively(
|
||||
node_id=node_instance.node_id,
|
||||
variable_key_list=[variable_key],
|
||||
variable_value=variable_value,
|
||||
)
|
||||
yield NodeRunExceptionEvent(
|
||||
error=run_result.error or "System Error",
|
||||
id=node_instance.id,
|
||||
node_id=node_instance.node_id,
|
||||
node_type=node_instance.node_type,
|
||||
node_data=node_instance.node_data,
|
||||
route_node_state=route_node_state,
|
||||
parallel_id=parallel_id,
|
||||
parallel_start_node_id=parallel_start_node_id,
|
||||
parent_parallel_id=parent_parallel_id,
|
||||
parent_parallel_start_node_id=parent_parallel_start_node_id,
|
||||
)
|
||||
else:
|
||||
yield NodeRunFailedEvent(
|
||||
error=route_node_state.failed_reason or "Unknown error.",
|
||||
id=node_instance.id,
|
||||
node_id=node_instance.node_id,
|
||||
node_type=node_instance.node_type,
|
||||
node_data=node_instance.node_data,
|
||||
route_node_state=route_node_state,
|
||||
parallel_id=parallel_id,
|
||||
parallel_start_node_id=parallel_start_node_id,
|
||||
parent_parallel_id=parent_parallel_id,
|
||||
parent_parallel_start_node_id=parent_parallel_start_node_id,
|
||||
)
|
||||
|
||||
elif run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED:
|
||||
if node_instance.should_continue_on_error and self.graph.edge_mapping.get(
|
||||
node_instance.node_id
|
||||
):
|
||||
run_result.edge_source_handle = FailBranchSourceHandle.SUCCESS
|
||||
if run_result.metadata and run_result.metadata.get(NodeRunMetadataKey.TOTAL_TOKENS):
|
||||
# plus state total_tokens
|
||||
self.graph_runtime_state.total_tokens += int(
|
||||
@ -741,6 +817,56 @@ class GraphEngine:
|
||||
new_instance.graph_runtime_state.variable_pool = deepcopy(self.graph_runtime_state.variable_pool)
|
||||
return new_instance
|
||||
|
||||
def _handle_continue_on_error(
|
||||
self,
|
||||
node_instance: BaseNode[BaseNodeData],
|
||||
error_result: NodeRunResult,
|
||||
variable_pool: VariablePool,
|
||||
handle_exceptions: list[str] = [],
|
||||
) -> NodeRunResult:
|
||||
"""
|
||||
handle continue-on-error when node_instance.should_continue_on_error is True
|
||||
|
||||
|
||||
:param error_result (NodeRunResult): error run result
|
||||
:param variable_pool (VariablePool): variable pool
|
||||
:return: exception run result
|
||||
"""
|
||||
# add error message and error type to variable pool
|
||||
variable_pool.add([node_instance.node_id, "error_message"], error_result.error)
|
||||
variable_pool.add([node_instance.node_id, "error_type"], error_result.error_type)
|
||||
# add error message to handle_exceptions
|
||||
handle_exceptions.append(error_result.error)
|
||||
node_error_args = {
|
||||
"status": WorkflowNodeExecutionStatus.EXCEPTION,
|
||||
"error": error_result.error,
|
||||
"inputs": error_result.inputs,
|
||||
"metadata": {
|
||||
NodeRunMetadataKey.ERROR_STRATEGY: node_instance.node_data.error_strategy,
|
||||
},
|
||||
}
|
||||
|
||||
if node_instance.node_data.error_strategy is ErrorStrategy.DEFAULT_VALUE:
|
||||
return NodeRunResult(
|
||||
**node_error_args,
|
||||
outputs={
|
||||
**node_instance.node_data.default_value_dict,
|
||||
"error_message": error_result.error,
|
||||
"error_type": error_result.error_type,
|
||||
},
|
||||
)
|
||||
elif node_instance.node_data.error_strategy is ErrorStrategy.FAIL_BRANCH:
|
||||
if self.graph.edge_mapping.get(node_instance.node_id):
|
||||
node_error_args["edge_source_handle"] = FailBranchSourceHandle.FAILED
|
||||
return NodeRunResult(
|
||||
**node_error_args,
|
||||
outputs={
|
||||
"error_message": error_result.error,
|
||||
"error_type": error_result.error_type,
|
||||
},
|
||||
)
|
||||
return error_result
|
||||
|
||||
|
||||
class GraphRunFailedError(Exception):
|
||||
def __init__(self, error: str):
|
||||
|
@ -6,7 +6,7 @@ from core.workflow.nodes.answer.entities import (
|
||||
TextGenerateRouteChunk,
|
||||
VarGenerateRouteChunk,
|
||||
)
|
||||
from core.workflow.nodes.enums import NodeType
|
||||
from core.workflow.nodes.enums import ErrorStrategy, NodeType
|
||||
from core.workflow.utils.variable_template_parser import VariableTemplateParser
|
||||
|
||||
|
||||
@ -148,13 +148,18 @@ class AnswerStreamGeneratorRouter:
|
||||
for edge in reverse_edges:
|
||||
source_node_id = edge.source_node_id
|
||||
source_node_type = node_id_config_mapping[source_node_id].get("data", {}).get("type")
|
||||
if source_node_type in {
|
||||
NodeType.ANSWER,
|
||||
NodeType.IF_ELSE,
|
||||
NodeType.QUESTION_CLASSIFIER,
|
||||
NodeType.ITERATION,
|
||||
NodeType.VARIABLE_ASSIGNER,
|
||||
}:
|
||||
source_node_data = node_id_config_mapping[source_node_id].get("data", {})
|
||||
if (
|
||||
source_node_type
|
||||
in {
|
||||
NodeType.ANSWER,
|
||||
NodeType.IF_ELSE,
|
||||
NodeType.QUESTION_CLASSIFIER,
|
||||
NodeType.ITERATION,
|
||||
NodeType.VARIABLE_ASSIGNER,
|
||||
}
|
||||
or source_node_data.get("error_strategy") == ErrorStrategy.FAIL_BRANCH
|
||||
):
|
||||
answer_dependencies[answer_node_id].append(source_node_id)
|
||||
else:
|
||||
cls._recursive_fetch_answer_dependencies(
|
||||
|
@ -6,6 +6,7 @@ from core.file import FILE_MODEL_IDENTITY, File
|
||||
from core.workflow.entities.variable_pool import VariablePool
|
||||
from core.workflow.graph_engine.entities.event import (
|
||||
GraphEngineEvent,
|
||||
NodeRunExceptionEvent,
|
||||
NodeRunStartedEvent,
|
||||
NodeRunStreamChunkEvent,
|
||||
NodeRunSucceededEvent,
|
||||
@ -50,7 +51,7 @@ class AnswerStreamProcessor(StreamProcessor):
|
||||
|
||||
for _ in stream_out_answer_node_ids:
|
||||
yield event
|
||||
elif isinstance(event, NodeRunSucceededEvent):
|
||||
elif isinstance(event, NodeRunSucceededEvent | NodeRunExceptionEvent):
|
||||
yield event
|
||||
if event.route_node_state.node_id in self.current_stream_chunk_generating_node_ids:
|
||||
# update self.route_position after all stream event finished
|
||||
|
@ -1,14 +1,124 @@
|
||||
import json
|
||||
from abc import ABC
|
||||
from typing import Optional
|
||||
from enum import StrEnum
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, model_validator
|
||||
|
||||
from core.workflow.nodes.base.exc import DefaultValueTypeError
|
||||
from core.workflow.nodes.enums import ErrorStrategy
|
||||
|
||||
|
||||
class DefaultValueType(StrEnum):
|
||||
STRING = "string"
|
||||
NUMBER = "number"
|
||||
OBJECT = "object"
|
||||
ARRAY_NUMBER = "array[number]"
|
||||
ARRAY_STRING = "array[string]"
|
||||
ARRAY_OBJECT = "array[object]"
|
||||
ARRAY_FILES = "array[file]"
|
||||
|
||||
|
||||
NumberType = Union[int, float]
|
||||
|
||||
|
||||
class DefaultValue(BaseModel):
|
||||
value: Any
|
||||
type: DefaultValueType
|
||||
key: str
|
||||
|
||||
@staticmethod
|
||||
def _parse_json(value: str) -> Any:
|
||||
"""Unified JSON parsing handler"""
|
||||
try:
|
||||
return json.loads(value)
|
||||
except json.JSONDecodeError:
|
||||
raise DefaultValueTypeError(f"Invalid JSON format for value: {value}")
|
||||
|
||||
@staticmethod
|
||||
def _validate_array(value: Any, element_type: DefaultValueType) -> bool:
|
||||
"""Unified array type validation"""
|
||||
return isinstance(value, list) and all(isinstance(x, element_type) for x in value)
|
||||
|
||||
@staticmethod
|
||||
def _convert_number(value: str) -> float:
|
||||
"""Unified number conversion handler"""
|
||||
try:
|
||||
return float(value)
|
||||
except ValueError:
|
||||
raise DefaultValueTypeError(f"Cannot convert to number: {value}")
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_value_type(self) -> "DefaultValue":
|
||||
if self.type is None:
|
||||
raise DefaultValueTypeError("type field is required")
|
||||
|
||||
# Type validation configuration
|
||||
type_validators = {
|
||||
DefaultValueType.STRING: {
|
||||
"type": str,
|
||||
"converter": lambda x: x,
|
||||
},
|
||||
DefaultValueType.NUMBER: {
|
||||
"type": NumberType,
|
||||
"converter": self._convert_number,
|
||||
},
|
||||
DefaultValueType.OBJECT: {
|
||||
"type": dict,
|
||||
"converter": self._parse_json,
|
||||
},
|
||||
DefaultValueType.ARRAY_NUMBER: {
|
||||
"type": list,
|
||||
"element_type": NumberType,
|
||||
"converter": self._parse_json,
|
||||
},
|
||||
DefaultValueType.ARRAY_STRING: {
|
||||
"type": list,
|
||||
"element_type": str,
|
||||
"converter": self._parse_json,
|
||||
},
|
||||
DefaultValueType.ARRAY_OBJECT: {
|
||||
"type": list,
|
||||
"element_type": dict,
|
||||
"converter": self._parse_json,
|
||||
},
|
||||
}
|
||||
|
||||
validator = type_validators.get(self.type)
|
||||
if not validator:
|
||||
if self.type == DefaultValueType.ARRAY_FILES:
|
||||
# Handle files type
|
||||
return self
|
||||
raise DefaultValueTypeError(f"Unsupported type: {self.type}")
|
||||
|
||||
# Handle string input cases
|
||||
if isinstance(self.value, str) and self.type != DefaultValueType.STRING:
|
||||
self.value = validator["converter"](self.value)
|
||||
|
||||
# Validate base type
|
||||
if not isinstance(self.value, validator["type"]):
|
||||
raise DefaultValueTypeError(f"Value must be {validator['type'].__name__} type for {self.value}")
|
||||
|
||||
# Validate array element types
|
||||
if validator["type"] == list and not self._validate_array(self.value, validator["element_type"]):
|
||||
raise DefaultValueTypeError(f"All elements must be {validator['element_type'].__name__} for {self.value}")
|
||||
|
||||
return self
|
||||
|
||||
|
||||
class BaseNodeData(ABC, BaseModel):
|
||||
title: str
|
||||
desc: Optional[str] = None
|
||||
error_strategy: Optional[ErrorStrategy] = None
|
||||
default_value: Optional[list[DefaultValue]] = None
|
||||
version: str = "1"
|
||||
|
||||
@property
|
||||
def default_value_dict(self):
|
||||
if self.default_value:
|
||||
return {item.key: item.value for item in self.default_value}
|
||||
return {}
|
||||
|
||||
|
||||
class BaseIterationNodeData(BaseNodeData):
|
||||
start_node_id: Optional[str] = None
|
||||
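A hedged example of how the DefaultValue model added above validates and converts values; the import path follows the diff, and the sample values are illustrative:

# String inputs for non-string types are parsed (JSON for objects/arrays, float() for numbers)
# and then type-checked; invalid input raises DefaultValueTypeError.
from core.workflow.nodes.base.entities import DefaultValue, DefaultValueType

numbers = DefaultValue(key="scores", type=DefaultValueType.ARRAY_NUMBER, value="[1, 2.5, 3]")
print(numbers.value)  # [1, 2.5, 3] after JSON parsing and element-type validation

answer = DefaultValue(key="answer", type=DefaultValueType.NUMBER, value="42")
print(answer.value)  # 42.0, converted via float()

# DefaultValue(key="bad", type=DefaultValueType.OBJECT, value="not json")
# would raise DefaultValueTypeError("Invalid JSON format for value: not json")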
|
api/core/workflow/nodes/base/exc.py (new file, 10 lines)
@@ -0,0 +1,10 @@
|
||||
class BaseNodeError(Exception):
|
||||
"""Base class for node errors."""
|
||||
|
||||
pass
|
||||
|
||||
|
||||
class DefaultValueTypeError(BaseNodeError):
|
||||
"""Raised when the default value type is invalid."""
|
||||
|
||||
pass
|
@ -4,7 +4,7 @@ from collections.abc import Generator, Mapping, Sequence
|
||||
from typing import TYPE_CHECKING, Any, Generic, Optional, TypeVar, Union, cast
|
||||
|
||||
from core.workflow.entities.node_entities import NodeRunResult
|
||||
from core.workflow.nodes.enums import NodeType
|
||||
from core.workflow.nodes.enums import CONTINUE_ON_ERROR_NODE_TYPE, NodeType
|
||||
from core.workflow.nodes.event import NodeEvent, RunCompletedEvent
|
||||
from models.workflow import WorkflowNodeExecutionStatus
|
||||
|
||||
@ -72,10 +72,7 @@ class BaseNode(Generic[GenericNodeData]):
|
||||
result = self._run()
|
||||
except Exception as e:
|
||||
logger.exception(f"Node {self.node_id} failed to run")
|
||||
result = NodeRunResult(
|
||||
status=WorkflowNodeExecutionStatus.FAILED,
|
||||
error=str(e),
|
||||
)
|
||||
result = NodeRunResult(status=WorkflowNodeExecutionStatus.FAILED, error=str(e), error_type="SystemError")
|
||||
|
||||
if isinstance(result, NodeRunResult):
|
||||
yield RunCompletedEvent(run_result=result)
|
||||
@ -137,3 +134,12 @@ class BaseNode(Generic[GenericNodeData]):
|
||||
:return:
|
||||
"""
|
||||
return self._node_type
|
||||
|
||||
@property
|
||||
def should_continue_on_error(self) -> bool:
|
||||
"""judge if should continue on error
|
||||
|
||||
Returns:
|
||||
bool: if should continue on error
|
||||
"""
|
||||
return self.node_data.error_strategy is not None and self.node_type in CONTINUE_ON_ERROR_NODE_TYPE
|
||||
|
@ -61,7 +61,9 @@ class CodeNode(BaseNode[CodeNodeData]):
|
||||
# Transform result
|
||||
result = self._transform_result(result, self.node_data.outputs)
|
||||
except (CodeExecutionError, CodeNodeError) as e:
|
||||
return NodeRunResult(status=WorkflowNodeExecutionStatus.FAILED, inputs=variables, error=str(e))
|
||||
return NodeRunResult(
|
||||
status=WorkflowNodeExecutionStatus.FAILED, inputs=variables, error=str(e), error_type=type(e).__name__
|
||||
)
|
||||
|
||||
return NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED, inputs=variables, outputs=result)
|
||||
|
||||
|
@ -23,3 +23,16 @@ class NodeType(StrEnum):
|
||||
DOCUMENT_EXTRACTOR = "document-extractor"
|
||||
LIST_OPERATOR = "list-operator"
|
||||
AGENT = "agent"
|
||||
|
||||
|
||||
class ErrorStrategy(StrEnum):
|
||||
FAIL_BRANCH = "fail-branch"
|
||||
DEFAULT_VALUE = "default-value"
|
||||
|
||||
|
||||
class FailBranchSourceHandle(StrEnum):
|
||||
FAILED = "fail-branch"
|
||||
SUCCESS = "success-branch"
|
||||
|
||||
|
||||
CONTINUE_ON_ERROR_NODE_TYPE = [NodeType.LLM, NodeType.CODE, NodeType.TOOL, NodeType.HTTP_REQUEST]
|
||||
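The constant above, together with the should_continue_on_error property added to BaseNode, gates the whole continue-on-error feature. A small hedged sketch of that rule (the standalone function is only for illustration; the real check lives on BaseNode):

from core.workflow.nodes.enums import CONTINUE_ON_ERROR_NODE_TYPE, ErrorStrategy, NodeType


def should_continue_on_error(error_strategy, node_type) -> bool:
    # A node continues on error only when it both declares an error strategy
    # and belongs to one of the node types that support the feature.
    return error_strategy is not None and node_type in CONTINUE_ON_ERROR_NODE_TYPE


print(should_continue_on_error(ErrorStrategy.FAIL_BRANCH, NodeType.HTTP_REQUEST))  # True
print(should_continue_on_error(None, NodeType.HTTP_REQUEST))                       # False
print(should_continue_on_error(ErrorStrategy.DEFAULT_VALUE, NodeType.ANSWER))      # False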
|
@ -21,6 +21,7 @@ from .entities import (
|
||||
from .exc import (
|
||||
AuthorizationConfigError,
|
||||
FileFetchError,
|
||||
HttpRequestNodeError,
|
||||
InvalidHttpMethodError,
|
||||
ResponseSizeError,
|
||||
)
|
||||
@ -36,7 +37,7 @@ BODY_TYPE_TO_CONTENT_TYPE = {
|
||||
class Executor:
|
||||
method: Literal["get", "head", "post", "put", "delete", "patch"]
|
||||
url: str
|
||||
params: Mapping[str, str] | None
|
||||
params: list[tuple[str, str]] | None
|
||||
content: str | bytes | None
|
||||
data: Mapping[str, Any] | None
|
||||
files: Mapping[str, tuple[str | None, bytes, str]] | None
|
||||
@ -66,7 +67,7 @@ class Executor:
|
||||
self.method = node_data.method
|
||||
self.auth = node_data.authorization
|
||||
self.timeout = timeout
|
||||
self.params = {}
|
||||
self.params = []
|
||||
self.headers = {}
|
||||
self.content = None
|
||||
self.files = None
|
||||
@ -88,14 +89,48 @@ class Executor:
|
||||
self.url = self.variable_pool.convert_template(self.node_data.url).text
|
||||
|
||||
def _init_params(self):
|
||||
params = _plain_text_to_dict(self.node_data.params)
|
||||
for key in params:
|
||||
params[key] = self.variable_pool.convert_template(params[key]).text
|
||||
self.params = params
|
||||
"""
|
||||
Almost same as _init_headers(), difference:
|
||||
1. response a list tuple to support same key, like 'aa=1&aa=2'
|
||||
2. param value may have '\n', we need to splitlines then extract the variable value.
|
||||
"""
|
||||
result = []
|
||||
for line in self.node_data.params.splitlines():
|
||||
if not (line := line.strip()):
|
||||
continue
|
||||
|
||||
key, *value = line.split(":", 1)
|
||||
if not (key := key.strip()):
|
||||
continue
|
||||
|
||||
value = value[0].strip() if value else ""
|
||||
result.append(
|
||||
(self.variable_pool.convert_template(key).text, self.variable_pool.convert_template(value).text)
|
||||
)
|
||||
|
||||
self.params = result
|
||||
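A hedged, self-contained illustration of the parsing above (template conversion through the variable pool is left out):

# Each non-empty line is split on the first ':' into a (key, value) tuple,
# so repeated keys survive, unlike the old dict-based _plain_text_to_dict().
def parse_params(text: str) -> list[tuple[str, str]]:
    result = []
    for line in text.splitlines():
        if not (line := line.strip()):
            continue
        key, *value = line.split(":", 1)
        if not (key := key.strip()):
            continue
        result.append((key, value[0].strip() if value else ""))
    return result


print(parse_params("aa:1\naa:2\npage:3"))
# [('aa', '1'), ('aa', '2'), ('page', '3')] -> serialized later as 'aa=1&aa=2&page=3'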
|
||||
def _init_headers(self):
|
||||
"""
|
||||
Convert the header string of frontend to a dictionary.
|
||||
|
||||
Each line in the header string represents a key-value pair.
|
||||
Keys and values are separated by ':'.
|
||||
Empty values are allowed.
|
||||
|
||||
Examples:
|
||||
'aa:bb\n cc:dd' -> {'aa': 'bb', 'cc': 'dd'}
|
||||
'aa:\n cc:dd\n' -> {'aa': '', 'cc': 'dd'}
|
||||
'aa\n cc : dd' -> {'aa': '', 'cc': 'dd'}
|
||||
|
||||
"""
|
||||
headers = self.variable_pool.convert_template(self.node_data.headers).text
|
||||
self.headers = _plain_text_to_dict(headers)
|
||||
self.headers = {
|
||||
key.strip(): (value[0].strip() if value else "")
|
||||
for line in headers.splitlines()
|
||||
if line.strip()
|
||||
for key, *value in [line.split(":", 1)]
|
||||
}
|
||||
|
||||
def _init_body(self):
|
||||
body = self.node_data.body
|
||||
@ -208,8 +243,10 @@ class Executor:
|
||||
"follow_redirects": True,
|
||||
}
|
||||
# request_args = {k: v for k, v in request_args.items() if v is not None}
|
||||
|
||||
response = getattr(ssrf_proxy, self.method)(**request_args)
|
||||
try:
|
||||
response = getattr(ssrf_proxy, self.method)(**request_args)
|
||||
except ssrf_proxy.MaxRetriesExceededError as e:
|
||||
raise HttpRequestNodeError(str(e))
|
||||
return response
|
||||
|
||||
def invoke(self) -> Response:
|
||||
@ -285,33 +322,6 @@ class Executor:
|
||||
return raw
|
||||
|
||||
|
||||
def _plain_text_to_dict(text: str, /) -> dict[str, str]:
|
||||
"""
|
||||
Convert a string of key-value pairs to a dictionary.
|
||||
|
||||
Each line in the input string represents a key-value pair.
|
||||
Keys and values are separated by ':'.
|
||||
Empty values are allowed.
|
||||
|
||||
Examples:
|
||||
'aa:bb\n cc:dd' -> {'aa': 'bb', 'cc': 'dd'}
|
||||
'aa:\n cc:dd\n' -> {'aa': '', 'cc': 'dd'}
|
||||
'aa\n cc : dd' -> {'aa': '', 'cc': 'dd'}
|
||||
|
||||
Args:
|
||||
convert_text (str): The input string to convert.
|
||||
|
||||
Returns:
|
||||
dict[str, str]: A dictionary of key-value pairs.
|
||||
"""
|
||||
return {
|
||||
key.strip(): (value[0].strip() if value else "")
|
||||
for line in text.splitlines()
|
||||
if line.strip()
|
||||
for key, *value in [line.split(":", 1)]
|
||||
}
|
||||
|
||||
|
||||
def _generate_random_string(n: int) -> str:
|
||||
"""
|
||||
Generate a random string of lowercase ASCII letters.
|
||||
|
@ -65,6 +65,21 @@ class HttpRequestNode(BaseNode[HttpRequestNodeData]):
|
||||
|
||||
response = http_executor.invoke()
|
||||
files = self.extract_files(url=http_executor.url, response=response)
|
||||
if not response.response.is_success and self.should_continue_on_error:
|
||||
return NodeRunResult(
|
||||
status=WorkflowNodeExecutionStatus.FAILED,
|
||||
outputs={
|
||||
"status_code": response.status_code,
|
||||
"body": response.text if not files else "",
|
||||
"headers": response.headers,
|
||||
"files": files,
|
||||
},
|
||||
process_data={
|
||||
"request": http_executor.to_log(),
|
||||
},
|
||||
error=f"Request failed with status code {response.status_code}",
|
||||
error_type="HTTPResponseCodeError",
|
||||
)
|
||||
return NodeRunResult(
|
||||
status=WorkflowNodeExecutionStatus.SUCCEEDED,
|
||||
outputs={
|
||||
@ -83,6 +98,7 @@ class HttpRequestNode(BaseNode[HttpRequestNodeData]):
|
||||
status=WorkflowNodeExecutionStatus.FAILED,
|
||||
error=str(e),
|
||||
process_data=process_data,
|
||||
error_type=type(e).__name__,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
@ -164,7 +164,9 @@ class IterationNode(BaseNode[IterationNodeData]):
|
||||
if self.node_data.is_parallel:
|
||||
futures: list[Future] = []
|
||||
q: Queue = Queue()
|
||||
thread_pool = GraphEngineThreadPool(max_workers=self.node_data.parallel_nums, max_submit_count=100)
|
||||
thread_pool = GraphEngineThreadPool(
|
||||
max_workers=self.node_data.parallel_nums, max_submit_count=dify_config.MAX_SUBMIT_COUNT
|
||||
)
|
||||
for index, item in enumerate(iterator_list_value):
|
||||
future: Future = thread_pool.submit(
|
||||
self._run_single_iter_parallel,
|
||||
@ -184,7 +186,6 @@ class IterationNode(BaseNode[IterationNodeData]):
|
||||
future.add_done_callback(thread_pool.task_done_callback)
|
||||
futures.append(future)
|
||||
succeeded_count = 0
|
||||
empty_count = 0
|
||||
while True:
|
||||
try:
|
||||
event = q.get(timeout=1)
|
||||
@ -598,3 +599,4 @@ class IterationNode(BaseNode[IterationNodeData]):
|
||||
parallel_mode_run_id=parallel_mode_run_id,
|
||||
):
|
||||
q.put(event)
|
||||
graph_engine.graph_runtime_state.total_tokens += graph_engine_copy.graph_runtime_state.total_tokens
|
||||
|
@ -193,6 +193,7 @@ class LLMNode(BaseNode[LLMNodeData]):
|
||||
error=str(e),
|
||||
inputs=node_inputs,
|
||||
process_data=process_data,
|
||||
error_type=type(e).__name__,
|
||||
)
|
||||
)
|
||||
return
|
||||
@ -615,16 +616,35 @@ class LLMNode(BaseNode[LLMNodeData]):
|
||||
)
|
||||
# Insert histories into the prompt
|
||||
prompt_content = prompt_messages[0].content
|
||||
if "#histories#" in prompt_content:
|
||||
prompt_content = prompt_content.replace("#histories#", memory_text)
|
||||
# For issue #11247 - Check if prompt content is a string or a list
|
||||
prompt_content_type = type(prompt_content)
|
||||
if prompt_content_type == str:
|
||||
if "#histories#" in prompt_content:
|
||||
prompt_content = prompt_content.replace("#histories#", memory_text)
|
||||
else:
|
||||
prompt_content = memory_text + "\n" + prompt_content
|
||||
prompt_messages[0].content = prompt_content
|
||||
elif prompt_content_type == list:
|
||||
for content_item in prompt_content:
|
||||
if content_item.type == PromptMessageContentType.TEXT:
|
||||
if "#histories#" in content_item.data:
|
||||
content_item.data = content_item.data.replace("#histories#", memory_text)
|
||||
else:
|
||||
content_item.data = memory_text + "\n" + content_item.data
|
||||
else:
|
||||
prompt_content = memory_text + "\n" + prompt_content
|
||||
prompt_messages[0].content = prompt_content
|
||||
raise ValueError("Invalid prompt content type")
|
||||
|
||||
# Add current query to the prompt message
|
||||
if user_query:
|
||||
prompt_content = prompt_messages[0].content.replace("#sys.query#", user_query)
|
||||
prompt_messages[0].content = prompt_content
|
||||
if prompt_content_type == str:
|
||||
prompt_content = prompt_messages[0].content.replace("#sys.query#", user_query)
|
||||
prompt_messages[0].content = prompt_content
|
||||
elif prompt_content_type == list:
|
||||
for content_item in prompt_content:
|
||||
if content_item.type == PromptMessageContentType.TEXT:
|
||||
content_item.data = user_query + "\n" + content_item.data
|
||||
else:
|
||||
raise ValueError("Invalid prompt content type")
|
||||
else:
|
||||
raise TemplateTypeNotSupportError(type_name=str(type(prompt_template)))
|
||||
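For issue #11247 the code above has to handle prompt content that is either a plain string or a list of content items. A simplified, hedged sketch of that branch, using dicts instead of the real PromptMessageContent objects:

def inject_histories(content, memory_text: str):
    # String content: replace the '#histories#' placeholder, or prepend the history.
    if isinstance(content, str):
        if "#histories#" in content:
            return content.replace("#histories#", memory_text)
        return memory_text + "\n" + content
    # List content: apply the same rule to every text item.
    if isinstance(content, list):
        for item in content:
            if item.get("type") == "text":
                if "#histories#" in item["data"]:
                    item["data"] = item["data"].replace("#histories#", memory_text)
                else:
                    item["data"] = memory_text + "\n" + item["data"]
        return content
    raise ValueError("Invalid prompt content type")


print(inject_histories("Answer using #histories#.", "User said hi."))
print(inject_histories([{"type": "text", "data": "Answer the question."}], "User said hi."))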
|
||||
|
@ -139,7 +139,7 @@ class QuestionClassifierNode(LLMNode):
|
||||
"usage": jsonable_encoder(usage),
|
||||
"finish_reason": finish_reason,
|
||||
}
|
||||
outputs = {"class_name": category_name}
|
||||
outputs = {"class_name": category_name, "class_id": category_id}
|
||||
|
||||
return NodeRunResult(
|
||||
status=WorkflowNodeExecutionStatus.SUCCEEDED,
|
||||
|
@ -68,6 +68,7 @@ class ToolNode(BaseNode[ToolNodeData]):
|
||||
inputs={},
|
||||
metadata={NodeRunMetadataKey.TOOL_INFO: tool_info},
|
||||
error=f"Failed to get tool runtime: {str(e)}",
|
||||
error_type=type(e).__name__,
|
||||
)
|
||||
)
|
||||
return
|
||||
@ -104,8 +105,10 @@ class ToolNode(BaseNode[ToolNodeData]):
|
||||
inputs=parameters_for_log,
|
||||
metadata={NodeRunMetadataKey.TOOL_INFO: tool_info},
|
||||
error=f"Failed to invoke tool: {str(e)}",
|
||||
error_type=type(e).__name__,
|
||||
)
|
||||
)
|
||||
return
|
||||
|
||||
try:
|
||||
# convert tool messages
|
||||
|
@ -34,7 +34,6 @@ else
|
||||
--workers ${SERVER_WORKER_AMOUNT:-1} \
|
||||
--worker-class ${SERVER_WORKER_CLASS:-gevent} \
|
||||
--timeout ${GUNICORN_TIMEOUT:-200} \
|
||||
--preload \
|
||||
app:app
|
||||
fi
|
||||
fi
|
||||
|
@ -1,8 +1,11 @@
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import uuid
|
||||
from logging.handlers import RotatingFileHandler
|
||||
|
||||
import flask
|
||||
|
||||
from configs import dify_config
|
||||
from dify_app import DifyApp
|
||||
|
||||
@ -22,11 +25,14 @@ def init_app(app: DifyApp):
|
||||
)
|
||||
|
||||
# Always add StreamHandler to log to console
|
||||
log_handlers.append(logging.StreamHandler(sys.stdout))
|
||||
sh = logging.StreamHandler(sys.stdout)
|
||||
sh.addFilter(RequestIdFilter())
|
||||
log_formatter = logging.Formatter(fmt=dify_config.LOG_FORMAT)
|
||||
sh.setFormatter(log_formatter)
|
||||
log_handlers.append(sh)
|
||||
|
||||
logging.basicConfig(
|
||||
level=dify_config.LOG_LEVEL,
|
||||
format=dify_config.LOG_FORMAT,
|
||||
datefmt=dify_config.LOG_DATEFORMAT,
|
||||
handlers=log_handlers,
|
||||
force=True,
|
||||
@ -44,3 +50,22 @@ def init_app(app: DifyApp):
|
||||
|
||||
for handler in logging.root.handlers:
|
||||
handler.formatter.converter = time_converter
|
||||
|
||||
|
||||
def get_request_id():
|
||||
if getattr(flask.g, "request_id", None):
|
||||
return flask.g.request_id
|
||||
|
||||
new_uuid = uuid.uuid4().hex[:10]
|
||||
flask.g.request_id = new_uuid
|
||||
|
||||
return new_uuid
|
||||
|
||||
|
||||
class RequestIdFilter(logging.Filter):
|
||||
# This is a logging filter that makes the request ID available for use in
|
||||
# the logging format. Note that we're checking if we're in a request
|
||||
# context, as we may want to log things before Flask is fully loaded.
|
||||
def filter(self, record):
|
||||
record.req_id = get_request_id() if flask.has_request_context() else ""
|
||||
return True
|
||||
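The filter above only shows up in output when the active log format actually references req_id. A hedged configuration sketch (the format string here is an assumption, not necessarily dify_config.LOG_FORMAT):

import logging
import sys

handler = logging.StreamHandler(sys.stdout)
handler.addFilter(RequestIdFilter())  # the filter class defined in the hunk above
handler.setFormatter(logging.Formatter("%(asctime)s [%(req_id)s] %(levelname)s: %(message)s"))

logging.basicConfig(level=logging.INFO, handlers=[handler], force=True)
logging.getLogger(__name__).info("hello")  # req_id is empty outside a Flask request context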
|
@ -1,31 +1,43 @@
|
||||
import logging
|
||||
from collections.abc import Generator
|
||||
from collections.abc import Callable, Generator, Mapping
|
||||
from typing import Union
|
||||
|
||||
from flask import Flask
|
||||
|
||||
from configs import dify_config
|
||||
from configs.middleware.storage.opendal_storage_config import OpenDALScheme
|
||||
from dify_app import DifyApp
|
||||
from extensions.storage.base_storage import BaseStorage
|
||||
from extensions.storage.storage_type import StorageType
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Storage:
|
||||
def __init__(self):
|
||||
self.storage_runner = None
|
||||
|
||||
def init_app(self, app: Flask):
|
||||
storage_factory = self.get_storage_factory(dify_config.STORAGE_TYPE)
|
||||
with app.app_context():
|
||||
self.storage_runner = storage_factory()
|
||||
|
||||
@staticmethod
|
||||
def get_storage_factory(storage_type: str) -> type[BaseStorage]:
|
||||
def get_storage_factory(storage_type: str) -> Callable[[], BaseStorage]:
|
||||
match storage_type:
|
||||
case StorageType.S3:
|
||||
from extensions.storage.aws_s3_storage import AwsS3Storage
|
||||
from extensions.storage.opendal_storage import OpenDALStorage
|
||||
|
||||
return AwsS3Storage
|
||||
kwargs = _load_s3_storage_kwargs()
|
||||
return lambda: OpenDALStorage(scheme=OpenDALScheme.S3, **kwargs)
|
||||
case StorageType.OPENDAL:
|
||||
from extensions.storage.opendal_storage import OpenDALStorage
|
||||
|
||||
scheme = OpenDALScheme(dify_config.STORAGE_OPENDAL_SCHEME)
|
||||
kwargs = _load_opendal_storage_kwargs(scheme)
|
||||
return lambda: OpenDALStorage(scheme=scheme, **kwargs)
|
||||
case StorageType.LOCAL:
|
||||
from extensions.storage.opendal_storage import OpenDALStorage
|
||||
|
||||
kwargs = _load_local_storage_kwargs()
|
||||
return lambda: OpenDALStorage(scheme=OpenDALScheme.FS, **kwargs)
|
||||
case StorageType.AZURE_BLOB:
|
||||
from extensions.storage.azure_blob_storage import AzureBlobStorage
|
||||
|
||||
@ -62,16 +74,14 @@ class Storage:
|
||||
from extensions.storage.supabase_storage import SupabaseStorage
|
||||
|
||||
return SupabaseStorage
|
||||
case StorageType.LOCAL | _:
|
||||
from extensions.storage.local_fs_storage import LocalFsStorage
|
||||
|
||||
return LocalFsStorage
|
||||
case _:
|
||||
raise ValueError(f"Unsupported storage type {storage_type}")
|
||||
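Design note: get_storage_factory now returns Callable[[], BaseStorage] rather than a class, so schemes that need constructor kwargs (the OpenDAL-backed ones) can be bound with a lambda while plain storage classes keep working unchanged. A hedged sketch of what the caller relies on:

from collections.abc import Callable

from extensions.storage.base_storage import BaseStorage


def build_runner(factory: Callable[[], BaseStorage]) -> BaseStorage:
    # A bare class such as AzureBlobStorage and a pre-bound lambda such as
    # lambda: OpenDALStorage(scheme=OpenDALScheme.S3, **kwargs) are interchangeable here:
    # both are zero-argument callables that yield a storage runner.
    return factory()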
|
||||
def save(self, filename, data):
|
||||
try:
|
||||
self.storage_runner.save(filename, data)
|
||||
except Exception as e:
|
||||
logging.exception(f"Failed to save file {filename}")
|
||||
logger.exception(f"Failed to save file {filename}")
|
||||
raise e
|
||||
|
||||
def load(self, filename: str, /, *, stream: bool = False) -> Union[bytes, Generator]:
|
||||
@ -81,45 +91,120 @@ class Storage:
|
||||
else:
|
||||
return self.load_once(filename)
|
||||
except Exception as e:
|
||||
logging.exception(f"Failed to load file {filename}")
|
||||
logger.exception(f"Failed to load file {filename}")
|
||||
raise e
|
||||
|
||||
def load_once(self, filename: str) -> bytes:
|
||||
try:
|
||||
return self.storage_runner.load_once(filename)
|
||||
except Exception as e:
|
||||
logging.exception(f"Failed to load_once file {filename}")
|
||||
logger.exception(f"Failed to load_once file {filename}")
|
||||
raise e
|
||||
|
||||
def load_stream(self, filename: str) -> Generator:
|
||||
try:
|
||||
return self.storage_runner.load_stream(filename)
|
||||
except Exception as e:
|
||||
logging.exception(f"Failed to load_stream file {filename}")
|
||||
logger.exception(f"Failed to load_stream file {filename}")
|
||||
raise e
|
||||
|
||||
def download(self, filename, target_filepath):
|
||||
try:
|
||||
self.storage_runner.download(filename, target_filepath)
|
||||
except Exception as e:
|
||||
logging.exception(f"Failed to download file {filename}")
|
||||
logger.exception(f"Failed to download file {filename}")
|
||||
raise e
|
||||
|
||||
def exists(self, filename):
|
||||
try:
|
||||
return self.storage_runner.exists(filename)
|
||||
except Exception as e:
|
||||
logging.exception(f"Failed to check file exists {filename}")
|
||||
logger.exception(f"Failed to check file exists {filename}")
|
||||
raise e
|
||||
|
||||
def delete(self, filename):
|
||||
try:
|
||||
return self.storage_runner.delete(filename)
|
||||
except Exception as e:
|
||||
logging.exception(f"Failed to delete file {filename}")
|
||||
logger.exception(f"Failed to delete file {filename}")
|
||||
raise e
|
||||
|
||||
|
||||
def _load_s3_storage_kwargs() -> Mapping[str, str]:
|
||||
"""
|
||||
Load the kwargs for S3 storage based on dify_config.
|
||||
Handles special cases like AWS managed IAM and R2.
|
||||
"""
|
||||
kwargs = {
|
||||
"root": "/",
|
||||
"bucket": dify_config.S3_BUCKET_NAME,
|
||||
"endpoint": dify_config.S3_ENDPOINT,
|
||||
"access_key_id": dify_config.S3_ACCESS_KEY,
|
||||
"secret_access_key": dify_config.S3_SECRET_KEY,
|
||||
"region": dify_config.S3_REGION,
|
||||
}
|
||||
kwargs = {k: v for k, v in kwargs.items() if isinstance(v, str)}
|
||||
|
||||
# For AWS managed IAM
|
||||
if dify_config.S3_USE_AWS_MANAGED_IAM:
|
||||
from extensions.storage.opendal_storage import S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS
|
||||
|
||||
logger.debug("Using AWS managed IAM role for S3")
|
||||
kwargs = {**kwargs, **{k: v for k, v in S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS.items() if k not in kwargs}}
|
||||
|
||||
# For Cloudflare R2
|
||||
if kwargs.get("endpoint"):
|
||||
from extensions.storage.opendal_storage import S3_R2_COMPATIBLE_KWARGS, is_r2_endpoint
|
||||
|
||||
if is_r2_endpoint(kwargs["endpoint"]):
|
||||
logger.debug("Using R2 for OpenDAL S3")
|
||||
kwargs = {**kwargs, **{k: v for k, v in S3_R2_COMPATIBLE_KWARGS.items() if k not in kwargs}}
|
||||
|
||||
return kwargs
|
||||
|
||||
|
||||
def _load_local_storage_kwargs() -> Mapping[str, str]:
|
||||
"""
|
||||
Load the kwargs for local storage based on dify_config.
|
||||
"""
|
||||
return {
|
||||
"root": dify_config.STORAGE_LOCAL_PATH,
|
||||
}
|
||||
|
||||
|
||||
def _load_opendal_storage_kwargs(scheme: OpenDALScheme) -> Mapping[str, str]:
|
||||
"""
|
||||
Load the kwargs for OpenDAL storage based on the given scheme.
|
||||
"""
|
||||
match scheme:
|
||||
case OpenDALScheme.FS:
|
||||
kwargs = {
|
||||
"root": dify_config.OPENDAL_FS_ROOT,
|
||||
}
|
||||
case OpenDALScheme.S3:
|
||||
# Load OpenDAL S3-related configs
|
||||
kwargs = {
|
||||
"root": dify_config.OPENDAL_S3_ROOT,
|
||||
"bucket": dify_config.OPENDAL_S3_BUCKET,
|
||||
"endpoint": dify_config.OPENDAL_S3_ENDPOINT,
|
||||
"access_key_id": dify_config.OPENDAL_S3_ACCESS_KEY_ID,
|
||||
"secret_access_key": dify_config.OPENDAL_S3_SECRET_ACCESS_KEY,
|
||||
"region": dify_config.OPENDAL_S3_REGION,
|
||||
}
|
||||
|
||||
# For Cloudflare R2
|
||||
if kwargs.get("endpoint"):
|
||||
from extensions.storage.opendal_storage import S3_R2_COMPATIBLE_KWARGS, is_r2_endpoint
|
||||
|
||||
if is_r2_endpoint(kwargs["endpoint"]):
|
||||
logger.debug("Using R2 for OpenDAL S3")
|
||||
kwargs = {**kwargs, **{k: v for k, v in S3_R2_COMPATIBLE_KWARGS.items() if k not in kwargs}}
|
||||
case _:
|
||||
logger.warning(f"Unrecognized OpenDAL scheme: {scheme}, will fall back to default.")
|
||||
kwargs = {}
|
||||
return kwargs
|
||||
|
||||
|
||||
storage = Storage()
|
||||
|
||||
|
||||
|
@ -7,9 +7,6 @@ from collections.abc import Generator
|
||||
class BaseStorage(ABC):
|
||||
"""Interface for file storage."""
|
||||
|
||||
def __init__(self): # noqa: B027
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def save(self, filename, data):
|
||||
raise NotImplementedError
|
||||
|
@ -1,62 +0,0 @@
|
||||
import os
|
||||
import shutil
|
||||
from collections.abc import Generator
|
||||
from pathlib import Path
|
||||
|
||||
from flask import current_app
|
||||
|
||||
from configs import dify_config
|
||||
from extensions.storage.base_storage import BaseStorage
|
||||
|
||||
|
||||
class LocalFsStorage(BaseStorage):
|
||||
"""Implementation for local filesystem storage."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
folder = dify_config.STORAGE_LOCAL_PATH
|
||||
if not os.path.isabs(folder):
|
||||
folder = os.path.join(current_app.root_path, folder)
|
||||
self.folder = folder
|
||||
|
||||
def _build_filepath(self, filename: str) -> str:
|
||||
"""Build the full file path based on the folder and filename."""
|
||||
if not self.folder or self.folder.endswith("/"):
|
||||
return self.folder + filename
|
||||
else:
|
||||
return self.folder + "/" + filename
|
||||
|
||||
def save(self, filename, data):
|
||||
filepath = self._build_filepath(filename)
|
||||
folder = os.path.dirname(filepath)
|
||||
os.makedirs(folder, exist_ok=True)
|
||||
Path(os.path.join(os.getcwd(), filepath)).write_bytes(data)
|
||||
|
||||
def load_once(self, filename: str) -> bytes:
|
||||
filepath = self._build_filepath(filename)
|
||||
if not os.path.exists(filepath):
|
||||
raise FileNotFoundError("File not found")
|
||||
return Path(filepath).read_bytes()
|
||||
|
||||
def load_stream(self, filename: str) -> Generator:
|
||||
filepath = self._build_filepath(filename)
|
||||
if not os.path.exists(filepath):
|
||||
raise FileNotFoundError("File not found")
|
||||
with open(filepath, "rb") as f:
|
||||
while chunk := f.read(4096): # Read in chunks of 4KB
|
||||
yield chunk
|
||||
|
||||
def download(self, filename, target_filepath):
|
||||
filepath = self._build_filepath(filename)
|
||||
if not os.path.exists(filepath):
|
||||
raise FileNotFoundError("File not found")
|
||||
shutil.copyfile(filepath, target_filepath)
|
||||
|
||||
def exists(self, filename):
|
||||
filepath = self._build_filepath(filename)
|
||||
return os.path.exists(filepath)
|
||||
|
||||
def delete(self, filename):
|
||||
filepath = self._build_filepath(filename)
|
||||
if os.path.exists(filepath):
|
||||
os.remove(filepath)
|
api/extensions/storage/opendal_storage.py (new file, 72 lines)
@@ -0,0 +1,72 @@
|
||||
from collections.abc import Generator
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import opendal
|
||||
|
||||
from configs.middleware.storage.opendal_storage_config import OpenDALScheme
|
||||
from extensions.storage.base_storage import BaseStorage
|
||||
|
||||
S3_R2_HOSTNAME = "r2.cloudflarestorage.com"
|
||||
S3_R2_COMPATIBLE_KWARGS = {
|
||||
"delete_max_size": "700",
|
||||
"disable_stat_with_override": "true",
|
||||
"region": "auto",
|
||||
}
|
||||
S3_SSE_WITH_AWS_MANAGED_IAM_KWARGS = {
|
||||
"server_side_encryption": "aws:kms",
|
||||
}
|
||||
|
||||
|
||||
def is_r2_endpoint(endpoint: str) -> bool:
|
||||
if not endpoint:
|
||||
return False
|
||||
|
||||
parsed_url = urlparse(endpoint)
|
||||
return bool(parsed_url.hostname and parsed_url.hostname.endswith(S3_R2_HOSTNAME))
|
||||
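A quick check of the R2 detection helper shown above (the endpoints are made-up examples):

from extensions.storage.opendal_storage import is_r2_endpoint

print(is_r2_endpoint("https://example-account.r2.cloudflarestorage.com"))  # True
print(is_r2_endpoint("https://s3.amazonaws.com"))                          # False
print(is_r2_endpoint(""))                                                  # False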
|
||||
|
||||
class OpenDALStorage(BaseStorage):
|
||||
def __init__(self, scheme: OpenDALScheme, **kwargs):
|
||||
if scheme == OpenDALScheme.FS:
|
||||
Path(kwargs["root"]).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
self.op = opendal.Operator(scheme=scheme, **kwargs)
|
||||
|
||||
def save(self, filename: str, data: bytes) -> None:
|
||||
self.op.write(path=filename, bs=data)
|
||||
|
||||
def load_once(self, filename: str) -> bytes:
|
||||
if not self.exists(filename):
|
||||
raise FileNotFoundError("File not found")
|
||||
|
||||
return self.op.read(path=filename)
|
||||
|
||||
def load_stream(self, filename: str) -> Generator:
|
||||
if not self.exists(filename):
|
||||
raise FileNotFoundError("File not found")
|
||||
|
||||
batch_size = 4096
|
||||
file = self.op.open(path=filename, mode="rb")
|
||||
while chunk := file.read(batch_size):
|
||||
yield chunk
|
||||
|
||||
def download(self, filename: str, target_filepath: str):
|
||||
if not self.exists(filename):
|
||||
raise FileNotFoundError("File not found")
|
||||
|
||||
with Path(target_filepath).open("wb") as f:
|
||||
f.write(self.op.read(path=filename))
|
||||
|
||||
def exists(self, filename: str) -> bool:
|
||||
# FIXME: workaround because the OpenDAL Python binding does not expose an exists() method
# and there is no better error handling here; once the binding provides exists(), use it instead.
# See https://github.com/apache/opendal/blob/main/bindings/python/src/operator.rs
|
||||
try:
|
||||
return self.op.stat(path=filename).mode.is_file()
|
||||
except Exception as e:
|
||||
return False
|
||||
|
||||
def delete(self, filename: str):
|
||||
if self.exists(filename):
|
||||
self.op.delete(path=filename)
|
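A hedged example of exercising the new OpenDALStorage with the local filesystem scheme; paths and payload are illustrative, while the class and OpenDALScheme come from the diff above:

from configs.middleware.storage.opendal_storage_config import OpenDALScheme
from extensions.storage.opendal_storage import OpenDALStorage

storage = OpenDALStorage(scheme=OpenDALScheme.FS, root="storage")  # creates ./storage if missing
storage.save("upload_files/demo.txt", b"hello dify")

print(storage.exists("upload_files/demo.txt"))                 # True
print(storage.load_once("upload_files/demo.txt"))              # b'hello dify'
print(b"".join(storage.load_stream("upload_files/demo.txt")))  # streamed in 4 KB chunks

storage.delete("upload_files/demo.txt")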
@ -9,6 +9,7 @@ class StorageType(StrEnum):
|
||||
HUAWEI_OBS = "huawei-obs"
|
||||
LOCAL = "local"
|
||||
OCI_STORAGE = "oci-storage"
|
||||
OPENDAL = "opendal"
|
||||
S3 = "s3"
|
||||
TENCENT_COS = "tencent-cos"
|
||||
VOLCENGINE_TOS = "volcengine-tos"
|
||||
|
@ -14,6 +14,7 @@ workflow_run_for_log_fields = {
|
||||
"total_steps": fields.Integer,
|
||||
"created_at": TimestampField,
|
||||
"finished_at": TimestampField,
|
||||
"exceptions_count": fields.Integer,
|
||||
}
|
||||
|
||||
workflow_run_for_list_fields = {
|
||||
@ -27,6 +28,7 @@ workflow_run_for_list_fields = {
|
||||
"created_by_account": fields.Nested(simple_account_fields, attribute="created_by_account", allow_null=True),
|
||||
"created_at": TimestampField,
|
||||
"finished_at": TimestampField,
|
||||
"exceptions_count": fields.Integer,
|
||||
}
|
||||
|
||||
advanced_chat_workflow_run_for_list_fields = {
|
||||
@ -42,6 +44,7 @@ advanced_chat_workflow_run_for_list_fields = {
|
||||
"created_by_account": fields.Nested(simple_account_fields, attribute="created_by_account", allow_null=True),
|
||||
"created_at": TimestampField,
|
||||
"finished_at": TimestampField,
|
||||
"exceptions_count": fields.Integer,
|
||||
}
|
||||
|
||||
advanced_chat_workflow_run_pagination_fields = {
|
||||
@ -73,6 +76,7 @@ workflow_run_detail_fields = {
|
||||
"created_by_end_user": fields.Nested(simple_end_user_fields, attribute="created_by_end_user", allow_null=True),
|
||||
"created_at": TimestampField,
|
||||
"finished_at": TimestampField,
|
||||
"exceptions_count": fields.Integer,
|
||||
}
|
||||
|
||||
workflow_run_node_execution_fields = {
|
||||
|
@ -9,7 +9,7 @@ import uuid
|
||||
from collections.abc import Generator
|
||||
from datetime import datetime
|
||||
from hashlib import sha256
|
||||
from typing import TYPE_CHECKING, Any, Optional, Union
|
||||
from typing import TYPE_CHECKING, Any, Optional, Union, cast
|
||||
|
||||
from flask import Response, stream_with_context
|
||||
from flask_restful import fields
|
||||
@ -170,11 +170,11 @@ def generate_string(n):
|
||||
|
||||
def extract_remote_ip(request) -> str:
|
||||
if request.headers.get("CF-Connecting-IP"):
|
||||
return request.headers.get("Cf-Connecting-Ip")
|
||||
return cast(str, request.headers.get("Cf-Connecting-Ip"))
|
||||
elif request.headers.getlist("X-Forwarded-For"):
|
||||
return request.headers.getlist("X-Forwarded-For")[0]
|
||||
return cast(str, request.headers.getlist("X-Forwarded-For")[0])
|
||||
else:
|
||||
return request.remote_addr
|
||||
return cast(str, request.remote_addr)
|
||||
|
||||
|
||||
def generate_text_hash(text: str) -> str:
|
||||
@ -221,12 +221,14 @@ class TokenManager:
|
||||
token_data.update(additional_data)
|
||||
|
||||
expiry_minutes = dify_config.model_dump().get(f"{token_type.upper()}_TOKEN_EXPIRY_MINUTES")
|
||||
if expiry_minutes is None:
|
||||
raise ValueError(f"Expiry minutes for {token_type} token is not set")
|
||||
token_key = cls._get_token_key(token, token_type)
|
||||
expiry_time = int(expiry_minutes * 60)
|
||||
redis_client.setex(token_key, expiry_time, json.dumps(token_data))
|
||||
|
||||
if account_id:
|
||||
cls._set_current_token_for_account(account.id, token, token_type, expiry_minutes)
|
||||
cls._set_current_token_for_account(account_id, token, token_type, expiry_minutes)
|
||||
|
||||
return token
|
||||
|
||||
|
@ -0,0 +1,33 @@
|
||||
"""add exceptions_count field to WorkflowRun model
|
||||
|
||||
Revision ID: cf8f4fc45278
|
||||
Revises: 01d6889832f7
|
||||
Create Date: 2024-11-28 05:53:21.576178
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import models as models
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = 'cf8f4fc45278'
|
||||
down_revision = '01d6889832f7'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
with op.batch_alter_table('workflow_runs', schema=None) as batch_op:
|
||||
batch_op.add_column(sa.Column('exceptions_count', sa.Integer(), server_default=sa.text('0'), nullable=True))
|
||||
|
||||
# ### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade():
|
||||
# ### commands auto generated by Alembic - please adjust! ###
|
||||
with op.batch_alter_table('workflow_runs', schema=None) as batch_op:
|
||||
batch_op.drop_column('exceptions_count')
|
||||
|
||||
# ### end Alembic commands ###
|
@ -230,8 +230,10 @@ class Workflow(Base):
|
||||
from models.tools import WorkflowToolProvider
|
||||
|
||||
return (
|
||||
db.session.query(WorkflowToolProvider).filter(WorkflowToolProvider.app_id == self.app_id).first()
|
||||
is not None
|
||||
db.session.query(WorkflowToolProvider)
|
||||
.filter(WorkflowToolProvider.tenant_id == self.tenant_id, WorkflowToolProvider.app_id == self.app_id)
|
||||
.count()
|
||||
> 0
|
||||
)
|
||||
|
||||
@property
|
||||
@ -330,6 +332,7 @@ class WorkflowRunStatus(StrEnum):
|
||||
SUCCEEDED = "succeeded"
|
||||
FAILED = "failed"
|
||||
STOPPED = "stopped"
|
||||
PARTIAL_SUCCESSED = "partial-succeeded"
|
||||
|
||||
@classmethod
|
||||
def value_of(cls, value: str) -> "WorkflowRunStatus":
|
||||
@ -400,7 +403,7 @@ class WorkflowRun(Base):
|
||||
version = db.Column(db.String(255), nullable=False)
|
||||
graph = db.Column(db.Text)
|
||||
inputs = db.Column(db.Text)
|
||||
status = db.Column(db.String(255), nullable=False) # running, succeeded, failed, stopped
|
||||
status = db.Column(db.String(255), nullable=False) # running, succeeded, failed, stopped, partial-succeeded
|
||||
outputs: Mapped[str] = mapped_column(sa.Text, default="{}")
|
||||
error = db.Column(db.Text)
|
||||
elapsed_time = db.Column(db.Float, nullable=False, server_default=db.text("0"))
|
||||
@ -410,6 +413,7 @@ class WorkflowRun(Base):
|
||||
created_by = db.Column(StringUUID, nullable=False)
|
||||
created_at = db.Column(db.DateTime, nullable=False, server_default=db.text("CURRENT_TIMESTAMP(0)"))
|
||||
finished_at = db.Column(db.DateTime)
|
||||
exceptions_count = db.Column(db.Integer, server_default=db.text("0"))
|
||||
|
||||
@property
|
||||
def created_by_account(self):
|
||||
@ -469,6 +473,7 @@ class WorkflowRun(Base):
|
||||
"created_by": self.created_by,
|
||||
"created_at": self.created_at,
|
||||
"finished_at": self.finished_at,
|
||||
"exceptions_count": self.exceptions_count,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
@ -494,6 +499,7 @@ class WorkflowRun(Base):
|
||||
created_by=data.get("created_by"),
|
||||
created_at=data.get("created_at"),
|
||||
finished_at=data.get("finished_at"),
|
||||
exceptions_count=data.get("exceptions_count"),
|
||||
)
|
||||
|
||||
|
||||
@ -527,6 +533,7 @@ class WorkflowNodeExecutionStatus(Enum):
|
||||
RUNNING = "running"
|
||||
SUCCEEDED = "succeeded"
|
||||
FAILED = "failed"
|
||||
EXCEPTION = "exception"
|
||||
|
||||
@classmethod
|
||||
def value_of(cls, value: str) -> "WorkflowNodeExecutionStatus":
|
||||
|
api/poetry.lock (generated, 892 lines changed): file diff suppressed because it is too large
@ -45,7 +45,7 @@ google-auth-httplib2 = "0.2.0"
|
||||
google-cloud-aiplatform = "1.49.0"
|
||||
google-generativeai = "0.8.1"
|
||||
googleapis-common-protos = "1.63.0"
|
||||
gunicorn = "~22.0.0"
|
||||
gunicorn = "~23.0.0"
|
||||
httpx = { version = "~0.27.0", extras = ["socks"] }
|
||||
huggingface-hub = "~0.16.4"
|
||||
jieba = "0.42.1"
|
||||
@ -134,6 +134,7 @@ bce-python-sdk = "~0.9.23"
|
||||
cos-python-sdk-v5 = "1.9.30"
|
||||
esdk-obs-python = "3.24.6.1"
|
||||
google-cloud-storage = "2.16.0"
|
||||
opendal = "~0.45.12"
|
||||
oss2 = "2.18.5"
|
||||
supabase = "~2.8.1"
|
||||
tos = "~2.7.1"
|
||||
|
@ -36,14 +36,16 @@ def clean_messages():
|
||||
db.session.query(Message)
|
||||
.filter(Message.created_at < plan_sandbox_clean_message_day)
|
||||
.order_by(Message.created_at.desc())
|
||||
.paginate(page=page, per_page=100)
|
||||
.limit(100)
|
||||
.all()
|
||||
)
|
||||
|
||||
except NotFound:
|
||||
break
|
||||
if messages.items is None or len(messages.items) == 0:
|
||||
if not messages:
|
||||
break
|
||||
for message in messages.items:
|
||||
for message in messages:
|
||||
plan_sandbox_clean_message_day = message.created_at
|
||||
app = App.query.filter_by(id=message.app_id).first()
|
||||
features_cache_key = f"features:{app.tenant_id}"
|
||||
plan_cache = redis_client.get(features_cache_key)
|
||||
|
@ -102,6 +102,9 @@ class AppGenerateService:
|
||||
raise ValueError(f"Invalid app mode {app_model.mode}")
|
||||
except RateLimitError as e:
|
||||
raise InvokeRateLimitError(str(e))
|
||||
except Exception:
|
||||
rate_limit.exit(request_id)
|
||||
raise
|
||||
finally:
|
||||
if not streaming:
|
||||
rate_limit.exit(request_id)
|
||||
|
@ -69,7 +69,10 @@ class ExternalDatasetService:
|
||||
endpoint = f"{settings['endpoint']}/retrieval"
|
||||
api_key = settings["api_key"]
|
||||
if not validators.url(endpoint, simple_host=True):
|
||||
raise ValueError(f"invalid endpoint: {endpoint}")
|
||||
if not endpoint.startswith("http://") and not endpoint.startswith("https://"):
|
||||
raise ValueError(f"invalid endpoint: {endpoint} must start with http:// or https://")
|
||||
else:
|
||||
raise ValueError(f"invalid endpoint: {endpoint}")
|
||||
try:
|
||||
response = httpx.post(endpoint, headers={"Authorization": f"Bearer {api_key}"})
|
||||
except Exception as e:
|
||||
|
@ -13,6 +13,7 @@ from core.workflow.errors import WorkflowNodeRunFailedError
|
||||
from core.workflow.graph_engine.entities.event import InNodeEvent
|
||||
from core.workflow.nodes import NodeType
|
||||
from core.workflow.nodes.base.node import BaseNode
|
||||
from core.workflow.nodes.enums import ErrorStrategy
|
||||
from core.workflow.nodes.event import RunCompletedEvent
|
||||
from core.workflow.nodes.event.types import NodeEvent
|
||||
from core.workflow.nodes.node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING
|
||||
@ -295,8 +296,35 @@ class WorkflowService:
|
||||
|
||||
if not node_run_result:
|
||||
raise ValueError("Node run failed with no run result")
|
||||
|
||||
run_succeeded = True if node_run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED else False
|
||||
# single step debug mode error handling return
|
||||
if node_run_result.status == WorkflowNodeExecutionStatus.FAILED and node_instance.should_continue_on_error:
|
||||
node_error_args = {
|
||||
"status": WorkflowNodeExecutionStatus.EXCEPTION,
|
||||
"error": node_run_result.error,
|
||||
"inputs": node_run_result.inputs,
|
||||
"metadata": {"error_strategy": node_instance.node_data.error_strategy},
|
||||
}
|
||||
if node_instance.node_data.error_strategy is ErrorStrategy.DEFAULT_VALUE:
|
||||
node_run_result = NodeRunResult(
|
||||
**node_error_args,
|
||||
outputs={
|
||||
**node_instance.node_data.default_value_dict,
|
||||
"error_message": node_run_result.error,
|
||||
"error_type": node_run_result.error_type,
|
||||
},
|
||||
)
|
||||
else:
|
||||
node_run_result = NodeRunResult(
|
||||
**node_error_args,
|
||||
outputs={
|
||||
"error_message": node_run_result.error,
|
||||
"error_type": node_run_result.error_type,
|
||||
},
|
||||
)
|
||||
run_succeeded = node_run_result.status in (
|
||||
WorkflowNodeExecutionStatus.SUCCEEDED,
|
||||
WorkflowNodeExecutionStatus.EXCEPTION,
|
||||
)
|
||||
error = node_run_result.error if not run_succeeded else None
|
||||
except WorkflowNodeRunFailedError as e:
|
||||
node_instance = e.node_instance
|
||||
@ -315,7 +343,6 @@ class WorkflowService:
|
||||
workflow_node_execution.created_by_role = CreatedByRole.ACCOUNT.value
|
||||
workflow_node_execution.created_at = datetime.now(UTC).replace(tzinfo=None)
|
||||
workflow_node_execution.finished_at = datetime.now(UTC).replace(tzinfo=None)
|
||||
|
||||
if run_succeeded and node_run_result:
|
||||
# create workflow node execution
|
||||
inputs = WorkflowEntry.handle_special_values(node_run_result.inputs) if node_run_result.inputs else None
|
||||
@ -332,7 +359,11 @@ class WorkflowService:
|
||||
workflow_node_execution.execution_metadata = (
|
||||
json.dumps(jsonable_encoder(node_run_result.metadata)) if node_run_result.metadata else None
|
||||
)
|
||||
workflow_node_execution.status = WorkflowNodeExecutionStatus.SUCCEEDED.value
|
||||
if node_run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED:
|
||||
workflow_node_execution.status = WorkflowNodeExecutionStatus.SUCCEEDED.value
|
||||
elif node_run_result.status == WorkflowNodeExecutionStatus.EXCEPTION:
|
||||
workflow_node_execution.status = WorkflowNodeExecutionStatus.EXCEPTION.value
|
||||
workflow_node_execution.error = node_run_result.error
|
||||
else:
|
||||
# create workflow node execution
|
||||
workflow_node_execution.status = WorkflowNodeExecutionStatus.FAILED.value
|
||||
|
@ -3,7 +3,7 @@ import logging
import time

import click
from celery import shared_task
from celery import shared_task # type: ignore

from core.indexing_runner import DocumentIsPausedError
from extensions.ext_database import db
@ -68,11 +68,9 @@ def external_document_indexing_task(
settings = ExternalDatasetService.get_external_knowledge_api_settings(
json.loads(external_knowledge_api.settings)
)
# assemble headers
headers = ExternalDatasetService.assembling_headers(settings.authorization, settings.headers)

# do http request
response = ExternalDatasetService.process_external_api(settings, headers, process_parameter, files)
response = ExternalDatasetService.process_external_api(settings, files)
job_id = response.json().get("job_id")
if job_id:
# save job_id to dataset
@ -7,9 +7,10 @@ env = environs.Env()


class Config:
SEARCH_ENDPOINT = env.str("SEARCH_ENDPOINT", "http://ld-*************-proxy-search-pub.lindorm.aliyuncs.com:30070")
SEARCH_ENDPOINT = env.str("SEARCH_ENDPOINT", "http://ld-************-proxy-search-pub.lindorm.aliyuncs.com:30070")
SEARCH_USERNAME = env.str("SEARCH_USERNAME", "ADMIN")
SEARCH_PWD = env.str("SEARCH_PWD", "PWD")
SEARCH_PWD = env.str("SEARCH_PWD", "ADMIN")
USING_UGC = env.bool("USING_UGC", True)


class TestLindormVectorStore(AbstractVectorTest):
@ -31,5 +32,27 @@ class TestLindormVectorStore(AbstractVectorTest):
assert ids[0] == self.example_doc_id


def test_lindorm_vector(setup_mock_redis):
class TestLindormVectorStoreUGC(AbstractVectorTest):
def __init__(self):
super().__init__()
self.vector = LindormVectorStore(
collection_name="ugc_index_test",
config=LindormVectorStoreConfig(
hosts=Config.SEARCH_ENDPOINT,
username=Config.SEARCH_USERNAME,
password=Config.SEARCH_PWD,
using_ugc=Config.USING_UGC,
),
routing_value=self.collection_name,
)

def get_ids_by_metadata_field(self):
ids = self.vector.get_ids_by_metadata_field(key="doc_id", value=self.example_doc_id)
assert ids is not None
assert len(ids) == 1
assert ids[0] == self.example_doc_id


def test_lindorm_vector_ugc(setup_mock_redis):
TestLindormVectorStore().run_all_tests()
TestLindormVectorStoreUGC().run_all_tests()
@ -12,11 +12,11 @@ def tidb_vector():
return TiDBVector(
collection_name="test_collection",
config=TiDBVectorConfig(
host="xxx.eu-central-1.xxx.aws.tidbcloud.com",
port="4000",
user="xxx.root",
password="xxxxxx",
database="dify",
host="localhost",
port=4000,
user="root",
password="",
database="test",
program_name="langgenius/dify",
),
)
@ -27,35 +27,14 @@ class TiDBVectorTest(AbstractVectorTest):
super().__init__()
self.vector = vector

def text_exists(self):
exist = self.vector.text_exists(self.example_doc_id)
assert exist == False

def search_by_vector(self):
hits_by_vector: list[Document] = self.vector.search_by_vector(query_vector=self.example_embedding)
assert len(hits_by_vector) == 0

def search_by_full_text(self):
hits_by_full_text: list[Document] = self.vector.search_by_full_text(query=get_example_text())
assert len(hits_by_full_text) == 0

def get_ids_by_metadata_field(self):
ids = self.vector.get_ids_by_metadata_field(key="document_id", value=self.example_doc_id)
assert len(ids) == 0
ids = self.vector.get_ids_by_metadata_field(key="doc_id", value=self.example_doc_id)
assert len(ids) == 1


def test_tidb_vector(setup_mock_redis, setup_tidbvector_mock, tidb_vector, mock_session):
def test_tidb_vector(setup_mock_redis, tidb_vector):
TiDBVectorTest(vector=tidb_vector).run_all_tests()


@pytest.fixture
def mock_session():
with patch("core.rag.datasource.vdb.tidb_vector.tidb_vector.Session", new_callable=MagicMock) as mock_session:
yield mock_session


@pytest.fixture
def setup_tidbvector_mock(tidb_vector, mock_session):
with patch("core.rag.datasource.vdb.tidb_vector.tidb_vector.create_engine"):
with patch.object(tidb_vector._engine, "connect"):
yield tidb_vector
20
api/tests/unit_tests/configs/test_opendal_config_parse.py
Normal file
@ -0,0 +1,20 @@
import pytest

from extensions.storage.opendal_storage import is_r2_endpoint


@pytest.mark.parametrize(
("endpoint", "expected"),
[
("https://bucket.r2.cloudflarestorage.com", True),
("https://custom-domain.r2.cloudflarestorage.com/", True),
("https://bucket.r2.cloudflarestorage.com/path", True),
("https://s3.amazonaws.com", False),
("https://storage.googleapis.com", False),
("http://localhost:9000", False),
("invalid-url", False),
("", False),
],
)
def test_is_r2_endpoint(endpoint: str, expected: bool):
assert is_r2_endpoint(endpoint) == expected
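The parametrized cases above pin down the intended semantics: an endpoint counts as Cloudflare R2 when its host ends in .r2.cloudflarestorage.com, and anything empty or unparsable does not. A behavior-compatible sketch of such a check (an illustration only, not the actual implementation in extensions.storage.opendal_storage):

from urllib.parse import urlparse


def is_r2_endpoint_sketch(endpoint: str) -> bool:
    # Cloudflare R2's S3-compatible endpoints live under *.r2.cloudflarestorage.com.
    if not endpoint:
        return False
    host = urlparse(endpoint).netloc
    return host == "r2.cloudflarestorage.com" or host.endswith(".r2.cloudflarestorage.com")


assert is_r2_endpoint_sketch("https://bucket.r2.cloudflarestorage.com/path")
assert not is_r2_endpoint_sketch("https://s3.amazonaws.com")
assert not is_r2_endpoint_sketch("invalid-url")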
@ -48,7 +48,7 @@ def test_executor_with_json_body_and_number_variable():
assert executor.method == "post"
assert executor.url == "https://api.example.com/data"
assert executor.headers == {"Content-Type": "application/json"}
assert executor.params == {}
assert executor.params == []
assert executor.json == {"number": 42}
assert executor.data is None
assert executor.files is None
@ -101,7 +101,7 @@ def test_executor_with_json_body_and_object_variable():
assert executor.method == "post"
assert executor.url == "https://api.example.com/data"
assert executor.headers == {"Content-Type": "application/json"}
assert executor.params == {}
assert executor.params == []
assert executor.json == {"name": "John Doe", "age": 30, "email": "john@example.com"}
assert executor.data is None
assert executor.files is None
@ -156,7 +156,7 @@ def test_executor_with_json_body_and_nested_object_variable():
assert executor.method == "post"
assert executor.url == "https://api.example.com/data"
assert executor.headers == {"Content-Type": "application/json"}
assert executor.params == {}
assert executor.params == []
assert executor.json == {"object": {"name": "John Doe", "age": 30, "email": "john@example.com"}}
assert executor.data is None
assert executor.files is None
@ -195,7 +195,7 @@ def test_extract_selectors_from_template_with_newline():
variable_pool=variable_pool,
)

assert executor.params == {"test": "line1\nline2"}
assert executor.params == [("test", "line1\nline2")]


def test_executor_with_form_data():
@ -244,7 +244,7 @@ def test_executor_with_form_data():
assert executor.url == "https://api.example.com/upload"
assert "Content-Type" in executor.headers
assert "multipart/form-data" in executor.headers["Content-Type"]
assert executor.params == {}
assert executor.params == []
assert executor.json is None
assert executor.files is None
assert executor.content is None
@ -265,3 +265,72 @@ def test_executor_with_form_data():
assert "Hello, World!" in raw_request
assert "number_field" in raw_request
assert "42" in raw_request


def test_init_headers():
def create_executor(headers: str) -> Executor:
node_data = HttpRequestNodeData(
title="test",
method="get",
url="http://example.com",
headers=headers,
params="",
authorization=HttpRequestNodeAuthorization(type="no-auth"),
)
timeout = HttpRequestNodeTimeout(connect=10, read=30, write=30)
return Executor(node_data=node_data, timeout=timeout, variable_pool=VariablePool())

executor = create_executor("aa\n cc:")
executor._init_headers()
assert executor.headers == {"aa": "", "cc": ""}

executor = create_executor("aa:bb\n cc:dd")
executor._init_headers()
assert executor.headers == {"aa": "bb", "cc": "dd"}

executor = create_executor("aa:bb\n cc:dd\n")
executor._init_headers()
assert executor.headers == {"aa": "bb", "cc": "dd"}

executor = create_executor("aa:bb\n\n cc : dd\n\n")
executor._init_headers()
assert executor.headers == {"aa": "bb", "cc": "dd"}


def test_init_params():
def create_executor(params: str) -> Executor:
node_data = HttpRequestNodeData(
title="test",
method="get",
url="http://example.com",
headers="",
params=params,
authorization=HttpRequestNodeAuthorization(type="no-auth"),
)
timeout = HttpRequestNodeTimeout(connect=10, read=30, write=30)
return Executor(node_data=node_data, timeout=timeout, variable_pool=VariablePool())

# Test basic key-value pairs
executor = create_executor("key1:value1\nkey2:value2")
executor._init_params()
assert executor.params == [("key1", "value1"), ("key2", "value2")]

# Test empty values
executor = create_executor("key1:\nkey2:")
executor._init_params()
assert executor.params == [("key1", ""), ("key2", "")]

# Test duplicate keys (which is allowed for params)
executor = create_executor("key1:value1\nkey1:value2")
executor._init_params()
assert executor.params == [("key1", "value1"), ("key1", "value2")]

# Test whitespace handling
executor = create_executor(" key1 : value1 \n key2 : value2 ")
executor._init_params()
assert executor.params == [("key1", "value1"), ("key2", "value2")]

# Test empty lines and extra whitespace
executor = create_executor("key1:value1\n\nkey2:value2\n\n")
executor._init_params()
assert executor.params == [("key1", "value1"), ("key2", "value2")]
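These tests capture the new contract for query params: they are parsed into an ordered list of (key, value) tuples so duplicate keys survive, while surrounding whitespace and blank lines are ignored. A standalone parser with the same observable behavior (a sketch for illustration, not the node's actual _init_params):

def parse_params_sketch(text: str) -> list[tuple[str, str]]:
    # One "key:value" pair per line; order and duplicates preserved, blank lines skipped.
    pairs: list[tuple[str, str]] = []
    for line in text.splitlines():
        line = line.strip()
        if not line:
            continue
        key, _, value = line.partition(":")
        pairs.append((key.strip(), value.strip()))
    return pairs


assert parse_params_sketch("key1:value1\nkey1:value2") == [("key1", "value1"), ("key1", "value2")]
assert parse_params_sketch(" key1 : value1 \n\n key2 : value2 \n") == [("key1", "value1"), ("key2", "value2")]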
@ -14,18 +14,10 @@ from core.workflow.nodes.http_request import (
HttpRequestNodeBody,
HttpRequestNodeData,
)
from core.workflow.nodes.http_request.executor import _plain_text_to_dict
from models.enums import UserFrom
from models.workflow import WorkflowNodeExecutionStatus, WorkflowType


def test_plain_text_to_dict():
assert _plain_text_to_dict("aa\n cc:") == {"aa": "", "cc": ""}
assert _plain_text_to_dict("aa:bb\n cc:dd") == {"aa": "bb", "cc": "dd"}
assert _plain_text_to_dict("aa:bb\n cc:dd\n") == {"aa": "bb", "cc": "dd"}
assert _plain_text_to_dict("aa:bb\n\n cc : dd\n\n") == {"aa": "bb", "cc": "dd"}


def test_http_request_node_binary_file(monkeypatch):
data = HttpRequestNodeData(
title="test",
@ -0,0 +1,502 @@
from core.app.entities.app_invoke_entities import InvokeFrom
from core.workflow.enums import SystemVariableKey
from core.workflow.graph_engine.entities.event import (
GraphRunPartialSucceededEvent,
GraphRunSucceededEvent,
NodeRunExceptionEvent,
NodeRunStreamChunkEvent,
)
from core.workflow.graph_engine.entities.graph import Graph
from core.workflow.graph_engine.graph_engine import GraphEngine
from models.enums import UserFrom
from models.workflow import WorkflowType


class ContinueOnErrorTestHelper:
@staticmethod
def get_code_node(code: str, error_strategy: str = "fail-branch", default_value: dict | None = None):
"""Helper method to create a code node configuration"""
node = {
"id": "node",
"data": {
"outputs": {"result": {"type": "number"}},
"error_strategy": error_strategy,
"title": "code",
"variables": [],
"code_language": "python3",
"code": "\n".join([line[4:] for line in code.split("\n")]),
"type": "code",
},
}
if default_value:
node["data"]["default_value"] = default_value
return node

@staticmethod
def get_http_node(
error_strategy: str = "fail-branch", default_value: dict | None = None, authorization_success: bool = False
):
"""Helper method to create a http node configuration"""
authorization = (
{
"type": "api-key",
"config": {
"type": "basic",
"api_key": "ak-xxx",
"header": "api-key",
},
}
if authorization_success
else {
"type": "api-key",
# missing config field
}
)
node = {
"id": "node",
"data": {
"title": "http",
"desc": "",
"method": "get",
"url": "http://example.com",
"authorization": authorization,
"headers": "X-Header:123",
"params": "A:b",
"body": None,
"type": "http-request",
"error_strategy": error_strategy,
},
}
if default_value:
node["data"]["default_value"] = default_value
return node

@staticmethod
def get_error_status_code_http_node(error_strategy: str = "fail-branch", default_value: dict | None = None):
"""Helper method to create a http node configuration"""
node = {
"id": "node",
"data": {
"type": "http-request",
"title": "HTTP Request",
"desc": "",
"variables": [],
"method": "get",
"url": "https://api.github.com/issues",
"authorization": {"type": "no-auth", "config": None},
"headers": "",
"params": "",
"body": {"type": "none", "data": []},
"timeout": {"max_connect_timeout": 0, "max_read_timeout": 0, "max_write_timeout": 0},
"error_strategy": error_strategy,
},
}
if default_value:
node["data"]["default_value"] = default_value
return node

@staticmethod
def get_tool_node(error_strategy: str = "fail-branch", default_value: dict | None = None):
"""Helper method to create a tool node configuration"""
node = {
"id": "node",
"data": {
"title": "a",
"desc": "a",
"provider_id": "maths",
"provider_type": "builtin",
"provider_name": "maths",
"tool_name": "eval_expression",
"tool_label": "eval_expression",
"tool_configurations": {},
"tool_parameters": {
"expression": {
"type": "variable",
"value": ["1", "123", "args1"],
}
},
"type": "tool",
"error_strategy": error_strategy,
},
}
if default_value:
node["data"]["default_value"] = default_value
return node

@staticmethod
def get_llm_node(error_strategy: str = "fail-branch", default_value: dict | None = None):
"""Helper method to create a llm node configuration"""
node = {
"id": "node",
"data": {
"title": "123",
"type": "llm",
"model": {"provider": "openai", "name": "gpt-3.5-turbo", "mode": "chat", "completion_params": {}},
"prompt_template": [
{"role": "system", "text": "you are a helpful assistant.\ntoday's weather is {{#abc.output#}}."},
{"role": "user", "text": "{{#sys.query#}}"},
],
"memory": None,
"context": {"enabled": False},
"vision": {"enabled": False},
"error_strategy": error_strategy,
},
}
if default_value:
node["data"]["default_value"] = default_value
return node

@staticmethod
def create_test_graph_engine(graph_config: dict, user_inputs: dict | None = None):
"""Helper method to create a graph engine instance for testing"""
graph = Graph.init(graph_config=graph_config)
variable_pool = {
"system_variables": {
SystemVariableKey.QUERY: "clear",
SystemVariableKey.FILES: [],
SystemVariableKey.CONVERSATION_ID: "abababa",
SystemVariableKey.USER_ID: "aaa",
},
"user_inputs": user_inputs or {"uid": "takato"},
}

return GraphEngine(
tenant_id="111",
app_id="222",
workflow_type=WorkflowType.CHAT,
workflow_id="333",
graph_config=graph_config,
user_id="444",
user_from=UserFrom.ACCOUNT,
invoke_from=InvokeFrom.WEB_APP,
call_depth=0,
graph=graph,
variable_pool=variable_pool,
max_execution_steps=500,
max_execution_time=1200,
)


DEFAULT_VALUE_EDGE = [
{
"id": "start-source-node-target",
"source": "start",
"target": "node",
"sourceHandle": "source",
},
{
"id": "node-source-answer-target",
"source": "node",
"target": "answer",
"sourceHandle": "source",
},
]

FAIL_BRANCH_EDGES = [
{
"id": "start-source-node-target",
"source": "start",
"target": "node",
"sourceHandle": "source",
},
{
"id": "node-true-success-target",
"source": "node",
"target": "success",
"sourceHandle": "source",
},
{
"id": "node-false-error-target",
"source": "node",
"target": "error",
"sourceHandle": "fail-branch",
},
]


def test_code_default_value_continue_on_error():
error_code = """
def main() -> dict:
return {
"result": 1 / 0,
}
"""

graph_config = {
"edges": DEFAULT_VALUE_EDGE,
"nodes": [
{"data": {"title": "start", "type": "start", "variables": []}, "id": "start"},
{"data": {"title": "answer", "type": "answer", "answer": "{{#node.result#}}"}, "id": "answer"},
ContinueOnErrorTestHelper.get_code_node(
error_code, "default-value", [{"key": "result", "type": "number", "value": 132123}]
),
],
}

graph_engine = ContinueOnErrorTestHelper.create_test_graph_engine(graph_config)
events = list(graph_engine.run())
assert any(isinstance(e, NodeRunExceptionEvent) for e in events)
assert any(isinstance(e, GraphRunPartialSucceededEvent) and e.outputs == {"answer": "132123"} for e in events)
assert sum(1 for e in events if isinstance(e, NodeRunStreamChunkEvent)) == 1


def test_code_fail_branch_continue_on_error():
error_code = """
def main() -> dict:
return {
"result": 1 / 0,
}
"""

graph_config = {
"edges": FAIL_BRANCH_EDGES,
"nodes": [
{"data": {"title": "Start", "type": "start", "variables": []}, "id": "start"},
{
"data": {"title": "success", "type": "answer", "answer": "node node run successfully"},
"id": "success",
},
{
"data": {"title": "error", "type": "answer", "answer": "node node run failed"},
"id": "error",
},
ContinueOnErrorTestHelper.get_code_node(error_code),
],
}

graph_engine = ContinueOnErrorTestHelper.create_test_graph_engine(graph_config)
events = list(graph_engine.run())
assert sum(1 for e in events if isinstance(e, NodeRunStreamChunkEvent)) == 1
assert any(isinstance(e, NodeRunExceptionEvent) for e in events)
assert any(
isinstance(e, GraphRunPartialSucceededEvent) and e.outputs == {"answer": "node node run failed"} for e in events
)


def test_http_node_default_value_continue_on_error():
"""Test HTTP node with default value error strategy"""
graph_config = {
"edges": DEFAULT_VALUE_EDGE,
"nodes": [
{"data": {"title": "start", "type": "start", "variables": []}, "id": "start"},
{"data": {"title": "answer", "type": "answer", "answer": "{{#node.response#}}"}, "id": "answer"},
ContinueOnErrorTestHelper.get_http_node(
"default-value", [{"key": "response", "type": "string", "value": "http node got error response"}]
),
],
}

graph_engine = ContinueOnErrorTestHelper.create_test_graph_engine(graph_config)
events = list(graph_engine.run())

assert any(isinstance(e, NodeRunExceptionEvent) for e in events)
assert any(
isinstance(e, GraphRunPartialSucceededEvent) and e.outputs == {"answer": "http node got error response"}
for e in events
)
assert sum(1 for e in events if isinstance(e, NodeRunStreamChunkEvent)) == 1


def test_http_node_fail_branch_continue_on_error():
"""Test HTTP node with fail-branch error strategy"""
graph_config = {
"edges": FAIL_BRANCH_EDGES,
"nodes": [
{"data": {"title": "Start", "type": "start", "variables": []}, "id": "start"},
{
"data": {"title": "success", "type": "answer", "answer": "HTTP request successful"},
"id": "success",
},
{
"data": {"title": "error", "type": "answer", "answer": "HTTP request failed"},
"id": "error",
},
ContinueOnErrorTestHelper.get_http_node(),
],
}

graph_engine = ContinueOnErrorTestHelper.create_test_graph_engine(graph_config)
events = list(graph_engine.run())

assert any(isinstance(e, NodeRunExceptionEvent) for e in events)
assert any(
isinstance(e, GraphRunPartialSucceededEvent) and e.outputs == {"answer": "HTTP request failed"} for e in events
)
assert sum(1 for e in events if isinstance(e, NodeRunStreamChunkEvent)) == 1


def test_tool_node_default_value_continue_on_error():
"""Test tool node with default value error strategy"""
graph_config = {
"edges": DEFAULT_VALUE_EDGE,
"nodes": [
{"data": {"title": "start", "type": "start", "variables": []}, "id": "start"},
{"data": {"title": "answer", "type": "answer", "answer": "{{#node.result#}}"}, "id": "answer"},
ContinueOnErrorTestHelper.get_tool_node(
"default-value", [{"key": "result", "type": "string", "value": "default tool result"}]
),
],
}

graph_engine = ContinueOnErrorTestHelper.create_test_graph_engine(graph_config)
events = list(graph_engine.run())

assert any(isinstance(e, NodeRunExceptionEvent) for e in events)
assert any(
isinstance(e, GraphRunPartialSucceededEvent) and e.outputs == {"answer": "default tool result"} for e in events
)
assert sum(1 for e in events if isinstance(e, NodeRunStreamChunkEvent)) == 1


def test_tool_node_fail_branch_continue_on_error():
"""Test HTTP node with fail-branch error strategy"""
graph_config = {
"edges": FAIL_BRANCH_EDGES,
"nodes": [
{"data": {"title": "Start", "type": "start", "variables": []}, "id": "start"},
{
"data": {"title": "success", "type": "answer", "answer": "tool execute successful"},
"id": "success",
},
{
"data": {"title": "error", "type": "answer", "answer": "tool execute failed"},
"id": "error",
},
ContinueOnErrorTestHelper.get_tool_node(),
],
}

graph_engine = ContinueOnErrorTestHelper.create_test_graph_engine(graph_config)
events = list(graph_engine.run())

assert any(isinstance(e, NodeRunExceptionEvent) for e in events)
assert any(
isinstance(e, GraphRunPartialSucceededEvent) and e.outputs == {"answer": "tool execute failed"} for e in events
)
assert sum(1 for e in events if isinstance(e, NodeRunStreamChunkEvent)) == 1


def test_llm_node_default_value_continue_on_error():
"""Test LLM node with default value error strategy"""
graph_config = {
"edges": DEFAULT_VALUE_EDGE,
"nodes": [
{"data": {"title": "start", "type": "start", "variables": []}, "id": "start"},
{"data": {"title": "answer", "type": "answer", "answer": "{{#node.answer#}}"}, "id": "answer"},
ContinueOnErrorTestHelper.get_llm_node(
"default-value", [{"key": "answer", "type": "string", "value": "default LLM response"}]
),
],
}

graph_engine = ContinueOnErrorTestHelper.create_test_graph_engine(graph_config)
events = list(graph_engine.run())

assert any(isinstance(e, NodeRunExceptionEvent) for e in events)
assert any(
isinstance(e, GraphRunPartialSucceededEvent) and e.outputs == {"answer": "default LLM response"} for e in events
)
assert sum(1 for e in events if isinstance(e, NodeRunStreamChunkEvent)) == 1


def test_llm_node_fail_branch_continue_on_error():
"""Test LLM node with fail-branch error strategy"""
graph_config = {
"edges": FAIL_BRANCH_EDGES,
"nodes": [
{"data": {"title": "Start", "type": "start", "variables": []}, "id": "start"},
{
"data": {"title": "success", "type": "answer", "answer": "LLM request successful"},
"id": "success",
},
{
"data": {"title": "error", "type": "answer", "answer": "LLM request failed"},
"id": "error",
},
ContinueOnErrorTestHelper.get_llm_node(),
],
}

graph_engine = ContinueOnErrorTestHelper.create_test_graph_engine(graph_config)
events = list(graph_engine.run())

assert any(isinstance(e, NodeRunExceptionEvent) for e in events)
assert any(
isinstance(e, GraphRunPartialSucceededEvent) and e.outputs == {"answer": "LLM request failed"} for e in events
)
assert sum(1 for e in events if isinstance(e, NodeRunStreamChunkEvent)) == 1


def test_status_code_error_http_node_fail_branch_continue_on_error():
"""Test HTTP node with fail-branch error strategy"""
graph_config = {
"edges": FAIL_BRANCH_EDGES,
"nodes": [
{"data": {"title": "Start", "type": "start", "variables": []}, "id": "start"},
{
"data": {"title": "success", "type": "answer", "answer": "http execute successful"},
"id": "success",
},
{
"data": {"title": "error", "type": "answer", "answer": "http execute failed"},
"id": "error",
},
ContinueOnErrorTestHelper.get_error_status_code_http_node(),
],
}

graph_engine = ContinueOnErrorTestHelper.create_test_graph_engine(graph_config)
events = list(graph_engine.run())

assert any(isinstance(e, NodeRunExceptionEvent) for e in events)
assert any(
isinstance(e, GraphRunPartialSucceededEvent) and e.outputs == {"answer": "http execute failed"} for e in events
)
assert sum(1 for e in events if isinstance(e, NodeRunStreamChunkEvent)) == 1


def test_variable_pool_error_type_variable():
graph_config = {
"edges": FAIL_BRANCH_EDGES,
"nodes": [
{"data": {"title": "Start", "type": "start", "variables": []}, "id": "start"},
{
"data": {"title": "success", "type": "answer", "answer": "http execute successful"},
"id": "success",
},
{
"data": {"title": "error", "type": "answer", "answer": "http execute failed"},
"id": "error",
},
ContinueOnErrorTestHelper.get_error_status_code_http_node(),
],
}

graph_engine = ContinueOnErrorTestHelper.create_test_graph_engine(graph_config)
list(graph_engine.run())
error_message = graph_engine.graph_runtime_state.variable_pool.get(["node", "error_message"])
error_type = graph_engine.graph_runtime_state.variable_pool.get(["node", "error_type"])
assert error_message != None
assert error_type.value == "HTTPResponseCodeError"


def test_no_node_in_fail_branch_continue_on_error():
"""Test HTTP node with fail-branch error strategy"""
graph_config = {
"edges": FAIL_BRANCH_EDGES[:-1],
"nodes": [
{"data": {"title": "Start", "type": "start", "variables": []}, "id": "start"},
{
"data": {"title": "success", "type": "answer", "answer": "HTTP request successful"},
"id": "success",
},
ContinueOnErrorTestHelper.get_http_node(),
],
}

graph_engine = ContinueOnErrorTestHelper.create_test_graph_engine(graph_config)
events = list(graph_engine.run())

assert any(isinstance(e, NodeRunExceptionEvent) for e in events)
assert any(isinstance(e, GraphRunPartialSucceededEvent) and e.outputs == {} for e in events)
assert sum(1 for e in events if isinstance(e, NodeRunStreamChunkEvent)) == 0
@ -6,13 +6,17 @@ from extensions.storage.base_storage import BaseStorage


def get_example_folder() -> str:
return "/dify"
return "~/dify"


def get_example_bucket() -> str:
return "dify"


def get_opendal_bucket() -> str:
return "./dify"


def get_example_filename() -> str:
return "test.txt"

@ -22,14 +26,14 @@ def get_example_data() -> bytes:


def get_example_filepath() -> str:
return "/test"
return "~/test"


class BaseStorageTest:
@pytest.fixture(autouse=True)
def setup_method(self):
def setup_method(self, *args, **kwargs):
"""Should be implemented in child classes to setup specific storage."""
self.storage = BaseStorage()
self.storage: BaseStorage

def test_save(self):
"""Test saving data."""
@ -1,18 +0,0 @@
from collections.abc import Generator

import pytest

from extensions.storage.local_fs_storage import LocalFsStorage
from tests.unit_tests.oss.__mock.base import (
BaseStorageTest,
get_example_folder,
)
from tests.unit_tests.oss.__mock.local import setup_local_fs_mock


class TestLocalFS(BaseStorageTest):
@pytest.fixture(autouse=True)
def setup_method(self, setup_local_fs_mock):
"""Executed before each test method."""
self.storage = LocalFsStorage()
self.storage.folder = get_example_folder()
88
api/tests/unit_tests/oss/opendal/test_opendal.py
Normal file
@ -0,0 +1,88 @@
import os
from collections.abc import Generator
from pathlib import Path

import pytest

from configs.middleware.storage.opendal_storage_config import OpenDALScheme
from extensions.storage.opendal_storage import OpenDALStorage
from tests.unit_tests.oss.__mock.base import (
get_example_data,
get_example_filename,
get_example_filepath,
get_opendal_bucket,
)


class TestOpenDAL:
@pytest.fixture(autouse=True)
def setup_method(self, *args, **kwargs):
"""Executed before each test method."""
self.storage = OpenDALStorage(
scheme=OpenDALScheme.FS,
root=get_opendal_bucket(),
)

@pytest.fixture(scope="class", autouse=True)
def teardown_class(self, request):
"""Clean up after all tests in the class."""

def cleanup():
folder = Path(get_opendal_bucket())
if folder.exists() and folder.is_dir():
for item in folder.iterdir():
if item.is_file():
item.unlink()
elif item.is_dir():
item.rmdir()
folder.rmdir()

return cleanup()

def test_save_and_exists(self):
"""Test saving data and checking existence."""
filename = get_example_filename()
data = get_example_data()

assert not self.storage.exists(filename)
self.storage.save(filename, data)
assert self.storage.exists(filename)

def test_load_once(self):
"""Test loading data once."""
filename = get_example_filename()
data = get_example_data()

self.storage.save(filename, data)
loaded_data = self.storage.load_once(filename)
assert loaded_data == data

def test_load_stream(self):
"""Test loading data as a stream."""
filename = get_example_filename()
data = get_example_data()

self.storage.save(filename, data)
generator = self.storage.load_stream(filename)
assert isinstance(generator, Generator)
assert next(generator) == data

def test_download(self):
"""Test downloading data to a file."""
filename = get_example_filename()
filepath = str(Path(get_opendal_bucket()) / filename)
data = get_example_data()

self.storage.save(filename, data)
self.storage.download(filename, filepath)

def test_delete(self):
"""Test deleting a file."""
filename = get_example_filename()
data = get_example_data()

self.storage.save(filename, data)
assert self.storage.exists(filename)

self.storage.delete(filename)
assert not self.storage.exists(filename)
@ -14,3 +14,4 @@ pytest api/tests/integration_tests/vdb/chroma \
api/tests/integration_tests/vdb/upstash \
api/tests/integration_tests/vdb/couchbase \
api/tests/integration_tests/vdb/oceanbase \
api/tests/integration_tests/vdb/tidb_vector \
@ -2,7 +2,7 @@ version: '3'
services:
# API service
api:
image: langgenius/dify-api:0.13.1
image: langgenius/dify-api:0.13.2
restart: always
environment:
# Startup mode, 'api' starts the API server.
@ -227,7 +227,7 @@ services:
# worker service
# The Celery worker for processing the queue.
worker:
image: langgenius/dify-api:0.13.1
image: langgenius/dify-api:0.13.2
restart: always
environment:
CONSOLE_WEB_URL: ''
@ -397,7 +397,7 @@ services:

# Frontend web application.
web:
image: langgenius/dify-web:0.13.1
image: langgenius/dify-web:0.13.2
restart: always
environment:
# The base URL of console application api server, refers to the Console base URL of WEB service if console domain is
@ -281,10 +281,23 @@ CONSOLE_CORS_ALLOW_ORIGINS=*
# ------------------------------

# The type of storage to use for storing user files.
# Supported values are `local` , `s3` , `azure-blob` , `google-storage`, `tencent-cos`, `huawei-obs`, `volcengine-tos`, `baidu-obs`, `supabase`
# Default: `local`
STORAGE_TYPE=local
STORAGE_LOCAL_PATH=storage
# Supported values are `opendal` , `s3` , `azure-blob` , `google-storage`, `tencent-cos`, `huawei-obs`, `volcengine-tos`, `baidu-obs`, `supabase`
# Default: `opendal`
STORAGE_TYPE=opendal

# Apache OpenDAL Configuration, refer to https://github.com/apache/opendal
# The scheme for the OpenDAL storage.
STORAGE_OPENDAL_SCHEME=fs
# OpenDAL FS
OPENDAL_FS_ROOT=storage
# OpenDAL S3
OPENDAL_S3_ROOT=/
OPENDAL_S3_BUCKET=your-bucket-name
OPENDAL_S3_ENDPOINT=https://s3.amazonaws.com
OPENDAL_S3_ACCESS_KEY_ID=your-access-key
OPENDAL_S3_SECRET_ACCESS_KEY=your-secret-key
OPENDAL_S3_REGION=your-region
OPENDAL_S3_SERVER_SIDE_ENCRYPTION=

# S3 Configuration
# Whether to use AWS managed IAM roles for authenticating with the S3 service.
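To sanity-check an OpenDAL `fs` setup like the STORAGE_OPENDAL_SCHEME/OPENDAL_FS_ROOT values above, a few lines against the opendal Python binding are usually enough; treat this as a rough sketch (it assumes the opendal package is installed and writes to a local ./storage directory), not Dify's own storage wrapper:

import opendal

# fs scheme with a root mirroring OPENDAL_FS_ROOT=storage
op = opendal.Operator("fs", root="storage")
op.write("healthcheck.txt", b"hello from opendal")
assert bytes(op.read("healthcheck.txt")) == b"hello from opendal"
op.delete("healthcheck.txt")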
Some files were not shown because too many files have changed in this diff.