fix(sqlalchemy_workflow_node_execution_repository): Missing triggered_from while querying WorkflowNodeExecution (#20044)

Signed-off-by: -LAN- <laipz8200@outlook.com>
This commit is contained in:
-LAN- 2025-05-21 16:37:44 +08:00 committed by GitHub
parent 3196dc2d61
commit 57bcb616bc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 70 additions and 42 deletions

View File

@ -4,8 +4,8 @@ SQLAlchemy implementation of the WorkflowNodeExecutionRepository.
import json import json
import logging import logging
from collections.abc import Mapping, Sequence from collections.abc import Sequence
from typing import Any, Optional, Union, cast from typing import Optional, Union
from sqlalchemy import UnaryExpression, asc, delete, desc, select from sqlalchemy import UnaryExpression, asc, delete, desc, select
from sqlalchemy.engine import Engine from sqlalchemy.engine import Engine
@ -86,8 +86,8 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository)
self._creator_user_role = CreatorUserRole.ACCOUNT if isinstance(user, Account) else CreatorUserRole.END_USER self._creator_user_role = CreatorUserRole.ACCOUNT if isinstance(user, Account) else CreatorUserRole.END_USER
# Initialize in-memory cache for node executions # Initialize in-memory cache for node executions
# Key: node_execution_id, Value: NodeExecution # Key: node_execution_id, Value: WorkflowNodeExecution (DB model)
self._node_execution_cache: dict[str, NodeExecution] = {} self._node_execution_cache: dict[str, WorkflowNodeExecution] = {}
def _to_domain_model(self, db_model: WorkflowNodeExecution) -> NodeExecution: def _to_domain_model(self, db_model: WorkflowNodeExecution) -> NodeExecution:
""" """
@ -103,7 +103,10 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository)
inputs = db_model.inputs_dict inputs = db_model.inputs_dict
process_data = db_model.process_data_dict process_data = db_model.process_data_dict
outputs = db_model.outputs_dict outputs = db_model.outputs_dict
metadata = db_model.execution_metadata_dict if db_model.execution_metadata_dict:
metadata = {NodeRunMetadataKey(k): v for k, v in db_model.execution_metadata_dict.items()}
else:
metadata = {}
# Convert status to domain enum # Convert status to domain enum
status = NodeExecutionStatus(db_model.status) status = NodeExecutionStatus(db_model.status)
@ -124,12 +127,7 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository)
status=status, status=status,
error=db_model.error, error=db_model.error,
elapsed_time=db_model.elapsed_time, elapsed_time=db_model.elapsed_time,
# FIXME(QuantumGhost): a temporary workaround for the following type check failure in Python 3.11. metadata=metadata,
# However, this problem is not occurred in Python 3.12.
#
# A case of this error is:
# https://github.com/langgenius/dify/actions/runs/15112698604/job/42475659482?pr=19737#step:9:24
metadata=cast(Mapping[NodeRunMetadataKey, Any] | None, metadata),
created_at=db_model.created_at, created_at=db_model.created_at,
finished_at=db_model.finished_at, finished_at=db_model.finished_at,
) )
@ -211,7 +209,7 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository)
# Only cache if we have a node_execution_id to use as the cache key # Only cache if we have a node_execution_id to use as the cache key
if db_model.node_execution_id: if db_model.node_execution_id:
logger.debug(f"Updating cache for node_execution_id: {db_model.node_execution_id}") logger.debug(f"Updating cache for node_execution_id: {db_model.node_execution_id}")
self._node_execution_cache[db_model.node_execution_id] = execution self._node_execution_cache[db_model.node_execution_id] = db_model
def get_by_node_execution_id(self, node_execution_id: str) -> Optional[NodeExecution]: def get_by_node_execution_id(self, node_execution_id: str) -> Optional[NodeExecution]:
""" """
@ -229,7 +227,9 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository)
# First check the cache # First check the cache
if node_execution_id in self._node_execution_cache: if node_execution_id in self._node_execution_cache:
logger.debug(f"Cache hit for node_execution_id: {node_execution_id}") logger.debug(f"Cache hit for node_execution_id: {node_execution_id}")
return self._node_execution_cache[node_execution_id] # Convert cached DB model to domain model
cached_db_model = self._node_execution_cache[node_execution_id]
return self._to_domain_model(cached_db_model)
# If not in cache, query the database # If not in cache, query the database
logger.debug(f"Cache miss for node_execution_id: {node_execution_id}, querying database") logger.debug(f"Cache miss for node_execution_id: {node_execution_id}, querying database")
@ -244,26 +244,25 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository)
db_model = session.scalar(stmt) db_model = session.scalar(stmt)
if db_model: if db_model:
# Convert to domain model # Add DB model to cache
domain_model = self._to_domain_model(db_model) self._node_execution_cache[node_execution_id] = db_model
# Add to cache # Convert to domain model and return
self._node_execution_cache[node_execution_id] = domain_model return self._to_domain_model(db_model)
return domain_model
return None return None
def get_by_workflow_run( def get_db_models_by_workflow_run(
self, self,
workflow_run_id: str, workflow_run_id: str,
order_config: Optional[OrderConfig] = None, order_config: Optional[OrderConfig] = None,
) -> Sequence[NodeExecution]: ) -> Sequence[WorkflowNodeExecution]:
""" """
Retrieve all NodeExecution instances for a specific workflow run. Retrieve all WorkflowNodeExecution database models for a specific workflow run.
This method always queries the database to ensure complete and ordered results, This method directly returns database models without converting to domain models,
but updates the cache with any retrieved executions. which is useful when you need to access database-specific fields like triggered_from.
It also updates the in-memory cache with the retrieved models.
Args: Args:
workflow_run_id: The workflow run ID workflow_run_id: The workflow run ID
@ -272,7 +271,7 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository)
order_config.order_direction: Direction to order ("asc" or "desc") order_config.order_direction: Direction to order ("asc" or "desc")
Returns: Returns:
A list of NodeExecution instances A list of WorkflowNodeExecution database models
""" """
with self._session_factory() as session: with self._session_factory() as session:
stmt = select(WorkflowNodeExecution).where( stmt = select(WorkflowNodeExecution).where(
@ -301,13 +300,40 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository)
db_models = session.scalars(stmt).all() db_models = session.scalars(stmt).all()
# Convert database models to domain models and update cache # Update the cache with the retrieved DB models
for model in db_models:
if model.node_execution_id:
self._node_execution_cache[model.node_execution_id] = model
return db_models
def get_by_workflow_run(
self,
workflow_run_id: str,
order_config: Optional[OrderConfig] = None,
) -> Sequence[NodeExecution]:
"""
Retrieve all NodeExecution instances for a specific workflow run.
This method always queries the database to ensure complete and ordered results,
but updates the cache with any retrieved executions.
Args:
workflow_run_id: The workflow run ID
order_config: Optional configuration for ordering results
order_config.order_by: List of fields to order by (e.g., ["index", "created_at"])
order_config.order_direction: Direction to order ("asc" or "desc")
Returns:
A list of NodeExecution instances
"""
# Get the database models using the new method
db_models = self.get_db_models_by_workflow_run(workflow_run_id, order_config)
# Convert database models to domain models
domain_models = [] domain_models = []
for model in db_models: for model in db_models:
domain_model = self._to_domain_model(model) domain_model = self._to_domain_model(model)
# Update cache if node_execution_id is present
if domain_model.node_execution_id:
self._node_execution_cache[domain_model.node_execution_id] = domain_model
domain_models.append(domain_model) domain_models.append(domain_model)
return domain_models return domain_models
@ -340,10 +366,12 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository)
domain_models = [] domain_models = []
for model in db_models: for model in db_models:
domain_model = self._to_domain_model(model)
# Update cache if node_execution_id is present # Update cache if node_execution_id is present
if domain_model.node_execution_id: if model.node_execution_id:
self._node_execution_cache[domain_model.node_execution_id] = domain_model self._node_execution_cache[model.node_execution_id] = model
# Convert to domain model
domain_model = self._to_domain_model(model)
domain_models.append(domain_model) domain_models.append(domain_model)
return domain_models return domain_models

View File

@ -15,6 +15,7 @@ from models import (
WorkflowRun, WorkflowRun,
WorkflowRunTriggeredFrom, WorkflowRunTriggeredFrom,
) )
from models.workflow import WorkflowNodeExecutionTriggeredFrom
class WorkflowRunService: class WorkflowRunService:
@ -140,14 +141,13 @@ class WorkflowRunService:
session_factory=db.engine, session_factory=db.engine,
user=user, user=user,
app_id=app_model.id, app_id=app_model.id,
triggered_from=None, triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN,
) )
# Use the repository to get the node executions with ordering # Use the repository to get the database models directly
order_config = OrderConfig(order_by=["index"], order_direction="desc") order_config = OrderConfig(order_by=["index"], order_direction="desc")
node_executions = repository.get_by_workflow_run(workflow_run_id=run_id, order_config=order_config) workflow_node_executions = repository.get_db_models_by_workflow_run(
workflow_run_id=run_id, order_config=order_config
# Convert domain models to database models )
workflow_node_executions = [repository.to_db_model(node_execution) for node_execution in node_executions]
return workflow_node_executions return workflow_node_executions