diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
index e7a8c98d26..44c1ddf739 100644
--- a/.devcontainer/Dockerfile
+++ b/.devcontainer/Dockerfile
@@ -1,5 +1,4 @@
 FROM mcr.microsoft.com/devcontainers/python:3.12
-# [Optional] Uncomment this section to install additional OS packages.
-# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
-#     && apt-get -y install --no-install-recommends
+RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
+    && apt-get -y install libgmp-dev libmpfr-dev libmpc-dev
diff --git a/.devcontainer/post_create_command.sh b/.devcontainer/post_create_command.sh
index cc8eb552b0..62f7ee3b4d 100755
--- a/.devcontainer/post_create_command.sh
+++ b/.devcontainer/post_create_command.sh
@@ -7,6 +7,7 @@ pipx install uv
 echo 'alias start-api="cd /workspaces/dify/api && uv run python -m flask run --host 0.0.0.0 --port=5001 --debug"' >> ~/.bashrc
 echo 'alias start-worker="cd /workspaces/dify/api && uv run python -m celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace,app_deletion"' >> ~/.bashrc
 echo 'alias start-web="cd /workspaces/dify/web && pnpm dev"' >> ~/.bashrc
+echo 'alias start-web-prod="cd /workspaces/dify/web && pnpm build && pnpm start"' >> ~/.bashrc
 echo 'alias start-containers="cd /workspaces/dify/docker && docker-compose -f docker-compose.middleware.yaml -p dify --env-file middleware.env up -d"' >> ~/.bashrc
 echo 'alias stop-containers="cd /workspaces/dify/docker && docker-compose -f docker-compose.middleware.yaml -p dify --env-file middleware.env down"' >> ~/.bashrc
diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml
index 30c0ff000d..b06ab9653e 100644
--- a/.github/workflows/style.yml
+++ b/.github/workflows/style.yml
@@ -139,6 +139,7 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@v4
         with:
+          fetch-depth: 0
           persist-credentials: false
 
       - name: Check changed files
diff --git a/.github/workflows/translate-i18n-base-on-english.yml b/.github/workflows/translate-i18n-base-on-english.yml
index 3f8082eb69..c79d58563f 100644
--- a/.github/workflows/translate-i18n-base-on-english.yml
+++ b/.github/workflows/translate-i18n-base-on-english.yml
@@ -31,11 +31,19 @@ jobs:
             echo "FILES_CHANGED=false" >> $GITHUB_ENV
           fi
 
+      - name: Install pnpm
+        uses: pnpm/action-setup@v4
+        with:
+          version: 10
+          run_install: false
+
       - name: Set up Node.js
         if: env.FILES_CHANGED == 'true'
         uses: actions/setup-node@v4
         with:
           node-version: 'lts/*'
+          cache: pnpm
+          cache-dependency-path: ./web/package.json
 
       - name: Install dependencies
         if: env.FILES_CHANGED == 'true'
diff --git a/README.md b/README.md
index efb37d6083..ca09adec08 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab)
+![cover-v5-optimized](./images/GitHub_README_if.png)
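Looking back at the Dockerfile hunk at the top of this diff: it bakes the GMP, MPFR and MPC development headers into the dev container image, presumably so that Python packages with native bindings against those libraries can be built inside it (the diff itself does not state the motivation). A quick, illustrative way to confirm the rebuilt container actually exposes the shared libraries:

```python
# Illustrative check only (not part of this PR): verify that the shared
# libraries pulled in by libgmp-dev / libmpfr-dev / libmpc-dev resolve
# inside the rebuilt dev container.
import ctypes.util

for name in ("gmp", "mpfr", "mpc"):
    path = ctypes.util.find_library(name)
    print(f"lib{name}: {path or 'NOT FOUND'}")
```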

📌 Introducing Dify Workflow File Upload: Recreate Google NotebookLM Podcast @@ -87,8 +87,6 @@ Please refer to our [FAQ](https://docs.dify.ai/getting-started/install-self-host **1. Workflow**: Build and test powerful AI workflows on a visual canvas, leveraging all the following features and beyond. -https://github.com/langgenius/dify/assets/13230914/356df23e-1604-483d-80a6-9517ece318aa - **2. Comprehensive model support**: Seamless integration with hundreds of proprietary / open-source LLMs from dozens of inference providers and self-hosted solutions, covering GPT, Mistral, Llama3, and any OpenAI API-compatible models. A full list of supported model providers can be found [here](https://docs.dify.ai/getting-started/readme/model-providers). @@ -237,7 +235,7 @@ At the same time, please consider supporting Dify by sharing it on social media ## Community & contact -- [Github Discussion](https://github.com/langgenius/dify/discussions). Best for: sharing feedback and asking questions. +- [GitHub Discussion](https://github.com/langgenius/dify/discussions). Best for: sharing feedback and asking questions. - [GitHub Issues](https://github.com/langgenius/dify/issues). Best for: bugs you encounter using Dify.AI, and feature proposals. See our [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). - [Discord](https://discord.gg/FngNHpbcY7). Best for: sharing your applications and hanging out with the community. - [X(Twitter)](https://twitter.com/dify_ai). Best for: sharing your applications and hanging out with the community. diff --git a/README_AR.md b/README_AR.md index 4f93802fda..df288fd33c 100644 --- a/README_AR.md +++ b/README_AR.md @@ -1,4 +1,4 @@ -![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab) +![cover-v5-optimized](./images/GitHub_README_if.png)

Dify Cloud · @@ -54,8 +54,6 @@ **1. سير العمل**: قم ببناء واختبار سير عمل الذكاء الاصطناعي القوي على قماش بصري، مستفيدًا من جميع الميزات التالية وأكثر. - - **2. الدعم الشامل للنماذج**: تكامل سلس مع مئات من LLMs الخاصة / مفتوحة المصدر من عشرات من موفري التحليل والحلول المستضافة ذاتيًا، مما يغطي GPT و Mistral و Llama3 وأي نماذج متوافقة مع واجهة OpenAI API. يمكن العثور على قائمة كاملة بمزودي النموذج المدعومين [هنا](https://docs.dify.ai/getting-started/readme/model-providers). ![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3) @@ -225,7 +223,7 @@ docker compose up -d ## المجتمع والاتصال -- [مناقشة Github](https://github.com/langgenius/dify/discussions). الأفضل لـ: مشاركة التعليقات وطرح الأسئلة. +- [مناقشة GitHub](https://github.com/langgenius/dify/discussions). الأفضل لـ: مشاركة التعليقات وطرح الأسئلة. - [المشكلات على GitHub](https://github.com/langgenius/dify/issues). الأفضل لـ: الأخطاء التي تواجهها في استخدام Dify.AI، واقتراحات الميزات. انظر [دليل المساهمة](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). - [Discord](https://discord.gg/FngNHpbcY7). الأفضل لـ: مشاركة تطبيقاتك والترفيه مع المجتمع. - [تويتر](https://twitter.com/dify_ai). الأفضل لـ: مشاركة تطبيقاتك والترفيه مع المجتمع. diff --git a/README_BN.md b/README_BN.md index 7599fae9ff..4a5b5f3928 100644 --- a/README_BN.md +++ b/README_BN.md @@ -1,4 +1,4 @@ -![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab) +![cover-v5-optimized](./images/GitHub_README_if.png)

📌 ডিফাই ওয়ার্কফ্লো ফাইল আপলোড পরিচিতি: গুগল নোটবুক-এলএম পডকাস্ট পুনর্নির্মাণ @@ -84,8 +84,6 @@ docker compose up -d **১. ওয়ার্কফ্লো**: ভিজ্যুয়াল ক্যানভাসে AI ওয়ার্কফ্লো তৈরি এবং পরীক্ষা করুন, নিম্নলিখিত সব ফিচার এবং তার বাইরেও আরও অনেক কিছু ব্যবহার করে। - - **২. মডেল সাপোর্ট**: GPT, Mistral, Llama3, এবং যেকোনো OpenAI API-সামঞ্জস্যপূর্ণ মডেলসহ, কয়েক ডজন ইনফারেন্স প্রদানকারী এবং সেল্ফ-হোস্টেড সমাধান থেকে শুরু করে প্রোপ্রাইটরি/ওপেন-সোর্স LLM-এর সাথে সহজে ইন্টিগ্রেশন। সমর্থিত মডেল প্রদানকারীদের একটি সম্পূর্ণ তালিকা পাওয়া যাবে [এখানে](https://docs.dify.ai/getting-started/readme/model-providers)। @@ -236,7 +234,7 @@ GitHub-এ ডিফাইকে স্টার দিয়ে রাখুন ## কমিউনিটি এবং যোগাযোগ -- [Github Discussion](https://github.com/langgenius/dify/discussions) ফিডব্যাক এবং প্রতিক্রিয়া জানানোর মাধ্যম। +- [GitHub Discussion](https://github.com/langgenius/dify/discussions) ফিডব্যাক এবং প্রতিক্রিয়া জানানোর মাধ্যম। - [GitHub Issues](https://github.com/langgenius/dify/issues). Dify.AI ব্যবহার করে আপনি যেসব বাগের সম্মুখীন হন এবং ফিচার প্রস্তাবনা। আমাদের [অবদান নির্দেশিকা](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) দেখুন। - [Discord](https://discord.gg/FngNHpbcY7) আপনার এপ্লিকেশন শেয়ার এবং কমিউনিটি আড্ডার মাধ্যম। - [X(Twitter)](https://twitter.com/dify_ai) আপনার এপ্লিকেশন শেয়ার এবং কমিউনিটি আড্ডার মাধ্যম। diff --git a/README_CN.md b/README_CN.md index 973629f459..ba7ee0006d 100644 --- a/README_CN.md +++ b/README_CN.md @@ -1,4 +1,4 @@ -![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab) +![cover-v5-optimized](./images/GitHub_README_if.png)

Dify 云服务 · @@ -61,11 +61,6 @@ Dify 是一个开源的 LLM 应用开发平台。其直观的界面结合了 AI **1. 工作流**: 在画布上构建和测试功能强大的 AI 工作流程,利用以下所有功能以及更多功能。 - - https://github.com/langgenius/dify/assets/13230914/356df23e-1604-483d-80a6-9517ece318aa - - - **2. 全面的模型支持**: 与数百种专有/开源 LLMs 以及数十种推理提供商和自托管解决方案无缝集成,涵盖 GPT、Mistral、Llama3 以及任何与 OpenAI API 兼容的模型。完整的支持模型提供商列表可在[此处](https://docs.dify.ai/getting-started/readme/model-providers)找到。 @@ -248,7 +243,7 @@ docker compose up -d 我们欢迎您为 Dify 做出贡献,以帮助改善 Dify。包括:提交代码、问题、新想法,或分享您基于 Dify 创建的有趣且有用的 AI 应用程序。同时,我们也欢迎您在不同的活动、会议和社交媒体上分享 Dify。 -- [Github Discussion](https://github.com/langgenius/dify/discussions). 👉:分享您的应用程序并与社区交流。 +- [GitHub Discussion](https://github.com/langgenius/dify/discussions). 👉:分享您的应用程序并与社区交流。 - [GitHub Issues](https://github.com/langgenius/dify/issues)。👉:使用 Dify.AI 时遇到的错误和问题,请参阅[贡献指南](CONTRIBUTING.md)。 - [电子邮件支持](mailto:hello@dify.ai?subject=[GitHub]Questions%20About%20Dify)。👉:关于使用 Dify.AI 的问题。 - [Discord](https://discord.gg/FngNHpbcY7)。👉:分享您的应用程序并与社区交流。 diff --git a/README_DE.md b/README_DE.md index 738c0e3b67..f6023a3935 100644 --- a/README_DE.md +++ b/README_DE.md @@ -1,4 +1,4 @@ -![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab) +![cover-v5-optimized](./images/GitHub_README_if.png)

📌 Einführung in Dify Workflow File Upload: Google NotebookLM Podcast nachbilden @@ -83,11 +83,6 @@ Bitte beachten Sie unsere [FAQ](https://docs.dify.ai/getting-started/install-sel **1. Workflow**: Erstellen und testen Sie leistungsstarke KI-Workflows auf einer visuellen Oberfläche, wobei Sie alle der folgenden Funktionen und darüber hinaus nutzen können. - - https://github.com/langgenius/dify/assets/13230914/356df23e-1604-483d-80a6-9517ece318aa - - - **2. Umfassende Modellunterstützung**: Nahtlose Integration mit Hunderten von proprietären und Open-Source-LLMs von Dutzenden Inferenzanbietern und selbstgehosteten Lösungen, die GPT, Mistral, Llama3 und alle mit der OpenAI API kompatiblen Modelle abdecken. Eine vollständige Liste der unterstützten Modellanbieter finden Sie [hier](https://docs.dify.ai/getting-started/readme/model-providers). @@ -235,7 +230,7 @@ Falls Sie Code beitragen möchten, lesen Sie bitte unseren [Contribution Guide]( ## Gemeinschaft & Kontakt -* [Github Discussion](https://github.com/langgenius/dify/discussions). Am besten geeignet für: den Austausch von Feedback und das Stellen von Fragen. +* [GitHub Discussion](https://github.com/langgenius/dify/discussions). Am besten geeignet für: den Austausch von Feedback und das Stellen von Fragen. * [GitHub Issues](https://github.com/langgenius/dify/issues). Am besten für: Fehler, auf die Sie bei der Verwendung von Dify.AI stoßen, und Funktionsvorschläge. Siehe unseren [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). * [Discord](https://discord.gg/FngNHpbcY7). Am besten geeignet für: den Austausch von Bewerbungen und den Austausch mit der Community. * [X(Twitter)](https://twitter.com/dify_ai). Am besten geeignet für: den Austausch von Bewerbungen und den Austausch mit der Community. diff --git a/README_ES.md b/README_ES.md index 212268b73d..12f2ce8c11 100644 --- a/README_ES.md +++ b/README_ES.md @@ -1,4 +1,4 @@ -![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab) +![cover-v5-optimized](./images/GitHub_README_if.png)

Dify Cloud · @@ -59,11 +59,6 @@ Dify es una plataforma de desarrollo de aplicaciones de LLM de código abierto. **1. Flujo de trabajo**: Construye y prueba potentes flujos de trabajo de IA en un lienzo visual, aprovechando todas las siguientes características y más. - - https://github.com/langgenius/dify/assets/13230914/356df23e-1604-483d-80a6-9517ece318aa - - - **2. Soporte de modelos completo**: Integración perfecta con cientos de LLMs propietarios / de código abierto de docenas de proveedores de inferencia y soluciones auto-alojadas, que cubren GPT, Mistral, Llama3 y cualquier modelo compatible con la API de OpenAI. Se puede encontrar una lista completa de proveedores de modelos admitidos [aquí](https://docs.dify.ai/getting-started/readme/model-providers). diff --git a/README_FR.md b/README_FR.md index 89eea7d058..b106615b31 100644 --- a/README_FR.md +++ b/README_FR.md @@ -1,4 +1,4 @@ -![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab) +![cover-v5-optimized](./images/GitHub_README_if.png)

Dify Cloud · @@ -59,11 +59,6 @@ Dify est une plateforme de développement d'applications LLM open source. Son in **1. Flux de travail** : Construisez et testez des flux de travail d'IA puissants sur un canevas visuel, en utilisant toutes les fonctionnalités suivantes et plus encore. - - https://github.com/langgenius/dify/assets/13230914/356df23e-1604-483d-80a6-9517ece318aa - - - **2. Prise en charge complète des modèles** : Intégration transparente avec des centaines de LLM propriétaires / open source provenant de dizaines de fournisseurs d'inférence et de solutions auto-hébergées, couvrant GPT, Mistral, Llama3, et tous les modèles compatibles avec l'API OpenAI. Une liste complète des fournisseurs de modèles pris en charge se trouve [ici](https://docs.dify.ai/getting-started/readme/model-providers). diff --git a/README_JA.md b/README_JA.md index adca219753..26703f3958 100644 --- a/README_JA.md +++ b/README_JA.md @@ -1,4 +1,4 @@ -![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab) +![cover-v5-optimized](./images/GitHub_README_if.png)

Dify Cloud · @@ -60,11 +60,6 @@ DifyはオープンソースのLLMアプリケーション開発プラットフ **1. ワークフロー**: 強力なAIワークフローをビジュアルキャンバス上で構築し、テストできます。すべての機能、および以下の機能を使用できます。 - - https://github.com/langgenius/dify/assets/13230914/356df23e-1604-483d-80a6-9517ece318aa - - - **2. 総合的なモデルサポート**: 数百ものプロプライエタリ/オープンソースのLLMと、数十もの推論プロバイダーおよびセルフホスティングソリューションとのシームレスな統合を提供します。GPT、Mistral、Llama3、OpenAI APIと互換性のあるすべてのモデルを統合されています。サポートされているモデルプロバイダーの完全なリストは[こちら](https://docs.dify.ai/getting-started/readme/model-providers)をご覧ください。 @@ -241,7 +236,7 @@ docker compose up -d ## コミュニティ & お問い合わせ -* [Github Discussion](https://github.com/langgenius/dify/discussions). 主に: フィードバックの共有や質問。 +* [GitHub Discussion](https://github.com/langgenius/dify/discussions). 主に: フィードバックの共有や質問。 * [GitHub Issues](https://github.com/langgenius/dify/issues). 主に: Dify.AIを使用する際に発生するエラーや問題については、[貢献ガイド](CONTRIBUTING_JA.md)を参照してください * [Discord](https://discord.gg/FngNHpbcY7). 主に: アプリケーションの共有やコミュニティとの交流。 * [X(Twitter)](https://twitter.com/dify_ai). 主に: アプリケーションの共有やコミュニティとの交流。 diff --git a/README_KL.md b/README_KL.md index 17e6c9d509..ea91baa5aa 100644 --- a/README_KL.md +++ b/README_KL.md @@ -1,4 +1,4 @@ -![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab) +![cover-v5-optimized](./images/GitHub_README_if.png)

Dify Cloud · @@ -59,11 +59,6 @@ Dify is an open-source LLM app development platform. Its intuitive interface com **1. Workflow**: Build and test powerful AI workflows on a visual canvas, leveraging all the following features and beyond. - - https://github.com/langgenius/dify/assets/13230914/356df23e-1604-483d-80a6-9517ece318aa - - - **2. Comprehensive model support**: Seamless integration with hundreds of proprietary / open-source LLMs from dozens of inference providers and self-hosted solutions, covering GPT, Mistral, Llama3, and any OpenAI API-compatible models. A full list of supported model providers can be found [here](https://docs.dify.ai/getting-started/readme/model-providers). @@ -240,7 +235,7 @@ At the same time, please consider supporting Dify by sharing it on social media ## Community & Contact -* [Github Discussion](https://github.com/langgenius/dify/discussions +* [GitHub Discussion](https://github.com/langgenius/dify/discussions ). Best for: sharing feedback and asking questions. * [GitHub Issues](https://github.com/langgenius/dify/issues). Best for: bugs you encounter using Dify.AI, and feature proposals. See our [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). diff --git a/README_KR.md b/README_KR.md index d44723f9b6..89301e8b2c 100644 --- a/README_KR.md +++ b/README_KR.md @@ -1,4 +1,4 @@ -![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab) +![cover-v5-optimized](./images/GitHub_README_if.png)

Dify 클라우드 · @@ -54,11 +54,6 @@ **1. 워크플로우**: 다음 기능들을 비롯한 다양한 기능을 활용하여 시각적 캔버스에서 강력한 AI 워크플로우를 구축하고 테스트하세요. - - https://github.com/langgenius/dify/assets/13230914/356df23e-1604-483d-80a6-9517ece318aa - - - **2. 포괄적인 모델 지원:**: 수십 개의 추론 제공업체와 자체 호스팅 솔루션에서 제공하는 수백 개의 독점 및 오픈 소스 LLM과 원활하게 통합되며, GPT, Mistral, Llama3 및 모든 OpenAI API 호환 모델을 포함합니다. 지원되는 모델 제공업체의 전체 목록은 [여기](https://docs.dify.ai/getting-started/readme/model-providers)에서 확인할 수 있습니다. @@ -234,7 +229,7 @@ Dify를 Kubernetes에 배포하고 프리미엄 스케일링 설정을 구성했 ## 커뮤니티 & 연락처 -* [Github 토론](https://github.com/langgenius/dify/discussions). 피드백 공유 및 질문하기에 적합합니다. +* [GitHub 토론](https://github.com/langgenius/dify/discussions). 피드백 공유 및 질문하기에 적합합니다. * [GitHub 이슈](https://github.com/langgenius/dify/issues). Dify.AI 사용 중 발견한 버그와 기능 제안에 적합합니다. [기여 가이드](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)를 참조하세요. * [디스코드](https://discord.gg/FngNHpbcY7). 애플리케이션 공유 및 커뮤니티와 소통하기에 적합합니다. * [트위터](https://twitter.com/dify_ai). 애플리케이션 공유 및 커뮤니티와 소통하기에 적합합니다. diff --git a/README_PT.md b/README_PT.md index 9dc2207279..157772d528 100644 --- a/README_PT.md +++ b/README_PT.md @@ -1,5 +1,4 @@ -![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab) - +![cover-v5-optimized](./images/GitHub_README_if.png)

📌 Introduzindo o Dify Workflow com Upload de Arquivo: Recrie o Podcast Google NotebookLM

@@ -59,11 +58,6 @@ Dify é uma plataforma de desenvolvimento de aplicativos LLM de código aberto. **1. Workflow**: Construa e teste workflows poderosos de IA em uma interface visual, aproveitando todos os recursos a seguir e muito mais. - - https://github.com/langgenius/dify/assets/13230914/356df23e-1604-483d-80a6-9517ece318aa - - - **2. Suporte abrangente a modelos**: Integração perfeita com centenas de LLMs proprietários e de código aberto de diversas provedoras e soluções auto-hospedadas, abrangendo GPT, Mistral, Llama3 e qualquer modelo compatível com a API da OpenAI. A lista completa de provedores suportados pode ser encontrada [aqui](https://docs.dify.ai/getting-started/readme/model-providers). diff --git a/README_SI.md b/README_SI.md index 9a38b558b4..14de1ea792 100644 --- a/README_SI.md +++ b/README_SI.md @@ -1,4 +1,4 @@ -![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab) +![cover-v5-optimized](./images/GitHub_README_if.png)

📌 Predstavljamo nalaganje datotek Dify Workflow: znova ustvarite Google NotebookLM Podcast @@ -81,11 +81,6 @@ Prosimo, glejte naša pogosta vprašanja [FAQ](https://docs.dify.ai/getting-star **1. Potek dela**: Zgradite in preizkusite zmogljive poteke dela AI na vizualnem platnu, pri čemer izkoristite vse naslednje funkcije in več. - - https://github.com/langgenius/dify/assets/13230914/356df23e-1604-483d-80a6-9517ece318aa - - - **2. Celovita podpora za modele**: Brezhibna integracija s stotinami lastniških/odprtokodnih LLM-jev ducatov ponudnikov sklepanja in samostojnih rešitev, ki pokrivajo GPT, Mistral, Llama3 in vse modele, združljive z API-jem OpenAI. Celoten seznam podprtih ponudnikov modelov najdete [tukaj](https://docs.dify.ai/getting-started/readme/model-providers). @@ -234,7 +229,7 @@ Za tiste, ki bi radi prispevali kodo, si oglejte naš vodnik za prispevke . Hkra ## Skupnost in stik -* [Github Discussion](https://github.com/langgenius/dify/discussions). Najboljše za: izmenjavo povratnih informacij in postavljanje vprašanj. +* [GitHub Discussion](https://github.com/langgenius/dify/discussions). Najboljše za: izmenjavo povratnih informacij in postavljanje vprašanj. * [GitHub Issues](https://github.com/langgenius/dify/issues). Najboljše za: hrošče, na katere naletite pri uporabi Dify.AI, in predloge funkcij. Oglejte si naš [vodnik za prispevke](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). * [Discord](https://discord.gg/FngNHpbcY7). Najboljše za: deljenje vaših aplikacij in druženje s skupnostjo. * [X(Twitter)](https://twitter.com/dify_ai). Najboljše za: deljenje vaših aplikacij in druženje s skupnostjo. diff --git a/README_TR.md b/README_TR.md index ab2853a019..563a05af3c 100644 --- a/README_TR.md +++ b/README_TR.md @@ -1,4 +1,4 @@ -![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab) +![cover-v5-optimized](./images/GitHub_README_if.png)

Dify Bulut · @@ -55,11 +55,6 @@ Dify, açık kaynaklı bir LLM uygulama geliştirme platformudur. Sezgisel aray **1. Workflow**: Görsel bir arayüz üzerinde güçlü AI iş akışları oluşturun ve test edin, aşağıdaki tüm özellikleri ve daha fazlasını kullanarak. - - https://github.com/langgenius/dify/assets/13230914/356df23e-1604-483d-80a6-9517ece318aa - - - **2. Kapsamlı model desteği**: Çok sayıda çıkarım sağlayıcısı ve kendi kendine barındırılan çözümlerden yüzlerce özel / açık kaynaklı LLM ile sorunsuz entegrasyon sağlar. GPT, Mistral, Llama3 ve OpenAI API uyumlu tüm modelleri kapsar. Desteklenen model sağlayıcılarının tam listesine [buradan](https://docs.dify.ai/getting-started/readme/model-providers) ulaşabilirsiniz. @@ -232,7 +227,7 @@ Aynı zamanda, lütfen Dify'ı sosyal medyada, etkinliklerde ve konferanslarda p ## Topluluk & iletişim -* [Github Tartışmaları](https://github.com/langgenius/dify/discussions). En uygun: geri bildirim paylaşmak ve soru sormak için. +* [GitHub Tartışmaları](https://github.com/langgenius/dify/discussions). En uygun: geri bildirim paylaşmak ve soru sormak için. * [GitHub Sorunları](https://github.com/langgenius/dify/issues). En uygun: Dify.AI kullanırken karşılaştığınız hatalar ve özellik önerileri için. [Katkı Kılavuzumuza](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) bakın. * [Discord](https://discord.gg/FngNHpbcY7). En uygun: uygulamalarınızı paylaşmak ve toplulukla vakit geçirmek için. * [X(Twitter)](https://twitter.com/dify_ai). En uygun: uygulamalarınızı paylaşmak ve toplulukla vakit geçirmek için. diff --git a/README_TW.md b/README_TW.md index 8263a22b64..f4a76ac109 100644 --- a/README_TW.md +++ b/README_TW.md @@ -1,4 +1,4 @@ -![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab) +![cover-v5-optimized](./images/GitHub_README_if.png)

📌 介紹 Dify 工作流程檔案上傳功能:重現 Google NotebookLM Podcast @@ -86,8 +86,6 @@ docker compose up -d **1. 工作流程**: 在視覺化畫布上建立和測試強大的 AI 工作流程,利用以下所有功能及更多。 -https://github.com/langgenius/dify/assets/13230914/356df23e-1604-483d-80a6-9517ece318aa - **2. 全面的模型支援**: 無縫整合來自數十個推理提供商和自託管解決方案的數百個專有/開源 LLM,涵蓋 GPT、Mistral、Llama3 和任何與 OpenAI API 兼容的模型。您可以在[此處](https://docs.dify.ai/getting-started/readme/model-providers)找到支援的模型提供商完整列表。 @@ -235,7 +233,7 @@ Dify 的所有功能都提供相應的 API,因此您可以輕鬆地將 Dify ## 社群與聯絡方式 -- [Github Discussion](https://github.com/langgenius/dify/discussions):最適合分享反饋和提問。 +- [GitHub Discussion](https://github.com/langgenius/dify/discussions):最適合分享反饋和提問。 - [GitHub Issues](https://github.com/langgenius/dify/issues):最適合報告使用 Dify.AI 時遇到的問題和提出功能建議。請參閱我們的[貢獻指南](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)。 - [Discord](https://discord.gg/FngNHpbcY7):最適合分享您的應用程式並與社群互動。 - [X(Twitter)](https://twitter.com/dify_ai):最適合分享您的應用程式並與社群互動。 diff --git a/README_VI.md b/README_VI.md index 852ed7aaa0..4e1e05cbf3 100644 --- a/README_VI.md +++ b/README_VI.md @@ -1,4 +1,4 @@ -![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab) +![cover-v5-optimized](./images/GitHub_README_if.png)

Dify Cloud · @@ -55,11 +55,6 @@ Dify là một nền tảng phát triển ứng dụng LLM mã nguồn mở. Gia **1. Quy trình làm việc**: Xây dựng và kiểm tra các quy trình làm việc AI mạnh mẽ trên một canvas trực quan, tận dụng tất cả các tính năng sau đây và hơn thế nữa. - - https://github.com/langgenius/dify/assets/13230914/356df23e-1604-483d-80a6-9517ece318aa - - - **2. Hỗ trợ mô hình toàn diện**: Tích hợp liền mạch với hàng trăm mô hình LLM độc quyền / mã nguồn mở từ hàng chục nhà cung cấp suy luận và giải pháp tự lưu trữ, bao gồm GPT, Mistral, Llama3, và bất kỳ mô hình tương thích API OpenAI nào. Danh sách đầy đủ các nhà cung cấp mô hình được hỗ trợ có thể được tìm thấy [tại đây](https://docs.dify.ai/getting-started/readme/model-providers). diff --git a/api/.env.example b/api/.env.example index 2cc6410cdd..ae7e82c779 100644 --- a/api/.env.example +++ b/api/.env.example @@ -152,6 +152,7 @@ QDRANT_API_KEY=difyai123456 QDRANT_CLIENT_TIMEOUT=20 QDRANT_GRPC_ENABLED=false QDRANT_GRPC_PORT=6334 +QDRANT_REPLICATION_FACTOR=1 #Couchbase configuration COUCHBASE_CONNECTION_STRING=127.0.0.1 @@ -269,6 +270,7 @@ OPENSEARCH_PORT=9200 OPENSEARCH_USER=admin OPENSEARCH_PASSWORD=admin OPENSEARCH_SECURE=true +OPENSEARCH_VERIFY_CERTS=true # Baidu configuration BAIDU_VECTOR_DB_ENDPOINT=http://127.0.0.1:5287 @@ -348,6 +350,7 @@ SENTRY_DSN= # DEBUG DEBUG=false +ENABLE_REQUEST_LOGGING=False SQLALCHEMY_ECHO=false # Notion import configuration, support public and internal @@ -476,6 +479,7 @@ LOGIN_LOCKOUT_DURATION=86400 ENABLE_OTEL=false OTLP_BASE_ENDPOINT=http://localhost:4318 OTLP_API_KEY= +OTEL_EXPORTER_OTLP_PROTOCOL= OTEL_EXPORTER_TYPE=otlp OTEL_SAMPLING_RATE=0.1 OTEL_BATCH_EXPORT_SCHEDULE_DELAY=5000 diff --git a/api/app_factory.py b/api/app_factory.py index 586f2ded9e..3a258be28f 100644 --- a/api/app_factory.py +++ b/api/app_factory.py @@ -54,7 +54,7 @@ def initialize_extensions(app: DifyApp): ext_otel, ext_proxy_fix, ext_redis, - ext_repositories, + ext_request_logging, ext_sentry, ext_set_secretkey, ext_storage, @@ -75,7 +75,6 @@ def initialize_extensions(app: DifyApp): ext_migrate, ext_redis, ext_storage, - ext_repositories, ext_celery, ext_login, ext_mail, @@ -85,6 +84,7 @@ def initialize_extensions(app: DifyApp): ext_blueprints, ext_commands, ext_otel, + ext_request_logging, ] for ext in extensions: short_name = ext.__name__.split(".")[-1] diff --git a/api/commands.py b/api/commands.py index dc31dc0d80..66278a53a3 100644 --- a/api/commands.py +++ b/api/commands.py @@ -6,6 +6,7 @@ from typing import Optional import click from flask import current_app +from sqlalchemy import select from werkzeug.exceptions import NotFound from configs import dify_config @@ -297,11 +298,11 @@ def migrate_knowledge_vector_database(): page = 1 while True: try: - datasets = ( - Dataset.query.filter(Dataset.indexing_technique == "high_quality") - .order_by(Dataset.created_at.desc()) - .paginate(page=page, per_page=50) + stmt = ( + select(Dataset).filter(Dataset.indexing_technique == "high_quality").order_by(Dataset.created_at.desc()) ) + + datasets = db.paginate(select=stmt, page=page, per_page=50, max_per_page=50, error_out=False) except NotFound: break @@ -551,11 +552,12 @@ def old_metadata_migration(): page = 1 while True: try: - documents = ( - DatasetDocument.query.filter(DatasetDocument.doc_metadata is not None) + stmt = ( + select(DatasetDocument) + .filter(DatasetDocument.doc_metadata.is_not(None)) .order_by(DatasetDocument.created_at.desc()) - .paginate(page=page, per_page=50) ) + documents = db.paginate(select=stmt, page=page, 
per_page=50, max_per_page=50, error_out=False) except NotFound: break if not documents: @@ -592,11 +594,15 @@ def old_metadata_migration(): ) db.session.add(dataset_metadata_binding) else: - dataset_metadata_binding = DatasetMetadataBinding.query.filter( - DatasetMetadataBinding.dataset_id == document.dataset_id, - DatasetMetadataBinding.document_id == document.id, - DatasetMetadataBinding.metadata_id == dataset_metadata.id, - ).first() + dataset_metadata_binding = ( + db.session.query(DatasetMetadataBinding) # type: ignore + .filter( + DatasetMetadataBinding.dataset_id == document.dataset_id, + DatasetMetadataBinding.document_id == document.id, + DatasetMetadataBinding.metadata_id == dataset_metadata.id, + ) + .first() + ) if not dataset_metadata_binding: dataset_metadata_binding = DatasetMetadataBinding( tenant_id=document.tenant_id, diff --git a/api/configs/deploy/__init__.py b/api/configs/deploy/__init__.py index 950936d3c6..63f4dfba63 100644 --- a/api/configs/deploy/__init__.py +++ b/api/configs/deploy/__init__.py @@ -17,6 +17,12 @@ class DeploymentConfig(BaseSettings): default=False, ) + # Request logging configuration + ENABLE_REQUEST_LOGGING: bool = Field( + description="Enable request and response body logging", + default=False, + ) + EDITION: str = Field( description="Deployment edition of the application (e.g., 'SELF_HOSTED', 'CLOUD')", default="SELF_HOSTED", diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py index 4890b5f746..a3da5c1b49 100644 --- a/api/configs/feature/__init__.py +++ b/api/configs/feature/__init__.py @@ -74,7 +74,7 @@ class CodeExecutionSandboxConfig(BaseSettings): CODE_EXECUTION_ENDPOINT: HttpUrl = Field( description="URL endpoint for the code execution service", - default="http://sandbox:8194", + default=HttpUrl("http://sandbox:8194"), ) CODE_EXECUTION_API_KEY: str = Field( @@ -145,7 +145,7 @@ class PluginConfig(BaseSettings): PLUGIN_DAEMON_URL: HttpUrl = Field( description="Plugin API URL", - default="http://localhost:5002", + default=HttpUrl("http://localhost:5002"), ) PLUGIN_DAEMON_KEY: str = Field( @@ -188,7 +188,7 @@ class MarketplaceConfig(BaseSettings): MARKETPLACE_API_URL: HttpUrl = Field( description="Marketplace API URL", - default="https://marketplace.dify.ai", + default=HttpUrl("https://marketplace.dify.ai"), ) diff --git a/api/configs/middleware/__init__.py b/api/configs/middleware/__init__.py index d285515998..1b015b3267 100644 --- a/api/configs/middleware/__init__.py +++ b/api/configs/middleware/__init__.py @@ -1,6 +1,6 @@ import os from typing import Any, Literal, Optional -from urllib.parse import quote_plus +from urllib.parse import parse_qsl, quote_plus from pydantic import Field, NonNegativeInt, PositiveFloat, PositiveInt, computed_field from pydantic_settings import BaseSettings @@ -173,17 +173,31 @@ class DatabaseConfig(BaseSettings): RETRIEVAL_SERVICE_EXECUTORS: NonNegativeInt = Field( description="Number of processes for the retrieval service, default to CPU cores.", - default=os.cpu_count(), + default=os.cpu_count() or 1, ) - @computed_field + @computed_field # type: ignore[misc] + @property def SQLALCHEMY_ENGINE_OPTIONS(self) -> dict[str, Any]: + # Parse DB_EXTRAS for 'options' + db_extras_dict = dict(parse_qsl(self.DB_EXTRAS)) + options = db_extras_dict.get("options", "") + # Always include timezone + timezone_opt = "-c timezone=UTC" + if options: + # Merge user options and timezone + merged_options = f"{options} {timezone_opt}" + else: + merged_options = timezone_opt + + connect_args = {"options": 
merged_options} + return { "pool_size": self.SQLALCHEMY_POOL_SIZE, "max_overflow": self.SQLALCHEMY_MAX_OVERFLOW, "pool_recycle": self.SQLALCHEMY_POOL_RECYCLE, "pool_pre_ping": self.SQLALCHEMY_POOL_PRE_PING, - "connect_args": {"options": "-c timezone=UTC"}, + "connect_args": connect_args, } diff --git a/api/configs/middleware/cache/redis_config.py b/api/configs/middleware/cache/redis_config.py index 2e98c31ec3..916f52e165 100644 --- a/api/configs/middleware/cache/redis_config.py +++ b/api/configs/middleware/cache/redis_config.py @@ -83,3 +83,13 @@ class RedisConfig(BaseSettings): description="Password for Redis Clusters authentication (if required)", default=None, ) + + REDIS_SERIALIZATION_PROTOCOL: int = Field( + description="Redis serialization protocol (RESP) version", + default=3, + ) + + REDIS_ENABLE_CLIENT_SIDE_CACHE: bool = Field( + description="Enable client side cache in redis", + default=False, + ) diff --git a/api/configs/middleware/storage/amazon_s3_storage_config.py b/api/configs/middleware/storage/amazon_s3_storage_config.py index f2d94b12ff..e14c210718 100644 --- a/api/configs/middleware/storage/amazon_s3_storage_config.py +++ b/api/configs/middleware/storage/amazon_s3_storage_config.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Literal, Optional from pydantic import Field from pydantic_settings import BaseSettings @@ -34,7 +34,7 @@ class S3StorageConfig(BaseSettings): default=None, ) - S3_ADDRESS_STYLE: str = Field( + S3_ADDRESS_STYLE: Literal["auto", "virtual", "path"] = Field( description="S3 addressing style: 'auto', 'path', or 'virtual'", default="auto", ) diff --git a/api/configs/middleware/vdb/opensearch_config.py b/api/configs/middleware/vdb/opensearch_config.py index 96f478e9a6..9fd9b60194 100644 --- a/api/configs/middleware/vdb/opensearch_config.py +++ b/api/configs/middleware/vdb/opensearch_config.py @@ -33,6 +33,11 @@ class OpenSearchConfig(BaseSettings): default=False, ) + OPENSEARCH_VERIFY_CERTS: bool = Field( + description="Whether to verify SSL certificates for HTTPS connections (recommended to set True in production)", + default=True, + ) + OPENSEARCH_AUTH_METHOD: AuthMethod = Field( description="Authentication method for OpenSearch connection (default is 'basic')", default=AuthMethod.BASIC, diff --git a/api/configs/middleware/vdb/qdrant_config.py b/api/configs/middleware/vdb/qdrant_config.py index b70f624652..0a753eddec 100644 --- a/api/configs/middleware/vdb/qdrant_config.py +++ b/api/configs/middleware/vdb/qdrant_config.py @@ -33,3 +33,8 @@ class QdrantConfig(BaseSettings): description="Port number for gRPC connection to Qdrant server (default is 6334)", default=6334, ) + + QDRANT_REPLICATION_FACTOR: PositiveInt = Field( + description="Replication factor for Qdrant collections (default is 1)", + default=1, + ) diff --git a/api/configs/observability/otel/otel_config.py b/api/configs/observability/otel/otel_config.py index 568a800d10..1b88ddcfe6 100644 --- a/api/configs/observability/otel/otel_config.py +++ b/api/configs/observability/otel/otel_config.py @@ -27,6 +27,11 @@ class OTelConfig(BaseSettings): default="otlp", ) + OTEL_EXPORTER_OTLP_PROTOCOL: str = Field( + description="OTLP exporter protocol ('grpc' or 'http')", + default="http", + ) + OTEL_SAMPLING_RATE: float = Field(default=0.1, description="Sampling rate for traces (0.0 to 1.0)") OTEL_BATCH_EXPORT_SCHEDULE_DELAY: int = Field( diff --git a/api/configs/packaging/__init__.py b/api/configs/packaging/__init__.py index c7960e1356..ae4875d591 100644 --- 
a/api/configs/packaging/__init__.py +++ b/api/configs/packaging/__init__.py @@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings): CURRENT_VERSION: str = Field( description="Dify version", - default="1.3.1", + default="1.4.1", ) COMMIT_SHA: str = Field( diff --git a/api/contexts/__init__.py b/api/contexts/__init__.py index 127b8fe76d..ae41a2c03a 100644 --- a/api/contexts/__init__.py +++ b/api/contexts/__init__.py @@ -11,10 +11,6 @@ if TYPE_CHECKING: from core.workflow.entities.variable_pool import VariablePool -tenant_id: ContextVar[str] = ContextVar("tenant_id") - -workflow_variable_pool: ContextVar["VariablePool"] = ContextVar("workflow_variable_pool") - """ To avoid race-conditions caused by gunicorn thread recycling, using RecyclableContextVar to replace with """ diff --git a/api/controllers/common/fields.py b/api/controllers/common/fields.py index 79869916ed..3466eea1f6 100644 --- a/api/controllers/common/fields.py +++ b/api/controllers/common/fields.py @@ -1,5 +1,7 @@ from flask_restful import fields +from libs.helper import AppIconUrlField + parameters__system_parameters = { "image_file_size_limit": fields.Integer, "video_file_size_limit": fields.Integer, @@ -22,3 +24,20 @@ parameters_fields = { "file_upload": fields.Raw, "system_parameters": fields.Nested(parameters__system_parameters), } + +site_fields = { + "title": fields.String, + "chat_color_theme": fields.String, + "chat_color_theme_inverted": fields.Boolean, + "icon_type": fields.String, + "icon": fields.String, + "icon_background": fields.String, + "icon_url": AppIconUrlField, + "description": fields.String, + "copyright": fields.String, + "privacy_policy": fields.String, + "custom_disclaimer": fields.String, + "default_language": fields.String, + "show_workflow_steps": fields.Boolean, + "use_icon_as_answer_icon": fields.Boolean, +} diff --git a/api/controllers/console/app/app.py b/api/controllers/console/app/app.py index f97209c369..860166a61a 100644 --- a/api/controllers/console/app/app.py +++ b/api/controllers/console/app/app.py @@ -17,15 +17,13 @@ from controllers.console.wraps import ( ) from core.ops.ops_trace_manager import OpsTraceManager from extensions.ext_database import db -from fields.app_fields import ( - app_detail_fields, - app_detail_fields_with_site, - app_pagination_fields, -) +from fields.app_fields import app_detail_fields, app_detail_fields_with_site, app_pagination_fields from libs.login import login_required from models import Account, App from services.app_dsl_service import AppDslService, ImportMode from services.app_service import AppService +from services.enterprise.enterprise_service import EnterpriseService +from services.feature_service import FeatureService ALLOW_CREATE_APP_MODES = ["chat", "agent-chat", "advanced-chat", "workflow", "completion"] @@ -75,7 +73,17 @@ class AppListApi(Resource): if not app_pagination: return {"data": [], "total": 0, "page": 1, "limit": 20, "has_more": False} - return marshal(app_pagination, app_pagination_fields) + if FeatureService.get_system_features().webapp_auth.enabled: + app_ids = [str(app.id) for app in app_pagination.items] + res = EnterpriseService.WebAppAuth.batch_get_app_access_mode_by_id(app_ids=app_ids) + if len(res) != len(app_ids): + raise BadRequest("Invalid app id in webapp auth") + + for app in app_pagination.items: + if str(app.id) in res: + app.access_mode = res[str(app.id)].access_mode + + return marshal(app_pagination, app_pagination_fields), 200 @setup_required @login_required @@ -119,6 +127,10 @@ class AppApi(Resource): app_model = 
app_service.get_app(app_model) + if FeatureService.get_system_features().webapp_auth.enabled: + app_setting = EnterpriseService.WebAppAuth.get_app_access_mode_by_id(app_id=str(app_model.id)) + app_model.access_mode = app_setting.access_mode + return app_model @setup_required diff --git a/api/controllers/console/app/workflow.py b/api/controllers/console/app/workflow.py index 0c13adce9b..cbbdd324ba 100644 --- a/api/controllers/console/app/workflow.py +++ b/api/controllers/console/app/workflow.py @@ -81,8 +81,7 @@ class DraftWorkflowApi(Resource): parser.add_argument("graph", type=dict, required=True, nullable=False, location="json") parser.add_argument("features", type=dict, required=True, nullable=False, location="json") parser.add_argument("hash", type=str, required=False, location="json") - # TODO: set this to required=True after frontend is updated - parser.add_argument("environment_variables", type=list, required=False, location="json") + parser.add_argument("environment_variables", type=list, required=True, location="json") parser.add_argument("conversation_variables", type=list, required=False, location="json") args = parser.parse_args() elif "text/plain" in content_type: diff --git a/api/controllers/console/app/workflow_run.py b/api/controllers/console/app/workflow_run.py index 08ab61bbb9..9099700213 100644 --- a/api/controllers/console/app/workflow_run.py +++ b/api/controllers/console/app/workflow_run.py @@ -1,3 +1,6 @@ +from typing import cast + +from flask_login import current_user from flask_restful import Resource, marshal_with, reqparse from flask_restful.inputs import int_range @@ -12,8 +15,7 @@ from fields.workflow_run_fields import ( ) from libs.helper import uuid_value from libs.login import login_required -from models import App -from models.model import AppMode +from models import Account, App, AppMode, EndUser from services.workflow_run_service import WorkflowRunService @@ -90,7 +92,12 @@ class WorkflowRunNodeExecutionListApi(Resource): run_id = str(run_id) workflow_run_service = WorkflowRunService() - node_executions = workflow_run_service.get_workflow_run_node_executions(app_model=app_model, run_id=run_id) + user = cast("Account | EndUser", current_user) + node_executions = workflow_run_service.get_workflow_run_node_executions( + app_model=app_model, + run_id=run_id, + user=user, + ) return {"data": node_executions} diff --git a/api/controllers/console/auth/forgot_password.py b/api/controllers/console/auth/forgot_password.py index d73d8ce701..8db19095d2 100644 --- a/api/controllers/console/auth/forgot_password.py +++ b/api/controllers/console/auth/forgot_password.py @@ -24,7 +24,7 @@ from libs.password import hash_password, valid_password from models.account import Account from services.account_service import AccountService, TenantService from services.errors.account import AccountRegisterError -from services.errors.workspace import WorkSpaceNotAllowedCreateError +from services.errors.workspace import WorkSpaceNotAllowedCreateError, WorkspacesLimitExceededError from services.feature_service import FeatureService @@ -119,6 +119,9 @@ class ForgotPasswordResetApi(Resource): if not reset_data: raise InvalidTokenError() # Must use token in reset phase + if reset_data.get("phase", "") != "reset": + raise InvalidTokenError() + # Must use token in reset phase if reset_data.get("phase", "") != "reset": raise InvalidTokenError() @@ -168,6 +171,8 @@ class ForgotPasswordResetApi(Resource): ) except WorkSpaceNotAllowedCreateError: pass + except WorkspacesLimitExceededError: + pass 
except AccountRegisterError: raise AccountInFreezeError() diff --git a/api/controllers/console/auth/login.py b/api/controllers/console/auth/login.py index 27864bab3d..5f2a24322d 100644 --- a/api/controllers/console/auth/login.py +++ b/api/controllers/console/auth/login.py @@ -21,6 +21,7 @@ from controllers.console.error import ( AccountNotFound, EmailSendIpLimitError, NotAllowedCreateWorkspace, + WorkspacesLimitExceeded, ) from controllers.console.wraps import email_password_login_enabled, setup_required from events.tenant_event import tenant_was_created @@ -30,7 +31,7 @@ from models.account import Account from services.account_service import AccountService, RegisterService, TenantService from services.billing_service import BillingService from services.errors.account import AccountRegisterError -from services.errors.workspace import WorkSpaceNotAllowedCreateError +from services.errors.workspace import WorkSpaceNotAllowedCreateError, WorkspacesLimitExceededError from services.feature_service import FeatureService @@ -88,10 +89,15 @@ class LoginApi(Resource): # SELF_HOSTED only have one workspace tenants = TenantService.get_join_tenants(account) if len(tenants) == 0: - return { - "result": "fail", - "data": "workspace not found, please contact system admin to invite you to join in a workspace", - } + system_features = FeatureService.get_system_features() + + if system_features.is_allow_create_workspace and not system_features.license.workspaces.is_available(): + raise WorkspacesLimitExceeded() + else: + return { + "result": "fail", + "data": "workspace not found, please contact system admin to invite you to join in a workspace", + } token_pair = AccountService.login(account=account, ip_address=extract_remote_ip(request)) AccountService.reset_login_error_rate_limit(args["email"]) @@ -196,15 +202,18 @@ class EmailCodeLoginApi(Resource): except AccountRegisterError as are: raise AccountInFreezeError() if account: - tenant = TenantService.get_join_tenants(account) - if not tenant: + tenants = TenantService.get_join_tenants(account) + if not tenants: + workspaces = FeatureService.get_system_features().license.workspaces + if not workspaces.is_available(): + raise WorkspacesLimitExceeded() if not FeatureService.get_system_features().is_allow_create_workspace: raise NotAllowedCreateWorkspace() else: - tenant = TenantService.create_tenant(f"{account.name}'s Workspace") - TenantService.create_tenant_member(tenant, account, role="owner") - account.current_tenant = tenant - tenant_was_created.send(tenant) + new_tenant = TenantService.create_tenant(f"{account.name}'s Workspace") + TenantService.create_tenant_member(new_tenant, account, role="owner") + account.current_tenant = new_tenant + tenant_was_created.send(new_tenant) if account is None: try: @@ -215,6 +224,8 @@ class EmailCodeLoginApi(Resource): return NotAllowedCreateWorkspace() except AccountRegisterError as are: raise AccountInFreezeError() + except WorkspacesLimitExceededError: + raise WorkspacesLimitExceeded() token_pair = AccountService.login(account, ip_address=extract_remote_ip(request)) AccountService.reset_login_error_rate_limit(args["email"]) return {"result": "success", "data": token_pair.model_dump()} diff --git a/api/controllers/console/auth/oauth.py b/api/controllers/console/auth/oauth.py index f5284cc43b..395367c9e2 100644 --- a/api/controllers/console/auth/oauth.py +++ b/api/controllers/console/auth/oauth.py @@ -148,15 +148,15 @@ def _generate_account(provider: str, user_info: OAuthUserInfo): account = 
_get_account_by_openid_or_email(provider, user_info) if account: - tenant = TenantService.get_join_tenants(account) - if not tenant: + tenants = TenantService.get_join_tenants(account) + if not tenants: if not FeatureService.get_system_features().is_allow_create_workspace: raise WorkSpaceNotAllowedCreateError() else: - tenant = TenantService.create_tenant(f"{account.name}'s Workspace") - TenantService.create_tenant_member(tenant, account, role="owner") - account.current_tenant = tenant - tenant_was_created.send(tenant) + new_tenant = TenantService.create_tenant(f"{account.name}'s Workspace") + TenantService.create_tenant_member(new_tenant, account, role="owner") + account.current_tenant = new_tenant + tenant_was_created.send(new_tenant) if not account: if not FeatureService.get_system_features().is_allow_register: diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index 571a395780..e68273afa6 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -526,17 +526,36 @@ class DatasetIndexingStatusApi(Resource): ) documents_status = [] for document in documents: - completed_segments = DocumentSegment.query.filter( - DocumentSegment.completed_at.isnot(None), - DocumentSegment.document_id == str(document.id), - DocumentSegment.status != "re_segment", - ).count() - total_segments = DocumentSegment.query.filter( - DocumentSegment.document_id == str(document.id), DocumentSegment.status != "re_segment" - ).count() - document.completed_segments = completed_segments - document.total_segments = total_segments - documents_status.append(marshal(document, document_status_fields)) + completed_segments = ( + db.session.query(DocumentSegment) + .filter( + DocumentSegment.completed_at.isnot(None), + DocumentSegment.document_id == str(document.id), + DocumentSegment.status != "re_segment", + ) + .count() + ) + total_segments = ( + db.session.query(DocumentSegment) + .filter(DocumentSegment.document_id == str(document.id), DocumentSegment.status != "re_segment") + .count() + ) + # Create a dictionary with document attributes and additional fields + document_dict = { + "id": document.id, + "indexing_status": document.indexing_status, + "processing_started_at": document.processing_started_at, + "parsing_completed_at": document.parsing_completed_at, + "cleaning_completed_at": document.cleaning_completed_at, + "splitting_completed_at": document.splitting_completed_at, + "completed_at": document.completed_at, + "paused_at": document.paused_at, + "error": document.error, + "stopped_at": document.stopped_at, + "completed_segments": completed_segments, + "total_segments": total_segments, + } + documents_status.append(marshal(document_dict, document_status_fields)) data = {"data": documents_status} return data diff --git a/api/controllers/console/datasets/datasets_document.py b/api/controllers/console/datasets/datasets_document.py index 68601adfed..f7c04102a9 100644 --- a/api/controllers/console/datasets/datasets_document.py +++ b/api/controllers/console/datasets/datasets_document.py @@ -6,7 +6,7 @@ from typing import cast from flask import request from flask_login import current_user from flask_restful import Resource, fields, marshal, marshal_with, reqparse -from sqlalchemy import asc, desc +from sqlalchemy import asc, desc, select from werkzeug.exceptions import Forbidden, NotFound import services @@ -112,7 +112,7 @@ class GetProcessRuleApi(Resource): limits = DocumentService.DEFAULT_RULES["limits"] if 
document_id: # get the latest process rule - document = Document.query.get_or_404(document_id) + document = db.get_or_404(Document, document_id) dataset = DatasetService.get_dataset(document.dataset_id) @@ -175,7 +175,7 @@ class DatasetDocumentListApi(Resource): except services.errors.account.NoPermissionError as e: raise Forbidden(str(e)) - query = Document.query.filter_by(dataset_id=str(dataset_id), tenant_id=current_user.current_tenant_id) + query = select(Document).filter_by(dataset_id=str(dataset_id), tenant_id=current_user.current_tenant_id) if search: search = f"%{search}%" @@ -209,18 +209,24 @@ class DatasetDocumentListApi(Resource): desc(Document.position), ) - paginated_documents = query.paginate(page=page, per_page=limit, max_per_page=100, error_out=False) + paginated_documents = db.paginate(select=query, page=page, per_page=limit, max_per_page=100, error_out=False) documents = paginated_documents.items if fetch: for document in documents: - completed_segments = DocumentSegment.query.filter( - DocumentSegment.completed_at.isnot(None), - DocumentSegment.document_id == str(document.id), - DocumentSegment.status != "re_segment", - ).count() - total_segments = DocumentSegment.query.filter( - DocumentSegment.document_id == str(document.id), DocumentSegment.status != "re_segment" - ).count() + completed_segments = ( + db.session.query(DocumentSegment) + .filter( + DocumentSegment.completed_at.isnot(None), + DocumentSegment.document_id == str(document.id), + DocumentSegment.status != "re_segment", + ) + .count() + ) + total_segments = ( + db.session.query(DocumentSegment) + .filter(DocumentSegment.document_id == str(document.id), DocumentSegment.status != "re_segment") + .count() + ) document.completed_segments = completed_segments document.total_segments = total_segments data = marshal(documents, document_with_segments_fields) @@ -563,19 +569,36 @@ class DocumentBatchIndexingStatusApi(DocumentResource): documents = self.get_batch_documents(dataset_id, batch) documents_status = [] for document in documents: - completed_segments = DocumentSegment.query.filter( - DocumentSegment.completed_at.isnot(None), - DocumentSegment.document_id == str(document.id), - DocumentSegment.status != "re_segment", - ).count() - total_segments = DocumentSegment.query.filter( - DocumentSegment.document_id == str(document.id), DocumentSegment.status != "re_segment" - ).count() - document.completed_segments = completed_segments - document.total_segments = total_segments - if document.is_paused: - document.indexing_status = "paused" - documents_status.append(marshal(document, document_status_fields)) + completed_segments = ( + db.session.query(DocumentSegment) + .filter( + DocumentSegment.completed_at.isnot(None), + DocumentSegment.document_id == str(document.id), + DocumentSegment.status != "re_segment", + ) + .count() + ) + total_segments = ( + db.session.query(DocumentSegment) + .filter(DocumentSegment.document_id == str(document.id), DocumentSegment.status != "re_segment") + .count() + ) + # Create a dictionary with document attributes and additional fields + document_dict = { + "id": document.id, + "indexing_status": "paused" if document.is_paused else document.indexing_status, + "processing_started_at": document.processing_started_at, + "parsing_completed_at": document.parsing_completed_at, + "cleaning_completed_at": document.cleaning_completed_at, + "splitting_completed_at": document.splitting_completed_at, + "completed_at": document.completed_at, + "paused_at": document.paused_at, + "error": 
document.error, + "stopped_at": document.stopped_at, + "completed_segments": completed_segments, + "total_segments": total_segments, + } + documents_status.append(marshal(document_dict, document_status_fields)) data = {"data": documents_status} return data @@ -589,20 +612,37 @@ class DocumentIndexingStatusApi(DocumentResource): document_id = str(document_id) document = self.get_document(dataset_id, document_id) - completed_segments = DocumentSegment.query.filter( - DocumentSegment.completed_at.isnot(None), - DocumentSegment.document_id == str(document_id), - DocumentSegment.status != "re_segment", - ).count() - total_segments = DocumentSegment.query.filter( - DocumentSegment.document_id == str(document_id), DocumentSegment.status != "re_segment" - ).count() + completed_segments = ( + db.session.query(DocumentSegment) + .filter( + DocumentSegment.completed_at.isnot(None), + DocumentSegment.document_id == str(document_id), + DocumentSegment.status != "re_segment", + ) + .count() + ) + total_segments = ( + db.session.query(DocumentSegment) + .filter(DocumentSegment.document_id == str(document_id), DocumentSegment.status != "re_segment") + .count() + ) - document.completed_segments = completed_segments - document.total_segments = total_segments - if document.is_paused: - document.indexing_status = "paused" - return marshal(document, document_status_fields) + # Create a dictionary with document attributes and additional fields + document_dict = { + "id": document.id, + "indexing_status": "paused" if document.is_paused else document.indexing_status, + "processing_started_at": document.processing_started_at, + "parsing_completed_at": document.parsing_completed_at, + "cleaning_completed_at": document.cleaning_completed_at, + "splitting_completed_at": document.splitting_completed_at, + "completed_at": document.completed_at, + "paused_at": document.paused_at, + "error": document.error, + "stopped_at": document.stopped_at, + "completed_segments": completed_segments, + "total_segments": total_segments, + } + return marshal(document_dict, document_status_fields) class DocumentDetailApi(DocumentResource): diff --git a/api/controllers/console/datasets/datasets_segments.py b/api/controllers/console/datasets/datasets_segments.py index a145038672..eee09ac32e 100644 --- a/api/controllers/console/datasets/datasets_segments.py +++ b/api/controllers/console/datasets/datasets_segments.py @@ -4,6 +4,7 @@ import pandas as pd from flask import request from flask_login import current_user from flask_restful import Resource, marshal, reqparse +from sqlalchemy import select from werkzeug.exceptions import Forbidden, NotFound import services @@ -26,6 +27,7 @@ from controllers.console.wraps import ( from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError from core.model_manager import ModelManager from core.model_runtime.entities.model_entities import ModelType +from extensions.ext_database import db from extensions.ext_redis import redis_client from fields.segment_fields import child_chunk_fields, segment_fields from libs.login import login_required @@ -74,9 +76,14 @@ class DatasetDocumentSegmentListApi(Resource): hit_count_gte = args["hit_count_gte"] keyword = args["keyword"] - query = DocumentSegment.query.filter( - DocumentSegment.document_id == str(document_id), DocumentSegment.tenant_id == current_user.current_tenant_id - ).order_by(DocumentSegment.position.asc()) + query = ( + select(DocumentSegment) + .filter( + DocumentSegment.document_id == str(document_id), + DocumentSegment.tenant_id == 
current_user.current_tenant_id, + ) + .order_by(DocumentSegment.position.asc()) + ) if status_list: query = query.filter(DocumentSegment.status.in_(status_list)) @@ -93,7 +100,7 @@ class DatasetDocumentSegmentListApi(Resource): elif args["enabled"].lower() == "false": query = query.filter(DocumentSegment.enabled == False) - segments = query.paginate(page=page, per_page=limit, max_per_page=100, error_out=False) + segments = db.paginate(select=query, page=page, per_page=limit, max_per_page=100, error_out=False) response = { "data": marshal(segments.items, segment_fields), @@ -276,9 +283,11 @@ class DatasetDocumentSegmentUpdateApi(Resource): raise ProviderNotInitializeError(ex.description) # check segment segment_id = str(segment_id) - segment = DocumentSegment.query.filter( - DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id - ).first() + segment = ( + db.session.query(DocumentSegment) + .filter(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id) + .first() + ) if not segment: raise NotFound("Segment not found.") # The role of the current user in the ta table must be admin, owner, dataset_operator, or editor @@ -320,9 +329,11 @@ class DatasetDocumentSegmentUpdateApi(Resource): raise NotFound("Document not found.") # check segment segment_id = str(segment_id) - segment = DocumentSegment.query.filter( - DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id - ).first() + segment = ( + db.session.query(DocumentSegment) + .filter(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id) + .first() + ) if not segment: raise NotFound("Segment not found.") # The role of the current user in the ta table must be admin, owner, dataset_operator, or editor @@ -423,9 +434,11 @@ class ChildChunkAddApi(Resource): raise NotFound("Document not found.") # check segment segment_id = str(segment_id) - segment = DocumentSegment.query.filter( - DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id - ).first() + segment = ( + db.session.query(DocumentSegment) + .filter(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id) + .first() + ) if not segment: raise NotFound("Segment not found.") if not current_user.is_dataset_editor: @@ -478,9 +491,11 @@ class ChildChunkAddApi(Resource): raise NotFound("Document not found.") # check segment segment_id = str(segment_id) - segment = DocumentSegment.query.filter( - DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id - ).first() + segment = ( + db.session.query(DocumentSegment) + .filter(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id) + .first() + ) if not segment: raise NotFound("Segment not found.") parser = reqparse.RequestParser() @@ -523,9 +538,11 @@ class ChildChunkAddApi(Resource): raise NotFound("Document not found.") # check segment segment_id = str(segment_id) - segment = DocumentSegment.query.filter( - DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id - ).first() + segment = ( + db.session.query(DocumentSegment) + .filter(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id) + .first() + ) if not segment: raise NotFound("Segment not found.") # The role of the current user in the ta table must be admin, 
owner, dataset_operator, or editor @@ -567,16 +584,20 @@ class ChildChunkUpdateApi(Resource): raise NotFound("Document not found.") # check segment segment_id = str(segment_id) - segment = DocumentSegment.query.filter( - DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id - ).first() + segment = ( + db.session.query(DocumentSegment) + .filter(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id) + .first() + ) if not segment: raise NotFound("Segment not found.") # check child chunk child_chunk_id = str(child_chunk_id) - child_chunk = ChildChunk.query.filter( - ChildChunk.id == str(child_chunk_id), ChildChunk.tenant_id == current_user.current_tenant_id - ).first() + child_chunk = ( + db.session.query(ChildChunk) + .filter(ChildChunk.id == str(child_chunk_id), ChildChunk.tenant_id == current_user.current_tenant_id) + .first() + ) if not child_chunk: raise NotFound("Child chunk not found.") # The role of the current user in the ta table must be admin, owner, dataset_operator, or editor @@ -612,16 +633,20 @@ class ChildChunkUpdateApi(Resource): raise NotFound("Document not found.") # check segment segment_id = str(segment_id) - segment = DocumentSegment.query.filter( - DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id - ).first() + segment = ( + db.session.query(DocumentSegment) + .filter(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id) + .first() + ) if not segment: raise NotFound("Segment not found.") # check child chunk child_chunk_id = str(child_chunk_id) - child_chunk = ChildChunk.query.filter( - ChildChunk.id == str(child_chunk_id), ChildChunk.tenant_id == current_user.current_tenant_id - ).first() + child_chunk = ( + db.session.query(ChildChunk) + .filter(ChildChunk.id == str(child_chunk_id), ChildChunk.tenant_id == current_user.current_tenant_id) + .first() + ) if not child_chunk: raise NotFound("Child chunk not found.") # The role of the current user in the ta table must be admin, owner, dataset_operator, or editor diff --git a/api/controllers/console/datasets/external.py b/api/controllers/console/datasets/external.py index 30b7f63aab..cf9081e154 100644 --- a/api/controllers/console/datasets/external.py +++ b/api/controllers/console/datasets/external.py @@ -209,6 +209,7 @@ class ExternalKnowledgeHitTestingApi(Resource): parser = reqparse.RequestParser() parser.add_argument("query", type=str, location="json") parser.add_argument("external_retrieval_model", type=dict, required=False, location="json") + parser.add_argument("metadata_filtering_conditions", type=dict, required=False, location="json") args = parser.parse_args() HitTestingService.hit_testing_args_check(args) @@ -219,6 +220,7 @@ class ExternalKnowledgeHitTestingApi(Resource): query=args["query"], account=current_user, external_retrieval_model=args["external_retrieval_model"], + metadata_filtering_conditions=args["metadata_filtering_conditions"], ) return response diff --git a/api/controllers/console/error.py b/api/controllers/console/error.py index b8fd1f0358..6944c56bf8 100644 --- a/api/controllers/console/error.py +++ b/api/controllers/console/error.py @@ -46,6 +46,18 @@ class NotAllowedCreateWorkspace(BaseHTTPException): code = 400 +class WorkspaceMembersLimitExceeded(BaseHTTPException): + error_code = "limit_exceeded" + description = "Unable to add member because the maximum workspace's member limit was exceeded" + code = 400 + + 
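The dataset hunks above follow one recurring pattern: the legacy Flask-SQLAlchemy `Model.query` API is replaced with explicit `db.session.query()` calls and `select()` statements that are paginated through `db.paginate()`. The following is a minimal, self-contained sketch of that pattern only, using a hypothetical `DocumentSegment` stand-in model and an in-memory SQLite database rather than Dify's real models or session setup:

    # Sketch (not Dify code): legacy "Model.query" usage replaced by explicit
    # session queries and select() statements, paginated via db.paginate().
    from flask import Flask
    from flask_sqlalchemy import SQLAlchemy
    from sqlalchemy import select

    app = Flask(__name__)
    app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///:memory:"
    db = SQLAlchemy(app)


    class DocumentSegment(db.Model):  # hypothetical stand-in for the real model
        id = db.Column(db.Integer, primary_key=True)
        document_id = db.Column(db.String)
        status = db.Column(db.String)
        position = db.Column(db.Integer)


    def count_segments(document_id: str) -> int:
        # Old style: DocumentSegment.query.filter(...).count()
        # New style: build the query from the session explicitly.
        return (
            db.session.query(DocumentSegment)
            .filter(
                DocumentSegment.document_id == document_id,
                DocumentSegment.status != "re_segment",
            )
            .count()
        )


    def list_segments(document_id: str, page: int = 1, limit: int = 20):
        # Old style: DocumentSegment.query.filter(...).paginate(...)
        # New style: pass a select() statement to db.paginate().
        stmt = (
            select(DocumentSegment)
            .filter(DocumentSegment.document_id == document_id)
            .order_by(DocumentSegment.position.asc())
        )
        return db.paginate(select=stmt, page=page, per_page=limit, max_per_page=100, error_out=False)


    if __name__ == "__main__":
        with app.app_context():
            db.create_all()
            print(count_segments("doc-1"), list_segments("doc-1").total)

Building statements with `select()` keeps the queries in SQLAlchemy 2.0 style and lets `db.paginate()` handle counting and slicing, which appears to be why the hunks pass `select=query` to `db.paginate()` instead of calling `.paginate()` on a query object.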
+class WorkspacesLimitExceeded(BaseHTTPException): + error_code = "limit_exceeded" + description = "Unable to create workspace because the maximum workspace limit was exceeded" + code = 400 + + class AccountBannedError(BaseHTTPException): error_code = "account_banned" description = "Account is banned." diff --git a/api/controllers/console/explore/error.py b/api/controllers/console/explore/error.py index 18221b7797..1e05ff4206 100644 --- a/api/controllers/console/explore/error.py +++ b/api/controllers/console/explore/error.py @@ -23,3 +23,9 @@ class AppSuggestedQuestionsAfterAnswerDisabledError(BaseHTTPException): error_code = "app_suggested_questions_after_answer_disabled" description = "Function Suggested questions after answer disabled." code = 403 + + +class AppAccessDeniedError(BaseHTTPException): + error_code = "access_denied" + description = "App access denied." + code = 403 diff --git a/api/controllers/console/explore/installed_app.py b/api/controllers/console/explore/installed_app.py index 9336c35a0d..f73a226c89 100644 --- a/api/controllers/console/explore/installed_app.py +++ b/api/controllers/console/explore/installed_app.py @@ -1,3 +1,4 @@ +import logging from datetime import UTC, datetime from typing import Any @@ -15,6 +16,11 @@ from fields.installed_app_fields import installed_app_list_fields from libs.login import login_required from models import App, InstalledApp, RecommendedApp from services.account_service import TenantService +from services.app_service import AppService +from services.enterprise.enterprise_service import EnterpriseService +from services.feature_service import FeatureService + +logger = logging.getLogger(__name__) class InstalledAppsListApi(Resource): @@ -48,6 +54,21 @@ class InstalledAppsListApi(Resource): for installed_app in installed_apps if installed_app.app is not None ] + + # filter out apps that user doesn't have access to + if FeatureService.get_system_features().webapp_auth.enabled: + user_id = current_user.id + res = [] + for installed_app in installed_app_list: + app_code = AppService.get_app_code_by_id(str(installed_app["app"].id)) + if EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp( + user_id=user_id, + app_code=app_code, + ): + res.append(installed_app) + installed_app_list = res + logger.debug(f"installed_app_list: {installed_app_list}, user_id: {user_id}") + installed_app_list.sort( key=lambda app: ( -app["is_pinned"], @@ -66,7 +87,7 @@ class InstalledAppsListApi(Resource): parser.add_argument("app_id", type=str, required=True, help="Invalid app_id") args = parser.parse_args() - recommended_app = RecommendedApp.query.filter(RecommendedApp.app_id == args["app_id"]).first() + recommended_app = db.session.query(RecommendedApp).filter(RecommendedApp.app_id == args["app_id"]).first() if recommended_app is None: raise NotFound("App not found") @@ -79,9 +100,11 @@ class InstalledAppsListApi(Resource): if not app.is_public: raise Forbidden("You can't install a non-public app") - installed_app = InstalledApp.query.filter( - and_(InstalledApp.app_id == args["app_id"], InstalledApp.tenant_id == current_tenant_id) - ).first() + installed_app = ( + db.session.query(InstalledApp) + .filter(and_(InstalledApp.app_id == args["app_id"], InstalledApp.tenant_id == current_tenant_id)) + .first() + ) if installed_app is None: # todo: position diff --git a/api/controllers/console/explore/wraps.py b/api/controllers/console/explore/wraps.py index 49ea81a8a0..afbd78bd5b 100644 --- a/api/controllers/console/explore/wraps.py +++ 
b/api/controllers/console/explore/wraps.py @@ -4,10 +4,14 @@ from flask_login import current_user from flask_restful import Resource from werkzeug.exceptions import NotFound +from controllers.console.explore.error import AppAccessDeniedError from controllers.console.wraps import account_initialization_required from extensions.ext_database import db from libs.login import login_required from models import InstalledApp +from services.app_service import AppService +from services.enterprise.enterprise_service import EnterpriseService +from services.feature_service import FeatureService def installed_app_required(view=None): @@ -48,6 +52,36 @@ def installed_app_required(view=None): return decorator +def user_allowed_to_access_app(view=None): + def decorator(view): + @wraps(view) + def decorated(installed_app: InstalledApp, *args, **kwargs): + feature = FeatureService.get_system_features() + if feature.webapp_auth.enabled: + app_id = installed_app.app_id + app_code = AppService.get_app_code_by_id(app_id) + res = EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp( + user_id=str(current_user.id), + app_code=app_code, + ) + if not res: + raise AppAccessDeniedError() + + return view(installed_app, *args, **kwargs) + + return decorated + + if view: + return decorator(view) + return decorator + + class InstalledAppResource(Resource): # must be reversed if there are multiple decorators - method_decorators = [installed_app_required, account_initialization_required, login_required] + + method_decorators = [ + user_allowed_to_access_app, + installed_app_required, + account_initialization_required, + login_required, + ] diff --git a/api/controllers/console/workspace/members.py b/api/controllers/console/workspace/members.py index b9918b0d32..db49da7840 100644 --- a/api/controllers/console/workspace/members.py +++ b/api/controllers/console/workspace/members.py @@ -6,6 +6,7 @@ from flask_restful import Resource, abort, marshal_with, reqparse import services from configs import dify_config from controllers.console import api +from controllers.console.error import WorkspaceMembersLimitExceeded from controllers.console.wraps import ( account_initialization_required, cloud_edition_billing_resource_check, @@ -17,6 +18,7 @@ from libs.login import login_required from models.account import Account, TenantAccountRole from services.account_service import RegisterService, TenantService from services.errors.account import AccountAlreadyInTenantError +from services.feature_service import FeatureService class MemberListApi(Resource): @@ -54,6 +56,12 @@ class MemberInviteEmailApi(Resource): inviter = current_user invitation_results = [] console_web_url = dify_config.CONSOLE_WEB_URL + + workspace_members = FeatureService.get_features(tenant_id=inviter.current_tenant.id).workspace_members + + if not workspace_members.is_available(len(invitee_emails)): + raise WorkspaceMembersLimitExceeded() + for invitee_email in invitee_emails: try: token = RegisterService.invite_new_member( @@ -71,7 +79,6 @@ class MemberInviteEmailApi(Resource): invitation_results.append( {"status": "success", "email": invitee_email, "url": f"{console_web_url}/signin"} ) - break except Exception as e: invitation_results.append({"status": "failed", "email": invitee_email, "message": str(e)}) diff --git a/api/controllers/console/workspace/plugin.py b/api/controllers/console/workspace/plugin.py index fda5a7d3bb..9bddbb4b4b 100644 --- a/api/controllers/console/workspace/plugin.py +++ b/api/controllers/console/workspace/plugin.py @@ -41,12 +41,16 @@ 
class PluginListApi(Resource): @account_initialization_required def get(self): tenant_id = current_user.current_tenant_id + parser = reqparse.RequestParser() + parser.add_argument("page", type=int, required=False, location="args", default=1) + parser.add_argument("page_size", type=int, required=False, location="args", default=256) + args = parser.parse_args() try: - plugins = PluginService.list(tenant_id) + plugins_with_total = PluginService.list_with_total(tenant_id, args["page"], args["page_size"]) except PluginDaemonClientSideError as e: raise ValueError(e) - return jsonable_encoder({"plugins": plugins}) + return jsonable_encoder({"plugins": plugins_with_total.list, "total": plugins_with_total.total}) class PluginListLatestVersionsApi(Resource): diff --git a/api/controllers/console/workspace/workspace.py b/api/controllers/console/workspace/workspace.py index 71e6f9178f..19999e7361 100644 --- a/api/controllers/console/workspace/workspace.py +++ b/api/controllers/console/workspace/workspace.py @@ -3,6 +3,7 @@ import logging from flask import request from flask_login import current_user from flask_restful import Resource, fields, inputs, marshal, marshal_with, reqparse +from sqlalchemy import select from werkzeug.exceptions import Unauthorized import services @@ -67,16 +68,24 @@ class TenantListApi(Resource): @account_initialization_required def get(self): tenants = TenantService.get_join_tenants(current_user) + tenant_dicts = [] for tenant in tenants: features = FeatureService.get_features(tenant.id) - if features.billing.enabled: - tenant.plan = features.billing.subscription.plan - else: - tenant.plan = "sandbox" - if tenant.id == current_user.current_tenant_id: - tenant.current = True # Set current=True for current tenant - return {"workspaces": marshal(tenants, tenants_fields)}, 200 + + # Create a dictionary with tenant attributes + tenant_dict = { + "id": tenant.id, + "name": tenant.name, + "status": tenant.status, + "created_at": tenant.created_at, + "plan": features.billing.subscription.plan if features.billing.enabled else "sandbox", + "current": tenant.id == current_user.current_tenant_id, + } + + tenant_dicts.append(tenant_dict) + + return {"workspaces": marshal(tenant_dicts, tenants_fields)}, 200 class WorkspaceListApi(Resource): @@ -88,9 +97,8 @@ class WorkspaceListApi(Resource): parser.add_argument("limit", type=inputs.int_range(1, 100), required=False, default=20, location="args") args = parser.parse_args() - tenants = Tenant.query.order_by(Tenant.created_at.desc()).paginate( - page=args["page"], per_page=args["limit"], error_out=False - ) + stmt = select(Tenant).order_by(Tenant.created_at.desc()) + tenants = db.paginate(select=stmt, page=args["page"], per_page=args["limit"], error_out=False) has_more = False if tenants.has_next: @@ -162,7 +170,7 @@ class CustomConfigWorkspaceApi(Resource): parser.add_argument("replace_webapp_logo", type=str, location="json") args = parser.parse_args() - tenant = Tenant.query.filter(Tenant.id == current_user.current_tenant_id).one_or_404() + tenant = db.get_or_404(Tenant, current_user.current_tenant_id) custom_config_dict = { "remove_webapp_brand": args["remove_webapp_brand"], @@ -226,7 +234,7 @@ class WorkspaceInfoApi(Resource): parser.add_argument("name", type=str, required=True, location="json") args = parser.parse_args() - tenant = Tenant.query.filter(Tenant.id == current_user.current_tenant_id).one_or_404() + tenant = db.get_or_404(Tenant, current_user.current_tenant_id) tenant.name = args["name"] db.session.commit() diff --git 
a/api/controllers/files/upload.py b/api/controllers/files/upload.py index 6641632169..f1a15793c7 100644 --- a/api/controllers/files/upload.py +++ b/api/controllers/files/upload.py @@ -64,9 +64,24 @@ class PluginUploadFileApi(Resource): extension = guess_extension(tool_file.mimetype) or ".bin" preview_url = ToolFileManager.sign_file(tool_file_id=tool_file.id, extension=extension) - tool_file.mime_type = mimetype - tool_file.extension = extension - tool_file.preview_url = preview_url + + # Create a dictionary with all the necessary attributes + result = { + "id": tool_file.id, + "user_id": tool_file.user_id, + "tenant_id": tool_file.tenant_id, + "conversation_id": tool_file.conversation_id, + "file_key": tool_file.file_key, + "mimetype": tool_file.mimetype, + "original_url": tool_file.original_url, + "name": tool_file.name, + "size": tool_file.size, + "mime_type": mimetype, + "extension": extension, + "preview_url": preview_url, + } + + return result, 201 except services.errors.file.FileTooLargeError as file_too_large_error: raise FileTooLargeError(file_too_large_error.description) except services.errors.file.UnsupportedFileTypeError: diff --git a/api/controllers/inner_api/__init__.py b/api/controllers/inner_api/__init__.py index f147a3453f..d51db4322a 100644 --- a/api/controllers/inner_api/__init__.py +++ b/api/controllers/inner_api/__init__.py @@ -5,5 +5,6 @@ from libs.external_api import ExternalApi bp = Blueprint("inner_api", __name__, url_prefix="/inner/api") api = ExternalApi(bp) +from . import mail from .plugin import plugin from .workspace import workspace diff --git a/api/controllers/inner_api/mail.py b/api/controllers/inner_api/mail.py new file mode 100644 index 0000000000..ce3373d65c --- /dev/null +++ b/api/controllers/inner_api/mail.py @@ -0,0 +1,27 @@ +from flask_restful import ( + Resource, # type: ignore + reqparse, +) + +from controllers.console.wraps import setup_required +from controllers.inner_api import api +from controllers.inner_api.wraps import enterprise_inner_api_only +from services.enterprise.mail_service import DifyMail, EnterpriseMailService + + +class EnterpriseMail(Resource): + @setup_required + @enterprise_inner_api_only + def post(self): + parser = reqparse.RequestParser() + parser.add_argument("to", type=str, action="append", required=True) + parser.add_argument("subject", type=str, required=True) + parser.add_argument("body", type=str, required=True) + parser.add_argument("substitutions", type=dict, required=False) + args = parser.parse_args() + + EnterpriseMailService.send_mail(DifyMail(**args)) + return {"message": "success"}, 200 + + +api.add_resource(EnterpriseMail, "/enterprise/mail") diff --git a/api/controllers/service_api/__init__.py b/api/controllers/service_api/__init__.py index d97074e8b9..d964e27819 100644 --- a/api/controllers/service_api/__init__.py +++ b/api/controllers/service_api/__init__.py @@ -6,6 +6,6 @@ bp = Blueprint("service_api", __name__, url_prefix="/v1") api = ExternalApi(bp) from . 
import index -from .app import annotation, app, audio, completion, conversation, file, message, workflow +from .app import annotation, app, audio, completion, conversation, file, message, site, workflow from .dataset import dataset, document, hit_testing, metadata, segment, upload_file from .workspace import models diff --git a/api/controllers/service_api/app/annotation.py b/api/controllers/service_api/app/annotation.py index bd1a23b723..1a7f0c935b 100644 --- a/api/controllers/service_api/app/annotation.py +++ b/api/controllers/service_api/app/annotation.py @@ -3,7 +3,7 @@ from flask_restful import Resource, marshal, marshal_with, reqparse from werkzeug.exceptions import Forbidden from controllers.service_api import api -from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token +from controllers.service_api.wraps import validate_app_token from extensions.ext_redis import redis_client from fields.annotation_fields import ( annotation_fields, @@ -14,7 +14,7 @@ from services.annotation_service import AppAnnotationService class AnnotationReplyActionApi(Resource): - @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON)) + @validate_app_token def post(self, app_model: App, end_user: EndUser, action): parser = reqparse.RequestParser() parser.add_argument("score_threshold", required=True, type=float, location="json") @@ -31,7 +31,7 @@ class AnnotationReplyActionApi(Resource): class AnnotationReplyActionStatusApi(Resource): - @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.QUERY)) + @validate_app_token def get(self, app_model: App, end_user: EndUser, job_id, action): job_id = str(job_id) app_annotation_job_key = "{}_app_annotation_job_{}".format(action, str(job_id)) @@ -49,7 +49,7 @@ class AnnotationReplyActionStatusApi(Resource): class AnnotationListApi(Resource): - @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.QUERY)) + @validate_app_token def get(self, app_model: App, end_user: EndUser): page = request.args.get("page", default=1, type=int) limit = request.args.get("limit", default=20, type=int) @@ -65,7 +65,7 @@ class AnnotationListApi(Resource): } return response, 200 - @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON)) + @validate_app_token @marshal_with(annotation_fields) def post(self, app_model: App, end_user: EndUser): parser = reqparse.RequestParser() @@ -77,7 +77,7 @@ class AnnotationListApi(Resource): class AnnotationUpdateDeleteApi(Resource): - @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON)) + @validate_app_token @marshal_with(annotation_fields) def put(self, app_model: App, end_user: EndUser, annotation_id): if not current_user.is_editor: @@ -91,7 +91,7 @@ class AnnotationUpdateDeleteApi(Resource): annotation = AppAnnotationService.update_app_annotation_directly(args, app_model.id, annotation_id) return annotation - @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.QUERY)) + @validate_app_token def delete(self, app_model: App, end_user: EndUser, annotation_id): if not current_user.is_editor: raise Forbidden() diff --git a/api/controllers/service_api/app/message.py b/api/controllers/service_api/app/message.py index 1b148a9756..d90fa2081f 100644 --- a/api/controllers/service_api/app/message.py +++ b/api/controllers/service_api/app/message.py @@ -93,6 +93,18 @@ class MessageFeedbackApi(Resource): return {"result": "success"} +class AppGetFeedbacksApi(Resource): + @validate_app_token + def 
get(self, app_model: App): + """Get All Feedbacks of an app""" + parser = reqparse.RequestParser() + parser.add_argument("page", type=int, default=1, location="args") + parser.add_argument("limit", type=int_range(1, 101), required=False, default=20, location="args") + args = parser.parse_args() + feedbacks = MessageService.get_all_messages_feedbacks(app_model, page=args["page"], limit=args["limit"]) + return {"data": feedbacks} + + class MessageSuggestedApi(Resource): @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.QUERY, required=True)) def get(self, app_model: App, end_user: EndUser, message_id): @@ -119,3 +131,4 @@ class MessageSuggestedApi(Resource): api.add_resource(MessageListApi, "/messages") api.add_resource(MessageFeedbackApi, "/messages//feedbacks") api.add_resource(MessageSuggestedApi, "/messages//suggested") +api.add_resource(AppGetFeedbacksApi, "/app/feedbacks") diff --git a/api/controllers/service_api/app/site.py b/api/controllers/service_api/app/site.py new file mode 100644 index 0000000000..e752dfee30 --- /dev/null +++ b/api/controllers/service_api/app/site.py @@ -0,0 +1,30 @@ +from flask_restful import Resource, marshal_with +from werkzeug.exceptions import Forbidden + +from controllers.common import fields +from controllers.service_api import api +from controllers.service_api.wraps import validate_app_token +from extensions.ext_database import db +from models.account import TenantStatus +from models.model import App, Site + + +class AppSiteApi(Resource): + """Resource for app sites.""" + + @validate_app_token + @marshal_with(fields.site_fields) + def get(self, app_model: App): + """Retrieve app site info.""" + site = db.session.query(Site).filter(Site.app_id == app_model.id).first() + + if not site: + raise Forbidden() + + if app_model.tenant.status == TenantStatus.ARCHIVE: + raise Forbidden() + + return site + + +api.add_resource(AppSiteApi, "/site") diff --git a/api/controllers/service_api/dataset/dataset.py b/api/controllers/service_api/dataset/dataset.py index ee190245d5..394f36c3ff 100644 --- a/api/controllers/service_api/dataset/dataset.py +++ b/api/controllers/service_api/dataset/dataset.py @@ -313,7 +313,7 @@ class DatasetApi(DatasetApiResource): try: if DatasetService.delete_dataset(dataset_id_str, current_user): DatasetPermissionService.clear_partial_member_list(dataset_id_str) - return {"result": "success"}, 204 + return 204 else: raise NotFound("Dataset not found.") except services.errors.dataset.DatasetInUseError: diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index 33eda37014..418363ffbb 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -2,10 +2,10 @@ import json from flask import request from flask_restful import marshal, reqparse -from sqlalchemy import desc +from sqlalchemy import desc, select from werkzeug.exceptions import NotFound -import services.dataset_service +import services from controllers.common.errors import FilenameNotExistsError from controllers.service_api import api from controllers.service_api.app.error import ( @@ -323,7 +323,7 @@ class DocumentDeleteApi(DatasetApiResource): except services.errors.document.DocumentIndexingError: raise DocumentIndexingError("Cannot delete document during indexing.") - return {"result": "success"}, 204 + return 204 class DocumentListApi(DatasetApiResource): @@ -337,7 +337,7 @@ class DocumentListApi(DatasetApiResource): if not dataset: raise 
NotFound("Dataset not found.") - query = Document.query.filter_by(dataset_id=str(dataset_id), tenant_id=tenant_id) + query = select(Document).filter_by(dataset_id=str(dataset_id), tenant_id=tenant_id) if search: search = f"%{search}%" @@ -345,7 +345,7 @@ class DocumentListApi(DatasetApiResource): query = query.order_by(desc(Document.created_at), desc(Document.position)) - paginated_documents = query.paginate(page=page, per_page=limit, max_per_page=100, error_out=False) + paginated_documents = db.paginate(select=query, page=page, per_page=limit, max_per_page=100, error_out=False) documents = paginated_documents.items response = { @@ -374,19 +374,36 @@ class DocumentIndexingStatusApi(DatasetApiResource): raise NotFound("Documents not found.") documents_status = [] for document in documents: - completed_segments = DocumentSegment.query.filter( - DocumentSegment.completed_at.isnot(None), - DocumentSegment.document_id == str(document.id), - DocumentSegment.status != "re_segment", - ).count() - total_segments = DocumentSegment.query.filter( - DocumentSegment.document_id == str(document.id), DocumentSegment.status != "re_segment" - ).count() - document.completed_segments = completed_segments - document.total_segments = total_segments - if document.is_paused: - document.indexing_status = "paused" - documents_status.append(marshal(document, document_status_fields)) + completed_segments = ( + db.session.query(DocumentSegment) + .filter( + DocumentSegment.completed_at.isnot(None), + DocumentSegment.document_id == str(document.id), + DocumentSegment.status != "re_segment", + ) + .count() + ) + total_segments = ( + db.session.query(DocumentSegment) + .filter(DocumentSegment.document_id == str(document.id), DocumentSegment.status != "re_segment") + .count() + ) + # Create a dictionary with document attributes and additional fields + document_dict = { + "id": document.id, + "indexing_status": "paused" if document.is_paused else document.indexing_status, + "processing_started_at": document.processing_started_at, + "parsing_completed_at": document.parsing_completed_at, + "cleaning_completed_at": document.cleaning_completed_at, + "splitting_completed_at": document.splitting_completed_at, + "completed_at": document.completed_at, + "paused_at": document.paused_at, + "error": document.error, + "stopped_at": document.stopped_at, + "completed_segments": completed_segments, + "total_segments": total_segments, + } + documents_status.append(marshal(document_dict, document_status_fields)) data = {"data": documents_status} return data diff --git a/api/controllers/service_api/dataset/segment.py b/api/controllers/service_api/dataset/segment.py index fb3ca1e15f..ea4be4e511 100644 --- a/api/controllers/service_api/dataset/segment.py +++ b/api/controllers/service_api/dataset/segment.py @@ -159,7 +159,7 @@ class DatasetSegmentApi(DatasetApiResource): if not segment: raise NotFound("Segment not found.") SegmentService.delete_segment(segment, document, dataset) - return {"result": "success"}, 204 + return 204 @cloud_edition_billing_resource_check("vector_space", "dataset") def post(self, tenant_id, dataset_id, document_id, segment_id): @@ -344,7 +344,7 @@ class DatasetChildChunkApi(DatasetApiResource): except ChildChunkDeleteIndexServiceError as e: raise ChildChunkDeleteIndexError(str(e)) - return {"result": "success"}, 204 + return 204 @cloud_edition_billing_resource_check("vector_space", "dataset") @cloud_edition_billing_knowledge_limit_check("add_segment", "dataset") diff --git a/api/controllers/service_api/wraps.py 
b/api/controllers/service_api/wraps.py index cd35ceac1d..d3316a5159 100644 --- a/api/controllers/service_api/wraps.py +++ b/api/controllers/service_api/wraps.py @@ -99,7 +99,12 @@ def validate_app_token(view: Optional[Callable] = None, *, fetch_user_arg: Optio if user_id: user_id = str(user_id) - kwargs["end_user"] = create_or_update_end_user_for_user_id(app_model, user_id) + end_user = create_or_update_end_user_for_user_id(app_model, user_id) + kwargs["end_user"] = end_user + + # Set EndUser as current logged-in user for flask_login.current_user + current_app.login_manager._update_request_context_with_user(end_user) # type: ignore + user_logged_in.send(current_app._get_current_object(), user=end_user) # type: ignore return view_func(*args, **kwargs) diff --git a/api/controllers/web/app.py b/api/controllers/web/app.py index c9a37af5ed..bb4486bd91 100644 --- a/api/controllers/web/app.py +++ b/api/controllers/web/app.py @@ -1,12 +1,15 @@ -from flask_restful import marshal_with +from flask import request +from flask_restful import Resource, marshal_with, reqparse from controllers.common import fields from controllers.web import api from controllers.web.error import AppUnavailableError from controllers.web.wraps import WebApiResource from core.app.app_config.common.parameters_mapping import get_parameters_from_feature_dict +from libs.passport import PassportService from models.model import App, AppMode from services.app_service import AppService +from services.enterprise.enterprise_service import EnterpriseService class AppParameterApi(WebApiResource): @@ -40,5 +43,51 @@ class AppMeta(WebApiResource): return AppService().get_app_meta(app_model) +class AppAccessMode(Resource): + def get(self): + parser = reqparse.RequestParser() + parser.add_argument("appId", type=str, required=True, location="args") + args = parser.parse_args() + + app_id = args["appId"] + res = EnterpriseService.WebAppAuth.get_app_access_mode_by_id(app_id) + + return {"accessMode": res.access_mode} + + +class AppWebAuthPermission(Resource): + def get(self): + user_id = "visitor" + try: + auth_header = request.headers.get("Authorization") + if auth_header is None: + raise + if " " not in auth_header: + raise + + auth_scheme, tk = auth_header.split(None, 1) + auth_scheme = auth_scheme.lower() + if auth_scheme != "bearer": + raise + + decoded = PassportService().verify(tk) + user_id = decoded.get("user_id", "visitor") + except Exception as e: + pass + + parser = reqparse.RequestParser() + parser.add_argument("appId", type=str, required=True, location="args") + args = parser.parse_args() + + app_id = args["appId"] + app_code = AppService.get_app_code_by_id(app_id) + + res = EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp(str(user_id), app_code) + return {"result": res} + + api.add_resource(AppParameterApi, "/parameters") api.add_resource(AppMeta, "/meta") +# webapp auth apis +api.add_resource(AppAccessMode, "/webapp/access-mode") +api.add_resource(AppWebAuthPermission, "/webapp/permission") diff --git a/api/controllers/web/error.py b/api/controllers/web/error.py index 9fe5d08d54..4371e679db 100644 --- a/api/controllers/web/error.py +++ b/api/controllers/web/error.py @@ -121,9 +121,15 @@ class UnsupportedFileTypeError(BaseHTTPException): code = 415 -class WebSSOAuthRequiredError(BaseHTTPException): +class WebAppAuthRequiredError(BaseHTTPException): error_code = "web_sso_auth_required" - description = "Web SSO authentication required." + description = "Web app authentication required." 
+ code = 401 + + +class WebAppAuthAccessDeniedError(BaseHTTPException): + error_code = "web_app_access_denied" + description = "You do not have permission to access this web app." code = 401 diff --git a/api/controllers/web/login.py b/api/controllers/web/login.py new file mode 100644 index 0000000000..06c2274440 --- /dev/null +++ b/api/controllers/web/login.py @@ -0,0 +1,120 @@ +from flask import request +from flask_restful import Resource, reqparse +from jwt import InvalidTokenError # type: ignore +from werkzeug.exceptions import BadRequest + +import services +from controllers.console.auth.error import EmailCodeError, EmailOrPasswordMismatchError, InvalidEmailError +from controllers.console.error import AccountBannedError, AccountNotFound +from controllers.console.wraps import setup_required +from libs.helper import email +from libs.password import valid_password +from services.account_service import AccountService +from services.webapp_auth_service import WebAppAuthService + + +class LoginApi(Resource): + """Resource for web app email/password login.""" + + def post(self): + """Authenticate user and login.""" + parser = reqparse.RequestParser() + parser.add_argument("email", type=email, required=True, location="json") + parser.add_argument("password", type=valid_password, required=True, location="json") + args = parser.parse_args() + + app_code = request.headers.get("X-App-Code") + if app_code is None: + raise BadRequest("X-App-Code header is missing.") + + try: + account = WebAppAuthService.authenticate(args["email"], args["password"]) + except services.errors.account.AccountLoginError: + raise AccountBannedError() + except services.errors.account.AccountPasswordError: + raise EmailOrPasswordMismatchError() + except services.errors.account.AccountNotFoundError: + raise AccountNotFound() + + WebAppAuthService._validate_user_accessibility(account=account, app_code=app_code) + + end_user = WebAppAuthService.create_end_user(email=args["email"], app_code=app_code) + + token = WebAppAuthService.login(account=account, app_code=app_code, end_user_id=end_user.id) + return {"result": "success", "token": token} + + +# class LogoutApi(Resource): +# @setup_required +# def get(self): +# account = cast(Account, flask_login.current_user) +# if isinstance(account, flask_login.AnonymousUserMixin): +# return {"result": "success"} +# flask_login.logout_user() +# return {"result": "success"} + + +class EmailCodeLoginSendEmailApi(Resource): + @setup_required + def post(self): + parser = reqparse.RequestParser() + parser.add_argument("email", type=email, required=True, location="json") + parser.add_argument("language", type=str, required=False, location="json") + args = parser.parse_args() + + if args["language"] is not None and args["language"] == "zh-Hans": + language = "zh-Hans" + else: + language = "en-US" + + account = WebAppAuthService.get_user_through_email(args["email"]) + if account is None: + raise AccountNotFound() + else: + token = WebAppAuthService.send_email_code_login_email(account=account, language=language) + + return {"result": "success", "data": token} + + +class EmailCodeLoginApi(Resource): + @setup_required + def post(self): + parser = reqparse.RequestParser() + parser.add_argument("email", type=str, required=True, location="json") + parser.add_argument("code", type=str, required=True, location="json") + parser.add_argument("token", type=str, required=True, location="json") + args = parser.parse_args() + + user_email = args["email"] + app_code = request.headers.get("X-App-Code") + if 
app_code is None: + raise BadRequest("X-App-Code header is missing.") + + token_data = WebAppAuthService.get_email_code_login_data(args["token"]) + if token_data is None: + raise InvalidTokenError() + + if token_data["email"] != args["email"]: + raise InvalidEmailError() + + if token_data["code"] != args["code"]: + raise EmailCodeError() + + WebAppAuthService.revoke_email_code_login_token(args["token"]) + account = WebAppAuthService.get_user_through_email(user_email) + if not account: + raise AccountNotFound() + + WebAppAuthService._validate_user_accessibility(account=account, app_code=app_code) + + end_user = WebAppAuthService.create_end_user(email=user_email, app_code=app_code) + + token = WebAppAuthService.login(account=account, app_code=app_code, end_user_id=end_user.id) + AccountService.reset_login_error_rate_limit(args["email"]) + return {"result": "success", "token": token} + + +# api.add_resource(LoginApi, "/login") +# api.add_resource(LogoutApi, "/logout") +# api.add_resource(EmailCodeLoginSendEmailApi, "/email-code-login") +# api.add_resource(EmailCodeLoginApi, "/email-code-login/validity") diff --git a/api/controllers/web/passport.py b/api/controllers/web/passport.py index 267dac223d..154c6772b7 100644 --- a/api/controllers/web/passport.py +++ b/api/controllers/web/passport.py @@ -5,7 +5,7 @@ from flask_restful import Resource from werkzeug.exceptions import NotFound, Unauthorized from controllers.web import api -from controllers.web.error import WebSSOAuthRequiredError +from controllers.web.error import WebAppAuthRequiredError from extensions.ext_database import db from libs.passport import PassportService from models.model import App, EndUser, Site @@ -24,10 +24,10 @@ class PassportResource(Resource): if app_code is None: raise Unauthorized("X-App-Code header is missing.") - if system_features.sso_enforced_for_web: - app_web_sso_enabled = EnterpriseService.get_app_web_sso_enabled(app_code).get("enabled", False) - if app_web_sso_enabled: - raise WebSSOAuthRequiredError() + if system_features.webapp_auth.enabled: + app_settings = EnterpriseService.WebAppAuth.get_app_access_mode_by_code(app_code=app_code) + if not app_settings or not app_settings.access_mode == "public": + raise WebAppAuthRequiredError() # get site from db and check if it is normal site = db.session.query(Site).filter(Site.code == app_code, Site.status == "normal").first() diff --git a/api/controllers/web/wraps.py b/api/controllers/web/wraps.py index c327c3df18..3bb029d6eb 100644 --- a/api/controllers/web/wraps.py +++ b/api/controllers/web/wraps.py @@ -4,7 +4,7 @@ from flask import request from flask_restful import Resource from werkzeug.exceptions import BadRequest, NotFound, Unauthorized -from controllers.web.error import WebSSOAuthRequiredError +from controllers.web.error import WebAppAuthAccessDeniedError, WebAppAuthRequiredError from extensions.ext_database import db from libs.passport import PassportService from models.model import App, EndUser, Site @@ -29,7 +29,7 @@ def validate_jwt_token(view=None): def decode_jwt_token(): system_features = FeatureService.get_system_features() - app_code = request.headers.get("X-App-Code") + app_code = str(request.headers.get("X-App-Code")) try: auth_header = request.headers.get("Authorization") if auth_header is None: @@ -57,35 +57,53 @@ def decode_jwt_token(): if not end_user: raise NotFound() - _validate_web_sso_token(decoded, system_features, app_code) + # for enterprise webapp auth + app_web_auth_enabled = False + if system_features.webapp_auth.enabled: + 
app_web_auth_enabled = ( + EnterpriseService.WebAppAuth.get_app_access_mode_by_code(app_code=app_code).access_mode != "public" + ) + + _validate_webapp_token(decoded, app_web_auth_enabled, system_features.webapp_auth.enabled) + _validate_user_accessibility(decoded, app_code, app_web_auth_enabled, system_features.webapp_auth.enabled) return app_model, end_user except Unauthorized as e: - if system_features.sso_enforced_for_web: - app_web_sso_enabled = EnterpriseService.get_app_web_sso_enabled(app_code).get("enabled", False) - if app_web_sso_enabled: - raise WebSSOAuthRequiredError() + if system_features.webapp_auth.enabled: + app_web_auth_enabled = ( + EnterpriseService.WebAppAuth.get_app_access_mode_by_code(app_code=str(app_code)).access_mode != "public" + ) + if app_web_auth_enabled: + raise WebAppAuthRequiredError() raise Unauthorized(e.description) -def _validate_web_sso_token(decoded, system_features, app_code): - app_web_sso_enabled = False - - # Check if SSO is enforced for web, and if the token source is not SSO, raise an error and redirect to SSO login - if system_features.sso_enforced_for_web: - app_web_sso_enabled = EnterpriseService.get_app_web_sso_enabled(app_code).get("enabled", False) - if app_web_sso_enabled: - source = decoded.get("token_source") - if not source or source != "sso": - raise WebSSOAuthRequiredError() - - # Check if SSO is not enforced for web, and if the token source is SSO, - # raise an error and redirect to normal passport login - if not system_features.sso_enforced_for_web or not app_web_sso_enabled: +def _validate_webapp_token(decoded, app_web_auth_enabled: bool, system_webapp_auth_enabled: bool): + # Check if authentication is enforced for web app, and if the token source is not webapp, + # raise an error and redirect to login + if system_webapp_auth_enabled and app_web_auth_enabled: source = decoded.get("token_source") - if source and source == "sso": - raise Unauthorized("sso token expired.") + if not source or source != "webapp": + raise WebAppAuthRequiredError() + + # Check if authentication is not enforced for web, and if the token source is webapp, + # raise an error and redirect to normal passport login + if not system_webapp_auth_enabled or not app_web_auth_enabled: + source = decoded.get("token_source") + if source and source == "webapp": + raise Unauthorized("webapp token expired.") + + +def _validate_user_accessibility(decoded, app_code, app_web_auth_enabled: bool, system_webapp_auth_enabled: bool): + if system_webapp_auth_enabled and app_web_auth_enabled: + # Check if the user is allowed to access the web app + user_id = decoded.get("user_id") + if not user_id: + raise WebAppAuthRequiredError() + + if not EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp(user_id, app_code=app_code): + raise WebAppAuthAccessDeniedError() class WebApiResource(Resource): diff --git a/api/core/agent/base_agent_runner.py b/api/core/agent/base_agent_runner.py index e648613605..6998e4d29a 100644 --- a/api/core/agent/base_agent_runner.py +++ b/api/core/agent/base_agent_runner.py @@ -91,6 +91,8 @@ class BaseAgentRunner(AppRunner): return_resource=app_config.additional_features.show_retrieve_source, invoke_from=application_generate_entity.invoke_from, hit_callback=hit_callback, + user_id=user_id, + inputs=cast(dict, application_generate_entity.inputs), ) # get how many agent thoughts have been created self.agent_thought_count = ( diff --git a/api/core/agent/cot_agent_runner.py b/api/core/agent/cot_agent_runner.py index de3b7e1ad7..5212d797d8 100644 --- 
a/api/core/agent/cot_agent_runner.py +++ b/api/core/agent/cot_agent_runner.py @@ -69,13 +69,6 @@ class CotAgentRunner(BaseAgentRunner, ABC): tool_instances, prompt_messages_tools = self._init_prompt_tools() self._prompt_messages_tools = prompt_messages_tools - # fix metadata filter not work - if app_config.dataset is not None: - metadata_filtering_conditions = app_config.dataset.retrieve_config.metadata_filtering_conditions - for key, dataset_retriever_tool in tool_instances.items(): - if hasattr(dataset_retriever_tool, "retrieval_tool"): - dataset_retriever_tool.retrieval_tool.metadata_filtering_conditions = metadata_filtering_conditions - function_call_state = True llm_usage: dict[str, Optional[LLMUsage]] = {"usage": None} final_answer = "" @@ -87,6 +80,7 @@ class CotAgentRunner(BaseAgentRunner, ABC): llm_usage = final_llm_usage_dict["usage"] llm_usage.prompt_tokens += usage.prompt_tokens llm_usage.completion_tokens += usage.completion_tokens + llm_usage.total_tokens += usage.total_tokens llm_usage.prompt_price += usage.prompt_price llm_usage.completion_price += usage.completion_price llm_usage.total_price += usage.total_price diff --git a/api/core/agent/fc_agent_runner.py b/api/core/agent/fc_agent_runner.py index 874bd6b93b..611a55b30a 100644 --- a/api/core/agent/fc_agent_runner.py +++ b/api/core/agent/fc_agent_runner.py @@ -45,13 +45,6 @@ class FunctionCallAgentRunner(BaseAgentRunner): # convert tools into ModelRuntime Tool format tool_instances, prompt_messages_tools = self._init_prompt_tools() - # fix metadata filter not work - if app_config.dataset is not None: - metadata_filtering_conditions = app_config.dataset.retrieve_config.metadata_filtering_conditions - for key, dataset_retriever_tool in tool_instances.items(): - if hasattr(dataset_retriever_tool, "retrieval_tool"): - dataset_retriever_tool.retrieval_tool.metadata_filtering_conditions = metadata_filtering_conditions - assert app_config.agent iteration_step = 1 @@ -72,6 +65,7 @@ class FunctionCallAgentRunner(BaseAgentRunner): llm_usage = final_llm_usage_dict["usage"] llm_usage.prompt_tokens += usage.prompt_tokens llm_usage.completion_tokens += usage.completion_tokens + llm_usage.total_tokens += usage.total_tokens llm_usage.prompt_price += usage.prompt_price llm_usage.completion_price += usage.completion_price llm_usage.total_price += usage.total_price diff --git a/api/core/app/app_config/entities.py b/api/core/app/app_config/entities.py index 8ae52131f2..3f31b1c3d5 100644 --- a/api/core/app/app_config/entities.py +++ b/api/core/app/app_config/entities.py @@ -109,6 +109,7 @@ class VariableEntity(BaseModel): description: str = "" type: VariableEntityType required: bool = False + hide: bool = False max_length: Optional[int] = None options: Sequence[str] = Field(default_factory=list) allowed_file_types: Sequence[FileType] = Field(default_factory=list) diff --git a/api/core/app/apps/advanced_chat/app_generator.py b/api/core/app/apps/advanced_chat/app_generator.py index fd0d7fafbd..fdd1a776f8 100644 --- a/api/core/app/apps/advanced_chat/app_generator.py +++ b/api/core/app/apps/advanced_chat/app_generator.py @@ -5,7 +5,7 @@ import uuid from collections.abc import Generator, Mapping from typing import Any, Literal, Optional, Union, overload -from flask import Flask, current_app +from flask import Flask, copy_current_request_context, current_app, has_request_context from pydantic import ValidationError from sqlalchemy.orm import sessionmaker @@ -25,13 +25,14 @@ from core.app.entities.task_entities import ChatbotAppBlockingResponse, 
ChatbotA from core.model_runtime.errors.invoke import InvokeAuthorizationError from core.ops.ops_trace_manager import TraceQueueManager from core.prompt.utils.get_thread_messages_length import get_thread_messages_length -from core.workflow.repository import RepositoryFactory +from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository +from core.repositories.sqlalchemy_workflow_execution_repository import SQLAlchemyWorkflowExecutionRepository +from core.workflow.repository.workflow_execution_repository import WorkflowExecutionRepository from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository from extensions.ext_database import db from factories import file_factory -from models.account import Account -from models.model import App, Conversation, EndUser, Message -from models.workflow import Workflow +from models import Account, App, Conversation, EndUser, Message, Workflow, WorkflowNodeExecutionTriggeredFrom +from models.enums import WorkflowRunTriggeredFrom from services.conversation_service import ConversationService from services.errors.message import MessageNotExistsError @@ -157,18 +158,30 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): trace_manager=trace_manager, workflow_run_id=workflow_run_id, ) - contexts.tenant_id.set(application_generate_entity.app_config.tenant_id) contexts.plugin_tool_providers.set({}) contexts.plugin_tool_providers_lock.set(threading.Lock()) - # Create workflow node execution repository + # Create repositories + # + # Create session factory session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) - workflow_node_execution_repository = RepositoryFactory.create_workflow_node_execution_repository( - params={ - "tenant_id": application_generate_entity.app_config.tenant_id, - "app_id": application_generate_entity.app_config.app_id, - "session_factory": session_factory, - } + # Create workflow execution(aka workflow run) repository + if invoke_from == InvokeFrom.DEBUGGER: + workflow_triggered_from = WorkflowRunTriggeredFrom.DEBUGGING + else: + workflow_triggered_from = WorkflowRunTriggeredFrom.APP_RUN + workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository( + session_factory=session_factory, + user=user, + app_id=application_generate_entity.app_config.app_id, + triggered_from=workflow_triggered_from, + ) + # Create workflow node execution repository + workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=session_factory, + user=user, + app_id=application_generate_entity.app_config.app_id, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, ) return self._generate( @@ -176,6 +189,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): user=user, invoke_from=invoke_from, application_generate_entity=application_generate_entity, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, conversation=conversation, stream=streaming, @@ -225,18 +239,26 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): node_id=node_id, inputs=args["inputs"] ), ) - contexts.tenant_id.set(application_generate_entity.app_config.tenant_id) contexts.plugin_tool_providers.set({}) contexts.plugin_tool_providers_lock.set(threading.Lock()) - # Create workflow node execution repository + # Create repositories + # + # Create session factory session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) - workflow_node_execution_repository = 
RepositoryFactory.create_workflow_node_execution_repository( - params={ - "tenant_id": application_generate_entity.app_config.tenant_id, - "app_id": application_generate_entity.app_config.app_id, - "session_factory": session_factory, - } + # Create workflow execution(aka workflow run) repository + workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository( + session_factory=session_factory, + user=user, + app_id=application_generate_entity.app_config.app_id, + triggered_from=WorkflowRunTriggeredFrom.DEBUGGING, + ) + # Create workflow node execution repository + workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=session_factory, + user=user, + app_id=application_generate_entity.app_config.app_id, + triggered_from=WorkflowNodeExecutionTriggeredFrom.SINGLE_STEP, ) return self._generate( @@ -244,6 +266,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): user=user, invoke_from=InvokeFrom.DEBUGGER, application_generate_entity=application_generate_entity, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, conversation=None, stream=streaming, @@ -291,18 +314,26 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): extras={"auto_generate_conversation_name": False}, single_loop_run=AdvancedChatAppGenerateEntity.SingleLoopRunEntity(node_id=node_id, inputs=args["inputs"]), ) - contexts.tenant_id.set(application_generate_entity.app_config.tenant_id) contexts.plugin_tool_providers.set({}) contexts.plugin_tool_providers_lock.set(threading.Lock()) - # Create workflow node execution repository + # Create repositories + # + # Create session factory session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) - workflow_node_execution_repository = RepositoryFactory.create_workflow_node_execution_repository( - params={ - "tenant_id": application_generate_entity.app_config.tenant_id, - "app_id": application_generate_entity.app_config.app_id, - "session_factory": session_factory, - } + # Create workflow execution(aka workflow run) repository + workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository( + session_factory=session_factory, + user=user, + app_id=application_generate_entity.app_config.app_id, + triggered_from=WorkflowRunTriggeredFrom.DEBUGGING, + ) + # Create workflow node execution repository + workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=session_factory, + user=user, + app_id=application_generate_entity.app_config.app_id, + triggered_from=WorkflowNodeExecutionTriggeredFrom.SINGLE_STEP, ) return self._generate( @@ -310,6 +341,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): user=user, invoke_from=InvokeFrom.DEBUGGER, application_generate_entity=application_generate_entity, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, conversation=None, stream=streaming, @@ -322,6 +354,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): user: Union[Account, EndUser], invoke_from: InvokeFrom, application_generate_entity: AdvancedChatAppGenerateEntity, + workflow_execution_repository: WorkflowExecutionRepository, workflow_node_execution_repository: WorkflowNodeExecutionRepository, conversation: Optional[Conversation] = None, stream: bool = True, @@ -363,18 +396,23 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): message_id=message.id, ) - # new thread - 
worker_thread = threading.Thread( - target=self._generate_worker, - kwargs={ - "flask_app": current_app._get_current_object(), # type: ignore - "application_generate_entity": application_generate_entity, - "queue_manager": queue_manager, - "conversation_id": conversation.id, - "message_id": message.id, - "context": contextvars.copy_context(), - }, - ) + # new thread with request context and contextvars + context = contextvars.copy_context() + + @copy_current_request_context + def worker_with_context(): + # Run the worker within the copied context + return context.run( + self._generate_worker, + flask_app=current_app._get_current_object(), # type: ignore + application_generate_entity=application_generate_entity, + queue_manager=queue_manager, + conversation_id=conversation.id, + message_id=message.id, + context=context, + ) + + worker_thread = threading.Thread(target=worker_with_context) worker_thread.start() @@ -386,6 +424,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): conversation=conversation, message=message, user=user, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, stream=stream, ) @@ -412,8 +451,22 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): """ for var, val in context.items(): var.set(val) + + # FIXME(-LAN-): Save current user before entering new app context + from flask import g + + saved_user = None + if has_request_context() and hasattr(g, "_login_user"): + saved_user = g._login_user + with flask_app.app_context(): try: + # Restore user in new app context + if saved_user is not None: + from flask import g + + g._login_user = saved_user + # get conversation and message conversation = self._get_conversation(conversation_id) message = self._get_message(message_id) @@ -458,6 +511,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): conversation: Conversation, message: Message, user: Union[Account, EndUser], + workflow_execution_repository: WorkflowExecutionRepository, workflow_node_execution_repository: WorkflowNodeExecutionRepository, stream: bool = False, ) -> Union[ChatbotAppBlockingResponse, Generator[ChatbotAppStreamResponse, None, None]]: @@ -481,9 +535,10 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): conversation=conversation, message=message, user=user, - stream=stream, dialogue_count=self._dialogue_count, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, + stream=stream, ) try: diff --git a/api/core/app/apps/advanced_chat/generate_task_pipeline.py b/api/core/app/apps/advanced_chat/generate_task_pipeline.py index 1f4db54a9c..0a2401f953 100644 --- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py +++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py @@ -9,8 +9,8 @@ from sqlalchemy import select from sqlalchemy.orm import Session from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME -from core.app.apps.advanced_chat.app_generator_tts_publisher import AppGeneratorTTSPublisher, AudioTrunk from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom +from core.app.apps.common.workflow_response_converter import WorkflowResponseConverter from core.app.entities.app_invoke_entities import ( AdvancedChatAppGenerateEntity, InvokeFrom, @@ -58,19 +58,21 @@ from core.app.entities.task_entities import ( ) from core.app.task_pipeline.based_generate_task_pipeline import 
BasedGenerateTaskPipeline from core.app.task_pipeline.message_cycle_manage import MessageCycleManage -from core.app.task_pipeline.workflow_cycle_manage import WorkflowCycleManage +from core.base.tts import AppGeneratorTTSPublisher, AudioTrunk from core.model_runtime.entities.llm_entities import LLMUsage from core.model_runtime.utils.encoders import jsonable_encoder from core.ops.ops_trace_manager import TraceQueueManager from core.workflow.enums import SystemVariableKey from core.workflow.graph_engine.entities.graph_runtime_state import GraphRuntimeState from core.workflow.nodes import NodeType +from core.workflow.repository.workflow_execution_repository import WorkflowExecutionRepository from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository +from core.workflow.workflow_cycle_manager import WorkflowCycleManager from events.message_event import message_was_created from extensions.ext_database import db from models import Conversation, EndUser, Message, MessageFile from models.account import Account -from models.enums import CreatedByRole +from models.enums import CreatorUserRole from models.workflow import ( Workflow, WorkflowRunStatus, @@ -94,6 +96,7 @@ class AdvancedChatAppGenerateTaskPipeline: user: Union[Account, EndUser], stream: bool, dialogue_count: int, + workflow_execution_repository: WorkflowExecutionRepository, workflow_node_execution_repository: WorkflowNodeExecutionRepository, ) -> None: self._base_task_pipeline = BasedGenerateTaskPipeline( @@ -105,15 +108,15 @@ class AdvancedChatAppGenerateTaskPipeline: if isinstance(user, EndUser): self._user_id = user.id user_session_id = user.session_id - self._created_by_role = CreatedByRole.END_USER + self._created_by_role = CreatorUserRole.END_USER elif isinstance(user, Account): self._user_id = user.id user_session_id = user.id - self._created_by_role = CreatedByRole.ACCOUNT + self._created_by_role = CreatorUserRole.ACCOUNT else: raise NotImplementedError(f"User type not supported: {type(user)}") - self._workflow_cycle_manager = WorkflowCycleManage( + self._workflow_cycle_manager = WorkflowCycleManager( application_generate_entity=application_generate_entity, workflow_system_variables={ SystemVariableKey.QUERY: message.query, @@ -125,9 +128,14 @@ class AdvancedChatAppGenerateTaskPipeline: SystemVariableKey.WORKFLOW_ID: workflow.id, SystemVariableKey.WORKFLOW_RUN_ID: application_generate_entity.workflow_run_id, }, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, ) + self._workflow_response_converter = WorkflowResponseConverter( + application_generate_entity=application_generate_entity, + ) + self._task_state = WorkflowTaskState() self._message_cycle_manager = MessageCycleManage( application_generate_entity=application_generate_entity, task_state=self._task_state @@ -294,19 +302,18 @@ class AdvancedChatAppGenerateTaskPipeline: with Session(db.engine, expire_on_commit=False) as session: # init workflow run - workflow_run = self._workflow_cycle_manager._handle_workflow_run_start( + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_start( session=session, workflow_id=self._workflow_id, - user_id=self._user_id, - created_by_role=self._created_by_role, ) - self._workflow_run_id = workflow_run.id + self._workflow_run_id = workflow_execution.id message = self._get_message(session=session) if not message: raise ValueError(f"Message not found: {self._message_id}") - message.workflow_run_id = 
workflow_run.id - workflow_start_resp = self._workflow_cycle_manager._workflow_start_to_stream_response( - session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + message.workflow_run_id = workflow_execution.id + workflow_start_resp = self._workflow_response_converter.workflow_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution=workflow_execution, ) session.commit() @@ -319,13 +326,10 @@ class AdvancedChatAppGenerateTaskPipeline: raise ValueError("workflow run not initialized.") with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id + workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_retried( + workflow_execution_id=self._workflow_run_id, event=event ) - workflow_node_execution = self._workflow_cycle_manager._handle_workflow_node_execution_retried( - workflow_run=workflow_run, event=event - ) - node_retry_resp = self._workflow_cycle_manager._workflow_node_retry_to_stream_response( + node_retry_resp = self._workflow_response_converter.workflow_node_retry_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, @@ -338,20 +342,15 @@ class AdvancedChatAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - workflow_node_execution = self._workflow_cycle_manager._handle_node_execution_start( - workflow_run=workflow_run, event=event - ) + workflow_node_execution = self._workflow_cycle_manager.handle_node_execution_start( + workflow_execution_id=self._workflow_run_id, event=event + ) - node_start_resp = self._workflow_cycle_manager._workflow_node_start_to_stream_response( - event=event, - task_id=self._application_generate_entity.task_id, - workflow_node_execution=workflow_node_execution, - ) - session.commit() + node_start_resp = self._workflow_response_converter.workflow_node_start_to_stream_response( + event=event, + task_id=self._application_generate_entity.task_id, + workflow_node_execution=workflow_node_execution, + ) if node_start_resp: yield node_start_resp @@ -359,15 +358,15 @@ class AdvancedChatAppGenerateTaskPipeline: # Record files if it's an answer node or end node if event.node_type in [NodeType.ANSWER, NodeType.END]: self._recorded_files.extend( - self._workflow_cycle_manager._fetch_files_from_node_outputs(event.outputs or {}) + self._workflow_response_converter.fetch_files_from_node_outputs(event.outputs or {}) ) with Session(db.engine, expire_on_commit=False) as session: - workflow_node_execution = self._workflow_cycle_manager._handle_workflow_node_execution_success( + workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_success( event=event ) - node_finish_resp = self._workflow_cycle_manager._workflow_node_finish_to_stream_response( + node_finish_resp = self._workflow_response_converter.workflow_node_finish_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, @@ -383,11 +382,11 @@ class AdvancedChatAppGenerateTaskPipeline: | QueueNodeInLoopFailedEvent | QueueNodeExceptionEvent, ): - workflow_node_execution = 
self._workflow_cycle_manager._handle_workflow_node_execution_failed( + workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_failed( event=event ) - node_finish_resp = self._workflow_cycle_manager._workflow_node_finish_to_stream_response( + node_finish_resp = self._workflow_response_converter.workflow_node_finish_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, @@ -399,132 +398,92 @@ class AdvancedChatAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - parallel_start_resp = ( - self._workflow_cycle_manager._workflow_parallel_branch_start_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + parallel_start_resp = ( + self._workflow_response_converter.workflow_parallel_branch_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, ) + ) yield parallel_start_resp elif isinstance(event, QueueParallelBranchRunSucceededEvent | QueueParallelBranchRunFailedEvent): if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - parallel_finish_resp = ( - self._workflow_cycle_manager._workflow_parallel_branch_finished_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + parallel_finish_resp = ( + self._workflow_response_converter.workflow_parallel_branch_finished_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, ) + ) yield parallel_finish_resp elif isinstance(event, QueueIterationStartEvent): if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - iter_start_resp = self._workflow_cycle_manager._workflow_iteration_start_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + iter_start_resp = self._workflow_response_converter.workflow_iteration_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield iter_start_resp elif isinstance(event, QueueIterationNextEvent): if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - iter_next_resp = self._workflow_cycle_manager._workflow_iteration_next_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + iter_next_resp = 
self._workflow_response_converter.workflow_iteration_next_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield iter_next_resp elif isinstance(event, QueueIterationCompletedEvent): if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - iter_finish_resp = self._workflow_cycle_manager._workflow_iteration_completed_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + iter_finish_resp = self._workflow_response_converter.workflow_iteration_completed_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield iter_finish_resp elif isinstance(event, QueueLoopStartEvent): if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - loop_start_resp = self._workflow_cycle_manager._workflow_loop_start_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + loop_start_resp = self._workflow_response_converter.workflow_loop_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield loop_start_resp elif isinstance(event, QueueLoopNextEvent): if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - loop_next_resp = self._workflow_cycle_manager._workflow_loop_next_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + loop_next_resp = self._workflow_response_converter.workflow_loop_next_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield loop_next_resp elif isinstance(event, QueueLoopCompletedEvent): if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - loop_finish_resp = self._workflow_cycle_manager._workflow_loop_completed_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + loop_finish_resp = self._workflow_response_converter.workflow_loop_completed_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield loop_finish_resp elif isinstance(event, QueueWorkflowSucceededEvent): @@ -535,10 +494,8 @@ class AdvancedChatAppGenerateTaskPipeline: raise ValueError("workflow run not initialized.") with Session(db.engine, expire_on_commit=False) as session: - workflow_run = 
self._workflow_cycle_manager._handle_workflow_run_success( - session=session, + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_success( workflow_run_id=self._workflow_run_id, - start_at=graph_runtime_state.start_at, total_tokens=graph_runtime_state.total_tokens, total_steps=graph_runtime_state.node_run_steps, outputs=event.outputs, @@ -546,10 +503,11 @@ class AdvancedChatAppGenerateTaskPipeline: trace_manager=trace_manager, ) - workflow_finish_resp = self._workflow_cycle_manager._workflow_finish_to_stream_response( - session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + workflow_finish_resp = self._workflow_response_converter.workflow_finish_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_execution=workflow_execution, ) - session.commit() yield workflow_finish_resp self._base_task_pipeline._queue_manager.publish( @@ -562,10 +520,8 @@ class AdvancedChatAppGenerateTaskPipeline: raise ValueError("graph runtime state not initialized.") with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._handle_workflow_run_partial_success( - session=session, + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_partial_success( workflow_run_id=self._workflow_run_id, - start_at=graph_runtime_state.start_at, total_tokens=graph_runtime_state.total_tokens, total_steps=graph_runtime_state.node_run_steps, outputs=event.outputs, @@ -573,10 +529,11 @@ class AdvancedChatAppGenerateTaskPipeline: conversation_id=None, trace_manager=trace_manager, ) - workflow_finish_resp = self._workflow_cycle_manager._workflow_finish_to_stream_response( - session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + workflow_finish_resp = self._workflow_response_converter.workflow_finish_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_execution=workflow_execution, ) - session.commit() yield workflow_finish_resp self._base_task_pipeline._queue_manager.publish( @@ -589,26 +546,25 @@ class AdvancedChatAppGenerateTaskPipeline: raise ValueError("graph runtime state not initialized.") with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._handle_workflow_run_failed( - session=session, + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_failed( workflow_run_id=self._workflow_run_id, - start_at=graph_runtime_state.start_at, total_tokens=graph_runtime_state.total_tokens, total_steps=graph_runtime_state.node_run_steps, status=WorkflowRunStatus.FAILED, - error=event.error, + error_message=event.error, conversation_id=self._conversation_id, trace_manager=trace_manager, exceptions_count=event.exceptions_count, ) - workflow_finish_resp = self._workflow_cycle_manager._workflow_finish_to_stream_response( - session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + workflow_finish_resp = self._workflow_response_converter.workflow_finish_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_execution=workflow_execution, ) - err_event = QueueErrorEvent(error=ValueError(f"Run failed: {workflow_run.error}")) + err_event = QueueErrorEvent(error=ValueError(f"Run failed: {workflow_execution.error_message}")) err = self._base_task_pipeline._handle_error( event=err_event, session=session, message_id=self._message_id ) - 
session.commit() yield workflow_finish_resp yield self._base_task_pipeline._error_to_stream_response(err) @@ -616,21 +572,19 @@ class AdvancedChatAppGenerateTaskPipeline: elif isinstance(event, QueueStopEvent): if self._workflow_run_id and graph_runtime_state: with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._handle_workflow_run_failed( - session=session, + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_failed( workflow_run_id=self._workflow_run_id, - start_at=graph_runtime_state.start_at, total_tokens=graph_runtime_state.total_tokens, total_steps=graph_runtime_state.node_run_steps, status=WorkflowRunStatus.STOPPED, - error=event.get_stop_reason(), + error_message=event.get_stop_reason(), conversation_id=self._conversation_id, trace_manager=trace_manager, ) - workflow_finish_resp = self._workflow_cycle_manager._workflow_finish_to_stream_response( + workflow_finish_resp = self._workflow_response_converter.workflow_finish_to_stream_response( session=session, task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, + workflow_execution=workflow_execution, ) # Save message self._save_message(session=session, graph_runtime_state=graph_runtime_state) @@ -711,7 +665,7 @@ class AdvancedChatAppGenerateTaskPipeline: yield self._message_end_to_stream_response() elif isinstance(event, QueueAgentLogEvent): - yield self._workflow_cycle_manager._handle_agent_log( + yield self._workflow_response_converter.handle_agent_log( task_id=self._application_generate_entity.task_id, event=event ) else: @@ -739,9 +693,9 @@ class AdvancedChatAppGenerateTaskPipeline: url=file["remote_url"], belongs_to="assistant", upload_file_id=file["related_id"], - created_by_role=CreatedByRole.ACCOUNT + created_by_role=CreatorUserRole.ACCOUNT if message.invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER} - else CreatedByRole.END_USER, + else CreatorUserRole.END_USER, created_by=message.from_account_id or message.from_end_user_id or "", ) for file in self._recorded_files diff --git a/api/core/app/apps/agent_chat/app_generator.py b/api/core/app/apps/agent_chat/app_generator.py index 3ed436c07a..158196f24d 100644 --- a/api/core/app/apps/agent_chat/app_generator.py +++ b/api/core/app/apps/agent_chat/app_generator.py @@ -5,7 +5,7 @@ import uuid from collections.abc import Generator, Mapping from typing import Any, Literal, Union, overload -from flask import Flask, current_app +from flask import Flask, copy_current_request_context, current_app, has_request_context from pydantic import ValidationError from configs import dify_config @@ -179,18 +179,23 @@ class AgentChatAppGenerator(MessageBasedAppGenerator): message_id=message.id, ) - # new thread - worker_thread = threading.Thread( - target=self._generate_worker, - kwargs={ - "flask_app": current_app._get_current_object(), # type: ignore - "context": contextvars.copy_context(), - "application_generate_entity": application_generate_entity, - "queue_manager": queue_manager, - "conversation_id": conversation.id, - "message_id": message.id, - }, - ) + # new thread with request context and contextvars + context = contextvars.copy_context() + + @copy_current_request_context + def worker_with_context(): + # Run the worker within the copied context + return context.run( + self._generate_worker, + flask_app=current_app._get_current_object(), # type: ignore + context=context, + application_generate_entity=application_generate_entity, + queue_manager=queue_manager, + 
conversation_id=conversation.id, + message_id=message.id, + ) + + worker_thread = threading.Thread(target=worker_with_context) worker_thread.start() @@ -227,8 +232,21 @@ class AgentChatAppGenerator(MessageBasedAppGenerator): for var, val in context.items(): var.set(val) + # FIXME(-LAN-): Save current user before entering new app context + from flask import g + + saved_user = None + if has_request_context() and hasattr(g, "_login_user"): + saved_user = g._login_user + with flask_app.app_context(): try: + # Restore user in new app context + if saved_user is not None: + from flask import g + + g._login_user = saved_user + # get conversation and message conversation = self._get_conversation(conversation_id) message = self._get_message(message_id) diff --git a/api/core/app/apps/chat/app_generator.py b/api/core/app/apps/chat/app_generator.py index 2d865795d8..a1329cb938 100644 --- a/api/core/app/apps/chat/app_generator.py +++ b/api/core/app/apps/chat/app_generator.py @@ -4,7 +4,7 @@ import uuid from collections.abc import Generator, Mapping from typing import Any, Literal, Union, overload -from flask import Flask, current_app +from flask import Flask, copy_current_request_context, current_app from pydantic import ValidationError from configs import dify_config @@ -170,17 +170,18 @@ class ChatAppGenerator(MessageBasedAppGenerator): message_id=message.id, ) - # new thread - worker_thread = threading.Thread( - target=self._generate_worker, - kwargs={ - "flask_app": current_app._get_current_object(), # type: ignore - "application_generate_entity": application_generate_entity, - "queue_manager": queue_manager, - "conversation_id": conversation.id, - "message_id": message.id, - }, - ) + # new thread with request context + @copy_current_request_context + def worker_with_context(): + return self._generate_worker( + flask_app=current_app._get_current_object(), # type: ignore + application_generate_entity=application_generate_entity, + queue_manager=queue_manager, + conversation_id=conversation.id, + message_id=message.id, + ) + + worker_thread = threading.Thread(target=worker_with_context) worker_thread.start() diff --git a/api/core/app/apps/common/__init__.py b/api/core/app/apps/common/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/app/apps/common/workflow_response_converter.py b/api/core/app/apps/common/workflow_response_converter.py new file mode 100644 index 0000000000..7669bf74bb --- /dev/null +++ b/api/core/app/apps/common/workflow_response_converter.py @@ -0,0 +1,564 @@ +import time +from collections.abc import Mapping, Sequence +from datetime import UTC, datetime +from typing import Any, Optional, Union, cast + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity +from core.app.entities.queue_entities import ( + QueueAgentLogEvent, + QueueIterationCompletedEvent, + QueueIterationNextEvent, + QueueIterationStartEvent, + QueueLoopCompletedEvent, + QueueLoopNextEvent, + QueueLoopStartEvent, + QueueNodeExceptionEvent, + QueueNodeFailedEvent, + QueueNodeInIterationFailedEvent, + QueueNodeInLoopFailedEvent, + QueueNodeRetryEvent, + QueueNodeStartedEvent, + QueueNodeSucceededEvent, + QueueParallelBranchRunFailedEvent, + QueueParallelBranchRunStartedEvent, + QueueParallelBranchRunSucceededEvent, +) +from core.app.entities.task_entities import ( + AgentLogStreamResponse, + IterationNodeCompletedStreamResponse, + IterationNodeNextStreamResponse, + 
IterationNodeStartStreamResponse, + LoopNodeCompletedStreamResponse, + LoopNodeNextStreamResponse, + LoopNodeStartStreamResponse, + NodeFinishStreamResponse, + NodeRetryStreamResponse, + NodeStartStreamResponse, + ParallelBranchFinishedStreamResponse, + ParallelBranchStartStreamResponse, + WorkflowFinishStreamResponse, + WorkflowStartStreamResponse, +) +from core.file import FILE_MODEL_IDENTITY, File +from core.tools.tool_manager import ToolManager +from core.workflow.entities.node_execution_entities import NodeExecution +from core.workflow.entities.workflow_execution_entities import WorkflowExecution +from core.workflow.nodes import NodeType +from core.workflow.nodes.tool.entities import ToolNodeData +from models import ( + Account, + CreatorUserRole, + EndUser, + WorkflowNodeExecutionStatus, + WorkflowRun, +) + + +class WorkflowResponseConverter: + def __init__( + self, + *, + application_generate_entity: Union[AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity], + ) -> None: + self._application_generate_entity = application_generate_entity + + def workflow_start_to_stream_response( + self, + *, + task_id: str, + workflow_execution: WorkflowExecution, + ) -> WorkflowStartStreamResponse: + return WorkflowStartStreamResponse( + task_id=task_id, + workflow_run_id=workflow_execution.id, + data=WorkflowStartStreamResponse.Data( + id=workflow_execution.id, + workflow_id=workflow_execution.workflow_id, + sequence_number=workflow_execution.sequence_number, + inputs=workflow_execution.inputs, + created_at=int(workflow_execution.started_at.timestamp()), + ), + ) + + def workflow_finish_to_stream_response( + self, + *, + session: Session, + task_id: str, + workflow_execution: WorkflowExecution, + ) -> WorkflowFinishStreamResponse: + created_by = None + workflow_run = session.scalar(select(WorkflowRun).where(WorkflowRun.id == workflow_execution.id)) + assert workflow_run is not None + if workflow_run.created_by_role == CreatorUserRole.ACCOUNT: + stmt = select(Account).where(Account.id == workflow_run.created_by) + account = session.scalar(stmt) + if account: + created_by = { + "id": account.id, + "name": account.name, + "email": account.email, + } + elif workflow_run.created_by_role == CreatorUserRole.END_USER: + stmt = select(EndUser).where(EndUser.id == workflow_run.created_by) + end_user = session.scalar(stmt) + if end_user: + created_by = { + "id": end_user.id, + "user": end_user.session_id, + } + else: + raise NotImplementedError(f"unknown created_by_role: {workflow_run.created_by_role}") + + # Handle the case where finished_at is None by using current time as default + finished_at_timestamp = ( + int(workflow_execution.finished_at.timestamp()) + if workflow_execution.finished_at + else int(datetime.now(UTC).timestamp()) + ) + + return WorkflowFinishStreamResponse( + task_id=task_id, + workflow_run_id=workflow_execution.id, + data=WorkflowFinishStreamResponse.Data( + id=workflow_execution.id, + workflow_id=workflow_execution.workflow_id, + sequence_number=workflow_execution.sequence_number, + status=workflow_execution.status, + outputs=workflow_execution.outputs, + error=workflow_execution.error_message, + elapsed_time=workflow_execution.elapsed_time, + total_tokens=workflow_execution.total_tokens, + total_steps=workflow_execution.total_steps, + created_by=created_by, + created_at=int(workflow_execution.started_at.timestamp()), + finished_at=finished_at_timestamp, + files=self.fetch_files_from_node_outputs(workflow_execution.outputs), + 
exceptions_count=workflow_execution.exceptions_count, + ), + ) + + def workflow_node_start_to_stream_response( + self, + *, + event: QueueNodeStartedEvent, + task_id: str, + workflow_node_execution: NodeExecution, + ) -> Optional[NodeStartStreamResponse]: + if workflow_node_execution.node_type in {NodeType.ITERATION, NodeType.LOOP}: + return None + if not workflow_node_execution.workflow_run_id: + return None + + response = NodeStartStreamResponse( + task_id=task_id, + workflow_run_id=workflow_node_execution.workflow_run_id, + data=NodeStartStreamResponse.Data( + id=workflow_node_execution.id, + node_id=workflow_node_execution.node_id, + node_type=workflow_node_execution.node_type, + title=workflow_node_execution.title, + index=workflow_node_execution.index, + predecessor_node_id=workflow_node_execution.predecessor_node_id, + inputs=workflow_node_execution.inputs, + created_at=int(workflow_node_execution.created_at.timestamp()), + parallel_id=event.parallel_id, + parallel_start_node_id=event.parallel_start_node_id, + parent_parallel_id=event.parent_parallel_id, + parent_parallel_start_node_id=event.parent_parallel_start_node_id, + iteration_id=event.in_iteration_id, + loop_id=event.in_loop_id, + parallel_run_id=event.parallel_mode_run_id, + agent_strategy=event.agent_strategy, + ), + ) + + # extras logic + if event.node_type == NodeType.TOOL: + node_data = cast(ToolNodeData, event.node_data) + response.data.extras["icon"] = ToolManager.get_tool_icon( + tenant_id=self._application_generate_entity.app_config.tenant_id, + provider_type=node_data.provider_type, + provider_id=node_data.provider_id, + ) + + return response + + def workflow_node_finish_to_stream_response( + self, + *, + event: QueueNodeSucceededEvent + | QueueNodeFailedEvent + | QueueNodeInIterationFailedEvent + | QueueNodeInLoopFailedEvent + | QueueNodeExceptionEvent, + task_id: str, + workflow_node_execution: NodeExecution, + ) -> Optional[NodeFinishStreamResponse]: + if workflow_node_execution.node_type in {NodeType.ITERATION, NodeType.LOOP}: + return None + if not workflow_node_execution.workflow_run_id: + return None + if not workflow_node_execution.finished_at: + return None + + return NodeFinishStreamResponse( + task_id=task_id, + workflow_run_id=workflow_node_execution.workflow_run_id, + data=NodeFinishStreamResponse.Data( + id=workflow_node_execution.id, + node_id=workflow_node_execution.node_id, + node_type=workflow_node_execution.node_type, + index=workflow_node_execution.index, + title=workflow_node_execution.title, + predecessor_node_id=workflow_node_execution.predecessor_node_id, + inputs=workflow_node_execution.inputs, + process_data=workflow_node_execution.process_data, + outputs=workflow_node_execution.outputs, + status=workflow_node_execution.status, + error=workflow_node_execution.error, + elapsed_time=workflow_node_execution.elapsed_time, + execution_metadata=workflow_node_execution.metadata, + created_at=int(workflow_node_execution.created_at.timestamp()), + finished_at=int(workflow_node_execution.finished_at.timestamp()), + files=self.fetch_files_from_node_outputs(workflow_node_execution.outputs or {}), + parallel_id=event.parallel_id, + parallel_start_node_id=event.parallel_start_node_id, + parent_parallel_id=event.parent_parallel_id, + parent_parallel_start_node_id=event.parent_parallel_start_node_id, + iteration_id=event.in_iteration_id, + loop_id=event.in_loop_id, + ), + ) + + def workflow_node_retry_to_stream_response( + self, + *, + event: QueueNodeRetryEvent, + task_id: str, + workflow_node_execution: 
NodeExecution, + ) -> Optional[Union[NodeRetryStreamResponse, NodeFinishStreamResponse]]: + if workflow_node_execution.node_type in {NodeType.ITERATION, NodeType.LOOP}: + return None + if not workflow_node_execution.workflow_run_id: + return None + if not workflow_node_execution.finished_at: + return None + + return NodeRetryStreamResponse( + task_id=task_id, + workflow_run_id=workflow_node_execution.workflow_run_id, + data=NodeRetryStreamResponse.Data( + id=workflow_node_execution.id, + node_id=workflow_node_execution.node_id, + node_type=workflow_node_execution.node_type, + index=workflow_node_execution.index, + title=workflow_node_execution.title, + predecessor_node_id=workflow_node_execution.predecessor_node_id, + inputs=workflow_node_execution.inputs, + process_data=workflow_node_execution.process_data, + outputs=workflow_node_execution.outputs, + status=workflow_node_execution.status, + error=workflow_node_execution.error, + elapsed_time=workflow_node_execution.elapsed_time, + execution_metadata=workflow_node_execution.metadata, + created_at=int(workflow_node_execution.created_at.timestamp()), + finished_at=int(workflow_node_execution.finished_at.timestamp()), + files=self.fetch_files_from_node_outputs(workflow_node_execution.outputs or {}), + parallel_id=event.parallel_id, + parallel_start_node_id=event.parallel_start_node_id, + parent_parallel_id=event.parent_parallel_id, + parent_parallel_start_node_id=event.parent_parallel_start_node_id, + iteration_id=event.in_iteration_id, + loop_id=event.in_loop_id, + retry_index=event.retry_index, + ), + ) + + def workflow_parallel_branch_start_to_stream_response( + self, + *, + task_id: str, + workflow_execution_id: str, + event: QueueParallelBranchRunStartedEvent, + ) -> ParallelBranchStartStreamResponse: + return ParallelBranchStartStreamResponse( + task_id=task_id, + workflow_run_id=workflow_execution_id, + data=ParallelBranchStartStreamResponse.Data( + parallel_id=event.parallel_id, + parallel_branch_id=event.parallel_start_node_id, + parent_parallel_id=event.parent_parallel_id, + parent_parallel_start_node_id=event.parent_parallel_start_node_id, + iteration_id=event.in_iteration_id, + loop_id=event.in_loop_id, + created_at=int(time.time()), + ), + ) + + def workflow_parallel_branch_finished_to_stream_response( + self, + *, + task_id: str, + workflow_execution_id: str, + event: QueueParallelBranchRunSucceededEvent | QueueParallelBranchRunFailedEvent, + ) -> ParallelBranchFinishedStreamResponse: + return ParallelBranchFinishedStreamResponse( + task_id=task_id, + workflow_run_id=workflow_execution_id, + data=ParallelBranchFinishedStreamResponse.Data( + parallel_id=event.parallel_id, + parallel_branch_id=event.parallel_start_node_id, + parent_parallel_id=event.parent_parallel_id, + parent_parallel_start_node_id=event.parent_parallel_start_node_id, + iteration_id=event.in_iteration_id, + loop_id=event.in_loop_id, + status="succeeded" if isinstance(event, QueueParallelBranchRunSucceededEvent) else "failed", + error=event.error if isinstance(event, QueueParallelBranchRunFailedEvent) else None, + created_at=int(time.time()), + ), + ) + + def workflow_iteration_start_to_stream_response( + self, + *, + task_id: str, + workflow_execution_id: str, + event: QueueIterationStartEvent, + ) -> IterationNodeStartStreamResponse: + return IterationNodeStartStreamResponse( + task_id=task_id, + workflow_run_id=workflow_execution_id, + data=IterationNodeStartStreamResponse.Data( + id=event.node_id, + node_id=event.node_id, + node_type=event.node_type.value, + 
title=event.node_data.title, + created_at=int(time.time()), + extras={}, + inputs=event.inputs or {}, + metadata=event.metadata or {}, + parallel_id=event.parallel_id, + parallel_start_node_id=event.parallel_start_node_id, + ), + ) + + def workflow_iteration_next_to_stream_response( + self, + *, + task_id: str, + workflow_execution_id: str, + event: QueueIterationNextEvent, + ) -> IterationNodeNextStreamResponse: + return IterationNodeNextStreamResponse( + task_id=task_id, + workflow_run_id=workflow_execution_id, + data=IterationNodeNextStreamResponse.Data( + id=event.node_id, + node_id=event.node_id, + node_type=event.node_type.value, + title=event.node_data.title, + index=event.index, + pre_iteration_output=event.output, + created_at=int(time.time()), + extras={}, + parallel_id=event.parallel_id, + parallel_start_node_id=event.parallel_start_node_id, + parallel_mode_run_id=event.parallel_mode_run_id, + duration=event.duration, + ), + ) + + def workflow_iteration_completed_to_stream_response( + self, + *, + task_id: str, + workflow_execution_id: str, + event: QueueIterationCompletedEvent, + ) -> IterationNodeCompletedStreamResponse: + return IterationNodeCompletedStreamResponse( + task_id=task_id, + workflow_run_id=workflow_execution_id, + data=IterationNodeCompletedStreamResponse.Data( + id=event.node_id, + node_id=event.node_id, + node_type=event.node_type.value, + title=event.node_data.title, + outputs=event.outputs, + created_at=int(time.time()), + extras={}, + inputs=event.inputs or {}, + status=WorkflowNodeExecutionStatus.SUCCEEDED + if event.error is None + else WorkflowNodeExecutionStatus.FAILED, + error=None, + elapsed_time=(datetime.now(UTC).replace(tzinfo=None) - event.start_at).total_seconds(), + total_tokens=event.metadata.get("total_tokens", 0) if event.metadata else 0, + execution_metadata=event.metadata, + finished_at=int(time.time()), + steps=event.steps, + parallel_id=event.parallel_id, + parallel_start_node_id=event.parallel_start_node_id, + ), + ) + + def workflow_loop_start_to_stream_response( + self, *, task_id: str, workflow_execution_id: str, event: QueueLoopStartEvent + ) -> LoopNodeStartStreamResponse: + return LoopNodeStartStreamResponse( + task_id=task_id, + workflow_run_id=workflow_execution_id, + data=LoopNodeStartStreamResponse.Data( + id=event.node_id, + node_id=event.node_id, + node_type=event.node_type.value, + title=event.node_data.title, + created_at=int(time.time()), + extras={}, + inputs=event.inputs or {}, + metadata=event.metadata or {}, + parallel_id=event.parallel_id, + parallel_start_node_id=event.parallel_start_node_id, + ), + ) + + def workflow_loop_next_to_stream_response( + self, + *, + task_id: str, + workflow_execution_id: str, + event: QueueLoopNextEvent, + ) -> LoopNodeNextStreamResponse: + return LoopNodeNextStreamResponse( + task_id=task_id, + workflow_run_id=workflow_execution_id, + data=LoopNodeNextStreamResponse.Data( + id=event.node_id, + node_id=event.node_id, + node_type=event.node_type.value, + title=event.node_data.title, + index=event.index, + pre_loop_output=event.output, + created_at=int(time.time()), + extras={}, + parallel_id=event.parallel_id, + parallel_start_node_id=event.parallel_start_node_id, + parallel_mode_run_id=event.parallel_mode_run_id, + duration=event.duration, + ), + ) + + def workflow_loop_completed_to_stream_response( + self, + *, + task_id: str, + workflow_execution_id: str, + event: QueueLoopCompletedEvent, + ) -> LoopNodeCompletedStreamResponse: + return LoopNodeCompletedStreamResponse( + 
task_id=task_id, + workflow_run_id=workflow_execution_id, + data=LoopNodeCompletedStreamResponse.Data( + id=event.node_id, + node_id=event.node_id, + node_type=event.node_type.value, + title=event.node_data.title, + outputs=event.outputs, + created_at=int(time.time()), + extras={}, + inputs=event.inputs or {}, + status=WorkflowNodeExecutionStatus.SUCCEEDED + if event.error is None + else WorkflowNodeExecutionStatus.FAILED, + error=None, + elapsed_time=(datetime.now(UTC).replace(tzinfo=None) - event.start_at).total_seconds(), + total_tokens=event.metadata.get("total_tokens", 0) if event.metadata else 0, + execution_metadata=event.metadata, + finished_at=int(time.time()), + steps=event.steps, + parallel_id=event.parallel_id, + parallel_start_node_id=event.parallel_start_node_id, + ), + ) + + def fetch_files_from_node_outputs(self, outputs_dict: Mapping[str, Any] | None) -> Sequence[Mapping[str, Any]]: + """ + Fetch files from node outputs + :param outputs_dict: node outputs dict + :return: + """ + if not outputs_dict: + return [] + + files = [self._fetch_files_from_variable_value(output_value) for output_value in outputs_dict.values()] + # Remove None + files = [file for file in files if file] + # Flatten list + # Flatten the list of sequences into a single list of mappings + flattened_files = [file for sublist in files if sublist for file in sublist] + + # Convert to tuple to match Sequence type + return tuple(flattened_files) + + def _fetch_files_from_variable_value(self, value: Union[dict, list]) -> Sequence[Mapping[str, Any]]: + """ + Fetch files from variable value + :param value: variable value + :return: + """ + if not value: + return [] + + files = [] + if isinstance(value, list): + for item in value: + file = self._get_file_var_from_value(item) + if file: + files.append(file) + elif isinstance(value, dict): + file = self._get_file_var_from_value(value) + if file: + files.append(file) + + return files + + def _get_file_var_from_value(self, value: Union[dict, list]) -> Mapping[str, Any] | None: + """ + Get file var from value + :param value: variable value + :return: + """ + if not value: + return None + + if isinstance(value, dict) and value.get("dify_model_identity") == FILE_MODEL_IDENTITY: + return value + elif isinstance(value, File): + return value.to_dict() + + return None + + def handle_agent_log(self, task_id: str, event: QueueAgentLogEvent) -> AgentLogStreamResponse: + """ + Handle agent log + :param task_id: task id + :param event: agent log event + :return: + """ + return AgentLogStreamResponse( + task_id=task_id, + data=AgentLogStreamResponse.Data( + node_execution_id=event.node_execution_id, + id=event.id, + parent_id=event.parent_id, + label=event.label, + error=event.error, + status=event.status, + data=event.data, + metadata=event.metadata, + node_id=event.node_id, + ), + ) diff --git a/api/core/app/apps/completion/app_generator.py b/api/core/app/apps/completion/app_generator.py index b1bc412616..adcbaad3ec 100644 --- a/api/core/app/apps/completion/app_generator.py +++ b/api/core/app/apps/completion/app_generator.py @@ -4,7 +4,7 @@ import uuid from collections.abc import Generator, Mapping from typing import Any, Literal, Union, overload -from flask import Flask, current_app +from flask import Flask, copy_current_request_context, current_app from pydantic import ValidationError from configs import dify_config @@ -151,16 +151,17 @@ class CompletionAppGenerator(MessageBasedAppGenerator): message_id=message.id, ) - # new thread - worker_thread = threading.Thread( - 
target=self._generate_worker, - kwargs={ - "flask_app": current_app._get_current_object(), # type: ignore - "application_generate_entity": application_generate_entity, - "queue_manager": queue_manager, - "message_id": message.id, - }, - ) + # new thread with request context + @copy_current_request_context + def worker_with_context(): + return self._generate_worker( + flask_app=current_app._get_current_object(), # type: ignore + application_generate_entity=application_generate_entity, + queue_manager=queue_manager, + message_id=message.id, + ) + + worker_thread = threading.Thread(target=worker_with_context) worker_thread.start() @@ -313,16 +314,17 @@ class CompletionAppGenerator(MessageBasedAppGenerator): message_id=message.id, ) - # new thread - worker_thread = threading.Thread( - target=self._generate_worker, - kwargs={ - "flask_app": current_app._get_current_object(), # type: ignore - "application_generate_entity": application_generate_entity, - "queue_manager": queue_manager, - "message_id": message.id, - }, - ) + # new thread with request context + @copy_current_request_context + def worker_with_context(): + return self._generate_worker( + flask_app=current_app._get_current_object(), # type: ignore + application_generate_entity=application_generate_entity, + queue_manager=queue_manager, + message_id=message.id, + ) + + worker_thread = threading.Thread(target=worker_with_context) worker_thread.start() diff --git a/api/core/app/apps/message_based_app_generator.py b/api/core/app/apps/message_based_app_generator.py index 995082b79d..58b94f4d43 100644 --- a/api/core/app/apps/message_based_app_generator.py +++ b/api/core/app/apps/message_based_app_generator.py @@ -25,7 +25,7 @@ from core.app.task_pipeline.easy_ui_based_generate_task_pipeline import EasyUIBa from core.prompt.utils.prompt_template_parser import PromptTemplateParser from extensions.ext_database import db from models import Account -from models.enums import CreatedByRole +from models.enums import CreatorUserRole from models.model import App, AppMode, AppModelConfig, Conversation, EndUser, Message, MessageFile from services.errors.app_model_config import AppModelConfigBrokenError from services.errors.conversation import ConversationNotExistsError @@ -223,7 +223,7 @@ class MessageBasedAppGenerator(BaseAppGenerator): belongs_to="user", url=file.remote_url, upload_file_id=file.related_id, - created_by_role=(CreatedByRole.ACCOUNT if account_id else CreatedByRole.END_USER), + created_by_role=(CreatorUserRole.ACCOUNT if account_id else CreatorUserRole.END_USER), created_by=account_id or end_user_id or "", ) db.session.add(message_file) diff --git a/api/core/app/apps/workflow/app_generator.py b/api/core/app/apps/workflow/app_generator.py index 9c3d78a338..6ea90e5a3d 100644 --- a/api/core/app/apps/workflow/app_generator.py +++ b/api/core/app/apps/workflow/app_generator.py @@ -5,7 +5,7 @@ import uuid from collections.abc import Generator, Mapping, Sequence from typing import Any, Literal, Optional, Union, overload -from flask import Flask, current_app +from flask import Flask, copy_current_request_context, current_app, has_request_context from pydantic import ValidationError from sqlalchemy.orm import sessionmaker @@ -23,11 +23,14 @@ from core.app.entities.app_invoke_entities import InvokeFrom, WorkflowAppGenerat from core.app.entities.task_entities import WorkflowAppBlockingResponse, WorkflowAppStreamResponse from core.model_runtime.errors.invoke import InvokeAuthorizationError from core.ops.ops_trace_manager import TraceQueueManager 
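
The hunks above and below replace the bare threading.Thread(kwargs=...) spawn with a worker wrapped in Flask's copy_current_request_context plus a copied contextvars context, so the background generator keeps both the request context and any context-local state of the calling request. The following is a minimal standalone sketch of that pattern, not part of the patch itself; the run_worker function and the /generate endpoint are illustrative names only.

import contextvars
import threading

from flask import Flask, copy_current_request_context, current_app

app = Flask(__name__)


def run_worker(flask_app: Flask, payload: dict) -> None:
    # Re-enter an application context inside the thread before touching
    # anything that relies on current_app or request-scoped resources.
    with flask_app.app_context():
        ...


@app.route("/generate", methods=["POST"])
def generate():
    # Snapshot the caller's contextvars so the thread sees the same values.
    context = contextvars.copy_context()

    @copy_current_request_context
    def worker_with_context():
        # context.run restores the copied contextvars around the worker call,
        # while the decorator replays the original request context.
        return context.run(
            run_worker,
            flask_app=current_app._get_current_object(),
            payload={"task": "demo"},
        )

    threading.Thread(target=worker_with_context).start()
    return {"status": "accepted"}, 202

The decorator has to be applied while the request context is still active, which is why the wrapper is defined inside the request-handling call before the thread starts rather than at module level.
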
-from core.workflow.repository import RepositoryFactory +from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository +from core.repositories.sqlalchemy_workflow_execution_repository import SQLAlchemyWorkflowExecutionRepository +from core.workflow.repository.workflow_execution_repository import WorkflowExecutionRepository from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository from extensions.ext_database import db from factories import file_factory -from models import Account, App, EndUser, Workflow +from models import Account, App, EndUser, Workflow, WorkflowNodeExecutionTriggeredFrom +from models.enums import WorkflowRunTriggeredFrom logger = logging.getLogger(__name__) @@ -132,18 +135,30 @@ class WorkflowAppGenerator(BaseAppGenerator): workflow_run_id=workflow_run_id, ) - contexts.tenant_id.set(application_generate_entity.app_config.tenant_id) contexts.plugin_tool_providers.set({}) contexts.plugin_tool_providers_lock.set(threading.Lock()) - # Create workflow node execution repository + # Create repositories + # + # Create session factory session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) - workflow_node_execution_repository = RepositoryFactory.create_workflow_node_execution_repository( - params={ - "tenant_id": application_generate_entity.app_config.tenant_id, - "app_id": application_generate_entity.app_config.app_id, - "session_factory": session_factory, - } + # Create workflow execution(aka workflow run) repository + if invoke_from == InvokeFrom.DEBUGGER: + workflow_triggered_from = WorkflowRunTriggeredFrom.DEBUGGING + else: + workflow_triggered_from = WorkflowRunTriggeredFrom.APP_RUN + workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository( + session_factory=session_factory, + user=user, + app_id=application_generate_entity.app_config.app_id, + triggered_from=workflow_triggered_from, + ) + # Create workflow node execution repository + workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=session_factory, + user=user, + app_id=application_generate_entity.app_config.app_id, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, ) return self._generate( @@ -152,6 +167,7 @@ class WorkflowAppGenerator(BaseAppGenerator): user=user, application_generate_entity=application_generate_entity, invoke_from=invoke_from, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, streaming=streaming, workflow_thread_pool_id=workflow_thread_pool_id, @@ -165,6 +181,7 @@ class WorkflowAppGenerator(BaseAppGenerator): user: Union[Account, EndUser], application_generate_entity: WorkflowAppGenerateEntity, invoke_from: InvokeFrom, + workflow_execution_repository: WorkflowExecutionRepository, workflow_node_execution_repository: WorkflowNodeExecutionRepository, streaming: bool = True, workflow_thread_pool_id: Optional[str] = None, @@ -189,17 +206,22 @@ class WorkflowAppGenerator(BaseAppGenerator): app_mode=app_model.mode, ) - # new thread - worker_thread = threading.Thread( - target=self._generate_worker, - kwargs={ - "flask_app": current_app._get_current_object(), # type: ignore - "application_generate_entity": application_generate_entity, - "queue_manager": queue_manager, - "context": contextvars.copy_context(), - "workflow_thread_pool_id": workflow_thread_pool_id, - }, - ) + # new thread with request context and contextvars + context = contextvars.copy_context() + + 
@copy_current_request_context + def worker_with_context(): + # Run the worker within the copied context + return context.run( + self._generate_worker, + flask_app=current_app._get_current_object(), # type: ignore + application_generate_entity=application_generate_entity, + queue_manager=queue_manager, + context=context, + workflow_thread_pool_id=workflow_thread_pool_id, + ) + + worker_thread = threading.Thread(target=worker_with_context) worker_thread.start() @@ -209,6 +231,7 @@ class WorkflowAppGenerator(BaseAppGenerator): workflow=workflow, queue_manager=queue_manager, user=user, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, stream=streaming, ) @@ -258,18 +281,28 @@ class WorkflowAppGenerator(BaseAppGenerator): ), workflow_run_id=str(uuid.uuid4()), ) - contexts.tenant_id.set(application_generate_entity.app_config.tenant_id) contexts.plugin_tool_providers.set({}) contexts.plugin_tool_providers_lock.set(threading.Lock()) + # Create repositories + # + # Create session factory + session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) + # Create workflow execution(aka workflow run) repository + workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository( + session_factory=session_factory, + user=user, + app_id=application_generate_entity.app_config.app_id, + triggered_from=WorkflowRunTriggeredFrom.DEBUGGING, + ) # Create workflow node execution repository session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) - workflow_node_execution_repository = RepositoryFactory.create_workflow_node_execution_repository( - params={ - "tenant_id": application_generate_entity.app_config.tenant_id, - "app_id": application_generate_entity.app_config.app_id, - "session_factory": session_factory, - } + + workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=session_factory, + user=user, + app_id=application_generate_entity.app_config.app_id, + triggered_from=WorkflowNodeExecutionTriggeredFrom.SINGLE_STEP, ) return self._generate( @@ -278,6 +311,7 @@ class WorkflowAppGenerator(BaseAppGenerator): user=user, invoke_from=InvokeFrom.DEBUGGER, application_generate_entity=application_generate_entity, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, streaming=streaming, ) @@ -323,18 +357,28 @@ class WorkflowAppGenerator(BaseAppGenerator): single_loop_run=WorkflowAppGenerateEntity.SingleLoopRunEntity(node_id=node_id, inputs=args["inputs"]), workflow_run_id=str(uuid.uuid4()), ) - contexts.tenant_id.set(application_generate_entity.app_config.tenant_id) contexts.plugin_tool_providers.set({}) contexts.plugin_tool_providers_lock.set(threading.Lock()) + # Create repositories + # + # Create session factory + session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) + # Create workflow execution(aka workflow run) repository + workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository( + session_factory=session_factory, + user=user, + app_id=application_generate_entity.app_config.app_id, + triggered_from=WorkflowRunTriggeredFrom.DEBUGGING, + ) # Create workflow node execution repository session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) - workflow_node_execution_repository = RepositoryFactory.create_workflow_node_execution_repository( - params={ - "tenant_id": application_generate_entity.app_config.tenant_id, - "app_id": 
application_generate_entity.app_config.app_id, - "session_factory": session_factory, - } + + workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=session_factory, + user=user, + app_id=application_generate_entity.app_config.app_id, + triggered_from=WorkflowNodeExecutionTriggeredFrom.SINGLE_STEP, ) return self._generate( @@ -343,6 +387,7 @@ class WorkflowAppGenerator(BaseAppGenerator): user=user, invoke_from=InvokeFrom.DEBUGGER, application_generate_entity=application_generate_entity, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, streaming=streaming, ) @@ -365,8 +410,22 @@ class WorkflowAppGenerator(BaseAppGenerator): """ for var, val in context.items(): var.set(val) + + # FIXME(-LAN-): Save current user before entering new app context + from flask import g + + saved_user = None + if has_request_context() and hasattr(g, "_login_user"): + saved_user = g._login_user + with flask_app.app_context(): try: + # Restore user in new app context + if saved_user is not None: + from flask import g + + g._login_user = saved_user + # workflow app runner = WorkflowAppRunner( application_generate_entity=application_generate_entity, @@ -400,6 +459,7 @@ class WorkflowAppGenerator(BaseAppGenerator): workflow: Workflow, queue_manager: AppQueueManager, user: Union[Account, EndUser], + workflow_execution_repository: WorkflowExecutionRepository, workflow_node_execution_repository: WorkflowNodeExecutionRepository, stream: bool = False, ) -> Union[WorkflowAppBlockingResponse, Generator[WorkflowAppStreamResponse, None, None]]: @@ -419,8 +479,9 @@ class WorkflowAppGenerator(BaseAppGenerator): workflow=workflow, queue_manager=queue_manager, user=user, - stream=stream, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, + stream=stream, ) try: diff --git a/api/core/app/apps/workflow/generate_task_pipeline.py b/api/core/app/apps/workflow/generate_task_pipeline.py index 67cad9c998..0291f49cac 100644 --- a/api/core/app/apps/workflow/generate_task_pipeline.py +++ b/api/core/app/apps/workflow/generate_task_pipeline.py @@ -3,11 +3,12 @@ import time from collections.abc import Generator from typing import Optional, Union +from sqlalchemy import select from sqlalchemy.orm import Session from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME -from core.app.apps.advanced_chat.app_generator_tts_publisher import AppGeneratorTTSPublisher, AudioTrunk from core.app.apps.base_app_queue_manager import AppQueueManager +from core.app.apps.common.workflow_response_converter import WorkflowResponseConverter from core.app.entities.app_invoke_entities import ( InvokeFrom, WorkflowAppGenerateEntity, @@ -52,13 +53,16 @@ from core.app.entities.task_entities import ( WorkflowTaskState, ) from core.app.task_pipeline.based_generate_task_pipeline import BasedGenerateTaskPipeline -from core.app.task_pipeline.workflow_cycle_manage import WorkflowCycleManage +from core.base.tts import AppGeneratorTTSPublisher, AudioTrunk from core.ops.ops_trace_manager import TraceQueueManager +from core.workflow.entities.workflow_execution_entities import WorkflowExecution from core.workflow.enums import SystemVariableKey +from core.workflow.repository.workflow_execution_repository import WorkflowExecutionRepository from core.workflow.repository.workflow_node_execution_repository import 
WorkflowNodeExecutionRepository +from core.workflow.workflow_cycle_manager import WorkflowCycleManager from extensions.ext_database import db from models.account import Account -from models.enums import CreatedByRole +from models.enums import CreatorUserRole from models.model import EndUser from models.workflow import ( Workflow, @@ -83,6 +87,7 @@ class WorkflowAppGenerateTaskPipeline: queue_manager: AppQueueManager, user: Union[Account, EndUser], stream: bool, + workflow_execution_repository: WorkflowExecutionRepository, workflow_node_execution_repository: WorkflowNodeExecutionRepository, ) -> None: self._base_task_pipeline = BasedGenerateTaskPipeline( @@ -94,15 +99,15 @@ class WorkflowAppGenerateTaskPipeline: if isinstance(user, EndUser): self._user_id = user.id user_session_id = user.session_id - self._created_by_role = CreatedByRole.END_USER + self._created_by_role = CreatorUserRole.END_USER elif isinstance(user, Account): self._user_id = user.id user_session_id = user.id - self._created_by_role = CreatedByRole.ACCOUNT + self._created_by_role = CreatorUserRole.ACCOUNT else: raise ValueError(f"Invalid user type: {type(user)}") - self._workflow_cycle_manager = WorkflowCycleManage( + self._workflow_cycle_manager = WorkflowCycleManager( application_generate_entity=application_generate_entity, workflow_system_variables={ SystemVariableKey.FILES: application_generate_entity.files, @@ -111,9 +116,14 @@ class WorkflowAppGenerateTaskPipeline: SystemVariableKey.WORKFLOW_ID: workflow.id, SystemVariableKey.WORKFLOW_RUN_ID: application_generate_entity.workflow_run_id, }, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, ) + self._workflow_response_converter = WorkflowResponseConverter( + application_generate_entity=application_generate_entity, + ) + self._application_generate_entity = application_generate_entity self._workflow_id = workflow.id self._workflow_features_dict = workflow.features_dict @@ -258,17 +268,15 @@ class WorkflowAppGenerateTaskPipeline: with Session(db.engine, expire_on_commit=False) as session: # init workflow run - workflow_run = self._workflow_cycle_manager._handle_workflow_run_start( + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_start( session=session, workflow_id=self._workflow_id, - user_id=self._user_id, - created_by_role=self._created_by_role, ) - self._workflow_run_id = workflow_run.id - start_resp = self._workflow_cycle_manager._workflow_start_to_stream_response( - session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + self._workflow_run_id = workflow_execution.id + start_resp = self._workflow_response_converter.workflow_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution=workflow_execution, ) - session.commit() yield start_resp elif isinstance( @@ -278,13 +286,11 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id + workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_retried( + workflow_execution_id=self._workflow_run_id, + event=event, ) - workflow_node_execution = self._workflow_cycle_manager._handle_workflow_node_execution_retried( - workflow_run=workflow_run, event=event - ) - response = 
self._workflow_cycle_manager._workflow_node_retry_to_stream_response( + response = self._workflow_response_converter.workflow_node_retry_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, @@ -297,27 +303,22 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - workflow_node_execution = self._workflow_cycle_manager._handle_node_execution_start( - workflow_run=workflow_run, event=event - ) - node_start_response = self._workflow_cycle_manager._workflow_node_start_to_stream_response( - event=event, - task_id=self._application_generate_entity.task_id, - workflow_node_execution=workflow_node_execution, - ) - session.commit() + workflow_node_execution = self._workflow_cycle_manager.handle_node_execution_start( + workflow_execution_id=self._workflow_run_id, event=event + ) + node_start_response = self._workflow_response_converter.workflow_node_start_to_stream_response( + event=event, + task_id=self._application_generate_entity.task_id, + workflow_node_execution=workflow_node_execution, + ) if node_start_response: yield node_start_response elif isinstance(event, QueueNodeSucceededEvent): - workflow_node_execution = self._workflow_cycle_manager._handle_workflow_node_execution_success( + workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_success( event=event ) - node_success_response = self._workflow_cycle_manager._workflow_node_finish_to_stream_response( + node_success_response = self._workflow_response_converter.workflow_node_finish_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, @@ -332,10 +333,10 @@ class WorkflowAppGenerateTaskPipeline: | QueueNodeInLoopFailedEvent | QueueNodeExceptionEvent, ): - workflow_node_execution = self._workflow_cycle_manager._handle_workflow_node_execution_failed( + workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_failed( event=event, ) - node_failed_response = self._workflow_cycle_manager._workflow_node_finish_to_stream_response( + node_failed_response = self._workflow_response_converter.workflow_node_finish_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, @@ -348,18 +349,13 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - parallel_start_resp = ( - self._workflow_cycle_manager._workflow_parallel_branch_start_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + parallel_start_resp = ( + self._workflow_response_converter.workflow_parallel_branch_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, ) + ) yield parallel_start_resp @@ -367,18 +363,13 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - 
with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - parallel_finish_resp = ( - self._workflow_cycle_manager._workflow_parallel_branch_finished_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + parallel_finish_resp = ( + self._workflow_response_converter.workflow_parallel_branch_finished_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, ) + ) yield parallel_finish_resp @@ -386,16 +377,11 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - iter_start_resp = self._workflow_cycle_manager._workflow_iteration_start_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + iter_start_resp = self._workflow_response_converter.workflow_iteration_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield iter_start_resp @@ -403,16 +389,11 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - iter_next_resp = self._workflow_cycle_manager._workflow_iteration_next_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + iter_next_resp = self._workflow_response_converter.workflow_iteration_next_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield iter_next_resp @@ -420,16 +401,11 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - iter_finish_resp = self._workflow_cycle_manager._workflow_iteration_completed_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + iter_finish_resp = self._workflow_response_converter.workflow_iteration_completed_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield iter_finish_resp @@ -437,16 +413,11 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - loop_start_resp = self._workflow_cycle_manager._workflow_loop_start_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - 
workflow_run=workflow_run, - event=event, - ) + loop_start_resp = self._workflow_response_converter.workflow_loop_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield loop_start_resp @@ -454,16 +425,11 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - loop_next_resp = self._workflow_cycle_manager._workflow_loop_next_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + loop_next_resp = self._workflow_response_converter.workflow_loop_next_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield loop_next_resp @@ -471,16 +437,11 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - loop_finish_resp = self._workflow_cycle_manager._workflow_loop_completed_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + loop_finish_resp = self._workflow_response_converter.workflow_loop_completed_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield loop_finish_resp @@ -491,10 +452,8 @@ class WorkflowAppGenerateTaskPipeline: raise ValueError("graph runtime state not initialized.") with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._handle_workflow_run_success( - session=session, + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_success( workflow_run_id=self._workflow_run_id, - start_at=graph_runtime_state.start_at, total_tokens=graph_runtime_state.total_tokens, total_steps=graph_runtime_state.node_run_steps, outputs=event.outputs, @@ -503,12 +462,12 @@ class WorkflowAppGenerateTaskPipeline: ) # save workflow app log - self._save_workflow_app_log(session=session, workflow_run=workflow_run) + self._save_workflow_app_log(session=session, workflow_execution=workflow_execution) - workflow_finish_resp = self._workflow_cycle_manager._workflow_finish_to_stream_response( + workflow_finish_resp = self._workflow_response_converter.workflow_finish_to_stream_response( session=session, task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, + workflow_execution=workflow_execution, ) session.commit() @@ -520,10 +479,8 @@ class WorkflowAppGenerateTaskPipeline: raise ValueError("graph runtime state not initialized.") with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._handle_workflow_run_partial_success( - session=session, + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_partial_success( workflow_run_id=self._workflow_run_id, - start_at=graph_runtime_state.start_at, total_tokens=graph_runtime_state.total_tokens, total_steps=graph_runtime_state.node_run_steps, outputs=event.outputs, @@ -533,10 
+490,12 @@ class WorkflowAppGenerateTaskPipeline: ) # save workflow app log - self._save_workflow_app_log(session=session, workflow_run=workflow_run) + self._save_workflow_app_log(session=session, workflow_execution=workflow_execution) - workflow_finish_resp = self._workflow_cycle_manager._workflow_finish_to_stream_response( - session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + workflow_finish_resp = self._workflow_response_converter.workflow_finish_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_execution=workflow_execution, ) session.commit() @@ -548,26 +507,28 @@ class WorkflowAppGenerateTaskPipeline: raise ValueError("graph runtime state not initialized.") with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._handle_workflow_run_failed( - session=session, + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_failed( workflow_run_id=self._workflow_run_id, - start_at=graph_runtime_state.start_at, total_tokens=graph_runtime_state.total_tokens, total_steps=graph_runtime_state.node_run_steps, status=WorkflowRunStatus.FAILED if isinstance(event, QueueWorkflowFailedEvent) else WorkflowRunStatus.STOPPED, - error=event.error if isinstance(event, QueueWorkflowFailedEvent) else event.get_stop_reason(), + error_message=event.error + if isinstance(event, QueueWorkflowFailedEvent) + else event.get_stop_reason(), conversation_id=None, trace_manager=trace_manager, exceptions_count=event.exceptions_count if isinstance(event, QueueWorkflowFailedEvent) else 0, ) # save workflow app log - self._save_workflow_app_log(session=session, workflow_run=workflow_run) + self._save_workflow_app_log(session=session, workflow_execution=workflow_execution) - workflow_finish_resp = self._workflow_cycle_manager._workflow_finish_to_stream_response( - session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + workflow_finish_resp = self._workflow_response_converter.workflow_finish_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_execution=workflow_execution, ) session.commit() @@ -586,7 +547,7 @@ class WorkflowAppGenerateTaskPipeline: delta_text, from_variable_selector=event.from_variable_selector ) elif isinstance(event, QueueAgentLogEvent): - yield self._workflow_cycle_manager._handle_agent_log( + yield self._workflow_response_converter.handle_agent_log( task_id=self._application_generate_entity.task_id, event=event ) else: @@ -595,11 +556,9 @@ class WorkflowAppGenerateTaskPipeline: if tts_publisher: tts_publisher.publish(None) - def _save_workflow_app_log(self, *, session: Session, workflow_run: WorkflowRun) -> None: - """ - Save workflow app log. 
- :return: - """ + def _save_workflow_app_log(self, *, session: Session, workflow_execution: WorkflowExecution) -> None: + workflow_run = session.scalar(select(WorkflowRun).where(WorkflowRun.id == workflow_execution.id)) + assert workflow_run is not None invoke_from = self._application_generate_entity.invoke_from if invoke_from == InvokeFrom.SERVICE_API: created_from = WorkflowAppLogCreatedFrom.SERVICE_API diff --git a/api/core/app/entities/task_entities.py b/api/core/app/entities/task_entities.py index 817699bd20..9b2bfcbf61 100644 --- a/api/core/app/entities/task_entities.py +++ b/api/core/app/entities/task_entities.py @@ -6,7 +6,7 @@ from pydantic import BaseModel, ConfigDict from core.model_runtime.entities.llm_entities import LLMResult from core.model_runtime.utils.encoders import jsonable_encoder -from core.workflow.entities.node_entities import AgentNodeStrategyInit +from core.workflow.entities.node_entities import AgentNodeStrategyInit, NodeRunMetadataKey from models.workflow import WorkflowNodeExecutionStatus @@ -190,7 +190,7 @@ class WorkflowStartStreamResponse(StreamResponse): id: str workflow_id: str sequence_number: int - inputs: dict + inputs: Mapping[str, Any] created_at: int event: StreamEvent = StreamEvent.WORKFLOW_STARTED @@ -212,7 +212,7 @@ class WorkflowFinishStreamResponse(StreamResponse): workflow_id: str sequence_number: int status: str - outputs: Optional[dict] = None + outputs: Optional[Mapping[str, Any]] = None error: Optional[str] = None elapsed_time: float total_tokens: int @@ -244,7 +244,7 @@ class NodeStartStreamResponse(StreamResponse): title: str index: int predecessor_node_id: Optional[str] = None - inputs: Optional[dict] = None + inputs: Optional[Mapping[str, Any]] = None created_at: int extras: dict = {} parallel_id: Optional[str] = None @@ -301,13 +301,13 @@ class NodeFinishStreamResponse(StreamResponse): title: str index: int predecessor_node_id: Optional[str] = None - inputs: Optional[dict] = None - process_data: Optional[dict] = None - outputs: Optional[dict] = None + inputs: Optional[Mapping[str, Any]] = None + process_data: Optional[Mapping[str, Any]] = None + outputs: Optional[Mapping[str, Any]] = None status: str error: Optional[str] = None elapsed_time: float - execution_metadata: Optional[dict] = None + execution_metadata: Optional[Mapping[NodeRunMetadataKey, Any]] = None created_at: int finished_at: int files: Optional[Sequence[Mapping[str, Any]]] = [] @@ -370,13 +370,13 @@ class NodeRetryStreamResponse(StreamResponse): title: str index: int predecessor_node_id: Optional[str] = None - inputs: Optional[dict] = None - process_data: Optional[dict] = None - outputs: Optional[dict] = None + inputs: Optional[Mapping[str, Any]] = None + process_data: Optional[Mapping[str, Any]] = None + outputs: Optional[Mapping[str, Any]] = None status: str error: Optional[str] = None elapsed_time: float - execution_metadata: Optional[dict] = None + execution_metadata: Optional[Mapping[NodeRunMetadataKey, Any]] = None created_at: int finished_at: int files: Optional[Sequence[Mapping[str, Any]]] = [] @@ -788,7 +788,7 @@ class WorkflowAppBlockingResponse(AppBlockingResponse): id: str workflow_id: str status: str - outputs: Optional[dict] = None + outputs: Optional[Mapping[str, Any]] = None error: Optional[str] = None elapsed_time: float total_tokens: int diff --git a/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py b/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py index 8c9c26d36e..a98a42f5df 100644 --- 
a/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py +++ b/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py @@ -9,7 +9,6 @@ from sqlalchemy import select from sqlalchemy.orm import Session from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME -from core.app.apps.advanced_chat.app_generator_tts_publisher import AppGeneratorTTSPublisher, AudioTrunk from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom from core.app.entities.app_invoke_entities import ( AgentChatAppGenerateEntity, @@ -45,6 +44,7 @@ from core.app.entities.task_entities import ( ) from core.app.task_pipeline.based_generate_task_pipeline import BasedGenerateTaskPipeline from core.app.task_pipeline.message_cycle_manage import MessageCycleManage +from core.base.tts import AppGeneratorTTSPublisher, AudioTrunk from core.model_manager import ModelInstance from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage from core.model_runtime.entities.message_entities import ( diff --git a/api/core/app/task_pipeline/workflow_cycle_manage.py b/api/core/app/task_pipeline/workflow_cycle_manage.py deleted file mode 100644 index 09e2ee74e6..0000000000 --- a/api/core/app/task_pipeline/workflow_cycle_manage.py +++ /dev/null @@ -1,948 +0,0 @@ -import json -import time -from collections.abc import Mapping, Sequence -from datetime import UTC, datetime -from typing import Any, Optional, Union, cast -from uuid import uuid4 - -from sqlalchemy import func, select -from sqlalchemy.orm import Session - -from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, InvokeFrom, WorkflowAppGenerateEntity -from core.app.entities.queue_entities import ( - QueueAgentLogEvent, - QueueIterationCompletedEvent, - QueueIterationNextEvent, - QueueIterationStartEvent, - QueueLoopCompletedEvent, - QueueLoopNextEvent, - QueueLoopStartEvent, - QueueNodeExceptionEvent, - QueueNodeFailedEvent, - QueueNodeInIterationFailedEvent, - QueueNodeInLoopFailedEvent, - QueueNodeRetryEvent, - QueueNodeStartedEvent, - QueueNodeSucceededEvent, - QueueParallelBranchRunFailedEvent, - QueueParallelBranchRunStartedEvent, - QueueParallelBranchRunSucceededEvent, -) -from core.app.entities.task_entities import ( - AgentLogStreamResponse, - IterationNodeCompletedStreamResponse, - IterationNodeNextStreamResponse, - IterationNodeStartStreamResponse, - LoopNodeCompletedStreamResponse, - LoopNodeNextStreamResponse, - LoopNodeStartStreamResponse, - NodeFinishStreamResponse, - NodeRetryStreamResponse, - NodeStartStreamResponse, - ParallelBranchFinishedStreamResponse, - ParallelBranchStartStreamResponse, - WorkflowFinishStreamResponse, - WorkflowStartStreamResponse, -) -from core.app.task_pipeline.exc import WorkflowRunNotFoundError -from core.file import FILE_MODEL_IDENTITY, File -from core.model_runtime.utils.encoders import jsonable_encoder -from core.ops.entities.trace_entity import TraceTaskName -from core.ops.ops_trace_manager import TraceQueueManager, TraceTask -from core.tools.tool_manager import ToolManager -from core.workflow.entities.node_entities import NodeRunMetadataKey -from core.workflow.enums import SystemVariableKey -from core.workflow.nodes import NodeType -from core.workflow.nodes.tool.entities import ToolNodeData -from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository -from core.workflow.workflow_entry import WorkflowEntry -from models.account import Account -from 
models.enums import CreatedByRole, WorkflowRunTriggeredFrom -from models.model import EndUser -from models.workflow import ( - Workflow, - WorkflowNodeExecution, - WorkflowNodeExecutionStatus, - WorkflowNodeExecutionTriggeredFrom, - WorkflowRun, - WorkflowRunStatus, -) - - -class WorkflowCycleManage: - def __init__( - self, - *, - application_generate_entity: Union[AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity], - workflow_system_variables: dict[SystemVariableKey, Any], - workflow_node_execution_repository: WorkflowNodeExecutionRepository, - ) -> None: - self._workflow_run: WorkflowRun | None = None - self._workflow_node_executions: dict[str, WorkflowNodeExecution] = {} - self._application_generate_entity = application_generate_entity - self._workflow_system_variables = workflow_system_variables - self._workflow_node_execution_repository = workflow_node_execution_repository - - def _handle_workflow_run_start( - self, - *, - session: Session, - workflow_id: str, - user_id: str, - created_by_role: CreatedByRole, - ) -> WorkflowRun: - workflow_stmt = select(Workflow).where(Workflow.id == workflow_id) - workflow = session.scalar(workflow_stmt) - if not workflow: - raise ValueError(f"Workflow not found: {workflow_id}") - - max_sequence_stmt = select(func.max(WorkflowRun.sequence_number)).where( - WorkflowRun.tenant_id == workflow.tenant_id, - WorkflowRun.app_id == workflow.app_id, - ) - max_sequence = session.scalar(max_sequence_stmt) or 0 - new_sequence_number = max_sequence + 1 - - inputs = {**self._application_generate_entity.inputs} - for key, value in (self._workflow_system_variables or {}).items(): - if key.value == "conversation": - continue - inputs[f"sys.{key.value}"] = value - - triggered_from = ( - WorkflowRunTriggeredFrom.DEBUGGING - if self._application_generate_entity.invoke_from == InvokeFrom.DEBUGGER - else WorkflowRunTriggeredFrom.APP_RUN - ) - - # handle special values - inputs = dict(WorkflowEntry.handle_special_values(inputs) or {}) - - # init workflow run - # TODO: This workflow_run_id should always not be None, maybe we can use a more elegant way to handle this - workflow_run_id = str(self._workflow_system_variables.get(SystemVariableKey.WORKFLOW_RUN_ID) or uuid4()) - - workflow_run = WorkflowRun() - workflow_run.id = workflow_run_id - workflow_run.tenant_id = workflow.tenant_id - workflow_run.app_id = workflow.app_id - workflow_run.sequence_number = new_sequence_number - workflow_run.workflow_id = workflow.id - workflow_run.type = workflow.type - workflow_run.triggered_from = triggered_from.value - workflow_run.version = workflow.version - workflow_run.graph = workflow.graph - workflow_run.inputs = json.dumps(inputs) - workflow_run.status = WorkflowRunStatus.RUNNING - workflow_run.created_by_role = created_by_role - workflow_run.created_by = user_id - workflow_run.created_at = datetime.now(UTC).replace(tzinfo=None) - - session.add(workflow_run) - - return workflow_run - - def _handle_workflow_run_success( - self, - *, - session: Session, - workflow_run_id: str, - start_at: float, - total_tokens: int, - total_steps: int, - outputs: Mapping[str, Any] | None = None, - conversation_id: Optional[str] = None, - trace_manager: Optional[TraceQueueManager] = None, - ) -> WorkflowRun: - """ - Workflow run success - :param workflow_run_id: workflow run id - :param start_at: start time - :param total_tokens: total tokens - :param total_steps: total steps - :param outputs: outputs - :param conversation_id: conversation id - :return: - """ - workflow_run = 
self._get_workflow_run(session=session, workflow_run_id=workflow_run_id) - - outputs = WorkflowEntry.handle_special_values(outputs) - - workflow_run.status = WorkflowRunStatus.SUCCEEDED - workflow_run.outputs = json.dumps(outputs or {}) - workflow_run.elapsed_time = time.perf_counter() - start_at - workflow_run.total_tokens = total_tokens - workflow_run.total_steps = total_steps - workflow_run.finished_at = datetime.now(UTC).replace(tzinfo=None) - - if trace_manager: - trace_manager.add_trace_task( - TraceTask( - TraceTaskName.WORKFLOW_TRACE, - workflow_run=workflow_run, - conversation_id=conversation_id, - user_id=trace_manager.user_id, - ) - ) - - return workflow_run - - def _handle_workflow_run_partial_success( - self, - *, - session: Session, - workflow_run_id: str, - start_at: float, - total_tokens: int, - total_steps: int, - outputs: Mapping[str, Any] | None = None, - exceptions_count: int = 0, - conversation_id: Optional[str] = None, - trace_manager: Optional[TraceQueueManager] = None, - ) -> WorkflowRun: - workflow_run = self._get_workflow_run(session=session, workflow_run_id=workflow_run_id) - outputs = WorkflowEntry.handle_special_values(dict(outputs) if outputs else None) - - workflow_run.status = WorkflowRunStatus.PARTIAL_SUCCEEDED.value - workflow_run.outputs = json.dumps(outputs or {}) - workflow_run.elapsed_time = time.perf_counter() - start_at - workflow_run.total_tokens = total_tokens - workflow_run.total_steps = total_steps - workflow_run.finished_at = datetime.now(UTC).replace(tzinfo=None) - workflow_run.exceptions_count = exceptions_count - - if trace_manager: - trace_manager.add_trace_task( - TraceTask( - TraceTaskName.WORKFLOW_TRACE, - workflow_run=workflow_run, - conversation_id=conversation_id, - user_id=trace_manager.user_id, - ) - ) - - return workflow_run - - def _handle_workflow_run_failed( - self, - *, - session: Session, - workflow_run_id: str, - start_at: float, - total_tokens: int, - total_steps: int, - status: WorkflowRunStatus, - error: str, - conversation_id: Optional[str] = None, - trace_manager: Optional[TraceQueueManager] = None, - exceptions_count: int = 0, - ) -> WorkflowRun: - """ - Workflow run failed - :param workflow_run_id: workflow run id - :param start_at: start time - :param total_tokens: total tokens - :param total_steps: total steps - :param status: status - :param error: error message - :return: - """ - workflow_run = self._get_workflow_run(session=session, workflow_run_id=workflow_run_id) - - workflow_run.status = status.value - workflow_run.error = error - workflow_run.elapsed_time = time.perf_counter() - start_at - workflow_run.total_tokens = total_tokens - workflow_run.total_steps = total_steps - workflow_run.finished_at = datetime.now(UTC).replace(tzinfo=None) - workflow_run.exceptions_count = exceptions_count - - # Use the instance repository to find running executions for a workflow run - running_workflow_node_executions = self._workflow_node_execution_repository.get_running_executions( - workflow_run_id=workflow_run.id - ) - - # Update the cache with the retrieved executions - for execution in running_workflow_node_executions: - if execution.node_execution_id: - self._workflow_node_executions[execution.node_execution_id] = execution - - for workflow_node_execution in running_workflow_node_executions: - now = datetime.now(UTC).replace(tzinfo=None) - workflow_node_execution.status = WorkflowNodeExecutionStatus.FAILED.value - workflow_node_execution.error = error - workflow_node_execution.finished_at = now - 
workflow_node_execution.elapsed_time = (now - workflow_node_execution.created_at).total_seconds() - - if trace_manager: - trace_manager.add_trace_task( - TraceTask( - TraceTaskName.WORKFLOW_TRACE, - workflow_run=workflow_run, - conversation_id=conversation_id, - user_id=trace_manager.user_id, - ) - ) - - return workflow_run - - def _handle_node_execution_start( - self, *, workflow_run: WorkflowRun, event: QueueNodeStartedEvent - ) -> WorkflowNodeExecution: - workflow_node_execution = WorkflowNodeExecution() - workflow_node_execution.id = str(uuid4()) - workflow_node_execution.tenant_id = workflow_run.tenant_id - workflow_node_execution.app_id = workflow_run.app_id - workflow_node_execution.workflow_id = workflow_run.workflow_id - workflow_node_execution.triggered_from = WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN.value - workflow_node_execution.workflow_run_id = workflow_run.id - workflow_node_execution.predecessor_node_id = event.predecessor_node_id - workflow_node_execution.index = event.node_run_index - workflow_node_execution.node_execution_id = event.node_execution_id - workflow_node_execution.node_id = event.node_id - workflow_node_execution.node_type = event.node_type.value - workflow_node_execution.title = event.node_data.title - workflow_node_execution.status = WorkflowNodeExecutionStatus.RUNNING.value - workflow_node_execution.created_by_role = workflow_run.created_by_role - workflow_node_execution.created_by = workflow_run.created_by - workflow_node_execution.execution_metadata = json.dumps( - { - NodeRunMetadataKey.PARALLEL_MODE_RUN_ID: event.parallel_mode_run_id, - NodeRunMetadataKey.ITERATION_ID: event.in_iteration_id, - NodeRunMetadataKey.LOOP_ID: event.in_loop_id, - } - ) - workflow_node_execution.created_at = datetime.now(UTC).replace(tzinfo=None) - - # Use the instance repository to save the workflow node execution - self._workflow_node_execution_repository.save(workflow_node_execution) - - self._workflow_node_executions[event.node_execution_id] = workflow_node_execution - return workflow_node_execution - - def _handle_workflow_node_execution_success(self, *, event: QueueNodeSucceededEvent) -> WorkflowNodeExecution: - workflow_node_execution = self._get_workflow_node_execution(node_execution_id=event.node_execution_id) - inputs = WorkflowEntry.handle_special_values(event.inputs) - process_data = WorkflowEntry.handle_special_values(event.process_data) - outputs = WorkflowEntry.handle_special_values(event.outputs) - execution_metadata_dict = dict(event.execution_metadata or {}) - execution_metadata = json.dumps(jsonable_encoder(execution_metadata_dict)) if execution_metadata_dict else None - finished_at = datetime.now(UTC).replace(tzinfo=None) - elapsed_time = (finished_at - event.start_at).total_seconds() - - process_data = WorkflowEntry.handle_special_values(event.process_data) - - workflow_node_execution.status = WorkflowNodeExecutionStatus.SUCCEEDED.value - workflow_node_execution.inputs = json.dumps(inputs) if inputs else None - workflow_node_execution.process_data = json.dumps(process_data) if process_data else None - workflow_node_execution.outputs = json.dumps(outputs) if outputs else None - workflow_node_execution.execution_metadata = execution_metadata - workflow_node_execution.finished_at = finished_at - workflow_node_execution.elapsed_time = elapsed_time - - # Use the instance repository to update the workflow node execution - self._workflow_node_execution_repository.update(workflow_node_execution) - return workflow_node_execution - - def 
_handle_workflow_node_execution_failed( - self, - *, - event: QueueNodeFailedEvent - | QueueNodeInIterationFailedEvent - | QueueNodeInLoopFailedEvent - | QueueNodeExceptionEvent, - ) -> WorkflowNodeExecution: - """ - Workflow node execution failed - :param event: queue node failed event - :return: - """ - workflow_node_execution = self._get_workflow_node_execution(node_execution_id=event.node_execution_id) - - inputs = WorkflowEntry.handle_special_values(event.inputs) - process_data = WorkflowEntry.handle_special_values(event.process_data) - outputs = WorkflowEntry.handle_special_values(event.outputs) - finished_at = datetime.now(UTC).replace(tzinfo=None) - elapsed_time = (finished_at - event.start_at).total_seconds() - execution_metadata = ( - json.dumps(jsonable_encoder(event.execution_metadata)) if event.execution_metadata else None - ) - process_data = WorkflowEntry.handle_special_values(event.process_data) - workflow_node_execution.status = ( - WorkflowNodeExecutionStatus.FAILED.value - if not isinstance(event, QueueNodeExceptionEvent) - else WorkflowNodeExecutionStatus.EXCEPTION.value - ) - workflow_node_execution.error = event.error - workflow_node_execution.inputs = json.dumps(inputs) if inputs else None - workflow_node_execution.process_data = json.dumps(process_data) if process_data else None - workflow_node_execution.outputs = json.dumps(outputs) if outputs else None - workflow_node_execution.finished_at = finished_at - workflow_node_execution.elapsed_time = elapsed_time - workflow_node_execution.execution_metadata = execution_metadata - - self._workflow_node_execution_repository.update(workflow_node_execution) - - return workflow_node_execution - - def _handle_workflow_node_execution_retried( - self, *, workflow_run: WorkflowRun, event: QueueNodeRetryEvent - ) -> WorkflowNodeExecution: - """ - Workflow node execution failed - :param workflow_run: workflow run - :param event: queue node failed event - :return: - """ - created_at = event.start_at - finished_at = datetime.now(UTC).replace(tzinfo=None) - elapsed_time = (finished_at - created_at).total_seconds() - inputs = WorkflowEntry.handle_special_values(event.inputs) - outputs = WorkflowEntry.handle_special_values(event.outputs) - origin_metadata = { - NodeRunMetadataKey.ITERATION_ID: event.in_iteration_id, - NodeRunMetadataKey.PARALLEL_MODE_RUN_ID: event.parallel_mode_run_id, - NodeRunMetadataKey.LOOP_ID: event.in_loop_id, - } - merged_metadata = ( - {**jsonable_encoder(event.execution_metadata), **origin_metadata} - if event.execution_metadata is not None - else origin_metadata - ) - execution_metadata = json.dumps(merged_metadata) - - workflow_node_execution = WorkflowNodeExecution() - workflow_node_execution.id = str(uuid4()) - workflow_node_execution.tenant_id = workflow_run.tenant_id - workflow_node_execution.app_id = workflow_run.app_id - workflow_node_execution.workflow_id = workflow_run.workflow_id - workflow_node_execution.triggered_from = WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN.value - workflow_node_execution.workflow_run_id = workflow_run.id - workflow_node_execution.predecessor_node_id = event.predecessor_node_id - workflow_node_execution.node_execution_id = event.node_execution_id - workflow_node_execution.node_id = event.node_id - workflow_node_execution.node_type = event.node_type.value - workflow_node_execution.title = event.node_data.title - workflow_node_execution.status = WorkflowNodeExecutionStatus.RETRY.value - workflow_node_execution.created_by_role = workflow_run.created_by_role - 
workflow_node_execution.created_by = workflow_run.created_by - workflow_node_execution.created_at = created_at - workflow_node_execution.finished_at = finished_at - workflow_node_execution.elapsed_time = elapsed_time - workflow_node_execution.error = event.error - workflow_node_execution.inputs = json.dumps(inputs) if inputs else None - workflow_node_execution.outputs = json.dumps(outputs) if outputs else None - workflow_node_execution.execution_metadata = execution_metadata - workflow_node_execution.index = event.node_run_index - - # Use the instance repository to save the workflow node execution - self._workflow_node_execution_repository.save(workflow_node_execution) - - self._workflow_node_executions[event.node_execution_id] = workflow_node_execution - return workflow_node_execution - - def _workflow_start_to_stream_response( - self, - *, - session: Session, - task_id: str, - workflow_run: WorkflowRun, - ) -> WorkflowStartStreamResponse: - _ = session - return WorkflowStartStreamResponse( - task_id=task_id, - workflow_run_id=workflow_run.id, - data=WorkflowStartStreamResponse.Data( - id=workflow_run.id, - workflow_id=workflow_run.workflow_id, - sequence_number=workflow_run.sequence_number, - inputs=dict(workflow_run.inputs_dict or {}), - created_at=int(workflow_run.created_at.timestamp()), - ), - ) - - def _workflow_finish_to_stream_response( - self, - *, - session: Session, - task_id: str, - workflow_run: WorkflowRun, - ) -> WorkflowFinishStreamResponse: - created_by = None - if workflow_run.created_by_role == CreatedByRole.ACCOUNT: - stmt = select(Account).where(Account.id == workflow_run.created_by) - account = session.scalar(stmt) - if account: - created_by = { - "id": account.id, - "name": account.name, - "email": account.email, - } - elif workflow_run.created_by_role == CreatedByRole.END_USER: - stmt = select(EndUser).where(EndUser.id == workflow_run.created_by) - end_user = session.scalar(stmt) - if end_user: - created_by = { - "id": end_user.id, - "user": end_user.session_id, - } - else: - raise NotImplementedError(f"unknown created_by_role: {workflow_run.created_by_role}") - - return WorkflowFinishStreamResponse( - task_id=task_id, - workflow_run_id=workflow_run.id, - data=WorkflowFinishStreamResponse.Data( - id=workflow_run.id, - workflow_id=workflow_run.workflow_id, - sequence_number=workflow_run.sequence_number, - status=workflow_run.status, - outputs=dict(workflow_run.outputs_dict) if workflow_run.outputs_dict else None, - error=workflow_run.error, - elapsed_time=workflow_run.elapsed_time, - total_tokens=workflow_run.total_tokens, - total_steps=workflow_run.total_steps, - created_by=created_by, - created_at=int(workflow_run.created_at.timestamp()), - finished_at=int(workflow_run.finished_at.timestamp()), - files=self._fetch_files_from_node_outputs(dict(workflow_run.outputs_dict)), - exceptions_count=workflow_run.exceptions_count, - ), - ) - - def _workflow_node_start_to_stream_response( - self, - *, - event: QueueNodeStartedEvent, - task_id: str, - workflow_node_execution: WorkflowNodeExecution, - ) -> Optional[NodeStartStreamResponse]: - if workflow_node_execution.node_type in {NodeType.ITERATION.value, NodeType.LOOP.value}: - return None - if not workflow_node_execution.workflow_run_id: - return None - - response = NodeStartStreamResponse( - task_id=task_id, - workflow_run_id=workflow_node_execution.workflow_run_id, - data=NodeStartStreamResponse.Data( - id=workflow_node_execution.id, - node_id=workflow_node_execution.node_id, - 
node_type=workflow_node_execution.node_type, - title=workflow_node_execution.title, - index=workflow_node_execution.index, - predecessor_node_id=workflow_node_execution.predecessor_node_id, - inputs=workflow_node_execution.inputs_dict, - created_at=int(workflow_node_execution.created_at.timestamp()), - parallel_id=event.parallel_id, - parallel_start_node_id=event.parallel_start_node_id, - parent_parallel_id=event.parent_parallel_id, - parent_parallel_start_node_id=event.parent_parallel_start_node_id, - iteration_id=event.in_iteration_id, - loop_id=event.in_loop_id, - parallel_run_id=event.parallel_mode_run_id, - agent_strategy=event.agent_strategy, - ), - ) - - # extras logic - if event.node_type == NodeType.TOOL: - node_data = cast(ToolNodeData, event.node_data) - response.data.extras["icon"] = ToolManager.get_tool_icon( - tenant_id=self._application_generate_entity.app_config.tenant_id, - provider_type=node_data.provider_type, - provider_id=node_data.provider_id, - ) - - return response - - def _workflow_node_finish_to_stream_response( - self, - *, - event: QueueNodeSucceededEvent - | QueueNodeFailedEvent - | QueueNodeInIterationFailedEvent - | QueueNodeInLoopFailedEvent - | QueueNodeExceptionEvent, - task_id: str, - workflow_node_execution: WorkflowNodeExecution, - ) -> Optional[NodeFinishStreamResponse]: - if workflow_node_execution.node_type in {NodeType.ITERATION.value, NodeType.LOOP.value}: - return None - if not workflow_node_execution.workflow_run_id: - return None - if not workflow_node_execution.finished_at: - return None - - return NodeFinishStreamResponse( - task_id=task_id, - workflow_run_id=workflow_node_execution.workflow_run_id, - data=NodeFinishStreamResponse.Data( - id=workflow_node_execution.id, - node_id=workflow_node_execution.node_id, - node_type=workflow_node_execution.node_type, - index=workflow_node_execution.index, - title=workflow_node_execution.title, - predecessor_node_id=workflow_node_execution.predecessor_node_id, - inputs=workflow_node_execution.inputs_dict, - process_data=workflow_node_execution.process_data_dict, - outputs=workflow_node_execution.outputs_dict, - status=workflow_node_execution.status, - error=workflow_node_execution.error, - elapsed_time=workflow_node_execution.elapsed_time, - execution_metadata=workflow_node_execution.execution_metadata_dict, - created_at=int(workflow_node_execution.created_at.timestamp()), - finished_at=int(workflow_node_execution.finished_at.timestamp()), - files=self._fetch_files_from_node_outputs(workflow_node_execution.outputs_dict or {}), - parallel_id=event.parallel_id, - parallel_start_node_id=event.parallel_start_node_id, - parent_parallel_id=event.parent_parallel_id, - parent_parallel_start_node_id=event.parent_parallel_start_node_id, - iteration_id=event.in_iteration_id, - loop_id=event.in_loop_id, - ), - ) - - def _workflow_node_retry_to_stream_response( - self, - *, - event: QueueNodeRetryEvent, - task_id: str, - workflow_node_execution: WorkflowNodeExecution, - ) -> Optional[Union[NodeRetryStreamResponse, NodeFinishStreamResponse]]: - if workflow_node_execution.node_type in {NodeType.ITERATION.value, NodeType.LOOP.value}: - return None - if not workflow_node_execution.workflow_run_id: - return None - if not workflow_node_execution.finished_at: - return None - - return NodeRetryStreamResponse( - task_id=task_id, - workflow_run_id=workflow_node_execution.workflow_run_id, - data=NodeRetryStreamResponse.Data( - id=workflow_node_execution.id, - node_id=workflow_node_execution.node_id, - 
node_type=workflow_node_execution.node_type, - index=workflow_node_execution.index, - title=workflow_node_execution.title, - predecessor_node_id=workflow_node_execution.predecessor_node_id, - inputs=workflow_node_execution.inputs_dict, - process_data=workflow_node_execution.process_data_dict, - outputs=workflow_node_execution.outputs_dict, - status=workflow_node_execution.status, - error=workflow_node_execution.error, - elapsed_time=workflow_node_execution.elapsed_time, - execution_metadata=workflow_node_execution.execution_metadata_dict, - created_at=int(workflow_node_execution.created_at.timestamp()), - finished_at=int(workflow_node_execution.finished_at.timestamp()), - files=self._fetch_files_from_node_outputs(workflow_node_execution.outputs_dict or {}), - parallel_id=event.parallel_id, - parallel_start_node_id=event.parallel_start_node_id, - parent_parallel_id=event.parent_parallel_id, - parent_parallel_start_node_id=event.parent_parallel_start_node_id, - iteration_id=event.in_iteration_id, - loop_id=event.in_loop_id, - retry_index=event.retry_index, - ), - ) - - def _workflow_parallel_branch_start_to_stream_response( - self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueParallelBranchRunStartedEvent - ) -> ParallelBranchStartStreamResponse: - _ = session - return ParallelBranchStartStreamResponse( - task_id=task_id, - workflow_run_id=workflow_run.id, - data=ParallelBranchStartStreamResponse.Data( - parallel_id=event.parallel_id, - parallel_branch_id=event.parallel_start_node_id, - parent_parallel_id=event.parent_parallel_id, - parent_parallel_start_node_id=event.parent_parallel_start_node_id, - iteration_id=event.in_iteration_id, - loop_id=event.in_loop_id, - created_at=int(time.time()), - ), - ) - - def _workflow_parallel_branch_finished_to_stream_response( - self, - *, - session: Session, - task_id: str, - workflow_run: WorkflowRun, - event: QueueParallelBranchRunSucceededEvent | QueueParallelBranchRunFailedEvent, - ) -> ParallelBranchFinishedStreamResponse: - _ = session - return ParallelBranchFinishedStreamResponse( - task_id=task_id, - workflow_run_id=workflow_run.id, - data=ParallelBranchFinishedStreamResponse.Data( - parallel_id=event.parallel_id, - parallel_branch_id=event.parallel_start_node_id, - parent_parallel_id=event.parent_parallel_id, - parent_parallel_start_node_id=event.parent_parallel_start_node_id, - iteration_id=event.in_iteration_id, - loop_id=event.in_loop_id, - status="succeeded" if isinstance(event, QueueParallelBranchRunSucceededEvent) else "failed", - error=event.error if isinstance(event, QueueParallelBranchRunFailedEvent) else None, - created_at=int(time.time()), - ), - ) - - def _workflow_iteration_start_to_stream_response( - self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueIterationStartEvent - ) -> IterationNodeStartStreamResponse: - _ = session - return IterationNodeStartStreamResponse( - task_id=task_id, - workflow_run_id=workflow_run.id, - data=IterationNodeStartStreamResponse.Data( - id=event.node_id, - node_id=event.node_id, - node_type=event.node_type.value, - title=event.node_data.title, - created_at=int(time.time()), - extras={}, - inputs=event.inputs or {}, - metadata=event.metadata or {}, - parallel_id=event.parallel_id, - parallel_start_node_id=event.parallel_start_node_id, - ), - ) - - def _workflow_iteration_next_to_stream_response( - self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueIterationNextEvent - ) -> IterationNodeNextStreamResponse: - _ = 
session - return IterationNodeNextStreamResponse( - task_id=task_id, - workflow_run_id=workflow_run.id, - data=IterationNodeNextStreamResponse.Data( - id=event.node_id, - node_id=event.node_id, - node_type=event.node_type.value, - title=event.node_data.title, - index=event.index, - pre_iteration_output=event.output, - created_at=int(time.time()), - extras={}, - parallel_id=event.parallel_id, - parallel_start_node_id=event.parallel_start_node_id, - parallel_mode_run_id=event.parallel_mode_run_id, - duration=event.duration, - ), - ) - - def _workflow_iteration_completed_to_stream_response( - self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueIterationCompletedEvent - ) -> IterationNodeCompletedStreamResponse: - _ = session - return IterationNodeCompletedStreamResponse( - task_id=task_id, - workflow_run_id=workflow_run.id, - data=IterationNodeCompletedStreamResponse.Data( - id=event.node_id, - node_id=event.node_id, - node_type=event.node_type.value, - title=event.node_data.title, - outputs=event.outputs, - created_at=int(time.time()), - extras={}, - inputs=event.inputs or {}, - status=WorkflowNodeExecutionStatus.SUCCEEDED - if event.error is None - else WorkflowNodeExecutionStatus.FAILED, - error=None, - elapsed_time=(datetime.now(UTC).replace(tzinfo=None) - event.start_at).total_seconds(), - total_tokens=event.metadata.get("total_tokens", 0) if event.metadata else 0, - execution_metadata=event.metadata, - finished_at=int(time.time()), - steps=event.steps, - parallel_id=event.parallel_id, - parallel_start_node_id=event.parallel_start_node_id, - ), - ) - - def _workflow_loop_start_to_stream_response( - self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueLoopStartEvent - ) -> LoopNodeStartStreamResponse: - _ = session - return LoopNodeStartStreamResponse( - task_id=task_id, - workflow_run_id=workflow_run.id, - data=LoopNodeStartStreamResponse.Data( - id=event.node_id, - node_id=event.node_id, - node_type=event.node_type.value, - title=event.node_data.title, - created_at=int(time.time()), - extras={}, - inputs=event.inputs or {}, - metadata=event.metadata or {}, - parallel_id=event.parallel_id, - parallel_start_node_id=event.parallel_start_node_id, - ), - ) - - def _workflow_loop_next_to_stream_response( - self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueLoopNextEvent - ) -> LoopNodeNextStreamResponse: - _ = session - return LoopNodeNextStreamResponse( - task_id=task_id, - workflow_run_id=workflow_run.id, - data=LoopNodeNextStreamResponse.Data( - id=event.node_id, - node_id=event.node_id, - node_type=event.node_type.value, - title=event.node_data.title, - index=event.index, - pre_loop_output=event.output, - created_at=int(time.time()), - extras={}, - parallel_id=event.parallel_id, - parallel_start_node_id=event.parallel_start_node_id, - parallel_mode_run_id=event.parallel_mode_run_id, - duration=event.duration, - ), - ) - - def _workflow_loop_completed_to_stream_response( - self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueLoopCompletedEvent - ) -> LoopNodeCompletedStreamResponse: - _ = session - return LoopNodeCompletedStreamResponse( - task_id=task_id, - workflow_run_id=workflow_run.id, - data=LoopNodeCompletedStreamResponse.Data( - id=event.node_id, - node_id=event.node_id, - node_type=event.node_type.value, - title=event.node_data.title, - outputs=event.outputs, - created_at=int(time.time()), - extras={}, - inputs=event.inputs or {}, - 
status=WorkflowNodeExecutionStatus.SUCCEEDED - if event.error is None - else WorkflowNodeExecutionStatus.FAILED, - error=None, - elapsed_time=(datetime.now(UTC).replace(tzinfo=None) - event.start_at).total_seconds(), - total_tokens=event.metadata.get("total_tokens", 0) if event.metadata else 0, - execution_metadata=event.metadata, - finished_at=int(time.time()), - steps=event.steps, - parallel_id=event.parallel_id, - parallel_start_node_id=event.parallel_start_node_id, - ), - ) - - def _fetch_files_from_node_outputs(self, outputs_dict: Mapping[str, Any]) -> Sequence[Mapping[str, Any]]: - """ - Fetch files from node outputs - :param outputs_dict: node outputs dict - :return: - """ - if not outputs_dict: - return [] - - files = [self._fetch_files_from_variable_value(output_value) for output_value in outputs_dict.values()] - # Remove None - files = [file for file in files if file] - # Flatten list - # Flatten the list of sequences into a single list of mappings - flattened_files = [file for sublist in files if sublist for file in sublist] - - # Convert to tuple to match Sequence type - return tuple(flattened_files) - - def _fetch_files_from_variable_value(self, value: Union[dict, list]) -> Sequence[Mapping[str, Any]]: - """ - Fetch files from variable value - :param value: variable value - :return: - """ - if not value: - return [] - - files = [] - if isinstance(value, list): - for item in value: - file = self._get_file_var_from_value(item) - if file: - files.append(file) - elif isinstance(value, dict): - file = self._get_file_var_from_value(value) - if file: - files.append(file) - - return files - - def _get_file_var_from_value(self, value: Union[dict, list]) -> Mapping[str, Any] | None: - """ - Get file var from value - :param value: variable value - :return: - """ - if not value: - return None - - if isinstance(value, dict) and value.get("dify_model_identity") == FILE_MODEL_IDENTITY: - return value - elif isinstance(value, File): - return value.to_dict() - - return None - - def _get_workflow_run(self, *, session: Session, workflow_run_id: str) -> WorkflowRun: - if self._workflow_run and self._workflow_run.id == workflow_run_id: - cached_workflow_run = self._workflow_run - cached_workflow_run = session.merge(cached_workflow_run) - return cached_workflow_run - stmt = select(WorkflowRun).where(WorkflowRun.id == workflow_run_id) - workflow_run = session.scalar(stmt) - if not workflow_run: - raise WorkflowRunNotFoundError(workflow_run_id) - self._workflow_run = workflow_run - - return workflow_run - - def _get_workflow_node_execution(self, node_execution_id: str) -> WorkflowNodeExecution: - # First check the cache for performance - if node_execution_id in self._workflow_node_executions: - cached_execution = self._workflow_node_executions[node_execution_id] - # No need to merge with session since expire_on_commit=False - return cached_execution - - # If not in cache, use the instance repository to get by node_execution_id - execution = self._workflow_node_execution_repository.get_by_node_execution_id(node_execution_id) - - if not execution: - raise ValueError(f"Workflow node execution not found: {node_execution_id}") - - # Update cache - self._workflow_node_executions[node_execution_id] = execution - return execution - - def _handle_agent_log(self, task_id: str, event: QueueAgentLogEvent) -> AgentLogStreamResponse: - """ - Handle agent log - :param task_id: task id - :param event: agent log event - :return: - """ - return AgentLogStreamResponse( - task_id=task_id, - 
data=AgentLogStreamResponse.Data( - node_execution_id=event.node_execution_id, - id=event.id, - parent_id=event.parent_id, - label=event.label, - error=event.error, - status=event.status, - data=event.data, - metadata=event.metadata, - node_id=event.node_id, - ), - ) diff --git a/api/core/base/__init__.py b/api/core/base/__init__.py new file mode 100644 index 0000000000..3f4bd3b771 --- /dev/null +++ b/api/core/base/__init__.py @@ -0,0 +1 @@ +# Core base package diff --git a/api/core/base/tts/__init__.py b/api/core/base/tts/__init__.py new file mode 100644 index 0000000000..37b6eeebb0 --- /dev/null +++ b/api/core/base/tts/__init__.py @@ -0,0 +1,6 @@ +from core.base.tts.app_generator_tts_publisher import AppGeneratorTTSPublisher, AudioTrunk + +__all__ = [ + "AppGeneratorTTSPublisher", + "AudioTrunk", +] diff --git a/api/core/app/apps/advanced_chat/app_generator_tts_publisher.py b/api/core/base/tts/app_generator_tts_publisher.py similarity index 100% rename from api/core/app/apps/advanced_chat/app_generator_tts_publisher.py rename to api/core/base/tts/app_generator_tts_publisher.py diff --git a/api/core/callback_handler/index_tool_callback_handler.py b/api/core/callback_handler/index_tool_callback_handler.py index 56859df7f4..13c22213c4 100644 --- a/api/core/callback_handler/index_tool_callback_handler.py +++ b/api/core/callback_handler/index_tool_callback_handler.py @@ -1,3 +1,5 @@ +import logging + from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom from core.app.entities.app_invoke_entities import InvokeFrom from core.app.entities.queue_entities import QueueRetrieverResourcesEvent @@ -7,6 +9,8 @@ from extensions.ext_database import db from models.dataset import ChildChunk, DatasetQuery, DocumentSegment from models.dataset import Document as DatasetDocument +_logger = logging.getLogger(__name__) + class DatasetIndexToolCallbackHandler: """Callback handler for dataset tool.""" @@ -42,18 +46,31 @@ class DatasetIndexToolCallbackHandler: """Handle tool end.""" for document in documents: if document.metadata is not None: - dataset_document = DatasetDocument.query.filter( - DatasetDocument.id == document.metadata["document_id"] - ).first() + document_id = document.metadata["document_id"] + dataset_document = db.session.query(DatasetDocument).filter(DatasetDocument.id == document_id).first() + if not dataset_document: + _logger.warning( + "Expected DatasetDocument record to exist, but none was found, document_id=%s", + document_id, + ) + continue if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX: - child_chunk = ChildChunk.query.filter( - ChildChunk.index_node_id == document.metadata["doc_id"], - ChildChunk.dataset_id == dataset_document.dataset_id, - ChildChunk.document_id == dataset_document.id, - ).first() + child_chunk = ( + db.session.query(ChildChunk) + .filter( + ChildChunk.index_node_id == document.metadata["doc_id"], + ChildChunk.dataset_id == dataset_document.dataset_id, + ChildChunk.document_id == dataset_document.id, + ) + .first() + ) if child_chunk: - segment = DocumentSegment.query.filter(DocumentSegment.id == child_chunk.segment_id).update( - {DocumentSegment.hit_count: DocumentSegment.hit_count + 1}, synchronize_session=False + segment = ( + db.session.query(DocumentSegment) + .filter(DocumentSegment.id == child_chunk.segment_id) + .update( + {DocumentSegment.hit_count: DocumentSegment.hit_count + 1}, synchronize_session=False + ) ) else: query = db.session.query(DocumentSegment).filter( diff --git a/api/core/entities/provider_configuration.py 
b/api/core/entities/provider_configuration.py index 86887c9b4a..66d8d0f414 100644 --- a/api/core/entities/provider_configuration.py +++ b/api/core/entities/provider_configuration.py @@ -754,7 +754,7 @@ class ProviderConfiguration(BaseModel): :param only_active: return active model only :return: """ - provider_models = self.get_provider_models(model_type, only_active) + provider_models = self.get_provider_models(model_type, only_active, model) for provider_model in provider_models: if provider_model.model == model: @@ -763,12 +763,13 @@ class ProviderConfiguration(BaseModel): return None def get_provider_models( - self, model_type: Optional[ModelType] = None, only_active: bool = False + self, model_type: Optional[ModelType] = None, only_active: bool = False, model: Optional[str] = None ) -> list[ModelWithProviderEntity]: """ Get provider models. :param model_type: model type :param only_active: only active models + :param model: model name :return: """ model_provider_factory = ModelProviderFactory(self.tenant_id) @@ -791,7 +792,10 @@ class ProviderConfiguration(BaseModel): ) else: provider_models = self._get_custom_provider_models( - model_types=model_types, provider_schema=provider_schema, model_setting_map=model_setting_map + model_types=model_types, + provider_schema=provider_schema, + model_setting_map=model_setting_map, + model=model, ) if only_active: @@ -897,37 +901,36 @@ class ProviderConfiguration(BaseModel): ) except Exception as ex: logger.warning(f"get custom model schema failed, {ex}") + continue - if not custom_model_schema: - continue + if not custom_model_schema: + continue - if custom_model_schema.model_type not in model_types: - continue + if custom_model_schema.model_type not in model_types: + continue - status = ModelStatus.ACTIVE - if ( - custom_model_schema.model_type in model_setting_map - and custom_model_schema.model in model_setting_map[custom_model_schema.model_type] - ): - model_setting = model_setting_map[custom_model_schema.model_type][ - custom_model_schema.model - ] - if model_setting.enabled is False: - status = ModelStatus.DISABLED + status = ModelStatus.ACTIVE + if ( + custom_model_schema.model_type in model_setting_map + and custom_model_schema.model in model_setting_map[custom_model_schema.model_type] + ): + model_setting = model_setting_map[custom_model_schema.model_type][custom_model_schema.model] + if model_setting.enabled is False: + status = ModelStatus.DISABLED - provider_models.append( - ModelWithProviderEntity( - model=custom_model_schema.model, - label=custom_model_schema.label, - model_type=custom_model_schema.model_type, - features=custom_model_schema.features, - fetch_from=FetchFrom.PREDEFINED_MODEL, - model_properties=custom_model_schema.model_properties, - deprecated=custom_model_schema.deprecated, - provider=SimpleModelProviderEntity(self.provider), - status=status, - ) + provider_models.append( + ModelWithProviderEntity( + model=custom_model_schema.model, + label=custom_model_schema.label, + model_type=custom_model_schema.model_type, + features=custom_model_schema.features, + fetch_from=FetchFrom.PREDEFINED_MODEL, + model_properties=custom_model_schema.model_properties, + deprecated=custom_model_schema.deprecated, + provider=SimpleModelProviderEntity(self.provider), + status=status, ) + ) # if llm name not in restricted llm list, remove it restrict_model_names = [rm.model for rm in restrict_models] @@ -944,6 +947,7 @@ class ProviderConfiguration(BaseModel): model_types: Sequence[ModelType], provider_schema: ProviderEntity, 
model_setting_map: dict[ModelType, dict[str, ModelSettings]], + model: Optional[str] = None, ) -> list[ModelWithProviderEntity]: """ Get custom provider models. @@ -996,7 +1000,8 @@ class ProviderConfiguration(BaseModel): for model_configuration in self.custom_configuration.models: if model_configuration.model_type not in model_types: continue - + if model and model != model_configuration.model: + continue try: custom_model_schema = self.get_model_schema( model_type=model_configuration.model_type, diff --git a/api/core/indexing_runner.py b/api/core/indexing_runner.py index 81bf59b2b6..848d897779 100644 --- a/api/core/indexing_runner.py +++ b/api/core/indexing_runner.py @@ -51,7 +51,7 @@ class IndexingRunner: for dataset_document in dataset_documents: try: # get dataset - dataset = Dataset.query.filter_by(id=dataset_document.dataset_id).first() + dataset = db.session.query(Dataset).filter_by(id=dataset_document.dataset_id).first() if not dataset: raise ValueError("no dataset found") @@ -103,15 +103,17 @@ class IndexingRunner: """Run the indexing process when the index_status is splitting.""" try: # get dataset - dataset = Dataset.query.filter_by(id=dataset_document.dataset_id).first() + dataset = db.session.query(Dataset).filter_by(id=dataset_document.dataset_id).first() if not dataset: raise ValueError("no dataset found") # get exist document_segment list and delete - document_segments = DocumentSegment.query.filter_by( - dataset_id=dataset.id, document_id=dataset_document.id - ).all() + document_segments = ( + db.session.query(DocumentSegment) + .filter_by(dataset_id=dataset.id, document_id=dataset_document.id) + .all() + ) for document_segment in document_segments: db.session.delete(document_segment) @@ -162,15 +164,17 @@ class IndexingRunner: """Run the indexing process when the index_status is indexing.""" try: # get dataset - dataset = Dataset.query.filter_by(id=dataset_document.dataset_id).first() + dataset = db.session.query(Dataset).filter_by(id=dataset_document.dataset_id).first() if not dataset: raise ValueError("no dataset found") # get exist document_segment list and delete - document_segments = DocumentSegment.query.filter_by( - dataset_id=dataset.id, document_id=dataset_document.id - ).all() + document_segments = ( + db.session.query(DocumentSegment) + .filter_by(dataset_id=dataset.id, document_id=dataset_document.id) + .all() + ) documents = [] if document_segments: @@ -254,7 +258,7 @@ class IndexingRunner: embedding_model_instance = None if dataset_id: - dataset = Dataset.query.filter_by(id=dataset_id).first() + dataset = db.session.query(Dataset).filter_by(id=dataset_id).first() if not dataset: raise ValueError("Dataset not found.") if dataset.indexing_technique == "high_quality" or indexing_technique == "high_quality": @@ -587,7 +591,7 @@ class IndexingRunner: @staticmethod def _process_keyword_index(flask_app, dataset_id, document_id, documents): with flask_app.app_context(): - dataset = Dataset.query.filter_by(id=dataset_id).first() + dataset = db.session.query(Dataset).filter_by(id=dataset_id).first() if not dataset: raise ValueError("no dataset found") keyword = Keyword(dataset) @@ -656,10 +660,10 @@ class IndexingRunner: """ Update the document indexing status. 
""" - count = DatasetDocument.query.filter_by(id=document_id, is_paused=True).count() + count = db.session.query(DatasetDocument).filter_by(id=document_id, is_paused=True).count() if count > 0: raise DocumentIsPausedError() - document = DatasetDocument.query.filter_by(id=document_id).first() + document = db.session.query(DatasetDocument).filter_by(id=document_id).first() if not document: raise DocumentIsDeletedPausedError() @@ -668,7 +672,7 @@ class IndexingRunner: if extra_update_params: update_params.update(extra_update_params) - DatasetDocument.query.filter_by(id=document_id).update(update_params) + db.session.query(DatasetDocument).filter_by(id=document_id).update(update_params) db.session.commit() @staticmethod @@ -676,7 +680,7 @@ class IndexingRunner: """ Update the document segment by document id. """ - DocumentSegment.query.filter_by(document_id=dataset_document_id).update(update_params) + db.session.query(DocumentSegment).filter_by(document_id=dataset_document_id).update(update_params) db.session.commit() def _transform( diff --git a/api/core/llm_generator/llm_generator.py b/api/core/llm_generator/llm_generator.py index e5dbc30689..e01896a491 100644 --- a/api/core/llm_generator/llm_generator.py +++ b/api/core/llm_generator/llm_generator.py @@ -51,15 +51,19 @@ class LLMGenerator: response = cast( LLMResult, model_instance.invoke_llm( - prompt_messages=list(prompts), model_parameters={"max_tokens": 100, "temperature": 1}, stream=False + prompt_messages=list(prompts), model_parameters={"max_tokens": 500, "temperature": 1}, stream=False ), ) answer = cast(str, response.message.content) cleaned_answer = re.sub(r"^.*(\{.*\}).*$", r"\1", answer, flags=re.DOTALL) if cleaned_answer is None: return "" - result_dict = json.loads(cleaned_answer) - answer = result_dict["Your Output"] + try: + result_dict = json.loads(cleaned_answer) + answer = result_dict["Your Output"] + except json.JSONDecodeError as e: + logging.exception("Failed to generate name after answer, use query instead") + answer = query name = answer.strip() if len(name) > 75: diff --git a/api/core/ops/entities/config_entity.py b/api/core/ops/entities/config_entity.py index 874b2800b2..f2d1bd305a 100644 --- a/api/core/ops/entities/config_entity.py +++ b/api/core/ops/entities/config_entity.py @@ -1,9 +1,9 @@ -from enum import Enum +from enum import StrEnum from pydantic import BaseModel, ValidationInfo, field_validator -class TracingProviderEnum(Enum): +class TracingProviderEnum(StrEnum): LANGFUSE = "langfuse" LANGSMITH = "langsmith" OPIK = "opik" diff --git a/api/core/ops/langfuse_trace/entities/langfuse_trace_entity.py b/api/core/ops/langfuse_trace/entities/langfuse_trace_entity.py index f486da3a6d..46ba1c45b9 100644 --- a/api/core/ops/langfuse_trace/entities/langfuse_trace_entity.py +++ b/api/core/ops/langfuse_trace/entities/langfuse_trace_entity.py @@ -1,3 +1,4 @@ +from collections.abc import Mapping from datetime import datetime from enum import StrEnum from typing import Any, Optional, Union @@ -155,10 +156,10 @@ class LangfuseSpan(BaseModel): description="The status message of the span. Additional field for context of the event. E.g. the error " "message of an error event.", ) - input: Optional[Union[str, dict[str, Any], list, None]] = Field( + input: Optional[Union[str, Mapping[str, Any], list, None]] = Field( default=None, description="The input of the span. Can be any JSON object." 
) - output: Optional[Union[str, dict[str, Any], list, None]] = Field( + output: Optional[Union[str, Mapping[str, Any], list, None]] = Field( default=None, description="The output of the span. Can be any JSON object." ) version: Optional[str] = Field( diff --git a/api/core/ops/langfuse_trace/langfuse_trace.py b/api/core/ops/langfuse_trace/langfuse_trace.py index b229d244f7..120c36f53d 100644 --- a/api/core/ops/langfuse_trace/langfuse_trace.py +++ b/api/core/ops/langfuse_trace/langfuse_trace.py @@ -1,11 +1,10 @@ -import json import logging import os from datetime import datetime, timedelta from typing import Optional from langfuse import Langfuse # type: ignore -from sqlalchemy.orm import sessionmaker +from sqlalchemy.orm import Session, sessionmaker from core.ops.base_trace_instance import BaseTraceInstance from core.ops.entities.config_entity import LangfuseConfig @@ -29,9 +28,10 @@ from core.ops.langfuse_trace.entities.langfuse_trace_entity import ( UnitEnum, ) from core.ops.utils import filter_none_values -from core.workflow.repository.repository_factory import RepositoryFactory +from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository +from core.workflow.nodes.enums import NodeType from extensions.ext_database import db -from models.model import EndUser +from models import Account, App, EndUser, WorkflowNodeExecutionTriggeredFrom logger = logging.getLogger(__name__) @@ -113,8 +113,29 @@ class LangFuseDataTrace(BaseTraceInstance): # through workflow_run_id get all_nodes_execution using repository session_factory = sessionmaker(bind=db.engine) - workflow_node_execution_repository = RepositoryFactory.create_workflow_node_execution_repository( - params={"tenant_id": trace_info.tenant_id, "session_factory": session_factory}, + # Find the app's creator account + with Session(db.engine, expire_on_commit=False) as session: + # Get the app to find its creator + app_id = trace_info.metadata.get("app_id") + if not app_id: + raise ValueError("No app_id found in trace_info metadata") + + app = session.query(App).filter(App.id == app_id).first() + if not app: + raise ValueError(f"App with id {app_id} not found") + + if not app.created_by: + raise ValueError(f"App with id {app_id} has no creator (created_by is None)") + + service_account = session.query(Account).filter(Account.id == app.created_by).first() + if not service_account: + raise ValueError(f"Creator account with id {app.created_by} not found for app {app_id}") + + workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=session_factory, + user=service_account, + app_id=trace_info.metadata.get("app_id"), + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, ) # Get all executions for this workflow run @@ -124,23 +145,22 @@ class LangFuseDataTrace(BaseTraceInstance): for node_execution in workflow_node_executions: node_execution_id = node_execution.id - tenant_id = node_execution.tenant_id - app_id = node_execution.app_id + tenant_id = trace_info.tenant_id # Use from trace_info instead + app_id = trace_info.metadata.get("app_id") # Use from trace_info instead node_name = node_execution.title node_type = node_execution.node_type status = node_execution.status - if node_type == "llm": - inputs = ( - json.loads(node_execution.process_data).get("prompts", {}) if node_execution.process_data else {} - ) + if node_type == NodeType.LLM: + inputs = node_execution.process_data.get("prompts", {}) if node_execution.process_data else {} else: - inputs = json.loads(node_execution.inputs) 
if node_execution.inputs else {} - outputs = json.loads(node_execution.outputs) if node_execution.outputs else {} + inputs = node_execution.inputs if node_execution.inputs else {} + outputs = node_execution.outputs if node_execution.outputs else {} created_at = node_execution.created_at or datetime.now() elapsed_time = node_execution.elapsed_time finished_at = created_at + timedelta(seconds=elapsed_time) - metadata = json.loads(node_execution.execution_metadata) if node_execution.execution_metadata else {} + execution_metadata = node_execution.metadata if node_execution.metadata else {} + metadata = {str(k): v for k, v in execution_metadata.items()} metadata.update( { "workflow_run_id": trace_info.workflow_run_id, @@ -152,7 +172,7 @@ class LangFuseDataTrace(BaseTraceInstance): "status": status, } ) - process_data = json.loads(node_execution.process_data) if node_execution.process_data else {} + process_data = node_execution.process_data if node_execution.process_data else {} model_provider = process_data.get("model_provider", None) model_name = process_data.get("model_name", None) if model_provider is not None and model_name is not None: diff --git a/api/core/ops/langsmith_trace/entities/langsmith_trace_entity.py b/api/core/ops/langsmith_trace/entities/langsmith_trace_entity.py index 348b7ba501..4fd01136ba 100644 --- a/api/core/ops/langsmith_trace/entities/langsmith_trace_entity.py +++ b/api/core/ops/langsmith_trace/entities/langsmith_trace_entity.py @@ -1,3 +1,4 @@ +from collections.abc import Mapping from datetime import datetime from enum import StrEnum from typing import Any, Optional, Union @@ -30,8 +31,8 @@ class LangSmithMultiModel(BaseModel): class LangSmithRunModel(LangSmithTokenUsage, LangSmithMultiModel): name: Optional[str] = Field(..., description="Name of the run") - inputs: Optional[Union[str, dict[str, Any], list, None]] = Field(None, description="Inputs of the run") - outputs: Optional[Union[str, dict[str, Any], list, None]] = Field(None, description="Outputs of the run") + inputs: Optional[Union[str, Mapping[str, Any], list, None]] = Field(None, description="Inputs of the run") + outputs: Optional[Union[str, Mapping[str, Any], list, None]] = Field(None, description="Outputs of the run") run_type: LangSmithRunType = Field(..., description="Type of the run") start_time: Optional[datetime | str] = Field(None, description="Start time of the run") end_time: Optional[datetime | str] = Field(None, description="End time of the run") diff --git a/api/core/ops/langsmith_trace/langsmith_trace.py b/api/core/ops/langsmith_trace/langsmith_trace.py index 78a51ff36e..6631727c79 100644 --- a/api/core/ops/langsmith_trace/langsmith_trace.py +++ b/api/core/ops/langsmith_trace/langsmith_trace.py @@ -1,4 +1,3 @@ -import json import logging import os import uuid @@ -7,7 +6,7 @@ from typing import Optional, cast from langsmith import Client from langsmith.schemas import RunBase -from sqlalchemy.orm import sessionmaker +from sqlalchemy.orm import Session, sessionmaker from core.ops.base_trace_instance import BaseTraceInstance from core.ops.entities.config_entity import LangSmithConfig @@ -28,9 +27,11 @@ from core.ops.langsmith_trace.entities.langsmith_trace_entity import ( LangSmithRunUpdateModel, ) from core.ops.utils import filter_none_values, generate_dotted_order -from core.workflow.repository.repository_factory import RepositoryFactory +from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository +from core.workflow.entities.node_entities import NodeRunMetadataKey +from 
core.workflow.nodes.enums import NodeType from extensions.ext_database import db -from models.model import EndUser, MessageFile +from models import Account, App, EndUser, MessageFile, WorkflowNodeExecutionTriggeredFrom logger = logging.getLogger(__name__) @@ -137,12 +138,29 @@ class LangSmithDataTrace(BaseTraceInstance): # through workflow_run_id get all_nodes_execution using repository session_factory = sessionmaker(bind=db.engine) - workflow_node_execution_repository = RepositoryFactory.create_workflow_node_execution_repository( - params={ - "tenant_id": trace_info.tenant_id, - "app_id": trace_info.metadata.get("app_id"), - "session_factory": session_factory, - }, + # Find the app's creator account + with Session(db.engine, expire_on_commit=False) as session: + # Get the app to find its creator + app_id = trace_info.metadata.get("app_id") + if not app_id: + raise ValueError("No app_id found in trace_info metadata") + + app = session.query(App).filter(App.id == app_id).first() + if not app: + raise ValueError(f"App with id {app_id} not found") + + if not app.created_by: + raise ValueError(f"App with id {app_id} has no creator (created_by is None)") + + service_account = session.query(Account).filter(Account.id == app.created_by).first() + if not service_account: + raise ValueError(f"Creator account with id {app.created_by} not found for app {app_id}") + + workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=session_factory, + user=service_account, + app_id=trace_info.metadata.get("app_id"), + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, ) # Get all executions for this workflow run @@ -152,27 +170,23 @@ class LangSmithDataTrace(BaseTraceInstance): for node_execution in workflow_node_executions: node_execution_id = node_execution.id - tenant_id = node_execution.tenant_id - app_id = node_execution.app_id + tenant_id = trace_info.tenant_id # Use from trace_info instead + app_id = trace_info.metadata.get("app_id") # Use from trace_info instead node_name = node_execution.title node_type = node_execution.node_type status = node_execution.status - if node_type == "llm": - inputs = ( - json.loads(node_execution.process_data).get("prompts", {}) if node_execution.process_data else {} - ) + if node_type == NodeType.LLM: + inputs = node_execution.process_data.get("prompts", {}) if node_execution.process_data else {} else: - inputs = json.loads(node_execution.inputs) if node_execution.inputs else {} - outputs = json.loads(node_execution.outputs) if node_execution.outputs else {} + inputs = node_execution.inputs if node_execution.inputs else {} + outputs = node_execution.outputs if node_execution.outputs else {} created_at = node_execution.created_at or datetime.now() elapsed_time = node_execution.elapsed_time finished_at = created_at + timedelta(seconds=elapsed_time) - execution_metadata = ( - json.loads(node_execution.execution_metadata) if node_execution.execution_metadata else {} - ) - node_total_tokens = execution_metadata.get("total_tokens", 0) - metadata = execution_metadata.copy() + execution_metadata = node_execution.metadata if node_execution.metadata else {} + node_total_tokens = execution_metadata.get(NodeRunMetadataKey.TOTAL_TOKENS) or 0 + metadata = {str(key): value for key, value in execution_metadata.items()} metadata.update( { "workflow_run_id": trace_info.workflow_run_id, @@ -185,7 +199,7 @@ class LangSmithDataTrace(BaseTraceInstance): } ) - process_data = json.loads(node_execution.process_data) if 
node_execution.process_data else {} + process_data = node_execution.process_data if node_execution.process_data else {} if process_data and process_data.get("model_mode") == "chat": run_type = LangSmithRunType.llm @@ -195,7 +209,7 @@ class LangSmithDataTrace(BaseTraceInstance): "ls_model_name": process_data.get("model_name", ""), } ) - elif node_type == "knowledge-retrieval": + elif node_type == NodeType.KNOWLEDGE_RETRIEVAL: run_type = LangSmithRunType.retriever else: run_type = LangSmithRunType.tool diff --git a/api/core/ops/opik_trace/opik_trace.py b/api/core/ops/opik_trace/opik_trace.py index a14b5afb8e..6c159a4831 100644 --- a/api/core/ops/opik_trace/opik_trace.py +++ b/api/core/ops/opik_trace/opik_trace.py @@ -1,4 +1,3 @@ -import json import logging import os import uuid @@ -7,7 +6,7 @@ from typing import Optional, cast from opik import Opik, Trace from opik.id_helpers import uuid4_to_uuid7 -from sqlalchemy.orm import sessionmaker +from sqlalchemy.orm import Session, sessionmaker from core.ops.base_trace_instance import BaseTraceInstance from core.ops.entities.config_entity import OpikConfig @@ -22,9 +21,11 @@ from core.ops.entities.trace_entity import ( TraceTaskName, WorkflowTraceInfo, ) -from core.workflow.repository.repository_factory import RepositoryFactory +from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository +from core.workflow.entities.node_entities import NodeRunMetadataKey +from core.workflow.nodes.enums import NodeType from extensions.ext_database import db -from models.model import EndUser, MessageFile +from models import Account, App, EndUser, MessageFile, WorkflowNodeExecutionTriggeredFrom logger = logging.getLogger(__name__) @@ -114,6 +115,7 @@ class OpikDataTrace(BaseTraceInstance): "metadata": workflow_metadata, "input": wrap_dict("input", trace_info.workflow_run_inputs), "output": wrap_dict("output", trace_info.workflow_run_outputs), + "thread_id": trace_info.conversation_id, "tags": ["message", "workflow"], "project_name": self.project, } @@ -143,6 +145,7 @@ class OpikDataTrace(BaseTraceInstance): "metadata": workflow_metadata, "input": wrap_dict("input", trace_info.workflow_run_inputs), "output": wrap_dict("output", trace_info.workflow_run_outputs), + "thread_id": trace_info.conversation_id, "tags": ["workflow"], "project_name": self.project, } @@ -150,12 +153,29 @@ class OpikDataTrace(BaseTraceInstance): # through workflow_run_id get all_nodes_execution using repository session_factory = sessionmaker(bind=db.engine) - workflow_node_execution_repository = RepositoryFactory.create_workflow_node_execution_repository( - params={ - "tenant_id": trace_info.tenant_id, - "app_id": trace_info.metadata.get("app_id"), - "session_factory": session_factory, - }, + # Find the app's creator account + with Session(db.engine, expire_on_commit=False) as session: + # Get the app to find its creator + app_id = trace_info.metadata.get("app_id") + if not app_id: + raise ValueError("No app_id found in trace_info metadata") + + app = session.query(App).filter(App.id == app_id).first() + if not app: + raise ValueError(f"App with id {app_id} not found") + + if not app.created_by: + raise ValueError(f"App with id {app_id} has no creator (created_by is None)") + + service_account = session.query(Account).filter(Account.id == app.created_by).first() + if not service_account: + raise ValueError(f"Creator account with id {app.created_by} not found for app {app_id}") + + workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository( + 
session_factory=session_factory, + user=service_account, + app_id=trace_info.metadata.get("app_id"), + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, ) # Get all executions for this workflow run @@ -165,26 +185,22 @@ class OpikDataTrace(BaseTraceInstance): for node_execution in workflow_node_executions: node_execution_id = node_execution.id - tenant_id = node_execution.tenant_id - app_id = node_execution.app_id + tenant_id = trace_info.tenant_id # Use from trace_info instead + app_id = trace_info.metadata.get("app_id") # Use from trace_info instead node_name = node_execution.title node_type = node_execution.node_type status = node_execution.status - if node_type == "llm": - inputs = ( - json.loads(node_execution.process_data).get("prompts", {}) if node_execution.process_data else {} - ) + if node_type == NodeType.LLM: + inputs = node_execution.process_data.get("prompts", {}) if node_execution.process_data else {} else: - inputs = json.loads(node_execution.inputs) if node_execution.inputs else {} - outputs = json.loads(node_execution.outputs) if node_execution.outputs else {} + inputs = node_execution.inputs if node_execution.inputs else {} + outputs = node_execution.outputs if node_execution.outputs else {} created_at = node_execution.created_at or datetime.now() elapsed_time = node_execution.elapsed_time finished_at = created_at + timedelta(seconds=elapsed_time) - execution_metadata = ( - json.loads(node_execution.execution_metadata) if node_execution.execution_metadata else {} - ) - metadata = execution_metadata.copy() + execution_metadata = node_execution.metadata if node_execution.metadata else {} + metadata = {str(k): v for k, v in execution_metadata.items()} metadata.update( { "workflow_run_id": trace_info.workflow_run_id, @@ -197,7 +213,7 @@ class OpikDataTrace(BaseTraceInstance): } ) - process_data = json.loads(node_execution.process_data) if node_execution.process_data else {} + process_data = node_execution.process_data if node_execution.process_data else {} provider = None model = None @@ -230,7 +246,7 @@ class OpikDataTrace(BaseTraceInstance): parent_span_id = trace_info.workflow_app_log_id or trace_info.workflow_run_id if not total_tokens: - total_tokens = execution_metadata.get("total_tokens", 0) + total_tokens = execution_metadata.get(NodeRunMetadataKey.TOTAL_TOKENS) or 0 span_data = { "trace_id": opik_trace_id, @@ -292,6 +308,7 @@ class OpikDataTrace(BaseTraceInstance): "metadata": wrap_metadata(metadata), "input": trace_info.inputs, "output": message_data.answer, + "thread_id": message_data.conversation_id, "tags": ["message", str(trace_info.conversation_mode)], "project_name": self.project, } @@ -406,6 +423,7 @@ class OpikDataTrace(BaseTraceInstance): "metadata": wrap_metadata(trace_info.metadata), "input": trace_info.inputs, "output": trace_info.outputs, + "thread_id": trace_info.conversation_id, "tags": ["generate_name"], "project_name": self.project, } diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py index 2c68055f87..32301e11e7 100644 --- a/api/core/ops/ops_trace_manager.py +++ b/api/core/ops/ops_trace_manager.py @@ -16,11 +16,7 @@ from sqlalchemy.orm import Session from core.helper.encrypter import decrypt_token, encrypt_token, obfuscated_token from core.ops.entities.config_entity import ( OPS_FILE_PATH, - LangfuseConfig, - LangSmithConfig, - OpikConfig, TracingProviderEnum, - WeaveConfig, ) from core.ops.entities.trace_entity import ( DatasetRetrievalTraceInfo, @@ -33,11 +29,8 @@ from core.ops.entities.trace_entity 
import ( TraceTaskName, WorkflowTraceInfo, ) -from core.ops.langfuse_trace.langfuse_trace import LangFuseDataTrace -from core.ops.langsmith_trace.langsmith_trace import LangSmithDataTrace -from core.ops.opik_trace.opik_trace import OpikDataTrace from core.ops.utils import get_message_data -from core.ops.weave_trace.weave_trace import WeaveDataTrace +from core.workflow.entities.workflow_execution_entities import WorkflowExecution from extensions.ext_database import db from extensions.ext_storage import storage from models.model import App, AppModelConfig, Conversation, Message, MessageFile, TraceAppConfig @@ -45,36 +38,58 @@ from models.workflow import WorkflowAppLog, WorkflowRun from tasks.ops_trace_task import process_trace_tasks -def build_opik_trace_instance(config: OpikConfig): - return OpikDataTrace(config) +class OpsTraceProviderConfigMap(dict[str, dict[str, Any]]): + def __getitem__(self, provider: str) -> dict[str, Any]: + match provider: + case TracingProviderEnum.LANGFUSE: + from core.ops.entities.config_entity import LangfuseConfig + from core.ops.langfuse_trace.langfuse_trace import LangFuseDataTrace + + return { + "config_class": LangfuseConfig, + "secret_keys": ["public_key", "secret_key"], + "other_keys": ["host", "project_key"], + "trace_instance": LangFuseDataTrace, + } + + case TracingProviderEnum.LANGSMITH: + from core.ops.entities.config_entity import LangSmithConfig + from core.ops.langsmith_trace.langsmith_trace import LangSmithDataTrace + + return { + "config_class": LangSmithConfig, + "secret_keys": ["api_key"], + "other_keys": ["project", "endpoint"], + "trace_instance": LangSmithDataTrace, + } + + case TracingProviderEnum.OPIK: + from core.ops.entities.config_entity import OpikConfig + from core.ops.opik_trace.opik_trace import OpikDataTrace + + return { + "config_class": OpikConfig, + "secret_keys": ["api_key"], + "other_keys": ["project", "url", "workspace"], + "trace_instance": OpikDataTrace, + } + + case TracingProviderEnum.WEAVE: + from core.ops.entities.config_entity import WeaveConfig + from core.ops.weave_trace.weave_trace import WeaveDataTrace + + return { + "config_class": WeaveConfig, + "secret_keys": ["api_key"], + "other_keys": ["project", "entity", "endpoint"], + "trace_instance": WeaveDataTrace, + } + + case _: + raise KeyError(f"Unsupported tracing provider: {provider}") -provider_config_map: dict[str, dict[str, Any]] = { - TracingProviderEnum.LANGFUSE.value: { - "config_class": LangfuseConfig, - "secret_keys": ["public_key", "secret_key"], - "other_keys": ["host", "project_key"], - "trace_instance": LangFuseDataTrace, - }, - TracingProviderEnum.LANGSMITH.value: { - "config_class": LangSmithConfig, - "secret_keys": ["api_key"], - "other_keys": ["project", "endpoint"], - "trace_instance": LangSmithDataTrace, - }, - TracingProviderEnum.OPIK.value: { - "config_class": OpikConfig, - "secret_keys": ["api_key"], - "other_keys": ["project", "url", "workspace"], - "trace_instance": lambda config: build_opik_trace_instance(config), - }, - TracingProviderEnum.WEAVE.value: { - "config_class": WeaveConfig, - "secret_keys": ["api_key"], - "other_keys": ["project", "entity", "endpoint"], - "trace_instance": WeaveDataTrace, - }, -} +provider_config_map: dict[str, dict[str, Any]] = OpsTraceProviderConfigMap() class OpsTraceManager: @@ -220,7 +235,11 @@ class OpsTraceManager: return None tracing_provider = app_ops_trace_config.get("tracing_provider") - if tracing_provider is None or tracing_provider not in provider_config_map: + if tracing_provider is None: + 
return None + try: + provider_config_map[tracing_provider] + except KeyError: return None # decrypt_token @@ -273,8 +292,14 @@ class OpsTraceManager: :return: """ # auth check - if tracing_provider not in provider_config_map and tracing_provider is not None: - raise ValueError(f"Invalid tracing provider: {tracing_provider}") + if enabled == True: + try: + provider_config_map[tracing_provider] + except KeyError: + raise ValueError(f"Invalid tracing provider: {tracing_provider}") + else: + if tracing_provider is not None: + raise ValueError(f"Invalid tracing provider: {tracing_provider}") app_config: Optional[App] = db.session.query(App).filter(App.id == app_id).first() if not app_config: @@ -353,7 +378,7 @@ class TraceTask: self, trace_type: Any, message_id: Optional[str] = None, - workflow_run: Optional[WorkflowRun] = None, + workflow_execution: Optional[WorkflowExecution] = None, conversation_id: Optional[str] = None, user_id: Optional[str] = None, timer: Optional[Any] = None, @@ -361,7 +386,7 @@ class TraceTask: ): self.trace_type = trace_type self.message_id = message_id - self.workflow_run_id = workflow_run.id if workflow_run else None + self.workflow_run_id = workflow_execution.id if workflow_execution else None self.conversation_id = conversation_id self.user_id = user_id self.timer = timer diff --git a/api/core/ops/weave_trace/entities/weave_trace_entity.py b/api/core/ops/weave_trace/entities/weave_trace_entity.py index e423f5ccbb..7f489f37ac 100644 --- a/api/core/ops/weave_trace/entities/weave_trace_entity.py +++ b/api/core/ops/weave_trace/entities/weave_trace_entity.py @@ -1,3 +1,4 @@ +from collections.abc import Mapping from typing import Any, Optional, Union from pydantic import BaseModel, Field, field_validator @@ -19,8 +20,8 @@ class WeaveMultiModel(BaseModel): class WeaveTraceModel(WeaveTokenUsage, WeaveMultiModel): id: str = Field(..., description="ID of the trace") op: str = Field(..., description="Name of the operation") - inputs: Optional[Union[str, dict[str, Any], list, None]] = Field(None, description="Inputs of the trace") - outputs: Optional[Union[str, dict[str, Any], list, None]] = Field(None, description="Outputs of the trace") + inputs: Optional[Union[str, Mapping[str, Any], list, None]] = Field(None, description="Inputs of the trace") + outputs: Optional[Union[str, Mapping[str, Any], list, None]] = Field(None, description="Outputs of the trace") attributes: Optional[Union[str, dict[str, Any], list, None]] = Field( None, description="Metadata and attributes associated with trace" ) diff --git a/api/core/ops/weave_trace/weave_trace.py b/api/core/ops/weave_trace/weave_trace.py index 49594cb0f1..a4f38dfbba 100644 --- a/api/core/ops/weave_trace/weave_trace.py +++ b/api/core/ops/weave_trace/weave_trace.py @@ -1,4 +1,3 @@ -import json import logging import os import uuid @@ -7,6 +6,7 @@ from typing import Any, Optional, cast import wandb import weave +from sqlalchemy.orm import Session, sessionmaker from core.ops.base_trace_instance import BaseTraceInstance from core.ops.entities.config_entity import WeaveConfig @@ -22,9 +22,11 @@ from core.ops.entities.trace_entity import ( WorkflowTraceInfo, ) from core.ops.weave_trace.entities.weave_trace_entity import WeaveTraceModel +from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository +from core.workflow.entities.node_entities import NodeRunMetadataKey +from core.workflow.nodes.enums import NodeType from extensions.ext_database import db -from models.model import EndUser, MessageFile -from models.workflow import 
WorkflowNodeExecution +from models import Account, App, EndUser, MessageFile, WorkflowNodeExecutionTriggeredFrom logger = logging.getLogger(__name__) @@ -128,58 +130,57 @@ class WeaveDataTrace(BaseTraceInstance): self.start_call(workflow_run, parent_run_id=trace_info.message_id) - # through workflow_run_id get all_nodes_execution - workflow_nodes_execution_id_records = ( - db.session.query(WorkflowNodeExecution.id) - .filter(WorkflowNodeExecution.workflow_run_id == trace_info.workflow_run_id) - .all() + # through workflow_run_id get all_nodes_execution using repository + session_factory = sessionmaker(bind=db.engine) + # Find the app's creator account + with Session(db.engine, expire_on_commit=False) as session: + # Get the app to find its creator + app_id = trace_info.metadata.get("app_id") + if not app_id: + raise ValueError("No app_id found in trace_info metadata") + + app = session.query(App).filter(App.id == app_id).first() + if not app: + raise ValueError(f"App with id {app_id} not found") + + if not app.created_by: + raise ValueError(f"App with id {app_id} has no creator (created_by is None)") + + service_account = session.query(Account).filter(Account.id == app.created_by).first() + if not service_account: + raise ValueError(f"Creator account with id {app.created_by} not found for app {app_id}") + + workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=session_factory, + user=service_account, + app_id=trace_info.metadata.get("app_id"), + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, ) - for node_execution_id_record in workflow_nodes_execution_id_records: - node_execution = ( - db.session.query( - WorkflowNodeExecution.id, - WorkflowNodeExecution.tenant_id, - WorkflowNodeExecution.app_id, - WorkflowNodeExecution.title, - WorkflowNodeExecution.node_type, - WorkflowNodeExecution.status, - WorkflowNodeExecution.inputs, - WorkflowNodeExecution.outputs, - WorkflowNodeExecution.created_at, - WorkflowNodeExecution.elapsed_time, - WorkflowNodeExecution.process_data, - WorkflowNodeExecution.execution_metadata, - ) - .filter(WorkflowNodeExecution.id == node_execution_id_record.id) - .first() - ) - - if not node_execution: - continue + # Get all executions for this workflow run + workflow_node_executions = workflow_node_execution_repository.get_by_workflow_run( + workflow_run_id=trace_info.workflow_run_id + ) + for node_execution in workflow_node_executions: node_execution_id = node_execution.id - tenant_id = node_execution.tenant_id - app_id = node_execution.app_id + tenant_id = trace_info.tenant_id # Use from trace_info instead + app_id = trace_info.metadata.get("app_id") # Use from trace_info instead node_name = node_execution.title node_type = node_execution.node_type status = node_execution.status - if node_type == "llm": - inputs = ( - json.loads(node_execution.process_data).get("prompts", {}) if node_execution.process_data else {} - ) + if node_type == NodeType.LLM: + inputs = node_execution.process_data.get("prompts", {}) if node_execution.process_data else {} else: - inputs = json.loads(node_execution.inputs) if node_execution.inputs else {} - outputs = json.loads(node_execution.outputs) if node_execution.outputs else {} + inputs = node_execution.inputs if node_execution.inputs else {} + outputs = node_execution.outputs if node_execution.outputs else {} created_at = node_execution.created_at or datetime.now() elapsed_time = node_execution.elapsed_time finished_at = created_at + timedelta(seconds=elapsed_time) - 
execution_metadata = ( - json.loads(node_execution.execution_metadata) if node_execution.execution_metadata else {} - ) - node_total_tokens = execution_metadata.get("total_tokens", 0) - attributes = execution_metadata.copy() + execution_metadata = node_execution.metadata if node_execution.metadata else {} + node_total_tokens = execution_metadata.get(NodeRunMetadataKey.TOTAL_TOKENS) or 0 + attributes = {str(k): v for k, v in execution_metadata.items()} attributes.update( { "workflow_run_id": trace_info.workflow_run_id, @@ -192,7 +193,7 @@ class WeaveDataTrace(BaseTraceInstance): } ) - process_data = json.loads(node_execution.process_data) if node_execution.process_data else {} + process_data = node_execution.process_data if node_execution.process_data else {} if process_data and process_data.get("model_mode") == "chat": attributes.update( { diff --git a/api/core/plugin/backwards_invocation/node.py b/api/core/plugin/backwards_invocation/node.py index db07e52f3f..7898795ce2 100644 --- a/api/core/plugin/backwards_invocation/node.py +++ b/api/core/plugin/backwards_invocation/node.py @@ -64,9 +64,9 @@ class PluginNodeBackwardsInvocation(BaseBackwardsInvocation): ) return { - "inputs": execution.inputs_dict, - "outputs": execution.outputs_dict, - "process_data": execution.process_data_dict, + "inputs": execution.inputs, + "outputs": execution.outputs, + "process_data": execution.process_data, } @classmethod @@ -113,7 +113,7 @@ class PluginNodeBackwardsInvocation(BaseBackwardsInvocation): ) return { - "inputs": execution.inputs_dict, - "outputs": execution.outputs_dict, - "process_data": execution.process_data_dict, + "inputs": execution.inputs, + "outputs": execution.outputs, + "process_data": execution.process_data, } diff --git a/api/core/plugin/endpoint/exc.py b/api/core/plugin/endpoint/exc.py new file mode 100644 index 0000000000..aa29f1e9a1 --- /dev/null +++ b/api/core/plugin/endpoint/exc.py @@ -0,0 +1,6 @@ +class EndpointSetupFailedError(ValueError): + """ + Endpoint setup failed error + """ + + pass diff --git a/api/core/plugin/entities/endpoint.py b/api/core/plugin/entities/endpoint.py index 6c6c8bf9bc..d7ba75bb4f 100644 --- a/api/core/plugin/entities/endpoint.py +++ b/api/core/plugin/entities/endpoint.py @@ -24,7 +24,7 @@ class EndpointProviderDeclaration(BaseModel): """ settings: list[ProviderConfig] = Field(default_factory=list) - endpoints: Optional[list[EndpointDeclaration]] = Field(default_factory=list) + endpoints: Optional[list[EndpointDeclaration]] = Field(default_factory=list[EndpointDeclaration]) class EndpointEntity(BasePluginEntity): diff --git a/api/core/plugin/entities/plugin.py b/api/core/plugin/entities/plugin.py index 07ed94380a..bdf7d5ce1f 100644 --- a/api/core/plugin/entities/plugin.py +++ b/api/core/plugin/entities/plugin.py @@ -52,7 +52,7 @@ class PluginResourceRequirements(BaseModel): model: Optional[Model] = Field(default=None) node: Optional[Node] = Field(default=None) endpoint: Optional[Endpoint] = Field(default=None) - storage: Storage = Field(default=None) + storage: Optional[Storage] = Field(default=None) permission: Optional[Permission] = Field(default=None) @@ -66,9 +66,9 @@ class PluginCategory(enum.StrEnum): class PluginDeclaration(BaseModel): class Plugins(BaseModel): - tools: Optional[list[str]] = Field(default_factory=list) - models: Optional[list[str]] = Field(default_factory=list) - endpoints: Optional[list[str]] = Field(default_factory=list) + tools: Optional[list[str]] = Field(default_factory=list[str]) + models: Optional[list[str]] = 
Field(default_factory=list[str]) + endpoints: Optional[list[str]] = Field(default_factory=list[str]) class Meta(BaseModel): minimum_dify_version: Optional[str] = Field(default=None, pattern=r"^\d{1,4}(\.\d{1,4}){1,3}(-\w{1,16})?$") @@ -84,6 +84,7 @@ class PluginDeclaration(BaseModel): resource: PluginResourceRequirements plugins: Plugins tags: list[str] = Field(default_factory=list) + repo: Optional[str] = Field(default=None) verified: bool = Field(default=False) tool: Optional[ToolProviderEntity] = None model: Optional[ProviderEntity] = None diff --git a/api/core/plugin/entities/plugin_daemon.py b/api/core/plugin/entities/plugin_daemon.py index 2bea07bea0..e9275c31cc 100644 --- a/api/core/plugin/entities/plugin_daemon.py +++ b/api/core/plugin/entities/plugin_daemon.py @@ -9,7 +9,7 @@ from core.agent.plugin_entities import AgentProviderEntityWithPlugin from core.model_runtime.entities.model_entities import AIModelEntity from core.model_runtime.entities.provider_entities import ProviderEntity from core.plugin.entities.base import BasePluginEntity -from core.plugin.entities.plugin import PluginDeclaration +from core.plugin.entities.plugin import PluginDeclaration, PluginEntity from core.tools.entities.common_entities import I18nObject from core.tools.entities.tool_entities import ToolProviderEntityWithPlugin @@ -167,3 +167,8 @@ class PluginOAuthAuthorizationUrlResponse(BaseModel): class PluginOAuthCredentialsResponse(BaseModel): credentials: Mapping[str, Any] = Field(description="The credentials of the OAuth.") + + +class PluginListResponse(BaseModel): + list: list[PluginEntity] + total: int diff --git a/api/core/plugin/entities/request.py b/api/core/plugin/entities/request.py index ffd7011a41..8d18f5c20d 100644 --- a/api/core/plugin/entities/request.py +++ b/api/core/plugin/entities/request.py @@ -55,8 +55,8 @@ class RequestInvokeLLM(BaseRequestInvokeModel): mode: str completion_params: dict[str, Any] = Field(default_factory=dict) prompt_messages: list[PromptMessage] = Field(default_factory=list) - tools: Optional[list[PromptMessageTool]] = Field(default_factory=list) - stop: Optional[list[str]] = Field(default_factory=list) + tools: Optional[list[PromptMessageTool]] = Field(default_factory=list[PromptMessageTool]) + stop: Optional[list[str]] = Field(default_factory=list[str]) stream: Optional[bool] = False model_config = ConfigDict(protected_namespaces=()) diff --git a/api/core/plugin/impl/base.py b/api/core/plugin/impl/base.py index 4f1d808a3e..591e7b0525 100644 --- a/api/core/plugin/impl/base.py +++ b/api/core/plugin/impl/base.py @@ -17,6 +17,7 @@ from core.model_runtime.errors.invoke import ( InvokeServerUnavailableError, ) from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.plugin.endpoint.exc import EndpointSetupFailedError from core.plugin.entities.plugin_daemon import PluginDaemonBasicResponse, PluginDaemonError, PluginDaemonInnerError from core.plugin.impl.exc import ( PluginDaemonBadRequestError, @@ -219,6 +220,8 @@ class BasePluginClient: raise InvokeServerUnavailableError(description=args.get("description")) case CredentialsValidateFailedError.__name__: raise CredentialsValidateFailedError(error_object.get("message")) + case EndpointSetupFailedError.__name__: + raise EndpointSetupFailedError(error_object.get("message")) case _: raise PluginInvokeError(description=message) case PluginDaemonInternalServerError.__name__: diff --git a/api/core/plugin/impl/plugin.py b/api/core/plugin/impl/plugin.py index 3349463ce5..1cd2dc1be7 100644 --- 
a/api/core/plugin/impl/plugin.py +++ b/api/core/plugin/impl/plugin.py @@ -9,7 +9,12 @@ from core.plugin.entities.plugin import ( PluginInstallation, PluginInstallationSource, ) -from core.plugin.entities.plugin_daemon import PluginInstallTask, PluginInstallTaskStartResponse, PluginUploadResponse +from core.plugin.entities.plugin_daemon import ( + PluginInstallTask, + PluginInstallTaskStartResponse, + PluginListResponse, + PluginUploadResponse, +) from core.plugin.impl.base import BasePluginClient @@ -27,11 +32,20 @@ class PluginInstaller(BasePluginClient): ) def list_plugins(self, tenant_id: str) -> list[PluginEntity]: + result = self._request_with_plugin_daemon_response( + "GET", + f"plugin/{tenant_id}/management/list", + PluginListResponse, + params={"page": 1, "page_size": 256}, + ) + return result.list + + def list_plugins_with_total(self, tenant_id: str, page: int, page_size: int) -> PluginListResponse: return self._request_with_plugin_daemon_response( "GET", f"plugin/{tenant_id}/management/list", - list[PluginEntity], - params={"page": 1, "page_size": 256}, + PluginListResponse, + params={"page": page, "page_size": page_size}, ) def upload_pkg( diff --git a/api/core/rag/datasource/retrieval_service.py b/api/core/rag/datasource/retrieval_service.py index 46a5330bdb..2c5178241c 100644 --- a/api/core/rag/datasource/retrieval_service.py +++ b/api/core/rag/datasource/retrieval_service.py @@ -10,6 +10,7 @@ from core.rag.data_post_processor.data_post_processor import DataPostProcessor from core.rag.datasource.keyword.keyword_factory import Keyword from core.rag.datasource.vdb.vector_factory import Vector from core.rag.embedding.retrieval import RetrievalSegments +from core.rag.entities.metadata_entities import MetadataCondition from core.rag.index_processor.constant.index_type import IndexType from core.rag.models.document import Document from core.rag.rerank.rerank_type import RerankMode @@ -119,12 +120,25 @@ class RetrievalService: return all_documents @classmethod - def external_retrieve(cls, dataset_id: str, query: str, external_retrieval_model: Optional[dict] = None): + def external_retrieve( + cls, + dataset_id: str, + query: str, + external_retrieval_model: Optional[dict] = None, + metadata_filtering_conditions: Optional[dict] = None, + ): dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first() if not dataset: return [] + metadata_condition = ( + MetadataCondition(**metadata_filtering_conditions) if metadata_filtering_conditions else None + ) all_documents = ExternalDatasetService.fetch_external_knowledge_retrieval( - dataset.tenant_id, dataset_id, query, external_retrieval_model or {} + dataset.tenant_id, + dataset_id, + query, + external_retrieval_model or {}, + metadata_condition=metadata_condition, ) return all_documents @@ -391,7 +405,29 @@ class RetrievalService: record["child_chunks"] = segment_child_map[record["segment"].id].get("child_chunks") # type: ignore record["score"] = segment_child_map[record["segment"].id]["max_score"] - return [RetrievalSegments(**record) for record in records] + result = [] + for record in records: + # Extract segment + segment = record["segment"] + + # Extract child_chunks, ensuring it's a list or None + child_chunks = record.get("child_chunks") + if not isinstance(child_chunks, list): + child_chunks = None + + # Extract score, ensuring it's a float or None + score_value = record.get("score") + score = ( + float(score_value) + if score_value is not None and isinstance(score_value, int | float | str) + else None + ) + + # 
Create RetrievalSegments object + retrieval_segment = RetrievalSegments(segment=segment, child_chunks=child_chunks, score=score) + result.append(retrieval_segment) + + return result except Exception as e: db.session.rollback() raise e diff --git a/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py b/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py index e23b8d197f..6991598ce6 100644 --- a/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py +++ b/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py @@ -23,7 +23,8 @@ logger = logging.getLogger(__name__) class OpenSearchConfig(BaseModel): host: str port: int - secure: bool = False + secure: bool = False # use_ssl + verify_certs: bool = True auth_method: Literal["basic", "aws_managed_iam"] = "basic" user: Optional[str] = None password: Optional[str] = None @@ -42,6 +43,8 @@ class OpenSearchConfig(BaseModel): raise ValueError("config OPENSEARCH_AWS_REGION is required for AWS_MANAGED_IAM auth method") if not values.get("aws_service"): raise ValueError("config OPENSEARCH_AWS_SERVICE is required for AWS_MANAGED_IAM auth method") + if not values.get("OPENSEARCH_SECURE") and values.get("OPENSEARCH_VERIFY_CERTS"): + raise ValueError("verify_certs=True requires secure (HTTPS) connection") return values def create_aws_managed_iam_auth(self) -> Urllib3AWSV4SignerAuth: @@ -57,7 +60,7 @@ class OpenSearchConfig(BaseModel): params = { "hosts": [{"host": self.host, "port": self.port}], "use_ssl": self.secure, - "verify_certs": self.secure, + "verify_certs": self.verify_certs, "connection_class": Urllib3HttpConnection, "pool_maxsize": 20, } @@ -279,6 +282,7 @@ class OpenSearchVectorFactory(AbstractVectorFactory): host=dify_config.OPENSEARCH_HOST or "localhost", port=dify_config.OPENSEARCH_PORT, secure=dify_config.OPENSEARCH_SECURE, + verify_certs=dify_config.OPENSEARCH_VERIFY_CERTS, auth_method=dify_config.OPENSEARCH_AUTH_METHOD.value, user=dify_config.OPENSEARCH_USER, password=dify_config.OPENSEARCH_PASSWORD, diff --git a/api/core/rag/datasource/vdb/pgvector/pgvector.py b/api/core/rag/datasource/vdb/pgvector/pgvector.py index 366a21c381..04e9cf801e 100644 --- a/api/core/rag/datasource/vdb/pgvector/pgvector.py +++ b/api/core/rag/datasource/vdb/pgvector/pgvector.py @@ -1,3 +1,4 @@ +import hashlib import json import logging import uuid @@ -61,12 +62,12 @@ CREATE TABLE IF NOT EXISTS {table_name} ( """ SQL_CREATE_INDEX = """ -CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx ON {table_name} +CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx_{index_hash} ON {table_name} USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64); """ SQL_CREATE_INDEX_PG_BIGM = """ -CREATE INDEX IF NOT EXISTS bigm_idx ON {table_name} +CREATE INDEX IF NOT EXISTS bigm_idx_{index_hash} ON {table_name} USING gin (text gin_bigm_ops); """ @@ -76,6 +77,7 @@ class PGVector(BaseVector): super().__init__(collection_name) self.pool = self._create_connection_pool(config) self.table_name = f"embedding_{collection_name}" + self.index_hash = hashlib.md5(self.table_name.encode()).hexdigest()[:8] self.pg_bigm = config.pg_bigm def get_type(self) -> str: @@ -256,10 +258,9 @@ class PGVector(BaseVector): # PG hnsw index only support 2000 dimension or less # ref: https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing if dimension <= 2000: - cur.execute(SQL_CREATE_INDEX.format(table_name=self.table_name)) + cur.execute(SQL_CREATE_INDEX.format(table_name=self.table_name, index_hash=self.index_hash)) if self.pg_bigm: - cur.execute("CREATE 
EXTENSION IF NOT EXISTS pg_bigm") - cur.execute(SQL_CREATE_INDEX_PG_BIGM.format(table_name=self.table_name)) + cur.execute(SQL_CREATE_INDEX_PG_BIGM.format(table_name=self.table_name, index_hash=self.index_hash)) redis_client.set(collection_exist_cache_key, 1, ex=3600) diff --git a/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py b/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py index 1e040f415e..8ce194c683 100644 --- a/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py +++ b/api/core/rag/datasource/vdb/qdrant/qdrant_vector.py @@ -46,6 +46,7 @@ class QdrantConfig(BaseModel): root_path: Optional[str] = None grpc_port: int = 6334 prefer_grpc: bool = False + replication_factor: int = 1 def to_qdrant_params(self): if self.endpoint and self.endpoint.startswith("path:"): @@ -119,11 +120,13 @@ class QdrantVector(BaseVector): max_indexing_threads=0, on_disk=False, ) + self._client.create_collection( collection_name=collection_name, vectors_config=vectors_config, hnsw_config=hnsw_config, timeout=int(self._client_config.timeout), + replication_factor=self._client_config.replication_factor, ) # create group_id payload index @@ -466,5 +469,6 @@ class QdrantVectorFactory(AbstractVectorFactory): timeout=dify_config.QDRANT_CLIENT_TIMEOUT, grpc_port=dify_config.QDRANT_GRPC_PORT, prefer_grpc=dify_config.QDRANT_GRPC_ENABLED, + replication_factor=dify_config.QDRANT_REPLICATION_FACTOR, ), ) diff --git a/api/core/rag/datasource/vdb/tencent/tencent_vector.py b/api/core/rag/datasource/vdb/tencent/tencent_vector.py index e266659075..d2bf3eb92a 100644 --- a/api/core/rag/datasource/vdb/tencent/tencent_vector.py +++ b/api/core/rag/datasource/vdb/tencent/tencent_vector.py @@ -271,12 +271,15 @@ class TencentVector(BaseVector): for result in res[0]: meta = result.get(self.field_metadata) + if isinstance(meta, str): + # Compatible with version 1.1.3 and below. 
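In the pgvector.py hunk above, index names gain a short md5-derived suffix. With the previous fixed names, a second collection's `CREATE INDEX IF NOT EXISTS embedding_cosine_v1_idx ...` could be skipped because an index of that name already existed on another table; a per-table suffix avoids that collision. A sketch of the naming scheme, with the `embedding_` prefix mirroring the patch and the rest illustrative:

    import hashlib


    def index_names(collection_name: str) -> tuple[str, str]:
        table_name = f"embedding_{collection_name}"
        # Short, deterministic suffix per table keeps names unique and well under
        # PostgreSQL's 63-character identifier limit.
        index_hash = hashlib.md5(table_name.encode()).hexdigest()[:8]
        return (
            f"embedding_cosine_v1_idx_{index_hash}",
            f"bigm_idx_{index_hash}",
        )


    # Two collections now get distinct index names instead of sharing one.
    assert index_names("col_a") != index_names("col_b")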
+ meta = json.loads(meta) + score = 1 - result.get("score", 0.0) score = result.get("score", 0.0) if score > score_threshold: meta["score"] = score doc = Document(page_content=result.get(self.field_text), metadata=meta) docs.append(doc) - return docs def delete(self) -> None: diff --git a/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py b/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py index 6a61fe9496..6f895b12af 100644 --- a/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py +++ b/api/core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py @@ -49,6 +49,7 @@ class TidbOnQdrantConfig(BaseModel): root_path: Optional[str] = None grpc_port: int = 6334 prefer_grpc: bool = False + replication_factor: int = 1 def to_qdrant_params(self): if self.endpoint and self.endpoint.startswith("path:"): @@ -134,6 +135,7 @@ class TidbOnQdrantVector(BaseVector): vectors_config=vectors_config, hnsw_config=hnsw_config, timeout=int(self._client_config.timeout), + replication_factor=self._client_config.replication_factor, ) # create group_id payload index @@ -484,6 +486,7 @@ class TidbOnQdrantVectorFactory(AbstractVectorFactory): timeout=dify_config.TIDB_ON_QDRANT_CLIENT_TIMEOUT, grpc_port=dify_config.TIDB_ON_QDRANT_GRPC_PORT, prefer_grpc=dify_config.TIDB_ON_QDRANT_GRPC_ENABLED, + replication_factor=dify_config.QDRANT_REPLICATION_FACTOR, ), ) diff --git a/api/core/rag/extractor/notion_extractor.py b/api/core/rag/extractor/notion_extractor.py index 7ab248199a..4e14800d0a 100644 --- a/api/core/rag/extractor/notion_extractor.py +++ b/api/core/rag/extractor/notion_extractor.py @@ -317,7 +317,7 @@ class NotionExtractor(BaseExtractor): data_source_info["last_edited_time"] = last_edited_time update_params = {DocumentModel.data_source_info: json.dumps(data_source_info)} - DocumentModel.query.filter_by(id=document_model.id).update(update_params) + db.session.query(DocumentModel).filter_by(id=document_model.id).update(update_params) db.session.commit() def get_notion_last_edited_time(self) -> str: @@ -347,14 +347,18 @@ class NotionExtractor(BaseExtractor): @classmethod def _get_access_token(cls, tenant_id: str, notion_workspace_id: str) -> str: - data_source_binding = DataSourceOauthBinding.query.filter( - db.and_( - DataSourceOauthBinding.tenant_id == tenant_id, - DataSourceOauthBinding.provider == "notion", - DataSourceOauthBinding.disabled == False, - DataSourceOauthBinding.source_info["workspace_id"] == f'"{notion_workspace_id}"', + data_source_binding = ( + db.session.query(DataSourceOauthBinding) + .filter( + db.and_( + DataSourceOauthBinding.tenant_id == tenant_id, + DataSourceOauthBinding.provider == "notion", + DataSourceOauthBinding.disabled == False, + DataSourceOauthBinding.source_info["workspace_id"] == f'"{notion_workspace_id}"', + ) ) - ).first() + .first() + ) if not data_source_binding: raise Exception( diff --git a/api/core/rag/extractor/watercrawl/client.py b/api/core/rag/extractor/watercrawl/client.py index 6eaede7dbc..6d596e07d8 100644 --- a/api/core/rag/extractor/watercrawl/client.py +++ b/api/core/rag/extractor/watercrawl/client.py @@ -6,6 +6,12 @@ from urllib.parse import urljoin import requests from requests import Response +from core.rag.extractor.watercrawl.exceptions import ( + WaterCrawlAuthenticationError, + WaterCrawlBadRequestError, + WaterCrawlPermissionError, +) + class BaseAPIClient: def __init__(self, api_key, base_url): @@ -53,6 +59,15 @@ class WaterCrawlAPIClient(BaseAPIClient): yield data def process_response(self, 
response: Response) -> dict | bytes | list | None | Generator: + if response.status_code == 401: + raise WaterCrawlAuthenticationError(response) + + if response.status_code == 403: + raise WaterCrawlPermissionError(response) + + if 400 <= response.status_code < 500: + raise WaterCrawlBadRequestError(response) + response.raise_for_status() if response.status_code == 204: return None diff --git a/api/core/rag/extractor/watercrawl/exceptions.py b/api/core/rag/extractor/watercrawl/exceptions.py new file mode 100644 index 0000000000..e407a594e0 --- /dev/null +++ b/api/core/rag/extractor/watercrawl/exceptions.py @@ -0,0 +1,32 @@ +import json + + +class WaterCrawlError(Exception): + pass + + +class WaterCrawlBadRequestError(WaterCrawlError): + def __init__(self, response): + self.status_code = response.status_code + self.response = response + data = response.json() + self.message = data.get("message", "Unknown error occurred") + self.errors = data.get("errors", {}) + super().__init__(self.message) + + @property + def flat_errors(self): + return json.dumps(self.errors) + + def __str__(self): + return f"WaterCrawlBadRequestError: {self.message} \n {self.flat_errors}" + + +class WaterCrawlPermissionError(WaterCrawlBadRequestError): + def __str__(self): + return f"You are exceeding your WaterCrawl API limits. {self.message}" + + +class WaterCrawlAuthenticationError(WaterCrawlBadRequestError): + def __str__(self): + return "WaterCrawl API key is invalid or expired. Please check your API key and try again." diff --git a/api/core/rag/extractor/word_extractor.py b/api/core/rag/extractor/word_extractor.py index edaa8c92fa..bff0acc48f 100644 --- a/api/core/rag/extractor/word_extractor.py +++ b/api/core/rag/extractor/word_extractor.py @@ -19,7 +19,7 @@ from core.rag.extractor.extractor_base import BaseExtractor from core.rag.models.document import Document from extensions.ext_database import db from extensions.ext_storage import storage -from models.enums import CreatedByRole +from models.enums import CreatorUserRole from models.model import UploadFile logger = logging.getLogger(__name__) @@ -76,8 +76,7 @@ class WordExtractor(BaseExtractor): parsed = urlparse(url) return bool(parsed.netloc) and bool(parsed.scheme) - def _extract_images_from_docx(self, doc, image_folder): - os.makedirs(image_folder, exist_ok=True) + def _extract_images_from_docx(self, doc): image_count = 0 image_map = {} @@ -117,7 +116,7 @@ class WordExtractor(BaseExtractor): extension=str(image_ext), mime_type=mime_type or "", created_by=self.user_id, - created_by_role=CreatedByRole.ACCOUNT, + created_by_role=CreatorUserRole.ACCOUNT, created_at=datetime.datetime.now(datetime.UTC).replace(tzinfo=None), used=True, used_by=self.user_id, @@ -210,7 +209,7 @@ class WordExtractor(BaseExtractor): content = [] - image_map = self._extract_images_from_docx(doc, image_folder) + image_map = self._extract_images_from_docx(doc) hyperlinks_url = None url_pattern = re.compile(r"http://[^\s+]+//|https://[^\s+]+") @@ -225,7 +224,7 @@ class WordExtractor(BaseExtractor): xml = ElementTree.XML(run.element.xml) x_child = [c for c in xml.iter() if c is not None] for x in x_child: - if x_child is None: + if x is None: continue if x.tag.endswith("instrText"): if x.text is None: diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index b1565f10f2..c4adf6de4d 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -149,7 +149,7 @@ class DatasetRetrieval: else: 
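The WaterCrawl client changes above map HTTP status codes to typed exceptions before falling back to `raise_for_status()`, so callers can tell bad credentials apart from quota or permission problems and ordinary validation errors. A compact sketch of that dispatch using `requests` and simplified exception classes; the real classes in exceptions.py also parse the response body:

    import requests


    class AuthenticationError(Exception): ...
    class QuotaOrPermissionError(Exception): ...  # simplified stand-ins, not the project's classes
    class BadRequestError(Exception): ...


    def check_response(response: requests.Response) -> requests.Response:
        # Specific, actionable errors first; anything else in the 4xx range becomes
        # a generic client error, and 5xx still surfaces via raise_for_status().
        if response.status_code == 401:
            raise AuthenticationError("API key is invalid or expired")
        if response.status_code == 403:
            raise QuotaOrPermissionError("request rejected; check plan limits and permissions")
        if 400 <= response.status_code < 500:
            raise BadRequestError(response.text)
        response.raise_for_status()
        return response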
inputs = {} available_datasets_ids = [dataset.id for dataset in available_datasets] - metadata_filter_document_ids, metadata_condition = self._get_metadata_filter_condition( + metadata_filter_document_ids, metadata_condition = self.get_metadata_filter_condition( available_datasets_ids, query, tenant_id, @@ -190,7 +190,7 @@ class DatasetRetrieval: retrieve_config.rerank_mode or "reranking_model", retrieve_config.reranking_model, retrieve_config.weights, - retrieve_config.reranking_enabled or True, + True if retrieve_config.reranking_enabled is None else retrieve_config.reranking_enabled, message_id, metadata_filter_document_ids, metadata_condition, @@ -237,12 +237,16 @@ class DatasetRetrieval: if show_retrieve_source: for record in records: segment = record.segment - dataset = Dataset.query.filter_by(id=segment.dataset_id).first() - document = DatasetDocument.query.filter( - DatasetDocument.id == segment.document_id, - DatasetDocument.enabled == True, - DatasetDocument.archived == False, - ).first() + dataset = db.session.query(Dataset).filter_by(id=segment.dataset_id).first() + document = ( + db.session.query(DatasetDocument) + .filter( + DatasetDocument.id == segment.document_id, + DatasetDocument.enabled == True, + DatasetDocument.archived == False, + ) + .first() + ) if dataset and document: source = { "dataset_id": dataset.id, @@ -506,19 +510,30 @@ class DatasetRetrieval: dify_documents = [document for document in documents if document.provider == "dify"] for document in dify_documents: if document.metadata is not None: - dataset_document = DatasetDocument.query.filter( - DatasetDocument.id == document.metadata["document_id"] - ).first() + dataset_document = ( + db.session.query(DatasetDocument) + .filter(DatasetDocument.id == document.metadata["document_id"]) + .first() + ) if dataset_document: if dataset_document.doc_form == IndexType.PARENT_CHILD_INDEX: - child_chunk = ChildChunk.query.filter( - ChildChunk.index_node_id == document.metadata["doc_id"], - ChildChunk.dataset_id == dataset_document.dataset_id, - ChildChunk.document_id == dataset_document.id, - ).first() + child_chunk = ( + db.session.query(ChildChunk) + .filter( + ChildChunk.index_node_id == document.metadata["doc_id"], + ChildChunk.dataset_id == dataset_document.dataset_id, + ChildChunk.document_id == dataset_document.id, + ) + .first() + ) if child_chunk: - segment = DocumentSegment.query.filter(DocumentSegment.id == child_chunk.segment_id).update( - {DocumentSegment.hit_count: DocumentSegment.hit_count + 1}, synchronize_session=False + segment = ( + db.session.query(DocumentSegment) + .filter(DocumentSegment.id == child_chunk.segment_id) + .update( + {DocumentSegment.hit_count: DocumentSegment.hit_count + 1}, + synchronize_session=False, + ) ) db.session.commit() else: @@ -649,6 +664,8 @@ class DatasetRetrieval: return_resource: bool, invoke_from: InvokeFrom, hit_callback: DatasetIndexToolCallbackHandler, + user_id: str, + inputs: dict, ) -> Optional[list[DatasetRetrieverBaseTool]]: """ A dataset tool is a tool that can be used to retrieve information from a dataset @@ -706,6 +723,9 @@ class DatasetRetrieval: hit_callbacks=[hit_callback], return_resource=return_resource, retriever_from=invoke_from.to_source(), + retrieve_config=retrieve_config, + user_id=user_id, + inputs=inputs, ) tools.append(tool) @@ -826,7 +846,7 @@ class DatasetRetrieval: ) return filter_documents[:top_k] if top_k else filter_documents - def _get_metadata_filter_condition( + def get_metadata_filter_condition( self, dataset_ids: list, query: str, 
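One behavioral fix above is easy to miss: `retrieve_config.reranking_enabled or True` always evaluates to `True`, because `or` falls through on every falsy value, including an explicit `False`. The replacement applies the default only when the setting is `None`. The difference in a few lines:

    from typing import Optional


    def effective_reranking(reranking_enabled: Optional[bool]) -> bool:
        # Old expression: `reranking_enabled or True` -> True for None, True, AND False.
        # New expression keeps an explicit False and defaults only the unset case.
        return True if reranking_enabled is None else reranking_enabled


    assert effective_reranking(None) is True    # unset -> default on
    assert effective_reranking(True) is True
    assert effective_reranking(False) is False  # the user's opt-out is now respected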
@@ -876,20 +896,31 @@ class DatasetRetrieval: ) elif metadata_filtering_mode == "manual": if metadata_filtering_conditions: - metadata_condition = MetadataCondition(**metadata_filtering_conditions.model_dump()) + conditions = [] for sequence, condition in enumerate(metadata_filtering_conditions.conditions): # type: ignore metadata_name = condition.name expected_value = condition.value - if expected_value is not None or condition.comparison_operator in ("empty", "not empty"): + if expected_value is not None and condition.comparison_operator not in ("empty", "not empty"): if isinstance(expected_value, str): expected_value = self._replace_metadata_filter_value(expected_value, inputs) - filters = self._process_metadata_filter_func( - sequence, - condition.comparison_operator, - metadata_name, - expected_value, - filters, + conditions.append( + Condition( + name=metadata_name, + comparison_operator=condition.comparison_operator, + value=expected_value, ) + ) + filters = self._process_metadata_filter_func( + sequence, + condition.comparison_operator, + metadata_name, + expected_value, + filters, + ) + metadata_condition = MetadataCondition( + logical_operator=metadata_filtering_conditions.logical_operator, + conditions=conditions, + ) else: raise ValueError("Invalid metadata filtering mode") if filters: diff --git a/api/core/repositories/__init__.py b/api/core/repositories/__init__.py index 5c70d50cde..6452317120 100644 --- a/api/core/repositories/__init__.py +++ b/api/core/repositories/__init__.py @@ -4,3 +4,9 @@ Repository implementations for data access. This package contains concrete implementations of the repository interfaces defined in the core.workflow.repository package. """ + +from core.repositories.sqlalchemy_workflow_node_execution_repository import SQLAlchemyWorkflowNodeExecutionRepository + +__all__ = [ + "SQLAlchemyWorkflowNodeExecutionRepository", +] diff --git a/api/core/repositories/repository_registry.py b/api/core/repositories/repository_registry.py deleted file mode 100644 index b66f3ba8e6..0000000000 --- a/api/core/repositories/repository_registry.py +++ /dev/null @@ -1,87 +0,0 @@ -""" -Registry for repository implementations. - -This module is responsible for registering factory functions with the repository factory. -""" - -import logging -from collections.abc import Mapping -from typing import Any - -from sqlalchemy.orm import sessionmaker - -from configs import dify_config -from core.repositories.workflow_node_execution import SQLAlchemyWorkflowNodeExecutionRepository -from core.workflow.repository.repository_factory import RepositoryFactory -from extensions.ext_database import db - -logger = logging.getLogger(__name__) - -# Storage type constants -STORAGE_TYPE_RDBMS = "rdbms" -STORAGE_TYPE_HYBRID = "hybrid" - - -def register_repositories() -> None: - """ - Register repository factory functions with the RepositoryFactory. - - This function reads configuration settings to determine which repository - implementations to register. 
- """ - # Configure WorkflowNodeExecutionRepository factory based on configuration - workflow_node_execution_storage = dify_config.WORKFLOW_NODE_EXECUTION_STORAGE - - # Check storage type and register appropriate implementation - if workflow_node_execution_storage == STORAGE_TYPE_RDBMS: - # Register SQLAlchemy implementation for RDBMS storage - logger.info("Registering WorkflowNodeExecution repository with RDBMS storage") - RepositoryFactory.register_workflow_node_execution_factory(create_workflow_node_execution_repository) - elif workflow_node_execution_storage == STORAGE_TYPE_HYBRID: - # Hybrid storage is not yet implemented - raise NotImplementedError("Hybrid storage for WorkflowNodeExecution repository is not yet implemented") - else: - # Unknown storage type - raise ValueError( - f"Unknown storage type '{workflow_node_execution_storage}' for WorkflowNodeExecution repository. " - f"Supported types: {STORAGE_TYPE_RDBMS}" - ) - - -def create_workflow_node_execution_repository(params: Mapping[str, Any]) -> SQLAlchemyWorkflowNodeExecutionRepository: - """ - Create a WorkflowNodeExecutionRepository instance using SQLAlchemy implementation. - - This factory function creates a repository for the RDBMS storage type. - - Args: - params: Parameters for creating the repository, including: - - tenant_id: Required. The tenant ID for multi-tenancy. - - app_id: Optional. The application ID for filtering. - - session_factory: Optional. A SQLAlchemy sessionmaker instance. If not provided, - a new sessionmaker will be created using the global database engine. - - Returns: - A WorkflowNodeExecutionRepository instance - - Raises: - ValueError: If required parameters are missing - """ - # Extract required parameters - tenant_id = params.get("tenant_id") - if tenant_id is None: - raise ValueError("tenant_id is required for WorkflowNodeExecution repository with RDBMS storage") - - # Extract optional parameters - app_id = params.get("app_id") - - # Use the session_factory from params if provided, otherwise create one using the global db engine - session_factory = params.get("session_factory") - if session_factory is None: - # Create a sessionmaker using the same engine as the global db session - session_factory = sessionmaker(bind=db.engine) - - # Create and return the repository - return SQLAlchemyWorkflowNodeExecutionRepository( - session_factory=session_factory, tenant_id=tenant_id, app_id=app_id - ) diff --git a/api/core/repositories/sqlalchemy_workflow_execution_repository.py b/api/core/repositories/sqlalchemy_workflow_execution_repository.py new file mode 100644 index 0000000000..c1a71b45d0 --- /dev/null +++ b/api/core/repositories/sqlalchemy_workflow_execution_repository.py @@ -0,0 +1,242 @@ +""" +SQLAlchemy implementation of the WorkflowExecutionRepository. +""" + +import json +import logging +from typing import Optional, Union + +from sqlalchemy import select +from sqlalchemy.engine import Engine +from sqlalchemy.orm import sessionmaker + +from core.workflow.entities.workflow_execution_entities import ( + WorkflowExecution, + WorkflowExecutionStatus, + WorkflowType, +) +from core.workflow.repository.workflow_execution_repository import WorkflowExecutionRepository +from models import ( + Account, + CreatorUserRole, + EndUser, + WorkflowRun, +) +from models.enums import WorkflowRunTriggeredFrom + +logger = logging.getLogger(__name__) + + +class SQLAlchemyWorkflowExecutionRepository(WorkflowExecutionRepository): + """ + SQLAlchemy implementation of the WorkflowExecutionRepository interface. 
+ + This implementation supports multi-tenancy by filtering operations based on tenant_id. + Each method creates its own session, handles the transaction, and commits changes + to the database. This prevents long-running connections in the workflow core. + + This implementation also includes an in-memory cache for workflow executions to improve + performance by reducing database queries. + """ + + def __init__( + self, + session_factory: sessionmaker | Engine, + user: Union[Account, EndUser], + app_id: Optional[str], + triggered_from: Optional[WorkflowRunTriggeredFrom], + ): + """ + Initialize the repository with a SQLAlchemy sessionmaker or engine and context information. + + Args: + session_factory: SQLAlchemy sessionmaker or engine for creating sessions + user: Account or EndUser object containing tenant_id, user ID, and role information + app_id: App ID for filtering by application (can be None) + triggered_from: Source of the execution trigger (DEBUGGING or APP_RUN) + """ + # If an engine is provided, create a sessionmaker from it + if isinstance(session_factory, Engine): + self._session_factory = sessionmaker(bind=session_factory, expire_on_commit=False) + elif isinstance(session_factory, sessionmaker): + self._session_factory = session_factory + else: + raise ValueError( + f"Invalid session_factory type {type(session_factory).__name__}; expected sessionmaker or Engine" + ) + + # Extract tenant_id from user + tenant_id: str | None = user.tenant_id if isinstance(user, EndUser) else user.current_tenant_id + if not tenant_id: + raise ValueError("User must have a tenant_id or current_tenant_id") + self._tenant_id = tenant_id + + # Store app context + self._app_id = app_id + + # Extract user context + self._triggered_from = triggered_from + self._creator_user_id = user.id + + # Determine user role based on user type + self._creator_user_role = CreatorUserRole.ACCOUNT if isinstance(user, Account) else CreatorUserRole.END_USER + + # Initialize in-memory cache for workflow executions + # Key: execution_id, Value: WorkflowRun (DB model) + self._execution_cache: dict[str, WorkflowRun] = {} + + def _to_domain_model(self, db_model: WorkflowRun) -> WorkflowExecution: + """ + Convert a database model to a domain model. + + Args: + db_model: The database model to convert + + Returns: + The domain model + """ + # Parse JSON fields + inputs = db_model.inputs_dict + outputs = db_model.outputs_dict + graph = db_model.graph_dict + + # Convert status to domain enum + status = WorkflowExecutionStatus(db_model.status) + + return WorkflowExecution( + id=db_model.id, + workflow_id=db_model.workflow_id, + sequence_number=db_model.sequence_number, + type=WorkflowType(db_model.type), + workflow_version=db_model.version, + graph=graph, + inputs=inputs, + outputs=outputs, + status=status, + error_message=db_model.error or "", + total_tokens=db_model.total_tokens, + total_steps=db_model.total_steps, + exceptions_count=db_model.exceptions_count, + started_at=db_model.created_at, + finished_at=db_model.finished_at, + ) + + def _to_db_model(self, domain_model: WorkflowExecution) -> WorkflowRun: + """ + Convert a domain model to a database model. 
+ + Args: + domain_model: The domain model to convert + + Returns: + The database model + """ + # Use values from constructor if provided + if not self._triggered_from: + raise ValueError("triggered_from is required in repository constructor") + if not self._creator_user_id: + raise ValueError("created_by is required in repository constructor") + if not self._creator_user_role: + raise ValueError("created_by_role is required in repository constructor") + + db_model = WorkflowRun() + db_model.id = domain_model.id + db_model.tenant_id = self._tenant_id + if self._app_id is not None: + db_model.app_id = self._app_id + db_model.workflow_id = domain_model.workflow_id + db_model.triggered_from = self._triggered_from + db_model.sequence_number = domain_model.sequence_number + db_model.type = domain_model.type + db_model.version = domain_model.workflow_version + db_model.graph = json.dumps(domain_model.graph) if domain_model.graph else None + db_model.inputs = json.dumps(domain_model.inputs) if domain_model.inputs else None + db_model.outputs = json.dumps(domain_model.outputs) if domain_model.outputs else None + db_model.status = domain_model.status + db_model.error = domain_model.error_message if domain_model.error_message else None + db_model.total_tokens = domain_model.total_tokens + db_model.total_steps = domain_model.total_steps + db_model.exceptions_count = domain_model.exceptions_count + db_model.created_by_role = self._creator_user_role + db_model.created_by = self._creator_user_id + db_model.created_at = domain_model.started_at + db_model.finished_at = domain_model.finished_at + + # Calculate elapsed time if finished_at is available + if domain_model.finished_at: + db_model.elapsed_time = (domain_model.finished_at - domain_model.started_at).total_seconds() + else: + db_model.elapsed_time = 0 + + return db_model + + def save(self, execution: WorkflowExecution) -> None: + """ + Save or update a WorkflowExecution domain entity to the database. + + This method serves as a domain-to-database adapter that: + 1. Converts the domain entity to its database representation + 2. Persists the database model using SQLAlchemy's merge operation + 3. Maintains proper multi-tenancy by including tenant context during conversion + 4. Updates the in-memory cache for faster subsequent lookups + + The method handles both creating new records and updating existing ones through + SQLAlchemy's merge operation. + + Args: + execution: The WorkflowExecution domain entity to persist + """ + # Convert domain model to database model using tenant context and other attributes + db_model = self._to_db_model(execution) + + # Create a new database session + with self._session_factory() as session: + # SQLAlchemy merge intelligently handles both insert and update operations + # based on the presence of the primary key + session.merge(db_model) + session.commit() + + # Update the in-memory cache for faster subsequent lookups + logger.debug(f"Updating cache for execution_id: {db_model.id}") + self._execution_cache[db_model.id] = db_model + + def get(self, execution_id: str) -> Optional[WorkflowExecution]: + """ + Retrieve a WorkflowExecution by its ID. + + First checks the in-memory cache, and if not found, queries the database. + If found in the database, adds it to the cache for future lookups. 
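A minimal save/get round trip, assuming a repository instance repo built as sketched earlier and placeholder identifiers:

from datetime import UTC, datetime

from core.workflow.entities.workflow_execution_entities import (
    WorkflowExecution,
    WorkflowType,
)

execution = WorkflowExecution(
    id="run-id",                  # placeholder identifiers for illustration
    workflow_id="workflow-id",
    workflow_version="1",
    sequence_number=1,
    type=WorkflowType.WORKFLOW,
    graph={},
    inputs={"query": "hello"},
    started_at=datetime.now(UTC),
)

repo.save(execution)        # persisted via session.merge() and kept in the in-memory cache
same = repo.get("run-id")   # served from the cache populated by save(), no extra query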
+ + Args: + execution_id: The workflow execution ID + + Returns: + The WorkflowExecution instance if found, None otherwise + """ + # First check the cache + if execution_id in self._execution_cache: + logger.debug(f"Cache hit for execution_id: {execution_id}") + # Convert cached DB model to domain model + cached_db_model = self._execution_cache[execution_id] + return self._to_domain_model(cached_db_model) + + # If not in cache, query the database + logger.debug(f"Cache miss for execution_id: {execution_id}, querying database") + with self._session_factory() as session: + stmt = select(WorkflowRun).where( + WorkflowRun.id == execution_id, + WorkflowRun.tenant_id == self._tenant_id, + ) + + if self._app_id: + stmt = stmt.where(WorkflowRun.app_id == self._app_id) + + db_model = session.scalar(stmt) + if db_model: + # Add DB model to cache + self._execution_cache[execution_id] = db_model + + # Convert to domain model and return + return self._to_domain_model(db_model) + + return None diff --git a/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py b/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py new file mode 100644 index 0000000000..8d916a19db --- /dev/null +++ b/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py @@ -0,0 +1,401 @@ +""" +SQLAlchemy implementation of the WorkflowNodeExecutionRepository. +""" + +import json +import logging +from collections.abc import Sequence +from typing import Optional, Union + +from sqlalchemy import UnaryExpression, asc, delete, desc, select +from sqlalchemy.engine import Engine +from sqlalchemy.orm import sessionmaker + +from core.model_runtime.utils.encoders import jsonable_encoder +from core.workflow.entities.node_entities import NodeRunMetadataKey +from core.workflow.entities.node_execution_entities import ( + NodeExecution, + NodeExecutionStatus, +) +from core.workflow.nodes.enums import NodeType +from core.workflow.repository.workflow_node_execution_repository import OrderConfig, WorkflowNodeExecutionRepository +from models import ( + Account, + CreatorUserRole, + EndUser, + WorkflowNodeExecution, + WorkflowNodeExecutionStatus, + WorkflowNodeExecutionTriggeredFrom, +) + +logger = logging.getLogger(__name__) + + +class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository): + """ + SQLAlchemy implementation of the WorkflowNodeExecutionRepository interface. + + This implementation supports multi-tenancy by filtering operations based on tenant_id. + Each method creates its own session, handles the transaction, and commits changes + to the database. This prevents long-running connections in the workflow core. + + This implementation also includes an in-memory cache for node executions to improve + performance by reducing database queries. + """ + + def __init__( + self, + session_factory: sessionmaker | Engine, + user: Union[Account, EndUser], + app_id: Optional[str], + triggered_from: Optional[WorkflowNodeExecutionTriggeredFrom], + ): + """ + Initialize the repository with a SQLAlchemy sessionmaker or engine and context information. 
+ + Args: + session_factory: SQLAlchemy sessionmaker or engine for creating sessions + user: Account or EndUser object containing tenant_id, user ID, and role information + app_id: App ID for filtering by application (can be None) + triggered_from: Source of the execution trigger (SINGLE_STEP or WORKFLOW_RUN) + """ + # If an engine is provided, create a sessionmaker from it + if isinstance(session_factory, Engine): + self._session_factory = sessionmaker(bind=session_factory, expire_on_commit=False) + elif isinstance(session_factory, sessionmaker): + self._session_factory = session_factory + else: + raise ValueError( + f"Invalid session_factory type {type(session_factory).__name__}; expected sessionmaker or Engine" + ) + + # Extract tenant_id from user + tenant_id: str | None = user.tenant_id if isinstance(user, EndUser) else user.current_tenant_id + if not tenant_id: + raise ValueError("User must have a tenant_id or current_tenant_id") + self._tenant_id = tenant_id + + # Store app context + self._app_id = app_id + + # Extract user context + self._triggered_from = triggered_from + self._creator_user_id = user.id + + # Determine user role based on user type + self._creator_user_role = CreatorUserRole.ACCOUNT if isinstance(user, Account) else CreatorUserRole.END_USER + + # Initialize in-memory cache for node executions + # Key: node_execution_id, Value: WorkflowNodeExecution (DB model) + self._node_execution_cache: dict[str, WorkflowNodeExecution] = {} + + def _to_domain_model(self, db_model: WorkflowNodeExecution) -> NodeExecution: + """ + Convert a database model to a domain model. + + Args: + db_model: The database model to convert + + Returns: + The domain model + """ + # Parse JSON fields + inputs = db_model.inputs_dict + process_data = db_model.process_data_dict + outputs = db_model.outputs_dict + metadata = {NodeRunMetadataKey(k): v for k, v in db_model.execution_metadata_dict.items()} + + # Convert status to domain enum + status = NodeExecutionStatus(db_model.status) + + return NodeExecution( + id=db_model.id, + node_execution_id=db_model.node_execution_id, + workflow_id=db_model.workflow_id, + workflow_run_id=db_model.workflow_run_id, + index=db_model.index, + predecessor_node_id=db_model.predecessor_node_id, + node_id=db_model.node_id, + node_type=NodeType(db_model.node_type), + title=db_model.title, + inputs=inputs, + process_data=process_data, + outputs=outputs, + status=status, + error=db_model.error, + elapsed_time=db_model.elapsed_time, + metadata=metadata, + created_at=db_model.created_at, + finished_at=db_model.finished_at, + ) + + def to_db_model(self, domain_model: NodeExecution) -> WorkflowNodeExecution: + """ + Convert a domain model to a database model. 
+ + Args: + domain_model: The domain model to convert + + Returns: + The database model + """ + # Use values from constructor if provided + if not self._triggered_from: + raise ValueError("triggered_from is required in repository constructor") + if not self._creator_user_id: + raise ValueError("created_by is required in repository constructor") + if not self._creator_user_role: + raise ValueError("created_by_role is required in repository constructor") + + db_model = WorkflowNodeExecution() + db_model.id = domain_model.id + db_model.tenant_id = self._tenant_id + if self._app_id is not None: + db_model.app_id = self._app_id + db_model.workflow_id = domain_model.workflow_id + db_model.triggered_from = self._triggered_from + db_model.workflow_run_id = domain_model.workflow_run_id + db_model.index = domain_model.index + db_model.predecessor_node_id = domain_model.predecessor_node_id + db_model.node_execution_id = domain_model.node_execution_id + db_model.node_id = domain_model.node_id + db_model.node_type = domain_model.node_type + db_model.title = domain_model.title + db_model.inputs = json.dumps(domain_model.inputs) if domain_model.inputs else None + db_model.process_data = json.dumps(domain_model.process_data) if domain_model.process_data else None + db_model.outputs = json.dumps(domain_model.outputs) if domain_model.outputs else None + db_model.status = domain_model.status + db_model.error = domain_model.error + db_model.elapsed_time = domain_model.elapsed_time + db_model.execution_metadata = ( + json.dumps(jsonable_encoder(domain_model.metadata)) if domain_model.metadata else None + ) + db_model.created_at = domain_model.created_at + db_model.created_by_role = self._creator_user_role + db_model.created_by = self._creator_user_id + db_model.finished_at = domain_model.finished_at + return db_model + + def save(self, execution: NodeExecution) -> None: + """ + Save or update a NodeExecution domain entity to the database. + + This method serves as a domain-to-database adapter that: + 1. Converts the domain entity to its database representation + 2. Persists the database model using SQLAlchemy's merge operation + 3. Maintains proper multi-tenancy by including tenant context during conversion + 4. Updates the in-memory cache for faster subsequent lookups + + The method handles both creating new records and updating existing ones through + SQLAlchemy's merge operation. + + Args: + execution: The NodeExecution domain entity to persist + """ + # Convert domain model to database model using tenant context and other attributes + db_model = self.to_db_model(execution) + + # Create a new database session + with self._session_factory() as session: + # SQLAlchemy merge intelligently handles both insert and update operations + # based on the presence of the primary key + session.merge(db_model) + session.commit() + + # Update the in-memory cache for faster subsequent lookups + # Only cache if we have a node_execution_id to use as the cache key + if db_model.node_execution_id: + logger.debug(f"Updating cache for node_execution_id: {db_model.node_execution_id}") + self._node_execution_cache[db_model.node_execution_id] = db_model + + def get_by_node_execution_id(self, node_execution_id: str) -> Optional[NodeExecution]: + """ + Retrieve a NodeExecution by its node_execution_id. + + First checks the in-memory cache, and if not found, queries the database. + If found in the database, adds it to the cache for future lookups. 
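A construction-and-lookup sketch for the node execution repository, assuming an EndUser end_user, an optional app_id, and a NodeExecution domain entity node_execution are already in hand:

from sqlalchemy.orm import sessionmaker

from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository
from extensions.ext_database import db
from models import WorkflowNodeExecutionTriggeredFrom

node_repo = SQLAlchemyWorkflowNodeExecutionRepository(
    session_factory=sessionmaker(bind=db.engine, expire_on_commit=False),
    user=end_user,          # Account or EndUser; determines tenant and creator role
    app_id=app_id,          # may be None
    triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN,
)

node_repo.save(node_execution)  # domain entity is converted to the DB model and merged
cached = node_repo.get_by_node_execution_id(node_execution.node_execution_id)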
+ + Args: + node_execution_id: The node execution ID + + Returns: + The NodeExecution instance if found, None otherwise + """ + # First check the cache + if node_execution_id in self._node_execution_cache: + logger.debug(f"Cache hit for node_execution_id: {node_execution_id}") + # Convert cached DB model to domain model + cached_db_model = self._node_execution_cache[node_execution_id] + return self._to_domain_model(cached_db_model) + + # If not in cache, query the database + logger.debug(f"Cache miss for node_execution_id: {node_execution_id}, querying database") + with self._session_factory() as session: + stmt = select(WorkflowNodeExecution).where( + WorkflowNodeExecution.node_execution_id == node_execution_id, + WorkflowNodeExecution.tenant_id == self._tenant_id, + ) + + if self._app_id: + stmt = stmt.where(WorkflowNodeExecution.app_id == self._app_id) + + db_model = session.scalar(stmt) + if db_model: + # Add DB model to cache + self._node_execution_cache[node_execution_id] = db_model + + # Convert to domain model and return + return self._to_domain_model(db_model) + + return None + + def get_db_models_by_workflow_run( + self, + workflow_run_id: str, + order_config: Optional[OrderConfig] = None, + ) -> Sequence[WorkflowNodeExecution]: + """ + Retrieve all WorkflowNodeExecution database models for a specific workflow run. + + This method directly returns database models without converting to domain models, + which is useful when you need to access database-specific fields like triggered_from. + It also updates the in-memory cache with the retrieved models. + + Args: + workflow_run_id: The workflow run ID + order_config: Optional configuration for ordering results + order_config.order_by: List of fields to order by (e.g., ["index", "created_at"]) + order_config.order_direction: Direction to order ("asc" or "desc") + + Returns: + A list of WorkflowNodeExecution database models + """ + with self._session_factory() as session: + stmt = select(WorkflowNodeExecution).where( + WorkflowNodeExecution.workflow_run_id == workflow_run_id, + WorkflowNodeExecution.tenant_id == self._tenant_id, + WorkflowNodeExecution.triggered_from == WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + + if self._app_id: + stmt = stmt.where(WorkflowNodeExecution.app_id == self._app_id) + + # Apply ordering if provided + if order_config and order_config.order_by: + order_columns: list[UnaryExpression] = [] + for field in order_config.order_by: + column = getattr(WorkflowNodeExecution, field, None) + if not column: + continue + if order_config.order_direction == "desc": + order_columns.append(desc(column)) + else: + order_columns.append(asc(column)) + + if order_columns: + stmt = stmt.order_by(*order_columns) + + db_models = session.scalars(stmt).all() + + # Update the cache with the retrieved DB models + for model in db_models: + if model.node_execution_id: + self._node_execution_cache[model.node_execution_id] = model + + return db_models + + def get_by_workflow_run( + self, + workflow_run_id: str, + order_config: Optional[OrderConfig] = None, + ) -> Sequence[NodeExecution]: + """ + Retrieve all NodeExecution instances for a specific workflow run. + + This method always queries the database to ensure complete and ordered results, + but updates the cache with any retrieved executions. 
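Ordering is driven by OrderConfig; a sketch, assuming its constructor takes order_by and order_direction as described in the docstring and node_repo is the repository from the previous sketch:

from core.workflow.repository.workflow_node_execution_repository import OrderConfig

order = OrderConfig(order_by=["index"], order_direction="desc")
executions = node_repo.get_by_workflow_run(
    workflow_run_id="workflow-run-id",  # placeholder ID
    order_config=order,
)
for execution in executions:
    # domain NodeExecution objects, ordered by node index, newest first
    print(execution.node_id, execution.status, execution.elapsed_time)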
+ + Args: + workflow_run_id: The workflow run ID + order_config: Optional configuration for ordering results + order_config.order_by: List of fields to order by (e.g., ["index", "created_at"]) + order_config.order_direction: Direction to order ("asc" or "desc") + + Returns: + A list of NodeExecution instances + """ + # Get the database models using the new method + db_models = self.get_db_models_by_workflow_run(workflow_run_id, order_config) + + # Convert database models to domain models + domain_models = [] + for model in db_models: + domain_model = self._to_domain_model(model) + domain_models.append(domain_model) + + return domain_models + + def get_running_executions(self, workflow_run_id: str) -> Sequence[NodeExecution]: + """ + Retrieve all running NodeExecution instances for a specific workflow run. + + This method queries the database directly and updates the cache with any + retrieved executions that have a node_execution_id. + + Args: + workflow_run_id: The workflow run ID + + Returns: + A list of running NodeExecution instances + """ + with self._session_factory() as session: + stmt = select(WorkflowNodeExecution).where( + WorkflowNodeExecution.workflow_run_id == workflow_run_id, + WorkflowNodeExecution.tenant_id == self._tenant_id, + WorkflowNodeExecution.status == WorkflowNodeExecutionStatus.RUNNING, + WorkflowNodeExecution.triggered_from == WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + + if self._app_id: + stmt = stmt.where(WorkflowNodeExecution.app_id == self._app_id) + + db_models = session.scalars(stmt).all() + domain_models = [] + + for model in db_models: + # Update cache if node_execution_id is present + if model.node_execution_id: + self._node_execution_cache[model.node_execution_id] = model + + # Convert to domain model + domain_model = self._to_domain_model(model) + domain_models.append(domain_model) + + return domain_models + + def clear(self) -> None: + """ + Clear all WorkflowNodeExecution records for the current tenant_id and app_id. + + This method deletes all WorkflowNodeExecution records that match the tenant_id + and app_id (if provided) associated with this repository instance. + It also clears the in-memory cache. + """ + with self._session_factory() as session: + stmt = delete(WorkflowNodeExecution).where(WorkflowNodeExecution.tenant_id == self._tenant_id) + + if self._app_id: + stmt = stmt.where(WorkflowNodeExecution.app_id == self._app_id) + + result = session.execute(stmt) + session.commit() + + deleted_count = result.rowcount + logger.info( + f"Cleared {deleted_count} workflow node execution records for tenant {self._tenant_id}" + + (f" and app {self._app_id}" if self._app_id else "") + ) + + # Clear the in-memory cache + self._node_execution_cache.clear() + logger.info("Cleared in-memory node execution cache") diff --git a/api/core/repositories/workflow_node_execution/__init__.py b/api/core/repositories/workflow_node_execution/__init__.py deleted file mode 100644 index 76e8282b7d..0000000000 --- a/api/core/repositories/workflow_node_execution/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -""" -WorkflowNodeExecution repository implementations. 
-""" - -from core.repositories.workflow_node_execution.sqlalchemy_repository import SQLAlchemyWorkflowNodeExecutionRepository - -__all__ = [ - "SQLAlchemyWorkflowNodeExecutionRepository", -] diff --git a/api/core/repositories/workflow_node_execution/sqlalchemy_repository.py b/api/core/repositories/workflow_node_execution/sqlalchemy_repository.py deleted file mode 100644 index b1d37163a4..0000000000 --- a/api/core/repositories/workflow_node_execution/sqlalchemy_repository.py +++ /dev/null @@ -1,196 +0,0 @@ -""" -SQLAlchemy implementation of the WorkflowNodeExecutionRepository. -""" - -import logging -from collections.abc import Sequence -from typing import Optional - -from sqlalchemy import UnaryExpression, asc, delete, desc, select -from sqlalchemy.engine import Engine -from sqlalchemy.orm import sessionmaker - -from core.workflow.repository.workflow_node_execution_repository import OrderConfig -from models.workflow import WorkflowNodeExecution, WorkflowNodeExecutionStatus, WorkflowNodeExecutionTriggeredFrom - -logger = logging.getLogger(__name__) - - -class SQLAlchemyWorkflowNodeExecutionRepository: - """ - SQLAlchemy implementation of the WorkflowNodeExecutionRepository interface. - - This implementation supports multi-tenancy by filtering operations based on tenant_id. - Each method creates its own session, handles the transaction, and commits changes - to the database. This prevents long-running connections in the workflow core. - """ - - def __init__(self, session_factory: sessionmaker | Engine, tenant_id: str, app_id: Optional[str] = None): - """ - Initialize the repository with a SQLAlchemy sessionmaker or engine and tenant context. - - Args: - session_factory: SQLAlchemy sessionmaker or engine for creating sessions - tenant_id: Tenant ID for multi-tenancy - app_id: Optional app ID for filtering by application - """ - # If an engine is provided, create a sessionmaker from it - if isinstance(session_factory, Engine): - self._session_factory = sessionmaker(bind=session_factory, expire_on_commit=False) - elif isinstance(session_factory, sessionmaker): - self._session_factory = session_factory - else: - raise ValueError( - f"Invalid session_factory type {type(session_factory).__name__}; expected sessionmaker or Engine" - ) - - self._tenant_id = tenant_id - self._app_id = app_id - - def save(self, execution: WorkflowNodeExecution) -> None: - """ - Save a WorkflowNodeExecution instance and commit changes to the database. - - Args: - execution: The WorkflowNodeExecution instance to save - """ - with self._session_factory() as session: - # Ensure tenant_id is set - if not execution.tenant_id: - execution.tenant_id = self._tenant_id - - # Set app_id if provided and not already set - if self._app_id and not execution.app_id: - execution.app_id = self._app_id - - session.add(execution) - session.commit() - - def get_by_node_execution_id(self, node_execution_id: str) -> Optional[WorkflowNodeExecution]: - """ - Retrieve a WorkflowNodeExecution by its node_execution_id. 
- - Args: - node_execution_id: The node execution ID - - Returns: - The WorkflowNodeExecution instance if found, None otherwise - """ - with self._session_factory() as session: - stmt = select(WorkflowNodeExecution).where( - WorkflowNodeExecution.node_execution_id == node_execution_id, - WorkflowNodeExecution.tenant_id == self._tenant_id, - ) - - if self._app_id: - stmt = stmt.where(WorkflowNodeExecution.app_id == self._app_id) - - return session.scalar(stmt) - - def get_by_workflow_run( - self, - workflow_run_id: str, - order_config: Optional[OrderConfig] = None, - ) -> Sequence[WorkflowNodeExecution]: - """ - Retrieve all WorkflowNodeExecution instances for a specific workflow run. - - Args: - workflow_run_id: The workflow run ID - order_config: Optional configuration for ordering results - order_config.order_by: List of fields to order by (e.g., ["index", "created_at"]) - order_config.order_direction: Direction to order ("asc" or "desc") - - Returns: - A list of WorkflowNodeExecution instances - """ - with self._session_factory() as session: - stmt = select(WorkflowNodeExecution).where( - WorkflowNodeExecution.workflow_run_id == workflow_run_id, - WorkflowNodeExecution.tenant_id == self._tenant_id, - WorkflowNodeExecution.triggered_from == WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, - ) - - if self._app_id: - stmt = stmt.where(WorkflowNodeExecution.app_id == self._app_id) - - # Apply ordering if provided - if order_config and order_config.order_by: - order_columns: list[UnaryExpression] = [] - for field in order_config.order_by: - column = getattr(WorkflowNodeExecution, field, None) - if not column: - continue - if order_config.order_direction == "desc": - order_columns.append(desc(column)) - else: - order_columns.append(asc(column)) - - if order_columns: - stmt = stmt.order_by(*order_columns) - - return session.scalars(stmt).all() - - def get_running_executions(self, workflow_run_id: str) -> Sequence[WorkflowNodeExecution]: - """ - Retrieve all running WorkflowNodeExecution instances for a specific workflow run. - - Args: - workflow_run_id: The workflow run ID - - Returns: - A list of running WorkflowNodeExecution instances - """ - with self._session_factory() as session: - stmt = select(WorkflowNodeExecution).where( - WorkflowNodeExecution.workflow_run_id == workflow_run_id, - WorkflowNodeExecution.tenant_id == self._tenant_id, - WorkflowNodeExecution.status == WorkflowNodeExecutionStatus.RUNNING, - WorkflowNodeExecution.triggered_from == WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, - ) - - if self._app_id: - stmt = stmt.where(WorkflowNodeExecution.app_id == self._app_id) - - return session.scalars(stmt).all() - - def update(self, execution: WorkflowNodeExecution) -> None: - """ - Update an existing WorkflowNodeExecution instance and commit changes to the database. - - Args: - execution: The WorkflowNodeExecution instance to update - """ - with self._session_factory() as session: - # Ensure tenant_id is set - if not execution.tenant_id: - execution.tenant_id = self._tenant_id - - # Set app_id if provided and not already set - if self._app_id and not execution.app_id: - execution.app_id = self._app_id - - session.merge(execution) - session.commit() - - def clear(self) -> None: - """ - Clear all WorkflowNodeExecution records for the current tenant_id and app_id. - - This method deletes all WorkflowNodeExecution records that match the tenant_id - and app_id (if provided) associated with this repository instance. 
- """ - with self._session_factory() as session: - stmt = delete(WorkflowNodeExecution).where(WorkflowNodeExecution.tenant_id == self._tenant_id) - - if self._app_id: - stmt = stmt.where(WorkflowNodeExecution.app_id == self._app_id) - - result = session.execute(stmt) - session.commit() - - deleted_count = result.rowcount - logger.info( - f"Cleared {deleted_count} workflow node execution records for tenant {self._tenant_id}" - + (f" and app {self._app_id}" if self._app_id else "") - ) diff --git a/api/core/tools/tool_engine.py b/api/core/tools/tool_engine.py index 3dce1ca293..178f2b9689 100644 --- a/api/core/tools/tool_engine.py +++ b/api/core/tools/tool_engine.py @@ -32,7 +32,7 @@ from core.tools.errors import ( from core.tools.utils.message_transformer import ToolFileMessageTransformer from core.tools.workflow_as_tool.tool import WorkflowTool from extensions.ext_database import db -from models.enums import CreatedByRole +from models.enums import CreatorUserRole from models.model import Message, MessageFile @@ -339,9 +339,9 @@ class ToolEngine: url=message.url, upload_file_id=tool_file_id, created_by_role=( - CreatedByRole.ACCOUNT + CreatorUserRole.ACCOUNT if invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER} - else CreatedByRole.END_USER + else CreatorUserRole.END_USER ), created_by=user_id, ) diff --git a/api/core/tools/tool_manager.py b/api/core/tools/tool_manager.py index e9a8e0c90c..43040dd2fa 100644 --- a/api/core/tools/tool_manager.py +++ b/api/core/tools/tool_manager.py @@ -668,10 +668,10 @@ class ToolManager: ) workflow_provider_controllers: list[WorkflowToolProviderController] = [] - for provider in workflow_providers: + for workflow_provider in workflow_providers: try: workflow_provider_controllers.append( - ToolTransformService.workflow_provider_to_controller(db_provider=provider) + ToolTransformService.workflow_provider_to_controller(db_provider=workflow_provider) ) except Exception: # app has been deleted diff --git a/api/core/tools/utils/dataset_retriever/dataset_multi_retriever_tool.py b/api/core/tools/utils/dataset_retriever/dataset_multi_retriever_tool.py index 032274b87e..04437ea6d8 100644 --- a/api/core/tools/utils/dataset_retriever/dataset_multi_retriever_tool.py +++ b/api/core/tools/utils/dataset_retriever/dataset_multi_retriever_tool.py @@ -84,13 +84,17 @@ class DatasetMultiRetrieverTool(DatasetRetrieverBaseTool): document_context_list = [] index_node_ids = [document.metadata["doc_id"] for document in all_documents if document.metadata] - segments = DocumentSegment.query.filter( - DocumentSegment.dataset_id.in_(self.dataset_ids), - DocumentSegment.completed_at.isnot(None), - DocumentSegment.status == "completed", - DocumentSegment.enabled == True, - DocumentSegment.index_node_id.in_(index_node_ids), - ).all() + segments = ( + db.session.query(DocumentSegment) + .filter( + DocumentSegment.dataset_id.in_(self.dataset_ids), + DocumentSegment.completed_at.isnot(None), + DocumentSegment.status == "completed", + DocumentSegment.enabled == True, + DocumentSegment.index_node_id.in_(index_node_ids), + ) + .all() + ) if segments: index_node_id_to_position = {id: position for position, id in enumerate(index_node_ids)} @@ -106,12 +110,16 @@ class DatasetMultiRetrieverTool(DatasetRetrieverBaseTool): context_list = [] resource_number = 1 for segment in sorted_segments: - dataset = Dataset.query.filter_by(id=segment.dataset_id).first() - document = Document.query.filter( - Document.id == segment.document_id, - Document.enabled == True, - Document.archived == False, - 
).first() + dataset = db.session.query(Dataset).filter_by(id=segment.dataset_id).first() + document = ( + db.session.query(Document) + .filter( + Document.id == segment.document_id, + Document.enabled == True, + Document.archived == False, + ) + .first() + ) if dataset and document: source = { "position": resource_number, diff --git a/api/core/tools/utils/dataset_retriever/dataset_retriever_tool.py b/api/core/tools/utils/dataset_retriever/dataset_retriever_tool.py index dcd3d080f3..7b6882ed52 100644 --- a/api/core/tools/utils/dataset_retriever/dataset_retriever_tool.py +++ b/api/core/tools/utils/dataset_retriever/dataset_retriever_tool.py @@ -1,11 +1,12 @@ -from typing import Any +from typing import Any, Optional, cast from pydantic import BaseModel, Field +from core.app.app_config.entities import DatasetRetrieveConfigEntity, ModelConfig from core.rag.datasource.retrieval_service import RetrievalService from core.rag.entities.context_entities import DocumentContext -from core.rag.entities.metadata_entities import MetadataCondition from core.rag.models.document import Document as RetrievalDocument +from core.rag.retrieval.dataset_retrieval import DatasetRetrieval from core.rag.retrieval.retrieval_methods import RetrievalMethod from core.tools.utils.dataset_retriever.dataset_retriever_base_tool import DatasetRetrieverBaseTool from extensions.ext_database import db @@ -34,7 +35,9 @@ class DatasetRetrieverTool(DatasetRetrieverBaseTool): args_schema: type[BaseModel] = DatasetRetrieverToolInput description: str = "use this to retrieve a dataset. " dataset_id: str - metadata_filtering_conditions: MetadataCondition + user_id: Optional[str] = None + retrieve_config: DatasetRetrieveConfigEntity + inputs: dict @classmethod def from_dataset(cls, dataset: Dataset, **kwargs): @@ -48,7 +51,6 @@ class DatasetRetrieverTool(DatasetRetrieverBaseTool): tenant_id=dataset.tenant_id, dataset_id=dataset.id, description=description, - metadata_filtering_conditions=MetadataCondition(), **kwargs, ) @@ -61,6 +63,21 @@ class DatasetRetrieverTool(DatasetRetrieverBaseTool): return "" for hit_callback in self.hit_callbacks: hit_callback.on_query(query, dataset.id) + dataset_retrieval = DatasetRetrieval() + metadata_filter_document_ids, metadata_condition = dataset_retrieval.get_metadata_filter_condition( + [dataset.id], + query, + self.tenant_id, + self.user_id or "unknown", + cast(str, self.retrieve_config.metadata_filtering_mode), + cast(ModelConfig, self.retrieve_config.metadata_model_config), + self.retrieve_config.metadata_filtering_conditions, + self.inputs, + ) + if metadata_filter_document_ids: + document_ids_filter = metadata_filter_document_ids.get(dataset.id, []) + else: + document_ids_filter = None if dataset.provider == "external": results = [] external_documents = ExternalDatasetService.fetch_external_knowledge_retrieval( @@ -68,7 +85,7 @@ class DatasetRetrieverTool(DatasetRetrieverBaseTool): dataset_id=dataset.id, query=query, external_retrieval_parameters=dataset.retrieval_model, - metadata_condition=self.metadata_filtering_conditions, + metadata_condition=metadata_condition, ) for external_document in external_documents: document = RetrievalDocument( @@ -104,12 +121,19 @@ class DatasetRetrieverTool(DatasetRetrieverBaseTool): return str("\n".join([item.page_content for item in results])) else: + if metadata_condition and not document_ids_filter: + return "" # get retrieval model , if the model is not setting , using default retrieval_model: dict[str, Any] = dataset.retrieval_model or 
default_retrieval_model + retrieval_resource_list = [] if dataset.indexing_technique == "economy": # use keyword table query documents = RetrievalService.retrieve( - retrieval_method="keyword_search", dataset_id=dataset.id, query=query, top_k=self.top_k + retrieval_method="keyword_search", + dataset_id=dataset.id, + query=query, + top_k=self.top_k, + document_ids_filter=document_ids_filter, ) return str("\n".join([document.page_content for document in documents])) else: @@ -128,6 +152,7 @@ class DatasetRetrieverTool(DatasetRetrieverBaseTool): else None, reranking_mode=retrieval_model.get("reranking_mode") or "reranking_model", weights=retrieval_model.get("weights"), + document_ids_filter=document_ids_filter, ) else: documents = [] @@ -157,16 +182,20 @@ class DatasetRetrieverTool(DatasetRetrieverBaseTool): score=record.score, ) ) - retrieval_resource_list = [] + if self.return_resource: for record in records: segment = record.segment - dataset = Dataset.query.filter_by(id=segment.dataset_id).first() - document = DatasetDocument.query.filter( - DatasetDocument.id == segment.document_id, - DatasetDocument.enabled == True, - DatasetDocument.archived == False, - ).first() + dataset = db.session.query(Dataset).filter_by(id=segment.dataset_id).first() + document = ( + db.session.query(DatasetDocument) # type: ignore + .filter( + DatasetDocument.id == segment.document_id, + DatasetDocument.enabled == True, + DatasetDocument.archived == False, + ) + .first() + ) if dataset and document: source = { "dataset_id": dataset.id, diff --git a/api/core/tools/utils/dataset_retriever_tool.py b/api/core/tools/utils/dataset_retriever_tool.py index b73dec4ebc..ec0575f6c3 100644 --- a/api/core/tools/utils/dataset_retriever_tool.py +++ b/api/core/tools/utils/dataset_retriever_tool.py @@ -34,6 +34,8 @@ class DatasetRetrieverTool(Tool): return_resource: bool, invoke_from: InvokeFrom, hit_callback: DatasetIndexToolCallbackHandler, + user_id: str, + inputs: dict, ) -> list["DatasetRetrieverTool"]: """ get dataset tool @@ -57,6 +59,8 @@ class DatasetRetrieverTool(Tool): return_resource=return_resource, invoke_from=invoke_from, hit_callback=hit_callback, + user_id=user_id, + inputs=inputs, ) if retrieval_tools is None or len(retrieval_tools) == 0: return [] diff --git a/api/core/tools/utils/web_reader_tool.py b/api/core/tools/utils/web_reader_tool.py index d42fd99fce..cbd06fc186 100644 --- a/api/core/tools/utils/web_reader_tool.py +++ b/api/core/tools/utils/web_reader_tool.py @@ -1,21 +1,13 @@ -import hashlib -import json import mimetypes -import os import re -import site -import subprocess -import tempfile -import unicodedata -from contextlib import contextmanager -from pathlib import Path -from typing import Any, Literal, Optional, cast +from collections.abc import Sequence +from dataclasses import dataclass +from typing import Any, Optional, cast from urllib.parse import unquote import chardet import cloudscraper # type: ignore -from bs4 import BeautifulSoup, CData, Comment, NavigableString # type: ignore -from regex import regex # type: ignore +from readabilipy import simple_json_from_html_string # type: ignore from core.helper import ssrf_proxy from core.rag.extractor import extract_processor @@ -23,9 +15,7 @@ from core.rag.extractor.extract_processor import ExtractProcessor FULL_TEMPLATE = """ TITLE: {title} -AUTHORS: {authors} -PUBLISH DATE: {publish_date} -TOP_IMAGE_URL: {top_image} +AUTHOR: {author} TEXT: {text} @@ -73,8 +63,8 @@ def get_url(url: str, user_agent: Optional[str] = None) -> str: response = 
ssrf_proxy.get(url, headers=headers, follow_redirects=True, timeout=(120, 300)) elif response.status_code == 403: scraper = cloudscraper.create_scraper() - scraper.perform_request = ssrf_proxy.make_request - response = scraper.get(url, headers=headers, follow_redirects=True, timeout=(120, 300)) + scraper.perform_request = ssrf_proxy.make_request # type: ignore + response = scraper.get(url, headers=headers, follow_redirects=True, timeout=(120, 300)) # type: ignore if response.status_code != 200: return "URL returned status code {}.".format(response.status_code) @@ -90,273 +80,36 @@ def get_url(url: str, user_agent: Optional[str] = None) -> str: else: content = response.text - a = extract_using_readabilipy(content) + article = extract_using_readabilipy(content) - if not a["plain_text"] or not a["plain_text"].strip(): + if not article.text: return "" res = FULL_TEMPLATE.format( - title=a["title"], - authors=a["byline"], - publish_date=a["date"], - top_image="", - text=a["plain_text"] or "", + title=article.title, + author=article.auther, + text=article.text, ) return res -def extract_using_readabilipy(html): - with tempfile.NamedTemporaryFile(delete=False, mode="w+") as f_html: - f_html.write(html) - f_html.close() - html_path = f_html.name - - # Call Mozilla's Readability.js Readability.parse() function via node, writing output to a temporary file - article_json_path = html_path + ".json" - jsdir = os.path.join(find_module_path("readabilipy"), "javascript") - with chdir(jsdir): - subprocess.check_call(["node", "ExtractArticle.js", "-i", html_path, "-o", article_json_path]) - - # Read output of call to Readability.parse() from JSON file and return as Python dictionary - input_json = json.loads(Path(article_json_path).read_text(encoding="utf-8")) - - # Deleting files after processing - os.unlink(article_json_path) - os.unlink(html_path) - - article_json: dict[str, Any] = { - "title": None, - "byline": None, - "date": None, - "content": None, - "plain_content": None, - "plain_text": None, - } - # Populate article fields from readability fields where present - if input_json: - if input_json.get("title"): - article_json["title"] = input_json["title"] - if input_json.get("byline"): - article_json["byline"] = input_json["byline"] - if input_json.get("date"): - article_json["date"] = input_json["date"] - if input_json.get("content"): - article_json["content"] = input_json["content"] - article_json["plain_content"] = plain_content(article_json["content"], False, False) - article_json["plain_text"] = extract_text_blocks_as_plain_text(article_json["plain_content"]) - if input_json.get("textContent"): - article_json["plain_text"] = input_json["textContent"] - article_json["plain_text"] = re.sub(r"\n\s*\n", "\n", article_json["plain_text"]) - - return article_json +@dataclass +class Article: + title: str + auther: str + text: Sequence[dict] -def find_module_path(module_name): - for package_path in site.getsitepackages(): - potential_path = os.path.join(package_path, module_name) - if os.path.exists(potential_path): - return potential_path - - return None - - -@contextmanager -def chdir(path): - """Change directory in context and return to original on exit""" - # From https://stackoverflow.com/a/37996581, couldn't find a built-in - original_path = os.getcwd() - os.chdir(path) - try: - yield - finally: - os.chdir(original_path) - - -def extract_text_blocks_as_plain_text(paragraph_html): - # Load article as DOM - soup = BeautifulSoup(paragraph_html, "html.parser") - # Select all lists - list_elements = 
soup.find_all(["ul", "ol"]) - # Prefix text in all list items with "* " and make lists paragraphs - for list_element in list_elements: - plain_items = "".join( - list(filter(None, [plain_text_leaf_node(li)["text"] for li in list_element.find_all("li")])) - ) - list_element.string = plain_items - list_element.name = "p" - # Select all text blocks - text_blocks = [s.parent for s in soup.find_all(string=True)] - text_blocks = [plain_text_leaf_node(block) for block in text_blocks] - # Drop empty paragraphs - text_blocks = list(filter(lambda p: p["text"] is not None, text_blocks)) - return text_blocks - - -def plain_text_leaf_node(element): - # Extract all text, stripped of any child HTML elements and normalize it - plain_text = normalize_text(element.get_text()) - if plain_text != "" and element.name == "li": - plain_text = "* {}, ".format(plain_text) - if plain_text == "": - plain_text = None - if "data-node-index" in element.attrs: - plain = {"node_index": element["data-node-index"], "text": plain_text} - else: - plain = {"text": plain_text} - return plain - - -def plain_content(readability_content, content_digests, node_indexes): - # Load article as DOM - soup = BeautifulSoup(readability_content, "html.parser") - # Make all elements plain - elements = plain_elements(soup.contents, content_digests, node_indexes) - if node_indexes: - # Add node index attributes to nodes - elements = [add_node_indexes(element) for element in elements] - # Replace article contents with plain elements - soup.contents = elements - return str(soup) - - -def plain_elements(elements, content_digests, node_indexes): - # Get plain content versions of all elements - elements = [plain_element(element, content_digests, node_indexes) for element in elements] - if content_digests: - # Add content digest attribute to nodes - elements = [add_content_digest(element) for element in elements] - return elements - - -def plain_element(element, content_digests, node_indexes): - # For lists, we make each item plain text - if is_leaf(element): - # For leaf node elements, extract the text content, discarding any HTML tags - # 1. Get element contents as text - plain_text = element.get_text() - # 2. Normalize the extracted text string to a canonical representation - plain_text = normalize_text(plain_text) - # 3. Update element content to be plain text - element.string = plain_text - elif is_text(element): - if is_non_printing(element): - # The simplified HTML may have come from Readability.js so might - # have non-printing text (e.g. Comment or CData). In this case, we - # keep the structure, but ensure that the string is empty. 
- element = type(element)("") - else: - plain_text = element.string - plain_text = normalize_text(plain_text) - element = type(element)(plain_text) - else: - # If not a leaf node or leaf type call recursively on child nodes, replacing - element.contents = plain_elements(element.contents, content_digests, node_indexes) - return element - - -def add_node_indexes(element, node_index="0"): - # Can't add attributes to string types - if is_text(element): - return element - # Add index to current element - element["data-node-index"] = node_index - # Add index to child elements - for local_idx, child in enumerate([c for c in element.contents if not is_text(c)], start=1): - # Can't add attributes to leaf string types - child_index = "{stem}.{local}".format(stem=node_index, local=local_idx) - add_node_indexes(child, node_index=child_index) - return element - - -def normalize_text(text): - """Normalize unicode and whitespace.""" - # Normalize unicode first to try and standardize whitespace characters as much as possible before normalizing them - text = strip_control_characters(text) - text = normalize_unicode(text) - text = normalize_whitespace(text) - return text - - -def strip_control_characters(text): - """Strip out unicode control characters which might break the parsing.""" - # Unicode control characters - # [Cc]: Other, Control [includes new lines] - # [Cf]: Other, Format - # [Cn]: Other, Not Assigned - # [Co]: Other, Private Use - # [Cs]: Other, Surrogate - control_chars = {"Cc", "Cf", "Cn", "Co", "Cs"} - retained_chars = ["\t", "\n", "\r", "\f"] - - # Remove non-printing control characters - return "".join( - [ - "" if (unicodedata.category(char) in control_chars) and (char not in retained_chars) else char - for char in text - ] +def extract_using_readabilipy(html: str): + json_article: dict[str, Any] = simple_json_from_html_string(html, use_readability=True) + article = Article( + title=json_article.get("title") or "", + auther=json_article.get("byline") or "", + text=json_article.get("plain_text") or [], ) - -def normalize_unicode(text): - """Normalize unicode such that things that are visually equivalent map to the same unicode string where possible.""" - normal_form: Literal["NFC", "NFD", "NFKC", "NFKD"] = "NFKC" - text = unicodedata.normalize(normal_form, text) - return text - - -def normalize_whitespace(text): - """Replace runs of whitespace characters with a single space as this is what happens when HTML text is displayed.""" - text = regex.sub(r"\s+", " ", text) - # Remove leading and trailing whitespace - text = text.strip() - return text - - -def is_leaf(element): - return element.name in {"p", "li"} - - -def is_text(element): - return isinstance(element, NavigableString) - - -def is_non_printing(element): - return any(isinstance(element, _e) for _e in [Comment, CData]) - - -def add_content_digest(element): - if not is_text(element): - element["data-content-digest"] = content_digest(element) - return element - - -def content_digest(element): - digest: Any - if is_text(element): - # Hash - trimmed_string = element.string.strip() - if trimmed_string == "": - digest = "" - else: - digest = hashlib.sha256(trimmed_string.encode("utf-8")).hexdigest() - else: - contents = element.contents - num_contents = len(contents) - if num_contents == 0: - # No hash when no child elements exist - digest = "" - elif num_contents == 1: - # If single child, use digest of child - digest = content_digest(contents[0]) - else: - # Build content digest from the "non-empty" digests of child nodes - digest = 
hashlib.sha256() - child_digests = list(filter(lambda x: x != "", [content_digest(content) for content in contents])) - for child in child_digests: - digest.update(child.encode("utf-8")) - digest = digest.hexdigest() - return digest + return article def get_image_upload_file_ids(content): diff --git a/api/core/tools/workflow_as_tool/tool.py b/api/core/tools/workflow_as_tool/tool.py index 99caab3ba9..2117cfb9a1 100644 --- a/api/core/tools/workflow_as_tool/tool.py +++ b/api/core/tools/workflow_as_tool/tool.py @@ -1,7 +1,9 @@ import json import logging from collections.abc import Generator -from typing import Any, Optional, Union, cast +from typing import Any, Optional, cast + +from flask_login import current_user from core.file import FILE_MODEL_IDENTITY, File, FileTransferMethod from core.tools.__base.tool import Tool @@ -92,7 +94,7 @@ class WorkflowTool(Tool): result = generator.generate( app_model=app, workflow=workflow, - user=self._get_user(user_id), + user=cast("Account | EndUser", current_user), args={"inputs": tool_parameters, "files": files}, invoke_from=self.runtime.invoke_from, streaming=False, @@ -116,20 +118,6 @@ class WorkflowTool(Tool): yield self.create_text_message(json.dumps(outputs, ensure_ascii=False)) yield self.create_json_message(outputs) - def _get_user(self, user_id: str) -> Union[EndUser, Account]: - """ - get the user by user id - """ - - user = db.session.query(EndUser).filter(EndUser.id == user_id).first() - if not user: - user = db.session.query(Account).filter(Account.id == user_id).first() - - if not user: - raise ValueError("user not found") - - return user - def fork_tool_runtime(self, runtime: ToolRuntime) -> "WorkflowTool": """ fork a new tool with metadata diff --git a/api/core/variables/consts.py b/api/core/variables/consts.py new file mode 100644 index 0000000000..03b277d619 --- /dev/null +++ b/api/core/variables/consts.py @@ -0,0 +1,7 @@ +# The minimal selector length for valid variables. +# +# The first element of the selector is the node id, and the second element is the variable name. +# +# If the selector length is more than 2, the remaining parts are the keys / indexes paths used +# to extract part of the variable value. +MIN_SELECTORS_LENGTH = 2 diff --git a/api/core/variables/utils.py b/api/core/variables/utils.py new file mode 100644 index 0000000000..e5d222af7d --- /dev/null +++ b/api/core/variables/utils.py @@ -0,0 +1,8 @@ +from collections.abc import Iterable, Sequence + + +def to_selector(node_id: str, name: str, paths: Iterable[str] = ()) -> Sequence[str]: + selectors = [node_id, name] + if paths: + selectors.extend(paths) + return selectors diff --git a/api/core/variables/variables.py b/api/core/variables/variables.py index c32815b24d..b650b1682e 100644 --- a/api/core/variables/variables.py +++ b/api/core/variables/variables.py @@ -30,7 +30,7 @@ class Variable(Segment): """ id: str = Field( - default=lambda _: str(uuid4()), + default_factory=lambda: str(uuid4()), description="Unique identity for variable.", ) name: str diff --git a/api/core/workflow/entities/node_execution_entities.py b/api/core/workflow/entities/node_execution_entities.py new file mode 100644 index 0000000000..5e5ead062f --- /dev/null +++ b/api/core/workflow/entities/node_execution_entities.py @@ -0,0 +1,98 @@ +""" +Domain entities for workflow node execution. + +This module contains the domain model for workflow node execution, which is used +by the core workflow module. 
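A small sketch of the new core.variables helpers introduced above (to_selector and MIN_SELECTORS_LENGTH), using hypothetical node and variable names:

from core.variables.consts import MIN_SELECTORS_LENGTH
from core.variables.utils import to_selector

selector = to_selector("node_1", "output", paths=("result", "0"))
assert list(selector) == ["node_1", "output", "result", "0"]
assert len(selector) >= MIN_SELECTORS_LENGTH  # first two parts: node id, variable name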
These models are independent of the storage mechanism +and don't contain implementation details like tenant_id, app_id, etc. +""" + +from collections.abc import Mapping +from datetime import datetime +from enum import StrEnum +from typing import Any, Optional + +from pydantic import BaseModel, Field + +from core.workflow.entities.node_entities import NodeRunMetadataKey +from core.workflow.nodes.enums import NodeType + + +class NodeExecutionStatus(StrEnum): + """ + Node Execution Status Enum. + """ + + RUNNING = "running" + SUCCEEDED = "succeeded" + FAILED = "failed" + EXCEPTION = "exception" + RETRY = "retry" + + +class NodeExecution(BaseModel): + """ + Domain model for workflow node execution. + + This model represents the core business entity of a node execution, + without implementation details like tenant_id, app_id, etc. + + Note: User/context-specific fields (triggered_from, created_by, created_by_role) + have been moved to the repository implementation to keep the domain model clean. + These fields are still accepted in the constructor for backward compatibility, + but they are not stored in the model. + """ + + # Core identification fields + id: str # Unique identifier for this execution record + node_execution_id: Optional[str] = None # Optional secondary ID for cross-referencing + workflow_id: str # ID of the workflow this node belongs to + workflow_run_id: Optional[str] = None # ID of the specific workflow run (null for single-step debugging) + + # Execution positioning and flow + index: int # Sequence number for ordering in trace visualization + predecessor_node_id: Optional[str] = None # ID of the node that executed before this one + node_id: str # ID of the node being executed + node_type: NodeType # Type of node (e.g., start, llm, knowledge) + title: str # Display title of the node + + # Execution data + inputs: Optional[Mapping[str, Any]] = None # Input variables used by this node + process_data: Optional[Mapping[str, Any]] = None # Intermediate processing data + outputs: Optional[Mapping[str, Any]] = None # Output variables produced by this node + + # Execution state + status: NodeExecutionStatus = NodeExecutionStatus.RUNNING # Current execution status + error: Optional[str] = None # Error message if execution failed + elapsed_time: float = Field(default=0.0) # Time taken for execution in seconds + + # Additional metadata + metadata: Optional[Mapping[NodeRunMetadataKey, Any]] = None # Execution metadata (tokens, cost, etc.) + + # Timing information + created_at: datetime # When execution started + finished_at: Optional[datetime] = None # When execution completed + + def update_from_mapping( + self, + inputs: Optional[Mapping[str, Any]] = None, + process_data: Optional[Mapping[str, Any]] = None, + outputs: Optional[Mapping[str, Any]] = None, + metadata: Optional[Mapping[NodeRunMetadataKey, Any]] = None, + ) -> None: + """ + Update the model from mappings. 
+ + Args: + inputs: The inputs to update + process_data: The process data to update + outputs: The outputs to update + metadata: The metadata to update + """ + if inputs is not None: + self.inputs = dict(inputs) + if process_data is not None: + self.process_data = dict(process_data) + if outputs is not None: + self.outputs = dict(outputs) + if metadata is not None: + self.metadata = dict(metadata) diff --git a/api/core/workflow/entities/workflow_execution_entities.py b/api/core/workflow/entities/workflow_execution_entities.py new file mode 100644 index 0000000000..200d4697b5 --- /dev/null +++ b/api/core/workflow/entities/workflow_execution_entities.py @@ -0,0 +1,91 @@ +""" +Domain entities for workflow execution. + +Models are independent of the storage mechanism and don't contain +implementation details like tenant_id, app_id, etc. +""" + +from collections.abc import Mapping +from datetime import UTC, datetime +from enum import StrEnum +from typing import Any, Optional + +from pydantic import BaseModel, Field + + +class WorkflowType(StrEnum): + """ + Workflow Type Enum for domain layer + """ + + WORKFLOW = "workflow" + CHAT = "chat" + + +class WorkflowExecutionStatus(StrEnum): + RUNNING = "running" + SUCCEEDED = "succeeded" + FAILED = "failed" + STOPPED = "stopped" + PARTIAL_SUCCEEDED = "partial-succeeded" + + +class WorkflowExecution(BaseModel): + """ + Domain model for workflow execution based on WorkflowRun but without + user, tenant, and app attributes. + """ + + id: str = Field(...) + workflow_id: str = Field(...) + workflow_version: str = Field(...) + sequence_number: int = Field(...) + + type: WorkflowType = Field(...) + graph: Mapping[str, Any] = Field(...) + + inputs: Mapping[str, Any] = Field(...) + outputs: Optional[Mapping[str, Any]] = None + + status: WorkflowExecutionStatus = WorkflowExecutionStatus.RUNNING + error_message: str = Field(default="") + total_tokens: int = Field(default=0) + total_steps: int = Field(default=0) + exceptions_count: int = Field(default=0) + + started_at: datetime = Field(...) + finished_at: Optional[datetime] = None + + @property + def elapsed_time(self) -> float: + """ + Calculate elapsed time in seconds. + If workflow is not finished, use current time. 
+ """ + end_time = self.finished_at or datetime.now(UTC).replace(tzinfo=None) + return (end_time - self.started_at).total_seconds() + + @classmethod + def new( + cls, + *, + id: str, + workflow_id: str, + sequence_number: int, + type: WorkflowType, + workflow_version: str, + graph: Mapping[str, Any], + inputs: Mapping[str, Any], + started_at: datetime, + ) -> "WorkflowExecution": + return WorkflowExecution( + id=id, + workflow_id=workflow_id, + sequence_number=sequence_number, + type=type, + workflow_version=workflow_version, + graph=graph, + inputs=inputs, + status=WorkflowExecutionStatus.RUNNING, + started_at=started_at, + ) diff --git a/api/core/workflow/graph_engine/entities/graph.py b/api/core/workflow/graph_engine/entities/graph.py index 5c672c985b..8e5b1e7142 100644 --- a/api/core/workflow/graph_engine/entities/graph.py +++ b/api/core/workflow/graph_engine/entities/graph.py @@ -36,7 +36,7 @@ class Graph(BaseModel): root_node_id: str = Field(..., description="root node id of the graph") node_ids: list[str] = Field(default_factory=list, description="graph node ids") node_id_config_mapping: dict[str, dict] = Field( - default_factory=list, description="node configs mapping (node id: node config)" + default_factory=dict, description="node configs mapping (node id: node config)" ) edge_mapping: dict[str, list[GraphEdge]] = Field( default_factory=dict, description="graph edge mapping (source node id: edges)" diff --git a/api/core/workflow/graph_engine/graph_engine.py b/api/core/workflow/graph_engine/graph_engine.py index 36273d8ec1..f61965e07e 100644 --- a/api/core/workflow/graph_engine/graph_engine.py +++ b/api/core/workflow/graph_engine/graph_engine.py @@ -9,7 +9,7 @@ from copy import copy, deepcopy from datetime import UTC, datetime from typing import Any, Optional, cast -from flask import Flask, current_app +from flask import Flask, current_app, has_request_context from configs import dify_config from core.app.apps.base_app_queue_manager import GenerateTaskStoppedError @@ -540,8 +540,21 @@ class GraphEngine: for var, val in context.items(): var.set(val) + # FIXME(-LAN-): Save current user before entering new app context + from flask import g + + saved_user = None + if has_request_context() and hasattr(g, "_login_user"): + saved_user = g._login_user + with flask_app.app_context(): try: + # Restore user in new app context + if saved_user is not None: + from flask import g + + g._login_user = saved_user + q.put( ParallelBranchRunStartedEvent( parallel_id=parallel_id, diff --git a/api/core/workflow/nodes/answer/base_stream_processor.py b/api/core/workflow/nodes/answer/base_stream_processor.py index e4f2478890..6671ff0746 100644 --- a/api/core/workflow/nodes/answer/base_stream_processor.py +++ b/api/core/workflow/nodes/answer/base_stream_processor.py @@ -95,7 +95,12 @@ class StreamProcessor(ABC): if node_id not in self.rest_node_ids: return + if node_id in reachable_node_ids: + return + self.rest_node_ids.remove(node_id) + self.rest_node_ids.extend(set(reachable_node_ids) - set(self.rest_node_ids)) + for edge in self.graph.edge_mapping.get(node_id, []): if edge.target_node_id in reachable_node_ids: continue diff --git a/api/core/workflow/nodes/code/code_node.py b/api/core/workflow/nodes/code/code_node.py index 3c34c5b4e7..804c05f9f4 100644 --- a/api/core/workflow/nodes/code/code_node.py +++ b/api/core/workflow/nodes/code/code_node.py @@ -127,7 +127,7 @@ class CodeNode(BaseNode[CodeNodeData]): depth: int = 1, ): if depth > dify_config.CODE_MAX_DEPTH: - raise DepthLimitError(f"Depth limit 
${dify_config.CODE_MAX_DEPTH} reached, object too deep.") + raise DepthLimitError(f"Depth limit {dify_config.CODE_MAX_DEPTH} reached, object too deep.") transformed_result: dict[str, Any] = {} if output_schema is None: diff --git a/api/core/workflow/nodes/document_extractor/node.py b/api/core/workflow/nodes/document_extractor/node.py index 8fb1baec89..65b5623a2e 100644 --- a/api/core/workflow/nodes/document_extractor/node.py +++ b/api/core/workflow/nodes/document_extractor/node.py @@ -7,6 +7,7 @@ import tempfile from collections.abc import Mapping, Sequence from typing import Any, cast +import chardet import docx import pandas as pd import pypandoc # type: ignore @@ -180,26 +181,64 @@ def _extract_text_by_file_extension(*, file_content: bytes, file_extension: str) def _extract_text_from_plain_text(file_content: bytes) -> str: try: - return file_content.decode("utf-8", "ignore") - except UnicodeDecodeError as e: - raise TextExtractionError("Failed to decode plain text file") from e + # Detect encoding using chardet + result = chardet.detect(file_content) + encoding = result["encoding"] + + # Fallback to utf-8 if detection fails + if not encoding: + encoding = "utf-8" + + return file_content.decode(encoding, errors="ignore") + except (UnicodeDecodeError, LookupError) as e: + # If decoding fails, try with utf-8 as last resort + try: + return file_content.decode("utf-8", errors="ignore") + except UnicodeDecodeError: + raise TextExtractionError(f"Failed to decode plain text file: {e}") from e def _extract_text_from_json(file_content: bytes) -> str: try: - json_data = json.loads(file_content.decode("utf-8", "ignore")) + # Detect encoding using chardet + result = chardet.detect(file_content) + encoding = result["encoding"] + + # Fallback to utf-8 if detection fails + if not encoding: + encoding = "utf-8" + + json_data = json.loads(file_content.decode(encoding, errors="ignore")) return json.dumps(json_data, indent=2, ensure_ascii=False) - except (UnicodeDecodeError, json.JSONDecodeError) as e: - raise TextExtractionError(f"Failed to decode or parse JSON file: {e}") from e + except (UnicodeDecodeError, LookupError, json.JSONDecodeError) as e: + # If decoding fails, try with utf-8 as last resort + try: + json_data = json.loads(file_content.decode("utf-8", errors="ignore")) + return json.dumps(json_data, indent=2, ensure_ascii=False) + except (UnicodeDecodeError, json.JSONDecodeError): + raise TextExtractionError(f"Failed to decode or parse JSON file: {e}") from e def _extract_text_from_yaml(file_content: bytes) -> str: """Extract the content from yaml file""" try: - yaml_data = yaml.safe_load_all(file_content.decode("utf-8", "ignore")) + # Detect encoding using chardet + result = chardet.detect(file_content) + encoding = result["encoding"] + + # Fallback to utf-8 if detection fails + if not encoding: + encoding = "utf-8" + + yaml_data = yaml.safe_load_all(file_content.decode(encoding, errors="ignore")) return cast(str, yaml.dump_all(yaml_data, allow_unicode=True, sort_keys=False)) - except (UnicodeDecodeError, yaml.YAMLError) as e: - raise TextExtractionError(f"Failed to decode or parse YAML file: {e}") from e + except (UnicodeDecodeError, LookupError, yaml.YAMLError) as e: + # If decoding fails, try with utf-8 as last resort + try: + yaml_data = yaml.safe_load_all(file_content.decode("utf-8", errors="ignore")) + return cast(str, yaml.dump_all(yaml_data, allow_unicode=True, sort_keys=False)) + except (UnicodeDecodeError, yaml.YAMLError): + raise TextExtractionError(f"Failed to decode or parse YAML 
file: {e}") from e def _extract_text_from_pdf(file_content: bytes) -> str: @@ -338,7 +377,20 @@ def _extract_text_from_file(file: File): def _extract_text_from_csv(file_content: bytes) -> str: try: - csv_file = io.StringIO(file_content.decode("utf-8", "ignore")) + # Detect encoding using chardet + result = chardet.detect(file_content) + encoding = result["encoding"] + + # Fallback to utf-8 if detection fails + if not encoding: + encoding = "utf-8" + + try: + csv_file = io.StringIO(file_content.decode(encoding, errors="ignore")) + except (UnicodeDecodeError, LookupError): + # If decoding fails, try with utf-8 as last resort + csv_file = io.StringIO(file_content.decode("utf-8", errors="ignore")) + csv_reader = csv.reader(csv_file) rows = list(csv_reader) @@ -366,7 +418,7 @@ def _extract_text_from_excel(file_content: bytes) -> str: df = excel_file.parse(sheet_name=sheet_name) df.dropna(how="all", inplace=True) # Create Markdown table two times to separate tables with a newline - markdown_table += df.to_markdown(index=False) + "\n\n" + markdown_table += df.to_markdown(index=False, floatfmt="") + "\n\n" except Exception as e: continue return markdown_table diff --git a/api/core/workflow/nodes/iteration/iteration_node.py b/api/core/workflow/nodes/iteration/iteration_node.py index a7d0aefc6d..ea0b6863c9 100644 --- a/api/core/workflow/nodes/iteration/iteration_node.py +++ b/api/core/workflow/nodes/iteration/iteration_node.py @@ -7,7 +7,7 @@ from datetime import UTC, datetime from queue import Empty, Queue from typing import TYPE_CHECKING, Any, Optional, cast -from flask import Flask, current_app +from flask import Flask, current_app, has_request_context from configs import dify_config from core.variables import ArrayVariable, IntegerVariable, NoneVariable @@ -353,27 +353,26 @@ class IterationNode(BaseNode[IterationNodeData]): ) -> NodeRunStartedEvent | BaseNodeEvent | InNodeEvent: """ add iteration metadata to event. 
+ ensures iteration context (ID, index/parallel_run_id) is added to metadata, """ if not isinstance(event, BaseNodeEvent): return event if self.node_data.is_parallel and isinstance(event, NodeRunStartedEvent): event.parallel_mode_run_id = parallel_mode_run_id - return event + + iter_metadata = { + NodeRunMetadataKey.ITERATION_ID: self.node_id, + NodeRunMetadataKey.ITERATION_INDEX: iter_run_index, + } + if parallel_mode_run_id: + # for parallel, the specific branch ID is more important than the sequential index + iter_metadata[NodeRunMetadataKey.PARALLEL_MODE_RUN_ID] = parallel_mode_run_id + if event.route_node_state.node_run_result: - metadata = event.route_node_state.node_run_result.metadata - if not metadata: - metadata = {} - if NodeRunMetadataKey.ITERATION_ID not in metadata: - metadata = { - **metadata, - NodeRunMetadataKey.ITERATION_ID: self.node_id, - NodeRunMetadataKey.PARALLEL_MODE_RUN_ID - if self.node_data.is_parallel - else NodeRunMetadataKey.ITERATION_INDEX: parallel_mode_run_id - if self.node_data.is_parallel - else iter_run_index, - } - event.route_node_state.node_run_result.metadata = metadata + current_metadata = event.route_node_state.node_run_result.metadata or {} + if NodeRunMetadataKey.ITERATION_ID not in current_metadata: + event.route_node_state.node_run_result.metadata = {**current_metadata, **iter_metadata} + return event def _run_single_iter( @@ -587,7 +586,21 @@ class IterationNode(BaseNode[IterationNodeData]): """ for var, val in context.items(): var.set(val) + + # FIXME(-LAN-): Save current user before entering new app context + from flask import g + + saved_user = None + if has_request_context() and hasattr(g, "_login_user"): + saved_user = g._login_user + with flask_app.app_context(): + # Restore user in new app context + if saved_user is not None: + from flask import g + + g._login_user = saved_user + parallel_mode_run_id = uuid.uuid4().hex graph_engine_copy = graph_engine.create_copy() variable_pool_copy = graph_engine_copy.graph_runtime_state.variable_pool diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index 00dac1b7d7..5955022e5f 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -264,6 +264,7 @@ class KnowledgeRetrievalNode(LLMNode): "data_source_type": "external", "retriever_from": "workflow", "score": item.metadata.get("score"), + "doc_metadata": item.metadata, }, "title": item.metadata.get("title"), "content": item.page_content, @@ -275,12 +276,16 @@ class KnowledgeRetrievalNode(LLMNode): if records: for record in records: segment = record.segment - dataset = Dataset.query.filter_by(id=segment.dataset_id).first() - document = Document.query.filter( - Document.id == segment.document_id, - Document.enabled == True, - Document.archived == False, - ).first() + dataset = db.session.query(Dataset).filter_by(id=segment.dataset_id).first() # type: ignore + document = ( + db.session.query(Document) + .filter( + Document.id == segment.document_id, + Document.enabled == True, + Document.archived == False, + ) + .first() + ) if dataset and document: source = { "metadata": { @@ -289,7 +294,7 @@ class KnowledgeRetrievalNode(LLMNode): "dataset_name": dataset.name, "document_id": document.id, "document_name": document.name, - "document_data_source_type": document.data_source_type, + "data_source_type": document.data_source_type, "segment_id": 
segment.id, "retriever_from": "workflow", "score": record.score or 0.0, @@ -356,12 +361,12 @@ class KnowledgeRetrievalNode(LLMNode): ) elif node_data.metadata_filtering_mode == "manual": if node_data.metadata_filtering_conditions: - metadata_condition = MetadataCondition(**node_data.metadata_filtering_conditions.model_dump()) + conditions = [] if node_data.metadata_filtering_conditions: for sequence, condition in enumerate(node_data.metadata_filtering_conditions.conditions): # type: ignore metadata_name = condition.name expected_value = condition.value - if expected_value is not None or condition.comparison_operator in ("empty", "not empty"): + if expected_value is not None and condition.comparison_operator not in ("empty", "not empty"): if isinstance(expected_value, str): expected_value = self.graph_runtime_state.variable_pool.convert_template( expected_value @@ -372,13 +377,24 @@ class KnowledgeRetrievalNode(LLMNode): expected_value = re.sub(r"[\r\n\t]+", " ", expected_value.text).strip() # type: ignore else: raise ValueError("Invalid expected metadata value type") - filters = self._process_metadata_filter_func( - sequence, - condition.comparison_operator, - metadata_name, - expected_value, - filters, + conditions.append( + Condition( + name=metadata_name, + comparison_operator=condition.comparison_operator, + value=expected_value, ) + ) + filters = self._process_metadata_filter_func( + sequence, + condition.comparison_operator, + metadata_name, + expected_value, + filters, + ) + metadata_condition = MetadataCondition( + logical_operator=node_data.metadata_filtering_conditions.logical_operator, + conditions=conditions, + ) else: raise ValueError("Invalid metadata filtering mode") if filters: diff --git a/api/core/workflow/nodes/llm/node.py b/api/core/workflow/nodes/llm/node.py index f42bc6784d..eeb44601ec 100644 --- a/api/core/workflow/nodes/llm/node.py +++ b/api/core/workflow/nodes/llm/node.py @@ -506,7 +506,7 @@ class LLMNode(BaseNode[LLMNodeData]): "dataset_name": metadata.get("dataset_name"), "document_id": metadata.get("document_id"), "document_name": metadata.get("document_name"), - "data_source_type": metadata.get("document_data_source_type"), + "data_source_type": metadata.get("data_source_type"), "segment_id": metadata.get("segment_id"), "retriever_from": metadata.get("retriever_from"), "score": metadata.get("score"), diff --git a/api/core/workflow/nodes/loop/entities.py b/api/core/workflow/nodes/loop/entities.py index 16802311dc..3f4a5edab9 100644 --- a/api/core/workflow/nodes/loop/entities.py +++ b/api/core/workflow/nodes/loop/entities.py @@ -26,7 +26,7 @@ class LoopNodeData(BaseLoopNodeData): loop_count: int # Maximum number of loops break_conditions: list[Condition] # Conditions to break the loop logical_operator: Literal["and", "or"] - loop_variables: Optional[list[LoopVariableData]] = Field(default_factory=list) + loop_variables: Optional[list[LoopVariableData]] = Field(default_factory=list[LoopVariableData]) outputs: Optional[Mapping[str, Any]] = None diff --git a/api/core/workflow/nodes/loop/loop_node.py b/api/core/workflow/nodes/loop/loop_node.py index eae33c0a92..bad3e2b928 100644 --- a/api/core/workflow/nodes/loop/loop_node.py +++ b/api/core/workflow/nodes/loop/loop_node.py @@ -337,7 +337,7 @@ class LoopNode(BaseNode[LoopNodeData]): return {"check_break_result": True} elif isinstance(event, NodeRunFailedEvent): # Loop run failed - yield event + yield self._handle_event_metadata(event=event, iter_run_index=current_index) yield LoopRunFailedEvent( loop_id=self.id, 
loop_node_id=self.node_id, diff --git a/api/core/workflow/repository/__init__.py b/api/core/workflow/repository/__init__.py index d91506e72f..672abb6583 100644 --- a/api/core/workflow/repository/__init__.py +++ b/api/core/workflow/repository/__init__.py @@ -6,10 +6,9 @@ for accessing and manipulating data, regardless of the underlying storage mechanism. """ -from core.workflow.repository.repository_factory import RepositoryFactory -from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository +from core.workflow.repository.workflow_node_execution_repository import OrderConfig, WorkflowNodeExecutionRepository __all__ = [ - "RepositoryFactory", + "OrderConfig", "WorkflowNodeExecutionRepository", ] diff --git a/api/core/workflow/repository/repository_factory.py b/api/core/workflow/repository/repository_factory.py deleted file mode 100644 index 45d6f5d842..0000000000 --- a/api/core/workflow/repository/repository_factory.py +++ /dev/null @@ -1,97 +0,0 @@ -""" -Repository factory for creating repository instances. - -This module provides a simple factory interface for creating repository instances. -It does not contain any implementation details or dependencies on specific repositories. -""" - -from collections.abc import Callable, Mapping -from typing import Any, Literal, Optional, cast - -from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository - -# Type for factory functions - takes a dict of parameters and returns any repository type -RepositoryFactoryFunc = Callable[[Mapping[str, Any]], Any] - -# Type for workflow node execution factory function -WorkflowNodeExecutionFactoryFunc = Callable[[Mapping[str, Any]], WorkflowNodeExecutionRepository] - -# Repository type literals -_RepositoryType = Literal["workflow_node_execution"] - - -class RepositoryFactory: - """ - Factory class for creating repository instances. - - This factory delegates the actual repository creation to implementation-specific - factory functions that are registered with the factory at runtime. - """ - - # Dictionary to store factory functions - _factory_functions: dict[str, RepositoryFactoryFunc] = {} - - @classmethod - def _register_factory(cls, repository_type: _RepositoryType, factory_func: RepositoryFactoryFunc) -> None: - """ - Register a factory function for a specific repository type. - This is a private method and should not be called directly. - - Args: - repository_type: The type of repository (e.g., 'workflow_node_execution') - factory_func: A function that takes parameters and returns a repository instance - """ - cls._factory_functions[repository_type] = factory_func - - @classmethod - def _create_repository(cls, repository_type: _RepositoryType, params: Optional[Mapping[str, Any]] = None) -> Any: - """ - Create a new repository instance with the provided parameters. - This is a private method and should not be called directly. 
- - Args: - repository_type: The type of repository to create - params: A dictionary of parameters to pass to the factory function - - Returns: - A new instance of the requested repository - - Raises: - ValueError: If no factory function is registered for the repository type - """ - if repository_type not in cls._factory_functions: - raise ValueError(f"No factory function registered for repository type '{repository_type}'") - - # Use empty dict if params is None - params = params or {} - - return cls._factory_functions[repository_type](params) - - @classmethod - def register_workflow_node_execution_factory(cls, factory_func: WorkflowNodeExecutionFactoryFunc) -> None: - """ - Register a factory function for the workflow node execution repository. - - Args: - factory_func: A function that takes parameters and returns a WorkflowNodeExecutionRepository instance - """ - cls._register_factory("workflow_node_execution", factory_func) - - @classmethod - def create_workflow_node_execution_repository( - cls, params: Optional[Mapping[str, Any]] = None - ) -> WorkflowNodeExecutionRepository: - """ - Create a new WorkflowNodeExecutionRepository instance with the provided parameters. - - Args: - params: A dictionary of parameters to pass to the factory function - - Returns: - A new instance of the WorkflowNodeExecutionRepository - - Raises: - ValueError: If no factory function is registered for the workflow_node_execution repository type - """ - # We can safely cast here because we've registered a WorkflowNodeExecutionFactoryFunc - return cast(WorkflowNodeExecutionRepository, cls._create_repository("workflow_node_execution", params)) diff --git a/api/core/workflow/repository/workflow_execution_repository.py b/api/core/workflow/repository/workflow_execution_repository.py new file mode 100644 index 0000000000..a39a98ee33 --- /dev/null +++ b/api/core/workflow/repository/workflow_execution_repository.py @@ -0,0 +1,42 @@ +from typing import Optional, Protocol + +from core.workflow.entities.workflow_execution_entities import WorkflowExecution + + +class WorkflowExecutionRepository(Protocol): + """ + Repository interface for WorkflowExecution. + + This interface defines the contract for accessing and manipulating + WorkflowExecution data, regardless of the underlying storage mechanism. + + Note: Domain-specific concepts like multi-tenancy (tenant_id), application context (app_id), + and other implementation details should be handled at the implementation level, not in + the core interface. This keeps the core domain model clean and independent of specific + application domains or deployment scenarios. + """ + + def save(self, execution: WorkflowExecution) -> None: + """ + Save or update a WorkflowExecution instance. + + This method handles both creating new records and updating existing ones. + The implementation should determine whether to create or update based on + the execution's ID or other identifying fields. + + Args: + execution: The WorkflowExecution instance to save or update + """ + ... + + def get(self, execution_id: str) -> Optional[WorkflowExecution]: + """ + Retrieve a WorkflowExecution by its ID. + + Args: + execution_id: The workflow execution ID + + Returns: + The WorkflowExecution instance if found, None otherwise + """ + ... 
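Editor's note (illustrative, not part of this changeset): because `WorkflowExecutionRepository` is a `typing.Protocol`, any object exposing matching `save` and `get` methods satisfies the contract without inheriting from it. A minimal in-memory sketch, e.g. for unit tests, might look like the following; the class name is hypothetical and the production implementation is expected to be database-backed:

```python
# Hypothetical in-memory repository satisfying WorkflowExecutionRepository.
# Editor-added sketch for illustration only; not part of this changeset.
from typing import Optional

from core.workflow.entities.workflow_execution_entities import WorkflowExecution


class InMemoryWorkflowExecutionRepository:
    def __init__(self) -> None:
        # Executions keyed by id; a real implementation would persist to the database.
        self._store: dict[str, WorkflowExecution] = {}

    def save(self, execution: WorkflowExecution) -> None:
        # A single method covers both create and update, as the protocol requires.
        self._store[execution.id] = execution

    def get(self, execution_id: str) -> Optional[WorkflowExecution]:
        return self._store.get(execution_id)
```

Structural typing is what keeps tenant/app concerns out of this core interface: an implementation can carry that context in its own constructor without the domain layer knowing about it.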
diff --git a/api/core/workflow/repository/workflow_node_execution_repository.py b/api/core/workflow/repository/workflow_node_execution_repository.py index 9bb790cb0f..3ca9e2ecab 100644 --- a/api/core/workflow/repository/workflow_node_execution_repository.py +++ b/api/core/workflow/repository/workflow_node_execution_repository.py @@ -2,12 +2,12 @@ from collections.abc import Sequence from dataclasses import dataclass from typing import Literal, Optional, Protocol -from models.workflow import WorkflowNodeExecution +from core.workflow.entities.node_execution_entities import NodeExecution @dataclass class OrderConfig: - """Configuration for ordering WorkflowNodeExecution instances.""" + """Configuration for ordering NodeExecution instances.""" order_by: list[str] order_direction: Optional[Literal["asc", "desc"]] = None @@ -15,10 +15,10 @@ class OrderConfig: class WorkflowNodeExecutionRepository(Protocol): """ - Repository interface for WorkflowNodeExecution. + Repository interface for NodeExecution. This interface defines the contract for accessing and manipulating - WorkflowNodeExecution data, regardless of the underlying storage mechanism. + NodeExecution data, regardless of the underlying storage mechanism. Note: Domain-specific concepts like multi-tenancy (tenant_id), application context (app_id), and trigger sources (triggered_from) should be handled at the implementation level, not in @@ -26,24 +26,28 @@ class WorkflowNodeExecutionRepository(Protocol): application domains or deployment scenarios. """ - def save(self, execution: WorkflowNodeExecution) -> None: + def save(self, execution: NodeExecution) -> None: """ - Save a WorkflowNodeExecution instance. + Save or update a NodeExecution instance. + + This method handles both creating new records and updating existing ones. + The implementation should determine whether to create or update based on + the execution's ID or other identifying fields. Args: - execution: The WorkflowNodeExecution instance to save + execution: The NodeExecution instance to save or update """ ... - def get_by_node_execution_id(self, node_execution_id: str) -> Optional[WorkflowNodeExecution]: + def get_by_node_execution_id(self, node_execution_id: str) -> Optional[NodeExecution]: """ - Retrieve a WorkflowNodeExecution by its node_execution_id. + Retrieve a NodeExecution by its node_execution_id. Args: node_execution_id: The node execution ID Returns: - The WorkflowNodeExecution instance if found, None otherwise + The NodeExecution instance if found, None otherwise """ ... @@ -51,9 +55,9 @@ class WorkflowNodeExecutionRepository(Protocol): self, workflow_run_id: str, order_config: Optional[OrderConfig] = None, - ) -> Sequence[WorkflowNodeExecution]: + ) -> Sequence[NodeExecution]: """ - Retrieve all WorkflowNodeExecution instances for a specific workflow run. + Retrieve all NodeExecution instances for a specific workflow run. Args: workflow_run_id: The workflow run ID @@ -62,34 +66,25 @@ class WorkflowNodeExecutionRepository(Protocol): order_config.order_direction: Direction to order ("asc" or "desc") Returns: - A list of WorkflowNodeExecution instances + A list of NodeExecution instances """ ... - def get_running_executions(self, workflow_run_id: str) -> Sequence[WorkflowNodeExecution]: + def get_running_executions(self, workflow_run_id: str) -> Sequence[NodeExecution]: """ - Retrieve all running WorkflowNodeExecution instances for a specific workflow run. + Retrieve all running NodeExecution instances for a specific workflow run. 
Args: workflow_run_id: The workflow run ID Returns: - A list of running WorkflowNodeExecution instances - """ - ... - - def update(self, execution: WorkflowNodeExecution) -> None: - """ - Update an existing WorkflowNodeExecution instance. - - Args: - execution: The WorkflowNodeExecution instance to update + A list of running NodeExecution instances """ ... def clear(self) -> None: """ - Clear all WorkflowNodeExecution records based on implementation-specific criteria. + Clear all NodeExecution records based on implementation-specific criteria. This method is intended to be used for bulk deletion operations, such as removing all records associated with a specific app_id and tenant_id in multi-tenant implementations. diff --git a/api/core/workflow/utils/condition/entities.py b/api/core/workflow/utils/condition/entities.py index 799c735f54..56871a15d8 100644 --- a/api/core/workflow/utils/condition/entities.py +++ b/api/core/workflow/utils/condition/entities.py @@ -39,7 +39,7 @@ class SubCondition(BaseModel): class SubVariableCondition(BaseModel): logical_operator: Literal["and", "or"] - conditions: list[SubCondition] = Field(default=list) + conditions: list[SubCondition] = Field(default_factory=list) class Condition(BaseModel): diff --git a/api/core/workflow/workflow_cycle_manager.py b/api/core/workflow/workflow_cycle_manager.py new file mode 100644 index 0000000000..24e23af093 --- /dev/null +++ b/api/core/workflow/workflow_cycle_manager.py @@ -0,0 +1,391 @@ +from collections.abc import Mapping +from datetime import UTC, datetime +from typing import Any, Optional, Union +from uuid import uuid4 + +from sqlalchemy import func, select +from sqlalchemy.orm import Session + +from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity +from core.app.entities.queue_entities import ( + QueueNodeExceptionEvent, + QueueNodeFailedEvent, + QueueNodeInIterationFailedEvent, + QueueNodeInLoopFailedEvent, + QueueNodeRetryEvent, + QueueNodeStartedEvent, + QueueNodeSucceededEvent, +) +from core.app.task_pipeline.exc import WorkflowRunNotFoundError +from core.ops.entities.trace_entity import TraceTaskName +from core.ops.ops_trace_manager import TraceQueueManager, TraceTask +from core.workflow.entities.node_entities import NodeRunMetadataKey +from core.workflow.entities.node_execution_entities import ( + NodeExecution, + NodeExecutionStatus, +) +from core.workflow.entities.workflow_execution_entities import WorkflowExecution, WorkflowExecutionStatus, WorkflowType +from core.workflow.enums import SystemVariableKey +from core.workflow.repository.workflow_execution_repository import WorkflowExecutionRepository +from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository +from core.workflow.workflow_entry import WorkflowEntry +from models import ( + Workflow, + WorkflowRun, + WorkflowRunStatus, +) + + +class WorkflowCycleManager: + def __init__( + self, + *, + application_generate_entity: Union[AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity], + workflow_system_variables: dict[SystemVariableKey, Any], + workflow_execution_repository: WorkflowExecutionRepository, + workflow_node_execution_repository: WorkflowNodeExecutionRepository, + ) -> None: + self._application_generate_entity = application_generate_entity + self._workflow_system_variables = workflow_system_variables + self._workflow_execution_repository = workflow_execution_repository + self._workflow_node_execution_repository = workflow_node_execution_repository 
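Editor's note (illustrative, not part of this changeset): the handler methods that follow operate on the `NodeExecution` / `WorkflowExecution` domain models and persist them only through the repositories injected above. A rough sketch of that per-node lifecycle, with placeholder ids and values and assuming `NodeType.LLM`, is:

```python
# Editor-added sketch of the node-execution lifecycle the methods below implement.
# Ids and values are placeholders; `repository` stands for any WorkflowNodeExecutionRepository.
from datetime import UTC, datetime
from uuid import uuid4

from core.workflow.entities.node_execution_entities import NodeExecution, NodeExecutionStatus
from core.workflow.nodes.enums import NodeType

execution = NodeExecution(
    id=str(uuid4()),
    workflow_id="wf-123",
    workflow_run_id="run-456",
    index=1,
    node_id="llm-1",
    node_type=NodeType.LLM,
    title="LLM",
    created_at=datetime.now(UTC).replace(tzinfo=None),  # naive UTC, matching this module
)
# repository.save(execution)   # started (status defaults to RUNNING)

execution.status = NodeExecutionStatus.SUCCEEDED
execution.update_from_mapping(outputs={"text": "hello"})
execution.finished_at = datetime.now(UTC).replace(tzinfo=None)
execution.elapsed_time = (execution.finished_at - execution.created_at).total_seconds()
# repository.save(execution)   # the same call persists the update
```

The repository `save()` call is intentionally the only persistence point, mirroring the protocol's create-or-update contract.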
+ + def handle_workflow_run_start( + self, + *, + session: Session, + workflow_id: str, + ) -> WorkflowExecution: + workflow_stmt = select(Workflow).where(Workflow.id == workflow_id) + workflow = session.scalar(workflow_stmt) + if not workflow: + raise ValueError(f"Workflow not found: {workflow_id}") + + max_sequence_stmt = select(func.max(WorkflowRun.sequence_number)).where( + WorkflowRun.tenant_id == workflow.tenant_id, + WorkflowRun.app_id == workflow.app_id, + ) + max_sequence = session.scalar(max_sequence_stmt) or 0 + new_sequence_number = max_sequence + 1 + + inputs = {**self._application_generate_entity.inputs} + for key, value in (self._workflow_system_variables or {}).items(): + if key.value == "conversation": + continue + inputs[f"sys.{key.value}"] = value + + # handle special values + inputs = dict(WorkflowEntry.handle_special_values(inputs) or {}) + + # init workflow run + # TODO: This workflow_run_id should always not be None, maybe we can use a more elegant way to handle this + execution_id = str(self._workflow_system_variables.get(SystemVariableKey.WORKFLOW_RUN_ID) or uuid4()) + execution = WorkflowExecution.new( + id=execution_id, + workflow_id=workflow.id, + sequence_number=new_sequence_number, + type=WorkflowType(workflow.type), + workflow_version=workflow.version, + graph=workflow.graph_dict, + inputs=inputs, + started_at=datetime.now(UTC).replace(tzinfo=None), + ) + + self._workflow_execution_repository.save(execution) + + return execution + + def handle_workflow_run_success( + self, + *, + workflow_run_id: str, + total_tokens: int, + total_steps: int, + outputs: Mapping[str, Any] | None = None, + conversation_id: Optional[str] = None, + trace_manager: Optional[TraceQueueManager] = None, + ) -> WorkflowExecution: + workflow_execution = self._get_workflow_execution_or_raise_error(workflow_run_id) + + outputs = WorkflowEntry.handle_special_values(outputs) + + workflow_execution.status = WorkflowExecutionStatus.SUCCEEDED + workflow_execution.outputs = outputs or {} + workflow_execution.total_tokens = total_tokens + workflow_execution.total_steps = total_steps + workflow_execution.finished_at = datetime.now(UTC).replace(tzinfo=None) + + if trace_manager: + trace_manager.add_trace_task( + TraceTask( + TraceTaskName.WORKFLOW_TRACE, + workflow_execution=workflow_execution, + conversation_id=conversation_id, + user_id=trace_manager.user_id, + ) + ) + + self._workflow_execution_repository.save(workflow_execution) + return workflow_execution + + def handle_workflow_run_partial_success( + self, + *, + workflow_run_id: str, + total_tokens: int, + total_steps: int, + outputs: Mapping[str, Any] | None = None, + exceptions_count: int = 0, + conversation_id: Optional[str] = None, + trace_manager: Optional[TraceQueueManager] = None, + ) -> WorkflowExecution: + execution = self._get_workflow_execution_or_raise_error(workflow_run_id) + outputs = WorkflowEntry.handle_special_values(dict(outputs) if outputs else None) + + execution.status = WorkflowExecutionStatus.PARTIAL_SUCCEEDED + execution.outputs = outputs or {} + execution.total_tokens = total_tokens + execution.total_steps = total_steps + execution.finished_at = datetime.now(UTC).replace(tzinfo=None) + execution.exceptions_count = exceptions_count + + if trace_manager: + trace_manager.add_trace_task( + TraceTask( + TraceTaskName.WORKFLOW_TRACE, + workflow_execution=execution, + conversation_id=conversation_id, + user_id=trace_manager.user_id, + ) + ) + + self._workflow_execution_repository.save(execution) + return execution + + def 
handle_workflow_run_failed( + self, + *, + workflow_run_id: str, + total_tokens: int, + total_steps: int, + status: WorkflowRunStatus, + error_message: str, + conversation_id: Optional[str] = None, + trace_manager: Optional[TraceQueueManager] = None, + exceptions_count: int = 0, + ) -> WorkflowExecution: + workflow_execution = self._get_workflow_execution_or_raise_error(workflow_run_id) + + workflow_execution.status = WorkflowExecutionStatus(status.value) + workflow_execution.error_message = error_message + workflow_execution.total_tokens = total_tokens + workflow_execution.total_steps = total_steps + workflow_execution.finished_at = datetime.now(UTC).replace(tzinfo=None) + workflow_execution.exceptions_count = exceptions_count + + # Use the instance repository to find running executions for a workflow run + running_node_executions = self._workflow_node_execution_repository.get_running_executions( + workflow_run_id=workflow_execution.id + ) + + # Update the domain models + now = datetime.now(UTC).replace(tzinfo=None) + for node_execution in running_node_executions: + if node_execution.node_execution_id: + # Update the domain model + node_execution.status = NodeExecutionStatus.FAILED + node_execution.error = error_message + node_execution.finished_at = now + node_execution.elapsed_time = (now - node_execution.created_at).total_seconds() + + # Update the repository with the domain model + self._workflow_node_execution_repository.save(node_execution) + + if trace_manager: + trace_manager.add_trace_task( + TraceTask( + TraceTaskName.WORKFLOW_TRACE, + workflow_execution=workflow_execution, + conversation_id=conversation_id, + user_id=trace_manager.user_id, + ) + ) + + self._workflow_execution_repository.save(workflow_execution) + return workflow_execution + + def handle_node_execution_start( + self, + *, + workflow_execution_id: str, + event: QueueNodeStartedEvent, + ) -> NodeExecution: + workflow_execution = self._get_workflow_execution_or_raise_error(workflow_execution_id) + + # Create a domain model + created_at = datetime.now(UTC).replace(tzinfo=None) + metadata = { + NodeRunMetadataKey.PARALLEL_MODE_RUN_ID: event.parallel_mode_run_id, + NodeRunMetadataKey.ITERATION_ID: event.in_iteration_id, + NodeRunMetadataKey.LOOP_ID: event.in_loop_id, + } + + domain_execution = NodeExecution( + id=str(uuid4()), + workflow_id=workflow_execution.workflow_id, + workflow_run_id=workflow_execution.id, + predecessor_node_id=event.predecessor_node_id, + index=event.node_run_index, + node_execution_id=event.node_execution_id, + node_id=event.node_id, + node_type=event.node_type, + title=event.node_data.title, + status=NodeExecutionStatus.RUNNING, + metadata=metadata, + created_at=created_at, + ) + + # Use the instance repository to save the domain model + self._workflow_node_execution_repository.save(domain_execution) + + return domain_execution + + def handle_workflow_node_execution_success(self, *, event: QueueNodeSucceededEvent) -> NodeExecution: + # Get the domain model from repository + domain_execution = self._workflow_node_execution_repository.get_by_node_execution_id(event.node_execution_id) + if not domain_execution: + raise ValueError(f"Domain node execution not found: {event.node_execution_id}") + + # Process data + inputs = WorkflowEntry.handle_special_values(event.inputs) + process_data = WorkflowEntry.handle_special_values(event.process_data) + outputs = WorkflowEntry.handle_special_values(event.outputs) + + # Convert metadata keys to strings + execution_metadata_dict = {} + if 
event.execution_metadata: + for key, value in event.execution_metadata.items(): + execution_metadata_dict[key] = value + + finished_at = datetime.now(UTC).replace(tzinfo=None) + elapsed_time = (finished_at - event.start_at).total_seconds() + + # Update domain model + domain_execution.status = NodeExecutionStatus.SUCCEEDED + domain_execution.update_from_mapping( + inputs=inputs, process_data=process_data, outputs=outputs, metadata=execution_metadata_dict + ) + domain_execution.finished_at = finished_at + domain_execution.elapsed_time = elapsed_time + + # Update the repository with the domain model + self._workflow_node_execution_repository.save(domain_execution) + + return domain_execution + + def handle_workflow_node_execution_failed( + self, + *, + event: QueueNodeFailedEvent + | QueueNodeInIterationFailedEvent + | QueueNodeInLoopFailedEvent + | QueueNodeExceptionEvent, + ) -> NodeExecution: + """ + Workflow node execution failed + :param event: queue node failed event + :return: + """ + # Get the domain model from repository + domain_execution = self._workflow_node_execution_repository.get_by_node_execution_id(event.node_execution_id) + if not domain_execution: + raise ValueError(f"Domain node execution not found: {event.node_execution_id}") + + # Process data + inputs = WorkflowEntry.handle_special_values(event.inputs) + process_data = WorkflowEntry.handle_special_values(event.process_data) + outputs = WorkflowEntry.handle_special_values(event.outputs) + + # Convert metadata keys to strings + execution_metadata_dict = {} + if event.execution_metadata: + for key, value in event.execution_metadata.items(): + execution_metadata_dict[key] = value + + finished_at = datetime.now(UTC).replace(tzinfo=None) + elapsed_time = (finished_at - event.start_at).total_seconds() + + # Update domain model + domain_execution.status = ( + NodeExecutionStatus.FAILED + if not isinstance(event, QueueNodeExceptionEvent) + else NodeExecutionStatus.EXCEPTION + ) + domain_execution.error = event.error + domain_execution.update_from_mapping( + inputs=inputs, process_data=process_data, outputs=outputs, metadata=execution_metadata_dict + ) + domain_execution.finished_at = finished_at + domain_execution.elapsed_time = elapsed_time + + # Update the repository with the domain model + self._workflow_node_execution_repository.save(domain_execution) + + return domain_execution + + def handle_workflow_node_execution_retried( + self, *, workflow_execution_id: str, event: QueueNodeRetryEvent + ) -> NodeExecution: + workflow_execution = self._get_workflow_execution_or_raise_error(workflow_execution_id) + created_at = event.start_at + finished_at = datetime.now(UTC).replace(tzinfo=None) + elapsed_time = (finished_at - created_at).total_seconds() + inputs = WorkflowEntry.handle_special_values(event.inputs) + outputs = WorkflowEntry.handle_special_values(event.outputs) + + # Convert metadata keys to strings + origin_metadata = { + NodeRunMetadataKey.ITERATION_ID: event.in_iteration_id, + NodeRunMetadataKey.PARALLEL_MODE_RUN_ID: event.parallel_mode_run_id, + NodeRunMetadataKey.LOOP_ID: event.in_loop_id, + } + + # Convert execution metadata keys to strings + execution_metadata_dict: dict[NodeRunMetadataKey, str | None] = {} + if event.execution_metadata: + for key, value in event.execution_metadata.items(): + execution_metadata_dict[key] = value + + merged_metadata = {**execution_metadata_dict, **origin_metadata} if execution_metadata_dict else origin_metadata + + # Create a domain model + domain_execution = NodeExecution( + 
id=str(uuid4()), + workflow_id=workflow_execution.workflow_id, + workflow_run_id=workflow_execution.id, + predecessor_node_id=event.predecessor_node_id, + node_execution_id=event.node_execution_id, + node_id=event.node_id, + node_type=event.node_type, + title=event.node_data.title, + status=NodeExecutionStatus.RETRY, + created_at=created_at, + finished_at=finished_at, + elapsed_time=elapsed_time, + error=event.error, + index=event.node_run_index, + ) + + # Update with mappings + domain_execution.update_from_mapping(inputs=inputs, outputs=outputs, metadata=merged_metadata) + + # Use the instance repository to save the domain model + self._workflow_node_execution_repository.save(domain_execution) + + return domain_execution + + def _get_workflow_execution_or_raise_error(self, id: str, /) -> WorkflowExecution: + execution = self._workflow_execution_repository.get(id) + if not execution: + raise WorkflowRunNotFoundError(id) + return execution diff --git a/api/extensions/ext_logging.py b/api/extensions/ext_logging.py index aa55862b7c..79d49aba5e 100644 --- a/api/extensions/ext_logging.py +++ b/api/extensions/ext_logging.py @@ -39,6 +39,10 @@ def init_app(app: DifyApp): handlers=log_handlers, force=True, ) + + # Apply RequestIdFormatter to all handlers + apply_request_id_formatter() + # Disable propagation for noisy loggers to avoid duplicate logs logging.getLogger("sqlalchemy.engine").propagate = False log_tz = dify_config.LOG_TZ @@ -74,3 +78,16 @@ class RequestIdFilter(logging.Filter): def filter(self, record): record.req_id = get_request_id() if flask.has_request_context() else "" return True + + +class RequestIdFormatter(logging.Formatter): + def format(self, record): + if not hasattr(record, "req_id"): + record.req_id = "" + return super().format(record) + + +def apply_request_id_formatter(): + for handler in logging.root.handlers: + if handler.formatter: + handler.formatter = RequestIdFormatter(dify_config.LOG_FORMAT, dify_config.LOG_DATEFORMAT) diff --git a/api/extensions/ext_login.py b/api/extensions/ext_login.py index 10fb89eb73..06f42494ec 100644 --- a/api/extensions/ext_login.py +++ b/api/extensions/ext_login.py @@ -3,11 +3,14 @@ import json import flask_login # type: ignore from flask import Response, request from flask_login import user_loaded_from_request, user_logged_in -from werkzeug.exceptions import Unauthorized +from werkzeug.exceptions import NotFound, Unauthorized -import contexts +from configs import dify_config from dify_app import DifyApp +from extensions.ext_database import db from libs.passport import PassportService +from models.account import Account, Tenant, TenantAccountJoin +from models.model import EndUser from services.account_service import AccountService login_manager = flask_login.LoginManager() @@ -17,35 +20,69 @@ login_manager = flask_login.LoginManager() @login_manager.request_loader def load_user_from_request(request_from_flask_login): """Load user based on the request.""" - if request.blueprint not in {"console", "inner_api"}: - return None - # Check if the user_id contains a dot, indicating the old format auth_header = request.headers.get("Authorization", "") - if not auth_header: - auth_token = request.args.get("_token") - if not auth_token: - raise Unauthorized("Invalid Authorization token.") - else: + auth_token: str | None = None + if auth_header: if " " not in auth_header: raise Unauthorized("Invalid Authorization header format. 
Expected 'Bearer ' format.") - auth_scheme, auth_token = auth_header.split(None, 1) + auth_scheme, auth_token = auth_header.split(maxsplit=1) auth_scheme = auth_scheme.lower() if auth_scheme != "bearer": raise Unauthorized("Invalid Authorization header format. Expected 'Bearer ' format.") + else: + auth_token = request.args.get("_token") - decoded = PassportService().verify(auth_token) - user_id = decoded.get("user_id") + # Check for admin API key authentication first + if dify_config.ADMIN_API_KEY_ENABLE and auth_header: + admin_api_key = dify_config.ADMIN_API_KEY + if admin_api_key and admin_api_key == auth_token: + workspace_id = request.headers.get("X-WORKSPACE-ID") + if workspace_id: + tenant_account_join = ( + db.session.query(Tenant, TenantAccountJoin) + .filter(Tenant.id == workspace_id) + .filter(TenantAccountJoin.tenant_id == Tenant.id) + .filter(TenantAccountJoin.role == "owner") + .one_or_none() + ) + if tenant_account_join: + tenant, ta = tenant_account_join + account = db.session.query(Account).filter_by(id=ta.account_id).first() + if account: + account.current_tenant = tenant + return account - logged_in_account = AccountService.load_logged_in_account(account_id=user_id) - return logged_in_account + if request.blueprint in {"console", "inner_api"}: + if not auth_token: + raise Unauthorized("Invalid Authorization token.") + decoded = PassportService().verify(auth_token) + user_id = decoded.get("user_id") + if not user_id: + raise Unauthorized("Invalid Authorization token.") + + logged_in_account = AccountService.load_logged_in_account(account_id=user_id) + return logged_in_account + elif request.blueprint == "web": + decoded = PassportService().verify(auth_token) + end_user_id = decoded.get("end_user_id") + if not end_user_id: + raise Unauthorized("Invalid Authorization token.") + end_user = db.session.query(EndUser).filter(EndUser.id == decoded["end_user_id"]).first() + if not end_user: + raise NotFound("End user not found.") + return end_user @user_logged_in.connect @user_loaded_from_request.connect def on_user_logged_in(_sender, user): - """Called when a user logged in.""" - if user: - contexts.tenant_id.set(user.current_tenant_id) + """Called when a user logged in. + + Note: AccountService.load_logged_in_account will populate user.current_tenant_id + through the load_user method, which calls account.set_tenant_id(). 
+ """ + # tenant_id context variable removed - using current_user.current_tenant_id directly + pass @login_manager.unauthorized_handler diff --git a/api/extensions/ext_mail.py b/api/extensions/ext_mail.py index 9240ebe7fc..84bc12eca0 100644 --- a/api/extensions/ext_mail.py +++ b/api/extensions/ext_mail.py @@ -26,7 +26,7 @@ class Mail: match mail_type: case "resend": - import resend # type: ignore + import resend api_key = dify_config.RESEND_API_KEY if not api_key: diff --git a/api/extensions/ext_otel.py b/api/extensions/ext_otel.py index be47fdc6d6..6dcfa7bec6 100644 --- a/api/extensions/ext_otel.py +++ b/api/extensions/ext_otel.py @@ -6,27 +6,41 @@ import socket import sys from typing import Union +import flask from celery.signals import worker_init # type: ignore from flask_login import user_loaded_from_request, user_logged_in # type: ignore from configs import dify_config from dify_app import DifyApp +from models import Account, EndUser @user_logged_in.connect @user_loaded_from_request.connect -def on_user_loaded(_sender, user): +def on_user_loaded(_sender, user: Union["Account", "EndUser"]): if dify_config.ENABLE_OTEL: from opentelemetry.trace import get_current_span if user: - current_span = get_current_span() - if current_span: - current_span.set_attribute("service.tenant.id", user.current_tenant_id) - current_span.set_attribute("service.user.id", user.id) + try: + current_span = get_current_span() + if isinstance(user, Account) and user.current_tenant_id: + tenant_id = user.current_tenant_id + elif isinstance(user, EndUser): + tenant_id = user.tenant_id + else: + return + if current_span: + current_span.set_attribute("service.tenant.id", tenant_id) + current_span.set_attribute("service.user.id", user.id) + except Exception: + logging.exception("Error setting tenant and user attributes") + pass def init_app(app: DifyApp): + from opentelemetry.semconv.trace import SpanAttributes + def is_celery_worker(): return "celery" in sys.argv[0].lower() @@ -37,20 +51,32 @@ def init_app(app: DifyApp): def init_flask_instrumentor(app: DifyApp): meter = get_meter("http_metrics", version=dify_config.CURRENT_VERSION) _http_response_counter = meter.create_counter( - "http.server.response.count", description="Total number of HTTP responses by status code", unit="{response}" + "http.server.response.count", + description="Total number of HTTP responses by status code, method and target", + unit="{response}", ) def response_hook(span: Span, status: str, response_headers: list): if span and span.is_recording(): - if status.startswith("2"): - span.set_status(StatusCode.OK) - else: - span.set_status(StatusCode.ERROR, status) + try: + if status.startswith("2"): + span.set_status(StatusCode.OK) + else: + span.set_status(StatusCode.ERROR, status) - status = status.split(" ")[0] - status_code = int(status) - status_class = f"{status_code // 100}xx" - _http_response_counter.add(1, {"status_code": status_code, "status_class": status_class}) + status = status.split(" ")[0] + status_code = int(status) + status_class = f"{status_code // 100}xx" + attributes: dict[str, str | int] = {"status_code": status_code, "status_class": status_class} + request = flask.request + if request and request.url_rule: + attributes[SpanAttributes.HTTP_TARGET] = str(request.url_rule.rule) + if request and request.method: + attributes[SpanAttributes.HTTP_METHOD] = str(request.method) + _http_response_counter.add(1, attributes) + except Exception: + logging.exception("Error setting status and attributes") + pass instrumentor = 
FlaskInstrumentor() if dify_config.DEBUG: @@ -81,7 +107,7 @@ def init_app(app: DifyApp): class ExceptionLoggingHandler(logging.Handler): """Custom logging handler that creates spans for logging.exception() calls""" - def emit(self, record): + def emit(self, record: logging.LogRecord): try: if record.exc_info: tracer = get_tracer_provider().get_tracer("dify.exception.logging") @@ -96,15 +122,20 @@ def init_app(app: DifyApp): }, ) as span: span.set_status(StatusCode.ERROR) - span.record_exception(record.exc_info[1]) - span.set_attribute("exception.type", record.exc_info[0].__name__) - span.set_attribute("exception.message", str(record.exc_info[1])) + if record.exc_info[1]: + span.record_exception(record.exc_info[1]) + span.set_attribute("exception.message", str(record.exc_info[1])) + if record.exc_info[0]: + span.set_attribute("exception.type", record.exc_info[0].__name__) + except Exception: pass from opentelemetry import trace - from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter - from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter + from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter as GRPCMetricExporter + from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter as GRPCSpanExporter + from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter as HTTPMetricExporter + from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter as HTTPSpanExporter from opentelemetry.instrumentation.celery import CeleryInstrumentor from opentelemetry.instrumentation.flask import FlaskInstrumentor from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor @@ -147,19 +178,32 @@ def init_app(app: DifyApp): sampler = ParentBasedTraceIdRatio(dify_config.OTEL_SAMPLING_RATE) provider = TracerProvider(resource=resource, sampler=sampler) set_tracer_provider(provider) - exporter: Union[OTLPSpanExporter, ConsoleSpanExporter] - metric_exporter: Union[OTLPMetricExporter, ConsoleMetricExporter] + exporter: Union[GRPCSpanExporter, HTTPSpanExporter, ConsoleSpanExporter] + metric_exporter: Union[GRPCMetricExporter, HTTPMetricExporter, ConsoleMetricExporter] + protocol = (dify_config.OTEL_EXPORTER_OTLP_PROTOCOL or "").lower() if dify_config.OTEL_EXPORTER_TYPE == "otlp": - exporter = OTLPSpanExporter( - endpoint=dify_config.OTLP_BASE_ENDPOINT + "/v1/traces", - headers={"Authorization": f"Bearer {dify_config.OTLP_API_KEY}"}, - ) - metric_exporter = OTLPMetricExporter( - endpoint=dify_config.OTLP_BASE_ENDPOINT + "/v1/metrics", - headers={"Authorization": f"Bearer {dify_config.OTLP_API_KEY}"}, - ) + if protocol == "grpc": + exporter = GRPCSpanExporter( + endpoint=dify_config.OTLP_BASE_ENDPOINT, + # Header field names must consist of lowercase letters, check RFC7540 + headers=(("authorization", f"Bearer {dify_config.OTLP_API_KEY}"),), + insecure=True, + ) + metric_exporter = GRPCMetricExporter( + endpoint=dify_config.OTLP_BASE_ENDPOINT, + headers=(("authorization", f"Bearer {dify_config.OTLP_API_KEY}"),), + insecure=True, + ) + else: + exporter = HTTPSpanExporter( + endpoint=dify_config.OTLP_BASE_ENDPOINT + "/v1/traces", + headers={"Authorization": f"Bearer {dify_config.OTLP_API_KEY}"}, + ) + metric_exporter = HTTPMetricExporter( + endpoint=dify_config.OTLP_BASE_ENDPOINT + "/v1/metrics", + headers={"Authorization": f"Bearer {dify_config.OTLP_API_KEY}"}, + ) else: - # Fallback to console exporter exporter = ConsoleSpanExporter() 
metric_exporter = ConsoleMetricExporter() diff --git a/api/extensions/ext_redis.py b/api/extensions/ext_redis.py index f8679f7e4b..c283b1b7ca 100644 --- a/api/extensions/ext_redis.py +++ b/api/extensions/ext_redis.py @@ -1,6 +1,7 @@ from typing import Any, Union import redis +from redis.cache import CacheConfig from redis.cluster import ClusterNode, RedisCluster from redis.connection import Connection, SSLConnection from redis.sentinel import Sentinel @@ -51,6 +52,14 @@ def init_app(app: DifyApp): connection_class: type[Union[Connection, SSLConnection]] = Connection if dify_config.REDIS_USE_SSL: connection_class = SSLConnection + resp_protocol = dify_config.REDIS_SERIALIZATION_PROTOCOL + if dify_config.REDIS_ENABLE_CLIENT_SIDE_CACHE: + if resp_protocol >= 3: + clientside_cache_config = CacheConfig() + else: + raise ValueError("Client side cache is only supported in RESP3") + else: + clientside_cache_config = None redis_params: dict[str, Any] = { "username": dify_config.REDIS_USERNAME, @@ -59,6 +68,8 @@ def init_app(app: DifyApp): "encoding": "utf-8", "encoding_errors": "strict", "decode_responses": False, + "protocol": resp_protocol, + "cache_config": clientside_cache_config, } if dify_config.REDIS_USE_SENTINEL: @@ -82,14 +93,22 @@ def init_app(app: DifyApp): ClusterNode(host=node.split(":")[0], port=int(node.split(":")[1])) for node in dify_config.REDIS_CLUSTERS.split(",") ] - # FIXME: mypy error here, try to figure out how to fix it - redis_client.initialize(RedisCluster(startup_nodes=nodes, password=dify_config.REDIS_CLUSTERS_PASSWORD)) # type: ignore + redis_client.initialize( + RedisCluster( + startup_nodes=nodes, + password=dify_config.REDIS_CLUSTERS_PASSWORD, + protocol=resp_protocol, + cache_config=clientside_cache_config, + ) + ) else: redis_params.update( { "host": dify_config.REDIS_HOST, "port": dify_config.REDIS_PORT, "connection_class": connection_class, + "protocol": resp_protocol, + "cache_config": clientside_cache_config, } ) pool = redis.ConnectionPool(**redis_params) diff --git a/api/extensions/ext_repositories.py b/api/extensions/ext_repositories.py deleted file mode 100644 index b8cfea121b..0000000000 --- a/api/extensions/ext_repositories.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -Extension for initializing repositories. - -This extension registers repository implementations with the RepositoryFactory. -""" - -from core.repositories.repository_registry import register_repositories -from dify_app import DifyApp - - -def init_app(_app: DifyApp) -> None: - """ - Initialize repository implementations. 
- - Args: - _app: The Flask application instance (unused) - """ - register_repositories() diff --git a/api/extensions/ext_request_logging.py b/api/extensions/ext_request_logging.py new file mode 100644 index 0000000000..7c69483e0f --- /dev/null +++ b/api/extensions/ext_request_logging.py @@ -0,0 +1,73 @@ +import json +import logging + +import flask +import werkzeug.http +from flask import Flask +from flask.signals import request_finished, request_started + +from configs import dify_config + +_logger = logging.getLogger(__name__) + + +def _is_content_type_json(content_type: str) -> bool: + if not content_type: + return False + content_type_no_option, _ = werkzeug.http.parse_options_header(content_type) + return content_type_no_option.lower() == "application/json" + + +def _log_request_started(_sender, **_extra): + """Log the start of a request.""" + if not _logger.isEnabledFor(logging.DEBUG): + return + + request = flask.request + if not (_is_content_type_json(request.content_type) and request.data): + _logger.debug("Received Request %s -> %s", request.method, request.path) + return + try: + json_data = json.loads(request.data) + except (TypeError, ValueError): + _logger.exception("Failed to parse JSON request") + return + formatted_json = json.dumps(json_data, ensure_ascii=False, indent=2) + _logger.debug( + "Received Request %s -> %s, Request Body:\n%s", + request.method, + request.path, + formatted_json, + ) + + +def _log_request_finished(_sender, response, **_extra): + """Log the end of a request.""" + if not _logger.isEnabledFor(logging.DEBUG) or response is None: + return + + if not _is_content_type_json(response.content_type): + _logger.debug("Response %s %s", response.status, response.content_type) + return + + response_data = response.get_data(as_text=True) + try: + json_data = json.loads(response_data) + except (TypeError, ValueError): + _logger.exception("Failed to parse JSON response") + return + formatted_json = json.dumps(json_data, ensure_ascii=False, indent=2) + _logger.debug( + "Response %s %s, Response Body:\n%s", + response.status, + response.content_type, + formatted_json, + ) + + +def init_app(app: Flask): + """Initialize the request logging extension.""" + if not dify_config.ENABLE_REQUEST_LOGGING: + return + request_started.connect(_log_request_started, app) + request_finished.connect(_log_request_finished, app) diff --git a/api/fields/app_fields.py b/api/fields/app_fields.py index 0142595e41..73c224542a 100644 --- a/api/fields/app_fields.py +++ b/api/fields/app_fields.py @@ -76,6 +76,7 @@ app_detail_fields = { "created_at": TimestampField, "updated_by": fields.String, "updated_at": TimestampField, + "access_mode": fields.String, } prompt_config_fields = { @@ -111,6 +112,9 @@ app_partial_fields = { "updated_by": fields.String, "updated_at": TimestampField, "tags": fields.List(fields.Nested(tag_fields)), + "access_mode": fields.String, + "create_user_name": fields.String, + "author_name": fields.String, } @@ -189,6 +193,7 @@ app_detail_fields_with_site = { "updated_by": fields.String, "updated_at": TimestampField, "deleted_tools": fields.List(fields.Nested(deleted_tool_fields)), + "access_mode": fields.String, } diff --git a/api/libs/login.py b/api/libs/login.py index be9478e850..e3a7fe2948 100644 --- a/api/libs/login.py +++ b/api/libs/login.py @@ -2,14 +2,11 @@ from functools import wraps from typing import Any from flask import current_app, g, has_request_context, request -from flask_login import user_logged_in # type: ignore from flask_login.config import 
EXEMPT_METHODS # type: ignore -from werkzeug.exceptions import Unauthorized from werkzeug.local import LocalProxy from configs import dify_config -from extensions.ext_database import db -from models.account import Account, Tenant, TenantAccountJoin +from models.account import Account from models.model import EndUser #: A proxy for the current user. If no user is logged in, this will be an @@ -53,36 +50,6 @@ def login_required(func): @wraps(func) def decorated_view(*args, **kwargs): - auth_header = request.headers.get("Authorization") - if dify_config.ADMIN_API_KEY_ENABLE: - if auth_header: - if " " not in auth_header: - raise Unauthorized("Invalid Authorization header format. Expected 'Bearer ' format.") - auth_scheme, auth_token = auth_header.split(None, 1) - auth_scheme = auth_scheme.lower() - if auth_scheme != "bearer": - raise Unauthorized("Invalid Authorization header format. Expected 'Bearer ' format.") - - admin_api_key = dify_config.ADMIN_API_KEY - if admin_api_key: - if admin_api_key == auth_token: - workspace_id = request.headers.get("X-WORKSPACE-ID") - if workspace_id: - tenant_account_join = ( - db.session.query(Tenant, TenantAccountJoin) - .filter(Tenant.id == workspace_id) - .filter(TenantAccountJoin.tenant_id == Tenant.id) - .filter(TenantAccountJoin.role == "owner") - .one_or_none() - ) - if tenant_account_join: - tenant, ta = tenant_account_join - account = db.session.query(Account).filter_by(id=ta.account_id).first() - # Login admin - if account: - account.current_tenant = tenant - current_app.login_manager._update_request_context_with_user(account) # type: ignore - user_logged_in.send(current_app._get_current_object(), user=_get_user()) # type: ignore if request.method in EXEMPT_METHODS or dify_config.LOGIN_DISABLED: pass elif not current_user.is_authenticated: diff --git a/api/libs/oauth_data_source.py b/api/libs/oauth_data_source.py index 1c151633f0..218109522d 100644 --- a/api/libs/oauth_data_source.py +++ b/api/libs/oauth_data_source.py @@ -61,13 +61,17 @@ class NotionOAuth(OAuthDataSource): "total": len(pages), } # save data source binding - data_source_binding = DataSourceOauthBinding.query.filter( - db.and_( - DataSourceOauthBinding.tenant_id == current_user.current_tenant_id, - DataSourceOauthBinding.provider == "notion", - DataSourceOauthBinding.access_token == access_token, + data_source_binding = ( + db.session.query(DataSourceOauthBinding) + .filter( + db.and_( + DataSourceOauthBinding.tenant_id == current_user.current_tenant_id, + DataSourceOauthBinding.provider == "notion", + DataSourceOauthBinding.access_token == access_token, + ) ) - ).first() + .first() + ) if data_source_binding: data_source_binding.source_info = source_info data_source_binding.disabled = False @@ -97,13 +101,17 @@ class NotionOAuth(OAuthDataSource): "total": len(pages), } # save data source binding - data_source_binding = DataSourceOauthBinding.query.filter( - db.and_( - DataSourceOauthBinding.tenant_id == current_user.current_tenant_id, - DataSourceOauthBinding.provider == "notion", - DataSourceOauthBinding.access_token == access_token, + data_source_binding = ( + db.session.query(DataSourceOauthBinding) + .filter( + db.and_( + DataSourceOauthBinding.tenant_id == current_user.current_tenant_id, + DataSourceOauthBinding.provider == "notion", + DataSourceOauthBinding.access_token == access_token, + ) ) - ).first() + .first() + ) if data_source_binding: data_source_binding.source_info = source_info data_source_binding.disabled = False @@ -121,14 +129,18 @@ class 
NotionOAuth(OAuthDataSource): def sync_data_source(self, binding_id: str): # save data source binding - data_source_binding = DataSourceOauthBinding.query.filter( - db.and_( - DataSourceOauthBinding.tenant_id == current_user.current_tenant_id, - DataSourceOauthBinding.provider == "notion", - DataSourceOauthBinding.id == binding_id, - DataSourceOauthBinding.disabled == False, + data_source_binding = ( + db.session.query(DataSourceOauthBinding) + .filter( + db.and_( + DataSourceOauthBinding.tenant_id == current_user.current_tenant_id, + DataSourceOauthBinding.provider == "notion", + DataSourceOauthBinding.id == binding_id, + DataSourceOauthBinding.disabled == False, + ) ) - ).first() + .first() + ) if data_source_binding: # get all authorized pages pages = self.get_authorized_pages(data_source_binding.access_token) diff --git a/api/migrations/versions/2024_10_10_0516-bbadea11becb_add_name_and_size_to_tool_files.py b/api/migrations/versions/2024_10_10_0516-bbadea11becb_add_name_and_size_to_tool_files.py index 5b5656e7ed..00f2b15802 100644 --- a/api/migrations/versions/2024_10_10_0516-bbadea11becb_add_name_and_size_to_tool_files.py +++ b/api/migrations/versions/2024_10_10_0516-bbadea11becb_add_name_and_size_to_tool_files.py @@ -5,45 +5,61 @@ Revises: 33f5fac87f29 Create Date: 2024-10-10 05:16:14.764268 """ -from alembic import op -import models as models + import sqlalchemy as sa -from sqlalchemy.dialects import postgresql +from alembic import op, context # revision identifiers, used by Alembic. -revision = 'bbadea11becb' -down_revision = 'd8e744d88ed6' +revision = "bbadea11becb" +down_revision = "d8e744d88ed6" branch_labels = None depends_on = None def upgrade(): + def _has_name_or_size_column() -> bool: + # We cannot access the database in offline mode, so assume + # the "name" and "size" columns do not exist. + if context.is_offline_mode(): + # Log a warning message to inform the user that the database schema cannot be inspected + # in offline mode, and the generated SQL may not accurately reflect the actual execution. + op.execute( + "-- Executing in offline mode, assuming the name and size columns do not exist.\n" + "-- The generated SQL may differ from what will actually be executed.\n" + "-- Please review the migration script carefully!" + ) + + return False + # Use SQLAlchemy inspector to get the columns of the 'tool_files' table + inspector = sa.inspect(conn) + columns = [col["name"] for col in inspector.get_columns("tool_files")] + + # If 'name' or 'size' columns already exist, exit the upgrade function + if "name" in columns or "size" in columns: + return True + return False + # ### commands auto generated by Alembic - please adjust! 
### # Get the database connection conn = op.get_bind() - # Use SQLAlchemy inspector to get the columns of the 'tool_files' table - inspector = sa.inspect(conn) - columns = [col['name'] for col in inspector.get_columns('tool_files')] - - # If 'name' or 'size' columns already exist, exit the upgrade function - if 'name' in columns or 'size' in columns: + if _has_name_or_size_column(): return - with op.batch_alter_table('tool_files', schema=None) as batch_op: - batch_op.add_column(sa.Column('name', sa.String(), nullable=True)) - batch_op.add_column(sa.Column('size', sa.Integer(), nullable=True)) + with op.batch_alter_table("tool_files", schema=None) as batch_op: + batch_op.add_column(sa.Column("name", sa.String(), nullable=True)) + batch_op.add_column(sa.Column("size", sa.Integer(), nullable=True)) op.execute("UPDATE tool_files SET name = '' WHERE name IS NULL") op.execute("UPDATE tool_files SET size = -1 WHERE size IS NULL") - with op.batch_alter_table('tool_files', schema=None) as batch_op: - batch_op.alter_column('name', existing_type=sa.String(), nullable=False) - batch_op.alter_column('size', existing_type=sa.Integer(), nullable=False) + with op.batch_alter_table("tool_files", schema=None) as batch_op: + batch_op.alter_column("name", existing_type=sa.String(), nullable=False) + batch_op.alter_column("size", existing_type=sa.Integer(), nullable=False) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - with op.batch_alter_table('tool_files', schema=None) as batch_op: - batch_op.drop_column('size') - batch_op.drop_column('name') + with op.batch_alter_table("tool_files", schema=None) as batch_op: + batch_op.drop_column("size") + batch_op.drop_column("name") # ### end Alembic commands ### diff --git a/api/migrations/versions/2024_12_23_1154-d7999dfa4aae_remove_workflow_node_executions_retry_.py b/api/migrations/versions/2024_12_23_1154-d7999dfa4aae_remove_workflow_node_executions_retry_.py index 07454b0917..adf6421e57 100644 --- a/api/migrations/versions/2024_12_23_1154-d7999dfa4aae_remove_workflow_node_executions_retry_.py +++ b/api/migrations/versions/2024_12_23_1154-d7999dfa4aae_remove_workflow_node_executions_retry_.py @@ -5,28 +5,38 @@ Revises: e1944c35e15e Create Date: 2024-12-23 11:54:15.344543 """ -from alembic import op -import models as models -import sqlalchemy as sa + +from alembic import op, context from sqlalchemy import inspect # revision identifiers, used by Alembic. -revision = 'd7999dfa4aae' -down_revision = 'e1944c35e15e' +revision = "d7999dfa4aae" +down_revision = "e1944c35e15e" branch_labels = None depends_on = None def upgrade(): - # Check if column exists before attempting to remove it - conn = op.get_bind() - inspector = inspect(conn) - has_column = 'retry_index' in [col['name'] for col in inspector.get_columns('workflow_node_executions')] + def _has_retry_index_column() -> bool: + if context.is_offline_mode(): + # Log a warning message to inform the user that the database schema cannot be inspected + # in offline mode, and the generated SQL may not accurately reflect the actual execution. + op.execute( + '-- Executing in offline mode: assuming the "retry_index" column does not exist.\n' + "-- The generated SQL may differ from what will actually be executed.\n" + "-- Please review the migration script carefully!" 
+ ) + return False + conn = op.get_bind() + inspector = inspect(conn) + return "retry_index" in [col["name"] for col in inspector.get_columns("workflow_node_executions")] + + has_column = _has_retry_index_column() if has_column: - with op.batch_alter_table('workflow_node_executions', schema=None) as batch_op: - batch_op.drop_column('retry_index') + with op.batch_alter_table("workflow_node_executions", schema=None) as batch_op: + batch_op.drop_column("retry_index") def downgrade(): diff --git a/api/migrations/versions/2025_05_14_1403-d28f2004b072_add_index_for_workflow_conversation_.py b/api/migrations/versions/2025_05_14_1403-d28f2004b072_add_index_for_workflow_conversation_.py new file mode 100644 index 0000000000..19f6c01655 --- /dev/null +++ b/api/migrations/versions/2025_05_14_1403-d28f2004b072_add_index_for_workflow_conversation_.py @@ -0,0 +1,33 @@ +"""add index for workflow_conversation_variables.conversation_id + +Revision ID: d28f2004b072 +Revises: 6a9f914f656c +Create Date: 2025-05-14 14:03:36.713828 + +""" +from alembic import op +import models as models +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'd28f2004b072' +down_revision = '6a9f914f656c' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('workflow_conversation_variables', schema=None) as batch_op: + batch_op.create_index(batch_op.f('workflow_conversation_variables_conversation_id_idx'), ['conversation_id'], unique=False) + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + with op.batch_alter_table('workflow_conversation_variables', schema=None) as batch_op: + batch_op.drop_index(batch_op.f('workflow_conversation_variables_conversation_id_idx')) + + # ### end Alembic commands ### diff --git a/api/migrations/versions/2025_05_15_1531-2adcbe1f5dfb_add_workflowdraftvariable_model.py b/api/migrations/versions/2025_05_15_1531-2adcbe1f5dfb_add_workflowdraftvariable_model.py new file mode 100644 index 0000000000..5bf394b21c --- /dev/null +++ b/api/migrations/versions/2025_05_15_1531-2adcbe1f5dfb_add_workflowdraftvariable_model.py @@ -0,0 +1,51 @@ +"""add WorkflowDraftVariable model + +Revision ID: 2adcbe1f5dfb +Revises: d28f2004b072 +Create Date: 2025-05-15 15:31:03.128680 + +""" + +import sqlalchemy as sa +from alembic import op + +import models as models + +# revision identifiers, used by Alembic. +revision = "2adcbe1f5dfb" +down_revision = "d28f2004b072" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table( + "workflow_draft_variables", + sa.Column("id", models.types.StringUUID(), server_default=sa.text("uuid_generate_v4()"), nullable=False), + sa.Column("created_at", sa.DateTime(), server_default=sa.text("CURRENT_TIMESTAMP"), nullable=False), + sa.Column("updated_at", sa.DateTime(), server_default=sa.text("CURRENT_TIMESTAMP"), nullable=False), + sa.Column("app_id", models.types.StringUUID(), nullable=False), + sa.Column("last_edited_at", sa.DateTime(), nullable=True), + sa.Column("node_id", sa.String(length=255), nullable=False), + sa.Column("name", sa.String(length=255), nullable=False), + sa.Column("description", sa.String(length=255), nullable=False), + sa.Column("selector", sa.String(length=255), nullable=False), + sa.Column("value_type", sa.String(length=20), nullable=False), + sa.Column("value", sa.Text(), nullable=False), + sa.Column("visible", sa.Boolean(), nullable=False), + sa.Column("editable", sa.Boolean(), nullable=False), + sa.PrimaryKeyConstraint("id", name=op.f("workflow_draft_variables_pkey")), + sa.UniqueConstraint("app_id", "node_id", "name", name=op.f("workflow_draft_variables_app_id_key")), + ) + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + + # Dropping `workflow_draft_variables` also drops any index associated with it. + op.drop_table("workflow_draft_variables") + + # ### end Alembic commands ### diff --git a/api/models/__init__.py b/api/models/__init__.py index e5a3567979..61cfe2665b 100644 --- a/api/models/__init__.py +++ b/api/models/__init__.py @@ -27,7 +27,7 @@ from .dataset import ( Whitelist, ) from .engine import db -from .enums import CreatedByRole, UserFrom, WorkflowRunTriggeredFrom +from .enums import CreatorUserRole, UserFrom, WorkflowRunTriggeredFrom from .model import ( ApiRequest, ApiToken, @@ -114,7 +114,7 @@ __all__ = [ "CeleryTaskSet", "Conversation", "ConversationVariable", - "CreatedByRole", + "CreatorUserRole", "DataSourceApiKeyAuthBinding", "DataSourceOauthBinding", "Dataset", diff --git a/api/models/account.py b/api/models/account.py index a0b8957fe1..7ffeefa980 100644 --- a/api/models/account.py +++ b/api/models/account.py @@ -1,9 +1,10 @@ import enum import json +from typing import Optional, cast from flask_login import UserMixin # type: ignore from sqlalchemy import func -from sqlalchemy.orm import Mapped, mapped_column +from sqlalchemy.orm import Mapped, mapped_column, reconstructor from models.base import Base @@ -11,6 +12,66 @@ from .engine import db from .types import StringUUID +class TenantAccountRole(enum.StrEnum): + OWNER = "owner" + ADMIN = "admin" + EDITOR = "editor" + NORMAL = "normal" + DATASET_OPERATOR = "dataset_operator" + + @staticmethod + def is_valid_role(role: str) -> bool: + if not role: + return False + return role in { + TenantAccountRole.OWNER, + TenantAccountRole.ADMIN, + TenantAccountRole.EDITOR, + TenantAccountRole.NORMAL, + TenantAccountRole.DATASET_OPERATOR, + } + + @staticmethod + def is_privileged_role(role: Optional["TenantAccountRole"]) -> bool: + if not role: + return False + return role in {TenantAccountRole.OWNER, TenantAccountRole.ADMIN} + + @staticmethod + def is_admin_role(role: Optional["TenantAccountRole"]) -> bool: + if not role: + return False + return role == TenantAccountRole.ADMIN + + @staticmethod + def is_non_owner_role(role: Optional["TenantAccountRole"]) -> bool: + if not role: + return False + return role in { + TenantAccountRole.ADMIN, + TenantAccountRole.EDITOR, + TenantAccountRole.NORMAL, + 
TenantAccountRole.DATASET_OPERATOR, + } + + @staticmethod + def is_editing_role(role: Optional["TenantAccountRole"]) -> bool: + if not role: + return False + return role in {TenantAccountRole.OWNER, TenantAccountRole.ADMIN, TenantAccountRole.EDITOR} + + @staticmethod + def is_dataset_edit_role(role: Optional["TenantAccountRole"]) -> bool: + if not role: + return False + return role in { + TenantAccountRole.OWNER, + TenantAccountRole.ADMIN, + TenantAccountRole.EDITOR, + TenantAccountRole.DATASET_OPERATOR, + } + + class AccountStatus(enum.StrEnum): PENDING = "pending" UNINITIALIZED = "uninitialized" @@ -40,54 +101,54 @@ class Account(UserMixin, Base): created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) + @reconstructor + def init_on_load(self): + self.role: Optional[TenantAccountRole] = None + self._current_tenant: Optional[Tenant] = None + @property def is_password_set(self): return self.password is not None @property def current_tenant(self): - # FIXME: fix the type error later, because the type is important maybe cause some bugs - return self._current_tenant # type: ignore + return self._current_tenant @current_tenant.setter - def current_tenant(self, value: "Tenant"): - tenant = value - ta = TenantAccountJoin.query.filter_by(tenant_id=tenant.id, account_id=self.id).first() + def current_tenant(self, tenant: "Tenant"): + ta = db.session.query(TenantAccountJoin).filter_by(tenant_id=tenant.id, account_id=self.id).first() if ta: - tenant.current_role = ta.role - else: - tenant = None # type: ignore - - self._current_tenant = tenant + self.role = TenantAccountRole(ta.role) + self._current_tenant = tenant + return + self._current_tenant = None @property def current_tenant_id(self) -> str | None: return self._current_tenant.id if self._current_tenant else None - @current_tenant_id.setter - def current_tenant_id(self, value: str): - try: - tenant_account_join = ( + def set_tenant_id(self, tenant_id: str): + tenant_account_join = cast( + tuple[Tenant, TenantAccountJoin], + ( db.session.query(Tenant, TenantAccountJoin) - .filter(Tenant.id == value) + .filter(Tenant.id == tenant_id) .filter(TenantAccountJoin.tenant_id == Tenant.id) .filter(TenantAccountJoin.account_id == self.id) .one_or_none() - ) + ), + ) - if tenant_account_join: - tenant, ta = tenant_account_join - tenant.current_role = ta.role - else: - tenant = None - except Exception: - tenant = None + if not tenant_account_join: + return + tenant, join = tenant_account_join + self.role = join.role self._current_tenant = tenant @property def current_role(self): - return self._current_tenant.current_role + return self.role def get_status(self) -> AccountStatus: status_str = self.status @@ -107,23 +168,23 @@ class Account(UserMixin, Base): # check current_user.current_tenant.current_role in ['admin', 'owner'] @property def is_admin_or_owner(self): - return TenantAccountRole.is_privileged_role(self._current_tenant.current_role) + return TenantAccountRole.is_privileged_role(self.role) @property def is_admin(self): - return TenantAccountRole.is_admin_role(self._current_tenant.current_role) + return TenantAccountRole.is_admin_role(self.role) @property def is_editor(self): - return TenantAccountRole.is_editing_role(self._current_tenant.current_role) + return TenantAccountRole.is_editing_role(self.role) @property def is_dataset_editor(self): - return 
TenantAccountRole.is_dataset_edit_role(self._current_tenant.current_role) + return TenantAccountRole.is_dataset_edit_role(self.role) @property def is_dataset_operator(self): - return self._current_tenant.current_role == TenantAccountRole.DATASET_OPERATOR + return self.role == TenantAccountRole.DATASET_OPERATOR class TenantStatus(enum.StrEnum): @@ -131,67 +192,7 @@ class TenantStatus(enum.StrEnum): ARCHIVE = "archive" -class TenantAccountRole(enum.StrEnum): - OWNER = "owner" - ADMIN = "admin" - EDITOR = "editor" - NORMAL = "normal" - DATASET_OPERATOR = "dataset_operator" - - @staticmethod - def is_valid_role(role: str) -> bool: - if not role: - return False - return role in { - TenantAccountRole.OWNER, - TenantAccountRole.ADMIN, - TenantAccountRole.EDITOR, - TenantAccountRole.NORMAL, - TenantAccountRole.DATASET_OPERATOR, - } - - @staticmethod - def is_privileged_role(role: str) -> bool: - if not role: - return False - return role in {TenantAccountRole.OWNER, TenantAccountRole.ADMIN} - - @staticmethod - def is_admin_role(role: str) -> bool: - if not role: - return False - return role == TenantAccountRole.ADMIN - - @staticmethod - def is_non_owner_role(role: str) -> bool: - if not role: - return False - return role in { - TenantAccountRole.ADMIN, - TenantAccountRole.EDITOR, - TenantAccountRole.NORMAL, - TenantAccountRole.DATASET_OPERATOR, - } - - @staticmethod - def is_editing_role(role: str) -> bool: - if not role: - return False - return role in {TenantAccountRole.OWNER, TenantAccountRole.ADMIN, TenantAccountRole.EDITOR} - - @staticmethod - def is_dataset_edit_role(role: str) -> bool: - if not role: - return False - return role in { - TenantAccountRole.OWNER, - TenantAccountRole.ADMIN, - TenantAccountRole.EDITOR, - TenantAccountRole.DATASET_OPERATOR, - } - - -class Tenant(db.Model): # type: ignore[name-defined] +class Tenant(Base): __tablename__ = "tenants" __table_args__ = (db.PrimaryKeyConstraint("id", name="tenant_pkey"),) @@ -220,7 +221,7 @@ class Tenant(db.Model): # type: ignore[name-defined] self.custom_config = json.dumps(value) -class TenantAccountJoin(db.Model): # type: ignore[name-defined] +class TenantAccountJoin(Base): __tablename__ = "tenant_account_joins" __table_args__ = ( db.PrimaryKeyConstraint("id", name="tenant_account_join_pkey"), @@ -239,7 +240,7 @@ class TenantAccountJoin(db.Model): # type: ignore[name-defined] updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) -class AccountIntegrate(db.Model): # type: ignore[name-defined] +class AccountIntegrate(Base): __tablename__ = "account_integrates" __table_args__ = ( db.PrimaryKeyConstraint("id", name="account_integrate_pkey"), @@ -256,7 +257,7 @@ class AccountIntegrate(db.Model): # type: ignore[name-defined] updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) -class InvitationCode(db.Model): # type: ignore[name-defined] +class InvitationCode(Base): __tablename__ = "invitation_codes" __table_args__ = ( db.PrimaryKeyConstraint("id", name="invitation_code_pkey"), diff --git a/api/models/api_based_extension.py b/api/models/api_based_extension.py index 6b6d808710..5a70e18622 100644 --- a/api/models/api_based_extension.py +++ b/api/models/api_based_extension.py @@ -2,6 +2,7 @@ import enum from sqlalchemy import func +from .base import Base from .engine import db from .types import StringUUID @@ -13,7 +14,7 @@ class APIBasedExtensionPoint(enum.Enum): APP_MODERATION_OUTPUT = "app.moderation.output" -class APIBasedExtension(db.Model): # type: 
ignore[name-defined] +class APIBasedExtension(Base): __tablename__ = "api_based_extensions" __table_args__ = ( db.PrimaryKeyConstraint("id", name="api_based_extension_pkey"), diff --git a/api/models/base.py b/api/models/base.py index da9509301a..bd120f5487 100644 --- a/api/models/base.py +++ b/api/models/base.py @@ -1,5 +1,7 @@ -from sqlalchemy.orm import declarative_base +from sqlalchemy.orm import DeclarativeBase from models.engine import metadata -Base = declarative_base(metadata=metadata) + +class Base(DeclarativeBase): + metadata = metadata diff --git a/api/models/dataset.py b/api/models/dataset.py index d6708ac88b..ad43d6f371 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -22,6 +22,7 @@ from extensions.ext_storage import storage from services.entities.knowledge_entities.knowledge_entities import ParentMode, Rule from .account import Account +from .base import Base from .engine import db from .model import App, Tag, TagBinding, UploadFile from .types import StringUUID @@ -33,7 +34,7 @@ class DatasetPermissionEnum(enum.StrEnum): PARTIAL_TEAM = "partial_members" -class Dataset(db.Model): # type: ignore[name-defined] +class Dataset(Base): __tablename__ = "datasets" __table_args__ = ( db.PrimaryKeyConstraint("id", name="dataset_pkey"), @@ -92,7 +93,8 @@ class Dataset(db.Model): # type: ignore[name-defined] @property def latest_process_rule(self): return ( - DatasetProcessRule.query.filter(DatasetProcessRule.dataset_id == self.id) + db.session.query(DatasetProcessRule) + .filter(DatasetProcessRule.dataset_id == self.id) .order_by(DatasetProcessRule.created_at.desc()) .first() ) @@ -137,7 +139,8 @@ class Dataset(db.Model): # type: ignore[name-defined] @property def word_count(self): return ( - Document.query.with_entities(func.coalesce(func.sum(Document.word_count))) + db.session.query(Document) + .with_entities(func.coalesce(func.sum(Document.word_count))) .filter(Document.dataset_id == self.id) .scalar() ) @@ -255,7 +258,7 @@ class Dataset(db.Model): # type: ignore[name-defined] return f"Vector_index_{normalized_dataset_id}_Node" -class DatasetProcessRule(db.Model): # type: ignore[name-defined] +class DatasetProcessRule(Base): __tablename__ = "dataset_process_rules" __table_args__ = ( db.PrimaryKeyConstraint("id", name="dataset_process_rule_pkey"), @@ -295,7 +298,7 @@ class DatasetProcessRule(db.Model): # type: ignore[name-defined] return None -class Document(db.Model): # type: ignore[name-defined] +class Document(Base): __tablename__ = "documents" __table_args__ = ( db.PrimaryKeyConstraint("id", name="document_pkey"), @@ -439,12 +442,13 @@ class Document(db.Model): # type: ignore[name-defined] @property def segment_count(self): - return DocumentSegment.query.filter(DocumentSegment.document_id == self.id).count() + return db.session.query(DocumentSegment).filter(DocumentSegment.document_id == self.id).count() @property def hit_count(self): return ( - DocumentSegment.query.with_entities(func.coalesce(func.sum(DocumentSegment.hit_count))) + db.session.query(DocumentSegment) + .with_entities(func.coalesce(func.sum(DocumentSegment.hit_count))) .filter(DocumentSegment.document_id == self.id) .scalar() ) @@ -635,7 +639,7 @@ class Document(db.Model): # type: ignore[name-defined] ) -class DocumentSegment(db.Model): # type: ignore[name-defined] +class DocumentSegment(Base): __tablename__ = "document_segments" __table_args__ = ( db.PrimaryKeyConstraint("id", name="document_segment_pkey"), @@ -786,7 +790,7 @@ class DocumentSegment(db.Model): # type: ignore[name-defined] return 
text -class ChildChunk(db.Model): # type: ignore[name-defined] +class ChildChunk(Base): __tablename__ = "child_chunks" __table_args__ = ( db.PrimaryKeyConstraint("id", name="child_chunk_pkey"), @@ -829,7 +833,7 @@ class ChildChunk(db.Model): # type: ignore[name-defined] return db.session.query(DocumentSegment).filter(DocumentSegment.id == self.segment_id).first() -class AppDatasetJoin(db.Model): # type: ignore[name-defined] +class AppDatasetJoin(Base): __tablename__ = "app_dataset_joins" __table_args__ = ( db.PrimaryKeyConstraint("id", name="app_dataset_join_pkey"), @@ -846,7 +850,7 @@ class AppDatasetJoin(db.Model): # type: ignore[name-defined] return db.session.get(App, self.app_id) -class DatasetQuery(db.Model): # type: ignore[name-defined] +class DatasetQuery(Base): __tablename__ = "dataset_queries" __table_args__ = ( db.PrimaryKeyConstraint("id", name="dataset_query_pkey"), @@ -863,7 +867,7 @@ class DatasetQuery(db.Model): # type: ignore[name-defined] created_at = db.Column(db.DateTime, nullable=False, server_default=db.func.current_timestamp()) -class DatasetKeywordTable(db.Model): # type: ignore[name-defined] +class DatasetKeywordTable(Base): __tablename__ = "dataset_keyword_tables" __table_args__ = ( db.PrimaryKeyConstraint("id", name="dataset_keyword_table_pkey"), @@ -891,7 +895,7 @@ class DatasetKeywordTable(db.Model): # type: ignore[name-defined] return dct # get dataset - dataset = Dataset.query.filter_by(id=self.dataset_id).first() + dataset = db.session.query(Dataset).filter_by(id=self.dataset_id).first() if not dataset: return None if self.data_source_type == "database": @@ -908,7 +912,7 @@ class DatasetKeywordTable(db.Model): # type: ignore[name-defined] return None -class Embedding(db.Model): # type: ignore[name-defined] +class Embedding(Base): __tablename__ = "embeddings" __table_args__ = ( db.PrimaryKeyConstraint("id", name="embedding_pkey"), @@ -932,7 +936,7 @@ class Embedding(db.Model): # type: ignore[name-defined] return cast(list[float], pickle.loads(self.embedding)) # noqa: S301 -class DatasetCollectionBinding(db.Model): # type: ignore[name-defined] +class DatasetCollectionBinding(Base): __tablename__ = "dataset_collection_bindings" __table_args__ = ( db.PrimaryKeyConstraint("id", name="dataset_collection_bindings_pkey"), @@ -947,7 +951,7 @@ class DatasetCollectionBinding(db.Model): # type: ignore[name-defined] created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) -class TidbAuthBinding(db.Model): # type: ignore[name-defined] +class TidbAuthBinding(Base): __tablename__ = "tidb_auth_bindings" __table_args__ = ( db.PrimaryKeyConstraint("id", name="tidb_auth_bindings_pkey"), @@ -967,7 +971,7 @@ class TidbAuthBinding(db.Model): # type: ignore[name-defined] created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) -class Whitelist(db.Model): # type: ignore[name-defined] +class Whitelist(Base): __tablename__ = "whitelists" __table_args__ = ( db.PrimaryKeyConstraint("id", name="whitelists_pkey"), @@ -979,7 +983,7 @@ class Whitelist(db.Model): # type: ignore[name-defined] created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) -class DatasetPermission(db.Model): # type: ignore[name-defined] +class DatasetPermission(Base): __tablename__ = "dataset_permissions" __table_args__ = ( db.PrimaryKeyConstraint("id", name="dataset_permission_pkey"), @@ -996,7 +1000,7 @@ class DatasetPermission(db.Model): # type: ignore[name-defined] created_at = db.Column(db.DateTime, 
nullable=False, server_default=func.current_timestamp())


-class ExternalKnowledgeApis(db.Model):  # type: ignore[name-defined]
+class ExternalKnowledgeApis(Base):
     __tablename__ = "external_knowledge_apis"
     __table_args__ = (
         db.PrimaryKeyConstraint("id", name="external_knowledge_apis_pkey"),
@@ -1049,7 +1053,7 @@ class ExternalKnowledgeApis(db.Model):  # type: ignore[name-defined]
         return dataset_bindings


-class ExternalKnowledgeBindings(db.Model):  # type: ignore[name-defined]
+class ExternalKnowledgeBindings(Base):
     __tablename__ = "external_knowledge_bindings"
     __table_args__ = (
         db.PrimaryKeyConstraint("id", name="external_knowledge_bindings_pkey"),
@@ -1070,7 +1074,7 @@ class ExternalKnowledgeBindings(db.Model):  # type: ignore[name-defined]
     updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())


-class DatasetAutoDisableLog(db.Model):  # type: ignore[name-defined]
+class DatasetAutoDisableLog(Base):
     __tablename__ = "dataset_auto_disable_logs"
     __table_args__ = (
         db.PrimaryKeyConstraint("id", name="dataset_auto_disable_log_pkey"),
@@ -1087,7 +1091,7 @@ class DatasetAutoDisableLog(db.Model):  # type: ignore[name-defined]
     created_at = db.Column(db.DateTime, nullable=False, server_default=db.text("CURRENT_TIMESTAMP(0)"))


-class RateLimitLog(db.Model):  # type: ignore[name-defined]
+class RateLimitLog(Base):
     __tablename__ = "rate_limit_logs"
     __table_args__ = (
         db.PrimaryKeyConstraint("id", name="rate_limit_log_pkey"),
@@ -1102,7 +1106,7 @@ class RateLimitLog(db.Model):  # type: ignore[name-defined]
     created_at = db.Column(db.DateTime, nullable=False, server_default=db.text("CURRENT_TIMESTAMP(0)"))


-class DatasetMetadata(db.Model):  # type: ignore[name-defined]
+class DatasetMetadata(Base):
     __tablename__ = "dataset_metadatas"
     __table_args__ = (
         db.PrimaryKeyConstraint("id", name="dataset_metadata_pkey"),
@@ -1121,7 +1125,7 @@ class DatasetMetadata(db.Model):  # type: ignore[name-defined]
     updated_by = db.Column(StringUUID, nullable=True)


-class DatasetMetadataBinding(db.Model):  # type: ignore[name-defined]
+class DatasetMetadataBinding(Base):
     __tablename__ = "dataset_metadata_bindings"
     __table_args__ = (
         db.PrimaryKeyConstraint("id", name="dataset_metadata_binding_pkey"),
diff --git a/api/models/enums.py b/api/models/enums.py
index 7b9500ebe4..4434c3fec8 100644
--- a/api/models/enums.py
+++ b/api/models/enums.py
@@ -1,7 +1,7 @@
 from enum import StrEnum


-class CreatedByRole(StrEnum):
+class CreatorUserRole(StrEnum):
     ACCOUNT = "account"
     END_USER = "end_user"

@@ -14,3 +14,10 @@ class UserFrom(StrEnum):
 class WorkflowRunTriggeredFrom(StrEnum):
     DEBUGGING = "debugging"
     APP_RUN = "app-run"
+
+
+class DraftVariableType(StrEnum):
+    # "node" means the corresponding variable is scoped to a single workflow node.
+    NODE = "node"
+    SYS = "sys"
+    CONVERSATION = "conversation"
diff --git a/api/models/model.py b/api/models/model.py
index 516ea5afc2..85c44a81ff 100644
--- a/api/models/model.py
+++ b/api/models/model.py
@@ -16,7 +16,7 @@ if TYPE_CHECKING:
 import sqlalchemy as sa
 from flask import request
-from flask_login import UserMixin  # type: ignore
+from flask_login import UserMixin
 from sqlalchemy import Float, Index, PrimaryKeyConstraint, func, text
 from sqlalchemy.orm import Mapped, Session, mapped_column

@@ -25,13 +25,13 @@ from constants import DEFAULT_FILE_NUMBER_LIMITS
 from core.file import FILE_MODEL_IDENTITY, File, FileTransferMethod, FileType
 from core.file import helpers as file_helpers
 from libs.helper import generate_string
-from models.base import Base
-from models.enums import CreatedByRole
-from 
models.workflow import WorkflowRunStatus from .account import Account, Tenant +from .base import Base from .engine import db +from .enums import CreatorUserRole from .types import StringUUID +from .workflow import WorkflowRunStatus if TYPE_CHECKING: from .workflow import Workflow @@ -298,6 +298,15 @@ class App(Base): def mcp_server(self): return db.session.query(AppMCPServer).filter(AppMCPServer.app_id == self.id).first() + @property + def author_name(self): + if self.created_by: + account = db.session.query(Account).filter(Account.id == self.created_by).first() + if account: + return account.name + + return None + class AppModelConfig(Base): __tablename__ = "app_model_configs" @@ -606,7 +615,7 @@ class InstalledApp(Base): return tenant -class Conversation(db.Model): # type: ignore[name-defined] +class Conversation(Base): __tablename__ = "conversations" __table_args__ = ( db.PrimaryKeyConstraint("id", name="conversation_pkey"), @@ -798,7 +807,7 @@ class Conversation(db.Model): # type: ignore[name-defined] for message in messages: if message.workflow_run: - status_counts[message.workflow_run.status] += 1 + status_counts[WorkflowRunStatus(message.workflow_run.status)] += 1 return ( { @@ -868,7 +877,7 @@ class Conversation(db.Model): # type: ignore[name-defined] } -class Message(db.Model): # type: ignore[name-defined] +class Message(Base): __tablename__ = "messages" __table_args__ = ( PrimaryKeyConstraint("id", name="message_pkey"), @@ -1215,7 +1224,7 @@ class Message(db.Model): # type: ignore[name-defined] ) -class MessageFeedback(db.Model): # type: ignore[name-defined] +class MessageFeedback(Base): __tablename__ = "message_feedbacks" __table_args__ = ( db.PrimaryKeyConstraint("id", name="message_feedback_pkey"), @@ -1241,8 +1250,23 @@ class MessageFeedback(db.Model): # type: ignore[name-defined] account = db.session.query(Account).filter(Account.id == self.from_account_id).first() return account + def to_dict(self): + return { + "id": str(self.id), + "app_id": str(self.app_id), + "conversation_id": str(self.conversation_id), + "message_id": str(self.message_id), + "rating": self.rating, + "content": self.content, + "from_source": self.from_source, + "from_end_user_id": str(self.from_end_user_id) if self.from_end_user_id else None, + "from_account_id": str(self.from_account_id) if self.from_account_id else None, + "created_at": self.created_at.isoformat(), + "updated_at": self.updated_at.isoformat(), + } -class MessageFile(db.Model): # type: ignore[name-defined] + +class MessageFile(Base): __tablename__ = "message_files" __table_args__ = ( db.PrimaryKeyConstraint("id", name="message_file_pkey"), @@ -1259,7 +1283,7 @@ class MessageFile(db.Model): # type: ignore[name-defined] url: str | None = None, belongs_to: Literal["user", "assistant"] | None = None, upload_file_id: str | None = None, - created_by_role: CreatedByRole, + created_by_role: CreatorUserRole, created_by: str, ): self.message_id = message_id @@ -1283,7 +1307,7 @@ class MessageFile(db.Model): # type: ignore[name-defined] created_at: Mapped[datetime] = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) -class MessageAnnotation(db.Model): # type: ignore[name-defined] +class MessageAnnotation(Base): __tablename__ = "message_annotations" __table_args__ = ( db.PrimaryKeyConstraint("id", name="message_annotation_pkey"), @@ -1314,7 +1338,7 @@ class MessageAnnotation(db.Model): # type: ignore[name-defined] return account -class AppAnnotationHitHistory(db.Model): # type: ignore[name-defined] +class 
AppAnnotationHitHistory(Base): __tablename__ = "app_annotation_hit_histories" __table_args__ = ( db.PrimaryKeyConstraint("id", name="app_annotation_hit_histories_pkey"), @@ -1326,7 +1350,7 @@ class AppAnnotationHitHistory(db.Model): # type: ignore[name-defined] id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()")) app_id = db.Column(StringUUID, nullable=False) - annotation_id = db.Column(StringUUID, nullable=False) + annotation_id: Mapped[str] = db.Column(StringUUID, nullable=False) source = db.Column(db.Text, nullable=False) question = db.Column(db.Text, nullable=False) account_id = db.Column(StringUUID, nullable=False) @@ -1352,7 +1376,7 @@ class AppAnnotationHitHistory(db.Model): # type: ignore[name-defined] return account -class AppAnnotationSetting(db.Model): # type: ignore[name-defined] +class AppAnnotationSetting(Base): __tablename__ = "app_annotation_settings" __table_args__ = ( db.PrimaryKeyConstraint("id", name="app_annotation_settings_pkey"), @@ -1368,26 +1392,6 @@ class AppAnnotationSetting(db.Model): # type: ignore[name-defined] updated_user_id = db.Column(StringUUID, nullable=False) updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) - @property - def created_account(self): - account = ( - db.session.query(Account) - .join(AppAnnotationSetting, AppAnnotationSetting.created_user_id == Account.id) - .filter(AppAnnotationSetting.id == self.annotation_id) - .first() - ) - return account - - @property - def updated_account(self): - account = ( - db.session.query(Account) - .join(AppAnnotationSetting, AppAnnotationSetting.updated_user_id == Account.id) - .filter(AppAnnotationSetting.id == self.annotation_id) - .first() - ) - return account - @property def collection_binding_detail(self): from .dataset import DatasetCollectionBinding @@ -1426,7 +1430,7 @@ class EndUser(Base, UserMixin): ) id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()")) - tenant_id = db.Column(StringUUID, nullable=False) + tenant_id: Mapped[str] = db.Column(StringUUID, nullable=False) app_id = db.Column(StringUUID, nullable=True) type = db.Column(db.String(255), nullable=False) external_user_id = db.Column(db.String(255), nullable=True) @@ -1585,7 +1589,7 @@ class UploadFile(Base): size: int, extension: str, mime_type: str, - created_by_role: CreatedByRole, + created_by_role: CreatorUserRole, created_by: str, created_at: datetime, used: bool, diff --git a/api/models/provider.py b/api/models/provider.py index 567400702d..497cbefc61 100644 --- a/api/models/provider.py +++ b/api/models/provider.py @@ -2,8 +2,7 @@ from enum import Enum from sqlalchemy import func -from models.base import Base - +from .base import Base from .engine import db from .types import StringUUID diff --git a/api/models/source.py b/api/models/source.py index b9d7d91346..f6e0900ae6 100644 --- a/api/models/source.py +++ b/api/models/source.py @@ -9,7 +9,7 @@ from .engine import db from .types import StringUUID -class DataSourceOauthBinding(db.Model): # type: ignore[name-defined] +class DataSourceOauthBinding(Base): __tablename__ = "data_source_oauth_bindings" __table_args__ = ( db.PrimaryKeyConstraint("id", name="source_binding_pkey"), diff --git a/api/models/tools.py b/api/models/tools.py index 9148021404..31c97e8236 100644 --- a/api/models/tools.py +++ b/api/models/tools.py @@ -1,6 +1,6 @@ import json from datetime import datetime -from typing import Any, Optional, cast +from typing import Any, cast import sqlalchemy as sa from deprecated import deprecated @@ 
-173,10 +173,6 @@ class WorkflowToolProvider(Base): db.DateTime, nullable=False, server_default=db.text("CURRENT_TIMESTAMP(0)") ) - @property - def schema_type(self) -> ApiProviderSchemaType: - return ApiProviderSchemaType.value_of(self.schema_type_str) - @property def user(self) -> Account | None: return db.session.query(Account).filter(Account.id == self.user_id).first() @@ -366,8 +362,11 @@ class DeprecatedPublishedAppTool(Base): db.UniqueConstraint("app_id", "user_id", name="unique_published_app_tool"), ) + id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()")) # id of the app app_id = db.Column(StringUUID, ForeignKey("apps.id"), nullable=False) + + user_id: Mapped[str] = db.Column(StringUUID, nullable=False) # who published this tool description = db.Column(db.Text, nullable=False) # llm_description of the tool, for LLM @@ -387,34 +386,3 @@ class DeprecatedPublishedAppTool(Base): @property def description_i18n(self) -> I18nObject: return I18nObject(**json.loads(self.description)) - - id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()")) - user_id: Mapped[str] = db.Column(StringUUID, nullable=False) - tenant_id: Mapped[str] = db.Column(StringUUID, nullable=False) - conversation_id: Mapped[Optional[str]] = db.Column(StringUUID, nullable=True) - file_key: Mapped[str] = db.Column(db.String(255), nullable=False) - mimetype: Mapped[str] = db.Column(db.String(255), nullable=False) - original_url: Mapped[Optional[str]] = db.Column(db.String(2048), nullable=True) - name: Mapped[str] = mapped_column(default="") - size: Mapped[int] = mapped_column(default=-1) - - def __init__( - self, - *, - user_id: str, - tenant_id: str, - conversation_id: Optional[str] = None, - file_key: str, - mimetype: str, - original_url: Optional[str] = None, - name: str, - size: int, - ): - self.user_id = user_id - self.tenant_id = tenant_id - self.conversation_id = conversation_id - self.file_key = file_key - self.mimetype = mimetype - self.original_url = original_url - self.name = name - self.size = size diff --git a/api/models/types.py b/api/models/types.py index cb6773e70c..e5581c3ab0 100644 --- a/api/models/types.py +++ b/api/models/types.py @@ -1,4 +1,7 @@ -from sqlalchemy import CHAR, TypeDecorator +import enum +from typing import Generic, TypeVar + +from sqlalchemy import CHAR, VARCHAR, TypeDecorator from sqlalchemy.dialects.postgresql import UUID @@ -24,3 +27,51 @@ class StringUUID(TypeDecorator): if value is None: return value return str(value) + + +_E = TypeVar("_E", bound=enum.StrEnum) + + +class EnumText(TypeDecorator, Generic[_E]): + impl = VARCHAR + cache_ok = True + + _length: int + _enum_class: type[_E] + + def __init__(self, enum_class: type[_E], length: int | None = None): + self._enum_class = enum_class + max_enum_value_len = max(len(e.value) for e in enum_class) + if length is not None: + if length < max_enum_value_len: + raise ValueError("length should be greater than enum value length.") + self._length = length + else: + # leave some rooms for future longer enum values. 
+ self._length = max(max_enum_value_len, 20) + + def process_bind_param(self, value: _E | str | None, dialect): + if value is None: + return value + if isinstance(value, self._enum_class): + return value.value + elif isinstance(value, str): + self._enum_class(value) + return value + else: + raise TypeError(f"expected str or {self._enum_class}, got {type(value)}") + + def load_dialect_impl(self, dialect): + return dialect.type_descriptor(VARCHAR(self._length)) + + def process_result_value(self, value, dialect) -> _E | None: + if value is None: + return value + if not isinstance(value, str): + raise TypeError(f"expected str, got {type(value)}") + return self._enum_class(value) + + def compare_values(self, x, y): + if x is None or y is None: + return x is y + return x == y diff --git a/api/models/workflow.py b/api/models/workflow.py index da60617de5..ae341dd1b5 100644 --- a/api/models/workflow.py +++ b/api/models/workflow.py @@ -1,29 +1,37 @@ import json +import logging from collections.abc import Mapping, Sequence from datetime import UTC, datetime from enum import Enum, StrEnum -from typing import TYPE_CHECKING, Any, Optional, Self, Union +from typing import TYPE_CHECKING, Any, Optional, Union from uuid import uuid4 +from flask_login import current_user + +from core.variables import utils as variable_utils +from core.workflow.constants import CONVERSATION_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID +from factories.variable_factory import build_segment + if TYPE_CHECKING: from models.model import AppMode import sqlalchemy as sa -from sqlalchemy import Index, PrimaryKeyConstraint, func +from sqlalchemy import UniqueConstraint, func from sqlalchemy.orm import Mapped, mapped_column -import contexts from constants import DEFAULT_FILE_NUMBER_LIMITS, HIDDEN_VALUE from core.helper import encrypter -from core.variables import SecretVariable, Variable +from core.variables import SecretVariable, Segment, SegmentType, Variable from factories import variable_factory from libs import helper -from models.base import Base -from models.enums import CreatedByRole from .account import Account +from .base import Base from .engine import db -from .types import StringUUID +from .enums import CreatorUserRole, DraftVariableType +from .types import EnumText, StringUUID + +_logger = logging.getLogger(__name__) if TYPE_CHECKING: from models.model import AppMode @@ -143,7 +151,7 @@ class Workflow(Base): conversation_variables: Sequence[Variable], marked_name: str = "", marked_comment: str = "", - ) -> Self: + ) -> "Workflow": workflow = Workflow() workflow.id = str(uuid4()) workflow.tenant_id = tenant_id @@ -192,7 +200,9 @@ class Workflow(Base): features["file_upload"]["number_limits"] = image_number_limits features["file_upload"]["allowed_file_upload_methods"] = image_transfer_methods features["file_upload"]["allowed_file_types"] = features["file_upload"].get("allowed_file_types", ["image"]) - features["file_upload"]["allowed_file_extensions"] = [] + features["file_upload"]["allowed_file_extensions"] = features["file_upload"].get( + "allowed_file_extensions", [] + ) del features["file_upload"]["image"] self._features = json.dumps(features) return self._features @@ -265,7 +275,16 @@ class Workflow(Base): if self._environment_variables is None: self._environment_variables = "{}" - tenant_id = contexts.tenant_id.get() + # Get tenant_id from current_user (Account or EndUser) + if isinstance(current_user, Account): + # Account user + tenant_id = current_user.current_tenant_id + else: + # EndUser + tenant_id = 
current_user.tenant_id + + if not tenant_id: + return [] environment_variables_dict: dict[str, Any] = json.loads(self._environment_variables) results = [ @@ -288,7 +307,17 @@ class Workflow(Base): self._environment_variables = "{}" return - tenant_id = contexts.tenant_id.get() + # Get tenant_id from current_user (Account or EndUser) + if isinstance(current_user, Account): + # Account user + tenant_id = current_user.current_tenant_id + else: + # EndUser + tenant_id = current_user.tenant_id + + if not tenant_id: + self._environment_variables = "{}" + return value = list(value) if any(var for var in value if not var.id): @@ -418,29 +447,29 @@ class WorkflowRun(Base): status: Mapped[str] = mapped_column(db.String(255)) # running, succeeded, failed, stopped, partial-succeeded outputs: Mapped[Optional[str]] = mapped_column(sa.Text, default="{}") error: Mapped[Optional[str]] = mapped_column(db.Text) - elapsed_time = db.Column(db.Float, nullable=False, server_default=sa.text("0")) + elapsed_time: Mapped[float] = mapped_column(db.Float, nullable=False, server_default=sa.text("0")) total_tokens: Mapped[int] = mapped_column(sa.BigInteger, server_default=sa.text("0")) - total_steps = db.Column(db.Integer, server_default=db.text("0")) + total_steps: Mapped[int] = mapped_column(db.Integer, server_default=db.text("0")) created_by_role: Mapped[str] = mapped_column(db.String(255)) # account, end_user - created_by = db.Column(StringUUID, nullable=False) - created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) - finished_at = db.Column(db.DateTime) - exceptions_count = db.Column(db.Integer, server_default=db.text("0")) + created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) + created_at: Mapped[datetime] = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp()) + finished_at: Mapped[Optional[datetime]] = mapped_column(db.DateTime) + exceptions_count: Mapped[int] = mapped_column(db.Integer, server_default=db.text("0")) @property def created_by_account(self): - created_by_role = CreatedByRole(self.created_by_role) - return db.session.get(Account, self.created_by) if created_by_role == CreatedByRole.ACCOUNT else None + created_by_role = CreatorUserRole(self.created_by_role) + return db.session.get(Account, self.created_by) if created_by_role == CreatorUserRole.ACCOUNT else None @property def created_by_end_user(self): from models.model import EndUser - created_by_role = CreatedByRole(self.created_by_role) - return db.session.get(EndUser, self.created_by) if created_by_role == CreatedByRole.END_USER else None + created_by_role = CreatorUserRole(self.created_by_role) + return db.session.get(EndUser, self.created_by) if created_by_role == CreatorUserRole.END_USER else None @property - def graph_dict(self): + def graph_dict(self) -> Mapping[str, Any]: return json.loads(self.graph) if self.graph else {} @property @@ -634,24 +663,24 @@ class WorkflowNodeExecution(Base): @property def created_by_account(self): - created_by_role = CreatedByRole(self.created_by_role) + created_by_role = CreatorUserRole(self.created_by_role) # TODO(-LAN-): Avoid using db.session.get() here. 
- return db.session.get(Account, self.created_by) if created_by_role == CreatedByRole.ACCOUNT else None + return db.session.get(Account, self.created_by) if created_by_role == CreatorUserRole.ACCOUNT else None @property def created_by_end_user(self): from models.model import EndUser - created_by_role = CreatedByRole(self.created_by_role) + created_by_role = CreatorUserRole(self.created_by_role) # TODO(-LAN-): Avoid using db.session.get() here. - return db.session.get(EndUser, self.created_by) if created_by_role == CreatedByRole.END_USER else None + return db.session.get(EndUser, self.created_by) if created_by_role == CreatorUserRole.END_USER else None @property def inputs_dict(self): return json.loads(self.inputs) if self.inputs else None @property - def outputs_dict(self): + def outputs_dict(self) -> dict[str, Any] | None: return json.loads(self.outputs) if self.outputs else None @property @@ -659,8 +688,11 @@ class WorkflowNodeExecution(Base): return json.loads(self.process_data) if self.process_data else None @property - def execution_metadata_dict(self): - return json.loads(self.execution_metadata) if self.execution_metadata else None + def execution_metadata_dict(self) -> dict[str, Any]: + # When the metadata is unset, we return an empty dictionary instead of `None`. + # This approach streamlines the logic for the caller, making it easier to handle + # cases where metadata is absent. + return json.loads(self.execution_metadata) if self.execution_metadata else {} @property def extras(self): @@ -736,19 +768,18 @@ class WorkflowAppLog(Base): __tablename__ = "workflow_app_logs" __table_args__ = ( db.PrimaryKeyConstraint("id", name="workflow_app_log_pkey"), - db.Index("workflow_app_log_app_idx", "tenant_id", "app_id", "created_at"), - db.Index("workflow_app_log_workflow_run_idx", "workflow_run_id"), + db.Index("workflow_app_log_app_idx", "tenant_id", "app_id"), ) id: Mapped[str] = mapped_column(StringUUID, server_default=db.text("uuid_generate_v4()")) tenant_id: Mapped[str] = mapped_column(StringUUID) app_id: Mapped[str] = mapped_column(StringUUID) - workflow_id = db.Column(StringUUID, nullable=False) + workflow_id: Mapped[str] = mapped_column(StringUUID, nullable=False) workflow_run_id: Mapped[str] = mapped_column(StringUUID) - created_from = db.Column(db.String(255), nullable=False) - created_by_role = db.Column(db.String(255), nullable=False) - created_by = db.Column(StringUUID, nullable=False) - created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) + created_from: Mapped[str] = mapped_column(db.String(255), nullable=False) + created_by_role: Mapped[str] = mapped_column(db.String(255), nullable=False) + created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) + created_at: Mapped[datetime] = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp()) @property def workflow_run(self): @@ -756,31 +787,28 @@ class WorkflowAppLog(Base): @property def created_by_account(self): - created_by_role = CreatedByRole(self.created_by_role) - return db.session.get(Account, self.created_by) if created_by_role == CreatedByRole.ACCOUNT else None + created_by_role = CreatorUserRole(self.created_by_role) + return db.session.get(Account, self.created_by) if created_by_role == CreatorUserRole.ACCOUNT else None @property def created_by_end_user(self): from models.model import EndUser - created_by_role = CreatedByRole(self.created_by_role) - return db.session.get(EndUser, self.created_by) if created_by_role == CreatedByRole.END_USER 
else None
+        created_by_role = CreatorUserRole(self.created_by_role)
+        return db.session.get(EndUser, self.created_by) if created_by_role == CreatorUserRole.END_USER else None


 class ConversationVariable(Base):
     __tablename__ = "workflow_conversation_variables"
-    __table_args__ = (
-        PrimaryKeyConstraint("id", "conversation_id", name="workflow_conversation_variables_pkey"),
-        Index("workflow__conversation_variables_app_id_idx", "app_id"),
-        Index("workflow__conversation_variables_created_at_idx", "created_at"),
-    )

     id: Mapped[str] = mapped_column(StringUUID, primary_key=True)
-    conversation_id: Mapped[str] = mapped_column(StringUUID, nullable=False, primary_key=True)
-    app_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
-    data = mapped_column(db.Text, nullable=False)
-    created_at = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp())
-    updated_at = mapped_column(
+    conversation_id: Mapped[str] = mapped_column(StringUUID, nullable=False, primary_key=True, index=True)
+    app_id: Mapped[str] = mapped_column(StringUUID, nullable=False, index=True)
+    data: Mapped[str] = mapped_column(db.Text, nullable=False)
+    created_at: Mapped[datetime] = mapped_column(
+        db.DateTime, nullable=False, server_default=func.current_timestamp(), index=True
+    )
+    updated_at: Mapped[datetime] = mapped_column(
         db.DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp()
     )

@@ -803,3 +831,201 @@ class ConversationVariable(Base):
     def to_variable(self) -> Variable:
         mapping = json.loads(self.data)
         return variable_factory.build_conversation_variable_from_mapping(mapping)
+
+
+# Only `sys.query` and `sys.files` can be modified.
+_EDITABLE_SYSTEM_VARIABLE = frozenset(["query", "files"])
+
+
+def _naive_utc_datetime():
+    return datetime.now(UTC).replace(tzinfo=None)
+
+
+class WorkflowDraftVariable(Base):
+    @staticmethod
+    def unique_columns() -> list[str]:
+        return [
+            "app_id",
+            "node_id",
+            "name",
+        ]
+
+    __tablename__ = "workflow_draft_variables"
+    __table_args__ = (UniqueConstraint(*unique_columns()),)
+
+    # id is the unique identifier of a draft variable.
+    id: Mapped[str] = mapped_column(StringUUID, primary_key=True, server_default=db.text("uuid_generate_v4()"))
+
+    created_at: Mapped[datetime] = mapped_column(
+        db.DateTime,
+        nullable=False,
+        default=_naive_utc_datetime,
+        server_default=func.current_timestamp(),
+    )
+
+    updated_at: Mapped[datetime] = mapped_column(
+        db.DateTime,
+        nullable=False,
+        default=_naive_utc_datetime,
+        server_default=func.current_timestamp(),
+        onupdate=func.current_timestamp(),
+    )
+
+    # `app_id` maps to the `id` field in the `model.App` model.
+    app_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
+
+    # `last_edited_at` records when the value of a given draft variable
+    # was last edited.
+    #
+    # If it has not been edited after creation, its value is `None`.
+    last_edited_at: Mapped[datetime | None] = mapped_column(
+        db.DateTime,
+        nullable=True,
+        default=None,
+    )
+
+    # The `node_id` field is special.
+    #
+    # If the variable is a conversation variable or a system variable, then the value of `node_id`
+    # is `conversation` or `sys`, respectively.
+    #
+    # Otherwise, if the variable belongs to a specific node, the value of `node_id` is
+    # the identity of the corresponding node in the graph definition. An example of a node id is `"1745769620734"`.
+    #
+    # However, there's one caveat. The id of the first "Answer" node in a chatflow is "answer". (Other
+    # "Answer" nodes conform to the rules above.)
+ node_id: Mapped[str] = mapped_column(sa.String(255), nullable=False, name="node_id") + + # From `VARIABLE_PATTERN`, we may conclude that the length of a top level variable is less than + # 80 chars. + # + # ref: api/core/workflow/entities/variable_pool.py:18 + name: Mapped[str] = mapped_column(sa.String(255), nullable=False) + description: Mapped[str] = mapped_column( + sa.String(255), + default="", + nullable=False, + ) + + selector: Mapped[str] = mapped_column(sa.String(255), nullable=False, name="selector") + + value_type: Mapped[SegmentType] = mapped_column(EnumText(SegmentType, length=20)) + # JSON string + value: Mapped[str] = mapped_column(sa.Text, nullable=False, name="value") + + # visible + visible: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, default=True) + editable: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, default=False) + + def get_selector(self) -> list[str]: + selector = json.loads(self.selector) + if not isinstance(selector, list): + _logger.error( + "invalid selector loaded from database, type=%s, value=%s", + type(selector), + self.selector, + ) + raise ValueError("invalid selector.") + return selector + + def _set_selector(self, value: list[str]): + self.selector = json.dumps(value) + + def get_value(self) -> Segment | None: + return build_segment(json.loads(self.value)) + + def set_name(self, name: str): + self.name = name + self._set_selector([self.node_id, name]) + + def set_value(self, value: Segment): + self.value = json.dumps(value.value) + self.value_type = value.value_type + + def get_node_id(self) -> str | None: + if self.get_variable_type() == DraftVariableType.NODE: + return self.node_id + else: + return None + + def get_variable_type(self) -> DraftVariableType: + match self.node_id: + case DraftVariableType.CONVERSATION: + return DraftVariableType.CONVERSATION + case DraftVariableType.SYS: + return DraftVariableType.SYS + case _: + return DraftVariableType.NODE + + @classmethod + def _new( + cls, + *, + app_id: str, + node_id: str, + name: str, + value: Segment, + description: str = "", + ) -> "WorkflowDraftVariable": + variable = WorkflowDraftVariable() + variable.created_at = _naive_utc_datetime() + variable.updated_at = _naive_utc_datetime() + variable.description = description + variable.app_id = app_id + variable.node_id = node_id + variable.name = name + variable.set_value(value) + variable._set_selector(list(variable_utils.to_selector(node_id, name))) + return variable + + @classmethod + def new_conversation_variable( + cls, + *, + app_id: str, + name: str, + value: Segment, + ) -> "WorkflowDraftVariable": + variable = cls._new( + app_id=app_id, + node_id=CONVERSATION_VARIABLE_NODE_ID, + name=name, + value=value, + ) + return variable + + @classmethod + def new_sys_variable( + cls, + *, + app_id: str, + name: str, + value: Segment, + editable: bool = False, + ) -> "WorkflowDraftVariable": + variable = cls._new(app_id=app_id, node_id=SYSTEM_VARIABLE_NODE_ID, name=name, value=value) + variable.editable = editable + return variable + + @classmethod + def new_node_variable( + cls, + *, + app_id: str, + node_id: str, + name: str, + value: Segment, + visible: bool = True, + ) -> "WorkflowDraftVariable": + variable = cls._new(app_id=app_id, node_id=node_id, name=name, value=value) + variable.visible = visible + variable.editable = True + return variable + + @property + def edited(self): + return self.last_edited_at is not None + + +def is_system_variable_editable(name: str) -> bool: + return name in _EDITABLE_SYSTEM_VARIABLE 
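The WorkflowDraftVariable model added above is normally driven through its classmethod constructors rather than direct instantiation. A minimal usage sketch follows (it is an illustration, not part of the diff): the import path for build_segment and the session handling are assumptions, since the hunk only shows that those names exist in the codebase.

# Illustrative sketch only. The `build_segment` import location is an assumption;
# the caller is assumed to own the SQLAlchemy session.
from extensions.ext_database import db
from factories.variable_factory import build_segment  # assumed module path
from models.workflow import WorkflowDraftVariable, is_system_variable_editable


def save_draft_node_output(app_id: str, node_id: str, name: str, raw_value):
    # Wrap the raw Python value in a Segment so `value` and `value_type` are
    # stored consistently, then persist it as a node-scoped draft variable
    # (node variables are created visible and editable).
    variable = WorkflowDraftVariable.new_node_variable(
        app_id=app_id,
        node_id=node_id,
        name=name,
        value=build_segment(raw_value),
    )
    db.session.add(variable)
    db.session.commit()
    return variable


# Only `sys.query` and `sys.files` are editable system variables:
assert is_system_variable_editable("query")
assert not is_system_variable_editable("workflow_id")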
diff --git a/api/pyproject.toml b/api/pyproject.toml index 92f50ced36..317a8a41b8 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -14,7 +14,7 @@ dependencies = [ "chardet~=5.1.0", "flask~=3.1.0", "flask-compress~=1.17", - "flask-cors~=4.0.0", + "flask-cors~=5.0.0", "flask-login~=0.6.3", "flask-migrate~=4.0.7", "flask-restful~=0.3.10", @@ -39,7 +39,7 @@ dependencies = [ "oci~=2.135.1", "openai~=1.61.0", "openpyxl~=3.1.5", - "opik~=1.3.4", + "opik~=1.7.25", "opentelemetry-api==1.27.0", "opentelemetry-distro==0.48b0", "opentelemetry-exporter-otlp==1.27.0", @@ -63,25 +63,24 @@ dependencies = [ "psycogreen~=1.0.2", "psycopg2-binary~=2.9.6", "pycryptodome==3.19.1", - "pydantic~=2.9.2", - "pydantic-extra-types~=2.9.0", - "pydantic-settings~=2.6.0", + "pydantic~=2.11.4", + "pydantic-extra-types~=2.10.3", + "pydantic-settings~=2.9.1", "pyjwt~=2.8.0", - "pypdfium2~=4.30.0", + "pypdfium2==4.30.0", "python-docx~=1.1.0", "python-dotenv==1.0.1", "pyyaml~=6.0.1", - "readabilipy==0.2.0", - "redis[hiredis]~=5.0.3", - "resend~=0.7.0", - "sentry-sdk[flask]~=1.44.1", + "readabilipy~=0.3.0", + "redis[hiredis]~=6.1.0", + "resend~=2.9.0", + "sentry-sdk[flask]~=2.28.0", "sqlalchemy~=2.0.29", "starlette==0.41.0", "tiktoken~=0.9.0", - "tokenizers~=0.15.0", - "transformers~=4.35.0", + "transformers~=4.51.0", "unstructured[docx,epub,md,ppt,pptx]~=0.16.1", - "weave~=0.51.34", + "weave~=0.51.0", "yarl~=1.18.3", "webvtt-py~=0.5.1", "sseclient-py>=1.8.0", @@ -151,6 +150,7 @@ dev = [ "types-tensorflow~=2.18.0", "types-tqdm~=4.67.0", "types-ujson~=5.10.0", + "boto3-stubs>=1.38.20", ] ############################################################ @@ -192,12 +192,12 @@ vdb = [ "pymilvus~=2.5.0", "pymochow==1.3.1", "pyobvector~=0.1.6", - "qdrant-client==1.7.3", + "qdrant-client==1.9.0", "tablestore==6.1.0", "tcvectordb~=1.6.4", "tidb-vector==0.0.9", "upstash-vector==0.6.0", - "volcengine-compat~=1.0.156", + "volcengine-compat~=1.0.0", "weaviate-client~=3.24.0", "xinference-client~=1.2.2", ] diff --git a/api/schedule/clean_messages.py b/api/schedule/clean_messages.py index 5e4d3ec323..f41f5264c7 100644 --- a/api/schedule/clean_messages.py +++ b/api/schedule/clean_messages.py @@ -1,4 +1,5 @@ import datetime +import logging import time import click @@ -20,6 +21,8 @@ from models.model import ( from models.web import SavedMessage from services.feature_service import FeatureService +_logger = logging.getLogger(__name__) + @app.celery.task(queue="dataset") def clean_messages(): @@ -46,7 +49,14 @@ def clean_messages(): break for message in messages: plan_sandbox_clean_message_day = message.created_at - app = App.query.filter_by(id=message.app_id).first() + app = db.session.query(App).filter_by(id=message.app_id).first() + if not app: + _logger.warning( + "Expected App record to exist, but none was found, app_id=%s, message_id=%s", + message.app_id, + message.id, + ) + continue features_cache_key = f"features:{app.tenant_id}" plan_cache = redis_client.get(features_cache_key) if plan_cache is None: diff --git a/api/schedule/clean_unused_datasets_task.py b/api/schedule/clean_unused_datasets_task.py index 4e7e443c2c..c0cd42a226 100644 --- a/api/schedule/clean_unused_datasets_task.py +++ b/api/schedule/clean_unused_datasets_task.py @@ -2,7 +2,7 @@ import datetime import time import click -from sqlalchemy import func +from sqlalchemy import func, select from werkzeug.exceptions import NotFound import app @@ -51,8 +51,9 @@ def clean_unused_datasets_task(): ) # Main query with join and filter - datasets = ( - 
Dataset.query.outerjoin(document_subquery_new, Dataset.id == document_subquery_new.c.dataset_id) + stmt = ( + select(Dataset) + .outerjoin(document_subquery_new, Dataset.id == document_subquery_new.c.dataset_id) .outerjoin(document_subquery_old, Dataset.id == document_subquery_old.c.dataset_id) .filter( Dataset.created_at < plan_sandbox_clean_day, @@ -60,9 +61,10 @@ def clean_unused_datasets_task(): func.coalesce(document_subquery_old.c.document_count, 0) > 0, ) .order_by(Dataset.created_at.desc()) - .paginate(page=1, per_page=50) ) + datasets = db.paginate(stmt, page=1, per_page=50) + except NotFound: break if datasets.items is None or len(datasets.items) == 0: @@ -99,7 +101,7 @@ def clean_unused_datasets_task(): # update document update_params = {Document.enabled: False} - Document.query.filter_by(dataset_id=dataset.id).update(update_params) + db.session.query(Document).filter_by(dataset_id=dataset.id).update(update_params) db.session.commit() click.echo(click.style("Cleaned unused dataset {} from db success!".format(dataset.id), fg="green")) except Exception as e: @@ -135,8 +137,9 @@ def clean_unused_datasets_task(): ) # Main query with join and filter - datasets = ( - Dataset.query.outerjoin(document_subquery_new, Dataset.id == document_subquery_new.c.dataset_id) + stmt = ( + select(Dataset) + .outerjoin(document_subquery_new, Dataset.id == document_subquery_new.c.dataset_id) .outerjoin(document_subquery_old, Dataset.id == document_subquery_old.c.dataset_id) .filter( Dataset.created_at < plan_pro_clean_day, @@ -144,8 +147,8 @@ def clean_unused_datasets_task(): func.coalesce(document_subquery_old.c.document_count, 0) > 0, ) .order_by(Dataset.created_at.desc()) - .paginate(page=1, per_page=50) ) + datasets = db.paginate(stmt, page=1, per_page=50) except NotFound: break @@ -175,7 +178,7 @@ def clean_unused_datasets_task(): # update document update_params = {Document.enabled: False} - Document.query.filter_by(dataset_id=dataset.id).update(update_params) + db.session.query(Document).filter_by(dataset_id=dataset.id).update(update_params) db.session.commit() click.echo( click.style("Cleaned unused dataset {} from db success!".format(dataset.id), fg="green") diff --git a/api/schedule/create_tidb_serverless_task.py b/api/schedule/create_tidb_serverless_task.py index 1c985461c6..8a02278de8 100644 --- a/api/schedule/create_tidb_serverless_task.py +++ b/api/schedule/create_tidb_serverless_task.py @@ -19,7 +19,9 @@ def create_tidb_serverless_task(): while True: try: # check the number of idle tidb serverless - idle_tidb_serverless_number = TidbAuthBinding.query.filter(TidbAuthBinding.active == False).count() + idle_tidb_serverless_number = ( + db.session.query(TidbAuthBinding).filter(TidbAuthBinding.active == False).count() + ) if idle_tidb_serverless_number >= tidb_serverless_number: break # create tidb serverless diff --git a/api/schedule/mail_clean_document_notify_task.py b/api/schedule/mail_clean_document_notify_task.py index b3d0e09784..5ee813e1de 100644 --- a/api/schedule/mail_clean_document_notify_task.py +++ b/api/schedule/mail_clean_document_notify_task.py @@ -29,7 +29,9 @@ def mail_clean_document_notify_task(): # send document clean notify mail try: - dataset_auto_disable_logs = DatasetAutoDisableLog.query.filter(DatasetAutoDisableLog.notified == False).all() + dataset_auto_disable_logs = ( + db.session.query(DatasetAutoDisableLog).filter(DatasetAutoDisableLog.notified == False).all() + ) # group by tenant_id dataset_auto_disable_logs_map: dict[str, list[DatasetAutoDisableLog]] = 
defaultdict(list) for dataset_auto_disable_log in dataset_auto_disable_logs: @@ -43,14 +45,16 @@ def mail_clean_document_notify_task(): if plan != "sandbox": knowledge_details = [] # check tenant - tenant = Tenant.query.filter(Tenant.id == tenant_id).first() + tenant = db.session.query(Tenant).filter(Tenant.id == tenant_id).first() if not tenant: continue # check current owner - current_owner_join = TenantAccountJoin.query.filter_by(tenant_id=tenant.id, role="owner").first() + current_owner_join = ( + db.session.query(TenantAccountJoin).filter_by(tenant_id=tenant.id, role="owner").first() + ) if not current_owner_join: continue - account = Account.query.filter(Account.id == current_owner_join.account_id).first() + account = db.session.query(Account).filter(Account.id == current_owner_join.account_id).first() if not account: continue @@ -63,7 +67,7 @@ def mail_clean_document_notify_task(): ) for dataset_id, document_ids in dataset_auto_dataset_map.items(): - dataset = Dataset.query.filter(Dataset.id == dataset_id).first() + dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first() if dataset: document_count = len(document_ids) knowledge_details.append(rf"Knowledge base {dataset.name}: {document_count} documents") diff --git a/api/schedule/update_tidb_serverless_status_task.py b/api/schedule/update_tidb_serverless_status_task.py index 11a39e60ee..ce4ecb6e7c 100644 --- a/api/schedule/update_tidb_serverless_status_task.py +++ b/api/schedule/update_tidb_serverless_status_task.py @@ -5,6 +5,7 @@ import click import app from configs import dify_config from core.rag.datasource.vdb.tidb_on_qdrant.tidb_service import TidbService +from extensions.ext_database import db from models.dataset import TidbAuthBinding @@ -14,9 +15,11 @@ def update_tidb_serverless_status_task(): start_at = time.perf_counter() try: # check the number of idle tidb serverless - tidb_serverless_list = TidbAuthBinding.query.filter( - TidbAuthBinding.active == False, TidbAuthBinding.status == "CREATING" - ).all() + tidb_serverless_list = ( + db.session.query(TidbAuthBinding) + .filter(TidbAuthBinding.active == False, TidbAuthBinding.status == "CREATING") + .all() + ) if len(tidb_serverless_list) == 0: return # update tidb serverless status diff --git a/api/services/account_service.py b/api/services/account_service.py index f930ef910b..ac84a46299 100644 --- a/api/services/account_service.py +++ b/api/services/account_service.py @@ -49,7 +49,7 @@ from services.errors.account import ( RoleAlreadyAssignedError, TenantNotFoundError, ) -from services.errors.workspace import WorkSpaceNotAllowedCreateError +from services.errors.workspace import WorkSpaceNotAllowedCreateError, WorkspacesLimitExceededError from services.feature_service import FeatureService from tasks.delete_account_task import delete_account_task from tasks.mail_account_deletion_task import send_account_deletion_verification_code @@ -108,17 +108,20 @@ class AccountService: if account.status == AccountStatus.BANNED.value: raise Unauthorized("Account is banned.") - current_tenant = TenantAccountJoin.query.filter_by(account_id=account.id, current=True).first() + current_tenant = db.session.query(TenantAccountJoin).filter_by(account_id=account.id, current=True).first() if current_tenant: - account.current_tenant_id = current_tenant.tenant_id + account.set_tenant_id(current_tenant.tenant_id) else: available_ta = ( - TenantAccountJoin.query.filter_by(account_id=account.id).order_by(TenantAccountJoin.id.asc()).first() + db.session.query(TenantAccountJoin) + 
.filter_by(account_id=account.id) + .order_by(TenantAccountJoin.id.asc()) + .first() ) if not available_ta: return None - account.current_tenant_id = available_ta.tenant_id + account.set_tenant_id(available_ta.tenant_id) available_ta.current = True db.session.commit() @@ -297,9 +300,9 @@ class AccountService: """Link account integrate""" try: # Query whether there is an existing binding record for the same provider - account_integrate: Optional[AccountIntegrate] = AccountIntegrate.query.filter_by( - account_id=account.id, provider=provider - ).first() + account_integrate: Optional[AccountIntegrate] = ( + db.session.query(AccountIntegrate).filter_by(account_id=account.id, provider=provider).first() + ) if account_integrate: # If it exists, update the record @@ -612,7 +615,10 @@ class TenantService: ): """Check if user have a workspace or not""" available_ta = ( - TenantAccountJoin.query.filter_by(account_id=account.id).order_by(TenantAccountJoin.id.asc()).first() + db.session.query(TenantAccountJoin) + .filter_by(account_id=account.id) + .order_by(TenantAccountJoin.id.asc()) + .first() ) if available_ta: @@ -622,6 +628,10 @@ class TenantService: if not FeatureService.get_system_features().is_allow_create_workspace and not is_setup: raise WorkSpaceNotAllowedCreateError() + workspaces = FeatureService.get_system_features().license.workspaces + if not workspaces.is_available(): + raise WorkspacesLimitExceededError() + if name: tenant = TenantService.create_tenant(name=name, is_setup=is_setup) else: @@ -666,7 +676,7 @@ class TenantService: if not tenant: raise TenantNotFoundError("Tenant not found.") - ta = TenantAccountJoin.query.filter_by(tenant_id=tenant.id, account_id=account.id).first() + ta = db.session.query(TenantAccountJoin).filter_by(tenant_id=tenant.id, account_id=account.id).first() if ta: tenant.role = ta.role else: @@ -695,12 +705,12 @@ class TenantService: if not tenant_account_join: raise AccountNotLinkTenantError("Tenant not found or account is not a member of the tenant.") else: - TenantAccountJoin.query.filter( + db.session.query(TenantAccountJoin).filter( TenantAccountJoin.account_id == account.id, TenantAccountJoin.tenant_id != tenant_id ).update({"current": False}) tenant_account_join.current = True # Set the current tenant for the account - account.current_tenant_id = tenant_account_join.tenant_id + account.set_tenant_id(tenant_account_join.tenant_id) db.session.commit() @staticmethod @@ -787,7 +797,7 @@ class TenantService: if operator.id == member.id: raise CannotOperateSelfError("Cannot operate self.") - ta_operator = TenantAccountJoin.query.filter_by(tenant_id=tenant.id, account_id=operator.id).first() + ta_operator = db.session.query(TenantAccountJoin).filter_by(tenant_id=tenant.id, account_id=operator.id).first() if not ta_operator or ta_operator.role not in perms[action]: raise NoPermissionError(f"No permission to {action} member.") @@ -800,7 +810,7 @@ class TenantService: TenantService.check_member_permission(tenant, operator, account, "remove") - ta = TenantAccountJoin.query.filter_by(tenant_id=tenant.id, account_id=account.id).first() + ta = db.session.query(TenantAccountJoin).filter_by(tenant_id=tenant.id, account_id=account.id).first() if not ta: raise MemberNotInTenantError("Member not in tenant.") @@ -812,15 +822,23 @@ class TenantService: """Update member role""" TenantService.check_member_permission(tenant, operator, member, "update") - target_member_join = TenantAccountJoin.query.filter_by(tenant_id=tenant.id, account_id=member.id).first() + 
target_member_join = ( + db.session.query(TenantAccountJoin).filter_by(tenant_id=tenant.id, account_id=member.id).first() + ) + + if not target_member_join: + raise MemberNotInTenantError("Member not in tenant.") if target_member_join.role == new_role: raise RoleAlreadyAssignedError("The provided role is already assigned to the member.") if new_role == "owner": # Find the current owner and change their role to 'admin' - current_owner_join = TenantAccountJoin.query.filter_by(tenant_id=tenant.id, role="owner").first() - current_owner_join.role = "admin" + current_owner_join = ( + db.session.query(TenantAccountJoin).filter_by(tenant_id=tenant.id, role="owner").first() + ) + if current_owner_join: + current_owner_join.role = "admin" # Update the role of the target member target_member_join.role = new_role @@ -837,7 +855,7 @@ class TenantService: @staticmethod def get_custom_config(tenant_id: str) -> dict: - tenant = Tenant.query.filter(Tenant.id == tenant_id).one_or_404() + tenant = db.get_or_404(Tenant, tenant_id) return cast(dict, tenant.custom_config_dict) @@ -914,7 +932,11 @@ class RegisterService: if open_id is not None and provider is not None: AccountService.link_account_integrate(provider, open_id, account) - if FeatureService.get_system_features().is_allow_create_workspace and create_workspace_required: + if ( + FeatureService.get_system_features().is_allow_create_workspace + and create_workspace_required + and FeatureService.get_system_features().license.workspaces.is_available() + ): tenant = TenantService.create_tenant(f"{account.name}'s Workspace") TenantService.create_tenant_member(tenant, account, role="owner") account.current_tenant = tenant @@ -959,7 +981,7 @@ class RegisterService: TenantService.switch_tenant(account, tenant.id) else: TenantService.check_member_permission(tenant, inviter, account, "add") - ta = TenantAccountJoin.query.filter_by(tenant_id=tenant.id, account_id=account.id).first() + ta = db.session.query(TenantAccountJoin).filter_by(tenant_id=tenant.id, account_id=account.id).first() if not ta: TenantService.create_tenant_member(tenant, account, role) diff --git a/api/services/annotation_service.py b/api/services/annotation_service.py index ae7b372b82..8c950abc24 100644 --- a/api/services/annotation_service.py +++ b/api/services/annotation_service.py @@ -4,7 +4,7 @@ from typing import cast import pandas as pd from flask_login import current_user -from sqlalchemy import or_ +from sqlalchemy import or_, select from werkzeug.datastructures import FileStorage from werkzeug.exceptions import NotFound @@ -124,8 +124,9 @@ class AppAnnotationService: if not app: raise NotFound("App not found") if keyword: - annotations = ( - MessageAnnotation.query.filter(MessageAnnotation.app_id == app_id) + stmt = ( + select(MessageAnnotation) + .filter(MessageAnnotation.app_id == app_id) .filter( or_( MessageAnnotation.question.ilike("%{}%".format(keyword)), @@ -133,14 +134,14 @@ class AppAnnotationService: ) ) .order_by(MessageAnnotation.created_at.desc(), MessageAnnotation.id.desc()) - .paginate(page=page, per_page=limit, max_per_page=100, error_out=False) ) else: - annotations = ( - MessageAnnotation.query.filter(MessageAnnotation.app_id == app_id) + stmt = ( + select(MessageAnnotation) + .filter(MessageAnnotation.app_id == app_id) .order_by(MessageAnnotation.created_at.desc(), MessageAnnotation.id.desc()) - .paginate(page=page, per_page=limit, max_per_page=100, error_out=False) ) + annotations = db.paginate(select=stmt, page=page, per_page=limit, max_per_page=100, 
error_out=False) return annotations.items, annotations.total @classmethod @@ -325,13 +326,16 @@ class AppAnnotationService: if not annotation: raise NotFound("Annotation not found") - annotation_hit_histories = ( - AppAnnotationHitHistory.query.filter( + stmt = ( + select(AppAnnotationHitHistory) + .filter( AppAnnotationHitHistory.app_id == app_id, AppAnnotationHitHistory.annotation_id == annotation_id, ) .order_by(AppAnnotationHitHistory.created_at.desc()) - .paginate(page=page, per_page=limit, max_per_page=100, error_out=False) + ) + annotation_hit_histories = db.paginate( + select=stmt, page=page, per_page=limit, max_per_page=100, error_out=False ) return annotation_hit_histories.items, annotation_hit_histories.total diff --git a/api/services/app_dsl_service.py b/api/services/app_dsl_service.py index a2775fe6ad..d2875180d8 100644 --- a/api/services/app_dsl_service.py +++ b/api/services/app_dsl_service.py @@ -40,7 +40,7 @@ IMPORT_INFO_REDIS_KEY_PREFIX = "app_import_info:" CHECK_DEPENDENCIES_REDIS_KEY_PREFIX = "app_check_dependencies:" IMPORT_INFO_REDIS_EXPIRY = 10 * 60 # 10 minutes DSL_MAX_SIZE = 10 * 1024 * 1024 # 10MB -CURRENT_DSL_VERSION = "0.2.0" +CURRENT_DSL_VERSION = "0.3.0" class ImportMode(StrEnum): diff --git a/api/services/app_service.py b/api/services/app_service.py index 2fae479e05..ebebf8fa58 100644 --- a/api/services/app_service.py +++ b/api/services/app_service.py @@ -18,8 +18,10 @@ from core.tools.utils.configuration import ToolParameterConfigurationManager from events.app_event import app_was_created from extensions.ext_database import db from models.account import Account -from models.model import App, AppMode, AppModelConfig +from models.model import App, AppMode, AppModelConfig, Site from models.tools import ApiToolProvider +from services.enterprise.enterprise_service import EnterpriseService +from services.feature_service import FeatureService from services.tag_service import TagService from tasks.remove_app_and_related_data_task import remove_app_and_related_data_task @@ -155,6 +157,10 @@ class AppService: app_was_created.send(app, account=account) + if FeatureService.get_system_features().webapp_auth.enabled: + # update web app setting as private + EnterpriseService.WebAppAuth.update_app_access_mode(app.id, "private") + return app def get_app(self, app: App) -> App: @@ -307,6 +313,10 @@ class AppService: db.session.delete(app) db.session.commit() + # clean up web app settings + if FeatureService.get_system_features().webapp_auth.enabled: + EnterpriseService.WebAppAuth.cleanup_webapp(app.id) + # Trigger asynchronous deletion of app and related data remove_app_and_related_data_task.delay(tenant_id=app.tenant_id, app_id=app.id) @@ -373,3 +383,15 @@ class AppService: meta["tool_icons"][tool_name] = {"background": "#252525", "content": "\ud83d\ude01"} return meta + + @staticmethod + def get_app_code_by_id(app_id: str) -> str: + """ + Get app code by app id + :param app_id: app id + :return: app code + """ + site = db.session.query(Site).filter(Site.app_id == app_id).first() + if not site: + raise ValueError(f"App with id {app_id} not found") + return str(site.code) diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index de90355ebf..4a5e9b3520 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -9,7 +9,7 @@ from collections import Counter from typing import Any, Optional from flask_login import current_user -from sqlalchemy import func +from sqlalchemy import func, select from sqlalchemy.orm import Session 
from werkzeug.exceptions import NotFound @@ -77,11 +77,13 @@ from tasks.sync_website_document_indexing_task import sync_website_document_inde class DatasetService: @staticmethod def get_datasets(page, per_page, tenant_id=None, user=None, search=None, tag_ids=None, include_all=False): - query = Dataset.query.filter(Dataset.tenant_id == tenant_id).order_by(Dataset.created_at.desc()) + query = select(Dataset).filter(Dataset.tenant_id == tenant_id).order_by(Dataset.created_at.desc()) if user: # get permitted dataset ids - dataset_permission = DatasetPermission.query.filter_by(account_id=user.id, tenant_id=tenant_id).all() + dataset_permission = ( + db.session.query(DatasetPermission).filter_by(account_id=user.id, tenant_id=tenant_id).all() + ) permitted_dataset_ids = {dp.dataset_id for dp in dataset_permission} if dataset_permission else None if user.current_role == TenantAccountRole.DATASET_OPERATOR: @@ -129,7 +131,7 @@ class DatasetService: else: return [], 0 - datasets = query.paginate(page=page, per_page=per_page, max_per_page=100, error_out=False) + datasets = db.paginate(select=query, page=page, per_page=per_page, max_per_page=100, error_out=False) return datasets.items, datasets.total @@ -153,9 +155,10 @@ class DatasetService: @staticmethod def get_datasets_by_ids(ids, tenant_id): - datasets = Dataset.query.filter(Dataset.id.in_(ids), Dataset.tenant_id == tenant_id).paginate( - page=1, per_page=len(ids), max_per_page=len(ids), error_out=False - ) + stmt = select(Dataset).filter(Dataset.id.in_(ids), Dataset.tenant_id == tenant_id) + + datasets = db.paginate(select=stmt, page=1, per_page=len(ids), max_per_page=len(ids), error_out=False) + return datasets.items, datasets.total @staticmethod @@ -174,7 +177,7 @@ class DatasetService: retrieval_model: Optional[RetrievalModel] = None, ): # check if dataset name already exists - if Dataset.query.filter_by(name=name, tenant_id=tenant_id).first(): + if db.session.query(Dataset).filter_by(name=name, tenant_id=tenant_id).first(): raise DatasetNameDuplicateError(f"Dataset with name {name} already exists.") embedding_model = None if indexing_technique == "high_quality": @@ -235,7 +238,7 @@ class DatasetService: @staticmethod def get_dataset(dataset_id) -> Optional[Dataset]: - dataset: Optional[Dataset] = Dataset.query.filter_by(id=dataset_id).first() + dataset: Optional[Dataset] = db.session.query(Dataset).filter_by(id=dataset_id).first() return dataset @staticmethod @@ -436,7 +439,7 @@ class DatasetService: # update Retrieval model filtered_data["retrieval_model"] = data["retrieval_model"] - dataset.query.filter_by(id=dataset_id).update(filtered_data) + db.session.query(Dataset).filter_by(id=dataset_id).update(filtered_data) db.session.commit() if action: @@ -460,7 +463,7 @@ class DatasetService: @staticmethod def dataset_use_check(dataset_id) -> bool: - count = AppDatasetJoin.query.filter_by(dataset_id=dataset_id).count() + count = db.session.query(AppDatasetJoin).filter_by(dataset_id=dataset_id).count() if count > 0: return True return False @@ -474,15 +477,15 @@ class DatasetService: if dataset.permission == DatasetPermissionEnum.ONLY_ME and dataset.created_by != user.id: logging.debug(f"User {user.id} does not have permission to access dataset {dataset.id}") raise NoPermissionError("You do not have permission to access this dataset.") - if dataset.permission == "partial_members": - user_permission = DatasetPermission.query.filter_by(dataset_id=dataset.id, account_id=user.id).first() - if ( - not user_permission - and dataset.tenant_id != 
user.current_tenant_id - and dataset.created_by != user.id - ): - logging.debug(f"User {user.id} does not have permission to access dataset {dataset.id}") - raise NoPermissionError("You do not have permission to access this dataset.") + if dataset.permission == DatasetPermissionEnum.PARTIAL_TEAM: + # For partial team permission, user needs explicit permission or be the creator + if dataset.created_by != user.id: + user_permission = ( + db.session.query(DatasetPermission).filter_by(dataset_id=dataset.id, account_id=user.id).first() + ) + if not user_permission: + logging.debug(f"User {user.id} does not have permission to access dataset {dataset.id}") + raise NoPermissionError("You do not have permission to access this dataset.") @staticmethod def check_dataset_operator_permission(user: Optional[Account] = None, dataset: Optional[Dataset] = None): @@ -499,23 +502,24 @@ class DatasetService: elif dataset.permission == DatasetPermissionEnum.PARTIAL_TEAM: if not any( - dp.dataset_id == dataset.id for dp in DatasetPermission.query.filter_by(account_id=user.id).all() + dp.dataset_id == dataset.id + for dp in db.session.query(DatasetPermission).filter_by(account_id=user.id).all() ): raise NoPermissionError("You do not have permission to access this dataset.") @staticmethod def get_dataset_queries(dataset_id: str, page: int, per_page: int): - dataset_queries = ( - DatasetQuery.query.filter_by(dataset_id=dataset_id) - .order_by(db.desc(DatasetQuery.created_at)) - .paginate(page=page, per_page=per_page, max_per_page=100, error_out=False) - ) + stmt = select(DatasetQuery).filter_by(dataset_id=dataset_id).order_by(db.desc(DatasetQuery.created_at)) + + dataset_queries = db.paginate(select=stmt, page=page, per_page=per_page, max_per_page=100, error_out=False) + return dataset_queries.items, dataset_queries.total @staticmethod def get_related_apps(dataset_id: str): return ( - AppDatasetJoin.query.filter(AppDatasetJoin.dataset_id == dataset_id) + db.session.query(AppDatasetJoin) + .filter(AppDatasetJoin.dataset_id == dataset_id) .order_by(db.desc(AppDatasetJoin.created_at)) .all() ) @@ -530,10 +534,14 @@ class DatasetService: } # get recent 30 days auto disable logs start_date = datetime.datetime.now() - datetime.timedelta(days=30) - dataset_auto_disable_logs = DatasetAutoDisableLog.query.filter( - DatasetAutoDisableLog.dataset_id == dataset_id, - DatasetAutoDisableLog.created_at >= start_date, - ).all() + dataset_auto_disable_logs = ( + db.session.query(DatasetAutoDisableLog) + .filter( + DatasetAutoDisableLog.dataset_id == dataset_id, + DatasetAutoDisableLog.created_at >= start_date, + ) + .all() + ) if dataset_auto_disable_logs: return { "document_ids": [log.document_id for log in dataset_auto_disable_logs], @@ -873,7 +881,9 @@ class DocumentService: @staticmethod def get_documents_position(dataset_id): - document = Document.query.filter_by(dataset_id=dataset_id).order_by(Document.position.desc()).first() + document = ( + db.session.query(Document).filter_by(dataset_id=dataset_id).order_by(Document.position.desc()).first() + ) if document: return document.position + 1 else: @@ -948,11 +958,11 @@ class DocumentService: "score_threshold_enabled": False, } - dataset.retrieval_model = ( - knowledge_config.retrieval_model.model_dump() - if knowledge_config.retrieval_model - else default_retrieval_model - ) # type: ignore + dataset.retrieval_model = ( + knowledge_config.retrieval_model.model_dump() + if knowledge_config.retrieval_model + else default_retrieval_model + ) # type: ignore documents = [] if 
knowledge_config.original_document_id: @@ -980,7 +990,7 @@ class DocumentService: created_by=account.id, ) else: - logging.warn( + logging.warning( f"Invalid process rule mode: {process_rule.mode}, can not find dataset process rule" ) return @@ -1010,13 +1020,17 @@ class DocumentService: } # check duplicate if knowledge_config.duplicate: - document = Document.query.filter_by( - dataset_id=dataset.id, - tenant_id=current_user.current_tenant_id, - data_source_type="upload_file", - enabled=True, - name=file_name, - ).first() + document = ( + db.session.query(Document) + .filter_by( + dataset_id=dataset.id, + tenant_id=current_user.current_tenant_id, + data_source_type="upload_file", + enabled=True, + name=file_name, + ) + .first() + ) if document: document.dataset_process_rule_id = dataset_process_rule.id # type: ignore document.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) @@ -1054,12 +1068,16 @@ class DocumentService: raise ValueError("No notion info list found.") exist_page_ids = [] exist_document = {} - documents = Document.query.filter_by( - dataset_id=dataset.id, - tenant_id=current_user.current_tenant_id, - data_source_type="notion_import", - enabled=True, - ).all() + documents = ( + db.session.query(Document) + .filter_by( + dataset_id=dataset.id, + tenant_id=current_user.current_tenant_id, + data_source_type="notion_import", + enabled=True, + ) + .all() + ) if documents: for document in documents: data_source_info = json.loads(document.data_source_info) @@ -1067,14 +1085,18 @@ class DocumentService: exist_document[data_source_info["notion_page_id"]] = document.id for notion_info in notion_info_list: workspace_id = notion_info.workspace_id - data_source_binding = DataSourceOauthBinding.query.filter( - db.and_( - DataSourceOauthBinding.tenant_id == current_user.current_tenant_id, - DataSourceOauthBinding.provider == "notion", - DataSourceOauthBinding.disabled == False, - DataSourceOauthBinding.source_info["workspace_id"] == f'"{workspace_id}"', + data_source_binding = ( + db.session.query(DataSourceOauthBinding) + .filter( + db.and_( + DataSourceOauthBinding.tenant_id == current_user.current_tenant_id, + DataSourceOauthBinding.provider == "notion", + DataSourceOauthBinding.disabled == False, + DataSourceOauthBinding.source_info["workspace_id"] == f'"{workspace_id}"', + ) ) - ).first() + .first() + ) if not data_source_binding: raise ValueError("Data source binding not found.") for page in notion_info.pages: @@ -1206,12 +1228,16 @@ class DocumentService: @staticmethod def get_tenant_documents_count(): - documents_count = Document.query.filter( - Document.completed_at.isnot(None), - Document.enabled == True, - Document.archived == False, - Document.tenant_id == current_user.current_tenant_id, - ).count() + documents_count = ( + db.session.query(Document) + .filter( + Document.completed_at.isnot(None), + Document.enabled == True, + Document.archived == False, + Document.tenant_id == current_user.current_tenant_id, + ) + .count() + ) return documents_count @staticmethod @@ -1278,14 +1304,18 @@ class DocumentService: notion_info_list = document_data.data_source.info_list.notion_info_list for notion_info in notion_info_list: workspace_id = notion_info.workspace_id - data_source_binding = DataSourceOauthBinding.query.filter( - db.and_( - DataSourceOauthBinding.tenant_id == current_user.current_tenant_id, - DataSourceOauthBinding.provider == "notion", - DataSourceOauthBinding.disabled == False, - DataSourceOauthBinding.source_info["workspace_id"] == f'"{workspace_id}"', 
+ data_source_binding = ( + db.session.query(DataSourceOauthBinding) + .filter( + db.and_( + DataSourceOauthBinding.tenant_id == current_user.current_tenant_id, + DataSourceOauthBinding.provider == "notion", + DataSourceOauthBinding.disabled == False, + DataSourceOauthBinding.source_info["workspace_id"] == f'"{workspace_id}"', + ) ) - ).first() + .first() + ) if not data_source_binding: raise ValueError("Data source binding not found.") for page in notion_info.pages: @@ -1328,7 +1358,7 @@ class DocumentService: db.session.commit() # update document segment update_params = {DocumentSegment.status: "re_segment"} - DocumentSegment.query.filter_by(document_id=document.id).update(update_params) + db.session.query(DocumentSegment).filter_by(document_id=document.id).update(update_params) db.session.commit() # trigger async task document_indexing_update_task.delay(document.dataset_id, document.id) @@ -1918,7 +1948,8 @@ class SegmentService: @classmethod def delete_segments(cls, segment_ids: list, document: Document, dataset: Dataset): index_node_ids = ( - DocumentSegment.query.with_entities(DocumentSegment.index_node_id) + db.session.query(DocumentSegment) + .with_entities(DocumentSegment.index_node_id) .filter( DocumentSegment.id.in_(segment_ids), DocumentSegment.dataset_id == dataset.id, @@ -2157,20 +2188,28 @@ class SegmentService: def get_child_chunks( cls, segment_id: str, document_id: str, dataset_id: str, page: int, limit: int, keyword: Optional[str] = None ): - query = ChildChunk.query.filter_by( - tenant_id=current_user.current_tenant_id, - dataset_id=dataset_id, - document_id=document_id, - segment_id=segment_id, - ).order_by(ChildChunk.position.asc()) + query = ( + select(ChildChunk) + .filter_by( + tenant_id=current_user.current_tenant_id, + dataset_id=dataset_id, + document_id=document_id, + segment_id=segment_id, + ) + .order_by(ChildChunk.position.asc()) + ) if keyword: query = query.where(ChildChunk.content.ilike(f"%{keyword}%")) - return query.paginate(page=page, per_page=limit, max_per_page=100, error_out=False) + return db.paginate(select=query, page=page, per_page=limit, max_per_page=100, error_out=False) @classmethod def get_child_chunk_by_id(cls, child_chunk_id: str, tenant_id: str) -> Optional[ChildChunk]: """Get a child chunk by its ID.""" - result = ChildChunk.query.filter(ChildChunk.id == child_chunk_id, ChildChunk.tenant_id == tenant_id).first() + result = ( + db.session.query(ChildChunk) + .filter(ChildChunk.id == child_chunk_id, ChildChunk.tenant_id == tenant_id) + .first() + ) return result if isinstance(result, ChildChunk) else None @classmethod @@ -2184,7 +2223,7 @@ class SegmentService: limit: int = 20, ): """Get segments for a document with optional filtering.""" - query = DocumentSegment.query.filter( + query = select(DocumentSegment).filter( DocumentSegment.document_id == document_id, DocumentSegment.tenant_id == tenant_id ) @@ -2194,9 +2233,8 @@ class SegmentService: if keyword: query = query.filter(DocumentSegment.content.ilike(f"%{keyword}%")) - paginated_segments = query.order_by(DocumentSegment.position.asc()).paginate( - page=page, per_page=limit, max_per_page=100, error_out=False - ) + query = query.order_by(DocumentSegment.position.asc()) + paginated_segments = db.paginate(select=query, page=page, per_page=limit, max_per_page=100, error_out=False) return paginated_segments.items, paginated_segments.total @@ -2236,9 +2274,11 @@ class SegmentService: raise ValueError(ex.description) # check segment - segment = DocumentSegment.query.filter( - 
DocumentSegment.id == segment_id, DocumentSegment.tenant_id == user_id - ).first() + segment = ( + db.session.query(DocumentSegment) + .filter(DocumentSegment.id == segment_id, DocumentSegment.tenant_id == user_id) + .first() + ) if not segment: raise NotFound("Segment not found.") @@ -2251,9 +2291,11 @@ class SegmentService: @classmethod def get_segment_by_id(cls, segment_id: str, tenant_id: str) -> Optional[DocumentSegment]: """Get a segment by its ID.""" - result = DocumentSegment.query.filter( - DocumentSegment.id == segment_id, DocumentSegment.tenant_id == tenant_id - ).first() + result = ( + db.session.query(DocumentSegment) + .filter(DocumentSegment.id == segment_id, DocumentSegment.tenant_id == tenant_id) + .first() + ) return result if isinstance(result, DocumentSegment) else None diff --git a/api/services/enterprise/enterprise_service.py b/api/services/enterprise/enterprise_service.py index abc01ddf8f..1be78d2e62 100644 --- a/api/services/enterprise/enterprise_service.py +++ b/api/services/enterprise/enterprise_service.py @@ -1,11 +1,90 @@ +from pydantic import BaseModel, Field + from services.enterprise.base import EnterpriseRequest +class WebAppSettings(BaseModel): + access_mode: str = Field( + description="Access mode for the web app. Can be 'public' or 'private'", + default="private", + alias="accessMode", + ) + + class EnterpriseService: @classmethod def get_info(cls): return EnterpriseRequest.send_request("GET", "/info") @classmethod - def get_app_web_sso_enabled(cls, app_code): - return EnterpriseRequest.send_request("GET", f"/app-sso-setting?appCode={app_code}") + def get_workspace_info(cls, tenant_id: str): + return EnterpriseRequest.send_request("GET", f"/workspace/{tenant_id}/info") + + class WebAppAuth: + @classmethod + def is_user_allowed_to_access_webapp(cls, user_id: str, app_code: str): + params = {"userId": user_id, "appCode": app_code} + data = EnterpriseRequest.send_request("GET", "/webapp/permission", params=params) + + return data.get("result", False) + + @classmethod + def get_app_access_mode_by_id(cls, app_id: str) -> WebAppSettings: + if not app_id: + raise ValueError("app_id must be provided.") + params = {"appId": app_id} + data = EnterpriseRequest.send_request("GET", "/webapp/access-mode/id", params=params) + if not data: + raise ValueError("No data found.") + return WebAppSettings(**data) + + @classmethod + def batch_get_app_access_mode_by_id(cls, app_ids: list[str]) -> dict[str, WebAppSettings]: + if not app_ids: + return {} + body = {"appIds": app_ids} + data: dict[str, str] = EnterpriseRequest.send_request("POST", "/webapp/access-mode/batch/id", json=body) + if not data: + raise ValueError("No data found.") + + if not isinstance(data["accessModes"], dict): + raise ValueError("Invalid data format.") + + ret = {} + for key, value in data["accessModes"].items(): + curr = WebAppSettings() + curr.access_mode = value + ret[key] = curr + + return ret + + @classmethod + def get_app_access_mode_by_code(cls, app_code: str) -> WebAppSettings: + if not app_code: + raise ValueError("app_code must be provided.") + params = {"appCode": app_code} + data = EnterpriseRequest.send_request("GET", "/webapp/access-mode/code", params=params) + if not data: + raise ValueError("No data found.") + return WebAppSettings(**data) + + @classmethod + def update_app_access_mode(cls, app_id: str, access_mode: str): + if not app_id: + raise ValueError("app_id must be provided.") + if access_mode not in ["public", "private", "private_all"]: + raise ValueError("access_mode must be 
either 'public', 'private', or 'private_all'") + + data = {"appId": app_id, "accessMode": access_mode} + + response = EnterpriseRequest.send_request("POST", "/webapp/access-mode", json=data) + + return response.get("result", False) + + @classmethod + def cleanup_webapp(cls, app_id: str): + if not app_id: + raise ValueError("app_id must be provided.") + + body = {"appId": app_id} + EnterpriseRequest.send_request("DELETE", "/webapp/clean", json=body) diff --git a/api/services/enterprise/mail_service.py b/api/services/enterprise/mail_service.py new file mode 100644 index 0000000000..630e7679ac --- /dev/null +++ b/api/services/enterprise/mail_service.py @@ -0,0 +1,18 @@ +from pydantic import BaseModel + +from tasks.mail_enterprise_task import send_enterprise_email_task + + +class DifyMail(BaseModel): + to: list[str] + subject: str + body: str + substitutions: dict[str, str] = {} + + +class EnterpriseMailService: + @classmethod + def send_mail(cls, mail: DifyMail): + send_enterprise_email_task.delay( + to=mail.to, subject=mail.subject, body=mail.body, substitutions=mail.substitutions + ) diff --git a/api/services/errors/workspace.py b/api/services/errors/workspace.py index 714064ffdf..577238507f 100644 --- a/api/services/errors/workspace.py +++ b/api/services/errors/workspace.py @@ -7,3 +7,7 @@ class WorkSpaceNotAllowedCreateError(BaseServiceError): class WorkSpaceNotFoundError(BaseServiceError): pass + + +class WorkspacesLimitExceededError(BaseServiceError): + pass diff --git a/api/services/external_knowledge_service.py b/api/services/external_knowledge_service.py index 6b75c29d95..eb50d79494 100644 --- a/api/services/external_knowledge_service.py +++ b/api/services/external_knowledge_service.py @@ -5,6 +5,7 @@ from typing import Any, Optional, Union, cast from urllib.parse import urlparse import httpx +from sqlalchemy import select from constants import HIDDEN_VALUE from core.helper import ssrf_proxy @@ -24,14 +25,20 @@ from services.errors.dataset import DatasetNameDuplicateError class ExternalDatasetService: @staticmethod - def get_external_knowledge_apis(page, per_page, tenant_id, search=None) -> tuple[list[ExternalKnowledgeApis], int]: - query = ExternalKnowledgeApis.query.filter(ExternalKnowledgeApis.tenant_id == tenant_id).order_by( - ExternalKnowledgeApis.created_at.desc() + def get_external_knowledge_apis( + page, per_page, tenant_id, search=None + ) -> tuple[list[ExternalKnowledgeApis], int | None]: + query = ( + select(ExternalKnowledgeApis) + .filter(ExternalKnowledgeApis.tenant_id == tenant_id) + .order_by(ExternalKnowledgeApis.created_at.desc()) ) if search: query = query.filter(ExternalKnowledgeApis.name.ilike(f"%{search}%")) - external_knowledge_apis = query.paginate(page=page, per_page=per_page, max_per_page=100, error_out=False) + external_knowledge_apis = db.paginate( + select=query, page=page, per_page=per_page, max_per_page=100, error_out=False + ) return external_knowledge_apis.items, external_knowledge_apis.total @@ -92,18 +99,18 @@ class ExternalDatasetService: @staticmethod def get_external_knowledge_api(external_knowledge_api_id: str) -> ExternalKnowledgeApis: - external_knowledge_api: Optional[ExternalKnowledgeApis] = ExternalKnowledgeApis.query.filter_by( - id=external_knowledge_api_id - ).first() + external_knowledge_api: Optional[ExternalKnowledgeApis] = ( + db.session.query(ExternalKnowledgeApis).filter_by(id=external_knowledge_api_id).first() + ) if external_knowledge_api is None: raise ValueError("api template not found") return external_knowledge_api 
@staticmethod def update_external_knowledge_api(tenant_id, user_id, external_knowledge_api_id, args) -> ExternalKnowledgeApis: - external_knowledge_api: Optional[ExternalKnowledgeApis] = ExternalKnowledgeApis.query.filter_by( - id=external_knowledge_api_id, tenant_id=tenant_id - ).first() + external_knowledge_api: Optional[ExternalKnowledgeApis] = ( + db.session.query(ExternalKnowledgeApis).filter_by(id=external_knowledge_api_id, tenant_id=tenant_id).first() + ) if external_knowledge_api is None: raise ValueError("api template not found") if args.get("settings") and args.get("settings").get("api_key") == HIDDEN_VALUE: @@ -120,9 +127,9 @@ class ExternalDatasetService: @staticmethod def delete_external_knowledge_api(tenant_id: str, external_knowledge_api_id: str): - external_knowledge_api = ExternalKnowledgeApis.query.filter_by( - id=external_knowledge_api_id, tenant_id=tenant_id - ).first() + external_knowledge_api = ( + db.session.query(ExternalKnowledgeApis).filter_by(id=external_knowledge_api_id, tenant_id=tenant_id).first() + ) if external_knowledge_api is None: raise ValueError("api template not found") @@ -131,25 +138,29 @@ class ExternalDatasetService: @staticmethod def external_knowledge_api_use_check(external_knowledge_api_id: str) -> tuple[bool, int]: - count = ExternalKnowledgeBindings.query.filter_by(external_knowledge_api_id=external_knowledge_api_id).count() + count = ( + db.session.query(ExternalKnowledgeBindings) + .filter_by(external_knowledge_api_id=external_knowledge_api_id) + .count() + ) if count > 0: return True, count return False, 0 @staticmethod def get_external_knowledge_binding_with_dataset_id(tenant_id: str, dataset_id: str) -> ExternalKnowledgeBindings: - external_knowledge_binding: Optional[ExternalKnowledgeBindings] = ExternalKnowledgeBindings.query.filter_by( - dataset_id=dataset_id, tenant_id=tenant_id - ).first() + external_knowledge_binding: Optional[ExternalKnowledgeBindings] = ( + db.session.query(ExternalKnowledgeBindings).filter_by(dataset_id=dataset_id, tenant_id=tenant_id).first() + ) if not external_knowledge_binding: raise ValueError("external knowledge binding not found") return external_knowledge_binding @staticmethod def document_create_args_validate(tenant_id: str, external_knowledge_api_id: str, process_parameter: dict): - external_knowledge_api = ExternalKnowledgeApis.query.filter_by( - id=external_knowledge_api_id, tenant_id=tenant_id - ).first() + external_knowledge_api = ( + db.session.query(ExternalKnowledgeApis).filter_by(id=external_knowledge_api_id, tenant_id=tenant_id).first() + ) if external_knowledge_api is None: raise ValueError("api template not found") settings = json.loads(external_knowledge_api.settings) @@ -212,11 +223,13 @@ class ExternalDatasetService: @staticmethod def create_external_dataset(tenant_id: str, user_id: str, args: dict) -> Dataset: # check if dataset name already exists - if Dataset.query.filter_by(name=args.get("name"), tenant_id=tenant_id).first(): + if db.session.query(Dataset).filter_by(name=args.get("name"), tenant_id=tenant_id).first(): raise DatasetNameDuplicateError(f"Dataset with name {args.get('name')} already exists.") - external_knowledge_api = ExternalKnowledgeApis.query.filter_by( - id=args.get("external_knowledge_api_id"), tenant_id=tenant_id - ).first() + external_knowledge_api = ( + db.session.query(ExternalKnowledgeApis) + .filter_by(id=args.get("external_knowledge_api_id"), tenant_id=tenant_id) + .first() + ) if external_knowledge_api is None: raise ValueError("api template not found") @@ 
-254,15 +267,17 @@ class ExternalDatasetService: external_retrieval_parameters: dict, metadata_condition: Optional[MetadataCondition] = None, ) -> list: - external_knowledge_binding = ExternalKnowledgeBindings.query.filter_by( - dataset_id=dataset_id, tenant_id=tenant_id - ).first() + external_knowledge_binding = ( + db.session.query(ExternalKnowledgeBindings).filter_by(dataset_id=dataset_id, tenant_id=tenant_id).first() + ) if not external_knowledge_binding: raise ValueError("external knowledge binding not found") - external_knowledge_api = ExternalKnowledgeApis.query.filter_by( - id=external_knowledge_binding.external_knowledge_api_id - ).first() + external_knowledge_api = ( + db.session.query(ExternalKnowledgeApis) + .filter_by(id=external_knowledge_binding.external_knowledge_api_id) + .first() + ) if not external_knowledge_api: raise ValueError("external api template not found") diff --git a/api/services/feature_service.py b/api/services/feature_service.py index c2226c319f..be85a03e80 100644 --- a/api/services/feature_service.py +++ b/api/services/feature_service.py @@ -1,6 +1,6 @@ from enum import StrEnum -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, Field from configs import dify_config from services.billing_service import BillingService @@ -27,6 +27,32 @@ class LimitationModel(BaseModel): limit: int = 0 +class LicenseLimitationModel(BaseModel): + """ + - enabled: whether this limit is enforced + - size: current usage count + - limit: maximum allowed count; 0 means unlimited + """ + + enabled: bool = Field(False, description="Whether this limit is currently active") + size: int = Field(0, description="Number of resources already consumed") + limit: int = Field(0, description="Maximum number of resources allowed; 0 means no limit") + + def is_available(self, required: int = 1) -> bool: + """ + Determine whether the requested amount can be allocated. + + Returns True if: + - this limit is not active, or + - the limit is zero (unlimited), or + - there is enough remaining quota. 
+ """ + if not self.enabled or self.limit == 0: + return True + + return (self.limit - self.size) >= required + + class LicenseStatus(StrEnum): NONE = "none" INACTIVE = "inactive" @@ -39,6 +65,27 @@ class LicenseStatus(StrEnum): class LicenseModel(BaseModel): status: LicenseStatus = LicenseStatus.NONE expired_at: str = "" + workspaces: LicenseLimitationModel = LicenseLimitationModel(enabled=False, size=0, limit=0) + + +class BrandingModel(BaseModel): + enabled: bool = False + application_title: str = "" + login_page_logo: str = "" + workspace_logo: str = "" + favicon: str = "" + + +class WebAppAuthSSOModel(BaseModel): + protocol: str = "" + + +class WebAppAuthModel(BaseModel): + enabled: bool = False + allow_sso: bool = False + sso_config: WebAppAuthSSOModel = WebAppAuthSSOModel() + allow_email_code_login: bool = False + allow_email_password_login: bool = False class FeatureModel(BaseModel): @@ -54,6 +101,8 @@ class FeatureModel(BaseModel): can_replace_logo: bool = False model_load_balancing_enabled: bool = False dataset_operator_enabled: bool = False + webapp_copyright_enabled: bool = False + workspace_members: LicenseLimitationModel = LicenseLimitationModel(enabled=False, size=0, limit=0) # pydantic configs model_config = ConfigDict(protected_namespaces=()) @@ -68,9 +117,6 @@ class KnowledgeRateLimitModel(BaseModel): class SystemFeatureModel(BaseModel): sso_enforced_for_signin: bool = False sso_enforced_for_signin_protocol: str = "" - sso_enforced_for_web: bool = False - sso_enforced_for_web_protocol: str = "" - enable_web_sso_switch_component: bool = False enable_marketplace: bool = False max_plugin_package_size: int = dify_config.PLUGIN_MAX_PACKAGE_SIZE enable_email_code_login: bool = False @@ -80,6 +126,8 @@ class SystemFeatureModel(BaseModel): is_allow_create_workspace: bool = False is_email_setup: bool = False license: LicenseModel = LicenseModel() + branding: BrandingModel = BrandingModel() + webapp_auth: WebAppAuthModel = WebAppAuthModel() class FeatureService: @@ -92,6 +140,10 @@ class FeatureService: if dify_config.BILLING_ENABLED and tenant_id: cls._fulfill_params_from_billing_api(features, tenant_id) + if dify_config.ENTERPRISE_ENABLED: + features.webapp_copyright_enabled = True + cls._fulfill_params_from_workspace_info(features, tenant_id) + return features @classmethod @@ -111,8 +163,8 @@ class FeatureService: cls._fulfill_system_params_from_env(system_features) if dify_config.ENTERPRISE_ENABLED: - system_features.enable_web_sso_switch_component = True - + system_features.branding.enabled = True + system_features.webapp_auth.enabled = True cls._fulfill_params_from_enterprise(system_features) if dify_config.MARKETPLACE_ENABLED: @@ -136,6 +188,14 @@ class FeatureService: features.dataset_operator_enabled = dify_config.DATASET_OPERATOR_ENABLED features.education.enabled = dify_config.EDUCATION_ENABLED + @classmethod + def _fulfill_params_from_workspace_info(cls, features: FeatureModel, tenant_id: str): + workspace_info = EnterpriseService.get_workspace_info(tenant_id) + if "WorkspaceMembers" in workspace_info: + features.workspace_members.size = workspace_info["WorkspaceMembers"]["used"] + features.workspace_members.limit = workspace_info["WorkspaceMembers"]["limit"] + features.workspace_members.enabled = workspace_info["WorkspaceMembers"]["enabled"] + @classmethod def _fulfill_params_from_billing_api(cls, features: FeatureModel, tenant_id: str): billing_info = BillingService.get_info(tenant_id) @@ -145,6 +205,9 @@ class FeatureService: features.billing.subscription.interval = 
billing_info["subscription"]["interval"] features.education.activated = billing_info["subscription"].get("education", False) + if features.billing.subscription.plan != "sandbox": + features.webapp_copyright_enabled = True + if "members" in billing_info: features.members.size = billing_info["members"]["size"] features.members.limit = billing_info["members"]["limit"] @@ -178,38 +241,53 @@ class FeatureService: features.knowledge_rate_limit = billing_info["knowledge_rate_limit"]["limit"] @classmethod - def _fulfill_params_from_enterprise(cls, features): + def _fulfill_params_from_enterprise(cls, features: SystemFeatureModel): enterprise_info = EnterpriseService.get_info() - if "sso_enforced_for_signin" in enterprise_info: - features.sso_enforced_for_signin = enterprise_info["sso_enforced_for_signin"] + if "SSOEnforcedForSignin" in enterprise_info: + features.sso_enforced_for_signin = enterprise_info["SSOEnforcedForSignin"] - if "sso_enforced_for_signin_protocol" in enterprise_info: - features.sso_enforced_for_signin_protocol = enterprise_info["sso_enforced_for_signin_protocol"] + if "SSOEnforcedForSigninProtocol" in enterprise_info: + features.sso_enforced_for_signin_protocol = enterprise_info["SSOEnforcedForSigninProtocol"] - if "sso_enforced_for_web" in enterprise_info: - features.sso_enforced_for_web = enterprise_info["sso_enforced_for_web"] + if "EnableEmailCodeLogin" in enterprise_info: + features.enable_email_code_login = enterprise_info["EnableEmailCodeLogin"] - if "sso_enforced_for_web_protocol" in enterprise_info: - features.sso_enforced_for_web_protocol = enterprise_info["sso_enforced_for_web_protocol"] + if "EnableEmailPasswordLogin" in enterprise_info: + features.enable_email_password_login = enterprise_info["EnableEmailPasswordLogin"] - if "enable_email_code_login" in enterprise_info: - features.enable_email_code_login = enterprise_info["enable_email_code_login"] + if "IsAllowRegister" in enterprise_info: + features.is_allow_register = enterprise_info["IsAllowRegister"] - if "enable_email_password_login" in enterprise_info: - features.enable_email_password_login = enterprise_info["enable_email_password_login"] + if "IsAllowCreateWorkspace" in enterprise_info: + features.is_allow_create_workspace = enterprise_info["IsAllowCreateWorkspace"] - if "is_allow_register" in enterprise_info: - features.is_allow_register = enterprise_info["is_allow_register"] + if "Branding" in enterprise_info: + features.branding.application_title = enterprise_info["Branding"].get("applicationTitle", "") + features.branding.login_page_logo = enterprise_info["Branding"].get("loginPageLogo", "") + features.branding.workspace_logo = enterprise_info["Branding"].get("workspaceLogo", "") + features.branding.favicon = enterprise_info["Branding"].get("favicon", "") - if "is_allow_create_workspace" in enterprise_info: - features.is_allow_create_workspace = enterprise_info["is_allow_create_workspace"] + if "WebAppAuth" in enterprise_info: + features.webapp_auth.allow_sso = enterprise_info["WebAppAuth"].get("allowSso", False) + features.webapp_auth.allow_email_code_login = enterprise_info["WebAppAuth"].get( + "allowEmailCodeLogin", False + ) + features.webapp_auth.allow_email_password_login = enterprise_info["WebAppAuth"].get( + "allowEmailPasswordLogin", False + ) + features.webapp_auth.sso_config.protocol = enterprise_info.get("SSOEnforcedForWebProtocol", "") - if "license" in enterprise_info: - license_info = enterprise_info["license"] + if "License" in enterprise_info: + license_info = enterprise_info["License"] 
if "status" in license_info: features.license.status = LicenseStatus(license_info.get("status", LicenseStatus.INACTIVE)) - if "expired_at" in license_info: - features.license.expired_at = license_info["expired_at"] + if "expiredAt" in license_info: + features.license.expired_at = license_info["expiredAt"] + + if "workspaces" in license_info: + features.license.workspaces.enabled = license_info["workspaces"]["enabled"] + features.license.workspaces.limit = license_info["workspaces"]["limit"] + features.license.workspaces.size = license_info["workspaces"]["used"] diff --git a/api/services/file_service.py b/api/services/file_service.py index 2ca6b4f9aa..2d68f30c5a 100644 --- a/api/services/file_service.py +++ b/api/services/file_service.py @@ -19,7 +19,7 @@ from core.rag.extractor.extract_processor import ExtractProcessor from extensions.ext_database import db from extensions.ext_storage import storage from models.account import Account -from models.enums import CreatedByRole +from models.enums import CreatorUserRole from models.model import EndUser, UploadFile from .errors.file import FileTooLargeError, UnsupportedFileTypeError @@ -81,7 +81,7 @@ class FileService: size=file_size, extension=extension, mime_type=mimetype, - created_by_role=(CreatedByRole.ACCOUNT if isinstance(user, Account) else CreatedByRole.END_USER), + created_by_role=(CreatorUserRole.ACCOUNT if isinstance(user, Account) else CreatorUserRole.END_USER), created_by=user.id, created_at=datetime.datetime.now(datetime.UTC).replace(tzinfo=None), used=False, @@ -133,7 +133,7 @@ class FileService: extension="txt", mime_type="text/plain", created_by=current_user.id, - created_by_role=CreatedByRole.ACCOUNT, + created_by_role=CreatorUserRole.ACCOUNT, created_at=datetime.datetime.now(datetime.UTC).replace(tzinfo=None), used=True, used_by=current_user.id, diff --git a/api/services/hit_testing_service.py b/api/services/hit_testing_service.py index 0b98065f5d..56e06cc33e 100644 --- a/api/services/hit_testing_service.py +++ b/api/services/hit_testing_service.py @@ -69,6 +69,7 @@ class HitTestingService: query: str, account: Account, external_retrieval_model: dict, + metadata_filtering_conditions: dict, ) -> dict: if dataset.provider != "external": return { @@ -82,6 +83,7 @@ class HitTestingService: dataset_id=dataset.id, query=cls.escape_query_for_search(query), external_retrieval_model=external_retrieval_model, + metadata_filtering_conditions=metadata_filtering_conditions, ) end = time.perf_counter() diff --git a/api/services/message_service.py b/api/services/message_service.py index aefab1556c..51b070ece7 100644 --- a/api/services/message_service.py +++ b/api/services/message_service.py @@ -177,6 +177,21 @@ class MessageService: return feedback + @classmethod + def get_all_messages_feedbacks(cls, app_model: App, page: int, limit: int): + """Get all feedbacks of an app""" + offset = (page - 1) * limit + feedbacks = ( + db.session.query(MessageFeedback) + .filter(MessageFeedback.app_id == app_model.id) + .order_by(MessageFeedback.created_at.desc(), MessageFeedback.id.desc()) + .limit(limit) + .offset(offset) + .all() + ) + + return [record.to_dict() for record in feedbacks] + @classmethod def get_message(cls, app_model: App, user: Optional[Union[Account, EndUser]], message_id: str): message = ( diff --git a/api/services/metadata_service.py b/api/services/metadata_service.py index c47c16f2f7..26d6d4ce18 100644 --- a/api/services/metadata_service.py +++ b/api/services/metadata_service.py @@ -20,9 +20,11 @@ class MetadataService: 
@staticmethod def create_metadata(dataset_id: str, metadata_args: MetadataArgs) -> DatasetMetadata: # check if metadata name already exists - if DatasetMetadata.query.filter_by( - tenant_id=current_user.current_tenant_id, dataset_id=dataset_id, name=metadata_args.name - ).first(): + if ( + db.session.query(DatasetMetadata) + .filter_by(tenant_id=current_user.current_tenant_id, dataset_id=dataset_id, name=metadata_args.name) + .first() + ): raise ValueError("Metadata name already exists.") for field in BuiltInField: if field.value == metadata_args.name: @@ -42,16 +44,18 @@ class MetadataService: def update_metadata_name(dataset_id: str, metadata_id: str, name: str) -> DatasetMetadata: # type: ignore lock_key = f"dataset_metadata_lock_{dataset_id}" # check if metadata name already exists - if DatasetMetadata.query.filter_by( - tenant_id=current_user.current_tenant_id, dataset_id=dataset_id, name=name - ).first(): + if ( + db.session.query(DatasetMetadata) + .filter_by(tenant_id=current_user.current_tenant_id, dataset_id=dataset_id, name=name) + .first() + ): raise ValueError("Metadata name already exists.") for field in BuiltInField: if field.value == name: raise ValueError("Metadata name already exists in Built-in fields.") try: MetadataService.knowledge_base_metadata_lock_check(dataset_id, None) - metadata = DatasetMetadata.query.filter_by(id=metadata_id).first() + metadata = db.session.query(DatasetMetadata).filter_by(id=metadata_id).first() if metadata is None: raise ValueError("Metadata not found.") old_name = metadata.name @@ -60,7 +64,9 @@ class MetadataService: metadata.updated_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) # update related documents - dataset_metadata_bindings = DatasetMetadataBinding.query.filter_by(metadata_id=metadata_id).all() + dataset_metadata_bindings = ( + db.session.query(DatasetMetadataBinding).filter_by(metadata_id=metadata_id).all() + ) if dataset_metadata_bindings: document_ids = [binding.document_id for binding in dataset_metadata_bindings] documents = DocumentService.get_document_by_ids(document_ids) @@ -82,13 +88,15 @@ class MetadataService: lock_key = f"dataset_metadata_lock_{dataset_id}" try: MetadataService.knowledge_base_metadata_lock_check(dataset_id, None) - metadata = DatasetMetadata.query.filter_by(id=metadata_id).first() + metadata = db.session.query(DatasetMetadata).filter_by(id=metadata_id).first() if metadata is None: raise ValueError("Metadata not found.") db.session.delete(metadata) # deal related documents - dataset_metadata_bindings = DatasetMetadataBinding.query.filter_by(metadata_id=metadata_id).all() + dataset_metadata_bindings = ( + db.session.query(DatasetMetadataBinding).filter_by(metadata_id=metadata_id).all() + ) if dataset_metadata_bindings: document_ids = [binding.document_id for binding in dataset_metadata_bindings] documents = DocumentService.get_document_by_ids(document_ids) @@ -193,7 +201,7 @@ class MetadataService: db.session.add(document) db.session.commit() # deal metadata binding - DatasetMetadataBinding.query.filter_by(document_id=operation.document_id).delete() + db.session.query(DatasetMetadataBinding).filter_by(document_id=operation.document_id).delete() for metadata_value in operation.metadata_list: dataset_metadata_binding = DatasetMetadataBinding( tenant_id=current_user.current_tenant_id, @@ -230,9 +238,9 @@ class MetadataService: "id": item.get("id"), "name": item.get("name"), "type": item.get("type"), - "count": DatasetMetadataBinding.query.filter_by( - metadata_id=item.get("id"), 
dataset_id=dataset.id - ).count(), + "count": db.session.query(DatasetMetadataBinding) + .filter_by(metadata_id=item.get("id"), dataset_id=dataset.id) + .count(), } for item in dataset.doc_metadata or [] if item.get("id") != "built-in" diff --git a/api/services/ops_service.py b/api/services/ops_service.py index 6b317212d1..a9c2b28476 100644 --- a/api/services/ops_service.py +++ b/api/services/ops_service.py @@ -87,7 +87,9 @@ class OpsService: :param tracing_config: tracing config :return: """ - if tracing_provider not in provider_config_map and tracing_provider: + try: + provider_config_map[tracing_provider] + except KeyError: return {"error": f"Invalid tracing provider: {tracing_provider}"} config_class, other_keys = ( @@ -150,7 +152,9 @@ class OpsService: :param tracing_config: tracing config :return: """ - if tracing_provider not in provider_config_map: + try: + provider_config_map[tracing_provider] + except KeyError: raise ValueError(f"Invalid tracing provider: {tracing_provider}") # check if trace config already exists diff --git a/api/services/plugin/plugin_service.py b/api/services/plugin/plugin_service.py index be722a59ad..a8b64f27db 100644 --- a/api/services/plugin/plugin_service.py +++ b/api/services/plugin/plugin_service.py @@ -17,7 +17,7 @@ from core.plugin.entities.plugin import ( PluginInstallation, PluginInstallationSource, ) -from core.plugin.entities.plugin_daemon import PluginInstallTask, PluginUploadResponse +from core.plugin.entities.plugin_daemon import PluginInstallTask, PluginListResponse, PluginUploadResponse from core.plugin.impl.asset import PluginAssetManager from core.plugin.impl.debugging import PluginDebuggingClient from core.plugin.impl.plugin import PluginInstaller @@ -110,6 +110,15 @@ class PluginService: plugins = manager.list_plugins(tenant_id) return plugins + @staticmethod + def list_with_total(tenant_id: str, page: int, page_size: int) -> PluginListResponse: + """ + list all plugins of the tenant + """ + manager = PluginInstaller() + plugins = manager.list_plugins_with_total(tenant_id, page, page_size) + return plugins + @staticmethod def list_installations_from_ids(tenant_id: str, ids: Sequence[str]) -> Sequence[PluginInstallation]: """ diff --git a/api/services/vector_service.py b/api/services/vector_service.py index 92422bf29d..19e37f4ee3 100644 --- a/api/services/vector_service.py +++ b/api/services/vector_service.py @@ -1,3 +1,4 @@ +import logging from typing import Optional from core.model_manager import ModelInstance, ModelManager @@ -12,21 +13,30 @@ from models.dataset import ChildChunk, Dataset, DatasetProcessRule, DocumentSegm from models.dataset import Document as DatasetDocument from services.entities.knowledge_entities.knowledge_entities import ParentMode +_logger = logging.getLogger(__name__) + class VectorService: @classmethod def create_segments_vector( cls, keywords_list: Optional[list[list[str]]], segments: list[DocumentSegment], dataset: Dataset, doc_form: str ): - documents = [] + documents: list[Document] = [] for segment in segments: if doc_form == IndexType.PARENT_CHILD_INDEX: - document = DatasetDocument.query.filter_by(id=segment.document_id).first() + dataset_document = db.session.query(DatasetDocument).filter_by(id=segment.document_id).first() + if not dataset_document: + _logger.warning( + "Expected DatasetDocument record to exist, but none was found, document_id=%s, segment_id=%s", + segment.document_id, + segment.id, + ) + continue # get the process rule processing_rule = ( db.session.query(DatasetProcessRule) - 
.filter(DatasetProcessRule.id == document.dataset_process_rule_id) + .filter(DatasetProcessRule.id == dataset_document.dataset_process_rule_id) .first() ) if not processing_rule: @@ -50,9 +60,11 @@ class VectorService: ) else: raise ValueError("The knowledge base index technique is not high quality!") - cls.generate_child_chunks(segment, document, dataset, embedding_model_instance, processing_rule, False) + cls.generate_child_chunks( + segment, dataset_document, dataset, embedding_model_instance, processing_rule, False + ) else: - document = Document( + rag_document = Document( page_content=segment.content, metadata={ "doc_id": segment.index_node_id, @@ -61,7 +73,7 @@ class VectorService: "dataset_id": segment.dataset_id, }, ) - documents.append(document) + documents.append(rag_document) if len(documents) > 0: index_processor = IndexProcessorFactory(doc_form).init_index_processor() index_processor.load(dataset, documents, with_keywords=True, keywords_list=keywords_list) diff --git a/api/services/webapp_auth_service.py b/api/services/webapp_auth_service.py new file mode 100644 index 0000000000..79d5217de7 --- /dev/null +++ b/api/services/webapp_auth_service.py @@ -0,0 +1,141 @@ +import random +from datetime import UTC, datetime, timedelta +from typing import Any, Optional, cast + +from werkzeug.exceptions import NotFound, Unauthorized + +from configs import dify_config +from controllers.web.error import WebAppAuthAccessDeniedError +from extensions.ext_database import db +from libs.helper import TokenManager +from libs.passport import PassportService +from libs.password import compare_password +from models.account import Account, AccountStatus +from models.model import App, EndUser, Site +from services.enterprise.enterprise_service import EnterpriseService +from services.errors.account import AccountLoginError, AccountNotFoundError, AccountPasswordError +from services.feature_service import FeatureService +from tasks.mail_email_code_login import send_email_code_login_mail_task + + +class WebAppAuthService: + """Service for web app authentication.""" + + @staticmethod + def authenticate(email: str, password: str) -> Account: + """authenticate account with email and password""" + + account = Account.query.filter_by(email=email).first() + if not account: + raise AccountNotFoundError() + + if account.status == AccountStatus.BANNED.value: + raise AccountLoginError("Account is banned.") + + if account.password is None or not compare_password(password, account.password, account.password_salt): + raise AccountPasswordError("Invalid email or password.") + + return cast(Account, account) + + @classmethod + def login(cls, account: Account, app_code: str, end_user_id: str) -> str: + site = db.session.query(Site).filter(Site.code == app_code).first() + if not site: + raise NotFound("Site not found.") + + access_token = cls._get_account_jwt_token(account=account, site=site, end_user_id=end_user_id) + + return access_token + + @classmethod + def get_user_through_email(cls, email: str): + account = db.session.query(Account).filter(Account.email == email).first() + if not account: + return None + + if account.status == AccountStatus.BANNED.value: + raise Unauthorized("Account is banned.") + + return account + + @classmethod + def send_email_code_login_email( + cls, account: Optional[Account] = None, email: Optional[str] = None, language: Optional[str] = "en-US" + ): + email = account.email if account else email + if email is None: + raise ValueError("Email must be provided.") + + code = 
"".join([str(random.randint(0, 9)) for _ in range(6)]) + token = TokenManager.generate_token( + account=account, email=email, token_type="webapp_email_code_login", additional_data={"code": code} + ) + send_email_code_login_mail_task.delay( + language=language, + to=account.email if account else email, + code=code, + ) + + return token + + @classmethod + def get_email_code_login_data(cls, token: str) -> Optional[dict[str, Any]]: + return TokenManager.get_token_data(token, "webapp_email_code_login") + + @classmethod + def revoke_email_code_login_token(cls, token: str): + TokenManager.revoke_token(token, "webapp_email_code_login") + + @classmethod + def create_end_user(cls, app_code, email) -> EndUser: + site = db.session.query(Site).filter(Site.code == app_code).first() + if not site: + raise NotFound("Site not found.") + app_model = db.session.query(App).filter(App.id == site.app_id).first() + if not app_model: + raise NotFound("App not found.") + end_user = EndUser( + tenant_id=app_model.tenant_id, + app_id=app_model.id, + type="browser", + is_anonymous=False, + session_id=email, + name="enterpriseuser", + external_user_id="enterpriseuser", + ) + db.session.add(end_user) + db.session.commit() + + return end_user + + @classmethod + def _validate_user_accessibility(cls, account: Account, app_code: str): + """Check if the user is allowed to access the app.""" + system_features = FeatureService.get_system_features() + if system_features.webapp_auth.enabled: + app_settings = EnterpriseService.WebAppAuth.get_app_access_mode_by_code(app_code=app_code) + + if ( + app_settings.access_mode != "public" + and not EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp(account.id, app_code=app_code) + ): + raise WebAppAuthAccessDeniedError() + + @classmethod + def _get_account_jwt_token(cls, account: Account, site: Site, end_user_id: str) -> str: + exp_dt = datetime.now(UTC) + timedelta(hours=dify_config.ACCESS_TOKEN_EXPIRE_MINUTES * 24) + exp = int(exp_dt.timestamp()) + + payload = { + "iss": site.id, + "sub": "Web API Passport", + "app_id": site.app_id, + "app_code": site.code, + "user_id": account.id, + "end_user_id": end_user_id, + "token_source": "webapp", + "exp": exp, + } + + token: str = PassportService().issue(payload) + return token diff --git a/api/services/workflow_app_service.py b/api/services/workflow_app_service.py index e526517b51..a899ebe278 100644 --- a/api/services/workflow_app_service.py +++ b/api/services/workflow_app_service.py @@ -5,7 +5,7 @@ from sqlalchemy import and_, func, or_, select from sqlalchemy.orm import Session from models import App, EndUser, WorkflowAppLog, WorkflowRun -from models.enums import CreatedByRole +from models.enums import CreatorUserRole from models.workflow import WorkflowRunStatus @@ -58,7 +58,7 @@ class WorkflowAppService: stmt = stmt.outerjoin( EndUser, - and_(WorkflowRun.created_by == EndUser.id, WorkflowRun.created_by_role == CreatedByRole.END_USER), + and_(WorkflowRun.created_by == EndUser.id, WorkflowRun.created_by_role == CreatorUserRole.END_USER), ).where(or_(*keyword_conditions)) if status: diff --git a/api/services/workflow_run_service.py b/api/services/workflow_run_service.py index f7c4f500a8..21366a4552 100644 --- a/api/services/workflow_run_service.py +++ b/api/services/workflow_run_service.py @@ -1,17 +1,21 @@ import threading +from collections.abc import Sequence from typing import Optional import contexts -from core.workflow.repository import RepositoryFactory +from core.repositories import 
SQLAlchemyWorkflowNodeExecutionRepository from core.workflow.repository.workflow_node_execution_repository import OrderConfig from extensions.ext_database import db from libs.infinite_scroll_pagination import InfiniteScrollPagination -from models.enums import WorkflowRunTriggeredFrom -from models.model import App -from models.workflow import ( +from models import ( + Account, + App, + EndUser, WorkflowNodeExecution, WorkflowRun, + WorkflowRunTriggeredFrom, ) +from models.workflow import WorkflowNodeExecutionTriggeredFrom class WorkflowRunService: @@ -116,7 +120,12 @@ class WorkflowRunService: return workflow_run - def get_workflow_run_node_executions(self, app_model: App, run_id: str) -> list[WorkflowNodeExecution]: + def get_workflow_run_node_executions( + self, + app_model: App, + run_id: str, + user: Account | EndUser, + ) -> Sequence[WorkflowNodeExecution]: """ Get workflow run node execution list """ @@ -128,17 +137,17 @@ class WorkflowRunService: if not workflow_run: return [] - # Use the repository to get the node executions - repository = RepositoryFactory.create_workflow_node_execution_repository( - params={ - "tenant_id": app_model.tenant_id, - "app_id": app_model.id, - "session_factory": db.session.get_bind(), - } + repository = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=db.engine, + user=user, + app_id=app_model.id, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, ) - # Use the repository to get the node executions with ordering + # Use the repository to get the database models directly order_config = OrderConfig(order_by=["index"], order_direction="desc") - node_executions = repository.get_by_workflow_run(workflow_run_id=run_id, order_config=order_config) + workflow_node_executions = repository.get_db_models_by_workflow_run( + workflow_run_id=run_id, order_config=order_config + ) - return list(node_executions) + return workflow_node_executions diff --git a/api/services/workflow_service.py b/api/services/workflow_service.py index ebe65e5d5f..50bb8f40ae 100644 --- a/api/services/workflow_service.py +++ b/api/services/workflow_service.py @@ -10,9 +10,10 @@ from sqlalchemy.orm import Session from core.app.apps.advanced_chat.app_config_manager import AdvancedChatAppConfigManager from core.app.apps.workflow.app_config_manager import WorkflowAppConfigManager -from core.model_runtime.utils.encoders import jsonable_encoder +from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository from core.variables import Variable from core.workflow.entities.node_entities import NodeRunResult +from core.workflow.entities.node_execution_entities import NodeExecution, NodeExecutionStatus from core.workflow.errors import WorkflowNodeRunFailedError from core.workflow.graph_engine.entities.event import InNodeEvent from core.workflow.nodes import NodeType @@ -21,12 +22,10 @@ from core.workflow.nodes.enums import ErrorStrategy from core.workflow.nodes.event import RunCompletedEvent from core.workflow.nodes.event.types import NodeEvent from core.workflow.nodes.node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING -from core.workflow.repository import RepositoryFactory from core.workflow.workflow_entry import WorkflowEntry from events.app_event import app_draft_workflow_was_synced, app_published_workflow_was_updated from extensions.ext_database import db from models.account import Account -from models.enums import CreatedByRole from models.model import App, AppMode from models.tools import WorkflowToolProvider from models.workflow import ( @@ -268,37 
+267,37 @@ class WorkflowService: # run draft workflow node start_at = time.perf_counter() - workflow_node_execution = self._handle_node_run_result( - getter=lambda: WorkflowEntry.single_step_run( + node_execution = self._handle_node_run_result( + invoke_node_fn=lambda: WorkflowEntry.single_step_run( workflow=draft_workflow, node_id=node_id, user_inputs=user_inputs, user_id=account.id, ), start_at=start_at, - tenant_id=app_model.tenant_id, node_id=node_id, ) - workflow_node_execution.app_id = app_model.id - workflow_node_execution.created_by = account.id - workflow_node_execution.workflow_id = draft_workflow.id + # Set workflow_id on the NodeExecution + node_execution.workflow_id = draft_workflow.id - # Use the repository to save the workflow node execution - repository = RepositoryFactory.create_workflow_node_execution_repository( - params={ - "tenant_id": app_model.tenant_id, - "app_id": app_model.id, - "session_factory": db.session.get_bind(), - } + # Create repository and save the node execution + repository = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=db.engine, + user=account, + app_id=app_model.id, + triggered_from=WorkflowNodeExecutionTriggeredFrom.SINGLE_STEP, ) - repository.save(workflow_node_execution) + repository.save(node_execution) + + # Convert node_execution to WorkflowNodeExecution after save + workflow_node_execution = repository.to_db_model(node_execution) return workflow_node_execution def run_free_workflow_node( self, node_data: dict, tenant_id: str, user_id: str, node_id: str, user_inputs: dict[str, Any] - ) -> WorkflowNodeExecution: + ) -> NodeExecution: """ Run draft workflow node """ @@ -306,7 +305,7 @@ class WorkflowService: start_at = time.perf_counter() workflow_node_execution = self._handle_node_run_result( - getter=lambda: WorkflowEntry.run_free_node( + invoke_node_fn=lambda: WorkflowEntry.run_free_node( node_id=node_id, node_data=node_data, tenant_id=tenant_id, @@ -314,7 +313,6 @@ class WorkflowService: user_inputs=user_inputs, ), start_at=start_at, - tenant_id=tenant_id, node_id=node_id, ) @@ -322,21 +320,12 @@ class WorkflowService: def _handle_node_run_result( self, - getter: Callable[[], tuple[BaseNode, Generator[NodeEvent | InNodeEvent, None, None]]], + invoke_node_fn: Callable[[], tuple[BaseNode, Generator[NodeEvent | InNodeEvent, None, None]]], start_at: float, - tenant_id: str, node_id: str, - ) -> WorkflowNodeExecution: - """ - Handle node run result - - :param getter: Callable[[], tuple[BaseNode, Generator[RunEvent | InNodeEvent, None, None]]] - :param start_at: float - :param tenant_id: str - :param node_id: str - """ + ) -> NodeExecution: try: - node_instance, generator = getter() + node_instance, generator = invoke_node_fn() node_run_result: NodeRunResult | None = None for event in generator: @@ -385,20 +374,21 @@ class WorkflowService: node_run_result = None error = e.error - workflow_node_execution = WorkflowNodeExecution() - workflow_node_execution.id = str(uuid4()) - workflow_node_execution.tenant_id = tenant_id - workflow_node_execution.triggered_from = WorkflowNodeExecutionTriggeredFrom.SINGLE_STEP.value - workflow_node_execution.index = 1 - workflow_node_execution.node_id = node_id - workflow_node_execution.node_type = node_instance.node_type - workflow_node_execution.title = node_instance.node_data.title - workflow_node_execution.elapsed_time = time.perf_counter() - start_at - workflow_node_execution.created_by_role = CreatedByRole.ACCOUNT.value - workflow_node_execution.created_at = 
datetime.now(UTC).replace(tzinfo=None) - workflow_node_execution.finished_at = datetime.now(UTC).replace(tzinfo=None) + # Create a NodeExecution domain model + node_execution = NodeExecution( + id=str(uuid4()), + workflow_id="", # This is a single-step execution, so no workflow ID + index=1, + node_id=node_id, + node_type=node_instance.node_type, + title=node_instance.node_data.title, + elapsed_time=time.perf_counter() - start_at, + created_at=datetime.now(UTC).replace(tzinfo=None), + finished_at=datetime.now(UTC).replace(tzinfo=None), + ) + if run_succeeded and node_run_result: - # create workflow node execution + # Set inputs, process_data, and outputs as dictionaries (not JSON strings) inputs = WorkflowEntry.handle_special_values(node_run_result.inputs) if node_run_result.inputs else None process_data = ( WorkflowEntry.handle_special_values(node_run_result.process_data) @@ -407,23 +397,23 @@ class WorkflowService: ) outputs = WorkflowEntry.handle_special_values(node_run_result.outputs) if node_run_result.outputs else None - workflow_node_execution.inputs = json.dumps(inputs) - workflow_node_execution.process_data = json.dumps(process_data) - workflow_node_execution.outputs = json.dumps(outputs) - workflow_node_execution.execution_metadata = ( - json.dumps(jsonable_encoder(node_run_result.metadata)) if node_run_result.metadata else None - ) - if node_run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED: - workflow_node_execution.status = WorkflowNodeExecutionStatus.SUCCEEDED.value - elif node_run_result.status == WorkflowNodeExecutionStatus.EXCEPTION: - workflow_node_execution.status = WorkflowNodeExecutionStatus.EXCEPTION.value - workflow_node_execution.error = node_run_result.error - else: - # create workflow node execution - workflow_node_execution.status = WorkflowNodeExecutionStatus.FAILED.value - workflow_node_execution.error = error + node_execution.inputs = inputs + node_execution.process_data = process_data + node_execution.outputs = outputs + node_execution.metadata = node_run_result.metadata - return workflow_node_execution + # Map status from WorkflowNodeExecutionStatus to NodeExecutionStatus + if node_run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED: + node_execution.status = NodeExecutionStatus.SUCCEEDED + elif node_run_result.status == WorkflowNodeExecutionStatus.EXCEPTION: + node_execution.status = NodeExecutionStatus.EXCEPTION + node_execution.error = node_run_result.error + else: + # Set failed status and error + node_execution.status = NodeExecutionStatus.FAILED + node_execution.error = error + + return node_execution def convert_to_workflow(self, app_model: App, account: Account, args: dict) -> App: """ @@ -518,11 +508,11 @@ class WorkflowService: raise DraftWorkflowDeletionError("Cannot delete draft workflow versions") # Check if this workflow is currently referenced by an app - stmt = select(App).where(App.workflow_id == workflow_id) - app = session.scalar(stmt) + app_stmt = select(App).where(App.workflow_id == workflow_id) + app = session.scalar(app_stmt) if app: # Cannot delete a workflow that's currently in use by an app - raise WorkflowInUseError(f"Cannot delete workflow that is currently in use by app '{app.name}'") + raise WorkflowInUseError(f"Cannot delete workflow that is currently in use by app '{app.id}'") # Don't use workflow.tool_published as it's not accurate for specific workflow versions # Check if there's a tool provider using this specific workflow version diff --git a/api/tasks/add_document_to_index_task.py 
b/api/tasks/add_document_to_index_task.py index be88881efc..75d648e1b7 100644 --- a/api/tasks/add_document_to_index_task.py +++ b/api/tasks/add_document_to_index_task.py @@ -111,7 +111,7 @@ def add_document_to_index_task(dataset_document_id: str): logging.exception("add document to index failed") dataset_document.enabled = False dataset_document.disabled_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) - dataset_document.status = "error" + dataset_document.indexing_status = "error" dataset_document.error = str(e) db.session.commit() finally: diff --git a/api/tasks/create_segment_to_index_task.py b/api/tasks/create_segment_to_index_task.py index 4500b2a44b..a3f811faa1 100644 --- a/api/tasks/create_segment_to_index_task.py +++ b/api/tasks/create_segment_to_index_task.py @@ -41,7 +41,7 @@ def create_segment_to_index_task(segment_id: str, keywords: Optional[list[str]] DocumentSegment.status: "indexing", DocumentSegment.indexing_at: datetime.datetime.now(datetime.UTC).replace(tzinfo=None), } - DocumentSegment.query.filter_by(id=segment.id).update(update_params) + db.session.query(DocumentSegment).filter_by(id=segment.id).update(update_params) db.session.commit() document = Document( page_content=segment.content, @@ -78,7 +78,7 @@ def create_segment_to_index_task(segment_id: str, keywords: Optional[list[str]] DocumentSegment.status: "completed", DocumentSegment.completed_at: datetime.datetime.now(datetime.UTC).replace(tzinfo=None), } - DocumentSegment.query.filter_by(id=segment.id).update(update_params) + db.session.query(DocumentSegment).filter_by(id=segment.id).update(update_params) db.session.commit() end_at = time.perf_counter() diff --git a/api/tasks/deal_dataset_vector_index_task.py b/api/tasks/deal_dataset_vector_index_task.py index 075453e283..a27207f2f1 100644 --- a/api/tasks/deal_dataset_vector_index_task.py +++ b/api/tasks/deal_dataset_vector_index_task.py @@ -24,7 +24,7 @@ def deal_dataset_vector_index_task(dataset_id: str, action: str): start_at = time.perf_counter() try: - dataset = Dataset.query.filter_by(id=dataset_id).first() + dataset = db.session.query(Dataset).filter_by(id=dataset_id).first() if not dataset: raise Exception("Dataset not found") diff --git a/api/tasks/document_indexing_sync_task.py b/api/tasks/document_indexing_sync_task.py index 2e68dcb0fb..fd1f6265b4 100644 --- a/api/tasks/document_indexing_sync_task.py +++ b/api/tasks/document_indexing_sync_task.py @@ -44,14 +44,18 @@ def document_indexing_sync_task(dataset_id: str, document_id: str): page_id = data_source_info["notion_page_id"] page_type = data_source_info["type"] page_edited_time = data_source_info["last_edited_time"] - data_source_binding = DataSourceOauthBinding.query.filter( - db.and_( - DataSourceOauthBinding.tenant_id == document.tenant_id, - DataSourceOauthBinding.provider == "notion", - DataSourceOauthBinding.disabled == False, - DataSourceOauthBinding.source_info["workspace_id"] == f'"{workspace_id}"', + data_source_binding = ( + db.session.query(DataSourceOauthBinding) + .filter( + db.and_( + DataSourceOauthBinding.tenant_id == document.tenant_id, + DataSourceOauthBinding.provider == "notion", + DataSourceOauthBinding.disabled == False, + DataSourceOauthBinding.source_info["workspace_id"] == f'"{workspace_id}"', + ) ) - ).first() + .first() + ) if not data_source_binding: raise ValueError("Data source binding not found.") diff --git a/api/tasks/mail_email_code_login.py b/api/tasks/mail_email_code_login.py index 5dc935548f..ddad331725 100644 --- a/api/tasks/mail_email_code_login.py 
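
Several of the task hunks here (create_segment_to_index_task, deal_dataset_vector_index_task, document_indexing_sync_task) likewise move reads and bulk updates onto db.session. Below is a hedged, self-contained sketch of the column-keyed bulk update pattern those tasks switch to, using an in-memory SQLite engine and a trimmed-down DocumentSegment model as stand-ins rather than the real schema.

import datetime

from sqlalchemy import Column, DateTime, String, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class DocumentSegment(Base):
    # Minimal stand-in for the real model, assumed here just to exercise the update.
    __tablename__ = "document_segments"
    id = Column(String, primary_key=True)
    status = Column(String, default="waiting")
    indexing_at = Column(DateTime, nullable=True)


engine = create_engine("sqlite:///:memory:")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(DocumentSegment(id="seg-1"))
    session.commit()

    # Bulk UPDATE issued directly against the session: a dict keyed by column
    # attributes, no ORM instances loaded first.
    update_params = {
        DocumentSegment.status: "indexing",
        DocumentSegment.indexing_at: datetime.datetime.now(datetime.UTC).replace(tzinfo=None),
    }
    session.query(DocumentSegment).filter_by(id="seg-1").update(update_params)
    session.commit()

    assert session.get(DocumentSegment, "seg-1").status == "indexing"
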
+++ b/api/tasks/mail_email_code_login.py @@ -6,6 +6,7 @@ from celery import shared_task # type: ignore from flask import render_template from extensions.ext_mail import mail +from services.feature_service import FeatureService @shared_task(queue="mail") @@ -25,10 +26,24 @@ def send_email_code_login_mail_task(language: str, to: str, code: str): # send email code login mail using different languages try: if language == "zh-Hans": - html_content = render_template("email_code_login_mail_template_zh-CN.html", to=to, code=code) + template = "email_code_login_mail_template_zh-CN.html" + system_features = FeatureService.get_system_features() + if system_features.branding.enabled: + application_title = system_features.branding.application_title + template = "without-brand/email_code_login_mail_template_zh-CN.html" + html_content = render_template(template, to=to, code=code, application_title=application_title) + else: + html_content = render_template(template, to=to, code=code) mail.send(to=to, subject="邮箱验证码", html=html_content) else: - html_content = render_template("email_code_login_mail_template_en-US.html", to=to, code=code) + template = "email_code_login_mail_template_en-US.html" + system_features = FeatureService.get_system_features() + if system_features.branding.enabled: + application_title = system_features.branding.application_title + template = "without-brand/email_code_login_mail_template_en-US.html" + html_content = render_template(template, to=to, code=code, application_title=application_title) + else: + html_content = render_template(template, to=to, code=code) mail.send(to=to, subject="Email Code", html=html_content) end_at = time.perf_counter() diff --git a/api/tasks/mail_enterprise_task.py b/api/tasks/mail_enterprise_task.py new file mode 100644 index 0000000000..b9d8fd55df --- /dev/null +++ b/api/tasks/mail_enterprise_task.py @@ -0,0 +1,33 @@ +import logging +import time + +import click +from celery import shared_task # type: ignore +from flask import render_template_string + +from extensions.ext_mail import mail + + +@shared_task(queue="mail") +def send_enterprise_email_task(to, subject, body, substitutions): + if not mail.is_inited(): + return + + logging.info(click.style("Start enterprise mail to {} with subject {}".format(to, subject), fg="green")) + start_at = time.perf_counter() + + try: + html_content = render_template_string(body, **substitutions) + + if isinstance(to, list): + for t in to: + mail.send(to=t, subject=subject, html=html_content) + else: + mail.send(to=to, subject=subject, html=html_content) + + end_at = time.perf_counter() + logging.info( + click.style("Send enterprise mail to {} succeeded: latency: {}".format(to, end_at - start_at), fg="green") + ) + except Exception: + logging.exception("Send enterprise mail to {} failed".format(to)) diff --git a/api/tasks/mail_invite_member_task.py b/api/tasks/mail_invite_member_task.py index 3094527fd4..7ca85c7f2d 100644 --- a/api/tasks/mail_invite_member_task.py +++ b/api/tasks/mail_invite_member_task.py @@ -7,6 +7,7 @@ from flask import render_template from configs import dify_config from extensions.ext_mail import mail +from services.feature_service import FeatureService @shared_task(queue="mail") @@ -33,23 +34,45 @@ def send_invite_member_mail_task(language: str, to: str, token: str, inviter_nam try: url = f"{dify_config.CONSOLE_WEB_URL}/activate?token={token}" if language == "zh-Hans": - html_content = render_template( - "invite_member_mail_template_zh-CN.html", - to=to, - inviter_name=inviter_name, - 
workspace_name=workspace_name, - url=url, - ) - mail.send(to=to, subject="立即加入 Dify 工作空间", html=html_content) + template = "invite_member_mail_template_zh-CN.html" + system_features = FeatureService.get_system_features() + if system_features.branding.enabled: + application_title = system_features.branding.application_title + template = "without-brand/invite_member_mail_template_zh-CN.html" + html_content = render_template( + template, + to=to, + inviter_name=inviter_name, + workspace_name=workspace_name, + url=url, + application_title=application_title, + ) + mail.send(to=to, subject=f"立即加入 {application_title} 工作空间", html=html_content) + else: + html_content = render_template( + template, to=to, inviter_name=inviter_name, workspace_name=workspace_name, url=url + ) + mail.send(to=to, subject="立即加入 Dify 工作空间", html=html_content) else: - html_content = render_template( - "invite_member_mail_template_en-US.html", - to=to, - inviter_name=inviter_name, - workspace_name=workspace_name, - url=url, - ) - mail.send(to=to, subject="Join Dify Workspace Now", html=html_content) + template = "invite_member_mail_template_en-US.html" + system_features = FeatureService.get_system_features() + if system_features.branding.enabled: + application_title = system_features.branding.application_title + template = "without-brand/invite_member_mail_template_en-US.html" + html_content = render_template( + template, + to=to, + inviter_name=inviter_name, + workspace_name=workspace_name, + url=url, + application_title=application_title, + ) + mail.send(to=to, subject=f"Join {application_title} Workspace Now", html=html_content) + else: + html_content = render_template( + template, to=to, inviter_name=inviter_name, workspace_name=workspace_name, url=url + ) + mail.send(to=to, subject="Join Dify Workspace Now", html=html_content) end_at = time.perf_counter() logging.info( diff --git a/api/tasks/mail_reset_password_task.py b/api/tasks/mail_reset_password_task.py index d5be94431b..d4f4482a48 100644 --- a/api/tasks/mail_reset_password_task.py +++ b/api/tasks/mail_reset_password_task.py @@ -6,6 +6,7 @@ from celery import shared_task # type: ignore from flask import render_template from extensions.ext_mail import mail +from services.feature_service import FeatureService @shared_task(queue="mail") @@ -25,11 +26,27 @@ def send_reset_password_mail_task(language: str, to: str, code: str): # send reset password mail using different languages try: if language == "zh-Hans": - html_content = render_template("reset_password_mail_template_zh-CN.html", to=to, code=code) - mail.send(to=to, subject="设置您的 Dify 密码", html=html_content) + template = "reset_password_mail_template_zh-CN.html" + system_features = FeatureService.get_system_features() + if system_features.branding.enabled: + application_title = system_features.branding.application_title + template = "without-brand/reset_password_mail_template_zh-CN.html" + html_content = render_template(template, to=to, code=code, application_title=application_title) + mail.send(to=to, subject=f"设置您的 {application_title} 密码", html=html_content) + else: + html_content = render_template(template, to=to, code=code) + mail.send(to=to, subject="设置您的 Dify 密码", html=html_content) else: - html_content = render_template("reset_password_mail_template_en-US.html", to=to, code=code) - mail.send(to=to, subject="Set Your Dify Password", html=html_content) + template = "reset_password_mail_template_en-US.html" + system_features = FeatureService.get_system_features() + if system_features.branding.enabled: + 
application_title = system_features.branding.application_title + template = "without-brand/reset_password_mail_template_en-US.html" + html_content = render_template(template, to=to, code=code, application_title=application_title) + mail.send(to=to, subject=f"Set Your {application_title} Password", html=html_content) + else: + html_content = render_template(template, to=to, code=code) + mail.send(to=to, subject="Set Your Dify Password", html=html_content) end_at = time.perf_counter() logging.info( diff --git a/api/tasks/remove_app_and_related_data_task.py b/api/tasks/remove_app_and_related_data_task.py index f8b804d93e..2690eda467 100644 --- a/api/tasks/remove_app_and_related_data_task.py +++ b/api/tasks/remove_app_and_related_data_task.py @@ -7,13 +7,12 @@ from celery import shared_task # type: ignore from sqlalchemy import delete from sqlalchemy.exc import SQLAlchemyError -from core.workflow.repository import RepositoryFactory from extensions.ext_database import db -from models.dataset import AppDatasetJoin -from models.model import ( +from models import ( ApiToken, AppAnnotationHitHistory, AppAnnotationSetting, + AppDatasetJoin, AppMCPServer, AppModelConfig, Conversation, @@ -32,7 +31,7 @@ from models.model import ( ) from models.tools import WorkflowToolProvider from models.web import PinnedConversation, SavedMessage -from models.workflow import ConversationVariable, Workflow, WorkflowAppLog, WorkflowRun +from models.workflow import ConversationVariable, Workflow, WorkflowAppLog, WorkflowNodeExecution, WorkflowRun @shared_task(queue="app_deletion", bind=True, max_retries=3) @@ -202,20 +201,18 @@ def _delete_app_workflow_runs(tenant_id: str, app_id: str): def _delete_app_workflow_node_executions(tenant_id: str, app_id: str): - # Create a repository instance for WorkflowNodeExecution - repository = RepositoryFactory.create_workflow_node_execution_repository( - params={ - "tenant_id": tenant_id, - "app_id": app_id, - "session_factory": db.session.get_bind(), - } + def del_workflow_node_execution(workflow_node_execution_id: str): + db.session.query(WorkflowNodeExecution).filter(WorkflowNodeExecution.id == workflow_node_execution_id).delete( + synchronize_session=False + ) + + _delete_records( + """select id from workflow_node_executions where tenant_id=:tenant_id and app_id=:app_id limit 1000""", + {"tenant_id": tenant_id, "app_id": app_id}, + del_workflow_node_execution, + "workflow node execution", ) - # Use the clear method to delete all records for this tenant_id and app_id - repository.clear() - - logging.info(click.style(f"Deleted workflow node executions for tenant {tenant_id} and app {app_id}", fg="green")) - def _delete_app_workflow_app_logs(tenant_id: str, app_id: str): def del_workflow_app_log(workflow_app_log_id: str): diff --git a/api/templates/clean_document_job_mail_template-US.html b/api/templates/clean_document_job_mail_template-US.html index 0f7ddc62a9..2d8f78b46a 100644 --- a/api/templates/clean_document_job_mail_template-US.html +++ b/api/templates/clean_document_job_mail_template-US.html @@ -69,7 +69,7 @@
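
The mail task hunks above repeat the same branching: when enterprise branding is enabled, a "without-brand" template and a subject built from the configured application title replace the Dify defaults. The sketch below factors that selection into a helper; BrandingSettings and the return shape are simplified assumptions, and the actual Flask render_template and mail.send calls are deliberately left out.

from dataclasses import dataclass


@dataclass
class BrandingSettings:
    # Simplified stand-in for system_features.branding in the diff.
    enabled: bool = False
    application_title: str = ""


def pick_reset_password_mail(language: str, branding: BrandingSettings) -> tuple[str, str, dict]:
    """Return (template, subject, extra_context) for the reset-password mail."""
    zh = language == "zh-Hans"
    template = "reset_password_mail_template_zh-CN.html" if zh else "reset_password_mail_template_en-US.html"
    subject = "设置您的 Dify 密码" if zh else "Set Your Dify Password"
    context: dict = {}

    if branding.enabled:
        # Branded deployments switch to the neutral template and custom title.
        template = f"without-brand/{template}"
        title = branding.application_title
        subject = f"设置您的 {title} 密码" if zh else f"Set Your {title} Password"
        context["application_title"] = title

    return template, subject, context


template, subject, context = pick_reset_password_mail("en-US", BrandingSettings(enabled=True, application_title="Acme AI"))
assert template == "without-brand/reset_password_mail_template_en-US.html"
assert subject == "Set Your Acme AI Password"

Factoring the branch this way would let each Celery task stay a thin wrapper around render_template and mail.send, instead of duplicating the branding check once per language as the current hunks do.
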