diff --git a/README.md b/README.md index 5d8221bcbd..0dd0d86254 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,7 @@ README tlhIngan Hol README in Korean README بالعربية + Türkçe README

diff --git a/README_AR.md b/README_AR.md index c91602721e..25229ef460 100644 --- a/README_AR.md +++ b/README_AR.md @@ -37,6 +37,7 @@ README tlhIngan Hol README in Korean README بالعربية + Türkçe README

diff --git a/README_CN.md b/README_CN.md index 8224001f1a..ebbbe25902 100644 --- a/README_CN.md +++ b/README_CN.md @@ -36,6 +36,7 @@ 上个月的提交次数 上个月的提交次数 上个月的提交次数 + Türkçe README
diff --git a/README_ES.md b/README_ES.md index 84c06a2503..52b78da47d 100644 --- a/README_ES.md +++ b/README_ES.md @@ -36,6 +36,7 @@ Actividad de Commits el último mes Actividad de Commits el último mes Actividad de Commits el último mes + Türkçe README

diff --git a/README_FR.md b/README_FR.md index 768c9390d8..17a0881284 100644 --- a/README_FR.md +++ b/README_FR.md @@ -36,6 +36,7 @@ Commits le mois dernier Commits le mois dernier Commits le mois dernier + Türkçe README

diff --git a/README_JA.md b/README_JA.md index f4cccd5271..5828379a74 100644 --- a/README_JA.md +++ b/README_JA.md @@ -36,6 +36,7 @@ 先月のコミット 先月のコミット 先月のコミット + Türkçe README

diff --git a/README_KL.md b/README_KL.md index 6a15f39bc6..64d2d24858 100644 --- a/README_KL.md +++ b/README_KL.md @@ -36,6 +36,7 @@ Commits last month Commits last month Commits last month + Türkçe README

diff --git a/README_KR.md b/README_KR.md index bb15fac8ef..2d7db3df4c 100644 --- a/README_KR.md +++ b/README_KR.md @@ -36,6 +36,7 @@ README en Français README tlhIngan Hol 한국어 README + Türkçe README

diff --git a/README_TR.md b/README_TR.md new file mode 100644 index 0000000000..2ae7d440a8 --- /dev/null +++ b/README_TR.md @@ -0,0 +1,253 @@ +![cover-v5-optimized](https://github.com/langgenius/dify/assets/13230914/f9e19af5-61ba-4119-b926-d10c4c06ebab) + +

+ Dify Bulut · + Kendi Sunucunuzda Barındırma · + Dokümantasyon · + Kurumsal Sorgu +

+ +

+ + Statik Rozet + + Statik Rozet + + Discord'da sohbet et + + Twitter'da takip et + + Docker Çekmeleri + + Geçen ay yapılan commitler + + Kapatılan sorunlar + + Tartışma gönderileri +

+ +

+ README in English + 简体中文版自述文件 + 日本語のREADME + README en Español + README en Français + README tlhIngan Hol + README in Korean + README بالعربية + Türkçe README +

+ + +Dify, açık kaynaklı bir LLM uygulama geliştirme platformudur. Sezgisel arayüzü, AI iş akışı, RAG pipeline'ı, ajan yetenekleri, model yönetimi, gözlemlenebilirlik özellikleri ve daha fazlasını birleştirerek, prototipten üretime hızlıca geçmenizi sağlar. İşte temel özelliklerin bir listesi: +

+ +**1. Workflow**: +Görsel bir arayüz üzerinde güçlü AI iş akışları oluşturun ve test edin, aşağıdaki tüm özellikleri ve daha fazlasını kullanarak. + + + https://github.com/langgenius/dify/assets/13230914/356df23e-1604-483d-80a6-9517ece318aa + + + +**2. Kapsamlı model desteği**: +Çok sayıda çıkarım sağlayıcısı ve kendi kendine barındırılan çözümlerden yüzlerce özel / açık kaynaklı LLM ile sorunsuz entegrasyon sağlar. GPT, Mistral, Llama3 ve OpenAI API uyumlu tüm modelleri kapsar. Desteklenen model sağlayıcılarının tam listesine [buradan](https://docs.dify.ai/getting-started/readme/model-providers) ulaşabilirsiniz. + +![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3) + +**3. Prompt IDE**: + Komut istemlerini oluşturmak, model performansını karşılaştırmak ve sohbet tabanlı uygulamalara metin-konuşma gibi ek özellikler eklemek için kullanıcı dostu bir arayüz. + +**4. RAG Pipeline**: + Belge alımından bilgi çekmeye kadar geniş kapsamlı RAG yetenekleri. PDF'ler, PPT'ler ve diğer yaygın belge formatlarından metin çıkarma için hazır destek sunar. + +**5. Ajan yetenekleri**: + LLM Fonksiyon Çağırma veya ReAct'a dayalı ajanlar tanımlayabilir ve bu ajanlara önceden hazırlanmış veya özel araçlar ekleyebilirsiniz. Dify, AI ajanları için Google Arama, DALL·E, Stable Diffusion ve WolframAlpha gibi 50'den fazla yerleşik araç sağlar. + +**6. LLMOps**: + Uygulama loglarını ve performans metriklerini zaman içinde izleme ve analiz etme imkanı. Üretim ortamından elde edilen verilere ve kullanıcı geri bildirimlerine dayanarak, prompt'ları, veri setlerini ve modelleri sürekli olarak optimize edebilirsiniz. Bu sayede, AI uygulamanızın performansını ve doğruluğunu sürekli olarak artırabilirsiniz. + +**7. Hizmet Olarak Backend**: + Dify'ın tüm özellikleri ilgili API'lerle birlikte gelir, böylece Dify'ı kendi iş mantığınıza kolayca entegre edebilirsiniz. + + +## Özellik karşılaştırması + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ÖzellikDify.AILangChainFlowiseOpenAI Assistants API
Programlama YaklaşımıAPI + Uygulama odaklıPython KoduUygulama odaklıAPI odaklı
Desteklenen LLM'lerZengin ÇeşitlilikZengin ÇeşitlilikZengin ÇeşitlilikYalnızca OpenAI
RAG Motoru
Ajan
İş Akışı
Gözlemlenebilirlik
Kurumsal Özellikler (SSO/Erişim kontrolü)
Yerel Dağıtım
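Yukarıda 7. maddede anlatılan "Hizmet Olarak Backend" yaklaşımını somutlaştırmak için küçük bir örnek: Dify'da yayınlanmış bir sohbet uygulamasının API'sine gönderilen varsayımsal bir istek. `API_ANAHTARINIZ` bir yer tutucudur; uç nokta ve alan adları sürüme göre değişebileceğinden güncel şema için Dify API dokümantasyonuna bakın.

```bash
# Varsayımsal bir örnek: yayınlanmış bir Dify sohbet uygulamasına API üzerinden mesaj gönderme.
# API_ANAHTARINIZ yer tutucudur; alanlar ve uç nokta sürümünüze göre farklılık gösterebilir.
curl -X POST 'https://api.dify.ai/v1/chat-messages' \
  --header 'Authorization: Bearer API_ANAHTARINIZ' \
  --header 'Content-Type: application/json' \
  --data-raw '{
    "inputs": {},
    "query": "Merhaba Dify!",
    "response_mode": "blocking",
    "user": "ornek-kullanici"
  }'
```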
+ +## Dify'ı Kullanma + +- **Cloud
** +Herkesin sıfır kurulumla denemesi için bir [Dify Cloud](https://dify.ai) hizmeti sunuyoruz. Bu hizmet, kendi kendine dağıtılan versiyonun tüm yeteneklerini sağlar ve sandbox planında 200 ücretsiz GPT-4 çağrısı içerir. + +- **Dify Topluluk Sürümünü Kendi Sunucunuzda Barındırma
** +Bu [başlangıç kılavuzu](#quick-start) ile Dify'ı kendi ortamınızda hızlıca çalıştırın. +Daha fazla referans ve detaylı talimatlar için [dokümantasyonumuzu](https://docs.dify.ai) kullanın. + +- **Kurumlar / organizasyonlar için Dify
** +Ek kurumsal odaklı özellikler sunuyoruz. Kurumsal ihtiyaçları görüşmek için [bizimle bir toplantı planlayın](https://cal.com/guchenhe/30min) veya [bize bir e-posta gönderin](mailto:business@dify.ai?subject=[GitHub]Business%20License%20Inquiry).
+ > AWS kullanan startuplar ve küçük işletmeler için, [AWS Marketplace'deki Dify Premium'a](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) göz atın ve tek tıklamayla kendi AWS VPC'nize dağıtın. Bu, özel logo ve marka ile uygulamalar oluşturma seçeneğine sahip uygun fiyatlı bir AMI teklifidir. + +## Güncel Kalma + +GitHub'da Dify'a yıldız verin ve yeni sürümlerden anında haberdar olun. + +![bizi-yıldızlayın](https://github.com/langgenius/dify/assets/13230914/b823edc1-6388-4e25-ad45-2f6b187adbb4) + + + +## Hızlı başlangıç +> Dify'ı kurmadan önce, makinenizin aşağıdaki minimum sistem gereksinimlerini karşıladığından emin olun: +> +>- CPU >= 2 Çekirdek +>- RAM >= 4GB + +
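Aşağıdaki komutlar, Docker kurulumuna geçmeden önce makinenizin bu asgari gereksinimleri karşılayıp karşılamadığını hızlıca kontrol etmek için küçük bir örnektir (Linux ortamı varsayılmıştır; komutlar işletim sisteminize göre farklılık gösterebilir):

```bash
# CPU çekirdek sayısını kontrol edin (en az 2 olmalı)
nproc

# Toplam belleği kontrol edin (en az 4GB olmalı)
free -h

# Docker ve Docker Compose'un kurulu olduğunu doğrulayın
docker --version
docker compose version
```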
+Dify sunucusunu başlatmanın en kolay yolu, [docker-compose.yml](docker/docker-compose.yaml) dosyamızı çalıştırmaktır. Kurulum komutunu çalıştırmadan önce, makinenizde [Docker](https://docs.docker.com/get-docker/) ve [Docker Compose](https://docs.docker.com/compose/install/)'un kurulu olduğundan emin olun: + +```bash +cd docker +cp .env.example .env +docker compose up -d +``` + +Çalıştırdıktan sonra, tarayıcınızda [http://localhost/install](http://localhost/install) adresinden Dify kontrol paneline erişebilir ve başlangıç ayarları sürecini başlatabilirsiniz. + +> Eğer Dify'a katkıda bulunmak veya ek geliştirmeler yapmak isterseniz, [kaynak koddan dağıtım kılavuzumuza](https://docs.dify.ai/getting-started/install-self-hosted/local-source-code) başvurun. + +## Sonraki adımlar + +Yapılandırmayı özelleştirmeniz gerekiyorsa, lütfen [.env.example](docker/.env.example) dosyamızdaki yorumlara bakın ve `.env` dosyanızdaki ilgili değerleri güncelleyin. Ayrıca, spesifik dağıtım ortamınıza ve gereksinimlerinize bağlı olarak `docker-compose.yaml` dosyasının kendisinde de, imaj sürümlerini, port eşlemelerini veya hacim bağlantılarını değiştirmek gibi ayarlamalar yapmanız gerekebilir. Herhangi bir değişiklik yaptıktan sonra, lütfen `docker-compose up -d` komutunu tekrar çalıştırın. Kullanılabilir tüm ortam değişkenlerinin tam listesini [burada](https://docs.dify.ai/getting-started/install-self-hosted/environments) bulabilirsiniz. + +Yüksek kullanılabilirliğe sahip bir kurulum yapılandırmak isterseniz, Dify'ın Kubernetes üzerine dağıtılmasına olanak tanıyan topluluk katkılı [Helm Charts](https://helm.sh/) ve YAML dosyaları mevcuttur. + +- [@LeoQuote tarafından Helm Chart](https://github.com/douban/charts/tree/master/charts/dify) +- [@BorisPolonsky tarafından Helm Chart](https://github.com/BorisPolonsky/dify-helm) +- [@Winson-030 tarafından YAML dosyası](https://github.com/Winson-030/dify-kubernetes) + +#### Dağıtım için Terraform Kullanımı + +##### Azure Global +[Terraform](https://www.terraform.io/) kullanarak Dify'ı Azure'a tek tıklamayla dağıtın. +- [@nikawang tarafından Azure Terraform](https://github.com/nikawang/dify-azure-terraform) + +## Katkıda Bulunma + +Kod katkısında bulunmak isteyenler için [Katkı Kılavuzumuza](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) bakabilirsiniz. +Aynı zamanda, lütfen Dify'ı sosyal medyada, etkinliklerde ve konferanslarda paylaşarak desteklemeyi düşünün. + +> Dify'ı Mandarin veya İngilizce dışındaki dillere çevirmemize yardımcı olacak katkıda bulunanlara ihtiyacımız var. Yardımcı olmakla ilgileniyorsanız, lütfen daha fazla bilgi için [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) dosyasına bakın ve [Discord Topluluk Sunucumuzdaki](https://discord.gg/8Tpq4AcN9c) `global-users` kanalında bize bir yorum bırakın. + +**Katkıda Bulunanlar** + + + + + +## Topluluk & iletişim + +* [Github Tartışmaları](https://github.com/langgenius/dify/discussions). En uygun: geri bildirim paylaşmak ve soru sormak için. +* [GitHub Sorunları](https://github.com/langgenius/dify/issues). En uygun: Dify.AI kullanırken karşılaştığınız hatalar ve özellik önerileri için. [Katkı Kılavuzumuza](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) bakın. +* [Discord](https://discord.gg/FngNHpbcY7). En uygun: uygulamalarınızı paylaşmak ve toplulukla vakit geçirmek için. +* [Twitter](https://twitter.com/dify_ai). En uygun: uygulamalarınızı paylaşmak ve toplulukla vakit geçirmek için.
+ +Veya doğrudan bir ekip üyesiyle toplantı planlayın: + + + + + + + + + + + + + + +
İletişim NoktasıAmaç
Git-Hub-README-Button-3xİş sorgulamaları & ürün geri bildirimleri
Git-Hub-README-Button-2xKatkılar, sorunlar & özellik istekleri
+ +## Star history + +[![Star History Chart](https://api.star-history.com/svg?repos=langgenius/dify&type=Date)](https://star-history.com/#langgenius/dify&Date) + +## Güvenlik açıklaması + +Gizliliğinizi korumak için, lütfen güvenlik sorunlarını GitHub'da paylaşmaktan kaçının. Bunun yerine, sorularınızı security@dify.ai adresine gönderin ve size daha detaylı bir cevap vereceğiz. + +## Lisans + +Bu depo, temel olarak Apache 2.0 lisansı ve birkaç ek kısıtlama içeren [Dify Açık Kaynak Lisansı](LICENSE) altında kullanıma sunulmuştur. diff --git a/api/constants/languages.py b/api/constants/languages.py index efc668d4ee..023d2f18a6 100644 --- a/api/constants/languages.py +++ b/api/constants/languages.py @@ -15,6 +15,7 @@ language_timezone_mapping = { 'ro-RO': 'Europe/Bucharest', 'pl-PL': 'Europe/Warsaw', 'hi-IN': 'Asia/Kolkata', + 'tr-TR': 'Europe/Istanbul', } languages = list(language_timezone_mapping.keys()) diff --git a/api/controllers/inner_api/wraps.py b/api/controllers/inner_api/wraps.py index 2c3c870bce..5c37f5276f 100644 --- a/api/controllers/inner_api/wraps.py +++ b/api/controllers/inner_api/wraps.py @@ -19,7 +19,7 @@ def inner_api_only(view): # get header 'X-Inner-Api-Key' inner_api_key = request.headers.get('X-Inner-Api-Key') if not inner_api_key or inner_api_key != dify_config.INNER_API_KEY: - abort(404) + abort(401) return view(*args, **kwargs) diff --git a/api/controllers/service_api/app/conversation.py b/api/controllers/service_api/app/conversation.py index 02158f8b56..44bda8e771 100644 --- a/api/controllers/service_api/app/conversation.py +++ b/api/controllers/service_api/app/conversation.py @@ -53,7 +53,7 @@ class ConversationDetailApi(Resource): ConversationService.delete(app_model, conversation_id, end_user) except services.errors.conversation.ConversationNotExistsError: raise NotFound("Conversation Not Exists.") - return {"result": "success"}, 204 + return {'result': 'success'}, 200 class ConversationRenameApi(Resource): diff --git a/api/core/app/app_config/easy_ui_based_app/dataset/manager.py b/api/core/app/app_config/easy_ui_based_app/dataset/manager.py index 13da5514d1..ec17db5f06 100644 --- a/api/core/app/app_config/easy_ui_based_app/dataset/manager.py +++ b/api/core/app/app_config/easy_ui_based_app/dataset/manager.py @@ -91,7 +91,8 @@ class DatasetConfigManager: top_k=dataset_configs.get('top_k', 4), score_threshold=dataset_configs.get('score_threshold'), reranking_model=dataset_configs.get('reranking_model'), - weights=dataset_configs.get('weights') + weights=dataset_configs.get('weights'), + reranking_enabled=dataset_configs.get('reranking_enabled', True), ) ) diff --git a/api/core/app/app_config/entities.py b/api/core/app/app_config/entities.py index 9133a35c08..a490ddd670 100644 --- a/api/core/app/app_config/entities.py +++ b/api/core/app/app_config/entities.py @@ -158,10 +158,11 @@ class DatasetRetrieveConfigEntity(BaseModel): retrieve_strategy: RetrieveStrategy top_k: Optional[int] = None - score_threshold: Optional[float] = None + score_threshold: Optional[float] = .0 rerank_mode: Optional[str] = 'reranking_model' reranking_model: Optional[dict] = None weights: Optional[dict] = None + reranking_enabled: Optional[bool] = True diff --git a/api/core/helper/code_executor/code_executor.py b/api/core/helper/code_executor/code_executor.py index 5b69d3af4b..afb2bbbbf3 100644 --- a/api/core/helper/code_executor/code_executor.py +++ b/api/core/helper/code_executor/code_executor.py @@ -107,11 +107,11 @@ class CodeExecutor: response = response.json() except: raise 
CodeExecutionException('Failed to parse response') + + if (code := response.get('code')) != 0: + raise CodeExecutionException(f"Got error code: {code}. Got error msg: {response.get('message')}") response = CodeExecutionResponse(**response) - - if response.code != 0: - raise CodeExecutionException(response.message) if response.data.error: raise CodeExecutionException(response.data.error) diff --git a/api/core/hosting_configuration.py b/api/core/hosting_configuration.py index 45ad1b51bf..5f7fec5833 100644 --- a/api/core/hosting_configuration.py +++ b/api/core/hosting_configuration.py @@ -73,6 +73,8 @@ class HostingConfiguration: quota_limit=hosted_quota_limit, restrict_models=[ RestrictModel(model="gpt-4", base_model_name="gpt-4", model_type=ModelType.LLM), + RestrictModel(model="gpt-4o", base_model_name="gpt-4o", model_type=ModelType.LLM), + RestrictModel(model="gpt-4o-mini", base_model_name="gpt-4o-mini", model_type=ModelType.LLM), RestrictModel(model="gpt-4-32k", base_model_name="gpt-4-32k", model_type=ModelType.LLM), RestrictModel(model="gpt-4-1106-preview", base_model_name="gpt-4-1106-preview", model_type=ModelType.LLM), RestrictModel(model="gpt-4-vision-preview", base_model_name="gpt-4-vision-preview", model_type=ModelType.LLM), diff --git a/api/core/model_runtime/model_providers/anthropic/llm/llm.py b/api/core/model_runtime/model_providers/anthropic/llm/llm.py index 107efe4867..19ce401999 100644 --- a/api/core/model_runtime/model_providers/anthropic/llm/llm.py +++ b/api/core/model_runtime/model_providers/anthropic/llm/llm.py @@ -116,7 +116,8 @@ class AnthropicLargeLanguageModel(LargeLanguageModel): # Add the new header for claude-3-5-sonnet-20240620 model extra_headers = {} if model == "claude-3-5-sonnet-20240620": - extra_headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15" + if model_parameters.get('max_tokens') > 4096: + extra_headers["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15" if tools: extra_model_kwargs['tools'] = [ diff --git a/api/core/model_runtime/model_providers/azure_openai/_constant.py b/api/core/model_runtime/model_providers/azure_openai/_constant.py index 63a0b5c8be..984cca3744 100644 --- a/api/core/model_runtime/model_providers/azure_openai/_constant.py +++ b/api/core/model_runtime/model_providers/azure_openai/_constant.py @@ -496,6 +496,158 @@ LLM_BASE_MODELS = [ ) ) ), + AzureBaseModel( + base_model_name='gpt-4o-mini', + entity=AIModelEntity( + model='fake-deployment-name', + label=I18nObject( + en_US='fake-deployment-name-label', + ), + model_type=ModelType.LLM, + features=[ + ModelFeature.AGENT_THOUGHT, + ModelFeature.VISION, + ModelFeature.MULTI_TOOL_CALL, + ModelFeature.STREAM_TOOL_CALL, + ], + fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + model_properties={ + ModelPropertyKey.MODE: LLMMode.CHAT.value, + ModelPropertyKey.CONTEXT_SIZE: 128000, + }, + parameter_rules=[ + ParameterRule( + name='temperature', + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE], + ), + ParameterRule( + name='top_p', + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P], + ), + ParameterRule( + name='presence_penalty', + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY], + ), + ParameterRule( + name='frequency_penalty', + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY], + ), + _get_max_tokens(default=512, min_val=1, max_val=16384), + ParameterRule( + name='seed', + label=I18nObject( + zh_Hans='种子', + en_US='Seed' + ), + type='int', + help=I18nObject( + zh_Hans='如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 
system_fingerprint 响应参数来监视变化。', + en_US='If specified, model will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.' + ), + required=False, + precision=2, + min=0, + max=1, + ), + ParameterRule( + name='response_format', + label=I18nObject( + zh_Hans='回复格式', + en_US='response_format' + ), + type='string', + help=I18nObject( + zh_Hans='指定模型必须输出的格式', + en_US='specifying the format that the model must output' + ), + required=False, + options=['text', 'json_object'] + ), + ], + pricing=PriceConfig( + input=0.150, + output=0.600, + unit=0.000001, + currency='USD', + ) + ) + ), + AzureBaseModel( + base_model_name='gpt-4o-mini-2024-07-18', + entity=AIModelEntity( + model='fake-deployment-name', + label=I18nObject( + en_US='fake-deployment-name-label', + ), + model_type=ModelType.LLM, + features=[ + ModelFeature.AGENT_THOUGHT, + ModelFeature.VISION, + ModelFeature.MULTI_TOOL_CALL, + ModelFeature.STREAM_TOOL_CALL, + ], + fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + model_properties={ + ModelPropertyKey.MODE: LLMMode.CHAT.value, + ModelPropertyKey.CONTEXT_SIZE: 128000, + }, + parameter_rules=[ + ParameterRule( + name='temperature', + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE], + ), + ParameterRule( + name='top_p', + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P], + ), + ParameterRule( + name='presence_penalty', + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY], + ), + ParameterRule( + name='frequency_penalty', + **PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY], + ), + _get_max_tokens(default=512, min_val=1, max_val=16384), + ParameterRule( + name='seed', + label=I18nObject( + zh_Hans='种子', + en_US='Seed' + ), + type='int', + help=I18nObject( + zh_Hans='如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint 响应参数来监视变化。', + en_US='If specified, model will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.' 
+ ), + required=False, + precision=2, + min=0, + max=1, + ), + ParameterRule( + name='response_format', + label=I18nObject( + zh_Hans='回复格式', + en_US='response_format' + ), + type='string', + help=I18nObject( + zh_Hans='指定模型必须输出的格式', + en_US='specifying the format that the model must output' + ), + required=False, + options=['text', 'json_object'] + ), + ], + pricing=PriceConfig( + input=0.150, + output=0.600, + unit=0.000001, + currency='USD', + ) + ) + ), AzureBaseModel( base_model_name='gpt-4o', entity=AIModelEntity( diff --git a/api/core/model_runtime/model_providers/azure_openai/azure_openai.yaml b/api/core/model_runtime/model_providers/azure_openai/azure_openai.yaml index 875e94167d..be4d4651d7 100644 --- a/api/core/model_runtime/model_providers/azure_openai/azure_openai.yaml +++ b/api/core/model_runtime/model_providers/azure_openai/azure_openai.yaml @@ -114,6 +114,18 @@ model_credential_schema: show_on: - variable: __model_type value: llm + - label: + en_US: gpt-4o-mini + value: gpt-4o-mini + show_on: + - variable: __model_type + value: llm + - label: + en_US: gpt-4o-mini-2024-07-18 + value: gpt-4o-mini-2024-07-18 + show_on: + - variable: __model_type + value: llm - label: en_US: gpt-4o value: gpt-4o diff --git a/api/core/model_runtime/model_providers/azure_openai/tts/tts.py b/api/core/model_runtime/model_providers/azure_openai/tts/tts.py index 50c125b873..3d2bac1c31 100644 --- a/api/core/model_runtime/model_providers/azure_openai/tts/tts.py +++ b/api/core/model_runtime/model_providers/azure_openai/tts/tts.py @@ -1,12 +1,8 @@ import concurrent.futures import copy -from functools import reduce -from io import BytesIO from typing import Optional -from flask import Response from openai import AzureOpenAI -from pydub import AudioSegment from core.model_runtime.entities.model_entities import AIModelEntity from core.model_runtime.errors.invoke import InvokeBadRequestError @@ -51,7 +47,7 @@ class AzureOpenAIText2SpeechModel(_CommonAzureOpenAI, TTSModel): :return: text translated to audio file """ try: - self._tts_invoke( + self._tts_invoke_streaming( model=model, credentials=credentials, content_text='Hello Dify!', @@ -60,45 +56,6 @@ class AzureOpenAIText2SpeechModel(_CommonAzureOpenAI, TTSModel): except Exception as ex: raise CredentialsValidateFailedError(str(ex)) - def _tts_invoke(self, model: str, credentials: dict, content_text: str, voice: str) -> Response: - """ - _tts_invoke text2speech model - - :param model: model name - :param credentials: model credentials - :param content_text: text content to be translated - :param voice: model timbre - :return: text translated to audio file - """ - audio_type = self._get_model_audio_type(model, credentials) - word_limit = self._get_model_word_limit(model, credentials) - max_workers = self._get_model_workers_limit(model, credentials) - try: - sentences = list(self._split_text_into_sentences(org_text=content_text, max_length=word_limit)) - audio_bytes_list = [] - - # Create a thread pool and map the function to the list of sentences - with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: - futures = [executor.submit(self._process_sentence, sentence=sentence, model=model, voice=voice, - credentials=credentials) for sentence in sentences] - for future in futures: - try: - if future.result(): - audio_bytes_list.append(future.result()) - except Exception as ex: - raise InvokeBadRequestError(str(ex)) - - if len(audio_bytes_list) > 0: - audio_segments = [AudioSegment.from_file(BytesIO(audio_bytes), format=audio_type) for 
audio_bytes in - audio_bytes_list if audio_bytes] - combined_segment = reduce(lambda x, y: x + y, audio_segments) - buffer: BytesIO = BytesIO() - combined_segment.export(buffer, format=audio_type) - buffer.seek(0) - return Response(buffer.read(), status=200, mimetype=f"audio/{audio_type}") - except Exception as ex: - raise InvokeBadRequestError(str(ex)) - def _tts_invoke_streaming(self, model: str, credentials: dict, content_text: str, voice: str) -> any: """ @@ -144,7 +101,6 @@ class AzureOpenAIText2SpeechModel(_CommonAzureOpenAI, TTSModel): :param sentence: text content to be translated :return: text translated to audio file """ - # transform credentials to kwargs for model instance credentials_kwargs = self._to_credential_kwargs(credentials) client = AzureOpenAI(**credentials_kwargs) response = client.audio.speech.create(model=model, voice=voice, input=sentence.strip()) diff --git a/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-pro.yaml b/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-pro.yaml index d3b1b6d8b6..b173ffbe77 100644 --- a/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-pro.yaml +++ b/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-pro.yaml @@ -21,6 +21,16 @@ parameter_rules: default: 1024 min: 1 max: 32000 + - name: enable_enhance + label: + zh_Hans: 功能增强 + en_US: Enable Enhancement + type: boolean + help: + zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。 + en_US: Allow the model to perform external search to enhance the generation results. + required: false + default: true pricing: input: '0.03' output: '0.10' diff --git a/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-standard-256k.yaml b/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-standard-256k.yaml index 3b28317497..1f94a8623b 100644 --- a/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-standard-256k.yaml +++ b/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-standard-256k.yaml @@ -21,6 +21,16 @@ parameter_rules: default: 1024 min: 1 max: 256000 + - name: enable_enhance + label: + zh_Hans: 功能增强 + en_US: Enable Enhancement + type: boolean + help: + zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。 + en_US: Allow the model to perform external search to enhance the generation results. + required: false + default: true pricing: input: '0.015' output: '0.06' diff --git a/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-standard.yaml b/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-standard.yaml index 88b27f51c4..1db25930fc 100644 --- a/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-standard.yaml +++ b/api/core/model_runtime/model_providers/hunyuan/llm/hunyuan-standard.yaml @@ -21,6 +21,16 @@ parameter_rules: default: 1024 min: 1 max: 32000 + - name: enable_enhance + label: + zh_Hans: 功能增强 + en_US: Enable Enhancement + type: boolean + help: + zh_Hans: 功能增强(如搜索)开关,关闭时将直接由主模型生成回复内容,可以降低响应时延(对于流式输出时的首字时延尤为明显)。但在少数场景里,回复效果可能会下降。 + en_US: Allow the model to perform external search to enhance the generation results. 
+ required: false + default: true pricing: input: '0.0045' output: '0.0005' diff --git a/api/core/model_runtime/model_providers/hunyuan/llm/llm.py b/api/core/model_runtime/model_providers/hunyuan/llm/llm.py index 8859dd72bd..6d22f9d2d6 100644 --- a/api/core/model_runtime/model_providers/hunyuan/llm/llm.py +++ b/api/core/model_runtime/model_providers/hunyuan/llm/llm.py @@ -36,7 +36,8 @@ class HunyuanLargeLanguageModel(LargeLanguageModel): custom_parameters = { 'Temperature': model_parameters.get('temperature', 0.0), - 'TopP': model_parameters.get('top_p', 1.0) + 'TopP': model_parameters.get('top_p', 1.0), + 'EnableEnhancement': model_parameters.get('enable_enhance', True) } params = { diff --git a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml index 2401f2a890..6cc197b70b 100644 --- a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml +++ b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml @@ -2,6 +2,9 @@ - google/codegemma-7b - google/recurrentgemma-2b - meta/llama2-70b +- meta/llama-3.1-8b-instruct +- meta/llama-3.1-70b-instruct +- meta/llama-3.1-405b-instruct - meta/llama3-8b-instruct - meta/llama3-70b-instruct - mistralai/mistral-large diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-405b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-405b.yaml new file mode 100644 index 0000000000..5472de9902 --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-405b.yaml @@ -0,0 +1,36 @@ +model: meta/llama-3.1-405b-instruct +label: + zh_Hans: meta/llama-3.1-405b-instruct + en_US: meta/llama-3.1-405b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + min: 0 + max: 1 + default: 0.5 + - name: top_p + use_template: top_p + min: 0 + max: 1 + default: 1 + - name: max_tokens + use_template: max_tokens + min: 1 + max: 4096 + default: 1024 + - name: frequency_penalt + use_template: frequency_penalty + min: -2 + max: 2 + default: 0 + - name: presence_penalty + use_template: presence_penalty + min: -2 + max: 2 + default: 0 diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-70b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-70b.yaml new file mode 100644 index 0000000000..16af0554a1 --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-70b.yaml @@ -0,0 +1,36 @@ +model: meta/llama-3.1-70b-instruct +label: + zh_Hans: meta/llama-3.1-70b-instruct + en_US: meta/llama-3.1-70b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + min: 0 + max: 1 + default: 0.5 + - name: top_p + use_template: top_p + min: 0 + max: 1 + default: 1 + - name: max_tokens + use_template: max_tokens + min: 1 + max: 4096 + default: 1024 + - name: frequency_penalty + use_template: frequency_penalty + min: -2 + max: 2 + default: 0 + - name: presence_penalty + use_template: presence_penalty + min: -2 + max: 2 + default: 0 diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-8b.yaml b/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-8b.yaml new file mode 100644 index 0000000000..f2d43dc30e --- /dev/null +++ b/api/core/model_runtime/model_providers/nvidia/llm/llama-3.1-8b.yaml @@ -0,0 +1,36 @@ +model: 
meta/llama-3.1-8b-instruct +label: + zh_Hans: meta/llama-3.1-8b-instruct + en_US: meta/llama-3.1-8b-instruct +model_type: llm +features: + - agent-thought +model_properties: + mode: chat + context_size: 131072 +parameter_rules: + - name: temperature + use_template: temperature + min: 0 + max: 1 + default: 0.5 + - name: top_p + use_template: top_p + min: 0 + max: 1 + default: 1 + - name: max_tokens + use_template: max_tokens + min: 1 + max: 4096 + default: 1024 + - name: frequency_penalty + use_template: frequency_penalty + min: -2 + max: 2 + default: 0 + - name: presence_penalty + use_template: presence_penalty + min: -2 + max: 2 + default: 0 diff --git a/api/core/model_runtime/model_providers/nvidia/llm/llm.py b/api/core/model_runtime/model_providers/nvidia/llm/llm.py index 11252b9211..494b7374f5 100644 --- a/api/core/model_runtime/model_providers/nvidia/llm/llm.py +++ b/api/core/model_runtime/model_providers/nvidia/llm/llm.py @@ -31,6 +31,9 @@ class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel): 'meta/llama2-70b': '', 'meta/llama3-8b-instruct': '', 'meta/llama3-70b-instruct': '', + 'meta/llama-3.1-8b-instruct': '', + 'meta/llama-3.1-70b-instruct': '', + 'meta/llama-3.1-405b-instruct': '', 'google/recurrentgemma-2b': '' } diff --git a/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py b/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py index fd73728b78..069de9acec 100644 --- a/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py +++ b/api/core/model_runtime/model_providers/ollama/text_embedding/text_embedding.py @@ -59,7 +59,7 @@ class OllamaEmbeddingModel(TextEmbeddingModel): if not endpoint_url.endswith('/'): endpoint_url += '/' - endpoint_url = urljoin(endpoint_url, 'api/embeddings') + endpoint_url = urljoin(endpoint_url, 'api/embed') # get model properties context_size = self._get_context_size(model, credentials) @@ -78,32 +78,28 @@ class OllamaEmbeddingModel(TextEmbeddingModel): else: inputs.append(text) - batched_embeddings = [] + # Prepare the payload for the request + payload = { + 'input': inputs, + 'model': model, + } - for text in inputs: - # Prepare the payload for the request - payload = { - 'prompt': text, - 'model': model, - } + # Make the request to the OpenAI API + response = requests.post( + endpoint_url, + headers=headers, + data=json.dumps(payload), + timeout=(10, 300) + ) - # Make the request to the OpenAI API - response = requests.post( - endpoint_url, - headers=headers, - data=json.dumps(payload), - timeout=(10, 300) - ) + response.raise_for_status() # Raise an exception for HTTP errors + response_data = response.json() - response.raise_for_status() # Raise an exception for HTTP errors - response_data = response.json() + # Extract embeddings and used tokens from the response + embeddings = response_data['embeddings'] + embedding_used_tokens = self.get_num_tokens(model, credentials, inputs) - # Extract embeddings and used tokens from the response - embeddings = response_data['embedding'] - embedding_used_tokens = self.get_num_tokens(model, credentials, [text]) - - used_tokens += embedding_used_tokens - batched_embeddings.append(embeddings) + used_tokens += embedding_used_tokens # calc usage usage = self._calc_response_usage( @@ -113,7 +109,7 @@ class OllamaEmbeddingModel(TextEmbeddingModel): ) return TextEmbeddingResult( - embeddings=batched_embeddings, + embeddings=embeddings, usage=usage, model=model ) diff --git a/api/core/model_runtime/model_providers/openai/tts/tts.py 
b/api/core/model_runtime/model_providers/openai/tts/tts.py index d3fcf731f1..afa5d4b88a 100644 --- a/api/core/model_runtime/model_providers/openai/tts/tts.py +++ b/api/core/model_runtime/model_providers/openai/tts/tts.py @@ -1,11 +1,7 @@ import concurrent.futures -from functools import reduce -from io import BytesIO from typing import Optional -from flask import Response from openai import OpenAI -from pydub import AudioSegment from core.model_runtime.errors.invoke import InvokeBadRequestError from core.model_runtime.errors.validate import CredentialsValidateFailedError @@ -32,7 +28,8 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel): :return: text translated to audio file """ - if not voice or voice not in [d['value'] for d in self.get_tts_model_voices(model=model, credentials=credentials)]: + if not voice or voice not in [d['value'] for d in + self.get_tts_model_voices(model=model, credentials=credentials)]: voice = self._get_model_default_voice(model, credentials) # if streaming: return self._tts_invoke_streaming(model=model, @@ -50,7 +47,7 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel): :return: text translated to audio file """ try: - self._tts_invoke( + self._tts_invoke_streaming( model=model, credentials=credentials, content_text='Hello Dify!', @@ -59,46 +56,6 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel): except Exception as ex: raise CredentialsValidateFailedError(str(ex)) - def _tts_invoke(self, model: str, credentials: dict, content_text: str, voice: str) -> Response: - """ - _tts_invoke text2speech model - - :param model: model name - :param credentials: model credentials - :param content_text: text content to be translated - :param voice: model timbre - :return: text translated to audio file - """ - audio_type = self._get_model_audio_type(model, credentials) - word_limit = self._get_model_word_limit(model, credentials) - max_workers = self._get_model_workers_limit(model, credentials) - try: - sentences = list(self._split_text_into_sentences(org_text=content_text, max_length=word_limit)) - audio_bytes_list = [] - - # Create a thread pool and map the function to the list of sentences - with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: - futures = [executor.submit(self._process_sentence, sentence=sentence, model=model, voice=voice, - credentials=credentials) for sentence in sentences] - for future in futures: - try: - if future.result(): - audio_bytes_list.append(future.result()) - except Exception as ex: - raise InvokeBadRequestError(str(ex)) - - if len(audio_bytes_list) > 0: - audio_segments = [AudioSegment.from_file(BytesIO(audio_bytes), format=audio_type) for audio_bytes in - audio_bytes_list if audio_bytes] - combined_segment = reduce(lambda x, y: x + y, audio_segments) - buffer: BytesIO = BytesIO() - combined_segment.export(buffer, format=audio_type) - buffer.seek(0) - return Response(buffer.read(), status=200, mimetype=f"audio/{audio_type}") - except Exception as ex: - raise InvokeBadRequestError(str(ex)) - - def _tts_invoke_streaming(self, model: str, credentials: dict, content_text: str, voice: str) -> any: """ @@ -114,7 +71,8 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel): # doc: https://platform.openai.com/docs/guides/text-to-speech credentials_kwargs = self._to_credential_kwargs(credentials) client = OpenAI(**credentials_kwargs) - model_support_voice = [x.get("value") for x in self.get_tts_model_voices(model=model, credentials=credentials)] + model_support_voice = [x.get("value") for x in + 
self.get_tts_model_voices(model=model, credentials=credentials)] if not voice or voice not in model_support_voice: voice = self._get_model_default_voice(model, credentials) word_limit = self._get_model_word_limit(model, credentials) diff --git a/api/core/model_runtime/model_providers/openrouter/llm/llama-3.1-405b-instruct.yaml b/api/core/model_runtime/model_providers/openrouter/llm/llama-3.1-405b-instruct.yaml index 7d68e708b7..a489ce1b5a 100644 --- a/api/core/model_runtime/model_providers/openrouter/llm/llama-3.1-405b-instruct.yaml +++ b/api/core/model_runtime/model_providers/openrouter/llm/llama-3.1-405b-instruct.yaml @@ -4,7 +4,7 @@ label: model_type: llm model_properties: mode: chat - context_size: 128000 + context_size: 131072 parameter_rules: - name: temperature use_template: temperature @@ -15,9 +15,9 @@ parameter_rules: required: true default: 512 min: 1 - max: 128000 + max: 131072 pricing: - input: "3" - output: "3" + input: "2.7" + output: "2.7" unit: "0.000001" currency: USD diff --git a/api/core/model_runtime/model_providers/openrouter/llm/llama-3.1-70b-instruct.yaml b/api/core/model_runtime/model_providers/openrouter/llm/llama-3.1-70b-instruct.yaml index 78e3b45435..12037411b1 100644 --- a/api/core/model_runtime/model_providers/openrouter/llm/llama-3.1-70b-instruct.yaml +++ b/api/core/model_runtime/model_providers/openrouter/llm/llama-3.1-70b-instruct.yaml @@ -4,7 +4,7 @@ label: model_type: llm model_properties: mode: chat - context_size: 128000 + context_size: 131072 parameter_rules: - name: temperature use_template: temperature @@ -15,9 +15,9 @@ parameter_rules: required: true default: 512 min: 1 - max: 128000 + max: 131072 pricing: - input: "0.9" - output: "0.9" + input: "0.52" + output: "0.75" unit: "0.000001" currency: USD diff --git a/api/core/model_runtime/model_providers/openrouter/llm/llama-3.1-8b-instruct.yaml b/api/core/model_runtime/model_providers/openrouter/llm/llama-3.1-8b-instruct.yaml index 6e69b7deb7..6f06493f29 100644 --- a/api/core/model_runtime/model_providers/openrouter/llm/llama-3.1-8b-instruct.yaml +++ b/api/core/model_runtime/model_providers/openrouter/llm/llama-3.1-8b-instruct.yaml @@ -4,7 +4,7 @@ label: model_type: llm model_properties: mode: chat - context_size: 128000 + context_size: 131072 parameter_rules: - name: temperature use_template: temperature @@ -15,9 +15,9 @@ parameter_rules: required: true default: 512 min: 1 - max: 128000 + max: 131072 pricing: - input: "0.2" - output: "0.2" + input: "0.06" + output: "0.06" unit: "0.000001" currency: USD diff --git a/api/core/model_runtime/model_providers/tongyi/llm/llm.py b/api/core/model_runtime/model_providers/tongyi/llm/llm.py index 6f768131fb..a75db78d8c 100644 --- a/api/core/model_runtime/model_providers/tongyi/llm/llm.py +++ b/api/core/model_runtime/model_providers/tongyi/llm/llm.py @@ -497,12 +497,13 @@ You should also complete the text started with ``` but not tell ``` directly. 
content = prompt_message.content if not content: content = ' ' - tongyi_messages.append({ + message = { 'role': 'assistant', - 'content': content if not rich_content else [{"text": content}], - 'tool_calls': [tool_call.model_dump() for tool_call in - prompt_message.tool_calls] if prompt_message.tool_calls else None - }) + 'content': content if not rich_content else [{"text": content}] + } + if prompt_message.tool_calls: + message['tool_calls'] = [tool_call.model_dump() for tool_call in prompt_message.tool_calls] + tongyi_messages.append(message) elif isinstance(prompt_message, ToolPromptMessage): tongyi_messages.append({ "role": "tool", diff --git a/api/core/model_runtime/model_providers/tongyi/tts/tts.py b/api/core/model_runtime/model_providers/tongyi/tts/tts.py index 655ed2d1d0..664b02cd92 100644 --- a/api/core/model_runtime/model_providers/tongyi/tts/tts.py +++ b/api/core/model_runtime/model_providers/tongyi/tts/tts.py @@ -1,7 +1,4 @@ -import concurrent.futures import threading -from functools import reduce -from io import BytesIO from queue import Queue from typing import Optional @@ -9,8 +6,6 @@ import dashscope from dashscope import SpeechSynthesizer from dashscope.api_entities.dashscope_response import SpeechSynthesisResponse from dashscope.audio.tts import ResultCallback, SpeechSynthesisResult -from flask import Response -from pydub import AudioSegment from core.model_runtime.errors.invoke import InvokeBadRequestError from core.model_runtime.errors.validate import CredentialsValidateFailedError @@ -55,7 +50,7 @@ class TongyiText2SpeechModel(_CommonTongyi, TTSModel): :return: text translated to audio file """ try: - self._tts_invoke( + self._tts_invoke_streaming( model=model, credentials=credentials, content_text='Hello Dify!', @@ -64,46 +59,6 @@ class TongyiText2SpeechModel(_CommonTongyi, TTSModel): except Exception as ex: raise CredentialsValidateFailedError(str(ex)) - def _tts_invoke(self, model: str, credentials: dict, content_text: str, voice: str) -> Response: - """ - _tts_invoke text2speech model - - :param model: model name - :param credentials: model credentials - :param voice: model timbre - :param content_text: text content to be translated - :return: text translated to audio file - """ - audio_type = self._get_model_audio_type(model, credentials) - word_limit = self._get_model_word_limit(model, credentials) - max_workers = self._get_model_workers_limit(model, credentials) - try: - sentences = list(self._split_text_into_sentences(org_text=content_text, max_length=word_limit)) - audio_bytes_list = [] - - # Create a thread pool and map the function to the list of sentences - with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: - futures = [executor.submit(self._process_sentence, sentence=sentence, - credentials=credentials, voice=voice, audio_type=audio_type) for sentence in - sentences] - for future in futures: - try: - if future.result(): - audio_bytes_list.append(future.result()) - except Exception as ex: - raise InvokeBadRequestError(str(ex)) - - if len(audio_bytes_list) > 0: - audio_segments = [AudioSegment.from_file(BytesIO(audio_bytes), format=audio_type) for audio_bytes in - audio_bytes_list if audio_bytes] - combined_segment = reduce(lambda x, y: x + y, audio_segments) - buffer: BytesIO = BytesIO() - combined_segment.export(buffer, format=audio_type) - buffer.seek(0) - return Response(buffer.read(), status=200, mimetype=f"audio/{audio_type}") - except Exception as ex: - raise InvokeBadRequestError(str(ex)) - def 
_tts_invoke_streaming(self, model: str, credentials: dict, content_text: str, voice: str) -> any: """ diff --git a/api/core/model_runtime/model_providers/xinference/rerank/rerank.py b/api/core/model_runtime/model_providers/xinference/rerank/rerank.py index 649898f47a..b361806bcd 100644 --- a/api/core/model_runtime/model_providers/xinference/rerank/rerank.py +++ b/api/core/model_runtime/model_providers/xinference/rerank/rerank.py @@ -57,6 +57,7 @@ class XinferenceRerankModel(RerankModel): documents=docs, query=query, top_n=top_n, + return_documents=True ) except RuntimeError as e: raise InvokeServerUnavailableError(str(e)) @@ -66,7 +67,7 @@ class XinferenceRerankModel(RerankModel): for idx, result in enumerate(response['results']): # format document index = result['index'] - page_content = result['document'] + page_content = result['document'] if isinstance(result['document'], str) else result['document']['text'] rerank_document = RerankDocument( index=index, text=page_content, diff --git a/api/core/model_runtime/model_providers/xinference/tts/__init__.py b/api/core/model_runtime/model_providers/xinference/tts/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/model_runtime/model_providers/xinference/tts/tts.py b/api/core/model_runtime/model_providers/xinference/tts/tts.py new file mode 100644 index 0000000000..c106e38781 --- /dev/null +++ b/api/core/model_runtime/model_providers/xinference/tts/tts.py @@ -0,0 +1,240 @@ +import concurrent.futures +from functools import reduce +from io import BytesIO +from typing import Optional + +from flask import Response +from pydub import AudioSegment +from xinference_client.client.restful.restful_client import Client, RESTfulAudioModelHandle + +from core.model_runtime.entities.common_entities import I18nObject +from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType +from core.model_runtime.errors.invoke import ( + InvokeAuthorizationError, + InvokeBadRequestError, + InvokeConnectionError, + InvokeError, + InvokeRateLimitError, + InvokeServerUnavailableError, +) +from core.model_runtime.errors.validate import CredentialsValidateFailedError +from core.model_runtime.model_providers.__base.tts_model import TTSModel + + +class XinferenceText2SpeechModel(TTSModel): + + def __init__(self): + # preset voices, need support custom voice + self.model_voices = { + 'chattts': { + 'all': [ + {'name': 'Alloy', 'value': 'alloy'}, + {'name': 'Echo', 'value': 'echo'}, + {'name': 'Fable', 'value': 'fable'}, + {'name': 'Onyx', 'value': 'onyx'}, + {'name': 'Nova', 'value': 'nova'}, + {'name': 'Shimmer', 'value': 'shimmer'}, + ] + }, + 'cosyvoice': { + 'zh-Hans': [ + {'name': '中文男', 'value': '中文男'}, + {'name': '中文女', 'value': '中文女'}, + {'name': '粤语女', 'value': '粤语女'}, + ], + 'zh-Hant': [ + {'name': '中文男', 'value': '中文男'}, + {'name': '中文女', 'value': '中文女'}, + {'name': '粤语女', 'value': '粤语女'}, + ], + 'en-US': [ + {'name': '英文男', 'value': '英文男'}, + {'name': '英文女', 'value': '英文女'}, + ], + 'ja-JP': [ + {'name': '日语男', 'value': '日语男'}, + ], + 'ko-KR': [ + {'name': '韩语女', 'value': '韩语女'}, + ] + } + } + + def validate_credentials(self, model: str, credentials: dict) -> None: + """ + Validate model credentials + + :param model: model name + :param credentials: model credentials + :return: + """ + try: + if ("/" in credentials['model_uid'] or + "?" 
in credentials['model_uid'] or + "#" in credentials['model_uid']): + raise CredentialsValidateFailedError("model_uid should not contain /, ?, or #") + + if credentials['server_url'].endswith('/'): + credentials['server_url'] = credentials['server_url'][:-1] + + # initialize client + client = Client( + base_url=credentials['server_url'] + ) + + xinference_client = client.get_model(model_uid=credentials['model_uid']) + + if not isinstance(xinference_client, RESTfulAudioModelHandle): + raise InvokeBadRequestError( + 'please check model type, the model you want to invoke is not a audio model') + + self._tts_invoke( + model=model, + credentials=credentials, + content_text='Hello Dify!', + voice=self._get_model_default_voice(model, credentials), + ) + except Exception as ex: + raise CredentialsValidateFailedError(str(ex)) + + def _invoke(self, model: str, tenant_id: str, credentials: dict, content_text: str, voice: str, + user: Optional[str] = None): + """ + _invoke text2speech model + + :param model: model name + :param tenant_id: user tenant id + :param credentials: model credentials + :param voice: model timbre + :param content_text: text content to be translated + :param user: unique user id + :return: text translated to audio file + """ + return self._tts_invoke(model, credentials, content_text, voice) + + def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None: + """ + used to define customizable model schema + """ + + entity = AIModelEntity( + model=model, + label=I18nObject( + en_US=model + ), + fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + model_type=ModelType.TTS, + model_properties={}, + parameter_rules=[] + ) + + return entity + + @property + def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]: + """ + Map model invoke error to unified error + The key is the error type thrown to the caller + The value is the error type thrown by the model, + which needs to be converted into a unified error type for the caller. 
+ + :return: Invoke error mapping + """ + return { + InvokeConnectionError: [ + InvokeConnectionError + ], + InvokeServerUnavailableError: [ + InvokeServerUnavailableError + ], + InvokeRateLimitError: [ + InvokeRateLimitError + ], + InvokeAuthorizationError: [ + InvokeAuthorizationError + ], + InvokeBadRequestError: [ + InvokeBadRequestError, + KeyError, + ValueError + ] + } + + def get_tts_model_voices(self, model: str, credentials: dict, language: Optional[str] = None) -> list: + for key, voices in self.model_voices.items(): + if key in model.lower(): + if language in voices: + return voices[language] + elif 'all' in voices: + return voices['all'] + return [] + + def _get_model_default_voice(self, model: str, credentials: dict) -> any: + return "" + + def _get_model_word_limit(self, model: str, credentials: dict) -> int: + return 3500 + + def _get_model_audio_type(self, model: str, credentials: dict) -> str: + return "mp3" + + def _get_model_workers_limit(self, model: str, credentials: dict) -> int: + return 5 + + def _tts_invoke(self, model: str, credentials: dict, content_text: str, voice: str) -> any: + """ + _tts_invoke text2speech model + + :param model: model name + :param credentials: model credentials + :param voice: model timbre + :param content_text: text content to be translated + :return: text translated to audio file + """ + if credentials['server_url'].endswith('/'): + credentials['server_url'] = credentials['server_url'][:-1] + + word_limit = self._get_model_word_limit(model, credentials) + audio_type = self._get_model_audio_type(model, credentials) + handle = RESTfulAudioModelHandle(credentials['model_uid'], credentials['server_url'], auth_headers={}) + + try: + sentences = list(self._split_text_into_sentences(org_text=content_text, max_length=word_limit)) + audio_bytes_list = [] + + with concurrent.futures.ThreadPoolExecutor(max_workers=min((3, len(sentences)))) as executor: + futures = [executor.submit( + handle.speech, input=sentence, voice=voice, response_format="mp3", speed=1.0, stream=False) + for sentence in sentences] + for future in futures: + try: + if future.result(): + audio_bytes_list.append(future.result()) + except Exception as ex: + raise InvokeBadRequestError(str(ex)) + + if len(audio_bytes_list) > 0: + audio_segments = [AudioSegment.from_file( + BytesIO(audio_bytes), format=audio_type) for audio_bytes in + audio_bytes_list if audio_bytes] + combined_segment = reduce(lambda x, y: x + y, audio_segments) + buffer: BytesIO = BytesIO() + combined_segment.export(buffer, format=audio_type) + buffer.seek(0) + return Response(buffer.read(), status=200, mimetype=f"audio/{audio_type}") + except Exception as ex: + raise InvokeBadRequestError(str(ex)) + + def _tts_invoke_streaming(self, model: str, credentials: dict, content_text: str, voice: str) -> any: + """ + _tts_invoke_streaming text2speech model + + Attention: stream api may return error [Parallel generation is not supported by ggml] + + :param model: model name + :param credentials: model credentials + :param voice: model timbre + :param content_text: text content to be translated + :return: text translated to audio file + """ + pass diff --git a/api/core/model_runtime/model_providers/xinference/xinference.yaml b/api/core/model_runtime/model_providers/xinference/xinference.yaml index 9496c66fdd..aca076b6e1 100644 --- a/api/core/model_runtime/model_providers/xinference/xinference.yaml +++ b/api/core/model_runtime/model_providers/xinference/xinference.yaml @@ -17,6 +17,7 @@ supported_model_types: - 
text-embedding - rerank - speech2text + - tts configurate_methods: - customizable-model model_credential_schema: diff --git a/api/core/rag/data_post_processor/data_post_processor.py b/api/core/rag/data_post_processor/data_post_processor.py index 2ed6d74187..ad9ee4f7cf 100644 --- a/api/core/rag/data_post_processor/data_post_processor.py +++ b/api/core/rag/data_post_processor/data_post_processor.py @@ -37,7 +37,6 @@ class DataPostProcessor: return WeightRerankRunner( tenant_id, Weights( - weight_type=weights['weight_type'], vector_setting=VectorSetting( vector_weight=weights['vector_setting']['vector_weight'], embedding_provider_name=weights['vector_setting']['embedding_provider_name'], diff --git a/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py b/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py index 82bdc5d4b9..a48224070f 100644 --- a/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py +++ b/api/core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py @@ -4,7 +4,7 @@ from typing import Any from uuid import UUID, uuid4 from numpy import ndarray -from pgvecto_rs.sqlalchemy import Vector +from pgvecto_rs.sqlalchemy import VECTOR from pydantic import BaseModel, model_validator from sqlalchemy import Float, String, create_engine, insert, select, text from sqlalchemy import text as sql_text @@ -67,7 +67,7 @@ class PGVectoRS(BaseVector): ) text: Mapped[str] = mapped_column(String) meta: Mapped[dict] = mapped_column(postgresql.JSONB) - vector: Mapped[ndarray] = mapped_column(Vector(dim)) + vector: Mapped[ndarray] = mapped_column(VECTOR(dim)) self._table = _Table self._distance_op = "<=>" diff --git a/api/core/rag/datasource/vdb/relyt/relyt_vector.py b/api/core/rag/datasource/vdb/relyt/relyt_vector.py index 2e0bd6f303..63ad0682d7 100644 --- a/api/core/rag/datasource/vdb/relyt/relyt_vector.py +++ b/api/core/rag/datasource/vdb/relyt/relyt_vector.py @@ -105,7 +105,7 @@ class RelytVector(BaseVector): redis_client.set(collection_exist_cache_key, 1, ex=3600) def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs): - from pgvecto_rs.sqlalchemy import Vector + from pgvecto_rs.sqlalchemy import VECTOR ids = [str(uuid.uuid1()) for _ in documents] metadatas = [d.metadata for d in documents] @@ -118,7 +118,7 @@ class RelytVector(BaseVector): self._collection_name, Base.metadata, Column("id", TEXT, primary_key=True), - Column("embedding", Vector(len(embeddings[0]))), + Column("embedding", VECTOR(len(embeddings[0]))), Column("document", String, nullable=True), Column("metadata", JSON, nullable=True), extend_existing=True, @@ -169,7 +169,7 @@ class RelytVector(BaseVector): Args: ids: List of ids to delete. 
""" - from pgvecto_rs.sqlalchemy import Vector + from pgvecto_rs.sqlalchemy import VECTOR if ids is None: raise ValueError("No ids provided to delete.") @@ -179,7 +179,7 @@ class RelytVector(BaseVector): self._collection_name, Base.metadata, Column("id", TEXT, primary_key=True), - Column("embedding", Vector(self.embedding_dimension)), + Column("embedding", VECTOR(self.embedding_dimension)), Column("document", String, nullable=True), Column("metadata", JSON, nullable=True), extend_existing=True, diff --git a/api/core/rag/rerank/entity/weight.py b/api/core/rag/rerank/entity/weight.py index 36afc89a21..6dbbad2f8d 100644 --- a/api/core/rag/rerank/entity/weight.py +++ b/api/core/rag/rerank/entity/weight.py @@ -16,8 +16,6 @@ class KeywordSetting(BaseModel): class Weights(BaseModel): """Model for weighted rerank.""" - weight_type: str - vector_setting: VectorSetting keyword_setting: KeywordSetting diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index d51ea2942a..a69fcffbb4 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -138,6 +138,7 @@ class DatasetRetrieval: retrieve_config.rerank_mode, retrieve_config.reranking_model, retrieve_config.weights, + retrieve_config.reranking_enabled, message_id, ) @@ -606,7 +607,7 @@ class DatasetRetrieval: top_k: int, score_threshold: float) -> list[Document]: filter_documents = [] for document in all_documents: - if document.metadata['score'] >= score_threshold: + if score_threshold and document.metadata['score'] >= score_threshold: filter_documents.append(document) if not filter_documents: return [] diff --git a/api/core/tools/provider/builtin/duckduckgo/tools/ddgo_img.py b/api/core/tools/provider/builtin/duckduckgo/tools/ddgo_img.py index ed873cdcf6..bca53f6b4b 100644 --- a/api/core/tools/provider/builtin/duckduckgo/tools/ddgo_img.py +++ b/api/core/tools/provider/builtin/duckduckgo/tools/ddgo_img.py @@ -2,6 +2,7 @@ from typing import Any from duckduckgo_search import DDGS +from core.file.file_obj import FileTransferMethod from core.tools.entities.tool_entities import ToolInvokeMessage from core.tools.tool.builtin_tool import BuiltinTool @@ -21,6 +22,7 @@ class DuckDuckGoImageSearchTool(BuiltinTool): response = DDGS().images(**query_dict) result = [] for res in response: + res['transfer_method'] = FileTransferMethod.REMOTE_URL msg = ToolInvokeMessage(type=ToolInvokeMessage.MessageType.IMAGE_LINK, message=res.get('image'), save_as='', diff --git a/api/core/workflow/nodes/knowledge_retrieval/entities.py b/api/core/workflow/nodes/knowledge_retrieval/entities.py index 5758b895f3..7cf392277c 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/entities.py +++ b/api/core/workflow/nodes/knowledge_retrieval/entities.py @@ -33,7 +33,6 @@ class WeightedScoreConfig(BaseModel): """ Weighted score Config. """ - weight_type: str vector_setting: VectorSetting keyword_setting: KeywordSetting @@ -49,7 +48,6 @@ class MultipleRetrievalConfig(BaseModel): reranking_model: Optional[RerankingModelConfig] = None weights: Optional[WeightedScoreConfig] = None - class ModelConfig(BaseModel): """ Model Config. 
diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py
index 7a2b3c8512..01bf6e16e6 100644
--- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py
+++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py
@@ -147,7 +147,6 @@ class KnowledgeRetrievalNode(BaseNode):
         elif node_data.multiple_retrieval_config.reranking_mode == 'weighted_score':
             reranking_model = None
             weights = {
-                'weight_type': node_data.multiple_retrieval_config.weights.weight_type,
                 'vector_setting': {
                     "vector_weight": node_data.multiple_retrieval_config.weights.vector_setting.vector_weight,
                     "embedding_provider_name": node_data.multiple_retrieval_config.weights.vector_setting.embedding_provider_name,
diff --git a/api/core/workflow/nodes/tool/tool_node.py b/api/core/workflow/nodes/tool/tool_node.py
index c03a17468a..969b1c241e 100644
--- a/api/core/workflow/nodes/tool/tool_node.py
+++ b/api/core/workflow/nodes/tool/tool_node.py
@@ -174,13 +174,14 @@ class ToolNode(BaseNode):
                 ext = path.splitext(url)[1]
                 mimetype = response.meta.get('mime_type', 'image/jpeg')
                 filename = response.save_as or url.split('/')[-1]
+                transfer_method = response.meta.get('transfer_method', FileTransferMethod.TOOL_FILE)
                 # get tool file id
                 tool_file_id = url.split('/')[-1].split('.')[0]
                 result.append(FileVar(
                     tenant_id=self.tenant_id,
                     type=FileType.IMAGE,
-                    transfer_method=FileTransferMethod.TOOL_FILE,
+                    transfer_method=transfer_method,
                     url=url,
                     related_id=tool_file_id,
                     filename=filename,
diff --git a/api/extensions/ext_database.py b/api/extensions/ext_database.py
index 9121c6ead9..c248e173a2 100644
--- a/api/extensions/ext_database.py
+++ b/api/extensions/ext_database.py
@@ -1,6 +1,16 @@
 from flask_sqlalchemy import SQLAlchemy
+from sqlalchemy import MetaData
 
-db = SQLAlchemy()
+POSTGRES_INDEXES_NAMING_CONVENTION = {
+    'ix': '%(column_0_label)s_idx',
+    'uq': '%(table_name)s_%(column_0_name)s_key',
+    'ck': '%(table_name)s_%(constraint_name)s_check',
+    'fk': '%(table_name)s_%(column_0_name)s_fkey',
+    'pk': '%(table_name)s_pkey',
+}
+
+metadata = MetaData(naming_convention=POSTGRES_INDEXES_NAMING_CONVENTION)
+db = SQLAlchemy(metadata=metadata)
 
 
 def init_app(app):
diff --git a/api/fields/dataset_fields.py b/api/fields/dataset_fields.py
index 120b66a92d..a9f79b5c67 100644
--- a/api/fields/dataset_fields.py
+++ b/api/fields/dataset_fields.py
@@ -29,7 +29,6 @@ vector_setting_fields = {
 }
 
 weighted_score_fields = {
-    'weight_type': fields.String,
     'keyword_setting': fields.Nested(keyword_setting_fields),
     'vector_setting': fields.Nested(vector_setting_fields),
 }
diff --git a/api/migrations/alembic.ini b/api/migrations/alembic.ini
index ec9d45c26a..aa21ecabcd 100644
--- a/api/migrations/alembic.ini
+++ b/api/migrations/alembic.ini
@@ -3,6 +3,7 @@
 [alembic]
 # template used to generate migration files
 # file_template = %%(rev)s_%%(slug)s
+file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
 
 # set to 'true' to run the environment during
 # the 'revision' command, regardless of autogenerate
diff --git a/api/poetry.lock b/api/poetry.lock
index abde108a7a..b30cc8ce68 100644
--- a/api/poetry.lock
+++ b/api/poetry.lock
@@ -5619,23 +5619,25 @@ files = [
 [[package]]
 name = "pgvecto-rs"
-version = "0.1.4"
+version = "0.2.1"
 description = "Python binding for pgvecto.rs"
 optional = false
-python-versions = ">=3.8"
+python-versions = "<3.13,>=3.8"
 files = [
"pgvecto_rs-0.1.4-py3-none-any.whl", hash = "sha256:9b08a9e612f0cd65d1cc6e17a35b9bb5956187e0e3981bf6e997ff9e615c6116"}, - {file = "pgvecto_rs-0.1.4.tar.gz", hash = "sha256:078b96cff1f3d417169ad46cacef7fc4d644978bbd6725a5c24c0675de5030ab"}, + {file = "pgvecto_rs-0.2.1-py3-none-any.whl", hash = "sha256:b3ee2c465219469ad537b3efea2916477c6c576b3d6fd4298980d0733d12bb27"}, + {file = "pgvecto_rs-0.2.1.tar.gz", hash = "sha256:07046eaad2c4f75745f76de9ba483541909f1c595aced8d3434224a4f933daca"}, ] [package.dependencies] numpy = ">=1.23" +SQLAlchemy = {version = ">=2.0.23", optional = true, markers = "extra == \"sqlalchemy\""} toml = ">=0.10" [package.extras] +django = ["Django (>=4.2)"] psycopg3 = ["psycopg[binary] (>=3.1.12)"] sdk = ["openai (>=1.2.2)", "pgvecto_rs[sqlalchemy]"] -sqlalchemy = ["SQLAlchemy (>=2.0.23)", "pgvecto_rs[psycopg3]"] +sqlalchemy = ["SQLAlchemy (>=2.0.23)"] [[package]] name = "pgvector" @@ -6131,10 +6133,7 @@ files = [ [package.dependencies] annotated-types = ">=0.4.0" pydantic-core = "2.20.1" -typing-extensions = [ - {version = ">=4.6.1", markers = "python_version < \"3.13\""}, - {version = ">=4.12.2", markers = "python_version >= \"3.13\""}, -] +typing-extensions = {version = ">=4.6.1", markers = "python_version < \"3.13\""} [package.extras] email = ["email-validator (>=2.0.0)"] @@ -9098,13 +9097,13 @@ h11 = ">=0.9.0,<1" [[package]] name = "xinference-client" -version = "0.9.4" +version = "0.13.3" description = "Client for Xinference" optional = false python-versions = "*" files = [ - {file = "xinference-client-0.9.4.tar.gz", hash = "sha256:21934bc9f3142ade66aaed33c2b6cf244c274d5b4b3163f9981bebdddacf205f"}, - {file = "xinference_client-0.9.4-py3-none-any.whl", hash = "sha256:6d3f1df3537a011f0afee5f9c9ca4f3ff564ca32cc999cf7038b324c0b907d0c"}, + {file = "xinference-client-0.13.3.tar.gz", hash = "sha256:822b722100affdff049c27760be7d62ac92de58c87a40d3361066df446ba648f"}, + {file = "xinference_client-0.13.3-py3-none-any.whl", hash = "sha256:f0eff3858b1ebcef2129726f82b09259c177e11db466a7ca23def3d4849c419f"}, ] [package.dependencies] @@ -9501,5 +9500,5 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" -python-versions = "^3.10" -content-hash = "a8b61d74d9322302b7447b6f8728ad606abc160202a8a122a05a8ef3cec7055b" +python-versions = ">=3.10,<3.13" +content-hash = "50acbb78f2a273dfa8683d9d292596e89d13a420c6ecb1afad331f2c38dd1423" diff --git a/api/pyproject.toml b/api/pyproject.toml index 25778f323d..c2c1d56403 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -154,7 +154,7 @@ pydantic_extra_types = "~2.9.0" pydub = "~0.25.1" pyjwt = "~2.8.0" pypdfium2 = "~4.17.0" -python = "^3.10" +python = ">=3.10,<3.13" python-docx = "~1.1.0" python-dotenv = "1.0.0" pyyaml = "~6.0.1" @@ -173,7 +173,7 @@ transformers = "~4.35.0" unstructured = { version = "~0.10.27", extras = ["docx", "epub", "md", "msg", "ppt", "pptx"] } websocket-client = "~1.7.0" werkzeug = "~3.0.1" -xinference-client = "0.9.4" +xinference-client = "0.13.3" yarl = "~1.9.4" zhipuai = "1.0.7" rank-bm25 = "~0.2.2" @@ -204,7 +204,7 @@ cloudscraper = "1.2.71" [tool.poetry.group.vdb.dependencies] chromadb = "0.5.1" oracledb = "~2.2.1" -pgvecto-rs = "0.1.4" +pgvecto-rs = { version = "~0.2.1", extras = ['sqlalchemy'] } pgvector = "0.2.5" pymilvus = "~2.4.4" pymysql = "1.1.1" diff --git a/api/tests/integration_tests/model_runtime/__mock/xinference.py b/api/tests/integration_tests/model_runtime/__mock/xinference.py index ddb18fe919..7cb0a1318e 100644 --- a/api/tests/integration_tests/model_runtime/__mock/xinference.py +++ 
@@ -106,7 +106,7 @@ class MockXinferenceClass:
     def _check_cluster_authenticated(self):
         self._cluster_authed = True
 
-    def rerank(self: RESTfulRerankModelHandle, documents: list[str], query: str, top_n: int) -> dict:
+    def rerank(self: RESTfulRerankModelHandle, documents: list[str], query: str, top_n: int, return_documents: bool) -> dict:
         # check if self._model_uid is a valid uuid
         if not re.match(r'[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}', self._model_uid) and \
             self._model_uid != 'rerank':
diff --git a/api/tests/unit_tests/services/workflow/test_workflow_converter.py b/api/tests/unit_tests/services/workflow/test_workflow_converter.py
index 29d55df8c3..f589cd2097 100644
--- a/api/tests/unit_tests/services/workflow/test_workflow_converter.py
+++ b/api/tests/unit_tests/services/workflow/test_workflow_converter.py
@@ -208,7 +208,8 @@ def test__convert_to_knowledge_retrieval_node_for_chatbot():
             reranking_model={
                 'reranking_provider_name': 'cohere',
                 'reranking_model_name': 'rerank-english-v2.0'
-            }
+            },
+            reranking_enabled=True
         )
     )
@@ -251,7 +252,8 @@ def test__convert_to_knowledge_retrieval_node_for_workflow_app():
             reranking_model={
                 'reranking_provider_name': 'cohere',
                 'reranking_model_name': 'rerank-english-v2.0'
-            }
+            },
+            reranking_enabled=True
         )
     )
diff --git a/docker/.env.example b/docker/.env.example
index b4a55bbba5..c463bf1bec 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -655,6 +655,7 @@ SSRF_SANDBOX_HOST=sandbox
 # docker env var for specifying vector db type at startup
 # (based on the vector db type, the corresponding docker
 # compose profile will be used)
+# if you want to use unstructured, add ',unstructured' to the end
 # ------------------------------
 COMPOSE_PROFILES=${VECTOR_STORE:-weaviate}
diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml
index 4df015e1b8..1caf244fb1 100644
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -418,7 +418,7 @@ services:
   # pgvecto-rs vector store
   pgvecto-rs:
-    image: tensorchord/pgvecto-rs:pg16-v0.2.0
+    image: tensorchord/pgvecto-rs:pg16-v0.3.0
     profiles:
       - pgvecto-rs
     restart: always
@@ -583,6 +583,16 @@ services:
     ports:
       - "${MYSCALE_PORT:-8123}:${MYSCALE_PORT:-8123}"
 
+  # unstructured .
+  # (if used, you need to set ETL_TYPE to Unstructured in the api & worker service.)
+  unstructured:
+    image: downloads.unstructured.io/unstructured-io/unstructured-api:latest
+    profiles:
+      - unstructured
+    restart: always
+    volumes:
+      - ./volumes/unstructured:/app/data
+
 networks:
   # create a network between sandbox, api and ssrf_proxy, and can not access outside.
ssrf_proxy_network: diff --git a/web/app/components/app/configuration/dataset-config/params-config/config-content.tsx b/web/app/components/app/configuration/dataset-config/params-config/config-content.tsx index 683617bf25..3656bf6ea7 100644 --- a/web/app/components/app/configuration/dataset-config/params-config/config-content.tsx +++ b/web/app/components/app/configuration/dataset-config/params-config/config-content.tsx @@ -4,6 +4,7 @@ import { memo, useMemo } from 'react' import type { FC } from 'react' import { useTranslation } from 'react-i18next' import { + RiAlertFill, RiQuestionLine, } from '@remixicon/react' import WeightedScore from './weighted-score' @@ -26,7 +27,6 @@ import TooltipPlus from '@/app/components/base/tooltip-plus' import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations' import type { DataSet, - WeightedScoreEnum, } from '@/models/datasets' import { RerankingModeEnum } from '@/models/datasets' import cn from '@/utils/classnames' @@ -112,12 +112,11 @@ const ConfigContent: FC = ({ }) } - const handleWeightedScoreChange = (value: { type: WeightedScoreEnum; value: number[] }) => { + const handleWeightedScoreChange = (value: { value: number[] }) => { const configs = { ...datasetConfigs, weights: { ...datasetConfigs.weights!, - weight_type: value.type, vector_setting: { ...datasetConfigs.weights!.vector_setting!, vector_weight: value.value[0], @@ -178,14 +177,6 @@ const ConfigContent: FC = ({ popupContent={(
{t('dataset.nTo1RetrievalLegacy')} - - ({t('dataset.nTo1RetrievalLegacyLink')}) -
)} > @@ -196,6 +187,22 @@ const ConfigContent: FC = ({ description={t('appDebug.datasetConfig.retrieveOneWay.description')} isChosen={type === RETRIEVE_TYPE.oneWay} onChosen={() => { setType(RETRIEVE_TYPE.oneWay) }} + extra={( +
+ +
+ {t('dataset.nTo1RetrievalLegacyLinkText')} + + {t('dataset.nTo1RetrievalLegacyLink')} + +
+
+ )} /> } @@ -302,7 +309,6 @@ const ConfigContent: FC = ({
{ } type Value = { - type: WeightedScoreEnum value: number[] } @@ -30,78 +26,31 @@ const WeightedScore = ({ onChange = () => {}, }: WeightedScoreProps) => { const { t } = useTranslation() - const options = [ - { - value: WeightedScoreEnum.SemanticFirst, - label: t('dataset.weightedScore.semanticFirst'), - }, - { - value: WeightedScoreEnum.KeywordFirst, - label: t('dataset.weightedScore.keywordFirst'), - }, - { - value: WeightedScoreEnum.Customized, - label: t('dataset.weightedScore.customized'), - }, - ] - - const disabled = value.type !== WeightedScoreEnum.Customized - - const handleTypeChange = useCallback((type: WeightedScoreEnum) => { - const result = { ...value, type } - - if (type === WeightedScoreEnum.SemanticFirst) - result.value = [DEFAULT_WEIGHTED_SCORE.semanticFirst.semantic, DEFAULT_WEIGHTED_SCORE.semanticFirst.keyword] - - if (type === WeightedScoreEnum.KeywordFirst) - result.value = [DEFAULT_WEIGHTED_SCORE.keywordFirst.semantic, DEFAULT_WEIGHTED_SCORE.keywordFirst.keyword] - - onChange(result) - }, [value, onChange]) return (
-
- { - options.map(option => ( -
handleTypeChange(option.value)} - > -
-
{option.label}
-
- )) - } -
-
-
-
- {t('dataset.weightedScore.semantic')} -
- {formatNumber(value.value[0])} -
+
onChange({ type: value.type, value: [v, (10 - v * 10) / 10] })} - disabled={disabled} - thumbClassName={cn(disabled && '!cursor-not-allowed')} - trackClassName='!bg-transparent' + onChange={v => onChange({ value: [v, (10 - v * 10) / 10] })} + trackClassName='weightedScoreSliderTrack' /> -
- {formatNumber(value.value[1])} -
- {t('dataset.weightedScore.keyword')} +
+
+
+ {t('dataset.weightedScore.semantic')} +
+ {formatNumber(value.value[0])} +
+
+ {formatNumber(value.value[1])} +
+ {t('dataset.weightedScore.keyword')} +
diff --git a/web/app/components/base/chat/chat-with-history/chat-wrapper.tsx b/web/app/components/base/chat/chat-with-history/chat-wrapper.tsx index 9dcd4cc7ab..8eda66c52a 100644 --- a/web/app/components/base/chat/chat-with-history/chat-wrapper.tsx +++ b/web/app/components/base/chat/chat-with-history/chat-wrapper.tsx @@ -30,6 +30,7 @@ const ChatWrapper = () => { handleFeedback, currentChatInstanceRef, appData, + themeBuilder, } = useChatWithHistoryContext() const appConfig = useMemo(() => { const config = appParams || {} @@ -143,6 +144,7 @@ const ChatWrapper = () => { onFeedback={handleFeedback} suggestedQuestions={suggestedQuestions} hideProcessDetail + themeBuilder={themeBuilder} /> ) } diff --git a/web/app/components/base/chat/chat-with-history/context.tsx b/web/app/components/base/chat/chat-with-history/context.tsx index 7808b73b12..e3c144bcd2 100644 --- a/web/app/components/base/chat/chat-with-history/context.tsx +++ b/web/app/components/base/chat/chat-with-history/context.tsx @@ -8,6 +8,7 @@ import type { ChatItem, Feedback, } from '../types' +import type { ThemeBuilder } from '../embedded-chatbot/theme/theme-context' import type { AppConversationData, AppData, @@ -46,6 +47,7 @@ export type ChatWithHistoryContextValue = { appId?: string handleFeedback: (messageId: string, feedback: Feedback) => void currentChatInstanceRef: RefObject<{ handleStop: () => void }> + themeBuilder?: ThemeBuilder } export const ChatWithHistoryContext = createContext({ diff --git a/web/app/components/base/chat/chat-with-history/index.tsx b/web/app/components/base/chat/chat-with-history/index.tsx index b02091231d..5910a7f949 100644 --- a/web/app/components/base/chat/chat-with-history/index.tsx +++ b/web/app/components/base/chat/chat-with-history/index.tsx @@ -4,6 +4,7 @@ import { useState, } from 'react' import { useAsyncEffect } from 'ahooks' +import { useThemeContext } from '../embedded-chatbot/theme/theme-context' import { ChatWithHistoryContext, useChatWithHistoryContext, @@ -34,6 +35,7 @@ const ChatWithHistory: FC = ({ appChatListDataLoading, chatShouldReloadKey, isMobile, + themeBuilder, } = useChatWithHistoryContext() const chatReady = (!showConfigPanelBeforeChat || !!appPrevChatList.length) @@ -41,13 +43,14 @@ const ChatWithHistory: FC = ({ const site = appData?.site useEffect(() => { + themeBuilder?.buildTheme(site?.chat_color_theme, site?.chat_color_theme_inverted) if (site) { if (customConfig) document.title = `${site.title}` else document.title = `${site.title} - Powered by Dify` } - }, [site, customConfig]) + }, [site, customConfig, themeBuilder]) if (appInfoLoading) { return ( @@ -106,6 +109,7 @@ const ChatWithHistoryWrap: FC = ({ }) => { const media = useBreakpoints() const isMobile = media === MediaType.mobile + const themeBuilder = useThemeContext() const { appInfoError, @@ -171,6 +175,7 @@ const ChatWithHistoryWrap: FC = ({ appId, handleFeedback, currentChatInstanceRef, + themeBuilder, }}> diff --git a/web/app/components/base/chat/embedded-chatbot/index.tsx b/web/app/components/base/chat/embedded-chatbot/index.tsx index 6d144f3f3b..d34fe164d1 100644 --- a/web/app/components/base/chat/embedded-chatbot/index.tsx +++ b/web/app/components/base/chat/embedded-chatbot/index.tsx @@ -51,7 +51,7 @@ const Chatbot = () => { else document.title = `${site.title} - Powered by Dify` } - }, [site, customConfig]) + }, [site, customConfig, themeBuilder]) if (appInfoLoading) { return ( diff --git a/web/app/components/base/radio-card/simple/index.tsx b/web/app/components/base/radio-card/simple/index.tsx index 
6893b4c2e2..926fc02523 100644 --- a/web/app/components/base/radio-card/simple/index.tsx +++ b/web/app/components/base/radio-card/simple/index.tsx @@ -12,6 +12,7 @@ type Props = { onChosen: () => void chosenConfig?: React.ReactNode icon?: JSX.Element + extra?: React.ReactNode } const RadioCard: FC = ({ @@ -20,20 +21,24 @@ const RadioCard: FC = ({ isChosen, onChosen, icon, + extra, }) => { return (
- {icon} -
-
-
{title}
-
+
+ {icon} +
+
+
{title}
+
+
+
{description}
-
{description}
+ {extra}
) } diff --git a/web/app/components/base/radio-card/simple/style.module.css b/web/app/components/base/radio-card/simple/style.module.css index 7b9871cc17..58a87086bc 100644 --- a/web/app/components/base/radio-card/simple/style.module.css +++ b/web/app/components/base/radio-card/simple/style.module.css @@ -1,5 +1,5 @@ .item { - @apply relative p-4 rounded-xl border border-gray-100 cursor-pointer; + @apply relative rounded-xl border border-gray-100 cursor-pointer; background-color: #fcfcfd; } diff --git a/web/app/components/datasets/common/retrieval-param-config/index.tsx b/web/app/components/datasets/common/retrieval-param-config/index.tsx index b52e3a60b2..98676f2e83 100644 --- a/web/app/components/datasets/common/retrieval-param-config/index.tsx +++ b/web/app/components/datasets/common/retrieval-param-config/index.tsx @@ -16,6 +16,7 @@ import ModelSelector from '@/app/components/header/account-setting/model-provide import { useModelListAndDefaultModel } from '@/app/components/header/account-setting/model-provider-page/hooks' import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations' import { + DEFAULT_WEIGHTED_SCORE, RerankingModeEnum, WeightedScoreEnum, } from '@/models/datasets' @@ -69,12 +70,12 @@ const RetrievalParamConfig: FC = ({ result.weights = { weight_type: WeightedScoreEnum.Customized, vector_setting: { - vector_weight: 0.5, + vector_weight: DEFAULT_WEIGHTED_SCORE.other.semantic, embedding_provider_name: '', embedding_model_name: '', }, keyword_setting: { - keyword_weight: 0.5, + keyword_weight: DEFAULT_WEIGHTED_SCORE.other.keyword, }, } } @@ -202,7 +203,6 @@ const RetrievalParamConfig: FC = ({ value.reranking_mode === RerankingModeEnum.WeightedScore && ( = ({ ...value, weights: { ...value.weights!, - weight_type: v.type, vector_setting: { ...value.weights!.vector_setting, vector_weight: v.value[0], diff --git a/web/app/components/datasets/settings/form/index.tsx b/web/app/components/datasets/settings/form/index.tsx index 7b007f8dd0..f7519248e6 100644 --- a/web/app/components/datasets/settings/form/index.tsx +++ b/web/app/components/datasets/settings/form/index.tsx @@ -264,20 +264,18 @@ const Form = () => { )}
- {currentDataset?.embedding_available && ( -
-
-
- -
+
+
+
+
- )} +
) } diff --git a/web/app/components/header/account-setting/model-provider-page/model-parameter-modal/parameter-item.tsx b/web/app/components/header/account-setting/model-provider-page/model-parameter-modal/parameter-item.tsx index a206290408..57ea4bdd11 100644 --- a/web/app/components/header/account-setting/model-provider-page/model-parameter-modal/parameter-item.tsx +++ b/web/app/components/header/account-setting/model-provider-page/model-parameter-modal/parameter-item.tsx @@ -56,7 +56,7 @@ const ParameterItem: FC = ({ const handleInputChange = (newValue: ParameterValue) => { setLocalValue(newValue) - if (onChange && (parameterRule.name === 'stop' || !isNullOrUndefined(value))) + if (onChange && (parameterRule.name === 'stop' || !isNullOrUndefined(value) || parameterRule.required)) onChange(newValue) } diff --git a/web/app/components/header/account-setting/model-provider-page/system-model-selector/index.tsx b/web/app/components/header/account-setting/model-provider-page/system-model-selector/index.tsx index 3be088a502..b2dfe4bfe4 100644 --- a/web/app/components/header/account-setting/model-provider-page/system-model-selector/index.tsx +++ b/web/app/components/header/account-setting/model-provider-page/system-model-selector/index.tsx @@ -249,6 +249,7 @@ const SystemModel: FC = ({ {t('common.operation.cancel')}