mirror of
https://git.mirrors.martin98.com/https://github.com/langgenius/dify.git
synced 2025-05-25 23:58:15 +08:00

Signed-off-by: yihong0618 <zouzou0208@gmail.com> Signed-off-by: -LAN- <laipz8200@outlook.com> Signed-off-by: xhe <xw897002528@gmail.com> Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: takatost <takatost@gmail.com> Co-authored-by: kurokobo <kuro664@gmail.com> Co-authored-by: Novice Lee <novicelee@NoviPro.local> Co-authored-by: zxhlyh <jasonapring2015@outlook.com> Co-authored-by: AkaraChen <akarachen@outlook.com> Co-authored-by: Yi <yxiaoisme@gmail.com> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: JzoNg <jzongcode@gmail.com> Co-authored-by: twwu <twwu@dify.ai> Co-authored-by: Hiroshi Fujita <fujita-h@users.noreply.github.com> Co-authored-by: AkaraChen <85140972+AkaraChen@users.noreply.github.com> Co-authored-by: NFish <douxc512@gmail.com> Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Co-authored-by: 非法操作 <hjlarry@163.com> Co-authored-by: Novice <857526207@qq.com> Co-authored-by: Hiroki Nagai <82458324+nagaihiroki-git@users.noreply.github.com> Co-authored-by: Gen Sato <52241300+halogen22@users.noreply.github.com> Co-authored-by: eux <euxuuu@gmail.com> Co-authored-by: huangzhuo1949 <167434202+huangzhuo1949@users.noreply.github.com> Co-authored-by: huangzhuo <huangzhuo1@xiaomi.com> Co-authored-by: lotsik <lotsik@mail.ru> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: gakkiyomi <gakkiyomi@aliyun.com> Co-authored-by: CN-P5 <heibai2006@gmail.com> Co-authored-by: CN-P5 <heibai2006@qq.com> Co-authored-by: Chuehnone <1897025+chuehnone@users.noreply.github.com> Co-authored-by: yihong <zouzou0208@gmail.com> Co-authored-by: Kevin9703 <51311316+Kevin9703@users.noreply.github.com> Co-authored-by: -LAN- <laipz8200@outlook.com> Co-authored-by: Boris Feld 
<lothiraldan@gmail.com> Co-authored-by: mbo <himabo@gmail.com> Co-authored-by: mabo <mabo@aeyes.ai> Co-authored-by: Warren Chen <warren.chen830@gmail.com> Co-authored-by: JzoNgKVO <27049666+JzoNgKVO@users.noreply.github.com> Co-authored-by: jiandanfeng <chenjh3@wangsu.com> Co-authored-by: zhu-an <70234959+xhdd123321@users.noreply.github.com> Co-authored-by: zhaoqingyu.1075 <zhaoqingyu.1075@bytedance.com> Co-authored-by: 海狸大師 <86974027+yenslife@users.noreply.github.com> Co-authored-by: Xu Song <xusong.vip@gmail.com> Co-authored-by: rayshaw001 <396301947@163.com> Co-authored-by: Ding Jiatong <dingjiatong@gmail.com> Co-authored-by: Bowen Liang <liangbowen@gf.com.cn> Co-authored-by: JasonVV <jasonwangiii@outlook.com> Co-authored-by: le0zh <newlight@qq.com> Co-authored-by: zhuxinliang <zhuxinliang@didiglobal.com> Co-authored-by: k-zaku <zaku99@outlook.jp> Co-authored-by: luckylhb90 <luckylhb90@gmail.com> Co-authored-by: hobo.l <hobo.l@binance.com> Co-authored-by: jiangbo721 <365065261@qq.com> Co-authored-by: 刘江波 <jiangbo721@163.com> Co-authored-by: Shun Miyazawa <34241526+miya@users.noreply.github.com> Co-authored-by: EricPan <30651140+Egfly@users.noreply.github.com> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: sino <sino2322@gmail.com> Co-authored-by: Jhvcc <37662342+Jhvcc@users.noreply.github.com> Co-authored-by: lowell <lowell.hu@zkteco.in> Co-authored-by: Boris Polonsky <BorisPolonsky@users.noreply.github.com> Co-authored-by: Ademílson Tonato <ademilsonft@outlook.com> Co-authored-by: Ademílson Tonato <ademilson.tonato@refurbed.com> Co-authored-by: IWAI, Masaharu <iwaim.sub@gmail.com> Co-authored-by: Yueh-Po Peng (Yabi) <94939112+y10ab1@users.noreply.github.com> Co-authored-by: Jason <ggbbddjm@gmail.com> Co-authored-by: Xin Zhang <sjhpzx@gmail.com> Co-authored-by: yjc980121 <3898524+yjc980121@users.noreply.github.com> Co-authored-by: heyszt <36215648+hieheihei@users.noreply.github.com> Co-authored-by: Abdullah AlOsaimi <osaimiacc@gmail.com> 
Co-authored-by: Abdullah AlOsaimi <189027247+osaimi@users.noreply.github.com> Co-authored-by: Yingchun Lai <laiyingchun@apache.org> Co-authored-by: Hash Brown <hi@xzd.me> Co-authored-by: zuodongxu <192560071+zuodongxu@users.noreply.github.com> Co-authored-by: Masashi Tomooka <tmokmss@users.noreply.github.com> Co-authored-by: aplio <ryo.091219@gmail.com> Co-authored-by: Obada Khalili <54270856+obadakhalili@users.noreply.github.com> Co-authored-by: Nam Vu <zuzoovn@gmail.com> Co-authored-by: Kei YAMAZAKI <1715090+kei-yamazaki@users.noreply.github.com> Co-authored-by: TechnoHouse <13776377+deephbz@users.noreply.github.com> Co-authored-by: Riddhimaan-Senapati <114703025+Riddhimaan-Senapati@users.noreply.github.com> Co-authored-by: MaFee921 <31881301+2284730142@users.noreply.github.com> Co-authored-by: te-chan <t-nakanome@sakura-is.co.jp> Co-authored-by: HQidea <HQidea@users.noreply.github.com> Co-authored-by: Joshbly <36315710+Joshbly@users.noreply.github.com> Co-authored-by: xhe <xw897002528@gmail.com> Co-authored-by: weiwenyan-dev <154779315+weiwenyan-dev@users.noreply.github.com> Co-authored-by: ex_wenyan.wei <ex_wenyan.wei@tcl.com> Co-authored-by: engchina <12236799+engchina@users.noreply.github.com> Co-authored-by: engchina <atjapan2015@gmail.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: 呆萌闷油瓶 <253605712@qq.com> Co-authored-by: Kemal <kemalmeler@outlook.com> Co-authored-by: Lazy_Frog <4590648+lazyFrogLOL@users.noreply.github.com> Co-authored-by: Yi Xiao <54782454+YIXIAO0@users.noreply.github.com> Co-authored-by: Steven sun <98230804+Tuyohai@users.noreply.github.com> Co-authored-by: steven <sunzwj@digitalchina.com> Co-authored-by: Kalo Chin <91766386+fdb02983rhy@users.noreply.github.com> Co-authored-by: Katy Tao <34019945+KatyTao@users.noreply.github.com> Co-authored-by: depy <42985524+h4ckdepy@users.noreply.github.com> Co-authored-by: 胡春东 <gycm520@gmail.com> Co-authored-by: Junjie.M <118170653@qq.com> 
Co-authored-by: MuYu <mr.muzea@gmail.com> Co-authored-by: Naoki Takashima <39912547+takatea@users.noreply.github.com> Co-authored-by: Summer-Gu <37869445+gubinjie@users.noreply.github.com> Co-authored-by: Fei He <droxer.he@gmail.com> Co-authored-by: ybalbert001 <120714773+ybalbert001@users.noreply.github.com> Co-authored-by: Yuanbo Li <ybalbert@amazon.com> Co-authored-by: douxc <7553076+douxc@users.noreply.github.com> Co-authored-by: liuzhenghua <1090179900@qq.com> Co-authored-by: Wu Jiayang <62842862+Wu-Jiayang@users.noreply.github.com> Co-authored-by: Your Name <you@example.com> Co-authored-by: kimjion <45935338+kimjion@users.noreply.github.com> Co-authored-by: AugNSo <song.tiankai@icloud.com> Co-authored-by: llinvokerl <38915183+llinvokerl@users.noreply.github.com> Co-authored-by: liusurong.lsr <liusurong.lsr@alibaba-inc.com> Co-authored-by: Vasu Negi <vasu-negi@users.noreply.github.com> Co-authored-by: Hundredwz <1808096180@qq.com> Co-authored-by: Xiyuan Chen <52963600+GareArc@users.noreply.github.com>
243 lines
7.0 KiB
TypeScript
243 lines
7.0 KiB
TypeScript
import {
|
|
uniq,
|
|
xorBy,
|
|
} from 'lodash-es'
|
|
import type { MultipleRetrievalConfig } from './types'
|
|
import type {
|
|
DataSet,
|
|
SelectedDatasetsMode,
|
|
} from '@/models/datasets'
|
|
import {
|
|
DEFAULT_WEIGHTED_SCORE,
|
|
RerankingModeEnum,
|
|
} from '@/models/datasets'
|
|
import { RETRIEVE_METHOD } from '@/types/app'
|
|
import { DATASET_DEFAULT } from '@/config'
|
|
|
|
/**
 * Validity check for this node type.
 *
 * No conditions are inspected: the function unconditionally returns `true`.
 */
export const checkNodeValid = (): boolean => true
|
|
|
|
/**
 * Summarises a list of datasets as a set of boolean flags.
 *
 * For an empty (or `null` / omitted) list every flag is `false`. Otherwise:
 * - `allHighQuality`: no dataset is `'economy'` indexed and none has provider `'external'`.
 * - `allHighQualityVectorSearch` / `allHighQualityFullTextSearch`: `allHighQuality` holds and
 *   every `'high_quality'` dataset uses the semantic / full-text search method respectively.
 * - `allEconomic`: no dataset is `'high_quality'` indexed.
 * - `allExternal` / `allInternal`: every dataset has / does not have provider `'external'`.
 * - `mixtureHighQualityAndEconomic`: no external dataset, and neither `allHighQuality`
 *   nor `allEconomic` holds.
 * - `mixtureInternalAndExternal`: neither `allExternal` nor `allInternal` holds.
 * - `inconsistentEmbeddingModel`: `allHighQuality` holds and the datasets carry more than
 *   one distinct `embedding_model` value.
 *
 * @param datasets - Datasets to classify; defaults to an empty list.
 * @returns The computed flags.
 */
export const getSelectedDatasetsMode = (datasets: DataSet[] = []) => {
  // The default parameter only covers `undefined`; an explicit `null` is treated as empty too.
  const items = datasets === null ? [] : datasets
  const hasItems = items.length > 0

  // Every "all…" / "mixture…" flag starts optimistic for a non-empty list and is
  // knocked down as soon as one dataset contradicts it.
  const mode = {
    allHighQuality: hasItems,
    allHighQualityVectorSearch: hasItems,
    allHighQualityFullTextSearch: hasItems,
    allEconomic: hasItems,
    mixtureHighQualityAndEconomic: hasItems,
    allInternal: hasItems,
    allExternal: hasItems,
    mixtureInternalAndExternal: hasItems,
    inconsistentEmbeddingModel: false,
  }

  const clearHighQualityFlags = () => {
    mode.allHighQuality = false
    mode.allHighQualityVectorSearch = false
    mode.allHighQualityFullTextSearch = false
  }

  items.forEach((dataset) => {
    const technique = dataset.indexing_technique

    if (technique === 'economy') {
      clearHighQualityFlags()
    }
    else if (technique === 'high_quality') {
      mode.allEconomic = false

      const searchMethod = dataset.retrieval_model_dict.search_method
      if (searchMethod !== RETRIEVE_METHOD.semantic)
        mode.allHighQualityVectorSearch = false
      if (searchMethod !== RETRIEVE_METHOD.fullText)
        mode.allHighQualityFullTextSearch = false
    }

    if (dataset.provider === 'external') {
      // An external dataset rules out every internal-only classification.
      mode.allInternal = false
      clearHighQualityFlags()
      mode.mixtureHighQualityAndEconomic = false
    }
    else {
      mode.allExternal = false
    }
  })

  // A "mixture" only remains when neither homogeneous alternative survived the scan.
  mode.mixtureInternalAndExternal = mode.mixtureInternalAndExternal && !(mode.allExternal || mode.allInternal)
  mode.mixtureHighQualityAndEconomic = mode.mixtureHighQualityAndEconomic && !(mode.allHighQuality || mode.allEconomic)

  if (mode.allHighQuality) {
    const distinctEmbeddingModels = uniq(items.map(item => item.embedding_model))
    mode.inconsistentEmbeddingModel = distinctEmbeddingModels.length > 1
  }

  return mode as SelectedDatasetsMode
}
|
|
|
|
/**
 * Builds the retrieval configuration to use for the currently selected datasets.
 *
 * The incoming configuration is copied (with `top_k` defaulting to `DATASET_DEFAULT.top_k`)
 * and then adjusted according to the flags from `getSelectedDatasetsMode(selectedDatasets)`:
 *
 * - When the selection is all-economic, a high-quality/economic mixture, has inconsistent
 *   embedding models, is all-external, or mixes internal and external datasets, the
 *   reranking mode is forced to `RerankingModeEnum.RerankingModel`. A configured rerank
 *   model enables reranking; otherwise `validRerankModel` is adopted when it has both
 *   provider and model, else the rerank model is reset to empty strings.
 * - When the selection is all high-quality, internal, and embedding-consistent, the mode
 *   that was originally configured decides between adopting `validRerankModel` and
 *   falling back to weighted score with default weights.
 *
 * @param multipleRetrievalConfig - Current configuration; a falsy value is treated as
 *   `{ top_k: DATASET_DEFAULT.top_k }`.
 * @param selectedDatasets - Datasets selected now.
 * @param originalDatasets - Previous selection; compared with `selectedDatasets` by `id`.
 * @param validRerankModel - Candidate rerank model, used only when both fields are truthy.
 * @returns A new object with `top_k`, `score_threshold`, `reranking_mode`,
 *   `reranking_model`, `weights` and `reranking_enable`.
 */
export const getMultipleRetrievalConfig = (
  multipleRetrievalConfig: MultipleRetrievalConfig,
  selectedDatasets: DataSet[],
  originalDatasets: DataSet[],
  validRerankModel?: { provider?: string; model?: string },
) => {
  // The selection changed when some dataset id is present in only one of the two lists.
  const selectionChanged = xorBy(selectedDatasets, originalDatasets, 'id').length > 0
  const rerankModelIsValid = validRerankModel?.provider && validRerankModel?.model
  const mode = getSelectedDatasetsMode(selectedDatasets)

  const {
    top_k = DATASET_DEFAULT.top_k,
    score_threshold,
    reranking_mode: configuredMode,
    reranking_model,
    weights: configuredWeights,
    reranking_enable: configuredEnable,
  } = multipleRetrievalConfig || { top_k: DATASET_DEFAULT.top_k }

  // The configured enable flag is kept only for all-external or internal all-economic
  // selections; otherwise it mirrors whether the selection changed.
  const keepConfiguredEnable = mode.allExternal || (mode.allInternal && mode.allEconomic)

  const result = {
    top_k,
    score_threshold,
    reranking_mode: configuredMode,
    reranking_model,
    weights: configuredWeights,
    reranking_enable: keepConfiguredEnable ? configuredEnable : selectionChanged,
  }

  /** Overwrites `result.weights` with the defaults matching the selection's search method. */
  const applyDefaultWeights = () => {
    const preset = mode.allHighQualityVectorSearch
      ? DEFAULT_WEIGHTED_SCORE.allHighQualityVectorSearch
      : mode.allHighQualityFullTextSearch
        ? DEFAULT_WEIGHTED_SCORE.allHighQualityFullTextSearch
        : DEFAULT_WEIGHTED_SCORE.other
    // Embedding details are taken from the first selected dataset.
    const firstDataset = selectedDatasets[0]

    result.weights = {
      vector_setting: {
        vector_weight: preset.semantic,
        embedding_provider_name: firstDataset.embedding_model_provider,
        embedding_model_name: firstDataset.embedding_model,
      },
      keyword_setting: {
        keyword_weight: preset.keyword,
      },
    }
  }

  /** Switches to rerank-model mode using `validRerankModel`; call only when it is valid. */
  const adoptValidRerankModel = () => {
    result.reranking_mode = RerankingModeEnum.RerankingModel
    result.reranking_enable = true
    result.reranking_model = {
      provider: validRerankModel?.provider || '',
      model: validRerankModel?.model || '',
    }
  }

  /** Switches to weighted-score mode with default weights. */
  const fallBackToWeightedScore = () => {
    result.reranking_mode = RerankingModeEnum.WeightedScore
    applyDefaultWeights()
  }

  const requiresRerankModel = mode.allEconomic
    || mode.mixtureHighQualityAndEconomic
    || mode.inconsistentEmbeddingModel
    || mode.allExternal
    || mode.mixtureInternalAndExternal

  if (requiresRerankModel) {
    result.reranking_mode = RerankingModeEnum.RerankingModel

    if (result.reranking_model?.provider && result.reranking_model?.model) {
      result.reranking_enable = true
    }
    else if (rerankModelIsValid) {
      adoptValidRerankModel()
    }
    else {
      result.reranking_model = {
        provider: '',
        model: '',
      }
    }
  }

  if (mode.allHighQuality && !mode.inconsistentEmbeddingModel && mode.allInternal) {
    // All checks below look at the mode and weights that were configured on entry,
    // not at values already written to `result`.
    if (!configuredMode) {
      if (rerankModelIsValid)
        adoptValidRerankModel()
      else
        fallBackToWeightedScore()
    }

    if (configuredMode === RerankingModeEnum.WeightedScore) {
      if (!configuredWeights) {
        applyDefaultWeights()
      }
      else if (selectionChanged) {
        if (rerankModelIsValid)
          adoptValidRerankModel()
        else
          applyDefaultWeights()
      }
    }

    if (configuredMode === RerankingModeEnum.RerankingModel && selectionChanged && !rerankModelIsValid)
      fallBackToWeightedScore()
  }

  return result
}
|
|
|
|
/**
 * Checks whether the retrieval settings are acceptable with respect to the rerank model.
 *
 * Returns `true` when no configuration is given, when the reranking mode is not
 * `RerankingModeEnum.RerankingModel`, or when a rerank model with both provider and
 * model is configured. When rerank-model mode is selected without a complete model,
 * the result is `true` only if the datasets are all-economic or all-external and
 * `reranking_enable` is falsy.
 *
 * @param datasets - Datasets whose mode flags are computed via `getSelectedDatasetsMode`.
 * @param multipleRetrievalConfig - Retrieval configuration to inspect.
 * @returns Whether the check passes.
 */
export const checkoutRerankModelConfigedInRetrievalSettings = (
  datasets: DataSet[],
  multipleRetrievalConfig?: MultipleRetrievalConfig,
) => {
  if (!multipleRetrievalConfig)
    return true

  const { allEconomic, allExternal } = getSelectedDatasetsMode(datasets)
  const { reranking_enable, reranking_mode, reranking_model } = multipleRetrievalConfig

  const rerankModelMissing = reranking_mode === RerankingModeEnum.RerankingModel
    && !(reranking_model?.provider && reranking_model?.model)
  if (!rerankModelMissing)
    return true

  // A missing rerank model is tolerated only when reranking is switched off for an
  // all-economic or all-external selection.
  return (allEconomic || allExternal) && !reranking_enable
}
|