dify
This commit is contained in:
@@ -0,0 +1,288 @@
|
||||
import {
|
||||
uniq,
|
||||
xorBy,
|
||||
} from 'lodash-es'
|
||||
import type { MultipleRetrievalConfig } from './types'
|
||||
import type {
|
||||
DataSet,
|
||||
SelectedDatasetsMode,
|
||||
} from '@/models/datasets'
|
||||
import {
|
||||
DEFAULT_WEIGHTED_SCORE,
|
||||
RerankingModeEnum,
|
||||
WeightedScoreEnum,
|
||||
} from '@/models/datasets'
|
||||
import { RETRIEVE_METHOD } from '@/types/app'
|
||||
import { DATASET_DEFAULT } from '@/config'
|
||||
|
||||
/**
 * Reports whether a node is valid.
 *
 * No checks are performed here: the function unconditionally returns `true`.
 *
 * @returns Always `true`.
 */
export const checkNodeValid = () => {
  return true
}
|
||||
|
||||
/**
 * Classifies a selection of datasets into a set of boolean flags.
 *
 * Flag meanings, as implemented below:
 * - `allHighQuality`: no dataset is indexed as `'economy'` and none is external.
 * - `allHighQualityVectorSearch` / `allHighQualityFullTextSearch`: `allHighQuality`
 *   holds and every `'high_quality'` dataset uses the semantic / full-text search method.
 * - `allEconomic`: no dataset is indexed as `'high_quality'` (external datasets do not clear it).
 * - `allInternal` / `allExternal`: every dataset's provider is not / is `'external'`.
 * - `mixtureInternalAndExternal`: neither `allInternal` nor `allExternal`.
 * - `mixtureHighQualityAndEconomic`: no external dataset, and neither `allHighQuality` nor `allEconomic`.
 * - `inconsistentEmbeddingModel`: `allHighQuality` holds and the datasets name more than
 *   one distinct `embedding_model`.
 *
 * An empty selection yields `false` for every flag.
 *
 * @param datasets - Selected datasets; `undefined` and `null` are treated as an empty list.
 * @returns The computed flags, asserted to `SelectedDatasetsMode`.
 */
export const getSelectedDatasetsMode = (datasets: DataSet[] = []) => {
  // The default parameter only covers `undefined`; guard against an explicit `null` too.
  if (datasets === null)
    datasets = []

  // Every "all…" / "mixture…" flag starts optimistic for a non-empty selection and is
  // cleared as soon as one dataset contradicts it. An empty selection clears them all.
  const hasDatasets = datasets.length > 0
  let allHighQuality = hasDatasets
  let allHighQualityVectorSearch = hasDatasets
  let allHighQualityFullTextSearch = hasDatasets
  let allEconomic = hasDatasets
  let mixtureHighQualityAndEconomic = hasDatasets
  let allExternal = hasDatasets
  let allInternal = hasDatasets
  let mixtureInternalAndExternal = hasDatasets
  let inconsistentEmbeddingModel = false

  datasets.forEach((dataset) => {
    if (dataset.indexing_technique === 'economy') {
      allHighQuality = false
      allHighQualityVectorSearch = false
      allHighQualityFullTextSearch = false
    }
    if (dataset.indexing_technique === 'high_quality') {
      allEconomic = false

      const searchMethod = dataset.retrieval_model_dict.search_method
      if (searchMethod !== RETRIEVE_METHOD.semantic)
        allHighQualityVectorSearch = false

      if (searchMethod !== RETRIEVE_METHOD.fullText)
        allHighQualityFullTextSearch = false
    }
    if (dataset.provider !== 'external') {
      allExternal = false
    }
    else {
      // An external dataset rules out every "internal quality" classification.
      allInternal = false
      allHighQuality = false
      allHighQualityVectorSearch = false
      allHighQualityFullTextSearch = false
      mixtureHighQualityAndEconomic = false
    }
  })

  // A selection is only a "mixture" when it is not uniform in either direction.
  if (allExternal || allInternal)
    mixtureInternalAndExternal = false

  if (allHighQuality || allEconomic)
    mixtureHighQualityAndEconomic = false

  // Embedding models are only compared when the whole selection is high quality.
  if (allHighQuality)
    inconsistentEmbeddingModel = uniq(datasets.map(item => item.embedding_model)).length > 1

  return {
    allHighQuality,
    allHighQualityVectorSearch,
    allHighQualityFullTextSearch,
    allEconomic,
    mixtureHighQualityAndEconomic,
    allInternal,
    allExternal,
    mixtureInternalAndExternal,
    inconsistentEmbeddingModel,
  } as SelectedDatasetsMode
}
|
||||
|
||||
/**
 * Derives the multiple-retrieval configuration that matches the selected datasets.
 *
 * Starts from the values in `multipleRetrievalConfig` and then adjusts
 * `reranking_mode`, `reranking_model`, `reranking_enable` and `weights` depending on
 * how `getSelectedDatasetsMode` classifies `selectedDatasets`. The three cases below
 * are evaluated in order and are not mutually exclusive; later cases may overwrite
 * what earlier ones set.
 *
 * @param multipleRetrievalConfig - Current config; when falsy only `top_k` is defaulted
 *   from `DATASET_DEFAULT.top_k`.
 * @param selectedDatasets - Datasets currently selected.
 * @param originalDatasets - Previously selected datasets, compared by `id` to detect changes.
 * @param fallbackRerankModel - Rerank model used when the config has none; it only counts
 *   as valid when both `provider` and `model` are non-empty.
 * @returns A new object with `top_k`, `score_threshold`, `reranking_mode`,
 *   `reranking_model`, `weights` and `reranking_enable`.
 */
export const getMultipleRetrievalConfig = (
  multipleRetrievalConfig: MultipleRetrievalConfig,
  selectedDatasets: DataSet[],
  originalDatasets: DataSet[],
  fallbackRerankModel?: { provider?: string; model?: string }, // fallback rerank model
) => {
  // Check if the selected datasets are different from the original datasets
  const isDatasetsChanged = xorBy(selectedDatasets, originalDatasets, 'id').length > 0
  // Check if the rerank model is valid
  const isFallbackRerankModelValid = !!(fallbackRerankModel?.provider && fallbackRerankModel?.model)

  const {
    allHighQuality,
    allHighQualityVectorSearch,
    allHighQualityFullTextSearch,
    allEconomic,
    mixtureHighQualityAndEconomic,
    allInternal,
    allExternal,
    mixtureInternalAndExternal,
    inconsistentEmbeddingModel,
  } = getSelectedDatasetsMode(selectedDatasets)

  const {
    top_k = DATASET_DEFAULT.top_k,
    score_threshold,
    reranking_mode,
    reranking_model,
    weights,
    reranking_enable,
  } = multipleRetrievalConfig || { top_k: DATASET_DEFAULT.top_k }

  const result = {
    top_k,
    score_threshold,
    reranking_mode,
    reranking_model,
    weights,
    reranking_enable,
  }

  // True when the result currently names both a rerank provider and a rerank model.
  const hasRerankModel = () => !!(result.reranking_model?.provider && result.reranking_model?.model)

  // Unconditionally points the result at the fallback rerank model.
  // Optional chaining keeps this type-safe: the parameter itself is optional.
  const applyFallbackRerankModel = () => {
    result.reranking_model = {
      provider: fallbackRerankModel?.provider || '',
      model: fallbackRerankModel?.model || '',
    }
  }

  // Uses the fallback rerank model only when none is configured and the fallback is valid.
  const applyFallbackRerankModelIfMissing = () => {
    if (!hasRerankModel() && isFallbackRerankModelValid)
      applyFallbackRerankModel()
  }

  // Resets the weights to the defaults that match the selection's search method.
  // Only called from the all-high-quality case, where `selectedDatasets` is non-empty.
  const setDefaultWeights = () => {
    result.weights = {
      weight_type: WeightedScoreEnum.Customized,
      vector_setting: {
        vector_weight: allHighQualityVectorSearch
          ? DEFAULT_WEIGHTED_SCORE.allHighQualityVectorSearch.semantic
          // eslint-disable-next-line sonarjs/no-nested-conditional
          : allHighQualityFullTextSearch
            ? DEFAULT_WEIGHTED_SCORE.allHighQualityFullTextSearch.semantic
            : DEFAULT_WEIGHTED_SCORE.other.semantic,
        embedding_provider_name: selectedDatasets[0].embedding_model_provider,
        embedding_model_name: selectedDatasets[0].embedding_model,
      },
      keyword_setting: {
        keyword_weight: allHighQualityVectorSearch
          ? DEFAULT_WEIGHTED_SCORE.allHighQualityVectorSearch.keyword
          // eslint-disable-next-line sonarjs/no-nested-conditional
          : allHighQualityFullTextSearch
            ? DEFAULT_WEIGHTED_SCORE.allHighQualityFullTextSearch.keyword
            : DEFAULT_WEIGHTED_SCORE.other.keyword,
      },
    }
  }

  /**
   * In this case, user can manually toggle reranking
   * So should keep the reranking_enable value
   * But the default reranking_model should be set
   */
  if ((allEconomic && allInternal) || allExternal) {
    result.reranking_mode = RerankingModeEnum.RerankingModel
    // Need to check if the reranking model should be set to default when first time initialized
    applyFallbackRerankModelIfMissing()
    // `result.reranking_enable` is intentionally left as the caller provided it.
  }

  /**
   * In this case, reranking_enable must be true
   * And if rerank model is not set, should set the default rerank model
   */
  if (mixtureHighQualityAndEconomic || inconsistentEmbeddingModel || mixtureInternalAndExternal) {
    result.reranking_mode = RerankingModeEnum.RerankingModel
    // Need to check if the reranking model should be set to default when first time initialized
    applyFallbackRerankModelIfMissing()
    result.reranking_enable = true
  }

  /**
   * In this case, user can choose to use weighted score or rerank model
   * But if the reranking_mode is not initialized, should set the default rerank model and reranking_enable to true
   * and set reranking_mode to reranking_model
   */
  if (allHighQuality && !inconsistentEmbeddingModel && allInternal) {
    // If not initialized, check if the default rerank model is valid
    if (!reranking_mode) {
      if (isFallbackRerankModelValid) {
        result.reranking_mode = RerankingModeEnum.RerankingModel
        result.reranking_enable = true
        // First-time initialization always adopts the fallback model.
        applyFallbackRerankModel()
      }
      else {
        result.reranking_mode = RerankingModeEnum.WeightedScore
        result.reranking_enable = false
        setDefaultWeights()
      }
    }

    // The checks below look at the incoming `reranking_mode`, not `result.reranking_mode`,
    // so they do not react to the initialization performed just above.

    // After initialization, if datasets has no change, make sure the config has correct value
    if (reranking_mode === RerankingModeEnum.WeightedScore) {
      result.reranking_enable = false
      if (!weights)
        setDefaultWeights()
    }
    if (reranking_mode === RerankingModeEnum.RerankingModel) {
      applyFallbackRerankModelIfMissing()
      result.reranking_enable = true
    }

    // Need to check if reranking_mode should be set to reranking_model when datasets changed
    if (reranking_mode === RerankingModeEnum.WeightedScore && weights && isDatasetsChanged) {
      if (hasRerankModel() || isFallbackRerankModelValid) {
        result.reranking_mode = RerankingModeEnum.RerankingModel
        result.reranking_enable = true
        applyFallbackRerankModelIfMissing()
      }
      else {
        setDefaultWeights()
      }
    }

    // Need to switch to weighted score when reranking model is not valid and datasets changed
    if (
      reranking_mode === RerankingModeEnum.RerankingModel
      && !hasRerankModel()
      && !isFallbackRerankModelValid
      && isDatasetsChanged
    ) {
      result.reranking_mode = RerankingModeEnum.WeightedScore
      result.reranking_enable = false
      setDefaultWeights()
    }
  }

  return result
}
|
||||
|
||||
/**
 * Checks whether the retrieval settings are acceptable with respect to the rerank model.
 *
 * The settings pass when:
 * - no `multipleRetrievalConfig` is given, or
 * - the reranking mode is not "rerank model", or a rerank provider and model are both set, or
 * - the rerank model is missing but reranking is disabled and the selection is either
 *   all-economic-and-internal or all-external (per `getSelectedDatasetsMode`).
 *
 * @param datasets - Datasets whose classification decides whether reranking may be disabled.
 * @param multipleRetrievalConfig - Retrieval settings to check.
 * @returns `true` when the settings pass, otherwise `false`.
 */
export const checkoutRerankModelConfiguredInRetrievalSettings = (
  datasets: DataSet[],
  multipleRetrievalConfig?: MultipleRetrievalConfig,
) => {
  // Nothing to validate without a config.
  if (!multipleRetrievalConfig)
    return true

  const {
    allEconomic,
    allExternal,
    allInternal,
  } = getSelectedDatasetsMode(datasets)

  const {
    reranking_enable,
    reranking_mode,
    reranking_model,
  } = multipleRetrievalConfig

  const isRerankModelMissing = !reranking_model?.provider || !reranking_model?.model

  // A missing rerank model is only tolerated where reranking can be, and is, turned off.
  if (reranking_mode === RerankingModeEnum.RerankingModel && isRerankModelMissing)
    return ((allEconomic && allInternal) || allExternal) && !reranking_enable

  return true
}
|
||||
Reference in New Issue
Block a user