This commit is contained in:
2025-11-04 18:49:37 +08:00
parent b95fff224b
commit 8850a06fea
103 changed files with 15337 additions and 771 deletions

View File

@@ -0,0 +1,588 @@
# Dify指定知识库实现方案
## 🎯 问题分析
**需求**在调用Dify对话API时动态指定使用哪些知识库Dataset
**场景**:不同部门用户使用同一个智能体,但只能访问各自授权的知识库
---
## 📋 Dify官方支持的方式
### 方式1知识库检索API + LLM组合推荐⭐⭐
**优点**
- ✅ 完全控制知识库选择
- ✅ 可以实现复杂的权限逻辑
- ✅ 灵活性最高
**缺点**
- ⚠️ 需要自己组合API调用
- ⚠️ 无法使用Dify的完整对话管理功能
#### 步骤1检索相关知识
```http
POST https://api.dify.ai/v1/datasets/{dataset_id}/retrieve
Authorization: Bearer {api_key}
Content-Type: application/json
{
"query": "如何申请奖学金?",
"top_k": 3,
"score_threshold": 0.7
}
```
**响应示例:**
```json
{
"records": [
{
"content": "申请奖学金需要满足以下条件...",
"score": 0.95,
"metadata": {
"document_id": "doc-123",
"document_name": "奖学金管理办法.pdf"
}
}
]
}
```
#### 步骤2多知识库并行检索
```java
@Service
public class DifyKnowledgeService {
/**
* 从多个知识库检索相关内容
*/
public List<RetrievalRecord> retrieveFromMultipleDatasets(
String query,
List<String> datasetIds,
int topK) {
List<CompletableFuture<List<RetrievalRecord>>> futures = datasetIds.stream()
.map(datasetId -> CompletableFuture.supplyAsync(() ->
retrieveFromDataset(datasetId, query, topK)
))
.collect(Collectors.toList());
// 等待所有检索完成
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();
// 合并结果并按分数排序
return futures.stream()
.map(CompletableFuture::join)
.flatMap(List::stream)
.sorted((a, b) -> Double.compare(b.getScore(), a.getScore()))
.limit(topK)
.collect(Collectors.toList());
}
/**
* 从单个知识库检索
*/
private List<RetrievalRecord> retrieveFromDataset(
String datasetId,
String query,
int topK) {
String url = difyConfig.getFullApiUrl("/datasets/" + datasetId + "/retrieve");
Map<String, Object> requestBody = new HashMap<>();
requestBody.put("query", query);
requestBody.put("top_k", topK);
requestBody.put("score_threshold", 0.7);
// HTTP请求
HttpResponse response = httpClient.post(url)
.header("Authorization", "Bearer " + apiKey)
.body(requestBody)
.execute();
return parseRetrievalResponse(response);
}
}
```
#### 步骤3组合上下文调用LLM
```java
/**
* 使用检索到的知识回答问题
*/
public String chatWithRetrievedKnowledge(
String query,
List<RetrievalRecord> records,
String conversationId) {
// 构建上下文
String context = records.stream()
.map(r -> "【" + r.getMetadata().get("document_name") + "】\n" + r.getContent())
.collect(Collectors.joining("\n\n"));
// 构建Prompt
String prompt = String.format(
"请基于以下知识库内容回答用户问题。如果知识库中没有相关信息,请明确告知用户。\n\n" +
"知识库内容:\n%s\n\n" +
"用户问题:%s\n\n" +
"回答:",
context, query
);
// 调用Dify Completion API或直接调用LLM
return callLLM(prompt, conversationId);
}
```
---
### 方式2Dify Workflow工作流
**原理**:创建工作流,使用变量控制知识库选择
#### Dify工作流配置
```yaml
workflow_nodes:
- id: start
type: start
outputs:
- query # 用户问题
- dataset_ids # 知识库ID列表变量
- id: kb_retrieval
type: knowledge-retrieval
inputs:
query: "{{#start.query#}}"
datasets: "{{#start.dataset_ids#}}" # 从输入变量读取
top_k: 3
outputs:
- result
- id: llm
type: llm
inputs:
prompt: |
基于以下知识库内容回答问题:
{{#kb_retrieval.result#}}
用户问题:{{#start.query#}}
outputs:
- answer
- id: end
type: end
outputs:
- answer: "{{#llm.answer#}}"
```
#### API调用示例
```java
/**
* 调用Dify Workflow
*/
public void chatWithWorkflow(
String query,
List<String> datasetIds,
String userId,
SseEmitter emitter) {
String url = difyConfig.getFullApiUrl("/workflows/run");
Map<String, Object> inputs = new HashMap<>();
inputs.put("query", query);
inputs.put("dataset_ids", datasetIds); // ⭐ 动态传入知识库列表
Map<String, Object> requestBody = new HashMap<>();
requestBody.put("inputs", inputs);
requestBody.put("response_mode", "streaming");
requestBody.put("user", userId);
// 流式请求
httpClient.postStream(url, requestBody, new StreamCallback() {
@Override
public void onChunk(String chunk) {
emitter.send(chunk);
}
});
}
```
**HTTP请求示例**
```http
POST /v1/workflows/run
Authorization: Bearer {api_key}
Content-Type: application/json
{
"inputs": {
"query": "如何申请奖学金?",
"dataset_ids": ["dataset-edu-001", "dataset-public-001"]
},
"response_mode": "streaming",
"user": "user-123"
}
```
---
### 方式3多应用切换不推荐
为不同部门创建不同的Dify应用
```
部门A -> App A绑定知识库A1, A2
部门B -> App B绑定知识库B1, B2
```
**缺点**
- ❌ 管理复杂
- ❌ 无法共享公共知识库
- ❌ 扩展性差
---
## 🎨 推荐实现方案
### 方案知识库检索API + 自定义LLM调用
#### 完整实现代码
```java
@Service
public class AiChatServiceImpl implements AiChatService {
@Autowired
private AiKnowledgeMapper knowledgeMapper;
@Autowired
private DifyApiClient difyApiClient;
@Autowired
private AiMessageMapper messageMapper;
/**
* 流式对话(带知识库权限隔离)
*/
@Override
public void streamChat(
String message,
String conversationId,
String userId,
SseEmitter emitter) {
try {
// 1. 获取当前登录用户的部门角色信息通过LoginUtil
List<UserDeptRoleVO> userDeptRoles = LoginUtil.getCurrentDeptRole();
// 2. 查询用户有权限的知识库(自动权限过滤✅)
TbAiKnowledge filter = new TbAiKnowledge();
filter.setStatus(1); // 只查询启用的
List<TbAiKnowledge> authorizedKnowledges =
knowledgeMapper.selectAiKnowledges(
filter,
userDeptRoles // 直接传入LoginUtil获取的用户权限信息
);
// 3. 提取Dify Dataset IDs
List<String> datasetIds = authorizedKnowledges.stream()
.map(TbAiKnowledge::getDifyDatasetId)
.filter(Objects::nonNull)
.collect(Collectors.toList());
if (datasetIds.isEmpty()) {
emitter.send("您当前没有可访问的知识库,无法进行对话。");
emitter.complete();
return;
}
// 4. 从多个知识库检索相关内容
List<RetrievalRecord> retrievalRecords =
difyApiClient.retrieveFromMultipleDatasets(
message,
datasetIds,
5 // Top K
);
// 5. 构建上下文
String context = buildContext(retrievalRecords, authorizedKnowledges);
// 6. 调用LLM流式对话
difyApiClient.streamChatWithContext(
message,
context,
conversationId,
userId,
new StreamCallback() {
private StringBuilder fullAnswer = new StringBuilder();
@Override
public void onChunk(String chunk) {
fullAnswer.append(chunk);
emitter.send(chunk);
}
@Override
public void onComplete() {
// 保存消息
saveMessages(
conversationId,
userId,
message,
fullAnswer.toString(),
retrievalRecords
);
emitter.complete();
}
@Override
public void onError(Throwable error) {
log.error("对话失败", error);
emitter.completeWithError(error);
}
}
);
} catch (Exception e) {
log.error("流式对话异常", e);
emitter.completeWithError(e);
}
}
/**
* 构建上下文
*/
private String buildContext(
List<RetrievalRecord> records,
List<TbAiKnowledge> knowledges) {
Map<String, String> knowledgeTitles = knowledges.stream()
.collect(Collectors.toMap(
TbAiKnowledge::getDifyDatasetId,
TbAiKnowledge::getTitle
));
return records.stream()
.map(r -> {
String datasetId = r.getDatasetId();
String knowledgeTitle = knowledgeTitles.getOrDefault(datasetId, "未知知识库");
return String.format(
"【来源:%s - %s】\n%s",
knowledgeTitle,
r.getDocumentName(),
r.getContent()
);
})
.collect(Collectors.joining("\n\n---\n\n"));
}
}
```
#### DifyApiClient实现
```java
@Component
public class DifyApiClient {
@Autowired
private DifyConfig difyConfig;
private final OkHttpClient httpClient;
public DifyApiClient(DifyConfig difyConfig) {
this.difyConfig = difyConfig;
this.httpClient = new OkHttpClient.Builder()
.connectTimeout(difyConfig.getConnectTimeout(), TimeUnit.SECONDS)
.readTimeout(difyConfig.getReadTimeout(), TimeUnit.SECONDS)
.build();
}
/**
* 从多个知识库检索
*/
public List<RetrievalRecord> retrieveFromMultipleDatasets(
String query,
List<String> datasetIds,
int topK) {
// 并行检索所有知识库
List<CompletableFuture<List<RetrievalRecord>>> futures =
datasetIds.stream()
.map(id -> CompletableFuture.supplyAsync(() ->
retrieveFromDataset(id, query, topK)
))
.collect(Collectors.toList());
// 等待完成
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();
// 合并并排序
return futures.stream()
.map(CompletableFuture::join)
.flatMap(List::stream)
.sorted((a, b) -> Double.compare(b.getScore(), a.getScore()))
.limit(topK)
.collect(Collectors.toList());
}
/**
* 从单个知识库检索
*/
private List<RetrievalRecord> retrieveFromDataset(
String datasetId,
String query,
int topK) {
String url = String.format(
"%s/datasets/%s/retrieve",
difyConfig.getApiBaseUrl(),
datasetId
);
JSONObject body = new JSONObject();
body.put("query", query);
body.put("top_k", topK);
body.put("score_threshold", 0.7);
Request request = new Request.Builder()
.url(url)
.header("Authorization", "Bearer " + difyConfig.getApiKey())
.header("Content-Type", "application/json")
.post(RequestBody.create(
body.toString(),
MediaType.parse("application/json")
))
.build();
try (Response response = httpClient.newCall(request).execute()) {
if (!response.isSuccessful()) {
throw new DifyException("知识库检索失败: " + response.message());
}
String responseBody = response.body().string();
return parseRetrievalResponse(datasetId, responseBody);
} catch (IOException e) {
throw new DifyException("知识库检索异常", e);
}
}
/**
* 流式对话(带上下文)
*/
public void streamChatWithContext(
String query,
String context,
String conversationId,
String userId,
StreamCallback callback) {
String url = difyConfig.getApiBaseUrl() + "/chat-messages";
// 构建完整Prompt
String fullPrompt = String.format(
"请基于以下知识库内容回答用户问题。" +
"如果知识库中没有相关信息,请明确告知用户。\n\n" +
"知识库内容:\n%s\n\n" +
"用户问题:%s",
context, query
);
JSONObject body = new JSONObject();
body.put("query", fullPrompt);
body.put("conversation_id", conversationId);
body.put("user", userId);
body.put("response_mode", "streaming");
body.put("inputs", new JSONObject());
Request request = new Request.Builder()
.url(url)
.header("Authorization", "Bearer " + difyConfig.getApiKey())
.header("Content-Type", "application/json")
.post(RequestBody.create(
body.toString(),
MediaType.parse("application/json")
))
.build();
// SSE流式处理
httpClient.newCall(request).enqueue(new Callback() {
@Override
public void onResponse(Call call, Response response) {
if (!response.isSuccessful()) {
callback.onError(new DifyException("对话失败: " + response.message()));
return;
}
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(response.body().byteStream()))) {
String line;
while ((line = reader.readLine()) != null) {
if (line.startsWith("data: ")) {
String data = line.substring(6);
if (!"[DONE]".equals(data)) {
JSONObject json = new JSONObject(data);
String chunk = json.optString("answer", "");
if (!chunk.isEmpty()) {
callback.onChunk(chunk);
}
}
}
}
callback.onComplete();
} catch (Exception e) {
callback.onError(e);
}
}
@Override
public void onFailure(Call call, IOException e) {
callback.onError(e);
}
});
}
}
```
---
## 📊 三种方式对比
| 方案 | 灵活性 | 实现难度 | 性能 | 推荐度 |
|------|--------|----------|------|--------|
| 检索API + 自定义LLM | ⭐⭐⭐⭐⭐ | ⭐⭐⭐ | ⭐⭐⭐⭐ | ⭐⭐⭐⭐⭐ |
| Workflow工作流 | ⭐⭐⭐⭐ | ⭐⭐ | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐ |
| 多应用切换 | ⭐⭐ | ⭐ | ⭐⭐⭐ | ⭐ |
---
## 🎯 最终推荐方案
**使用"检索API + 自定义LLM"方案**
**理由**
1. ✅ 完全控制知识库访问权限
2. ✅ 可以实现复杂的部门隔离逻辑
3. ✅ 支持并行检索多个知识库
4. ✅ 可以自定义Prompt和上下文
5. ✅ 灵活性最高,适合企业级应用
**实现步骤**
1. 用户发起对话
2. 根据用户权限查询可访问的知识库Mapper已实现✅
3. 并行调用Dify检索API获取相关内容
4. 合并结果构建上下文
5. 调用LLM流式生成答案
6. 保存对话记录(含知识来源)
这样既利用了Dify的知识库能力又保持了完全的控制权🎉