feat: 添加realtime_dialog和realtime_dialog_external_rag_test项目,更新test2项目

This commit is contained in:
User
2026-03-13 13:06:46 +08:00
parent 9dab61345c
commit 5521b673f5
215 changed files with 7626 additions and 1876 deletions

6
realtime_dialog/java/.idea/.gitignore generated vendored Normal file
View File

@@ -0,0 +1,6 @@
# 默认忽略的文件
/shelf/
/workspace.xml
# 基于编辑器的 HTTP 客户端请求
/httpRequests/
/target/

View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="com.codeverse.userSettings.CodeverseWorkspaceAppSettingsState">
<option name="progress" value="1.0" />
</component>
</project>

13
realtime_dialog/java/.idea/compiler.xml generated Normal file
View File

@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<annotationProcessing>
<profile name="Maven default annotation processors profile" enabled="true">
<sourceOutputDir name="target/generated-sources/annotations" />
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
<outputRelativeToContentRoot value="true" />
<module name="realtimedialog" />
</profile>
</annotationProcessing>
</component>
</project>

View File

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding">
<file url="file://$PROJECT_DIR$/src/main/java" charset="UTF-8" />
<file url="file://$PROJECT_DIR$/src/main/resources" charset="UTF-8" />
</component>
</project>

View File

@@ -0,0 +1,30 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="RemoteRepositoriesConfiguration">
<remote-repository>
<option name="id" value="bytedance-snapshots" />
<option name="name" value="bytedance-snapshots" />
<option name="url" value="https://maven.byted.org/repository/public" />
</remote-repository>
<remote-repository>
<option name="id" value="bytedance-releases" />
<option name="name" value="bytedance-releases" />
<option name="url" value="https://maven.byted.org/repository/public" />
</remote-repository>
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Central Repository" />
<option name="url" value="https://maven.byted.org/repository/public" />
</remote-repository>
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Maven Central repository" />
<option name="url" value="https://repo1.maven.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="jboss.community" />
<option name="name" value="JBoss Community repository" />
<option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" />
</remote-repository>
</component>
</project>

12
realtime_dialog/java/.idea/misc.xml generated Normal file
View File

@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="MavenProjectsManager">
<option name="originalFiles">
<list>
<option value="$PROJECT_DIR$/pom.xml" />
</list>
</option>
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="true" project-jdk-name="corretto-1.8" project-jdk-type="JavaSDK" />
</project>

6
realtime_dialog/java/.idea/vcs.xml generated Normal file
View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/.." vcs="Git" />
</component>
</project>

View File

@@ -0,0 +1,63 @@
# RealtimeDialog Java客户端
## 项目简介
Java版本的RealtimeDialog客户端,支持实时语音对话功能。
## 环境要求
- Java 1.8 或更高版本
- Maven 3.6 或更高版本
## 快速开始
### 1. 编译项目
```bash
cd java
mvn clean compile
```
### 2. 运行应用
#### 麦克风模式(默认)
```bash
mvn exec:java
```
#### 音频文件模式
```bash
mvn exec:java -Dexec.args="--audio=whoareyou.wav"
```
#### 文本模式
```bash
mvn exec:java -Dexec.args="--mod=text"
```
#### 指定音频格式
```bash
mvn exec:java -Dexec.args="--format=pcm_s16le"
```
### 3. 打包可执行JAR
```bash
mvn clean package
java -jar target/realtimedialog-1.0.0.jar --audio=whoareyou.wav
```
## 配置说明
在使用前,需要在`Config.java`中配置以下参数(也可以通过命令行参数`--app_id`和`--access_key`传入):
- `API_APP_ID`: 你的应用ID(对应请求头`X-Api-App-ID`)
- `API_ACCESS_KEY`: 你的访问密钥(对应请求头`X-Api-Access-Key`)
## 功能特性
- 支持麦克风实时语音输入
- 支持音频文件输入
- 支持文本输入模式
- 支持音频输出播放
- 支持外部RAG功能
- 支持多种音频格式:pcm、pcm_s16le
## 命令行参数
- `--format`: 音频格式,可选`pcm`或`pcm_s16le`;未指定时使用`Config.java`中的默认值`pcm_s16le`
- `--audio`: 音频文件路径,如果不设置则使用麦克风输入
- `--mod`: 输入模式,`audio`(默认)或`text`

View File

@@ -0,0 +1,56 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.volcengine</groupId>
<artifactId>realtimedialog</artifactId>
<name>RealtimeDialog Java Client</name>
<version>1.0.0</version>
<description>Java client for Volcengine RealtimeDialog service</description>
<build>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.11.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>UTF-8</encoding>
</configuration>
</plugin>
<plugin>
<artifactId>maven-shade-plugin</artifactId>
<version>3.4.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<transformer>
<mainClass>com.volcengine.realtimedialog.Main</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>3.1.0</version>
<configuration>
<mainClass>com.volcengine.realtimedialog.Main</mainClass>
</configuration>
</plugin>
</plugins>
</build>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.target>1.8</maven.compiler.target>
<java-websocket.version>1.5.3</java-websocket.version>
<jackson.version>2.15.2</jackson.version>
<maven.compiler.source>1.8</maven.compiler.source>
</properties>
</project>

View File

@@ -0,0 +1,128 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.volcengine</groupId>
<artifactId>realtimedialog</artifactId>
<version>1.0.0</version>
<packaging>jar</packaging>
<name>RealtimeDialog Java Client</name>
<description>Java client for Volcengine RealtimeDialog service</description>
<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<jackson.version>2.15.2</jackson.version>
<java-websocket.version>1.5.3</java-websocket.version>
</properties>
<dependencies>
<!-- WebSocket client -->
<dependency>
<groupId>org.java-websocket</groupId>
<artifactId>Java-WebSocket</artifactId>
<version>${java-websocket.version}</version>
</dependency>
<!-- JSON processing -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>${jackson.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<version>${jackson.version}</version>
</dependency>
<!-- Logging -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.36</version>
</dependency>
<!-- Apache Commons CLI for command line parsing -->
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.5.0</version>
</dependency>
<!-- Audio processing -->
<dependency>
<groupId>com.googlecode.soundlibs</groupId>
<artifactId>mp3spi</artifactId>
<version>1.9.5.4</version>
</dependency>
<!-- UUID generation -->
<dependency>
<groupId>com.fasterxml.uuid</groupId>
<artifactId>java-uuid-generator</artifactId>
<version>4.2.0</version>
</dependency>
<!-- Base64 encoding -->
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.15</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Compiler plugin -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.11.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>UTF-8</encoding>
</configuration>
</plugin>
<!-- Shade plugin for creating fat jar -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.4.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>com.volcengine.realtimedialog.Main</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
<!-- Exec plugin for running the application -->
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>3.1.0</version>
<configuration>
<mainClass>com.volcengine.realtimedialog.Main</mainClass>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,114 @@
package com.volcengine.realtimedialog;
import javax.sound.sampled.*;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
/**
 * Microphone capture plus small PCM/WAV helpers.
 *
 * <p>{@link #startCapture()} opens a 16-bit, signed, little-endian input line using
 * {@code Config.INPUT_SAMPLE_RATE} and {@code Config.CHANNELS}, then reads it on a
 * background thread into a bounded queue (100 chunks). Consumers drain the queue with
 * {@link #readAudioData()}.
 */
public class AudioCapture {
    private TargetDataLine targetLine;
    private volatile boolean isCapturing = false;
    private Thread captureThread;
    private final BlockingQueue<byte[]> audioQueue;

    /** Creates an idle capturer; no audio device is touched until {@link #startCapture()}. */
    public AudioCapture() {
        this.audioQueue = new ArrayBlockingQueue<>(100);
    }

    /**
     * Opens and starts the input line, then launches the capture thread.
     *
     * @throws LineUnavailableException if no input line supports the requested format
     *         or the line cannot be opened
     */
    public void startCapture() throws LineUnavailableException {
        AudioFormat format = new AudioFormat(
                Config.INPUT_SAMPLE_RATE,
                16,
                Config.CHANNELS,
                true,
                false // little endian
        );
        DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
        if (!AudioSystem.isLineSupported(info)) {
            throw new LineUnavailableException("音频输入设备不支持指定格式");
        }
        targetLine = (TargetDataLine) AudioSystem.getLine(info);
        targetLine.open(format);
        targetLine.start();
        isCapturing = true;
        captureThread = new Thread(this::captureLoop);
        captureThread.setName("AudioCapture");
        captureThread.start();
    }

    /**
     * Capture-thread body: reads chunks of {@code Config.AUDIO_CHUNK_SIZE} bytes and
     * enqueues a private copy of each. {@code put} blocks while the queue is full, so a
     * slow consumer back-pressures the reader instead of dropping audio here.
     */
    private void captureLoop() {
        byte[] buffer = new byte[Config.AUDIO_CHUNK_SIZE];
        while (isCapturing) {
            int bytesRead = targetLine.read(buffer, 0, buffer.length);
            if (bytesRead > 0) {
                // Copy, because the read buffer is reused on the next iteration.
                byte[] audioData = new byte[bytesRead];
                System.arraycopy(buffer, 0, audioData, 0, bytesRead);
                try {
                    audioQueue.put(audioData);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    break;
                }
            }
        }
    }

    /**
     * Returns the next captured chunk without blocking.
     *
     * @return the oldest queued chunk, or {@code null} when nothing is queued
     * @throws InterruptedException declared for caller compatibility; the current
     *         non-blocking implementation does not throw it
     */
    public byte[] readAudioData() throws InterruptedException {
        return audioQueue.poll();
    }

    /** Signals the capture thread to finish, interrupts it, and stops and closes the input line. */
    public void stopCapture() {
        isCapturing = false;
        if (captureThread != null) {
            // Wakes the thread if it is blocked in audioQueue.put().
            captureThread.interrupt();
        }
        if (targetLine != null) {
            targetLine.stop();
            targetLine.close();
        }
    }

    /** @return {@code true} between {@link #startCapture()} and {@link #stopCapture()} */
    public boolean isCapturing() {
        return isCapturing;
    }

    /**
     * Reads an audio file fully into memory. For paths ending in {@code .wav}
     * (case-insensitive) the first {@code Config.WAV_HEADER_SIZE} bytes are dropped.
     *
     * <p>NOTE(review): the header is assumed to be exactly {@code Config.WAV_HEADER_SIZE}
     * bytes; WAV files carrying extra chunks before the data chunk would leak header
     * bytes into the audio.
     *
     * @param filePath path of the file to read
     * @return the audio bytes (header removed for WAV files longer than the header)
     * @throws FileNotFoundException if the file does not exist
     * @throws IOException if the file is too large for one array or cannot be read completely
     */
    public static byte[] readWavFile(String filePath) throws IOException {
        File file = new File(filePath);
        if (!file.exists()) {
            throw new FileNotFoundException("音频文件不存在: " + filePath);
        }
        long length = file.length();
        if (length > Integer.MAX_VALUE) {
            // The (int) cast below would otherwise wrap to a bogus size.
            throw new IOException("音频文件过大: " + filePath);
        }
        byte[] fileData = new byte[(int) length];
        try (DataInputStream in = new DataInputStream(new FileInputStream(file))) {
            // A single InputStream.read() may return fewer bytes than requested;
            // readFully keeps reading until the array is filled or fails with EOFException.
            in.readFully(fileData);
        }
        // Skip the WAV header.
        if (filePath.toLowerCase().endsWith(".wav") && fileData.length > Config.WAV_HEADER_SIZE) {
            byte[] audioData = new byte[fileData.length - Config.WAV_HEADER_SIZE];
            System.arraycopy(fileData, Config.WAV_HEADER_SIZE, audioData, 0, audioData.length);
            return audioData;
        }
        return fileData;
    }

    /**
     * Reinterprets little-endian bytes as signed 16-bit samples.
     * A trailing odd byte, if any, is ignored.
     */
    public static short[] bytesToInt16Samples(byte[] data) {
        short[] samples = new short[data.length / 2];
        ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(samples);
        return samples;
    }

    /** Serializes signed 16-bit samples as little-endian bytes (two bytes per sample). */
    public static byte[] int16SamplesToBytes(short[] samples) {
        byte[] bytes = new byte[samples.length * 2];
        ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().put(samples);
        return bytes;
    }
}

View File

@@ -0,0 +1,398 @@
package com.volcengine.realtimedialog;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
public class CallManager {
private final String sessionId;
private NetClient netClient;
private AudioCapture audioCapture;
private Thread audioSendThread;
private Thread textInputThread;
private final AtomicBoolean isRunning;
private final AtomicBoolean isAudioMode;
private static final BlockingQueue<Object> queryChan = new LinkedBlockingQueue<>();
private static volatile CallManager currentInstance;
public CallManager() {
this.sessionId = Protocol.generateSessionId();
this.isRunning = new AtomicBoolean(false);
this.isAudioMode = new AtomicBoolean(true);
}
public void start() throws Exception {
System.out.println("启动实时通话管理器会话ID: " + sessionId);
// 设置当前实例
currentInstance = this;
// 建立WebSocket连接
connectWebSocket();
isRunning.set(true);
// 启动音频模式或文本模式
if (Config.mod.equals("text")) {
isAudioMode.set(false);
startTextMode();
} else {
isAudioMode.set(true);
startAudioMode();
}
// 等待运行结束
waitForCompletion();
}
private void connectWebSocket() throws Exception {
URI uri = new URI(Config.WS_URL);
Map<String, String> headers = new HashMap<>();
headers.put("X-Api-Resource-Id", Config.API_RESOURCE_ID);
headers.put("X-Api-Access-Key", Config.API_ACCESS_KEY);
headers.put("X-Api-App-Key", Config.API_APP_KEY);
headers.put("X-Api-App-ID", Config.API_APP_ID);
headers.put("X-Api-Connect-Id", sessionId);
netClient = new NetClient(uri, headers);
netClient.connectBlocking(30, TimeUnit.SECONDS);
if (!netClient.isConnected()) {
throw new IOException("WebSocket连接失败");
}
System.out.println("WebSocket连接成功");
// 发送连接开始消息事件1
startConnection();
// 发送会话开始消息事件100
startSession();
}
private void startConnection() throws Exception {
System.out.println("发送连接开始消息...");
// 使用正确的协议格式发送事件1
netClient.sendProtocolMessage(sessionId, "{}", 1);
System.out.println("连接开始消息发送完成");
}
private void startSession() throws Exception {
System.out.println("发送会话开始消息...");
RequestPayloads.StartSessionPayload payload = new RequestPayloads.StartSessionPayload();
// 根据模式设置参数
if (Config.mod.equals("text")) {
payload.dialog.extra = createExtraMap("text");
} else if (!Config.audioFilePath.isEmpty()) {
payload.dialog.extra = createExtraMap("audio_file");
} else {
payload.dialog.extra = createExtraMap("audio");
}
// 发送会话开始消息
String jsonPayload = new com.fasterxml.jackson.databind.ObjectMapper().writeValueAsString(payload);
netClient.sendProtocolMessage(sessionId, jsonPayload, 100); // 事件100的载荷
System.out.println("会话开始消息发送完成,等待服务器响应...");
// 等待会话启动响应事件150对齐Go实现
long startTime = System.currentTimeMillis();
long timeout = 30000; // 30秒超时
boolean sessionStarted = false;
while (System.currentTimeMillis() - startTime < timeout) {
Protocol.Message message = netClient.pollIncomingMessage(1, TimeUnit.SECONDS);
if (message != null && message.type == Protocol.MsgType.FULL_SERVER && message.event == 150) {
// 解析响应payload获取dialog_id
try {
String responseJson = new String(message.payload, "UTF-8");
com.fasterxml.jackson.databind.ObjectMapper mapper = new com.fasterxml.jackson.databind.ObjectMapper();
java.util.Map<String, Object> response = mapper.readValue(responseJson, java.util.Map.class);
String dialogId = (String) response.get("dialog_id");
if (dialogId != null && !dialogId.isEmpty()) {
System.out.println("会话启动成功dialog_id: " + dialogId);
sessionStarted = true;
break;
}
} catch (Exception e) {
System.err.println("解析会话启动响应失败: " + e.getMessage());
}
}
}
if (!sessionStarted) {
throw new IOException("会话启动超时或失败,未收到服务器的会话启动确认");
}
System.out.println("会话开始完成\n" + jsonPayload);
}
private Map<String, Object> createExtraMap(String inputMod) {
Map<String, Object> extra = new HashMap<>();
extra.put("strict_audit", false);
extra.put("audit_response", "抱歉这个问题我无法回答,你可以换个其他话题,我会尽力为你提供帮助。");
extra.put("input_mod", inputMod);
extra.put("model", "O");
return extra;
}
private void startAudioMode() throws Exception {
System.out.println("启动音频模式");
if (Config.audioFilePath.isEmpty()) {
// 麦克风模式
sendGreetingMessage();
startMicrophoneCapture();
startMessageReceiver();
} else {
// 音频文件模式 - 不启动麦克风,只启动消息接收
startFilePlayback();
startMessageReceiver();
}
}
private void startTextMode() throws Exception {
System.out.println("启动文本模式");
// 发送问候语对齐Golang版本
sendGreetingMessage();
// 启动文本输入线程
startTextInput();
// 启动消息接收线程
startMessageReceiver();
}
private void startMicrophoneCapture() throws Exception {
audioCapture = new AudioCapture();
audioCapture.startCapture();
// 启动音频发送线程
audioSendThread = new Thread(this::microphoneSendLoop);
audioSendThread.setName("MicrophoneAudioSend");
audioSendThread.start();
System.out.println("麦克风采集已启动");
}
private void startFilePlayback() throws Exception {
System.out.println("开始发送音频文件: " + Config.audioFilePath);
// 读取音频文件
byte[] audioData = AudioCapture.readWavFile(Config.audioFilePath);
// 启动文件发送线程
audioSendThread = new Thread(() -> fileSendLoop(audioData));
audioSendThread.setName("FileAudioSend");
audioSendThread.start();
}
private void microphoneSendLoop() {
try {
while (isRunning.get() && audioCapture.isCapturing()) {
byte[] audioData = audioCapture.readAudioData();
if (audioData != null) {
netClient.sendAudioData(sessionId, audioData);
}
// 模拟实时发送间隔
Thread.sleep(Config.AUDIO_SEND_INTERVAL);
}
} catch (Exception e) {
System.err.println("麦克风发送线程错误: " + e.getMessage());
}
}
private void fileSendLoop(byte[] audioData) {
try {
int chunkSize = Config.AUDIO_CHUNK_SIZE; // 640字节与Go实现保持一致
int totalSize = audioData.length;
int position = 0;
int chunkCount = 0;
System.out.println("开始发送音频文件,总大小: " + totalSize + " 字节, 块大小: " + chunkSize + " 字节");
while (isRunning.get() && position < totalSize) {
int remaining = totalSize - position;
int currentChunkSize = Math.min(chunkSize, remaining);
byte[] chunk = new byte[currentChunkSize];
System.arraycopy(audioData, position, chunk, 0, currentChunkSize);
System.out.println("发送音频块 #" + (++chunkCount) + ": 位置=" + position + ", 大小=" + currentChunkSize + " 字节");
netClient.sendAudioData(sessionId, chunk);
position += currentChunkSize;
// 模拟实时发送间隔 - 每20ms发送一块与Go实现保持一致
Thread.sleep(Config.AUDIO_SEND_INTERVAL);
}
System.out.println("音频文件发送完成,共发送 " + chunkCount + "");
// 发送音频结束标记 - 发送一段静音数据提示服务器音频输入结束
System.out.println("发送音频结束标记...");
byte[] silenceChunk = new byte[chunkSize]; // 静音数据
netClient.sendAudioData(sessionId, silenceChunk);
System.out.println("音频文件发送完成,等待服务器响应...");
// 文件发送完成后等待服务器通过事件359通知退出
} catch (Exception e) {
System.err.println("文件发送线程错误: " + e.getMessage());
e.printStackTrace();
}
}
private void sendGreetingMessage() throws Exception {
System.out.println("发送问候语...");
// 创建SayHello载荷对齐Golang版本使用事件300
RequestPayloads.SayHelloPayload payload = new RequestPayloads.SayHelloPayload("你好,我是豆包,有什么可以帮助你的吗?");
String jsonPayload = new com.fasterxml.jackson.databind.ObjectMapper().writeValueAsString(payload);
netClient.sendProtocolMessage(sessionId, jsonPayload, 300); // 事件300 - SayHello对齐Golang版本
System.out.println("问候语发送完成");
}
private void startTextInput() {
textInputThread = new Thread(this::textInputLoop);
textInputThread.setName("TextInput");
textInputThread.start();
}
private void textInputLoop() {
Scanner scanner = new Scanner(System.in);
System.out.println("请输入文本 (输入 'quit' 退出):");
try {
while (isRunning.get()) {
String text = scanner.nextLine();
if (text.equalsIgnoreCase("quit")) {
stop();
break;
}
if (!text.trim().isEmpty()) {
// 使用事件501发送文本查询对齐Golang版本
netClient.sendChatTextQuery(sessionId, text);
}
}
} catch (Exception e) {
System.err.println("文本输入线程错误: " + e.getMessage());
}
}
private void startMessageReceiver() {
Thread receiverThread = new Thread(this::messageReceiveLoop);
receiverThread.setName("MessageReceiver");
receiverThread.start();
}
private void messageReceiveLoop() {
try {
while (isRunning.get()) {
Protocol.Message message = netClient.pollIncomingMessage(1, TimeUnit.SECONDS);
if (message != null) {
// 消息已在NetClient中处理这里可以添加额外的逻辑
if (message.type == Protocol.MsgType.ERROR) {
String error = new String(message.payload);
System.err.println("服务器错误: " + error);
}
}
}
} catch (Exception e) {
System.err.println("消息接收线程错误: " + e.getMessage());
}
}
private void waitForCompletion() throws InterruptedException {
while (isRunning.get()) {
Thread.sleep(100);
// 对于音频文件模式,文件发送完成后等待服务器响应
if (isAudioMode.get() && !Config.audioFilePath.isEmpty() && audioSendThread != null && !audioSendThread.isAlive()) {
// 音频文件已发送完成,继续等待服务器响应
System.out.println("音频文件发送完成,等待服务器响应...");
// 不退出,继续等待消息接收线程处理服务器响应
// 服务器会通过事件359通知可以退出
}
}
}
public void stop() {
System.out.println("停止通话管理器");
isRunning.set(false);
try {
// 发送会话结束消息事件102- 参考Go实现
if (netClient != null && netClient.isConnected()) {
System.out.println("发送会话结束消息...");
finishSession();
Thread.sleep(100); // 给服务器处理时间
}
} catch (Exception e) {
System.err.println("发送会话结束消息失败: " + e.getMessage());
}
// 停止音频采集
if (audioCapture != null) {
audioCapture.stopCapture();
}
// 关闭WebSocket连接并打印logid
if (netClient != null) {
String logid = netClient.getLogid();
if (logid != null && !logid.isEmpty()) {
System.out.println("通话结束logid: " + logid);
}
netClient.close();
}
// 等待线程结束
try {
if (audioSendThread != null) {
audioSendThread.join(1000);
}
if (textInputThread != null) {
textInputThread.join(1000);
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
System.out.println("通话管理器已停止");
}
private void finishSession() throws Exception {
if (netClient != null && sessionId != null) {
netClient.sendProtocolMessage(sessionId, "{}", 102); // 事件102 - FinishSession
System.out.println("会话结束消息已发送");
}
}
// 通知用户查询事件
public static void notifyUserQuery() {
queryChan.offer(new Object());
}
// 从处理器停止CallManager
public static void stopFromHandler() {
if (currentInstance != null) {
currentInstance.stop();
}
}
}

View File

@@ -0,0 +1,59 @@
package com.volcengine.realtimedialog;
/**
 * Process-wide settings for the RealtimeDialog client: fixed service constants plus a
 * few mutable values (credentials and command-line options) set once at startup.
 *
 * <p>The mutable string settings are never left {@code null}: each setter maps
 * {@code null} to that setting's default, so readers can call {@code equals} or
 * {@code isEmpty} on them directly.
 */
public class Config {
    // WebSocket connection
    public static final String WS_URL = "wss://openspeech.bytedance.com/api/v3/realtime/dialogue";
    public static final String API_RESOURCE_ID = "volc.speech.dialog";

    // Credentials: the app id and access key are empty until the user supplies them.
    public static String API_APP_ID = "";
    public static String API_ACCESS_KEY = "";
    public static String API_APP_KEY = "PlgvMymc7f3tQnJ6";

    // Audio parameters
    public static final int INPUT_SAMPLE_RATE = 16000;
    public static final int OUTPUT_SAMPLE_RATE = 24000;
    public static final int CHANNELS = 1;
    public static final int INPUT_FRAMES_PER_BUFFER = 160;
    public static final int OUTPUT_FRAMES_PER_BUFFER = 512;
    public static final int BUFFER_SECONDS = 100;

    // Audio format names
    public static final String DEFAULT_PCM = "pcm";
    public static final String PCM_S16LE = "pcm_s16le";

    // TTS
    public static final String DEFAULT_SPEAKER = "zh_female_vv_jupiter_bigtts";

    // Streaming: 640 bytes is 20 ms of 16 kHz, 16-bit, mono audio.
    public static final int AUDIO_CHUNK_SIZE = 640;
    public static final long AUDIO_SEND_INTERVAL = 20; // milliseconds

    // Number of leading bytes treated as the WAV header.
    public static final int WAV_HEADER_SIZE = 44;

    // Command-line option values and their defaults
    public static String audioFilePath = "";
    public static String mod = "audio";
    public static String pcmFormat = PCM_S16LE;

    /** Sets the application id; {@code null} resets it to the empty string. */
    public static void setAppId(String appId) {
        API_APP_ID = (appId == null) ? "" : appId;
    }

    /** Sets the access key; {@code null} resets it to the empty string. */
    public static void setAccessKey(String accessKey) {
        API_ACCESS_KEY = (accessKey == null) ? "" : accessKey;
    }

    /** Sets the input audio file path; {@code null} or empty means "use the microphone". */
    public static void setAudioFilePath(String path) {
        audioFilePath = (path == null) ? "" : path;
    }

    /** Sets the input mode; {@code null} resets it to {@code "audio"}. */
    public static void setMod(String mode) {
        mod = (mode == null) ? "audio" : mode;
    }

    /** Sets the PCM format name; {@code null} resets it to {@link #PCM_S16LE}. */
    public static void setPcmFormat(String format) {
        pcmFormat = (format == null) ? PCM_S16LE : format;
    }
}

View File

@@ -0,0 +1,179 @@
package com.volcengine.realtimedialog;
import org.apache.commons.cli.*;
/**
 * Command-line entry point: parses options, copies them into {@link Config}, validates
 * the result, and runs a {@link CallManager} until the call ends.
 */
public class Main {
    /** Command shown in usage output; matches the jar name built from the POM's artifactId and version. */
    private static final String USAGE_COMMAND = "java -jar realtimedialog-1.0.0.jar";

    /**
     * Runs the client. Exits with status 1 on invalid arguments, invalid configuration,
     * or a runtime failure; {@code --help} prints usage and returns normally.
     */
    public static void main(String[] args) {
        Options options = buildOptions();
        CommandLine cmd = parseCommandLine(options, args);
        if (cmd == null) {
            System.exit(1);
        }
        if (cmd.hasOption("help")) {
            // Asking for help is not an error, so do not exit with a failure status.
            new HelpFormatter().printHelp(USAGE_COMMAND, options);
            return;
        }
        applyConfiguration(cmd);
        if (!validateConfiguration()) {
            System.exit(1);
        }
        CallManager callManager = new CallManager();
        try {
            // Make sure the session is closed when the JVM shuts down (e.g. Ctrl-C).
            Runtime.getRuntime().addShutdownHook(new Thread(() -> {
                System.out.println("正在关闭应用...");
                callManager.stop();
            }));
            callManager.start();
            System.out.println("通话结束");
        } catch (Exception e) {
            System.err.println("运行错误: " + e.getMessage());
            e.printStackTrace();
            System.exit(1);
        }
    }

    /** Declares every supported command-line option. */
    private static Options buildOptions() {
        Options options = new Options();
        options.addOption(Option.builder("a")
                .longOpt("audio")
                .desc("音频文件路径,如果不设置则使用麦克风输入")
                .hasArg()
                .argName("FILE")
                .build());
        options.addOption(Option.builder("m")
                .longOpt("mod")
                .desc("输入模式audio默认或text")
                .hasArg()
                .argName("MODE")
                .build());
        // Help text states the default actually configured in Config.pcmFormat.
        options.addOption(Option.builder("f")
                .longOpt("format")
                .desc("音频格式,默认为pcm_s16le,可选pcm")
                .hasArg()
                .argName("FORMAT")
                .build());
        options.addOption(Option.builder()
                .longOpt("app_id")
                .desc("应用ID如果不设置则使用Config中的默认值")
                .hasArg()
                .argName("APP_ID")
                .build());
        options.addOption(Option.builder()
                .longOpt("access_key")
                .desc("访问密钥如果不设置则使用Config中的默认值")
                .hasArg()
                .argName("ACCESS_KEY")
                .build());
        options.addOption(Option.builder("h")
                .longOpt("help")
                .desc("显示帮助信息")
                .build());
        return options;
    }

    /**
     * Parses {@code args} against {@code options}.
     *
     * @return the parsed command line, or {@code null} after printing the error and
     *         usage when the arguments are invalid
     */
    private static CommandLine parseCommandLine(Options options, String[] args) {
        try {
            return new DefaultParser().parse(options, args);
        } catch (ParseException e) {
            System.err.println("参数解析错误: " + e.getMessage());
            new HelpFormatter().printHelp(USAGE_COMMAND, options);
            return null;
        }
    }

    /**
     * Copies the parsed options into {@link Config}. Exits with status 1 when
     * {@code --mod} or {@code --format} has an unsupported value.
     */
    private static void applyConfiguration(CommandLine cmd) {
        if (cmd.hasOption("audio")) {
            Config.setAudioFilePath(cmd.getOptionValue("audio"));
        }
        if (cmd.hasOption("mod")) {
            String mode = cmd.getOptionValue("mod");
            if (!mode.equals("audio") && !mode.equals("text")) {
                System.err.println("错误mod参数必须是audio或text");
                System.exit(1);
            }
            Config.setMod(mode);
        }
        if (cmd.hasOption("format")) {
            String format = cmd.getOptionValue("format");
            if (!format.equals("pcm") && !format.equals("pcm_s16le")) {
                System.err.println("错误format参数必须是pcm或pcm_s16le");
                System.exit(1);
            }
            Config.setPcmFormat(format);
        }
        if (cmd.hasOption("app_id")) {
            Config.setAppId(cmd.getOptionValue("app_id"));
        }
        if (cmd.hasOption("access_key")) {
            Config.setAccessKey(cmd.getOptionValue("access_key"));
        }
    }

    /**
     * Checks that credentials are present and that a configured audio file exists,
     * printing the reason on stderr when validation fails.
     *
     * @return {@code true} when the configuration is usable
     */
    private static boolean validateConfiguration() {
        if (isUnset(Config.API_APP_ID, "your_app_id")) {
            System.err.println("错误必须设置应用ID");
            System.err.println("请在Config.java中设置API_APP_ID或使用--app_id参数");
            return false;
        }
        if (isUnset(Config.API_ACCESS_KEY, "your_access_key")) {
            System.err.println("错误:必须设置访问密钥");
            System.err.println("请在Config.java中设置API_ACCESS_KEY或使用--access_key参数");
            return false;
        }
        if (!Config.audioFilePath.isEmpty()) {
            java.io.File file = new java.io.File(Config.audioFilePath);
            if (!file.exists()) {
                System.err.println("错误:音频文件不存在: " + Config.audioFilePath);
                return false;
            }
        }
        return true;
    }

    /**
     * Reports whether a credential was never filled in: {@code null}, blank, or still
     * equal to the given placeholder text. The defaults in {@link Config} are empty
     * strings, so checking only for the placeholder would let missing credentials through.
     */
    private static boolean isUnset(String value, String placeholder) {
        return value == null || value.trim().isEmpty() || value.equals(placeholder);
    }
}

View File

@@ -0,0 +1,402 @@
package com.volcengine.realtimedialog;
import org.java_websocket.client.WebSocketClient;
import org.java_websocket.drafts.Draft_6455;
import org.java_websocket.handshake.ServerHandshake;
import javax.sound.sampled.*;
import java.io.IOException;
import java.net.URI;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
public class NetClient extends WebSocketClient {
private final BlockingQueue<Protocol.Message> incomingMessages;
private volatile boolean isConnected = false;
private volatile boolean shouldStop = false;
private SourceDataLine audioOutputLine;
private Thread audioPlaybackThread;
private final BlockingQueue<byte[]> audioQueue;
private volatile String logid; // 保存logid用于通话结束时打印
public NetClient(URI serverUri, Map<String, String> headers) {
super(serverUri, new Draft_6455(), headers, 0);
this.incomingMessages = new LinkedBlockingQueue<>();
this.audioQueue = new LinkedBlockingQueue<>();
// 只在非录音文件模式下初始化音频输出(文本模式需要播放器)
if (!isAudioFileInput()) {
initializeAudioOutput();
}
}
/**
 * Opens a 16-bit, signed, little-endian output line at {@code Config.OUTPUT_SAMPLE_RATE}
 * and starts the playback thread. Failures are reported on stderr and leave playback
 * disabled rather than propagating.
 */
private void initializeAudioOutput() {
    final AudioFormat outputFormat = new AudioFormat(
            Config.OUTPUT_SAMPLE_RATE,
            16,
            Config.CHANNELS,
            true,
            false // little endian
    );
    final DataLine.Info lineInfo = new DataLine.Info(SourceDataLine.class, outputFormat);
    try {
        if (AudioSystem.isLineSupported(lineInfo)) {
            audioOutputLine = (SourceDataLine) AudioSystem.getLine(lineInfo);
            audioOutputLine.open(outputFormat);
            audioOutputLine.start();
            // Playback runs on its own thread.
            audioPlaybackThread = new Thread(this::audioPlaybackLoop, "AudioPlayback");
            audioPlaybackThread.start();
        } else {
            System.err.println("不支持音频输出格式");
        }
    } catch (LineUnavailableException e) {
        System.err.println("音频输出初始化失败: " + e.getMessage());
    }
}
// Playback states tracked by the audio playback loop.
private enum PlaybackState {
IDLE, // nothing is being played
PLAYING, // audio was written on the most recent poll
WAITING_DATA // was playing, now waiting for more data
}
/**
 * Playback-thread body: polls the audio queue every 50 ms and writes each chunk to the
 * output line, tracking IDLE / PLAYING / WAITING_DATA. Progress messages are printed
 * only when neither text mode nor audio-file mode is active. Runs until
 * {@code shouldStop} is set or the thread is interrupted.
 */
private void audioPlaybackLoop() {
    final int idlePollLimit = 20; // 20 empty polls of 50 ms each, about one second
    final boolean textMode = Config.mod.equals("text");
    final boolean fileMode = isAudioFileInput();
    final boolean verbose = !textMode && !fileMode;
    PlaybackState current = PlaybackState.IDLE;
    int emptyPolls = 0;
    System.out.println("音频播放线程启动 - 模式: " + Config.mod +
            ", 文本模式: " + textMode +
            ", 音频文件模式: " + fileMode);
    while (!shouldStop) {
        try {
            final byte[] chunk = audioQueue.poll(50, TimeUnit.MILLISECONDS);
            if (chunk != null && audioOutputLine != null) {
                // Data arrived: switch to PLAYING if not already there.
                if (current != PlaybackState.PLAYING) {
                    current = PlaybackState.PLAYING;
                    if (verbose) {
                        System.out.println("🎵 开始播放音频...");
                    }
                }
                audioOutputLine.write(chunk, 0, chunk.length);
                emptyPolls = 0;
                if (verbose && chunk.length > 0) {
                    System.out.println("播放音频数据: " + chunk.length + " 字节");
                }
                continue;
            }
            // Nothing playable this round: advance the idle side of the state machine.
            switch (current) {
                case PLAYING:
                    current = PlaybackState.WAITING_DATA;
                    emptyPolls = 0;
                    break;
                case WAITING_DATA:
                    emptyPolls++;
                    if (emptyPolls > idlePollLimit) {
                        current = PlaybackState.IDLE;
                        if (verbose) {
                            System.out.println("⏸️ 音频播放暂停,等待数据...");
                        }
                        emptyPolls = 0;
                    }
                    break;
                default:
                    break;
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            break;
        } catch (Exception e) {
            System.err.println("❌ 音频播放错误: " + e.getMessage());
            e.printStackTrace();
            current = PlaybackState.IDLE;
        }
    }
    System.out.println("🛑 音频播放线程结束");
}
/**
 * Marks the client as connected and stores the {@code X-Tt-Logid} handshake field,
 * printing it when it is present and non-empty.
 */
@Override
public void onOpen(ServerHandshake handshake) {
    System.out.println("WebSocket连接已建立");
    isConnected = true;
    // Keep the log id so it can be printed again when the call ends.
    final String handshakeLogId = handshake.getFieldValue("X-Tt-Logid");
    logid = handshakeLogId;
    if (handshakeLogId == null || handshakeLogId.isEmpty()) {
        return;
    }
    System.out.println("连接建立logid: " + handshakeLogId);
}
/** Logs a text frame received from the server; text frames are not processed further here. */
@Override
public void onMessage(String message) {
System.out.println("收到文本消息: " + message);
}
/**
 * Handles a binary frame: logs its size and a hex preview, parses it with
 * {@code Protocol.unmarshal}, dispatches on the message type, and queues the parsed
 * message for pollers. Frames that fail to parse are logged as text and dropped.
 */
@Override
public void onMessage(ByteBuffer bytes) {
    try {
        final byte[] frame = new byte[bytes.remaining()];
        bytes.get(frame);
        System.out.println("收到WebSocket二进制消息长度: " + frame.length + " 字节");
        final int previewLength = Math.min(20, frame.length);
        System.out.println("原始数据前20字节: " + bytesToHex(frame, previewLength));

        final Protocol.Message parsed;
        try {
            parsed = Protocol.unmarshal(frame);
        } catch (IOException parseFailure) {
            System.err.println("消息解析失败: " + parseFailure.getMessage());
            System.err.println("尝试解析为文本消息...");
            try {
                String asText = new String(frame, "UTF-8");
                System.err.println("文本内容: " + asText);
            } catch (Exception textFailure) {
                System.err.println("也无法解析为文本: " + textFailure.getMessage());
            }
            return;
        }

        System.out.println("解析消息成功 - 类型: " + parsed.type + ", 事件ID: " + parsed.event + ", 会话ID: " + parsed.sessionId);
        // Dispatch on the message type, then hand the message to pollers.
        switch (parsed.type) {
            case FULL_SERVER:
                handleFullServerMessage(parsed);
                break;
            case AUDIO_ONLY_SERVER:
                handleAudioOnlyServerMessage(parsed);
                break;
            case ERROR:
                handleErrorMessage(parsed);
                break;
            default:
                System.err.println("未知消息类型: " + parsed.type);
        }
        incomingMessages.offer(parsed);
    } catch (Exception e) {
        System.err.println("处理消息时出错: " + e.getMessage());
        e.printStackTrace();
    }
}
/**
 * Renders the first {@code length} bytes as upper-case, two-digit hex values,
 * each followed by a single space (for example {@code "0A FF "}).
 *
 * @param bytes  the source bytes
 * @param length how many leading bytes to render; must not exceed {@code bytes.length}
 * @return the hex dump, empty when {@code length} is zero or negative
 */
private String bytesToHex(byte[] bytes, int length) {
    final char[] hexDigits = "0123456789ABCDEF".toCharArray();
    final StringBuilder dump = new StringBuilder();
    int index = 0;
    while (index < length) {
        // Mask to treat the byte as unsigned before splitting it into two nibbles.
        final int unsigned = bytes[index] & 0xFF;
        dump.append(hexDigits[unsigned >>> 4]);
        dump.append(hexDigits[unsigned & 0x0F]);
        dump.append(' ');
        index++;
    }
    return dump.toString();
}
/**
 * Forwards a message to {@code ServerResponseHandler.handleFullServerMessage},
 * passing this client along with it.
 *
 * @param message the decoded message to handle
 */
private void handleFullServerMessage(Protocol.Message message) {
ServerResponseHandler.handleFullServerMessage(this, message);
}
/**
 * Forwards a message to {@code ServerResponseHandler.handleAudioOnlyServerMessage},
 * passing this client along with it.
 *
 * @param message the decoded message to handle
 */
private void handleAudioOnlyServerMessage(Protocol.Message message) {
ServerResponseHandler.handleAudioOnlyServerMessage(this, message);
}
/**
 * Forwards a message to {@code ServerResponseHandler.handleErrorMessage}.
 *
 * @param message the decoded message to handle
 */
private void handleErrorMessage(Protocol.Message message) {
ServerResponseHandler.handleErrorMessage(message);
}
/**
 * Queues one chunk of audio for playback (kept simple to mirror the Golang implementation).
 * <p>
 * Nothing is queued in audio-file input mode or when the chunk is null or empty.
 * Depending on {@code Config.pcmFormat} the chunk is either queued unchanged
 * ({@code Config.PCM_S16LE}) or converted from little-endian 32-bit floats to 16-bit
 * samples first ({@code Config.DEFAULT_PCM}). Chunks whose length is not a multiple of
 * the sample size are rejected with an error message. Exceptions are logged, not thrown.
 *
 * @param audioData raw audio bytes in the configured PCM format
 */
public void playAudioData(byte[] audioData) {
    // No playback while the input comes from a recorded audio file.
    if (isAudioFileInput()) {
        return;
    }
    try {
        final boolean nothingToPlay = audioData == null || audioData.length == 0;
        if (nothingToPlay) {
            return;
        }
        final int byteCount = audioData.length;
        System.out.println("播放音频数据: " + byteCount + " 字节");

        // Handle the chunk according to the configured format.
        switch (Config.pcmFormat) {
            case Config.PCM_S16LE: {
                // s16le can be played as-is.
                if (byteCount % 2 != 0) {
                    System.err.println("s16le音频数据长度不是2的倍数: " + byteCount);
                    return;
                }
                audioQueue.offer(audioData);
                break;
            }
            case Config.DEFAULT_PCM: {
                // f32le has to be converted to s16le before playback.
                if (byteCount % 4 != 0) {
                    System.err.println("f32le音频数据长度不是4的倍数: " + byteCount);
                    return;
                }
                final ByteBuffer littleEndian = ByteBuffer.wrap(audioData).order(ByteOrder.LITTLE_ENDIAN);
                final short[] pcm16 = new short[byteCount / 4];
                for (int index = 0; index < pcm16.length; index++) {
                    final float scaled = littleEndian.getFloat(index * 4) * 32767.0f;
                    // Clamp into the 16-bit range before narrowing to short.
                    final float clamped = Math.max(-32768, Math.min(32767, scaled));
                    pcm16[index] = (short) clamped;
                }
                // Serialise the samples back to bytes and queue them.
                final byte[] pcm16Bytes = AudioCapture.int16SamplesToBytes(pcm16);
                audioQueue.offer(pcm16Bytes);
                break;
            }
        }
    } catch (Exception e) {
        System.err.println("播放音频数据失败: " + e.getMessage());
        e.printStackTrace();
    }
}
/**
 * Invoked when the WebSocket connection has been closed.
 * Logs the close code and reason (and the stored logid when non-empty), marks the
 * client as disconnected and runs {@code cleanup()}.
 *
 * @param code   the close code
 * @param reason the close reason
 * @param remote whether the close was initiated by the remote side (not used here)
 */
@Override
public void onClose(int code, String reason, boolean remote) {
    final String closeSummary = "WebSocket连接已关闭. 代码: " + code + ", 原因: " + reason;
    System.out.println(closeSummary);

    // Report the logid captured at connect time, if any.
    final boolean hasLogid = logid != null && !logid.isEmpty();
    if (hasLogid) {
        System.out.println("连接关闭logid: " + logid);
    }

    isConnected = false;
    cleanup();
}
/**
 * Logs a WebSocket error together with its stack trace.
 *
 * @param ex the reported error
 */
@Override
public void onError(Exception ex) {
    final String detail = ex.getMessage();
    System.err.println("WebSocket错误: " + detail);
    ex.printStackTrace();
}
/**
 * Tells whether the client runs in recorded-audio-file input mode.
 *
 * @return {@code true} when {@code Config.audioFilePath} is not empty
 */
private boolean isAudioFileInput() {
    final boolean noFileConfigured = Config.audioFilePath.isEmpty();
    return !noFileConfigured;
}
/**
 * Returns the connection flag, which {@code onOpen} sets to true and {@code onClose} resets to false.
 *
 * @return the current value of the connection flag
 */
public boolean isConnected() {
return isConnected;
}
/**
 * Returns the logid read from the {@code X-Tt-Logid} handshake header in {@code onOpen}.
 *
 * @return the stored logid
 */
public String getLogid() {
return logid;
}
/**
 * Takes the next decoded message from {@code incomingMessages}, delegating to its
 * {@code poll(timeout, unit)}.
 *
 * @param timeout how long to wait
 * @param unit    the unit of {@code timeout}
 * @return whatever the queue's {@code poll} returns
 *         (NOTE(review): presumably null on timeout, as with a BlockingQueue; the queue type is not visible here)
 * @throws InterruptedException if interrupted while waiting
 */
public Protocol.Message pollIncomingMessage(long timeout, TimeUnit unit) throws InterruptedException {
return incomingMessages.poll(timeout, unit);
}
/**
 * Wraps a chunk of audio into a protocol audio message and sends it.
 *
 * @param sessionId the session the audio belongs to
 * @param audioData the raw audio bytes
 * @throws IOException if the client is not connected, or if building or sending the
 *                     message fails (the original failure is kept as the cause)
 */
public void sendAudioData(String sessionId, byte[] audioData) throws IOException {
    if (isConnected) {
        try {
            final byte[] frame = Protocol.createAudioMessage(sessionId, audioData);
            send(frame);
        } catch (Exception e) {
            throw new IOException("发送音频消息失败: " + e.getMessage(), e);
        }
        return;
    }
    throw new IOException("WebSocket未连接");
}
/**
 * Sends a text message built with {@code Protocol.createFullClientMessage}.
 * <p>
 * Unchecked failures raised by {@code send} are converted to {@code IOException},
 * matching the other {@code send*} methods of this class, so callers only have to
 * deal with the declared exception type.
 *
 * @param sessionId the session the text belongs to
 * @param text      the text to send
 * @throws IOException if the client is not connected, or if building or sending the
 *                     message fails
 */
public void sendTextMessage(String sessionId, String text) throws IOException {
    if (!isConnected) {
        throw new IOException("WebSocket未连接");
    }
    byte[] message = Protocol.createFullClientMessage(sessionId, text);
    try {
        send(message);
    } catch (RuntimeException e) {
        // send() reports problems (e.g. a connection lost in the meantime) with unchecked exceptions.
        throw new IOException("发送文本消息失败: " + e.getMessage(), e);
    }
}
/**
 * Sends a protocol message for the given event.
 * <p>
 * Event 1 sends the message from {@code Protocol.createStartConnectionMessage()},
 * event 100 the one from {@code Protocol.createStartSessionMessage(sessionId, text)};
 * any other event id is sent as a FULL_CLIENT message carrying that event id, the
 * session id and {@code text} as UTF-8 payload.
 *
 * @param sessionId the session id to put into the message
 * @param text      the payload text
 * @param eventId   the protocol event id
 * @throws IOException if the client is not connected, or if building or sending the
 *                     message fails (the original failure is kept as the cause)
 */
public void sendProtocolMessage(String sessionId, String text, int eventId) throws IOException {
    if (!isConnected) {
        throw new IOException("WebSocket未连接");
    }
    try {
        final byte[] frame;
        switch (eventId) {
            case 1:
                frame = Protocol.createStartConnectionMessage();
                break;
            case 100:
                frame = Protocol.createStartSessionMessage(sessionId, text);
                break;
            default: {
                // Generic message carrying the requested event id.
                final Protocol.Message generic = new Protocol.Message();
                generic.type = Protocol.MsgType.FULL_CLIENT;
                generic.typeFlag = Protocol.MSG_TYPE_FLAG_WITH_EVENT;
                generic.event = eventId;
                generic.sessionId = sessionId;
                generic.payload = text.getBytes("UTF-8");
                frame = Protocol.marshal(generic);
                break;
            }
        }
        send(frame);
    } catch (Exception e) {
        throw new IOException("发送协议消息失败: " + e.getMessage(), e);
    }
}
/**
 * Sends a ChatTextQuery message (event 501) whose payload is the JSON form of a
 * {@code RequestPayloads.ChatTextQueryPayload} wrapping {@code text}.
 *
 * @param sessionId the session the query belongs to
 * @param text      the query text
 * @throws IOException if the client is not connected, or if building or sending the
 *                     message fails (the original failure is kept as the cause)
 */
public void sendChatTextQuery(String sessionId, String text) throws IOException {
    if (!isConnected) {
        throw new IOException("WebSocket未连接");
    }
    try {
        // Serialise the query payload to JSON.
        final com.fasterxml.jackson.databind.ObjectMapper mapper = new com.fasterxml.jackson.databind.ObjectMapper();
        final String queryJson = mapper.writeValueAsString(new RequestPayloads.ChatTextQueryPayload(text));

        final Protocol.Message query = new Protocol.Message();
        query.type = Protocol.MsgType.FULL_CLIENT;
        query.typeFlag = Protocol.MSG_TYPE_FLAG_WITH_EVENT;
        query.event = 501; // ChatTextQuery event
        query.sessionId = sessionId;
        query.payload = queryJson.getBytes("UTF-8");

        send(Protocol.marshal(query));
        System.out.println("发送ChatTextQuery消息成功: " + text);
    } catch (Exception e) {
        throw new IOException("发送ChatTextQuery消息失败: " + e.getMessage(), e);
    }
}
/**
 * Releases playback resources after the connection has closed: requests the playback
 * loop to stop, interrupts the playback thread, drains, stops and closes the audio
 * output line, and finally saves the collected audio to {@code output.pcm}.
 */
private void cleanup() {
    shouldStop = true;

    final boolean hasPlaybackThread = audioPlaybackThread != null;
    if (hasPlaybackThread) {
        audioPlaybackThread.interrupt();
    }

    final boolean hasOutputLine = audioOutputLine != null;
    if (hasOutputLine) {
        audioOutputLine.drain();
        audioOutputLine.stop();
        audioOutputLine.close();
    }

    // Persist the received audio to a file.
    final String outputFile = "output.pcm";
    ServerResponseHandler.saveAudioToPCMFile(outputFile);
}
}

View File

@@ -0,0 +1,366 @@
package com.volcengine.realtimedialog;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.*;
/**
 * Binary framing used on the realtime-dialog WebSocket connection.
 * <p>
 * A frame starts with a fixed 4-byte header (version/header size, message type/flags,
 * serialization/compression, reserved) followed by optional big-endian fields, in this
 * order: event id, session id or connect id (each a 4-byte length plus UTF-8 bytes),
 * sequence number, error code, and finally the payload (4-byte length plus bytes).
 * Which optional fields are present depends on the message type, its flags and the
 * event id.
 */
public class Protocol {
    private static final ObjectMapper objectMapper = new ObjectMapper();

    /** Charset of every string that goes on the wire. */
    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;

    /** Size in bytes of the fixed frame header. */
    private static final int FIXED_HEADER_BYTES = 4;

    /** Message types; the value is stored in the upper four bits of the second header byte. */
    public enum MsgType {
        INVALID(0),
        FULL_CLIENT(1),
        AUDIO_ONLY_CLIENT(2),
        FULL_SERVER(9),
        AUDIO_ONLY_SERVER(11),
        FRONT_END_RESULT_SERVER(12),
        ERROR(15);

        private final int value;

        MsgType(int value) {
            this.value = value;
        }

        public int getValue() {
            return value;
        }

        /**
         * Maps the 4-bit wire value to a message type.
         *
         * @param bits the value read from the header
         * @return the matching type, or {@link #INVALID} when no type has that value
         */
        public static MsgType fromBits(int bits) {
            for (MsgType type : values()) {
                if (type.value == bits) {
                    return type;
                }
            }
            return INVALID;
        }
    }

    // Message type flag bits (lower four bits of the second header byte).
    public static final int MSG_TYPE_FLAG_NO_SEQ = 0;
    public static final int MSG_TYPE_FLAG_POSITIVE_SEQ = 0b1;
    public static final int MSG_TYPE_FLAG_LAST_NO_SEQ = 0b10;
    public static final int MSG_TYPE_FLAG_NEGATIVE_SEQ = 0b11;
    public static final int MSG_TYPE_FLAG_WITH_EVENT = 0b100;

    // Version and header size (first header byte).
    public static final int VERSION_1 = 0x10;
    public static final int HEADER_SIZE_4 = 0x1;

    // Serialization method (upper bits of the third header byte).
    public static final int SERIALIZATION_RAW = 0;
    public static final int SERIALIZATION_JSON = 0b1 << 4;

    // Compression method (lower bits of the third header byte).
    public static final int COMPRESSION_NONE = 0;

    /** One protocol message; fields that are absent on the wire keep their default value. */
    public static class Message {
        public MsgType type;
        public int typeFlag;
        public int event;
        public String sessionId;
        public String connectId;
        public int sequence;
        public long errorCode;
        public byte[] payload;

        public Message() {
            this.type = MsgType.INVALID;
        }
    }

    /**
     * Serialises a message, marking the payload as JSON in the header.
     *
     * @param msg the message to serialise; {@code msg.type} must not be null
     * @return the encoded frame
     * @throws IOException if writing to the in-memory buffer fails
     */
    public static byte[] marshal(Message msg) throws IOException {
        return marshal(msg, SERIALIZATION_JSON);
    }

    /** Serialises a message with the given serialization marker in the header. */
    private static byte[] marshal(Message msg, int serialization) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(baos);

        // Fixed header.
        dos.writeByte(VERSION_1 | HEADER_SIZE_4);
        dos.writeByte((msg.type.getValue() << 4) | (msg.typeFlag & 0x0F));
        dos.writeByte(serialization | COMPRESSION_NONE);
        dos.writeByte(0); // reserved

        // Event id, followed by the id field that belongs to that event.
        if (containsEvent(msg.typeFlag)) {
            dos.writeInt(msg.event);
            if (carriesSessionId(msg.event)) {
                writeSizedString(dos, msg.sessionId);
            }
            if (carriesConnectId(msg.event)) {
                writeSizedString(dos, msg.connectId);
            }
        }
        if (containsSequence(msg.typeFlag)) {
            dos.writeInt(msg.sequence);
        }
        if (msg.type == MsgType.ERROR) {
            dos.writeInt((int) msg.errorCode);
        }

        // Payload; a missing payload is written as length zero.
        if (msg.payload != null) {
            dos.writeInt(msg.payload.length);
            dos.write(msg.payload);
        } else {
            dos.writeInt(0);
        }
        return baos.toByteArray();
    }

    /**
     * Parses a frame.
     * <p>
     * The session/connect id is selected by the event id read from the frame itself:
     * events 50, 51 and 52 carry a connect id, events 1 and 2 carry neither, every
     * other event carries a session id (the same rule {@code marshal} applies).
     *
     * @param data the raw frame
     * @return the decoded message; fields with a zero (or negative) length stay null
     * @throws IOException if the frame is null, shorter than the fixed header, or ends
     *                     before a field it announces
     */
    public static Message unmarshal(byte[] data) throws IOException {
        if (data == null || data.length < FIXED_HEADER_BYTES) {
            throw new IOException("数据长度不足");
        }
        ByteBuffer buf = ByteBuffer.wrap(data).order(ByteOrder.BIG_ENDIAN);
        Message msg = new Message();

        // Only the type/flag byte of the fixed header influences parsing.
        int typeAndFlag = data[1] & 0xFF;
        buf.position(FIXED_HEADER_BYTES);
        msg.type = MsgType.fromBits((typeAndFlag >> 4) & 0x0F);
        msg.typeFlag = typeAndFlag & 0x0F;

        if (containsEvent(msg.typeFlag)) {
            msg.event = readInt(buf, "事件ID");
            // The event id must be known before deciding which id field follows.
            if (carriesSessionId(msg.event)) {
                msg.sessionId = readSizedString(buf, "会话ID");
            }
            if (carriesConnectId(msg.event)) {
                msg.connectId = readSizedString(buf, "连接ID");
            }
        }
        if (containsSequence(msg.typeFlag)) {
            msg.sequence = readInt(buf, "序列号");
        }
        if (msg.type == MsgType.ERROR) {
            msg.errorCode = readInt(buf, "错误码") & 0xFFFFFFFFL;
        }
        msg.payload = readSizedBytes(buf, "载荷");
        return msg;
    }

    /** Writes a 4-byte length followed by the UTF-8 bytes; null is written as an empty string. */
    private static void writeSizedString(DataOutputStream dos, String value) throws IOException {
        byte[] bytes = value == null ? new byte[0] : value.getBytes(UTF_8);
        dos.writeInt(bytes.length);
        dos.write(bytes);
    }

    /** Reads a big-endian int, failing with an IOException when the frame is too short. */
    private static int readInt(ByteBuffer buf, String field) throws IOException {
        if (buf.remaining() < Integer.BYTES) {
            throw new IOException("数据长度不足: 读取" + field + "需要4字节, 剩余" + buf.remaining() + "字节");
        }
        return buf.getInt();
    }

    /** Reads a length-prefixed byte field; returns null when the length is zero or negative. */
    private static byte[] readSizedBytes(ByteBuffer buf, String field) throws IOException {
        int size = readInt(buf, field + "长度");
        if (size <= 0) {
            return null;
        }
        if (size > buf.remaining()) {
            throw new IOException("数据长度不足: " + field + "声明" + size + "字节, 剩余" + buf.remaining() + "字节");
        }
        byte[] bytes = new byte[size];
        buf.get(bytes);
        return bytes;
    }

    /** Reads a length-prefixed UTF-8 string; returns null when the length is zero or negative. */
    private static String readSizedString(ByteBuffer buf, String field) throws IOException {
        byte[] bytes = readSizedBytes(buf, field);
        return bytes == null ? null : new String(bytes, UTF_8);
    }

    private static boolean containsEvent(int typeFlag) {
        return (typeFlag & MSG_TYPE_FLAG_WITH_EVENT) == MSG_TYPE_FLAG_WITH_EVENT;
    }

    private static boolean containsSequence(int typeFlag) {
        // Both POSITIVE_SEQ (0b01) and NEGATIVE_SEQ (0b11) have the lowest bit set.
        return (typeFlag & MSG_TYPE_FLAG_POSITIVE_SEQ) == MSG_TYPE_FLAG_POSITIVE_SEQ;
    }

    /** Events 50, 51 and 52 carry a connect id instead of a session id. */
    private static boolean carriesConnectId(int event) {
        return event == 50 || event == 51 || event == 52;
    }

    /** Following the Go version, events 1, 2, 50, 51 and 52 carry no session id. */
    private static boolean carriesSessionId(int event) {
        return event != 1 && event != 2 && !carriesConnectId(event);
    }

    // Convenience factories.

    /** Builds the StartConnection frame (event 1) with an empty JSON object as payload. */
    public static byte[] createStartConnectionMessage() throws IOException {
        Message msg = new Message();
        msg.type = MsgType.FULL_CLIENT;
        msg.typeFlag = MSG_TYPE_FLAG_WITH_EVENT;
        msg.event = 1;
        msg.payload = "{}".getBytes(UTF_8);
        return marshal(msg);
    }

    /**
     * Builds the StartSession frame (event 100).
     *
     * @param sessionId the session id to start
     * @param payload   the request payload text; must not be null
     */
    public static byte[] createStartSessionMessage(String sessionId, String payload) throws IOException {
        Message msg = new Message();
        msg.type = MsgType.FULL_CLIENT;
        msg.typeFlag = MSG_TYPE_FLAG_WITH_EVENT;
        msg.event = 100;
        msg.sessionId = sessionId;
        msg.payload = payload.getBytes(UTF_8);
        return marshal(msg);
    }

    /**
     * Builds an audio frame (event 200, matching the Go version) whose payload is the
     * raw audio and whose header marks the payload as raw rather than JSON.
     */
    public static byte[] createAudioMessage(String sessionId, byte[] audioData) throws IOException {
        Message msg = new Message();
        msg.type = MsgType.AUDIO_ONLY_CLIENT;
        msg.typeFlag = MSG_TYPE_FLAG_WITH_EVENT;
        msg.event = 200; // audio event
        msg.sessionId = sessionId;
        msg.payload = audioData;
        return marshalRawAudio(msg);
    }

    /** Serialises an audio message using the raw serialization marker. */
    private static byte[] marshalRawAudio(Message message) throws IOException {
        return marshal(message, SERIALIZATION_RAW);
    }

    public static String generateSessionId() {
        return UUID.randomUUID().toString();
    }

    /**
     * Builds a FULL_CLIENT frame whose JSON payload holds {@code session_id},
     * {@code text} and the default speaker; kept for compatibility with older code.
     * <p>
     * NOTE(review): the event id is left at its default of 0 although the event flag
     * is set - confirm this is what the callers of this legacy helper expect.
     */
    public static byte[] createFullClientMessage(String sessionId, String text) throws IOException {
        ObjectNode root = objectMapper.createObjectNode();
        root.put("session_id", sessionId);
        root.put("text", text);
        root.put("speaker", Config.DEFAULT_SPEAKER);

        Message message = new Message();
        message.type = MsgType.FULL_CLIENT;
        message.typeFlag = MSG_TYPE_FLAG_WITH_EVENT;
        message.sessionId = sessionId;
        message.payload = objectMapper.writeValueAsBytes(root);
        return marshal(message);
    }
}

View File

@@ -0,0 +1,98 @@
package com.volcengine.realtimedialog;
import com.fasterxml.jackson.annotation.JsonInclude;
import java.util.HashMap;
import java.util.Map;
/**
 * Plain data holders for the request payloads sent to the server.
 * The public field names are the property names used when an instance is serialised.
 */
// NOTE(review): this annotation sits on the outer class only; Jackson class annotations
// are presumably not picked up by the nested payload classes - confirm whether each
// nested class needs its own @JsonInclude.
@JsonInclude(JsonInclude.Include.NON_NULL)
public class RequestPayloads {
// StartSession request payload: groups the asr, tts and dialog sections.
public static class StartSessionPayload {
public ASRPayload asr;
public TTSPayload tts;
public DialogPayload dialog;
// Creates a payload whose three sections hold their default values.
public StartSessionPayload() {
this.asr = new ASRPayload();
this.tts = new TTSPayload();
this.dialog = new DialogPayload();
}
}
// ASR section; only carries a free-form "extra" map.
public static class ASRPayload {
public Map<String, Object> extra = new HashMap<>();
}
// TTS section: speaker (defaults to Config.DEFAULT_SPEAKER) and output audio settings.
public static class TTSPayload {
public String speaker = Config.DEFAULT_SPEAKER;
public AudioConfig audio_config = new AudioConfig();
}
// Audio settings; format and sample rate default to the values from Config.
public static class AudioConfig {
public int channel = 1;
public String format = Config.pcmFormat;
public int sample_rate = Config.OUTPUT_SAMPLE_RATE;
}
// Dialog section: bot name, role and style texts, location and a free-form "extra" map.
public static class DialogPayload {
public String dialog_id = "";
public String bot_name = "豆包";
public String system_role = "你使用活泼灵动的女声,性格开朗,热爱生活。";
public String speaking_style = "你的说话风格简洁明了,语速适中,语调自然。";
public LocationInfo location = new LocationInfo();
public Map<String, Object> extra = new HashMap<>();
}
// Location information attached to the dialog section, with hard-coded defaults.
public static class LocationInfo {
public double longitude = 0.0;
public double latitude = 0.0;
public String city = "北京";
public String country = "中国";
public String province = "北京";
public String district = "";
public String town = "";
public String country_code = "CN";
public String address = "";
}
// SayHello request payload
public static class SayHelloPayload {
public String content;
public SayHelloPayload(String content) {
this.content = content;
}
}
// ChatTextQuery request payload
public static class ChatTextQueryPayload {
public String content;
public ChatTextQueryPayload(String content) {
this.content = content;
}
}
// ChatTTSText request payload
public static class ChatTTSTextPayload {
public boolean start;
public boolean end;
public String content;
public ChatTTSTextPayload(boolean start, boolean end, String content) {
this.start = start;
this.end = end;
this.content = content;
}
}
// ChatRAGText request payload
public static class ChatRAGTextPayload {
public String external_rag;
public ChatRAGTextPayload(String externalRAG) {
this.external_rag = externalRAG;
}
}
}

View File

@@ -0,0 +1,342 @@
package com.volcengine.realtimedialog;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean;
/**
 * Static handlers for the messages {@code NetClient} receives from the server.
 * <p>
 * Full server messages drive the dialog state (by event id), audio-only messages are
 * collected for saving and handed to the client for playback, and error messages
 * terminate the process.
 */
public class ServerResponseHandler {
    private static final ObjectMapper objectMapper = new ObjectMapper();

    /** Charset used to decode message payloads. */
    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;

    private static final int SAMPLE_RATE = 24000;
    private static final int CHANNELS = 1;
    private static final int BUFFER_SECONDS = 100;

    /** Upper bound, in samples, of the decoded-sample buffers. */
    private static final int MAX_BUFFERED_SAMPLES = SAMPLE_RATE * BUFFER_SECONDS;

    // Client event ids, see the realtime dialogue API documentation (ChatTextQuery is 501).
    private static final int EVENT_CHAT_TTS_TEXT = 500;
    private static final int EVENT_CHAT_RAG_TEXT = 502;

    // Decoded-sample buffers. NOTE(review): nothing in this class reads them back.
    private static final List<Float> audioBuffer = Collections.synchronizedList(new ArrayList<>());
    private static final List<Short> s16Buffer = Collections.synchronizedList(new ArrayList<>());

    /** Raw audio bytes of the current round, written by saveAudioToPCMFile; guarded by its own monitor. */
    private static final java.io.ByteArrayOutputStream audioData = new java.io.ByteArrayOutputStream();

    // State flags.
    private static final AtomicBoolean isSendingChatTTSText = new AtomicBoolean(false);
    private static final AtomicBoolean isUserQuerying = new AtomicBoolean(false);
    private static final Object sayHelloOverLock = new Object();
    private static volatile boolean sayHelloOver = false;
    private static final Object firstMsgLock = new Object();
    private static volatile boolean firstMsgProcessed = false;

    /** One entry of external RAG data. */
    public static class RAGObject {
        public String title;
        public String content;

        public RAGObject(String title, String content) {
            this.title = title;
            this.content = content;
        }
    }

    /** ChatTTSText payload. */
    public static class ChatTTSTextPayload {
        public boolean start;
        public boolean end;
        public String content;

        public ChatTTSTextPayload(boolean start, boolean end, String content) {
            this.start = start;
            this.end = end;
            this.content = content;
        }
    }

    /** ChatRAGText payload; {@code externalRAG} holds the RAG entries as a JSON string. */
    public static class ChatRAGTextPayload {
        public String externalRAG;

        public ChatRAGTextPayload(String externalRAG) {
            this.externalRAG = externalRAG;
        }
    }

    /**
     * Handles a full server message according to its event id. Exceptions are logged
     * and swallowed. A message without payload is handled like one with an empty payload.
     *
     * @param netClient the client used to send follow-up requests
     * @param message   the decoded message
     */
    public static void handleFullServerMessage(NetClient netClient, Protocol.Message message) {
        try {
            String jsonStr = payloadAsText(message);
            System.out.println("📨 收到服务器完整消息 (event=" + message.event + ", session_id=" + message.sessionId + "): " + jsonStr);
            System.out.println("🔍 处理事件 ID: " + message.event);
            switch (message.event) {
                case 50: // ConnectionStarted
                    System.out.println("✅ 连接已建立");
                    return;
                case 150: // SessionStarted
                    System.out.println("✅ 会话已开始");
                    return;
                case 152: // session finished event
                case 153: // session finished event
                    System.out.println("🏁 会话结束事件");
                    // Tell CallManager to stop.
                    CallManager.stopFromHandler();
                    return;
                case 359: // first response event
                    System.out.println("🎯 收到事件359音频文件模式: " + isAudioFileInput());
                    if (isAudioFileInput()) {
                        // Audio-file mode: save what was received and stop.
                        System.out.println("🎉 音频文件模式收到首次响应,保存音频并退出...");
                        saveAudioToPCMFile("output.pcm");
                        CallManager.stopFromHandler();
                        return;
                    }
                    if ("text".equals(Config.mod)) {
                        // Text mode: prompt the user for input.
                        System.out.println("💬 请输入内容");
                    } else {
                        // Audio mode: record that the first message has been processed.
                        synchronized (firstMsgLock) {
                            if (!firstMsgProcessed) {
                                firstMsgProcessed = true;
                                synchronized (sayHelloOverLock) {
                                    sayHelloOver = true;
                                    sayHelloOverLock.notifyAll();
                                }
                            }
                        }
                    }
                    break;
                case 300: // SayHello response event (as in the Golang version)
                    System.out.println("🎯 收到SayHello响应事件");
                    if ("text".equals(Config.mod)) {
                        System.out.println("💬 问候语已发送,请输入内容");
                    }
                    break;
                case 450: // ASR info event
                    // Drop the locally cached audio and wait for the next round.
                    clearCachedAudio();
                    // The user spoke, so no further SayHello prompting is needed.
                    CallManager.notifyUserQuery();
                    isUserQuerying.set(true);
                    break;
                case 350: // arrives with tts_type chat_tts_text after a ChatTTSText request
                    if (isSendingChatTTSText.get()) {
                        JsonNode jsonData = message.payload == null ? null : objectMapper.readTree(message.payload);
                        // path() yields a missing node instead of null when the field is absent.
                        String ttsType = jsonData == null ? "" : jsonData.path("tts_type").asText();
                        // Simple approach: drop the locally cached chit-chat audio.
                        if (Arrays.asList("chat_tts_text", "external_rag").contains(ttsType)) {
                            clearCachedAudio();
                            isSendingChatTTSText.set(false);
                        }
                    }
                    break;
                case 459:
                    isUserQuerying.set(false);
                    // Occasionally trigger a ChatTTSText request.
                    if (new Random().nextInt(100000) % 1000 == 0) {
                        final String sessionId = message.sessionId;
                        new Thread(() -> sendChatTTSTextWithExternalRAG(netClient, sessionId)).start();
                    }
                    break;
            }
        } catch (Exception e) {
            System.err.println("处理完整服务器消息失败: " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Sends the comforting ChatTTSText sentence followed, after a simulated lookup
     * delay, by the external RAG data. Runs on its own thread. On failure the
     * "sending" flag is reset so that incoming audio is no longer ignored.
     */
    private static void sendChatTTSTextWithExternalRAG(NetClient netClient, String sessionId) {
        isSendingChatTTSText.set(true);
        try {
            System.out.println("hit ChatTTSText event, start sending...");
            sendChatTTSText(netClient, sessionId, new ChatTTSTextPayload(
                    true, false, "这是查询到外部数据之前的安抚话术。"
            ));
            sendChatTTSText(netClient, sessionId, new ChatTTSTextPayload(
                    false, true, ""
            ));
            // Simulate the time an external RAG lookup takes; sleeping 5 seconds keeps
            // the comforting sentence from being cut off.
            Thread.sleep(5000);
            List<RAGObject> externalRAG = Arrays.asList(
                    new RAGObject("北京天气", "今天北京整体以晴到多云为主,但西部和北部地带可能会出现分散性雷阵雨,特别是午后至傍晚时段需注意突发降雨。\n💨 风况与湿度\n风力较弱一般为 23 级南风或西南风\n白天湿度较高早晚略凉爽"),
                    new RAGObject("北京空气质量", "当前北京空气质量为良AQI指数在50左右适合户外活动。建议关注实时空气质量变化尤其是敏感人群。")
            );
            String externalRAGJson = objectMapper.writeValueAsString(externalRAG);
            sendChatRAGText(netClient, sessionId, new ChatRAGTextPayload(externalRAGJson));
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            isSendingChatTTSText.set(false);
            System.err.println("ChatTTSText处理错误: " + e.getMessage());
        } catch (Exception e) {
            isSendingChatTTSText.set(false);
            System.err.println("ChatTTSText处理错误: " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Handles an audio message: decodes it into the sample buffer, appends the raw
     * bytes to the data kept for saving and hands them to the client for playback
     * (kept simple to mirror the Golang implementation). Exceptions are logged and swallowed.
     *
     * @param netClient the client that plays the audio
     * @param message   the decoded audio message
     */
    public static void handleAudioOnlyServerMessage(NetClient netClient, Protocol.Message message) {
        try {
            final byte[] payload = message.payload;
            final int payloadLength = payload != null ? payload.length : 0;
            System.out.println("🎵 收到音频消息 (event=" + message.event + "): session_id=" + message.sessionId + ", 数据长度: " + payloadLength);
            if (payloadLength > 0) {
                handleIncomingAudio(payload);
                // Keep the raw bytes so they can be written to a file later.
                final int totalBytes;
                synchronized (audioData) {
                    audioData.write(payload, 0, payloadLength);
                    totalBytes = audioData.size();
                }
                netClient.playAudioData(payload);
                System.out.println("✅ 音频数据已保存,当前总长度: " + totalBytes + " 字节");
            }
        } catch (Exception e) {
            System.err.println("处理音频消息失败: " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Prints an error message sent by the server and terminates the JVM with status 1.
     *
     * @param message the decoded error message
     */
    public static void handleErrorMessage(Protocol.Message message) {
        String errorMsg = payloadAsText(message);
        System.err.println("收到错误消息 (code=" + message.errorCode + ", event=" + message.event + "): " + errorMsg);
        System.exit(1);
    }

    /**
     * Decodes an audio chunk into the sample buffer of the configured PCM format and
     * trims that buffer to its maximum size. Chunks are ignored while a ChatTTSText
     * request is being sent.
     */
    private static void handleIncomingAudio(byte[] data) {
        if (isSendingChatTTSText.get()) {
            return;
        }
        switch (Config.pcmFormat) {
            case Config.PCM_S16LE: {
                int sampleCount = data.length / 2;
                System.out.println("收到音频字节长度: " + data.length + ", s16le长度: " + sampleCount);
                ByteBuffer buffer = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN);
                synchronized (s16Buffer) {
                    for (int i = 0; i < sampleCount; i++) {
                        s16Buffer.add(buffer.getShort());
                    }
                    trimToCapacity(s16Buffer);
                }
                break;
            }
            case Config.DEFAULT_PCM: {
                int sampleCount = data.length / 4;
                System.out.println("收到音频字节长度: " + data.length + ", f32le长度: " + sampleCount);
                ByteBuffer buffer = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN);
                synchronized (audioBuffer) {
                    for (int i = 0; i < sampleCount; i++) {
                        audioBuffer.add(Float.intBitsToFloat(buffer.getInt()));
                    }
                    trimToCapacity(audioBuffer);
                }
                break;
            }
        }
    }

    /** Drops the oldest samples so that at most MAX_BUFFERED_SAMPLES remain. */
    private static void trimToCapacity(List<?> samples) {
        int excess = samples.size() - MAX_BUFFERED_SAMPLES;
        if (excess > 0) {
            samples.subList(0, excess).clear();
        }
    }

    /** Discards the audio collected so far, both the raw bytes and the decoded samples. */
    private static void clearCachedAudio() {
        synchronized (audioData) {
            audioData.reset();
        }
        synchronized (audioBuffer) {
            audioBuffer.clear();
        }
        synchronized (s16Buffer) {
            s16Buffer.clear();
        }
    }

    /** Decodes the payload as UTF-8 text; a missing payload yields an empty string. */
    private static String payloadAsText(Protocol.Message message) {
        return message.payload == null ? "" : new String(message.payload, UTF_8);
    }

    /**
     * Writes the audio collected so far to a PCM file in the working directory.
     * Nothing is written when no audio has been collected. I/O failures are logged.
     *
     * @param filename name of the file, resolved against {@code ./}
     */
    public static void saveAudioToPCMFile(String filename) {
        // Take one consistent snapshot so the emptiness check and the write see the same data.
        final byte[] snapshot;
        synchronized (audioData) {
            snapshot = audioData.toByteArray();
        }
        if (snapshot.length == 0) {
            System.out.println("没有音频数据可保存。");
            return;
        }
        File pcmFile = new File("./" + filename);
        try (FileOutputStream fos = new FileOutputStream(pcmFile)) {
            fos.write(snapshot);
            System.out.println("音频已保存到: " + pcmFile.getAbsolutePath());
        } catch (IOException e) {
            System.err.println("保存PCM文件失败: " + e.getMessage());
        }
    }

    /** Sends a ChatTTSText request whose JSON payload carries start, end and content. */
    private static void sendChatTTSText(NetClient netClient, String sessionId, ChatTTSTextPayload payload) throws Exception {
        ObjectNode root = objectMapper.createObjectNode();
        root.put("start", payload.start);
        root.put("end", payload.end);
        root.put("content", payload.content);
        sendClientEvent(netClient, sessionId, EVENT_CHAT_TTS_TEXT, root);
    }

    /** Sends a ChatRAGText request whose JSON payload carries the external_rag string. */
    private static void sendChatRAGText(NetClient netClient, String sessionId, ChatRAGTextPayload payload) throws Exception {
        ObjectNode root = objectMapper.createObjectNode();
        root.put("external_rag", payload.externalRAG);
        sendClientEvent(netClient, sessionId, EVENT_CHAT_RAG_TEXT, root);
    }

    /**
     * Sends a FULL_CLIENT frame for the given event with the JSON payload as-is, the
     * same way NetClient sends ChatTextQuery. The session id travels in the frame
     * header, so it is not repeated in the payload.
     */
    private static void sendClientEvent(NetClient netClient, String sessionId, int event, ObjectNode payload) throws Exception {
        Protocol.Message request = new Protocol.Message();
        request.type = Protocol.MsgType.FULL_CLIENT;
        request.typeFlag = Protocol.MSG_TYPE_FLAG_WITH_EVENT;
        request.event = event;
        request.sessionId = sessionId;
        request.payload = objectMapper.writeValueAsBytes(payload);
        netClient.send(Protocol.marshal(request));
    }

    /** Tells whether the input comes from a recorded audio file. */
    private static boolean isAudioFileInput() {
        return !Config.audioFilePath.isEmpty();
    }
}

Binary file not shown.