feat: 添加realtime_dialog和realtime_dialog_external_rag_test项目,更新test2项目
This commit is contained in:
6
realtime_dialog/java/.idea/.gitignore
generated
vendored
Normal file
6
realtime_dialog/java/.idea/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
# 默认忽略的文件
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# 基于编辑器的 HTTP 客户端请求
|
||||
/httpRequests/
|
||||
/target/
|
||||
6
realtime_dialog/java/.idea/CodeverseWorkspaceAppSettings.xml
generated
Normal file
6
realtime_dialog/java/.idea/CodeverseWorkspaceAppSettings.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="com.codeverse.userSettings.CodeverseWorkspaceAppSettingsState">
|
||||
<option name="progress" value="1.0" />
|
||||
</component>
|
||||
</project>
|
||||
13
realtime_dialog/java/.idea/compiler.xml
generated
Normal file
13
realtime_dialog/java/.idea/compiler.xml
generated
Normal file
@@ -0,0 +1,13 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="CompilerConfiguration">
|
||||
<annotationProcessing>
|
||||
<profile name="Maven default annotation processors profile" enabled="true">
|
||||
<sourceOutputDir name="target/generated-sources/annotations" />
|
||||
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
|
||||
<outputRelativeToContentRoot value="true" />
|
||||
<module name="realtimedialog" />
|
||||
</profile>
|
||||
</annotationProcessing>
|
||||
</component>
|
||||
</project>
|
||||
7
realtime_dialog/java/.idea/encodings.xml
generated
Normal file
7
realtime_dialog/java/.idea/encodings.xml
generated
Normal file
@@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Encoding">
|
||||
<file url="file://$PROJECT_DIR$/src/main/java" charset="UTF-8" />
|
||||
<file url="file://$PROJECT_DIR$/src/main/resources" charset="UTF-8" />
|
||||
</component>
|
||||
</project>
|
||||
30
realtime_dialog/java/.idea/jarRepositories.xml
generated
Normal file
30
realtime_dialog/java/.idea/jarRepositories.xml
generated
Normal file
@@ -0,0 +1,30 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="RemoteRepositoriesConfiguration">
|
||||
<remote-repository>
|
||||
<option name="id" value="bytedance-snapshots" />
|
||||
<option name="name" value="bytedance-snapshots" />
|
||||
<option name="url" value="https://maven.byted.org/repository/public" />
|
||||
</remote-repository>
|
||||
<remote-repository>
|
||||
<option name="id" value="bytedance-releases" />
|
||||
<option name="name" value="bytedance-releases" />
|
||||
<option name="url" value="https://maven.byted.org/repository/public" />
|
||||
</remote-repository>
|
||||
<remote-repository>
|
||||
<option name="id" value="central" />
|
||||
<option name="name" value="Central Repository" />
|
||||
<option name="url" value="https://maven.byted.org/repository/public" />
|
||||
</remote-repository>
|
||||
<remote-repository>
|
||||
<option name="id" value="central" />
|
||||
<option name="name" value="Maven Central repository" />
|
||||
<option name="url" value="https://repo1.maven.org/maven2" />
|
||||
</remote-repository>
|
||||
<remote-repository>
|
||||
<option name="id" value="jboss.community" />
|
||||
<option name="name" value="JBoss Community repository" />
|
||||
<option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" />
|
||||
</remote-repository>
|
||||
</component>
|
||||
</project>
|
||||
12
realtime_dialog/java/.idea/misc.xml
generated
Normal file
12
realtime_dialog/java/.idea/misc.xml
generated
Normal file
@@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ExternalStorageConfigurationManager" enabled="true" />
|
||||
<component name="MavenProjectsManager">
|
||||
<option name="originalFiles">
|
||||
<list>
|
||||
<option value="$PROJECT_DIR$/pom.xml" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_1_8" default="true" project-jdk-name="corretto-1.8" project-jdk-type="JavaSDK" />
|
||||
</project>
|
||||
6
realtime_dialog/java/.idea/vcs.xml
generated
Normal file
6
realtime_dialog/java/.idea/vcs.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$/.." vcs="Git" />
|
||||
</component>
|
||||
</project>
|
||||
63
realtime_dialog/java/README.md
Normal file
63
realtime_dialog/java/README.md
Normal file
@@ -0,0 +1,63 @@
|
||||
# RealtimeDialog Java客户端
|
||||
|
||||
## 项目简介
|
||||
Java版本的RealtimeDialog客户端,支持实时语音对话功能。
|
||||
|
||||
## 环境要求
|
||||
- Java 1.8 或更高版本
|
||||
- Maven 3.6 或更高版本
|
||||
|
||||
## 快速开始
|
||||
|
||||
### 1. 编译项目
|
||||
```bash
|
||||
cd java
|
||||
mvn clean compile
|
||||
```
|
||||
|
||||
### 2. 运行应用
|
||||
|
||||
#### 麦克风模式(默认)
|
||||
```bash
|
||||
mvn exec:java
|
||||
```
|
||||
|
||||
#### 音频文件模式
|
||||
```bash
|
||||
mvn exec:java -Dexec.args="--audio=whoareyou.wav"
|
||||
```
|
||||
|
||||
#### 文本模式
|
||||
```bash
|
||||
mvn exec:java -Dexec.args="--mod=text"
|
||||
```
|
||||
|
||||
#### 指定音频格式
|
||||
```bash
|
||||
mvn exec:java -Dexec.args="--format=pcm_s16le"
|
||||
```
|
||||
|
||||
### 3. 打包可执行JAR
|
||||
```bash
|
||||
mvn clean package
|
||||
java -jar target/realtimedialog-1.0.0.jar --audio=whoareyou.wav
|
||||
```
|
||||
|
||||
## 配置说明
|
||||
|
||||
在使用前,需要在`Config.java`中配置以下参数(也可以在运行时通过命令行参数`--app_id`和`--access_key`传入):
|
||||
- `X-Api-App-ID`: 你的应用ID
|
||||
- `X-Api-Access-Key`: 你的访问密钥
|
||||
|
||||
## 功能特性
|
||||
- 支持麦克风实时语音输入
|
||||
- 支持音频文件输入
|
||||
- 支持文本输入模式
|
||||
- 支持音频输出播放
|
||||
- 支持外部RAG功能
|
||||
- 支持多种音频格式(pcm, pcm_s16le)
|
||||
|
||||
## 命令行参数
|
||||
- `--format`: 音频格式,默认为"pcm"
|
||||
- `--audio`: 音频文件路径,如果不设置则使用麦克风输入
|
||||
- `--mod`: 输入模式,audio(默认)或text
|
||||
56
realtime_dialog/java/dependency-reduced-pom.xml
Normal file
56
realtime_dialog/java/dependency-reduced-pom.xml
Normal file
@@ -0,0 +1,56 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>com.volcengine</groupId>
|
||||
<artifactId>realtimedialog</artifactId>
|
||||
<name>RealtimeDialog Java Client</name>
|
||||
<version>1.0.0</version>
|
||||
<description>Java client for Volcengine RealtimeDialog service</description>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.11.0</version>
|
||||
<configuration>
|
||||
<source>1.8</source>
|
||||
<target>1.8</target>
|
||||
<encoding>UTF-8</encoding>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-shade-plugin</artifactId>
|
||||
<version>3.4.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>shade</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<transformers>
|
||||
<transformer>
|
||||
<mainClass>com.volcengine.realtimedialog.Main</mainClass>
|
||||
</transformer>
|
||||
</transformers>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.codehaus.mojo</groupId>
|
||||
<artifactId>exec-maven-plugin</artifactId>
|
||||
<version>3.1.0</version>
|
||||
<configuration>
|
||||
<mainClass>com.volcengine.realtimedialog.Main</mainClass>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<maven.compiler.target>1.8</maven.compiler.target>
|
||||
<java-websocket.version>1.5.3</java-websocket.version>
|
||||
<jackson.version>2.15.2</jackson.version>
|
||||
<maven.compiler.source>1.8</maven.compiler.source>
|
||||
</properties>
|
||||
</project>
|
||||
128
realtime_dialog/java/pom.xml
Normal file
128
realtime_dialog/java/pom.xml
Normal file
@@ -0,0 +1,128 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
|
||||
http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>com.volcengine</groupId>
|
||||
<artifactId>realtimedialog</artifactId>
|
||||
<version>1.0.0</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>RealtimeDialog Java Client</name>
|
||||
<description>Java client for Volcengine RealtimeDialog service</description>
|
||||
|
||||
<properties>
|
||||
<maven.compiler.source>1.8</maven.compiler.source>
|
||||
<maven.compiler.target>1.8</maven.compiler.target>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<jackson.version>2.15.2</jackson.version>
|
||||
<java-websocket.version>1.5.3</java-websocket.version>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<!-- WebSocket client -->
|
||||
<dependency>
|
||||
<groupId>org.java-websocket</groupId>
|
||||
<artifactId>Java-WebSocket</artifactId>
|
||||
<version>${java-websocket.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- JSON processing -->
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-databind</artifactId>
|
||||
<version>${jackson.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.core</groupId>
|
||||
<artifactId>jackson-core</artifactId>
|
||||
<version>${jackson.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Logging -->
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-simple</artifactId>
|
||||
<version>1.7.36</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Apache Commons CLI for command line parsing -->
|
||||
<dependency>
|
||||
<groupId>commons-cli</groupId>
|
||||
<artifactId>commons-cli</artifactId>
|
||||
<version>1.5.0</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Audio processing -->
|
||||
<dependency>
|
||||
<groupId>com.googlecode.soundlibs</groupId>
|
||||
<artifactId>mp3spi</artifactId>
|
||||
<version>1.9.5.4</version>
|
||||
</dependency>
|
||||
|
||||
<!-- UUID generation -->
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.uuid</groupId>
|
||||
<artifactId>java-uuid-generator</artifactId>
|
||||
<version>4.2.0</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Base64 encoding -->
|
||||
<dependency>
|
||||
<groupId>commons-codec</groupId>
|
||||
<artifactId>commons-codec</artifactId>
|
||||
<version>1.15</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<!-- Compiler plugin -->
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.11.0</version>
|
||||
<configuration>
|
||||
<source>1.8</source>
|
||||
<target>1.8</target>
|
||||
<encoding>UTF-8</encoding>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
<!-- Shade plugin for creating fat jar -->
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-shade-plugin</artifactId>
|
||||
<version>3.4.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>shade</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<transformers>
|
||||
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
|
||||
<mainClass>com.volcengine.realtimedialog.Main</mainClass>
|
||||
</transformer>
|
||||
</transformers>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
<!-- Exec plugin for running the application -->
|
||||
<plugin>
|
||||
<groupId>org.codehaus.mojo</groupId>
|
||||
<artifactId>exec-maven-plugin</artifactId>
|
||||
<version>3.1.0</version>
|
||||
<configuration>
|
||||
<mainClass>com.volcengine.realtimedialog.Main</mainClass>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
@@ -0,0 +1,114 @@
|
||||
package com.volcengine.realtimedialog;
|
||||
|
||||
import javax.sound.sampled.*;
|
||||
import java.io.*;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
|
||||
public class AudioCapture {
|
||||
private static final int BUFFER_SIZE = 4096;
|
||||
private TargetDataLine targetLine;
|
||||
private volatile boolean isCapturing = false;
|
||||
private Thread captureThread;
|
||||
private final BlockingQueue<byte[]> audioQueue;
|
||||
|
||||
public AudioCapture() {
|
||||
this.audioQueue = new ArrayBlockingQueue<>(100);
|
||||
}
|
||||
|
||||
public void startCapture() throws LineUnavailableException {
|
||||
AudioFormat format = new AudioFormat(
|
||||
Config.INPUT_SAMPLE_RATE,
|
||||
16,
|
||||
Config.CHANNELS,
|
||||
true,
|
||||
false // little endian
|
||||
);
|
||||
|
||||
DataLine.Info info = new DataLine.Info(TargetDataLine.class, format);
|
||||
if (!AudioSystem.isLineSupported(info)) {
|
||||
throw new LineUnavailableException("音频输入设备不支持指定格式");
|
||||
}
|
||||
|
||||
targetLine = (TargetDataLine) AudioSystem.getLine(info);
|
||||
targetLine.open(format);
|
||||
targetLine.start();
|
||||
|
||||
isCapturing = true;
|
||||
captureThread = new Thread(this::captureLoop);
|
||||
captureThread.setName("AudioCapture");
|
||||
captureThread.start();
|
||||
}
|
||||
|
||||
private void captureLoop() {
|
||||
byte[] buffer = new byte[Config.AUDIO_CHUNK_SIZE];
|
||||
|
||||
while (isCapturing) {
|
||||
int bytesRead = targetLine.read(buffer, 0, buffer.length);
|
||||
if (bytesRead > 0) {
|
||||
byte[] audioData = new byte[bytesRead];
|
||||
System.arraycopy(buffer, 0, audioData, 0, bytesRead);
|
||||
try {
|
||||
audioQueue.put(audioData);
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public byte[] readAudioData() throws InterruptedException {
|
||||
return audioQueue.poll();
|
||||
}
|
||||
|
||||
public void stopCapture() {
|
||||
isCapturing = false;
|
||||
if (captureThread != null) {
|
||||
captureThread.interrupt();
|
||||
}
|
||||
if (targetLine != null) {
|
||||
targetLine.stop();
|
||||
targetLine.close();
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isCapturing() {
|
||||
return isCapturing;
|
||||
}
|
||||
|
||||
public static byte[] readWavFile(String filePath) throws IOException {
|
||||
File file = new File(filePath);
|
||||
if (!file.exists()) {
|
||||
throw new FileNotFoundException("音频文件不存在: " + filePath);
|
||||
}
|
||||
|
||||
try (FileInputStream fis = new FileInputStream(file)) {
|
||||
byte[] fileData = new byte[(int) file.length()];
|
||||
fis.read(fileData);
|
||||
|
||||
// 跳过WAV文件头(44字节)
|
||||
if (filePath.toLowerCase().endsWith(".wav") && fileData.length > Config.WAV_HEADER_SIZE) {
|
||||
byte[] audioData = new byte[fileData.length - Config.WAV_HEADER_SIZE];
|
||||
System.arraycopy(fileData, Config.WAV_HEADER_SIZE, audioData, 0, audioData.length);
|
||||
return audioData;
|
||||
}
|
||||
|
||||
return fileData;
|
||||
}
|
||||
}
|
||||
|
||||
public static short[] bytesToInt16Samples(byte[] data) {
|
||||
short[] samples = new short[data.length / 2];
|
||||
ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(samples);
|
||||
return samples;
|
||||
}
|
||||
|
||||
public static byte[] int16SamplesToBytes(short[] samples) {
|
||||
byte[] bytes = new byte[samples.length * 2];
|
||||
ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().put(samples);
|
||||
return bytes;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,398 @@
|
||||
package com.volcengine.realtimedialog;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Scanner;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
|
||||
public class CallManager {
|
||||
private final String sessionId;
|
||||
private NetClient netClient;
|
||||
private AudioCapture audioCapture;
|
||||
private Thread audioSendThread;
|
||||
private Thread textInputThread;
|
||||
private final AtomicBoolean isRunning;
|
||||
private final AtomicBoolean isAudioMode;
|
||||
private static final BlockingQueue<Object> queryChan = new LinkedBlockingQueue<>();
|
||||
private static volatile CallManager currentInstance;
|
||||
|
||||
public CallManager() {
|
||||
this.sessionId = Protocol.generateSessionId();
|
||||
this.isRunning = new AtomicBoolean(false);
|
||||
this.isAudioMode = new AtomicBoolean(true);
|
||||
}
|
||||
|
||||
public void start() throws Exception {
|
||||
System.out.println("启动实时通话管理器,会话ID: " + sessionId);
|
||||
|
||||
// 设置当前实例
|
||||
currentInstance = this;
|
||||
|
||||
// 建立WebSocket连接
|
||||
connectWebSocket();
|
||||
|
||||
isRunning.set(true);
|
||||
|
||||
// 启动音频模式或文本模式
|
||||
if (Config.mod.equals("text")) {
|
||||
isAudioMode.set(false);
|
||||
startTextMode();
|
||||
} else {
|
||||
isAudioMode.set(true);
|
||||
startAudioMode();
|
||||
}
|
||||
|
||||
|
||||
// 等待运行结束
|
||||
waitForCompletion();
|
||||
}
|
||||
|
||||
private void connectWebSocket() throws Exception {
|
||||
URI uri = new URI(Config.WS_URL);
|
||||
|
||||
Map<String, String> headers = new HashMap<>();
|
||||
headers.put("X-Api-Resource-Id", Config.API_RESOURCE_ID);
|
||||
headers.put("X-Api-Access-Key", Config.API_ACCESS_KEY);
|
||||
headers.put("X-Api-App-Key", Config.API_APP_KEY);
|
||||
headers.put("X-Api-App-ID", Config.API_APP_ID);
|
||||
headers.put("X-Api-Connect-Id", sessionId);
|
||||
|
||||
netClient = new NetClient(uri, headers);
|
||||
netClient.connectBlocking(30, TimeUnit.SECONDS);
|
||||
|
||||
if (!netClient.isConnected()) {
|
||||
throw new IOException("WebSocket连接失败");
|
||||
}
|
||||
|
||||
System.out.println("WebSocket连接成功");
|
||||
|
||||
// 发送连接开始消息(事件1)
|
||||
startConnection();
|
||||
|
||||
// 发送会话开始消息(事件100)
|
||||
startSession();
|
||||
}
|
||||
|
||||
private void startConnection() throws Exception {
|
||||
System.out.println("发送连接开始消息...");
|
||||
// 使用正确的协议格式发送事件1
|
||||
netClient.sendProtocolMessage(sessionId, "{}", 1);
|
||||
System.out.println("连接开始消息发送完成");
|
||||
}
|
||||
|
||||
private void startSession() throws Exception {
|
||||
System.out.println("发送会话开始消息...");
|
||||
|
||||
RequestPayloads.StartSessionPayload payload = new RequestPayloads.StartSessionPayload();
|
||||
|
||||
// 根据模式设置参数
|
||||
if (Config.mod.equals("text")) {
|
||||
payload.dialog.extra = createExtraMap("text");
|
||||
} else if (!Config.audioFilePath.isEmpty()) {
|
||||
payload.dialog.extra = createExtraMap("audio_file");
|
||||
} else {
|
||||
payload.dialog.extra = createExtraMap("audio");
|
||||
}
|
||||
|
||||
// 发送会话开始消息
|
||||
String jsonPayload = new com.fasterxml.jackson.databind.ObjectMapper().writeValueAsString(payload);
|
||||
netClient.sendProtocolMessage(sessionId, jsonPayload, 100); // 事件100的载荷
|
||||
|
||||
System.out.println("会话开始消息发送完成,等待服务器响应...");
|
||||
|
||||
// 等待会话启动响应(事件150),对齐Go实现
|
||||
long startTime = System.currentTimeMillis();
|
||||
long timeout = 30000; // 30秒超时
|
||||
boolean sessionStarted = false;
|
||||
|
||||
while (System.currentTimeMillis() - startTime < timeout) {
|
||||
Protocol.Message message = netClient.pollIncomingMessage(1, TimeUnit.SECONDS);
|
||||
if (message != null && message.type == Protocol.MsgType.FULL_SERVER && message.event == 150) {
|
||||
// 解析响应payload获取dialog_id
|
||||
try {
|
||||
String responseJson = new String(message.payload, "UTF-8");
|
||||
com.fasterxml.jackson.databind.ObjectMapper mapper = new com.fasterxml.jackson.databind.ObjectMapper();
|
||||
java.util.Map<String, Object> response = mapper.readValue(responseJson, java.util.Map.class);
|
||||
String dialogId = (String) response.get("dialog_id");
|
||||
if (dialogId != null && !dialogId.isEmpty()) {
|
||||
System.out.println("会话启动成功,dialog_id: " + dialogId);
|
||||
sessionStarted = true;
|
||||
break;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.err.println("解析会话启动响应失败: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!sessionStarted) {
|
||||
throw new IOException("会话启动超时或失败,未收到服务器的会话启动确认");
|
||||
}
|
||||
|
||||
System.out.println("会话开始完成\n" + jsonPayload);
|
||||
}
|
||||
|
||||
private Map<String, Object> createExtraMap(String inputMod) {
|
||||
Map<String, Object> extra = new HashMap<>();
|
||||
extra.put("strict_audit", false);
|
||||
extra.put("audit_response", "抱歉这个问题我无法回答,你可以换个其他话题,我会尽力为你提供帮助。");
|
||||
extra.put("input_mod", inputMod);
|
||||
extra.put("model", "O");
|
||||
return extra;
|
||||
}
|
||||
|
||||
private void startAudioMode() throws Exception {
|
||||
System.out.println("启动音频模式");
|
||||
|
||||
if (Config.audioFilePath.isEmpty()) {
|
||||
// 麦克风模式
|
||||
sendGreetingMessage();
|
||||
startMicrophoneCapture();
|
||||
startMessageReceiver();
|
||||
} else {
|
||||
// 音频文件模式 - 不启动麦克风,只启动消息接收
|
||||
startFilePlayback();
|
||||
startMessageReceiver();
|
||||
}
|
||||
}
|
||||
|
||||
private void startTextMode() throws Exception {
|
||||
System.out.println("启动文本模式");
|
||||
|
||||
// 发送问候语,对齐Golang版本
|
||||
sendGreetingMessage();
|
||||
|
||||
// 启动文本输入线程
|
||||
startTextInput();
|
||||
|
||||
// 启动消息接收线程
|
||||
startMessageReceiver();
|
||||
}
|
||||
|
||||
private void startMicrophoneCapture() throws Exception {
|
||||
audioCapture = new AudioCapture();
|
||||
audioCapture.startCapture();
|
||||
|
||||
// 启动音频发送线程
|
||||
audioSendThread = new Thread(this::microphoneSendLoop);
|
||||
audioSendThread.setName("MicrophoneAudioSend");
|
||||
audioSendThread.start();
|
||||
|
||||
System.out.println("麦克风采集已启动");
|
||||
}
|
||||
|
||||
private void startFilePlayback() throws Exception {
|
||||
System.out.println("开始发送音频文件: " + Config.audioFilePath);
|
||||
|
||||
// 读取音频文件
|
||||
byte[] audioData = AudioCapture.readWavFile(Config.audioFilePath);
|
||||
|
||||
// 启动文件发送线程
|
||||
audioSendThread = new Thread(() -> fileSendLoop(audioData));
|
||||
audioSendThread.setName("FileAudioSend");
|
||||
audioSendThread.start();
|
||||
}
|
||||
|
||||
private void microphoneSendLoop() {
|
||||
try {
|
||||
while (isRunning.get() && audioCapture.isCapturing()) {
|
||||
byte[] audioData = audioCapture.readAudioData();
|
||||
if (audioData != null) {
|
||||
netClient.sendAudioData(sessionId, audioData);
|
||||
}
|
||||
|
||||
// 模拟实时发送间隔
|
||||
Thread.sleep(Config.AUDIO_SEND_INTERVAL);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.err.println("麦克风发送线程错误: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private void fileSendLoop(byte[] audioData) {
|
||||
try {
|
||||
int chunkSize = Config.AUDIO_CHUNK_SIZE; // 640字节,与Go实现保持一致
|
||||
int totalSize = audioData.length;
|
||||
int position = 0;
|
||||
int chunkCount = 0;
|
||||
|
||||
System.out.println("开始发送音频文件,总大小: " + totalSize + " 字节, 块大小: " + chunkSize + " 字节");
|
||||
|
||||
while (isRunning.get() && position < totalSize) {
|
||||
int remaining = totalSize - position;
|
||||
int currentChunkSize = Math.min(chunkSize, remaining);
|
||||
|
||||
byte[] chunk = new byte[currentChunkSize];
|
||||
System.arraycopy(audioData, position, chunk, 0, currentChunkSize);
|
||||
|
||||
System.out.println("发送音频块 #" + (++chunkCount) + ": 位置=" + position + ", 大小=" + currentChunkSize + " 字节");
|
||||
|
||||
netClient.sendAudioData(sessionId, chunk);
|
||||
|
||||
position += currentChunkSize;
|
||||
|
||||
// 模拟实时发送间隔 - 每20ms发送一块,与Go实现保持一致
|
||||
Thread.sleep(Config.AUDIO_SEND_INTERVAL);
|
||||
}
|
||||
|
||||
System.out.println("音频文件发送完成,共发送 " + chunkCount + " 块");
|
||||
|
||||
// 发送音频结束标记 - 发送一段静音数据提示服务器音频输入结束
|
||||
System.out.println("发送音频结束标记...");
|
||||
byte[] silenceChunk = new byte[chunkSize]; // 静音数据
|
||||
netClient.sendAudioData(sessionId, silenceChunk);
|
||||
|
||||
System.out.println("音频文件发送完成,等待服务器响应...");
|
||||
// 文件发送完成后,等待服务器通过事件359通知退出
|
||||
} catch (Exception e) {
|
||||
System.err.println("文件发送线程错误: " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private void sendGreetingMessage() throws Exception {
|
||||
System.out.println("发送问候语...");
|
||||
|
||||
// 创建SayHello载荷,对齐Golang版本,使用事件300
|
||||
RequestPayloads.SayHelloPayload payload = new RequestPayloads.SayHelloPayload("你好,我是豆包,有什么可以帮助你的吗?");
|
||||
|
||||
String jsonPayload = new com.fasterxml.jackson.databind.ObjectMapper().writeValueAsString(payload);
|
||||
netClient.sendProtocolMessage(sessionId, jsonPayload, 300); // 事件300 - SayHello,对齐Golang版本
|
||||
|
||||
System.out.println("问候语发送完成");
|
||||
}
|
||||
|
||||
private void startTextInput() {
|
||||
textInputThread = new Thread(this::textInputLoop);
|
||||
textInputThread.setName("TextInput");
|
||||
textInputThread.start();
|
||||
}
|
||||
|
||||
private void textInputLoop() {
|
||||
Scanner scanner = new Scanner(System.in);
|
||||
System.out.println("请输入文本 (输入 'quit' 退出):");
|
||||
|
||||
try {
|
||||
while (isRunning.get()) {
|
||||
String text = scanner.nextLine();
|
||||
|
||||
if (text.equalsIgnoreCase("quit")) {
|
||||
stop();
|
||||
break;
|
||||
}
|
||||
|
||||
if (!text.trim().isEmpty()) {
|
||||
// 使用事件501发送文本查询,对齐Golang版本
|
||||
netClient.sendChatTextQuery(sessionId, text);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.err.println("文本输入线程错误: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private void startMessageReceiver() {
|
||||
Thread receiverThread = new Thread(this::messageReceiveLoop);
|
||||
receiverThread.setName("MessageReceiver");
|
||||
receiverThread.start();
|
||||
}
|
||||
|
||||
private void messageReceiveLoop() {
|
||||
try {
|
||||
while (isRunning.get()) {
|
||||
Protocol.Message message = netClient.pollIncomingMessage(1, TimeUnit.SECONDS);
|
||||
if (message != null) {
|
||||
// 消息已在NetClient中处理,这里可以添加额外的逻辑
|
||||
if (message.type == Protocol.MsgType.ERROR) {
|
||||
String error = new String(message.payload);
|
||||
System.err.println("服务器错误: " + error);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.err.println("消息接收线程错误: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
private void waitForCompletion() throws InterruptedException {
|
||||
while (isRunning.get()) {
|
||||
Thread.sleep(100);
|
||||
|
||||
// 对于音频文件模式,文件发送完成后等待服务器响应
|
||||
if (isAudioMode.get() && !Config.audioFilePath.isEmpty() && audioSendThread != null && !audioSendThread.isAlive()) {
|
||||
// 音频文件已发送完成,继续等待服务器响应
|
||||
System.out.println("音频文件发送完成,等待服务器响应...");
|
||||
// 不退出,继续等待消息接收线程处理服务器响应
|
||||
// 服务器会通过事件359通知可以退出
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void stop() {
|
||||
System.out.println("停止通话管理器");
|
||||
isRunning.set(false);
|
||||
|
||||
try {
|
||||
// 发送会话结束消息(事件102)- 参考Go实现
|
||||
if (netClient != null && netClient.isConnected()) {
|
||||
System.out.println("发送会话结束消息...");
|
||||
finishSession();
|
||||
Thread.sleep(100); // 给服务器处理时间
|
||||
}
|
||||
} catch (Exception e) {
|
||||
System.err.println("发送会话结束消息失败: " + e.getMessage());
|
||||
}
|
||||
|
||||
// 停止音频采集
|
||||
if (audioCapture != null) {
|
||||
audioCapture.stopCapture();
|
||||
}
|
||||
|
||||
// 关闭WebSocket连接并打印logid
|
||||
if (netClient != null) {
|
||||
String logid = netClient.getLogid();
|
||||
if (logid != null && !logid.isEmpty()) {
|
||||
System.out.println("通话结束,logid: " + logid);
|
||||
}
|
||||
netClient.close();
|
||||
}
|
||||
|
||||
// 等待线程结束
|
||||
try {
|
||||
if (audioSendThread != null) {
|
||||
audioSendThread.join(1000);
|
||||
}
|
||||
if (textInputThread != null) {
|
||||
textInputThread.join(1000);
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
|
||||
System.out.println("通话管理器已停止");
|
||||
}
|
||||
|
||||
private void finishSession() throws Exception {
|
||||
if (netClient != null && sessionId != null) {
|
||||
netClient.sendProtocolMessage(sessionId, "{}", 102); // 事件102 - FinishSession
|
||||
System.out.println("会话结束消息已发送");
|
||||
}
|
||||
}
|
||||
|
||||
// 通知用户查询事件
|
||||
public static void notifyUserQuery() {
|
||||
queryChan.offer(new Object());
|
||||
}
|
||||
|
||||
// 从处理器停止CallManager
|
||||
public static void stopFromHandler() {
|
||||
if (currentInstance != null) {
|
||||
currentInstance.stop();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,59 @@
|
||||
package com.volcengine.realtimedialog;
|
||||
|
||||
/**
 * Process-wide settings for the client: fixed protocol and audio constants
 * plus a few mutable values filled in at start-up.
 *
 * <p>The setters treat {@code null} as "not provided" and fall back to the
 * field's default, so code that later calls {@code isEmpty()} or
 * {@code equals(...)} on these fields never sees {@code null}.
 */
public class Config {
    // WebSocket connection settings
    public static final String WS_URL = "wss://openspeech.bytedance.com/api/v3/realtime/dialogue";
    public static final String API_RESOURCE_ID = "volc.speech.dialog";

    // Values the user is expected to configure
    public static String API_APP_ID = "";
    public static String API_ACCESS_KEY = "";
    public static String API_APP_KEY = "PlgvMymc7f3tQnJ6";

    // Audio parameters
    public static final int INPUT_SAMPLE_RATE = 16000;
    public static final int OUTPUT_SAMPLE_RATE = 24000;
    public static final int CHANNELS = 1;
    public static final int INPUT_FRAMES_PER_BUFFER = 160;
    public static final int OUTPUT_FRAMES_PER_BUFFER = 512;
    public static final int BUFFER_SECONDS = 100;

    // Audio formats
    public static final String DEFAULT_PCM = "pcm";
    public static final String PCM_S16LE = "pcm_s16le";

    // TTS settings
    public static final String DEFAULT_SPEAKER = "zh_female_vv_jupiter_bigtts";

    // Network settings
    public static final int AUDIO_CHUNK_SIZE = 640; // bytes, corresponds to 20 ms of audio
    public static final long AUDIO_SEND_INTERVAL = 20; // milliseconds

    // WAV file settings
    public static final int WAV_HEADER_SIZE = 44; // size of the WAV file header

    // Defaults for command-line options
    private static final String DEFAULT_MOD = "audio";
    public static String audioFilePath = "";
    public static String mod = DEFAULT_MOD;
    public static String pcmFormat = PCM_S16LE;

    /** Returns {@code value}, or {@code fallback} when {@code value} is {@code null}. */
    private static String orDefault(String value, String fallback) {
        return value != null ? value : fallback;
    }

    /** Sets the application id; {@code null} becomes the empty string. */
    public static void setAppId(String appId) {
        API_APP_ID = orDefault(appId, "");
    }

    /** Sets the access key; {@code null} becomes the empty string. */
    public static void setAccessKey(String accessKey) {
        API_ACCESS_KEY = orDefault(accessKey, "");
    }

    /** Sets the input audio file path; {@code null} becomes the empty string. */
    public static void setAudioFilePath(String path) {
        audioFilePath = orDefault(path, "");
    }

    /** Sets the input mode; {@code null} restores the default {@code "audio"}. */
    public static void setMod(String mode) {
        mod = orDefault(mode, DEFAULT_MOD);
    }

    /** Sets the PCM format name; {@code null} restores {@link #PCM_S16LE}. */
    public static void setPcmFormat(String format) {
        pcmFormat = orDefault(format, PCM_S16LE);
    }
}
|
||||
@@ -0,0 +1,179 @@
|
||||
package com.volcengine.realtimedialog;
|
||||
|
||||
import org.apache.commons.cli.*;
|
||||
|
||||
public class Main {
|
||||
|
||||
public static void main(String[] args) {
|
||||
// 解析命令行参数
|
||||
CommandLine cmd = parseCommandLine(args);
|
||||
if (cmd == null) {
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
// 应用配置
|
||||
applyConfiguration(cmd);
|
||||
|
||||
// 验证必要的配置
|
||||
if (!validateConfiguration()) {
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
// 启动通话管理器
|
||||
CallManager callManager = new CallManager();
|
||||
|
||||
try {
|
||||
// 添加关闭钩子
|
||||
Runtime.getRuntime().addShutdownHook(new Thread(() -> {
|
||||
System.out.println("正在关闭应用...");
|
||||
callManager.stop();
|
||||
}));
|
||||
|
||||
// 开始通话
|
||||
callManager.start();
|
||||
System.out.println("通话结束");
|
||||
} catch (Exception e) {
|
||||
System.err.println("运行错误: " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
System.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
private static CommandLine parseCommandLine(String[] args) {
|
||||
Options options = new Options();
|
||||
|
||||
// 音频文件路径
|
||||
Option audioOption = Option.builder("a")
|
||||
.longOpt("audio")
|
||||
.desc("音频文件路径,如果不设置则使用麦克风输入")
|
||||
.hasArg()
|
||||
.argName("FILE")
|
||||
.build();
|
||||
options.addOption(audioOption);
|
||||
|
||||
// 输入模式
|
||||
Option modOption = Option.builder("m")
|
||||
.longOpt("mod")
|
||||
.desc("输入模式:audio(默认)或text")
|
||||
.hasArg()
|
||||
.argName("MODE")
|
||||
.build();
|
||||
options.addOption(modOption);
|
||||
|
||||
// 音频格式
|
||||
Option formatOption = Option.builder("f")
|
||||
.longOpt("format")
|
||||
.desc("音频格式,默认为pcm,可选pcm_s16le")
|
||||
.hasArg()
|
||||
.argName("FORMAT")
|
||||
.build();
|
||||
options.addOption(formatOption);
|
||||
|
||||
// 应用ID
|
||||
Option appIdOption = Option.builder()
|
||||
.longOpt("app_id")
|
||||
.desc("应用ID,如果不设置则使用Config中的默认值")
|
||||
.hasArg()
|
||||
.argName("APP_ID")
|
||||
.build();
|
||||
options.addOption(appIdOption);
|
||||
|
||||
// 访问密钥
|
||||
Option accessKeyOption = Option.builder()
|
||||
.longOpt("access_key")
|
||||
.desc("访问密钥,如果不设置则使用Config中的默认值")
|
||||
.hasArg()
|
||||
.argName("ACCESS_KEY")
|
||||
.build();
|
||||
options.addOption(accessKeyOption);
|
||||
|
||||
// 帮助
|
||||
Option helpOption = Option.builder("h")
|
||||
.longOpt("help")
|
||||
.desc("显示帮助信息")
|
||||
.build();
|
||||
options.addOption(helpOption);
|
||||
|
||||
CommandLineParser parser = new DefaultParser();
|
||||
HelpFormatter formatter = new HelpFormatter();
|
||||
|
||||
try {
|
||||
CommandLine cmd = parser.parse(options, args);
|
||||
|
||||
if (cmd.hasOption("help")) {
|
||||
formatter.printHelp("java -jar realtimelog-1.0.0.jar", options);
|
||||
return null;
|
||||
}
|
||||
|
||||
return cmd;
|
||||
|
||||
} catch (ParseException e) {
|
||||
System.err.println("参数解析错误: " + e.getMessage());
|
||||
formatter.printHelp("java -jar realtimelog-1.0.0.jar", options);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static void applyConfiguration(CommandLine cmd) {
|
||||
// 应用音频文件路径
|
||||
if (cmd.hasOption("audio")) {
|
||||
Config.setAudioFilePath(cmd.getOptionValue("audio"));
|
||||
}
|
||||
|
||||
// 应用输入模式
|
||||
if (cmd.hasOption("mod")) {
|
||||
String mode = cmd.getOptionValue("mod");
|
||||
if (!mode.equals("audio") && !mode.equals("text")) {
|
||||
System.err.println("错误:mod参数必须是audio或text");
|
||||
System.exit(1);
|
||||
}
|
||||
Config.setMod(mode);
|
||||
}
|
||||
|
||||
// 应用音频格式
|
||||
if (cmd.hasOption("format")) {
|
||||
String format = cmd.getOptionValue("format");
|
||||
if (!format.equals("pcm") && !format.equals("pcm_s16le")) {
|
||||
System.err.println("错误:format参数必须是pcm或pcm_s16le");
|
||||
System.exit(1);
|
||||
}
|
||||
Config.setPcmFormat(format);
|
||||
}
|
||||
|
||||
// 应用应用ID
|
||||
if (cmd.hasOption("app_id")) {
|
||||
Config.setAppId(cmd.getOptionValue("app_id"));
|
||||
}
|
||||
|
||||
// 应用访问密钥
|
||||
if (cmd.hasOption("access_key")) {
|
||||
Config.setAccessKey(cmd.getOptionValue("access_key"));
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean validateConfiguration() {
|
||||
// 检查必要的配置
|
||||
if (Config.API_APP_ID.equals("your_app_id")) {
|
||||
System.err.println("错误:必须设置应用ID");
|
||||
System.err.println("请在Config.java中设置API_APP_ID,或使用--app_id参数");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (Config.API_ACCESS_KEY.equals("your_access_key")) {
|
||||
System.err.println("错误:必须设置访问密钥");
|
||||
System.err.println("请在Config.java中设置API_ACCESS_KEY,或使用--access_key参数");
|
||||
return false;
|
||||
}
|
||||
|
||||
// 检查音频文件是否存在
|
||||
if (!Config.audioFilePath.isEmpty()) {
|
||||
java.io.File file = new java.io.File(Config.audioFilePath);
|
||||
if (!file.exists()) {
|
||||
System.err.println("错误:音频文件不存在: " + Config.audioFilePath);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,402 @@
|
||||
package com.volcengine.realtimedialog;
|
||||
|
||||
import org.java_websocket.client.WebSocketClient;
|
||||
import org.java_websocket.drafts.Draft_6455;
|
||||
import org.java_websocket.handshake.ServerHandshake;
|
||||
|
||||
import javax.sound.sampled.*;
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
public class NetClient extends WebSocketClient {
|
||||
private final BlockingQueue<Protocol.Message> incomingMessages;
|
||||
private volatile boolean isConnected = false;
|
||||
private volatile boolean shouldStop = false;
|
||||
private SourceDataLine audioOutputLine;
|
||||
private Thread audioPlaybackThread;
|
||||
private final BlockingQueue<byte[]> audioQueue;
|
||||
private volatile String logid; // 保存logid用于通话结束时打印
|
||||
|
||||
public NetClient(URI serverUri, Map<String, String> headers) {
|
||||
super(serverUri, new Draft_6455(), headers, 0);
|
||||
this.incomingMessages = new LinkedBlockingQueue<>();
|
||||
this.audioQueue = new LinkedBlockingQueue<>();
|
||||
// 只在非录音文件模式下初始化音频输出(文本模式需要播放器)
|
||||
if (!isAudioFileInput()) {
|
||||
initializeAudioOutput();
|
||||
}
|
||||
}
|
||||
|
||||
private void initializeAudioOutput() {
|
||||
try {
|
||||
AudioFormat format = new AudioFormat(
|
||||
Config.OUTPUT_SAMPLE_RATE,
|
||||
16,
|
||||
Config.CHANNELS,
|
||||
true,
|
||||
false // little endian
|
||||
);
|
||||
|
||||
DataLine.Info info = new DataLine.Info(SourceDataLine.class, format);
|
||||
if (!AudioSystem.isLineSupported(info)) {
|
||||
System.err.println("不支持音频输出格式");
|
||||
return;
|
||||
}
|
||||
|
||||
audioOutputLine = (SourceDataLine) AudioSystem.getLine(info);
|
||||
audioOutputLine.open(format);
|
||||
audioOutputLine.start();
|
||||
|
||||
// 启动音频播放线程
|
||||
audioPlaybackThread = new Thread(this::audioPlaybackLoop);
|
||||
audioPlaybackThread.setName("AudioPlayback");
|
||||
audioPlaybackThread.start();
|
||||
|
||||
} catch (LineUnavailableException e) {
|
||||
System.err.println("音频输出初始化失败: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
// 播放状态枚举
|
||||
private enum PlaybackState {
|
||||
IDLE, // 空闲状态
|
||||
PLAYING, // 正在播放
|
||||
WAITING_DATA // 等待数据
|
||||
}
|
||||
|
||||
private void audioPlaybackLoop() {
|
||||
PlaybackState state = PlaybackState.IDLE;
|
||||
int emptyCount = 0;
|
||||
final int maxEmptyCount = 20; // 1秒没有数据
|
||||
final boolean isTextMode = Config.mod.equals("text");
|
||||
final boolean isAudioFileMode = isAudioFileInput();
|
||||
|
||||
System.out.println("音频播放线程启动 - 模式: " + Config.mod +
|
||||
", 文本模式: " + isTextMode +
|
||||
", 音频文件模式: " + isAudioFileMode);
|
||||
|
||||
while (!shouldStop) {
|
||||
try {
|
||||
byte[] audioData = audioQueue.poll(50, TimeUnit.MILLISECONDS);
|
||||
|
||||
if (audioData != null && audioOutputLine != null) {
|
||||
// 状态转换:接收到数据 -> 播放状态
|
||||
if (state != PlaybackState.PLAYING) {
|
||||
state = PlaybackState.PLAYING;
|
||||
if (!isTextMode && !isAudioFileMode) {
|
||||
System.out.println("🎵 开始播放音频...");
|
||||
}
|
||||
}
|
||||
|
||||
// 写入音频数据到播放设备
|
||||
audioOutputLine.write(audioData, 0, audioData.length);
|
||||
emptyCount = 0;
|
||||
|
||||
// 调试信息控制
|
||||
if (!isTextMode && !isAudioFileMode && audioData.length > 0) {
|
||||
System.out.println("播放音频数据: " + audioData.length + " 字节");
|
||||
}
|
||||
} else {
|
||||
// 没有数据到达
|
||||
if (state == PlaybackState.PLAYING) {
|
||||
// 从播放状态转换到等待数据状态
|
||||
state = PlaybackState.WAITING_DATA;
|
||||
emptyCount = 0;
|
||||
} else if (state == PlaybackState.WAITING_DATA) {
|
||||
emptyCount++;
|
||||
if (emptyCount > maxEmptyCount) {
|
||||
// 转换到空闲状态
|
||||
state = PlaybackState.IDLE;
|
||||
if (!isTextMode && !isAudioFileMode) {
|
||||
System.out.println("⏸️ 音频播放暂停,等待数据...");
|
||||
}
|
||||
emptyCount = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
break;
|
||||
} catch (Exception e) {
|
||||
System.err.println("❌ 音频播放错误: " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
state = PlaybackState.IDLE;
|
||||
}
|
||||
}
|
||||
|
||||
System.out.println("🛑 音频播放线程结束");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onOpen(ServerHandshake handshake) {
|
||||
System.out.println("WebSocket连接已建立");
|
||||
isConnected = true;
|
||||
|
||||
// 获取并保存logid
|
||||
logid = handshake.getFieldValue("X-Tt-Logid");
|
||||
if (logid != null && !logid.isEmpty()) {
|
||||
System.out.println("连接建立,logid: " + logid);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onMessage(String message) {
|
||||
System.out.println("收到文本消息: " + message);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onMessage(ByteBuffer bytes) {
|
||||
try {
|
||||
byte[] data = new byte[bytes.remaining()];
|
||||
bytes.get(data);
|
||||
|
||||
System.out.println("收到WebSocket二进制消息,长度: " + data.length + " 字节");
|
||||
System.out.println("原始数据前20字节: " + bytesToHex(data, Math.min(20, data.length)));
|
||||
|
||||
try {
|
||||
Protocol.Message message = Protocol.unmarshal(data);
|
||||
|
||||
System.out.println("解析消息成功 - 类型: " + message.type + ", 事件ID: " + message.event + ", 会话ID: " + message.sessionId);
|
||||
|
||||
// 直接使用ProtocolV2.Message,不再转换到旧格式
|
||||
switch (message.type) {
|
||||
case FULL_SERVER:
|
||||
handleFullServerMessage(message);
|
||||
break;
|
||||
case AUDIO_ONLY_SERVER:
|
||||
handleAudioOnlyServerMessage(message);
|
||||
break;
|
||||
case ERROR:
|
||||
handleErrorMessage(message);
|
||||
break;
|
||||
default:
|
||||
System.err.println("未知消息类型: " + message.type);
|
||||
}
|
||||
|
||||
incomingMessages.offer(message);
|
||||
|
||||
} catch (IOException e) {
|
||||
System.err.println("消息解析失败: " + e.getMessage());
|
||||
System.err.println("尝试解析为文本消息...");
|
||||
try {
|
||||
String text = new String(data, "UTF-8");
|
||||
System.err.println("文本内容: " + text);
|
||||
} catch (Exception textEx) {
|
||||
System.err.println("也无法解析为文本: " + textEx.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
System.err.println("处理消息时出错: " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private String bytesToHex(byte[] bytes, int length) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = 0; i < length; i++) {
|
||||
sb.append(String.format("%02X ", bytes[i]));
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private void handleFullServerMessage(Protocol.Message message) {
|
||||
ServerResponseHandler.handleFullServerMessage(this, message);
|
||||
}
|
||||
|
||||
private void handleAudioOnlyServerMessage(Protocol.Message message) {
|
||||
ServerResponseHandler.handleAudioOnlyServerMessage(this, message);
|
||||
}
|
||||
|
||||
private void handleErrorMessage(Protocol.Message message) {
|
||||
ServerResponseHandler.handleErrorMessage(message);
|
||||
}
|
||||
|
||||
// 播放音频数据 - 对齐Golang实现,简化音频处理
|
||||
public void playAudioData(byte[] audioData) {
|
||||
// 录音文件模式下不播放音频
|
||||
if (isAudioFileInput()) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
if (audioData == null || audioData.length == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
System.out.println("播放音频数据: " + audioData.length + " 字节");
|
||||
|
||||
// 根据配置格式处理音频数据
|
||||
switch (Config.pcmFormat) {
|
||||
case Config.PCM_S16LE:
|
||||
// s16le格式直接播放
|
||||
if (audioData.length % 2 != 0) {
|
||||
System.err.println("s16le音频数据长度不是2的倍数: " + audioData.length);
|
||||
return;
|
||||
}
|
||||
audioQueue.offer(audioData);
|
||||
break;
|
||||
|
||||
case Config.DEFAULT_PCM:
|
||||
// f32le格式需要转换为s16le
|
||||
if (audioData.length % 4 != 0) {
|
||||
System.err.println("f32le音频数据长度不是4的倍数: " + audioData.length);
|
||||
return;
|
||||
}
|
||||
|
||||
int sampleCount = audioData.length / 4;
|
||||
short[] samples = new short[sampleCount];
|
||||
ByteBuffer buffer = ByteBuffer.wrap(audioData).order(ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
for (int i = 0; i < sampleCount; i++) {
|
||||
float sample = buffer.getFloat();
|
||||
// 将float转换为short,确保范围正确
|
||||
samples[i] = (short) Math.max(-32768, Math.min(32767, sample * 32767.0f));
|
||||
}
|
||||
|
||||
// 转换为字节数组并播放
|
||||
byte[] s16Data = AudioCapture.int16SamplesToBytes(samples);
|
||||
audioQueue.offer(s16Data);
|
||||
break;
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
System.err.println("播放音频数据失败: " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onClose(int code, String reason, boolean remote) {
|
||||
System.out.println("WebSocket连接已关闭. 代码: " + code + ", 原因: " + reason);
|
||||
|
||||
// 打印logid
|
||||
if (logid != null && !logid.isEmpty()) {
|
||||
System.out.println("连接关闭,logid: " + logid);
|
||||
}
|
||||
|
||||
isConnected = false;
|
||||
cleanup();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onError(Exception ex) {
|
||||
System.err.println("WebSocket错误: " + ex.getMessage());
|
||||
ex.printStackTrace();
|
||||
}
|
||||
|
||||
// 检查是否为录音文件输入模式
|
||||
private boolean isAudioFileInput() {
|
||||
return !Config.audioFilePath.isEmpty();
|
||||
}
|
||||
|
||||
public boolean isConnected() {
|
||||
return isConnected;
|
||||
}
|
||||
|
||||
public String getLogid() {
|
||||
return logid;
|
||||
}
|
||||
|
||||
public Protocol.Message pollIncomingMessage(long timeout, TimeUnit unit) throws InterruptedException {
|
||||
return incomingMessages.poll(timeout, unit);
|
||||
}
|
||||
|
||||
public void sendAudioData(String sessionId, byte[] audioData) throws IOException {
|
||||
if (!isConnected) {
|
||||
throw new IOException("WebSocket未连接");
|
||||
}
|
||||
|
||||
try {
|
||||
byte[] message = Protocol.createAudioMessage(sessionId, audioData);
|
||||
send(message);
|
||||
} catch (Exception e) {
|
||||
throw new IOException("发送音频消息失败: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
public void sendTextMessage(String sessionId, String text) throws IOException {
|
||||
if (!isConnected) {
|
||||
throw new IOException("WebSocket未连接");
|
||||
}
|
||||
|
||||
byte[] message = Protocol.createFullClientMessage(sessionId, text);
|
||||
send(message);
|
||||
}
|
||||
|
||||
public void sendProtocolMessage(String sessionId, String text, int eventId) throws IOException {
|
||||
if (!isConnected) {
|
||||
throw new IOException("WebSocket未连接");
|
||||
}
|
||||
|
||||
try {
|
||||
byte[] messageBytes;
|
||||
if (eventId == 1) {
|
||||
messageBytes = Protocol.createStartConnectionMessage();
|
||||
} else if (eventId == 100) {
|
||||
messageBytes = Protocol.createStartSessionMessage(sessionId, text);
|
||||
} else {
|
||||
// 创建带特定事件ID的消息
|
||||
Protocol.Message message = new Protocol.Message();
|
||||
message.type = Protocol.MsgType.FULL_CLIENT;
|
||||
message.typeFlag = Protocol.MSG_TYPE_FLAG_WITH_EVENT;
|
||||
message.event = eventId;
|
||||
message.sessionId = sessionId;
|
||||
message.payload = text.getBytes("UTF-8");
|
||||
messageBytes = Protocol.marshal(message);
|
||||
}
|
||||
send(messageBytes);
|
||||
} catch (Exception e) {
|
||||
throw new IOException("发送协议消息失败: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
public void sendChatTextQuery(String sessionId, String text) throws IOException {
|
||||
if (!isConnected) {
|
||||
throw new IOException("WebSocket未连接");
|
||||
}
|
||||
|
||||
try {
|
||||
// 创建ChatTextQuery消息(事件501)
|
||||
RequestPayloads.ChatTextQueryPayload payload = new RequestPayloads.ChatTextQueryPayload(text);
|
||||
String jsonPayload = new com.fasterxml.jackson.databind.ObjectMapper().writeValueAsString(payload);
|
||||
|
||||
Protocol.Message message = new Protocol.Message();
|
||||
message.type = Protocol.MsgType.FULL_CLIENT;
|
||||
message.typeFlag = Protocol.MSG_TYPE_FLAG_WITH_EVENT;
|
||||
message.event = 501; // ChatTextQuery事件
|
||||
message.sessionId = sessionId;
|
||||
message.payload = jsonPayload.getBytes("UTF-8");
|
||||
|
||||
byte[] messageBytes = Protocol.marshal(message);
|
||||
send(messageBytes);
|
||||
|
||||
System.out.println("发送ChatTextQuery消息成功: " + text);
|
||||
} catch (Exception e) {
|
||||
throw new IOException("发送ChatTextQuery消息失败: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
private void cleanup() {
|
||||
shouldStop = true;
|
||||
|
||||
if (audioPlaybackThread != null) {
|
||||
audioPlaybackThread.interrupt();
|
||||
}
|
||||
|
||||
if (audioOutputLine != null) {
|
||||
audioOutputLine.drain();
|
||||
audioOutputLine.stop();
|
||||
audioOutputLine.close();
|
||||
}
|
||||
|
||||
// 保存音频到文件
|
||||
ServerResponseHandler.saveAudioToPCMFile("output.pcm");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,366 @@
|
||||
package com.volcengine.realtimedialog;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.*;
|
||||
|
||||
public class Protocol {
|
||||
private static final ObjectMapper objectMapper = new ObjectMapper();
|
||||
|
||||
// 消息类型
|
||||
public enum MsgType {
|
||||
INVALID(0),
|
||||
FULL_CLIENT(1),
|
||||
AUDIO_ONLY_CLIENT(2),
|
||||
FULL_SERVER(9),
|
||||
AUDIO_ONLY_SERVER(11),
|
||||
FRONT_END_RESULT_SERVER(12),
|
||||
ERROR(15);
|
||||
|
||||
private final int value;
|
||||
|
||||
MsgType(int value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public int getValue() {
|
||||
return value;
|
||||
}
|
||||
|
||||
public static MsgType fromBits(int bits) {
|
||||
for (MsgType type : values()) {
|
||||
if (type.value == bits) {
|
||||
return type;
|
||||
}
|
||||
}
|
||||
return INVALID;
|
||||
}
|
||||
}
|
||||
|
||||
// 消息类型标志位
|
||||
public static final int MSG_TYPE_FLAG_NO_SEQ = 0;
|
||||
public static final int MSG_TYPE_FLAG_POSITIVE_SEQ = 0b1;
|
||||
public static final int MSG_TYPE_FLAG_LAST_NO_SEQ = 0b10;
|
||||
public static final int MSG_TYPE_FLAG_NEGATIVE_SEQ = 0b11;
|
||||
public static final int MSG_TYPE_FLAG_WITH_EVENT = 0b100;
|
||||
|
||||
// 版本和头部大小
|
||||
public static final int VERSION_1 = 0x10;
|
||||
public static final int HEADER_SIZE_4 = 0x1;
|
||||
|
||||
// 序列化方法
|
||||
public static final int SERIALIZATION_RAW = 0;
|
||||
public static final int SERIALIZATION_JSON = 0b1 << 4;
|
||||
|
||||
// 压缩方法
|
||||
public static final int COMPRESSION_NONE = 0;
|
||||
|
||||
public static class Message {
|
||||
public MsgType type;
|
||||
public int typeFlag;
|
||||
public int event;
|
||||
public String sessionId;
|
||||
public String connectId;
|
||||
public int sequence;
|
||||
public long errorCode;
|
||||
public byte[] payload;
|
||||
|
||||
public Message() {
|
||||
this.type = MsgType.INVALID;
|
||||
}
|
||||
}
|
||||
|
||||
public static byte[] marshal(Message msg) throws IOException {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
DataOutputStream dos = new DataOutputStream(baos);
|
||||
|
||||
// 构建头部
|
||||
int versionAndHeaderSize = VERSION_1 | HEADER_SIZE_4;
|
||||
dos.writeByte(versionAndHeaderSize);
|
||||
|
||||
// 消息类型和标志
|
||||
int typeAndFlag = (msg.type.getValue() << 4) | (msg.typeFlag & 0x0F);
|
||||
dos.writeByte(typeAndFlag);
|
||||
|
||||
// 序列化和压缩
|
||||
int serializationAndCompression = SERIALIZATION_JSON | COMPRESSION_NONE;
|
||||
dos.writeByte(serializationAndCompression);
|
||||
|
||||
// 保留字节
|
||||
dos.writeByte(0);
|
||||
|
||||
// 根据消息类型写入数据
|
||||
List<WriteFunc> writers = getWriters(msg);
|
||||
for (WriteFunc writer : writers) {
|
||||
writer.write(dos, msg);
|
||||
}
|
||||
|
||||
return baos.toByteArray();
|
||||
}
|
||||
|
||||
public static Message unmarshal(byte[] data) throws IOException {
|
||||
if (data.length < 4) {
|
||||
throw new IOException("数据长度不足");
|
||||
}
|
||||
|
||||
ByteBuffer buf = ByteBuffer.wrap(data).order(ByteOrder.BIG_ENDIAN);
|
||||
Message msg = new Message();
|
||||
|
||||
// 读取头部
|
||||
int versionAndHeaderSize = buf.get() & 0xFF;
|
||||
int typeAndFlag = buf.get() & 0xFF;
|
||||
int serializationAndCompression = buf.get() & 0xFF;
|
||||
int reserved = buf.get() & 0xFF;
|
||||
|
||||
// 解析消息类型
|
||||
int msgTypeBits = (typeAndFlag >> 4) & 0x0F;
|
||||
msg.type = MsgType.fromBits(msgTypeBits);
|
||||
msg.typeFlag = typeAndFlag & 0x0F;
|
||||
|
||||
// 根据消息类型读取数据
|
||||
List<ReadFunc> readers = getReaders(msg);
|
||||
for (ReadFunc reader : readers) {
|
||||
reader.read(buf, msg);
|
||||
}
|
||||
|
||||
return msg;
|
||||
}
|
||||
|
||||
private interface WriteFunc {
|
||||
void write(DataOutputStream dos, Message msg) throws IOException;
|
||||
}
|
||||
|
||||
private interface ReadFunc {
|
||||
void read(ByteBuffer buf, Message msg) throws IOException;
|
||||
}
|
||||
|
||||
private static List<WriteFunc> getWriters(Message msg) {
|
||||
List<WriteFunc> writers = new ArrayList<>();
|
||||
|
||||
// 事件ID
|
||||
if (containsEvent(msg.typeFlag)) {
|
||||
writers.add((dos, m) -> dos.writeInt(m.event));
|
||||
}
|
||||
|
||||
// 会话ID
|
||||
if (shouldWriteSessionId(msg)) {
|
||||
writers.add((dos, m) -> {
|
||||
byte[] sessionIdBytes = m.sessionId.getBytes("UTF-8");
|
||||
dos.writeInt(sessionIdBytes.length);
|
||||
dos.write(sessionIdBytes);
|
||||
});
|
||||
}
|
||||
|
||||
// 连接ID
|
||||
if (shouldWriteConnectId(msg)) {
|
||||
writers.add((dos, m) -> {
|
||||
byte[] connectIdBytes = m.connectId.getBytes("UTF-8");
|
||||
dos.writeInt(connectIdBytes.length);
|
||||
dos.write(connectIdBytes);
|
||||
});
|
||||
}
|
||||
|
||||
// 序列号
|
||||
if (containsSequence(msg.typeFlag)) {
|
||||
writers.add((dos, m) -> dos.writeInt(m.sequence));
|
||||
}
|
||||
|
||||
// 错误码
|
||||
if (msg.type == MsgType.ERROR) {
|
||||
writers.add((dos, m) -> dos.writeInt((int) m.errorCode));
|
||||
}
|
||||
|
||||
// 载荷
|
||||
writers.add((dos, m) -> {
|
||||
if (m.payload != null) {
|
||||
dos.writeInt(m.payload.length);
|
||||
dos.write(m.payload);
|
||||
} else {
|
||||
dos.writeInt(0);
|
||||
}
|
||||
});
|
||||
|
||||
return writers;
|
||||
}
|
||||
|
||||
private static List<ReadFunc> getReaders(Message msg) {
|
||||
List<ReadFunc> readers = new ArrayList<>();
|
||||
|
||||
// 事件ID
|
||||
if (containsEvent(msg.typeFlag)) {
|
||||
readers.add((buf, m) -> m.event = buf.getInt());
|
||||
}
|
||||
|
||||
// 会话ID
|
||||
if (shouldReadSessionId(msg)) {
|
||||
readers.add((buf, m) -> {
|
||||
int size = buf.getInt();
|
||||
if (size > 0) {
|
||||
byte[] bytes = new byte[size];
|
||||
buf.get(bytes);
|
||||
m.sessionId = new String(bytes, "UTF-8");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// 连接ID
|
||||
if (shouldReadConnectId(msg)) {
|
||||
readers.add((buf, m) -> {
|
||||
int size = buf.getInt();
|
||||
if (size > 0) {
|
||||
byte[] bytes = new byte[size];
|
||||
buf.get(bytes);
|
||||
m.connectId = new String(bytes, "UTF-8");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// 序列号
|
||||
if (containsSequence(msg.typeFlag)) {
|
||||
readers.add((buf, m) -> m.sequence = buf.getInt());
|
||||
}
|
||||
|
||||
// 错误码
|
||||
if (msg.type == MsgType.ERROR) {
|
||||
readers.add((buf, m) -> m.errorCode = buf.getInt() & 0xFFFFFFFFL);
|
||||
}
|
||||
|
||||
// 载荷
|
||||
readers.add((buf, m) -> {
|
||||
int size = buf.getInt();
|
||||
if (size > 0) {
|
||||
m.payload = new byte[size];
|
||||
buf.get(m.payload);
|
||||
}
|
||||
});
|
||||
|
||||
return readers;
|
||||
}
|
||||
|
||||
private static boolean containsEvent(int typeFlag) {
|
||||
return (typeFlag & MSG_TYPE_FLAG_WITH_EVENT) == MSG_TYPE_FLAG_WITH_EVENT;
|
||||
}
|
||||
|
||||
private static boolean containsSequence(int typeFlag) {
|
||||
return (typeFlag & MSG_TYPE_FLAG_POSITIVE_SEQ) == MSG_TYPE_FLAG_POSITIVE_SEQ ||
|
||||
(typeFlag & MSG_TYPE_FLAG_NEGATIVE_SEQ) == MSG_TYPE_FLAG_NEGATIVE_SEQ;
|
||||
}
|
||||
|
||||
private static boolean shouldWriteSessionId(Message msg) {
|
||||
// 根据Go版本的逻辑,某些事件不需要会话ID
|
||||
return containsEvent(msg.typeFlag) &&
|
||||
msg.event != 1 && msg.event != 2 && msg.event != 50 && msg.event != 51 && msg.event != 52;
|
||||
}
|
||||
|
||||
private static boolean shouldReadSessionId(Message msg) {
|
||||
return containsEvent(msg.typeFlag) &&
|
||||
msg.event != 1 && msg.event != 2 && msg.event != 50 && msg.event != 51 && msg.event != 52;
|
||||
}
|
||||
|
||||
private static boolean shouldWriteConnectId(Message msg) {
|
||||
return containsEvent(msg.typeFlag) && (msg.event == 50 || msg.event == 51 || msg.event == 52);
|
||||
}
|
||||
|
||||
private static boolean shouldReadConnectId(Message msg) {
|
||||
return containsEvent(msg.typeFlag) && (msg.event == 50 || msg.event == 51 || msg.event == 52);
|
||||
}
|
||||
|
||||
// 辅助方法
|
||||
public static byte[] createStartConnectionMessage() throws IOException {
|
||||
Message msg = new Message();
|
||||
msg.type = MsgType.FULL_CLIENT;
|
||||
msg.typeFlag = MSG_TYPE_FLAG_WITH_EVENT;
|
||||
msg.event = 1;
|
||||
msg.payload = "{}".getBytes("UTF-8");
|
||||
return marshal(msg);
|
||||
}
|
||||
|
||||
public static byte[] createStartSessionMessage(String sessionId, String payload) throws IOException {
|
||||
Message msg = new Message();
|
||||
msg.type = MsgType.FULL_CLIENT;
|
||||
msg.typeFlag = MSG_TYPE_FLAG_WITH_EVENT;
|
||||
msg.event = 100;
|
||||
msg.sessionId = sessionId;
|
||||
msg.payload = payload.getBytes("UTF-8");
|
||||
return marshal(msg);
|
||||
}
|
||||
|
||||
public static byte[] createAudioMessage(String sessionId, byte[] audioData) throws IOException {
|
||||
Message msg = new Message();
|
||||
msg.type = MsgType.AUDIO_ONLY_CLIENT;
|
||||
msg.typeFlag = MSG_TYPE_FLAG_WITH_EVENT;
|
||||
msg.event = 200; // 音频事件 - 完全对齐Go版本
|
||||
msg.sessionId = sessionId;
|
||||
msg.payload = audioData;
|
||||
return marshalRawAudio(msg);
|
||||
}
|
||||
|
||||
// 专门用于音频消息的方法 - 使用原始序列化
|
||||
private static byte[] marshalRawAudio(Message message) throws IOException {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
DataOutputStream dos = new DataOutputStream(baos);
|
||||
|
||||
// 构建头部 - 使用原始序列化
|
||||
int versionAndHeaderSize = VERSION_1 | HEADER_SIZE_4;
|
||||
dos.writeByte(versionAndHeaderSize);
|
||||
|
||||
// 消息类型和标志
|
||||
int typeAndFlag = (message.type.getValue() << 4) | (message.typeFlag & 0x0F);
|
||||
dos.writeByte(typeAndFlag);
|
||||
|
||||
// 序列化和压缩 - 使用原始数据
|
||||
int serializationAndCompression = SERIALIZATION_RAW | COMPRESSION_NONE;
|
||||
dos.writeByte(serializationAndCompression);
|
||||
|
||||
// 保留字节
|
||||
dos.writeByte(0);
|
||||
|
||||
// 事件ID
|
||||
if (containsEvent(message.typeFlag)) {
|
||||
dos.writeInt(message.event);
|
||||
}
|
||||
|
||||
// 会话ID
|
||||
if (shouldWriteSessionId(message)) {
|
||||
byte[] sessionIdBytes = message.sessionId.getBytes("UTF-8");
|
||||
dos.writeInt(sessionIdBytes.length);
|
||||
dos.write(sessionIdBytes);
|
||||
}
|
||||
|
||||
// 载荷
|
||||
if (message.payload != null) {
|
||||
dos.writeInt(message.payload.length);
|
||||
dos.write(message.payload);
|
||||
} else {
|
||||
dos.writeInt(0);
|
||||
}
|
||||
|
||||
return baos.toByteArray();
|
||||
}
|
||||
|
||||
public static String generateSessionId() {
|
||||
return UUID.randomUUID().toString();
|
||||
}
|
||||
|
||||
// 创建FullClient消息 - 用于兼容旧代码
|
||||
public static byte[] createFullClientMessage(String sessionId, String text) throws IOException {
|
||||
ObjectNode root = objectMapper.createObjectNode();
|
||||
root.put("session_id", sessionId);
|
||||
root.put("text", text);
|
||||
root.put("speaker", Config.DEFAULT_SPEAKER);
|
||||
|
||||
Message message = new Message();
|
||||
message.type = MsgType.FULL_CLIENT;
|
||||
message.typeFlag = MSG_TYPE_FLAG_WITH_EVENT;
|
||||
message.sessionId = sessionId;
|
||||
message.payload = objectMapper.writeValueAsBytes(root);
|
||||
|
||||
return marshal(message);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,98 @@
|
||||
package com.volcengine.realtimedialog;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public class RequestPayloads {
|
||||
|
||||
// StartSession请求载荷
|
||||
public static class StartSessionPayload {
|
||||
public ASRPayload asr;
|
||||
public TTSPayload tts;
|
||||
public DialogPayload dialog;
|
||||
|
||||
public StartSessionPayload() {
|
||||
this.asr = new ASRPayload();
|
||||
this.tts = new TTSPayload();
|
||||
this.dialog = new DialogPayload();
|
||||
}
|
||||
}
|
||||
|
||||
public static class ASRPayload {
|
||||
public Map<String, Object> extra = new HashMap<>();
|
||||
}
|
||||
|
||||
public static class TTSPayload {
|
||||
public String speaker = Config.DEFAULT_SPEAKER;
|
||||
public AudioConfig audio_config = new AudioConfig();
|
||||
}
|
||||
|
||||
public static class AudioConfig {
|
||||
public int channel = 1;
|
||||
public String format = Config.pcmFormat;
|
||||
public int sample_rate = Config.OUTPUT_SAMPLE_RATE;
|
||||
}
|
||||
|
||||
public static class DialogPayload {
|
||||
public String dialog_id = "";
|
||||
public String bot_name = "豆包";
|
||||
public String system_role = "你使用活泼灵动的女声,性格开朗,热爱生活。";
|
||||
public String speaking_style = "你的说话风格简洁明了,语速适中,语调自然。";
|
||||
public LocationInfo location = new LocationInfo();
|
||||
public Map<String, Object> extra = new HashMap<>();
|
||||
}
|
||||
|
||||
public static class LocationInfo {
|
||||
public double longitude = 0.0;
|
||||
public double latitude = 0.0;
|
||||
public String city = "北京";
|
||||
public String country = "中国";
|
||||
public String province = "北京";
|
||||
public String district = "";
|
||||
public String town = "";
|
||||
public String country_code = "CN";
|
||||
public String address = "";
|
||||
}
|
||||
|
||||
// SayHello请求载荷
|
||||
public static class SayHelloPayload {
|
||||
public String content;
|
||||
|
||||
public SayHelloPayload(String content) {
|
||||
this.content = content;
|
||||
}
|
||||
}
|
||||
|
||||
// ChatTextQuery请求载荷
|
||||
public static class ChatTextQueryPayload {
|
||||
public String content;
|
||||
|
||||
public ChatTextQueryPayload(String content) {
|
||||
this.content = content;
|
||||
}
|
||||
}
|
||||
|
||||
// ChatTTSText请求载荷
|
||||
public static class ChatTTSTextPayload {
|
||||
public boolean start;
|
||||
public boolean end;
|
||||
public String content;
|
||||
|
||||
public ChatTTSTextPayload(boolean start, boolean end, String content) {
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
this.content = content;
|
||||
}
|
||||
}
|
||||
|
||||
// ChatRAGText请求载荷
|
||||
public static class ChatRAGTextPayload {
|
||||
public String external_rag;
|
||||
|
||||
public ChatRAGTextPayload(String externalRAG) {
|
||||
this.external_rag = externalRAG;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,342 @@
|
||||
package com.volcengine.realtimedialog;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
public class ServerResponseHandler {
|
||||
private static final ObjectMapper objectMapper = new ObjectMapper();
|
||||
private static final int SAMPLE_RATE = 24000;
|
||||
private static final int CHANNELS = 1;
|
||||
private static final int BUFFER_SECONDS = 100;
|
||||
|
||||
// 音频缓冲区
|
||||
private static final List<Float> audioBuffer = Collections.synchronizedList(new ArrayList<>());
|
||||
private static final List<Short> s16Buffer = Collections.synchronizedList(new ArrayList<>());
|
||||
private static final List<Byte> audioData = Collections.synchronizedList(new ArrayList<>());
|
||||
|
||||
// 状态标志
|
||||
private static final AtomicBoolean isSendingChatTTSText = new AtomicBoolean(false);
|
||||
private static final AtomicBoolean isUserQuerying = new AtomicBoolean(false);
|
||||
private static final Object sayHelloOverLock = new Object();
|
||||
private static volatile boolean sayHelloOver = false;
|
||||
private static final Object firstMsgLock = new Object();
|
||||
private static volatile boolean firstMsgProcessed = false;
|
||||
|
||||
// 外部RAG数据结构
|
||||
public static class RAGObject {
|
||||
public String title;
|
||||
public String content;
|
||||
|
||||
public RAGObject(String title, String content) {
|
||||
this.title = title;
|
||||
this.content = content;
|
||||
}
|
||||
}
|
||||
|
||||
// ChatTTSText载荷
|
||||
public static class ChatTTSTextPayload {
|
||||
public boolean start;
|
||||
public boolean end;
|
||||
public String content;
|
||||
|
||||
public ChatTTSTextPayload(boolean start, boolean end, String content) {
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
this.content = content;
|
||||
}
|
||||
}
|
||||
|
||||
// ChatRAGText载荷
|
||||
public static class ChatRAGTextPayload {
|
||||
public String externalRAG;
|
||||
|
||||
public ChatRAGTextPayload(String externalRAG) {
|
||||
this.externalRAG = externalRAG;
|
||||
}
|
||||
}
|
||||
|
||||
// 消息处理
|
||||
public static void handleFullServerMessage(NetClient netClient, Protocol.Message message) {
|
||||
try {
|
||||
String jsonStr = new String(message.payload);
|
||||
System.out.println("📨 收到服务器完整消息 (event=" + message.event + ", session_id=" + message.sessionId + "): " + jsonStr);
|
||||
|
||||
// 事件处理
|
||||
System.out.println("🔍 处理事件 ID: " + message.event);
|
||||
switch (message.event) {
|
||||
case 50: // ConnectionStarted
|
||||
System.out.println("✅ 连接已建立");
|
||||
return;
|
||||
|
||||
case 150: // SessionStarted
|
||||
System.out.println("✅ 会话已开始");
|
||||
return;
|
||||
|
||||
case 152: // session finished event
|
||||
case 153: // session finished event
|
||||
System.out.println("🏁 会话结束事件");
|
||||
// 通知CallManager停止
|
||||
CallManager.stopFromHandler();
|
||||
return;
|
||||
|
||||
case 359: // 首次响应事件
|
||||
System.out.println("🎯 收到事件359,音频文件模式: " + isAudioFileInput());
|
||||
if (isAudioFileInput()) {
|
||||
System.out.println("🎉 音频文件模式收到首次响应,保存音频并退出...");
|
||||
// 音频文件模式下,收到事件359后保存音频并退出
|
||||
saveAudioToPCMFile("output.pcm");
|
||||
CallManager.stopFromHandler();
|
||||
return;
|
||||
}
|
||||
|
||||
// 文本模式下,收到事件359后提示用户输入
|
||||
if (Config.mod.equals("text")) {
|
||||
System.out.println("💬 请输入内容");
|
||||
} else {
|
||||
// 音频模式下,标记首次消息已处理
|
||||
synchronized (firstMsgLock) {
|
||||
if (!firstMsgProcessed) {
|
||||
firstMsgProcessed = true;
|
||||
synchronized (sayHelloOverLock) {
|
||||
sayHelloOver = true;
|
||||
sayHelloOverLock.notifyAll();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 300: // SayHello响应事件,对齐Golang版本
|
||||
System.out.println("🎯 收到SayHello响应事件");
|
||||
if (Config.mod.equals("text")) {
|
||||
System.out.println("💬 问候语已发送,请输入内容");
|
||||
}
|
||||
break;
|
||||
|
||||
case 450: // ASR info event, clear audio buffer
|
||||
// 清空本地音频缓存,等待接收下一轮的音频
|
||||
synchronized (audioData) {
|
||||
audioData.clear();
|
||||
}
|
||||
synchronized (audioBuffer) {
|
||||
audioBuffer.clear();
|
||||
}
|
||||
// 用户说话了,不需要触发连续SayHello引导用户交互了
|
||||
CallManager.notifyUserQuery();
|
||||
isUserQuerying.set(true);
|
||||
break;
|
||||
|
||||
case 350: // 发送ChatTTSText请求事件之后,收到tts_type为chat_tts_text的事件
|
||||
if (isSendingChatTTSText.get()) {
|
||||
// 解析JSON数据
|
||||
JsonNode jsonData = objectMapper.readTree(message.payload);
|
||||
String ttsType = jsonData.get("tts_type").asText();
|
||||
// 一种简单方式清空本地闲聊音频
|
||||
if (Arrays.asList("chat_tts_text", "external_rag").contains(ttsType)) {
|
||||
synchronized (audioData) {
|
||||
audioData.clear();
|
||||
}
|
||||
synchronized (audioBuffer) {
|
||||
audioBuffer.clear();
|
||||
}
|
||||
isSendingChatTTSText.set(false);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case 459:
|
||||
isUserQuerying.set(false);
|
||||
// 概率触发发送ChatTTSText请求
|
||||
if (new Random().nextInt(100000) % 1000 == 0) {
|
||||
new Thread(() -> {
|
||||
try {
|
||||
isSendingChatTTSText.set(true);
|
||||
System.out.println("hit ChatTTSText event, start sending...");
|
||||
|
||||
// 发送ChatTTSText请求
|
||||
sendChatTTSText(netClient, message.sessionId, new ChatTTSTextPayload(
|
||||
true, false, "这是查询到外部数据之前的安抚话术。"
|
||||
));
|
||||
sendChatTTSText(netClient, message.sessionId, new ChatTTSTextPayload(
|
||||
false, true, ""
|
||||
));
|
||||
|
||||
// 模拟查询外部RAG数据耗时,这里简单起见直接sleep5秒保证GTA安抚话术播报不受影响
|
||||
Thread.sleep(5000);
|
||||
|
||||
// 发送外部RAG数据
|
||||
List<RAGObject> externalRAG = Arrays.asList(
|
||||
new RAGObject("北京天气", "今天北京整体以晴到多云为主,但西部和北部地带可能会出现分散性雷阵雨,特别是午后至傍晚时段需注意突发降雨。\n💨 风况与湿度\n风力较弱,一般为 2–3 级南风或西南风\n白天湿度较高,早晚略凉爽"),
|
||||
new RAGObject("北京空气质量", "当前北京空气质量为良,AQI指数在50左右,适合户外活动。建议关注实时空气质量变化,尤其是敏感人群。")
|
||||
);
|
||||
|
||||
String externalRAGJson = objectMapper.writeValueAsString(externalRAG);
|
||||
sendChatRAGText(netClient, message.sessionId, new ChatRAGTextPayload(externalRAGJson));
|
||||
|
||||
} catch (Exception e) {
|
||||
System.err.println("ChatTTSText处理错误: " + e.getMessage());
|
||||
}
|
||||
}).start();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
System.err.println("处理完整服务器消息失败: " + e.getMessage());
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
// 处理音频消息 - 对齐Golang实现,简化逻辑
|
||||
public static void handleAudioOnlyServerMessage(NetClient netClient, Protocol.Message message) {
|
||||
try {
|
||||
System.out.println("🎵 收到音频消息 (event=" + message.event + "): session_id=" + message.sessionId + ", 数据长度: " + (message.payload != null ? message.payload.length : 0));
|
||||
|
||||
if (message.payload != null && message.payload.length > 0) {
|
||||
// 直接处理音频数据(简化逻辑,对齐Golang)
|
||||
handleIncomingAudio(message.payload);
|
||||
|
||||
// 保存音频数据到文件
|
||||
synchronized (audioData) {
|
||||
for (byte b : message.payload) {
|
||||
audioData.add(b);
|
||||
}
|
||||
}
|
||||
|
||||
// 直接播放音频 - 对齐Golang实现
|
||||
netClient.playAudioData(message.payload);
|
||||
|
||||
System.out.println("✅ 音频数据已保存,当前总长度: " + audioData.size() + " 字节");
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
System.err.println("处理音频消息失败: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
// 处理错误消息
|
||||
public static void handleErrorMessage(Protocol.Message message) {
|
||||
String errorMsg = new String(message.payload);
|
||||
System.err.println("收到错误消息 (code=" + message.event + "): " + errorMsg);
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
// 处理输入音频数据 - 对齐Golang实现,简化逻辑
|
||||
private static void handleIncomingAudio(byte[] data) {
|
||||
if (isSendingChatTTSText.get()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// 简化音频处理逻辑,对齐Golang实现
|
||||
switch (Config.pcmFormat) {
|
||||
case Config.PCM_S16LE:
|
||||
System.out.println("收到音频字节长度: " + data.length + ", s16le长度: " + (data.length / 2));
|
||||
int sampleCount = data.length / 2;
|
||||
short[] samples = new short[sampleCount];
|
||||
|
||||
ByteBuffer buffer = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN);
|
||||
for (int i = 0; i < sampleCount; i++) {
|
||||
samples[i] = buffer.getShort();
|
||||
}
|
||||
|
||||
// 将音频加载到缓冲区 - 简化逻辑对齐Golang
|
||||
synchronized (s16Buffer) {
|
||||
for (short sample : samples) {
|
||||
s16Buffer.add(sample);
|
||||
}
|
||||
// 限制缓冲区大小 - 简化逻辑
|
||||
if (s16Buffer.size() > SAMPLE_RATE * BUFFER_SECONDS) {
|
||||
s16Buffer.subList(0, s16Buffer.size() - (SAMPLE_RATE * BUFFER_SECONDS)).clear();
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case Config.DEFAULT_PCM:
|
||||
System.out.println("收到音频字节长度: " + data.length + ", f32le长度: " + (data.length / 4));
|
||||
int floatSampleCount = data.length / 4;
|
||||
float[] floatSamples = new float[floatSampleCount];
|
||||
|
||||
ByteBuffer floatBuffer = ByteBuffer.wrap(data).order(ByteOrder.LITTLE_ENDIAN);
|
||||
for (int i = 0; i < floatSampleCount; i++) {
|
||||
int bits = floatBuffer.getInt();
|
||||
floatSamples[i] = Float.intBitsToFloat(bits);
|
||||
}
|
||||
|
||||
// 将音频加载到缓冲区 - 简化逻辑对齐Golang
|
||||
synchronized (audioBuffer) {
|
||||
for (float sample : floatSamples) {
|
||||
audioBuffer.add(sample);
|
||||
}
|
||||
// 限制缓冲区大小 - 简化逻辑
|
||||
if (audioBuffer.size() > SAMPLE_RATE * BUFFER_SECONDS) {
|
||||
audioBuffer.subList(0, audioBuffer.size() - (SAMPLE_RATE * BUFFER_SECONDS)).clear();
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// 保存音频到PCM文件
|
||||
public static void saveAudioToPCMFile(String filename) {
|
||||
synchronized (audioData) {
|
||||
if (audioData.isEmpty()) {
|
||||
System.out.println("没有音频数据可保存。");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
File pcmFile = new File("./" + filename);
|
||||
try (FileOutputStream fos = new FileOutputStream(pcmFile)) {
|
||||
synchronized (audioData) {
|
||||
byte[] audioBytes = new byte[audioData.size()];
|
||||
for (int i = 0; i < audioData.size(); i++) {
|
||||
audioBytes[i] = audioData.get(i);
|
||||
}
|
||||
fos.write(audioBytes);
|
||||
}
|
||||
System.out.println("音频已保存到: " + pcmFile.getAbsolutePath());
|
||||
}
|
||||
} catch (IOException e) {
|
||||
System.err.println("保存PCM文件失败: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
// 发送ChatTTSText消息
|
||||
private static void sendChatTTSText(NetClient netClient, String sessionId, ChatTTSTextPayload payload) throws Exception {
|
||||
ObjectNode root = objectMapper.createObjectNode();
|
||||
root.put("session_id", sessionId);
|
||||
root.put("start", payload.start);
|
||||
root.put("end", payload.end);
|
||||
root.put("content", payload.content);
|
||||
|
||||
String jsonStr = objectMapper.writeValueAsString(root);
|
||||
byte[] message = Protocol.createFullClientMessage(sessionId, jsonStr);
|
||||
netClient.send(message);
|
||||
}
|
||||
|
||||
// 发送ChatRAGText消息
|
||||
private static void sendChatRAGText(NetClient netClient, String sessionId, ChatRAGTextPayload payload) throws Exception {
|
||||
ObjectNode root = objectMapper.createObjectNode();
|
||||
root.put("session_id", sessionId);
|
||||
root.put("external_rag", payload.externalRAG);
|
||||
|
||||
String jsonStr = objectMapper.writeValueAsString(root);
|
||||
byte[] message = Protocol.createFullClientMessage(sessionId, jsonStr);
|
||||
netClient.send(message);
|
||||
}
|
||||
|
||||
// 检查是否为音频文件输入模式
|
||||
private static boolean isAudioFileInput() {
|
||||
return !Config.audioFilePath.isEmpty();
|
||||
}
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
realtime_dialog/java/whoareyou.wav
Normal file
BIN
realtime_dialog/java/whoareyou.wav
Normal file
Binary file not shown.
Reference in New Issue
Block a user