feat(s2s-text): dedicated text-mode prompt + Markdown rendering

Architecture fix: voice and text mode now have completely separate prompts.

Backend:
- VoiceAssistantProfileSupport.buildTextSystemRole: dedicated text-mode system
  role that inherits all business rules (identity, KB-first, sensitive topics,
  sales guidance, personal info) but removes voice-specific constraints (short
  sentences, colloquial, single-line conclusion).
- DEFAULT_TEXT_SPEAKING_STYLE: text-specific style demanding detailed,
  structured, Markdown-formatted answers with complete information.
- VoiceGatewayService.handleStart: switch between voice/text system role and
  speaking style based on state.textMode.
- VoiceGatewayService.buildStartSessionPayload: preserve Markdown in text mode
  (voice mode still strips asterisks/backticks via normalizeTextForSpeech to
  avoid TTS pronouncing format chars).

Frontend:
- Added react-markdown@9 + remark-gfm@4 dependencies.
- ChatPanel renders assistant messages (non-voice) with ReactMarkdown:
  headings, lists (ul/ol), bold, italic, inline/block code, tables, blockquote,
  links, horizontal rules — all styled with Tailwind classes matching the dark
  theme.
- User messages and voice-handoff messages remain plain text.

Verification: VoiceGatewaySmokeTest passes 20/20 under mvn test, and vite build succeeds.
This commit is contained in:
User
2026-04-17 10:10:20 +08:00
parent 4b78f81cbc
commit e145f1d97e
5 changed files with 1576 additions and 13 deletions

View File

@@ -13,6 +13,17 @@ public class VoiceAssistantProfileSupport {
public static final String DEFAULT_SPEAKING_STYLE = "整体语气亲切自然、轻快有温度,像熟悉行业的朋友在语音聊天。优先短句和口语化表达,先给结论,再补一句最有帮助的信息。不要播音腔,不要念稿,不要客服腔,不要过度热情,也不要输出任何思考过程。";
/**
* Speaking style dedicated to text chat: encourages detailed, structured answers that may use Markdown formatting.
* Contrasts with the voice mode's "short, colloquial sentences" style — text users need complete information.
* The value is prompt text sent to the model, so every character of the literal is significant.
*/
public static final String DEFAULT_TEXT_SPEAKING_STYLE = "这是文字对话,回答要详尽、结构化、专业,但不要啰嗦废话。"
+ "使用 Markdown 格式组织内容:关键词用**粗体**强调,要点用无序列表(- 开头步骤用有序列表1. 2. 3.),对比信息用表格,代码/命令用代码块。"
+ "回答结构建议:先给**核心结论**1-2句话然后展开【关键依据】【详细说明】【建议行动】等小节。"
+ "可以使用二级/三级标题组织长回答。段落分明,每段聚焦一个要点。"
+ "举例、数据、产品规格、成分列表必须完整呈现,不要为了简短牺牲信息完整性。"
+ "不要输出思考过程或元描述,不要说\"好的我来回答\"之类的引导语,直接进入正文。";
/**
 * Resolves the given assistant profile by delegating to {@code AssistantProfile.resolve}.
 *
 * @param profile the profile to resolve; it is passed to the delegate unchanged
 * @return the value returned by {@code AssistantProfile.resolve(profile)}
 */
public AssistantProfile resolve(AssistantProfile profile) {
    final AssistantProfile resolvedProfile = AssistantProfile.resolve(profile);
    return resolvedProfile;
}
@@ -60,6 +71,46 @@ public class VoiceAssistantProfileSupport {
+ "";
}
/**
* Builds the system role (system prompt) dedicated to text mode.
* - Inherits all business rules (identity/attribution, knowledge-base first, sensitive topics, sales guidance, personal info).
* - Drops the voice-specific constraints ("short sentences", "colloquial", "conclusion first, then one more sentence").
* - Explicitly requires detailed, structured answers that use Markdown.
*
* The result is a single concatenated string made of, in order: the identity and knowledge-base rules
* (with the documents clause embedded), the sensitive-topic directive, the sales guidance, the personal
* info block, the text format directive, and the optional handoff summary block.
*
* @param profile the assistant profile; it is passed through {@code resolve(profile)} before use
* @param handoffSummary summary of earlier conversation; when it has no text, no summary block is appended
* @return the assembled text-mode system role
*/
public String buildTextSystemRole(AssistantProfile profile, String handoffSummary) {
AssistantProfile resolved = resolve(profile);
String displayName = getDisplayName(resolved);
// When the profile carries documents text, it is appended (trimmed) directly after the fixed phrase.
// NOTE(review): no separator is visible between the fixed phrase and the documents text — confirm this is intended.
String documentsClause = StringUtils.hasText(resolved.documents())
? ",拥有丰富的基础知识库信息" + resolved.documents().trim()
: ",拥有丰富的基础知识库信息";
String personalInfoBlock = buildPersonalInfoBlock(resolved);
// Sensitive health topics: answer from the knowledge base first, then direct the user to the company's own advisor.
String sensitiveTopicDirective = " 当涉及备孕、怀孕、孕期、哺乳、疾病治疗、用药搭配等敏感健康话题时,"
+ "你必须先根据知识库内容给出专业建议,然后引导用户联系大沃的专业健康管理顾问获取一对一指导。"
+ "绝对禁止说\"建议咨询医生\"\"咨询营养师\"\"咨询专业人士\"等笼统第三方建议,只能引导到大沃的健康管理顾问。";
String salesGuidance = buildSalesGuidanceDirective();
// The history summary is added only when one was supplied; it is trimmed and placed under its own header.
String summaryBlock = StringUtils.hasText(handoffSummary)
? "\n\n【对话历史摘要】以下是用户之前的对话记录请据此保持对话连贯性不要重复已回答的内容\n" + handoffSummary.trim()
: "";
// Text-only formatting rules: Markdown structure, complete information, no lead-in phrases.
String textFormatDirective = " 【文字对话格式要求】这是文字对话,用户能看到完整排版,回答必须详尽、结构化、专业。"
+ "使用 Markdown 格式:关键词**粗体**、要点用无序列表(- 开头、步骤用有序列表1. 2. 3.)、对比用表格、代码/命令用代码块。"
+ "长回答用二级/三级标题(## ###)组织小节,建议结构:先写**核心结论**1-2 句),再展开【关键依据】【详细说明】【建议行动】。"
+ "产品规格、成分、功效、用法、价格、活动政策等**必须完整呈现**,不得为了简短牺牲信息完整性。"
+ "不要输出\"好的我来回答\"之类的引导语,直接进入正文。";
// NOTE(review): the quoted first-person pronoun below is an empty pair of quotes, and two "" literals
// contribute nothing to the result — confirm whether characters were lost from these literals.
return "你是名为" + displayName + "的AI智能体由一成系统自主研发专注于PM事业、FitLine产品和一成系统相关知识。"
+ "你不是字节跳动、抖音、火山引擎或任何第三方平台研发的助手;如果用户问到研发归属,只能回答\"我是一成系统自主研发的AI智能体\""
+ "你是专业、热情、有亲和力的事业伙伴,始终使用第一人称\"\",不要用第三人称描述自己。"
+ "回答要围绕产品、使用方法、搭配建议、常见问题和事业机会展开" + documentsClause + ""
+ "优先使用知识库信息自然转述不要照念需要查资料时直接调用search_knowledge工具绝不猜测或编造产品信息。"
+ "禁止输出思考过程或元描述。"
+ "禁止使用推脱式说法用户追问产品详情时必须调用search_knowledge工具查询不要凭自身知识回答具体产品信息。"
+ sensitiveTopicDirective
+ salesGuidance
+ personalInfoBlock
+ textFormatDirective
+ summaryBlock
+ "";
}
public String normalizeTextForSpeech(String text) {
return String.valueOf(text == null ? "" : text)
.replaceAll("^#{1,6}\\s*", "")

View File

@@ -278,8 +278,14 @@ public class VoiceGatewayService {
state.handoffSummary = StringUtils.hasText(redisSummary) ? redisSummary : buildDeterministicHandoffSummary(chatRepository.getHistoryForLlm(state.sessionId, 10));
// Always use backend-constructed system role (matches Node.js behavior).
// Frontend may send stale/default systemRole — never let it override.
state.systemRole = voiceAssistantProfileSupport.buildVoiceSystemRole(state.assistantProfile, state.handoffSummary);
state.speakingStyle = firstNonBlank(textValue(node.path("speakingStyle")), VoiceAssistantProfileSupport.DEFAULT_SPEAKING_STYLE);
// Text mode gets a dedicated prompt encouraging detailed, Markdown-formatted replies.
state.systemRole = state.textMode
? voiceAssistantProfileSupport.buildTextSystemRole(state.assistantProfile, state.handoffSummary)
: voiceAssistantProfileSupport.buildVoiceSystemRole(state.assistantProfile, state.handoffSummary);
String defaultStyle = state.textMode
? VoiceAssistantProfileSupport.DEFAULT_TEXT_SPEAKING_STYLE
: VoiceAssistantProfileSupport.DEFAULT_SPEAKING_STYLE;
state.speakingStyle = firstNonBlank(textValue(node.path("speakingStyle")), defaultStyle);
state.speaker = firstNonBlank(textValue(node.path("speaker")), properties.getDefaultSpeaker());
state.modelVersion = firstNonBlank(textValue(node.path("modelVersion")), "O");
state.greetingText = firstNonBlank(textValue(node.path("greetingText")), voiceAssistantProfileSupport.buildVoiceGreeting(state.assistantProfile));
@@ -375,8 +381,16 @@ public class VoiceGatewayService {
Map<String, Object> dialog = new LinkedHashMap<>();
dialog.put("dialog_id", "");
dialog.put("bot_name", state.botName);
dialog.put("system_role", voiceAssistantProfileSupport.normalizeTextForSpeech(ANTI_THINKING_PREFIX + " " + state.systemRole));
// Text mode keeps Markdown intact so LLM sees format directive; voice mode strips Markdown
// to avoid TTS pronouncing asterisks/backticks.
String rawSystemRole = ANTI_THINKING_PREFIX + " " + state.systemRole;
if (state.textMode) {
dialog.put("system_role", rawSystemRole);
dialog.put("speaking_style", state.speakingStyle);
} else {
dialog.put("system_role", voiceAssistantProfileSupport.normalizeTextForSpeech(rawSystemRole));
dialog.put("speaking_style", voiceAssistantProfileSupport.normalizeTextForSpeech(state.speakingStyle));
}
String inputMod = state.textMode ? "text" : "audio";
dialog.put("extra", Map.of("input_mod", inputMod, "model", state.modelVersion, "strict_audit", false, "audit_response", "抱歉,这个问题我暂时无法回答。"));
Map<String, Object> payload = new LinkedHashMap<>();

File diff suppressed because it is too large Load Diff

View File

@@ -13,7 +13,9 @@
"axios": "^1.6.2",
"lucide-react": "^0.344.0",
"react": "^18.2.0",
"react-dom": "^18.2.0"
"react-dom": "^18.2.0",
"react-markdown": "^9.1.0",
"remark-gfm": "^4.0.1"
},
"devDependencies": {
"@tailwindcss/vite": "^4.0.0",

View File

@@ -1,5 +1,7 @@
import { useState, useRef, useEffect, useCallback } from 'react';
import { Send, Bot, User, Loader2, ArrowLeft, Sparkles, Wrench, StopCircle } from 'lucide-react';
import ReactMarkdown from 'react-markdown';
import remarkGfm from 'remark-gfm';
import { startChatSession, sendMessageStream } from '../services/chatApi';
import { getSessionHistory } from '../services/voiceApi';
import { NativeVoiceService } from '../services/nativeVoiceService';
@@ -368,7 +370,38 @@ export default function ChatPanel({ sessionId, voiceSubtitles, settings, onBack,
: 'bg-slate-700/50 text-slate-200 rounded-tl-sm'
}`}
>
{msg.content}
{msg.role === 'assistant' && !msg.fromVoice ? (
<div className="markdown-body">
<ReactMarkdown
remarkPlugins={[remarkGfm]}
components={{
p: ({ node, ...props }) => <p className="mb-2 last:mb-0" {...props} />,
ul: ({ node, ...props }) => <ul className="list-disc pl-5 mb-2 space-y-0.5" {...props} />,
ol: ({ node, ...props }) => <ol className="list-decimal pl-5 mb-2 space-y-0.5" {...props} />,
li: ({ node, ...props }) => <li className="leading-relaxed" {...props} />,
h1: ({ node, ...props }) => <h1 className="text-base font-bold mt-2 mb-1.5" {...props} />,
h2: ({ node, ...props }) => <h2 className="text-sm font-bold mt-2 mb-1.5" {...props} />,
h3: ({ node, ...props }) => <h3 className="text-sm font-semibold mt-2 mb-1" {...props} />,
strong: ({ node, ...props }) => <strong className="font-semibold text-white" {...props} />,
em: ({ node, ...props }) => <em className="italic" {...props} />,
code: ({ node, inline, ...props }) => inline
? <code className="px-1 py-0.5 rounded bg-slate-900/60 text-violet-300 text-[12px]" {...props} />
: <code className="block px-2 py-1.5 rounded bg-slate-900/80 text-violet-200 text-[12px] overflow-x-auto my-1.5" {...props} />,
pre: ({ node, ...props }) => <pre className="my-1.5" {...props} />,
table: ({ node, ...props }) => <div className="overflow-x-auto my-2"><table className="min-w-full text-xs border-collapse" {...props} /></div>,
th: ({ node, ...props }) => <th className="border border-slate-600/40 px-2 py-1 bg-slate-900/40 font-semibold text-left" {...props} />,
td: ({ node, ...props }) => <td className="border border-slate-600/40 px-2 py-1" {...props} />,
blockquote: ({ node, ...props }) => <blockquote className="border-l-2 border-violet-500/50 pl-2 italic text-slate-300 my-1.5" {...props} />,
a: ({ node, ...props }) => <a className="text-violet-400 hover:text-violet-300 underline" target="_blank" rel="noopener noreferrer" {...props} />,
hr: () => <hr className="my-2 border-slate-600/30" />,
}}
>
{msg.content || ''}
</ReactMarkdown>
</div>
) : (
msg.content
)}
{msg.fromVoice && (
<span className="ml-1.5 text-[9px] text-slate-600 align-middle">🎙</span>
)}