dify
This commit is contained in:
10
dify/web/app/components/base/voice-input/index.module.css
Normal file
10
dify/web/app/components/base/voice-input/index.module.css
Normal file
@@ -0,0 +1,10 @@
|
||||
.wrapper {
|
||||
background: linear-gradient(131deg, #2250F2 0%, #0EBCF3 100%);
|
||||
box-shadow: 0px 4px 6px -2px rgba(16, 24, 40, 0.03), 0px 12px 16px -4px rgba(16, 24, 40, 0.08);
|
||||
}
|
||||
|
||||
.convert {
|
||||
background: linear-gradient(91.92deg, #104AE1 -1.74%, #0098EE 75.74%);
|
||||
background-clip: text;
|
||||
color: transparent;
|
||||
}
|
||||
499
dify/web/app/components/base/voice-input/index.stories.tsx
Normal file
499
dify/web/app/components/base/voice-input/index.stories.tsx
Normal file
@@ -0,0 +1,499 @@
|
||||
import type { Meta, StoryObj } from '@storybook/nextjs'
|
||||
import { useState } from 'react'
|
||||
|
||||
// Mock component since VoiceInput requires browser APIs and service dependencies
|
||||
const VoiceInputMock = ({ onConverted, onCancel }: any) => {
|
||||
const [state, setState] = useState<'idle' | 'recording' | 'converting'>('recording')
|
||||
const [duration, setDuration] = useState(0)
|
||||
|
||||
// Simulate recording
|
||||
useState(() => {
|
||||
const interval = setInterval(() => {
|
||||
setDuration(d => d + 1)
|
||||
}, 1000)
|
||||
return () => clearInterval(interval)
|
||||
})
|
||||
|
||||
const handleStop = () => {
|
||||
setState('converting')
|
||||
setTimeout(() => {
|
||||
onConverted('This is simulated transcribed text from voice input.')
|
||||
}, 2000)
|
||||
}
|
||||
|
||||
const minutes = Math.floor(duration / 60)
|
||||
const seconds = duration % 60
|
||||
|
||||
return (
|
||||
<div className="relative h-16 w-full overflow-hidden rounded-xl border-2 border-primary-600">
|
||||
<div className="absolute inset-[1.5px] flex items-center overflow-hidden rounded-[10.5px] bg-primary-25 py-[14px] pl-[14.5px] pr-[6.5px]">
|
||||
{/* Waveform visualization placeholder */}
|
||||
<div className="absolute bottom-0 left-0 flex h-4 w-full items-end gap-[3px] px-2">
|
||||
{Array.from({ length: 40 }).map((_, i) => (
|
||||
<div
|
||||
key={i}
|
||||
className="w-[2px] rounded-t bg-blue-200"
|
||||
style={{
|
||||
height: `${Math.random() * 100}%`,
|
||||
animation: state === 'recording' ? 'pulse 1s infinite' : 'none',
|
||||
}}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
|
||||
{state === 'converting' && (
|
||||
<div className="mr-2 h-4 w-4 animate-spin rounded-full border-2 border-primary-700 border-t-transparent" />
|
||||
)}
|
||||
|
||||
<div className="z-10 grow">
|
||||
{state === 'recording' && (
|
||||
<div className="text-sm text-gray-500">Speaking...</div>
|
||||
)}
|
||||
{state === 'converting' && (
|
||||
<div className="text-sm text-gray-500">Converting to text...</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{state === 'recording' && (
|
||||
<div
|
||||
className="mr-1 flex h-8 w-8 cursor-pointer items-center justify-center rounded-lg hover:bg-primary-100"
|
||||
onClick={handleStop}
|
||||
>
|
||||
<div className="h-5 w-5 rounded bg-primary-600" />
|
||||
</div>
|
||||
)}
|
||||
|
||||
{state === 'converting' && (
|
||||
<div
|
||||
className="mr-1 flex h-8 w-8 cursor-pointer items-center justify-center rounded-lg hover:bg-gray-200"
|
||||
onClick={onCancel}
|
||||
>
|
||||
<span className="text-lg text-gray-500">×</span>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className={`w-[45px] pl-1 text-xs font-medium ${duration > 500 ? 'text-red-600' : 'text-gray-700'}`}>
|
||||
{`0${minutes}:${seconds >= 10 ? seconds : `0${seconds}`}`}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
const meta = {
|
||||
title: 'Base/Data Entry/VoiceInput',
|
||||
component: VoiceInputMock,
|
||||
parameters: {
|
||||
layout: 'centered',
|
||||
docs: {
|
||||
description: {
|
||||
component: 'Voice input component for recording audio and converting speech to text. Features waveform visualization, recording timer (max 10 minutes), and audio-to-text conversion using js-audio-recorder.\n\n**Note:** This is a simplified mock for Storybook. The actual component requires microphone permissions and audio-to-text API.',
|
||||
},
|
||||
},
|
||||
},
|
||||
tags: ['autodocs'],
|
||||
} satisfies Meta<typeof VoiceInputMock>
|
||||
|
||||
export default meta
|
||||
type Story = StoryObj<typeof meta>
|
||||
|
||||
// Basic demo
|
||||
const VoiceInputDemo = () => {
|
||||
const [isRecording, setIsRecording] = useState(false)
|
||||
const [transcription, setTranscription] = useState('')
|
||||
|
||||
const handleStartRecording = () => {
|
||||
setIsRecording(true)
|
||||
setTranscription('')
|
||||
}
|
||||
|
||||
const handleConverted = (text: string) => {
|
||||
setTranscription(text)
|
||||
setIsRecording(false)
|
||||
}
|
||||
|
||||
const handleCancel = () => {
|
||||
setIsRecording(false)
|
||||
setTranscription('')
|
||||
}
|
||||
|
||||
return (
|
||||
<div style={{ width: '600px' }}>
|
||||
{!isRecording && (
|
||||
<button
|
||||
className="w-full rounded-lg bg-blue-600 px-4 py-3 font-medium text-white hover:bg-blue-700"
|
||||
onClick={handleStartRecording}
|
||||
>
|
||||
🎤 Start Voice Recording
|
||||
</button>
|
||||
)}
|
||||
|
||||
{isRecording && (
|
||||
<VoiceInputMock
|
||||
onConverted={handleConverted}
|
||||
onCancel={handleCancel}
|
||||
/>
|
||||
)}
|
||||
|
||||
{transcription && (
|
||||
<div className="mt-4 rounded-lg bg-gray-50 p-4">
|
||||
<div className="mb-2 text-xs font-medium text-gray-600">Transcription:</div>
|
||||
<div className="text-sm text-gray-800">{transcription}</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
// Default state
|
||||
export const Default: Story = {
|
||||
render: () => <VoiceInputDemo />,
|
||||
}
|
||||
|
||||
// Recording state
|
||||
export const RecordingState: Story = {
|
||||
render: () => (
|
||||
<div style={{ width: '600px' }}>
|
||||
<VoiceInputMock
|
||||
onConverted={() => console.log('Converted')}
|
||||
onCancel={() => console.log('Cancelled')}
|
||||
/>
|
||||
<div className="mt-3 text-xs text-gray-500">
|
||||
Recording in progress with live waveform visualization
|
||||
</div>
|
||||
</div>
|
||||
),
|
||||
}
|
||||
|
||||
// Real-world example - Chat input with voice
|
||||
const ChatInputWithVoiceDemo = () => {
|
||||
const [message, setMessage] = useState('')
|
||||
const [isRecording, setIsRecording] = useState(false)
|
||||
|
||||
return (
|
||||
<div style={{ width: '700px' }} className="rounded-lg border border-gray-200 bg-white p-6">
|
||||
<h3 className="mb-4 text-lg font-semibold">Chat Interface</h3>
|
||||
|
||||
{/* Existing messages */}
|
||||
<div className="mb-4 h-64 space-y-3 overflow-y-auto">
|
||||
<div className="flex gap-3">
|
||||
<div className="flex h-8 w-8 items-center justify-center rounded-full bg-blue-500 text-sm text-white">
|
||||
U
|
||||
</div>
|
||||
<div className="flex-1">
|
||||
<div className="rounded-lg bg-gray-100 p-3 text-sm">
|
||||
Hello! How can I help you today?
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex gap-3">
|
||||
<div className="flex h-8 w-8 items-center justify-center rounded-full bg-green-500 text-sm text-white">
|
||||
A
|
||||
</div>
|
||||
<div className="flex-1">
|
||||
<div className="rounded-lg bg-blue-50 p-3 text-sm">
|
||||
I can assist you with various tasks. What would you like to know?
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Input area */}
|
||||
<div className="space-y-3">
|
||||
{!isRecording ? (
|
||||
<div className="flex gap-2">
|
||||
<input
|
||||
type="text"
|
||||
className="flex-1 rounded-lg border border-gray-300 px-4 py-3 text-sm"
|
||||
placeholder="Type a message..."
|
||||
value={message}
|
||||
onChange={e => setMessage(e.target.value)}
|
||||
/>
|
||||
<button
|
||||
className="rounded-lg bg-gray-100 px-4 py-3 hover:bg-gray-200"
|
||||
onClick={() => setIsRecording(true)}
|
||||
title="Voice input"
|
||||
>
|
||||
🎤
|
||||
</button>
|
||||
<button className="rounded-lg bg-blue-600 px-6 py-3 text-white hover:bg-blue-700">
|
||||
Send
|
||||
</button>
|
||||
</div>
|
||||
) : (
|
||||
<VoiceInputMock
|
||||
onConverted={(text: string) => {
|
||||
setMessage(text)
|
||||
setIsRecording(false)
|
||||
}}
|
||||
onCancel={() => setIsRecording(false)}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
export const ChatInputWithVoice: Story = {
|
||||
render: () => <ChatInputWithVoiceDemo />,
|
||||
}
|
||||
|
||||
// Real-world example - Search with voice
|
||||
const SearchWithVoiceDemo = () => {
|
||||
const [searchQuery, setSearchQuery] = useState('')
|
||||
const [isRecording, setIsRecording] = useState(false)
|
||||
|
||||
return (
|
||||
<div style={{ width: '700px' }} className="rounded-lg border border-gray-200 bg-white p-6">
|
||||
<h3 className="mb-4 text-lg font-semibold">Voice Search</h3>
|
||||
|
||||
{!isRecording ? (
|
||||
<div className="flex gap-2">
|
||||
<div className="relative flex-1">
|
||||
<input
|
||||
type="text"
|
||||
className="w-full rounded-lg border border-gray-300 px-4 py-3 pl-10 text-sm"
|
||||
placeholder="Search or use voice..."
|
||||
value={searchQuery}
|
||||
onChange={e => setSearchQuery(e.target.value)}
|
||||
/>
|
||||
<span className="absolute left-3 top-1/2 -translate-y-1/2 text-gray-400">
|
||||
🔍
|
||||
</span>
|
||||
</div>
|
||||
<button
|
||||
className="rounded-lg bg-blue-600 px-4 py-3 text-white hover:bg-blue-700"
|
||||
onClick={() => setIsRecording(true)}
|
||||
>
|
||||
🎤 Voice Search
|
||||
</button>
|
||||
</div>
|
||||
) : (
|
||||
<VoiceInputMock
|
||||
onConverted={(text: string) => {
|
||||
setSearchQuery(text)
|
||||
setIsRecording(false)
|
||||
}}
|
||||
onCancel={() => setIsRecording(false)}
|
||||
/>
|
||||
)}
|
||||
|
||||
{searchQuery && !isRecording && (
|
||||
<div className="mt-4 rounded-lg bg-blue-50 p-4">
|
||||
<div className="mb-2 text-xs font-medium text-blue-900">
|
||||
Searching for: <strong>{searchQuery}</strong>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
export const SearchWithVoice: Story = {
|
||||
render: () => <SearchWithVoiceDemo />,
|
||||
}
|
||||
|
||||
// Real-world example - Note taking
|
||||
const NoteTakingDemo = () => {
|
||||
const [notes, setNotes] = useState<string[]>([])
|
||||
const [isRecording, setIsRecording] = useState(false)
|
||||
|
||||
return (
|
||||
<div style={{ width: '700px' }} className="rounded-lg border border-gray-200 bg-white p-6">
|
||||
<div className="mb-4 flex items-center justify-between">
|
||||
<h3 className="text-lg font-semibold">Voice Notes</h3>
|
||||
<span className="text-sm text-gray-500">{notes.length} notes</span>
|
||||
</div>
|
||||
|
||||
<div className="mb-4">
|
||||
{!isRecording ? (
|
||||
<button
|
||||
className="flex w-full items-center justify-center gap-2 rounded-lg bg-red-500 px-4 py-3 font-medium text-white hover:bg-red-600"
|
||||
onClick={() => setIsRecording(true)}
|
||||
>
|
||||
<span className="text-xl">🎤</span>
|
||||
Record Voice Note
|
||||
</button>
|
||||
) : (
|
||||
<VoiceInputMock
|
||||
onConverted={(text: string) => {
|
||||
setNotes([...notes, text])
|
||||
setIsRecording(false)
|
||||
}}
|
||||
onCancel={() => setIsRecording(false)}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div className="max-h-80 space-y-2 overflow-y-auto">
|
||||
{notes.length === 0 ? (
|
||||
<div className="py-12 text-center text-gray-400">
|
||||
No notes yet. Click the button above to start recording.
|
||||
</div>
|
||||
) : (
|
||||
notes.map((note, index) => (
|
||||
<div key={index} className="rounded-lg border border-gray-200 bg-gray-50 p-3">
|
||||
<div className="flex items-start justify-between">
|
||||
<div className="flex-1">
|
||||
<div className="mb-1 text-xs text-gray-500">Note {index + 1}</div>
|
||||
<div className="text-sm text-gray-800">{note}</div>
|
||||
</div>
|
||||
<button
|
||||
className="text-gray-400 hover:text-red-500"
|
||||
onClick={() => setNotes(notes.filter((_, i) => i !== index))}
|
||||
>
|
||||
×
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
))
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
export const NoteTaking: Story = {
|
||||
render: () => <NoteTakingDemo />,
|
||||
}
|
||||
|
||||
// Real-world example - Form with voice
|
||||
const FormWithVoiceDemo = () => {
|
||||
const [formData, setFormData] = useState({
|
||||
name: '',
|
||||
description: '',
|
||||
})
|
||||
const [activeField, setActiveField] = useState<'name' | 'description' | null>(null)
|
||||
|
||||
return (
|
||||
<div style={{ width: '600px' }} className="rounded-lg border border-gray-200 bg-white p-6">
|
||||
<h3 className="mb-4 text-lg font-semibold">Create Product</h3>
|
||||
|
||||
<div className="space-y-4">
|
||||
<div>
|
||||
<label className="mb-2 block text-sm font-medium text-gray-700">
|
||||
Product Name
|
||||
</label>
|
||||
{activeField === 'name' ? (
|
||||
<VoiceInputMock
|
||||
onConverted={(text: string) => {
|
||||
setFormData({ ...formData, name: text })
|
||||
setActiveField(null)
|
||||
}}
|
||||
onCancel={() => setActiveField(null)}
|
||||
/>
|
||||
) : (
|
||||
<div className="flex gap-2">
|
||||
<input
|
||||
type="text"
|
||||
className="flex-1 rounded-lg border border-gray-300 px-3 py-2 text-sm"
|
||||
placeholder="Enter product name..."
|
||||
value={formData.name}
|
||||
onChange={e => setFormData({ ...formData, name: e.target.value })}
|
||||
/>
|
||||
<button
|
||||
className="rounded-lg bg-gray-100 px-3 py-2 hover:bg-gray-200"
|
||||
onClick={() => setActiveField('name')}
|
||||
>
|
||||
🎤
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label className="mb-2 block text-sm font-medium text-gray-700">
|
||||
Description
|
||||
</label>
|
||||
{activeField === 'description' ? (
|
||||
<VoiceInputMock
|
||||
onConverted={(text: string) => {
|
||||
setFormData({ ...formData, description: text })
|
||||
setActiveField(null)
|
||||
}}
|
||||
onCancel={() => setActiveField(null)}
|
||||
/>
|
||||
) : (
|
||||
<div className="space-y-2">
|
||||
<textarea
|
||||
className="w-full rounded-lg border border-gray-300 px-3 py-2 text-sm"
|
||||
rows={4}
|
||||
placeholder="Enter product description..."
|
||||
value={formData.description}
|
||||
onChange={e => setFormData({ ...formData, description: e.target.value })}
|
||||
/>
|
||||
<button
|
||||
className="w-full rounded-lg bg-gray-100 px-3 py-2 text-sm hover:bg-gray-200"
|
||||
onClick={() => setActiveField('description')}
|
||||
>
|
||||
🎤 Use Voice Input
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<button className="w-full rounded-lg bg-blue-600 px-4 py-2 text-white hover:bg-blue-700">
|
||||
Create Product
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
export const FormWithVoice: Story = {
|
||||
render: () => <FormWithVoiceDemo />,
|
||||
}
|
||||
|
||||
// Features showcase
|
||||
export const FeaturesShowcase: Story = {
|
||||
render: () => (
|
||||
<div style={{ width: '700px' }} className="rounded-lg border border-gray-200 bg-white p-6">
|
||||
<h3 className="mb-4 text-lg font-semibold">Voice Input Features</h3>
|
||||
|
||||
<div className="mb-6">
|
||||
<VoiceInputMock
|
||||
onConverted={() => undefined}
|
||||
onCancel={() => undefined}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div className="space-y-4">
|
||||
<div className="rounded-lg bg-blue-50 p-4">
|
||||
<div className="mb-2 text-sm font-medium text-blue-900">🎤 Audio Recording</div>
|
||||
<ul className="space-y-1 text-xs text-blue-800">
|
||||
<li>• Uses js-audio-recorder for browser-based recording</li>
|
||||
<li>• 16kHz sample rate, 16-bit, mono channel</li>
|
||||
<li>• Converts to MP3 format for transmission</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div className="rounded-lg bg-green-50 p-4">
|
||||
<div className="mb-2 text-sm font-medium text-green-900">📊 Waveform Visualization</div>
|
||||
<ul className="space-y-1 text-xs text-green-800">
|
||||
<li>• Real-time audio level display using Canvas API</li>
|
||||
<li>• Animated bars showing voice amplitude</li>
|
||||
<li>• Visual feedback during recording</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div className="rounded-lg bg-purple-50 p-4">
|
||||
<div className="mb-2 text-sm font-medium text-purple-900">⏱️ Time Limits</div>
|
||||
<ul className="space-y-1 text-xs text-purple-800">
|
||||
<li>• Maximum recording duration: 10 minutes (600 seconds)</li>
|
||||
<li>• Timer turns red after 8:20 (500 seconds)</li>
|
||||
<li>• Automatic stop at max duration</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div className="rounded-lg bg-orange-50 p-4">
|
||||
<div className="mb-2 text-sm font-medium text-orange-900">🔄 Audio-to-Text Conversion</div>
|
||||
<ul className="space-y-1 text-xs text-orange-800">
|
||||
<li>• Server-side speech-to-text processing</li>
|
||||
<li>• Optional word timestamps support</li>
|
||||
<li>• Loading state during conversion</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
),
|
||||
}
|
||||
217
dify/web/app/components/base/voice-input/index.tsx
Normal file
217
dify/web/app/components/base/voice-input/index.tsx
Normal file
@@ -0,0 +1,217 @@
|
||||
import { useCallback, useEffect, useRef, useState } from 'react'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
import { useParams, usePathname } from 'next/navigation'
|
||||
import {
|
||||
RiCloseLine,
|
||||
RiLoader2Line,
|
||||
} from '@remixicon/react'
|
||||
import Recorder from 'js-audio-recorder'
|
||||
import { useRafInterval } from 'ahooks'
|
||||
import { convertToMp3 } from './utils'
|
||||
import s from './index.module.css'
|
||||
import cn from '@/utils/classnames'
|
||||
import { StopCircle } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices'
|
||||
import { audioToText } from '@/service/share'
|
||||
|
||||
type VoiceInputTypes = {
|
||||
onConverted: (text: string) => void
|
||||
onCancel: () => void
|
||||
wordTimestamps?: string
|
||||
}
|
||||
|
||||
const VoiceInput = ({
|
||||
onCancel,
|
||||
onConverted,
|
||||
wordTimestamps,
|
||||
}: VoiceInputTypes) => {
|
||||
const { t } = useTranslation()
|
||||
const recorder = useRef(new Recorder({
|
||||
sampleBits: 16,
|
||||
sampleRate: 16000,
|
||||
numChannels: 1,
|
||||
compiling: false,
|
||||
}))
|
||||
const canvasRef = useRef<HTMLCanvasElement | null>(null)
|
||||
const ctxRef = useRef<CanvasRenderingContext2D | null>(null)
|
||||
const drawRecordId = useRef<number | null>(null)
|
||||
const [originDuration, setOriginDuration] = useState(0)
|
||||
const [startRecord, setStartRecord] = useState(false)
|
||||
const [startConvert, setStartConvert] = useState(false)
|
||||
const pathname = usePathname()
|
||||
const params = useParams()
|
||||
const clearInterval = useRafInterval(() => {
|
||||
setOriginDuration(originDuration + 1)
|
||||
}, 1000)
|
||||
|
||||
const drawRecord = useCallback(() => {
|
||||
drawRecordId.current = requestAnimationFrame(drawRecord)
|
||||
const canvas = canvasRef.current!
|
||||
const ctx = ctxRef.current!
|
||||
const dataUnit8Array = recorder.current.getRecordAnalyseData()
|
||||
const dataArray = [].slice.call(dataUnit8Array)
|
||||
const lineLength = Number.parseInt(`${canvas.width / 3}`)
|
||||
const gap = Number.parseInt(`${1024 / lineLength}`)
|
||||
|
||||
ctx.clearRect(0, 0, canvas.width, canvas.height)
|
||||
ctx.beginPath()
|
||||
let x = 0
|
||||
for (let i = 0; i < lineLength; i++) {
|
||||
let v = dataArray.slice(i * gap, i * gap + gap).reduce((prev: number, next: number) => {
|
||||
return prev + next
|
||||
}, 0) / gap
|
||||
|
||||
if (v < 128)
|
||||
v = 128
|
||||
if (v > 178)
|
||||
v = 178
|
||||
const y = (v - 128) / 50 * canvas.height
|
||||
|
||||
ctx.moveTo(x, 16)
|
||||
if (ctx.roundRect)
|
||||
ctx.roundRect(x, 16 - y, 2, y, [1, 1, 0, 0])
|
||||
else
|
||||
ctx.rect(x, 16 - y, 2, y)
|
||||
ctx.fill()
|
||||
x += 3
|
||||
}
|
||||
ctx.closePath()
|
||||
}, [])
|
||||
const handleStopRecorder = useCallback(async () => {
|
||||
clearInterval()
|
||||
setStartRecord(false)
|
||||
setStartConvert(true)
|
||||
recorder.current.stop()
|
||||
if (drawRecordId.current)
|
||||
cancelAnimationFrame(drawRecordId.current)
|
||||
drawRecordId.current = null
|
||||
const canvas = canvasRef.current!
|
||||
const ctx = ctxRef.current!
|
||||
ctx.clearRect(0, 0, canvas.width, canvas.height)
|
||||
const mp3Blob = convertToMp3(recorder.current)
|
||||
const mp3File = new File([mp3Blob], 'temp.mp3', { type: 'audio/mp3' })
|
||||
const formData = new FormData()
|
||||
formData.append('file', mp3File)
|
||||
formData.append('word_timestamps', wordTimestamps || 'disabled')
|
||||
|
||||
let url = ''
|
||||
let isPublic = false
|
||||
|
||||
if (params.token) {
|
||||
url = '/audio-to-text'
|
||||
isPublic = true
|
||||
}
|
||||
else if (params.appId) {
|
||||
if (pathname.search('explore/installed') > -1)
|
||||
url = `/installed-apps/${params.appId}/audio-to-text`
|
||||
else
|
||||
url = `/apps/${params.appId}/audio-to-text`
|
||||
}
|
||||
|
||||
try {
|
||||
const audioResponse = await audioToText(url, isPublic, formData)
|
||||
onConverted(audioResponse.text)
|
||||
onCancel()
|
||||
}
|
||||
catch {
|
||||
onConverted('')
|
||||
onCancel()
|
||||
}
|
||||
}, [clearInterval, onCancel, onConverted, params.appId, params.token, pathname, wordTimestamps])
|
||||
const handleStartRecord = async () => {
|
||||
try {
|
||||
await recorder.current.start()
|
||||
setStartRecord(true)
|
||||
setStartConvert(false)
|
||||
|
||||
if (canvasRef.current && ctxRef.current)
|
||||
drawRecord()
|
||||
}
|
||||
catch {
|
||||
onCancel()
|
||||
}
|
||||
}
|
||||
|
||||
const initCanvas = () => {
|
||||
const dpr = window.devicePixelRatio || 1
|
||||
const canvas = document.getElementById('voice-input-record') as HTMLCanvasElement
|
||||
|
||||
if (canvas) {
|
||||
const { width: cssWidth, height: cssHeight } = canvas.getBoundingClientRect()
|
||||
|
||||
canvas.width = dpr * cssWidth
|
||||
canvas.height = dpr * cssHeight
|
||||
canvasRef.current = canvas
|
||||
|
||||
const ctx = canvas.getContext('2d')
|
||||
if (ctx) {
|
||||
ctx.scale(dpr, dpr)
|
||||
ctx.fillStyle = 'rgba(209, 224, 255, 1)'
|
||||
ctxRef.current = ctx
|
||||
}
|
||||
}
|
||||
}
|
||||
if (originDuration >= 600 && startRecord)
|
||||
handleStopRecorder()
|
||||
|
||||
useEffect(() => {
|
||||
initCanvas()
|
||||
handleStartRecord()
|
||||
const recorderRef = recorder?.current
|
||||
return () => {
|
||||
recorderRef?.stop()
|
||||
}
|
||||
}, [])
|
||||
|
||||
const minutes = Number.parseInt(`${Number.parseInt(`${originDuration}`) / 60}`)
|
||||
const seconds = Number.parseInt(`${originDuration}`) % 60
|
||||
|
||||
return (
|
||||
<div className={cn(s.wrapper, 'absolute inset-0 rounded-xl')}>
|
||||
<div className='absolute inset-[1.5px] flex items-center overflow-hidden rounded-[10.5px] bg-primary-25 py-[14px] pl-[14.5px] pr-[6.5px]'>
|
||||
<canvas id='voice-input-record' className='absolute bottom-0 left-0 h-4 w-full' />
|
||||
{
|
||||
startConvert && <RiLoader2Line className='mr-2 h-4 w-4 animate-spin text-primary-700' />
|
||||
}
|
||||
<div className='grow'>
|
||||
{
|
||||
startRecord && (
|
||||
<div className='text-sm text-gray-500'>
|
||||
{t('common.voiceInput.speaking')}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
{
|
||||
startConvert && (
|
||||
<div className={cn(s.convert, 'text-sm')}>
|
||||
{t('common.voiceInput.converting')}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
</div>
|
||||
{
|
||||
startRecord && (
|
||||
<div
|
||||
className='mr-1 flex h-8 w-8 cursor-pointer items-center justify-center rounded-lg hover:bg-primary-100'
|
||||
onClick={handleStopRecorder}
|
||||
>
|
||||
<StopCircle className='h-5 w-5 text-primary-600' />
|
||||
</div>
|
||||
)
|
||||
}
|
||||
{
|
||||
startConvert && (
|
||||
<div
|
||||
className='mr-1 flex h-8 w-8 cursor-pointer items-center justify-center rounded-lg hover:bg-gray-200'
|
||||
onClick={onCancel}
|
||||
>
|
||||
<RiCloseLine className='h-4 w-4 text-gray-500' />
|
||||
</div>
|
||||
)
|
||||
}
|
||||
<div className={`w-[45px] pl-1 text-xs font-medium ${originDuration > 500 ? 'text-[#F04438]' : 'text-gray-700'}`}>{`0${minutes.toFixed(0)}:${seconds >= 10 ? seconds : `0${seconds}`}`}</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
export default VoiceInput
|
||||
53
dify/web/app/components/base/voice-input/utils.ts
Normal file
53
dify/web/app/components/base/voice-input/utils.ts
Normal file
@@ -0,0 +1,53 @@
|
||||
import lamejs from 'lamejs'
|
||||
import MPEGMode from 'lamejs/src/js/MPEGMode'
|
||||
import Lame from 'lamejs/src/js/Lame'
|
||||
import BitStream from 'lamejs/src/js/BitStream'
|
||||
|
||||
if (globalThis) {
|
||||
(globalThis as any).MPEGMode = MPEGMode
|
||||
;(globalThis as any).Lame = Lame
|
||||
;(globalThis as any).BitStream = BitStream
|
||||
}
|
||||
|
||||
export const convertToMp3 = (recorder: any) => {
|
||||
const wav = lamejs.WavHeader.readHeader(recorder.getWAV())
|
||||
const { channels, sampleRate } = wav
|
||||
const mp3enc = new lamejs.Mp3Encoder(channels, sampleRate, 128)
|
||||
const result = recorder.getChannelData()
|
||||
const buffer: BlobPart[] = []
|
||||
|
||||
const leftData = result.left && new Int16Array(result.left.buffer, 0, result.left.byteLength / 2)
|
||||
const rightData = result.right && new Int16Array(result.right.buffer, 0, result.right.byteLength / 2)
|
||||
const remaining = leftData.length + (rightData ? rightData.length : 0)
|
||||
|
||||
const maxSamples = 1152
|
||||
const toArrayBuffer = (bytes: Int8Array) => {
|
||||
const arrayBuffer = new ArrayBuffer(bytes.length)
|
||||
new Uint8Array(arrayBuffer).set(bytes)
|
||||
return arrayBuffer
|
||||
}
|
||||
|
||||
for (let i = 0; i < remaining; i += maxSamples) {
|
||||
const left = leftData.subarray(i, i + maxSamples)
|
||||
let right = null
|
||||
let mp3buf = null
|
||||
|
||||
if (channels === 2) {
|
||||
right = rightData.subarray(i, i + maxSamples)
|
||||
mp3buf = mp3enc.encodeBuffer(left, right)
|
||||
}
|
||||
else {
|
||||
mp3buf = mp3enc.encodeBuffer(left)
|
||||
}
|
||||
|
||||
if (mp3buf.length > 0)
|
||||
buffer.push(toArrayBuffer(mp3buf))
|
||||
}
|
||||
|
||||
const enc = mp3enc.flush()
|
||||
|
||||
if (enc.length > 0)
|
||||
buffer.push(toArrayBuffer(enc))
|
||||
|
||||
return new Blob(buffer, { type: 'audio/mp3' })
|
||||
}
|
||||
Reference in New Issue
Block a user