Skip to content

Commit a6d846e

Browse files
committed
feat: Enables audio recording in chat form
Adds the ability to record and send audio messages directly from the chat form. This includes:
- Implementing audio recording functionality using the MediaRecorder API.
- Providing UI elements (mic button) to control the recording process.
- Converting recorded audio to WAV format for broader compatibility.
- Supporting previews of text files.
1 parent 5d1d146 commit a6d846e

File tree

4 files changed

+270
-20
lines changed

4 files changed

+270
-20
lines changed

tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentFilePreview.svelte

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
<script lang="ts">
22
import { Button } from '$lib/components/ui/button';
3+
import { FileTypeCategory, TextMimeType } from '$lib/constants/supported-file-types';
34
import { X } from '@lucide/svelte';
45
import { stopPropagation } from 'svelte/legacy';
56
@@ -44,7 +45,7 @@
4445
}
4546
</script>
4647

47-
{#if type === 'text/plain' || type === 'text'}
48+
{#if type === TextMimeType.PLAIN || type === FileTypeCategory.TEXT}
4849
{#if readonly}
4950
<!-- Readonly mode (ChatMessage) -->
5051
<button

tools/server/webui/src/lib/components/app/chat/ChatForm/ChatForm.svelte

Lines changed: 58 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@
44
import { inputClasses } from '$lib/constants/input-classes';
55
import { onMount } from 'svelte';
66
import { config } from '$lib/stores/settings.svelte';
7+
import {
8+
AudioRecorder,
9+
convertToWav,
10+
createAudioFile,
11+
isAudioRecordingSupported
12+
} from '$lib/utils/audio-recording';
13+
import { TextMimeType } from '$lib/constants/supported-file-types';
714
815
interface Props {
916
class?: string;
@@ -29,27 +36,17 @@
2936
uploadedFiles = $bindable([]),
3037
}: Props = $props();
3138
32-
// Get settings
3339
const currentConfig = $derived(config());
3440
const pasteLongTextToFileLength = $derived(Number(currentConfig.pasteLongTextToFileLen) || 2500);
3541
36-
let message = $state('');
42+
let audioRecorder: AudioRecorder | undefined;
43+
let isRecording = $state(false);
3744
let fileInputRef: ChatFormFileInputInvisible | undefined;
45+
let message = $state('');
3846
let previousIsLoading = $state(isLoading);
47+
let recordingSupported = $state(false);
3948
let textareaRef: ChatFormTextarea | undefined;
4049
41-
onMount(() => {
42-
textareaRef?.focus();
43-
});
44-
45-
$effect(() => {
46-
if (previousIsLoading && !isLoading) {
47-
textareaRef?.focus();
48-
}
49-
50-
previousIsLoading = isLoading;
51-
});
52-
5350
async function handleKeydown(event: KeyboardEvent) {
5451
if (event.key === 'Enter' && !event.shiftKey) {
5552
event.preventDefault();
@@ -95,7 +92,7 @@
9592
return;
9693
}
9794
98-
const text = event.clipboardData.getData('text/plain');
95+
const text = event.clipboardData.getData(TextMimeType.PLAIN);
9996
10097
if (
10198
text.length > 0 &&
@@ -105,7 +102,7 @@
105102
event.preventDefault();
106103
107104
const textFile = new File([text], 'Pasted', {
108-
type: 'text/plain'
105+
type: TextMimeType.PLAIN
109106
});
110107
111108
onFileUpload?.([textFile]);
@@ -135,6 +132,48 @@
135132
function handleStop() {
136133
onStop?.();
137134
}
135+
136+
/**
 * Click handler for the mic button: toggles audio recording.
 *
 * When idle, asks the recorder to start and flips `isRecording` on success.
 * When recording, stops the recorder, converts the captured audio to WAV,
 * wraps it in a File and hands it to `onFileUpload`. `isRecording` is reset
 * whether or not stopping succeeds. Failures are logged, never thrown.
 */
async function handleMicClick() {
	const recorder = audioRecorder;

	if (!recordingSupported || !recorder) {
		console.warn('Audio recording not supported');
		return;
	}

	// Idle -> start a new recording.
	if (!isRecording) {
		try {
			await recorder.startRecording();
			isRecording = true;
		} catch (error) {
			console.error('Failed to start recording:', error);
		}
		return;
	}

	// Recording -> stop, convert and attach the result.
	try {
		const recordedBlob = await recorder.stopRecording();
		const wavBlob = await convertToWav(recordedBlob);

		onFileUpload?.([createAudioFile(wavBlob)]);
	} catch (error) {
		console.error('Failed to stop recording:', error);
	} finally {
		isRecording = false;
	}
}
163+
164+
// On mount: focus the textarea, detect recording support and create the recorder.
onMount(() => {
	textareaRef?.focus();
	recordingSupported = isAudioRecordingSupported();
	audioRecorder = new AudioRecorder();

	// Svelte calls the returned function when the component is destroyed.
	// Cancel any in-flight recording so the microphone is released instead of
	// staying captured after the form disappears.
	return () => {
		audioRecorder?.cancelRecording();
	};
});
169+
170+
$effect(() => {
171+
if (previousIsLoading && !isLoading) {
172+
textareaRef?.focus();
173+
}
174+
175+
previousIsLoading = isLoading;
176+
});
138177
</script>
139178

140179
<ChatFormFileInputInvisible bind:this={fileInputRef} onFileSelect={handleFileSelect} />
@@ -157,10 +196,12 @@
157196
/>
158197

159198
<ChatFormActionButtons
199+
canSend={message.trim().length > 0 || uploadedFiles.length > 0}
160200
{disabled}
161201
{isLoading}
162-
canSend={message.trim().length > 0 || uploadedFiles.length > 0}
202+
{isRecording}
163203
onFileUpload={handleFileUpload}
204+
onMicClick={handleMicClick}
164205
onStop={handleStop}
165206
/>
166207
</div>

tools/server/webui/src/lib/constants/supported-file-types.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ export enum ImageFileType {
2222

2323
export enum AudioFileType {
2424
MP3 = 'mp3',
25-
WAV = 'wav'
25+
WAV = 'wav',
26+
WEBM = 'webm'
2627
}
2728

2829
export enum PdfFileType {
@@ -129,7 +130,10 @@ export enum ImageMimeType {
129130
export enum AudioMimeType {
130131
MP3_MPEG = 'audio/mpeg',
131132
MP3 = 'audio/mp3',
132-
WAV = 'audio/wav'
133+
MP4 = 'audio/mp4',
134+
WAV = 'audio/wav',
135+
WEBM = 'audio/webm',
136+
WEBM_OPUS = 'audio/webm;codecs=opus'
133137
}
134138

135139
export enum PdfMimeType {
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
import { AudioMimeType } from "$lib/constants/supported-file-types";
2+
3+
/**
 * Optional settings accepted by `AudioRecorder.startRecording`.
 * Both fields may be omitted.
 */
export interface AudioRecordingOptions {
	/** Requested audio bitrate, in bits per second. NOTE(review): confirm the recorder applies it. */
	audioBitsPerSecond?: number;
	/** Requested MIME type for the recording, e.g. `audio/webm`. NOTE(review): confirm the recorder applies it. */
	mimeType?: string;
}
7+
8+
/**
 * Small wrapper around `getUserMedia` + `MediaRecorder` that captures
 * microphone audio and returns it as a single Blob.
 *
 * Lifecycle: `startRecording()` -> (`stopRecording()` | `cancelRecording()`).
 * Only one session is active at a time; the microphone stream is released
 * whenever a session ends, is cancelled, or fails to start.
 */
export class AudioRecorder {
	private mediaRecorder: MediaRecorder | null = null;
	private audioChunks: Blob[] = [];
	private stream: MediaStream | null = null;
	private recordingState = false;

	/**
	 * Requests microphone access and starts a new recording session.
	 *
	 * If a session is already active it is cancelled first, so the previous
	 * stream is not leaked.
	 *
	 * @param options Optional preferred MIME type and bitrate. An unsupported
	 *   or missing `mimeType` falls back to the built-in preference list.
	 * @throws Error if the microphone cannot be accessed or the recorder
	 *   cannot be started.
	 */
	async startRecording(options: AudioRecordingOptions = {}): Promise<void> {
		let acquired: MediaStream | null = null;

		try {
			acquired = await navigator.mediaDevices.getUserMedia({
				audio: {
					echoCancellation: true,
					noiseSuppression: true,
					autoGainControl: true
				}
			});

			// Release any earlier (or concurrently started) session before
			// taking over the shared state.
			this.cancelRecording();

			this.stream = acquired;
			this.audioChunks = [];

			const recorder = this.initializeRecorder(acquired, options);

			// Start recording with a small timeslice to ensure we get data
			recorder.start(100);
			this.recordingState = true;
		} catch (error) {
			console.error('Failed to start recording:', error);

			// The stream was obtained but the recorder could not be created or
			// started: stop the tracks so the microphone is not left captured.
			if (acquired) {
				this.cleanup();
				acquired.getTracks().forEach((track) => track.stop());
			}

			throw new Error('Failed to access microphone. Please check permissions.');
		}
	}

	/**
	 * Stops the active recording and resolves with the captured audio.
	 *
	 * @returns A Blob typed with the recorder's MIME type (WAV MIME type if
	 *   the recorder reports none).
	 * @throws Error (as a rejection) if nothing is being recorded or the
	 *   recorder reports an error while stopping.
	 */
	async stopRecording(): Promise<Blob> {
		return new Promise((resolve, reject) => {
			const recorder = this.mediaRecorder;

			if (!recorder || recorder.state === 'inactive') {
				reject(new Error('No active recording to stop'));
				return;
			}

			recorder.onstop = () => {
				// Read the type from the captured recorder: `this.mediaRecorder`
				// is cleared by cleanup() below.
				const mimeType = recorder.mimeType || AudioMimeType.WAV;
				const audioBlob = new Blob(this.audioChunks, { type: mimeType });

				this.cleanup();

				resolve(audioBlob);
			};

			recorder.onerror = (event) => {
				console.error('Recording error:', event);
				this.cleanup();
				reject(new Error('Recording failed'));
			};

			recorder.stop();
		});
	}

	/** Reports whether a recording session is currently active. */
	isRecording(): boolean {
		return this.recordingState;
	}

	/** Aborts the active recording (if any), discards its data and releases the microphone. */
	cancelRecording(): void {
		const recorder = this.mediaRecorder;

		if (recorder && recorder.state !== 'inactive') {
			// stop() delivers its final dataavailable/stop events asynchronously.
			// Detach the handlers first so those late events cannot modify the
			// state of a session started afterwards.
			recorder.ondataavailable = null;
			recorder.onstop = null;
			recorder.onerror = null;
			recorder.stop();
		}

		this.cleanup();
	}

	/**
	 * Creates the MediaRecorder for `stream`, wires its event handlers and
	 * stores it on the instance.
	 *
	 * MIME type preference: the caller's `mimeType` (when supported), then
	 * WAV, WebM/Opus, WebM and MP4; otherwise the browser default is used.
	 */
	private initializeRecorder(
		stream: MediaStream,
		requested: AudioRecordingOptions = {}
	): MediaRecorder {
		const options: MediaRecorderOptions = {};

		const candidates: (string | undefined)[] = [
			requested.mimeType,
			AudioMimeType.WAV,
			AudioMimeType.WEBM_OPUS,
			AudioMimeType.WEBM,
			AudioMimeType.MP4
		];
		const mimeType = candidates.find(
			(type): type is string => !!type && MediaRecorder.isTypeSupported(type)
		);

		if (mimeType) {
			options.mimeType = mimeType;
		} else {
			console.warn('No preferred audio format supported, using default');
		}

		if (requested.audioBitsPerSecond !== undefined) {
			options.audioBitsPerSecond = requested.audioBitsPerSecond;
		}

		const recorder = new MediaRecorder(stream, options);

		recorder.ondataavailable = (event) => {
			if (event.data.size > 0) {
				this.audioChunks.push(event.data);
			}
		};

		recorder.onstop = () => {
			this.recordingState = false;
		};

		recorder.onerror = (event) => {
			console.error('MediaRecorder error:', event);
			this.recordingState = false;
		};

		this.mediaRecorder = recorder;

		return recorder;
	}

	/** Stops all stream tracks and resets the instance to its idle state. */
	private cleanup(): void {
		if (this.stream) {
			this.stream.getTracks().forEach((track) => track.stop());
			this.stream = null;
		}
		this.mediaRecorder = null;
		this.audioChunks = [];
		this.recordingState = false;
	}
}
116+
117+
/**
 * Converts a recorded audio Blob to WAV by decoding it with the Web Audio API
 * and re-encoding the samples.
 *
 * @param audioBlob Recorded audio in any format the browser can decode.
 * @returns A WAV Blob. The input is returned unchanged when it is already WAV
 *   or when decoding/conversion fails (the failure is logged).
 */
export async function convertToWav(audioBlob: Blob): Promise<Blob> {
	if (audioBlob.type.includes('wav')) {
		return audioBlob;
	}

	let audioContext: AudioContext | undefined;

	try {
		const arrayBuffer = await audioBlob.arrayBuffer();

		// Older WebKit only exposes the prefixed constructor.
		const prefixedWindow = window as Window & { webkitAudioContext?: typeof AudioContext };
		const AudioContextCtor = window.AudioContext || prefixedWindow.webkitAudioContext;

		if (!AudioContextCtor) {
			throw new Error('AudioContext is not available');
		}

		audioContext = new AudioContextCtor();

		const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);

		return audioBufferToWav(audioBuffer);
	} catch (error) {
		console.error('Failed to convert audio to WAV:', error);
		return audioBlob;
	} finally {
		// Always release the context, including when decoding failed; browsers
		// limit how many can be open. A failing close must not mask the result.
		try {
			await audioContext?.close();
		} catch {
			/* ignore close failures */
		}
	}
}
139+
140+
/**
 * Encodes an AudioBuffer as a 16-bit PCM WAV file (44-byte RIFF header
 * followed by interleaved little-endian samples).
 *
 * @param buffer Decoded audio; samples outside [-1, 1] are clamped.
 * @returns A Blob typed with the WAV MIME type.
 */
function audioBufferToWav(buffer: AudioBuffer): Blob {
	const frameCount = buffer.length;
	const numberOfChannels = buffer.numberOfChannels;
	const sampleRate = buffer.sampleRate;
	const bytesPerSample = 2; // 16-bit
	const headerSize = 44;
	const blockAlign = numberOfChannels * bytesPerSample;
	const byteRate = sampleRate * blockAlign;
	const dataSize = frameCount * blockAlign;
	const bufferSize = headerSize + dataSize;

	const arrayBuffer = new ArrayBuffer(bufferSize);
	const view = new DataView(arrayBuffer);

	const writeString = (offset: number, text: string) => {
		for (let i = 0; i < text.length; i++) {
			view.setUint8(offset + i, text.charCodeAt(i));
		}
	};

	writeString(0, 'RIFF'); // ChunkID
	view.setUint32(4, bufferSize - 8, true); // ChunkSize
	writeString(8, 'WAVE'); // Format
	writeString(12, 'fmt '); // Subchunk1ID
	view.setUint32(16, 16, true); // Subchunk1Size
	view.setUint16(20, 1, true); // AudioFormat (PCM)
	view.setUint16(22, numberOfChannels, true); // NumChannels
	view.setUint32(24, sampleRate, true); // SampleRate
	view.setUint32(28, byteRate, true); // ByteRate
	view.setUint16(32, blockAlign, true); // BlockAlign
	view.setUint16(34, bytesPerSample * 8, true); // BitsPerSample
	writeString(36, 'data'); // Subchunk2ID
	view.setUint32(40, dataSize, true); // Subchunk2Size

	// Fetch each channel's samples once instead of once per written sample.
	const channels = Array.from({ length: numberOfChannels }, (_, channel) =>
		buffer.getChannelData(channel)
	);

	let offset = headerSize;
	for (let frame = 0; frame < frameCount; frame++) {
		for (const channelData of channels) {
			const sample = Math.max(-1, Math.min(1, channelData[frame] ?? 0));
			view.setInt16(offset, sample * 0x7fff, true);
			offset += bytesPerSample;
		}
	}

	return new Blob([arrayBuffer], { type: AudioMimeType.WAV });
}
184+
185+
/**
 * Wraps an audio Blob in a File so it can be attached like an uploaded file.
 *
 * @param audioBlob Audio data; its MIME type becomes the File's type.
 * @param filename Optional name. When omitted (or empty), a name of the form
 *   `recording-<ISO timestamp>.<ext>` is generated, where `<ext>` matches the
 *   blob's MIME type.
 * @returns The File, with `lastModified` set to the current time.
 */
export function createAudioFile(audioBlob: Blob, filename?: string): File {
	// ':' and '.' are replaced so the timestamp is safe to use in a file name.
	const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
	const extension = audioFileExtension(audioBlob.type);
	const defaultFilename = `recording-${timestamp}.${extension}`;

	return new File([audioBlob], filename || defaultFilename, {
		type: audioBlob.type,
		lastModified: Date.now()
	});
}

/**
 * Maps an audio MIME type (optionally with parameters such as `;codecs=opus`)
 * to a file extension. Unknown or empty types fall back to `mp3`.
 */
function audioFileExtension(mimeType: string): string {
	const [essence = ''] = mimeType.toLowerCase().split(';');
	const normalized = essence.trim();

	if (normalized.includes('wav')) return 'wav';
	if (normalized === 'audio/webm') return 'webm';
	if (normalized === 'audio/mp4') return 'mp4';
	if (normalized === 'audio/ogg') return 'ogg';

	return 'mp3';
}
195+
196+
/**
 * Feature-detects in-browser audio recording.
 *
 * @returns `true` only when `navigator.mediaDevices.getUserMedia` is a
 *   function and `window.MediaRecorder` is present; `false` otherwise,
 *   including where `navigator` or `window` is not defined.
 */
export function isAudioRecordingSupported(): boolean {
	if (typeof navigator === 'undefined' || typeof window === 'undefined') {
		return false;
	}

	const devices = navigator.mediaDevices;
	if (!devices || typeof devices.getUserMedia !== 'function') {
		return false;
	}

	return Boolean(window.MediaRecorder);
}

0 commit comments

Comments
 (0)