
Commit 78219cd

feat: Displays real-time processing details (WIP)
Enhances user feedback during assistant message generation by displaying real-time processing status (initialization, preparation, and token generation). Surfaces details such as context usage, temperature and top-p settings, and speculative decoding status. Introduces a `useProcessingState` hook that tracks the processing state by polling the server's slots service.
1 parent a6d846e commit 78219cd
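For orientation before the diff: a minimal sketch (not part of the commit) of how a component consumes the new hook, mirroring the ChatMessage.svelte changes below. `message` is assumed to be the component's message prop; everything else comes from the imports shown in the diff.

// Sketch only: intended consumption pattern of useProcessingState.
import { useProcessingState } from '$lib/hooks/use-processing-state';
import { isLoading } from '$lib/stores/chat.svelte';

const processingState = useProcessingState();

// Poll the server's /slots endpoint only while an empty assistant message is being generated.
$effect(() => {
    if (message.role === 'assistant' && !message.content && isLoading()) {
        processingState.startMonitoring();
    } else {
        processingState.stopMonitoring();
    }
});

// In the template, processingState.getProcessingMessage() supplies the status line
// ("Initializing...", "Generating... (12 tokens)", ...) and getProcessingDetails()
// supplies badges such as "Context: 512/4096 (13%)".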

File tree: 4 files changed (+334, -2 lines)


tools/server/webui/src/lib/components/app/chat/ChatMessages/ChatMessage.svelte

Lines changed: 39 additions & 2 deletions
@@ -7,6 +7,7 @@
     import { copyToClipboard } from '$lib/utils/copy';
     import { parseThinkingContent } from '$lib/utils/thinking';
     import { isLoading } from '$lib/stores/chat.svelte';
+    import { useProcessingState } from '$lib/hooks/use-processing-state';
     import { fade } from 'svelte/transition';

     interface Props {
@@ -31,6 +32,8 @@
     let editedContent = $state(message.content);
     let textareaElement: HTMLTextAreaElement | undefined = $state();

+    const processingState = useProcessingState();
+
     let thinkingContent = $derived.by(() => {
         if (message.role === 'assistant') {
             if (message.thinking) {
@@ -92,11 +95,19 @@
     }

     function handleSaveEdit() {
-        if (editedContent.trim() && editedContent !== message.content) {
+        if (editedContent.trim() !== message.content) {
             onUpdateMessage?.(message, editedContent.trim());
         }
         isEditing = false;
     }
+
+    $effect(() => {
+        if (message.role === 'assistant' && !message.content && isLoading()) {
+            processingState.startMonitoring();
+        } else {
+            processingState.stopMonitoring();
+        }
+    });
 </script>

 {#if message.role === 'user'}
@@ -186,8 +197,16 @@
 {#if config?.role === 'assistant' && !message.content && isLoading()}
     <div class="mx-auto w-full max-w-[48rem] mb-16" in:fade>
         <span class="processing-text">
-            Processing
+            {processingState.getProcessingMessage()}
         </span>
+
+        {#if processingState.shouldShowDetails()}
+            <div class="processing-details">
+                {#each processingState.getProcessingDetails() as detail}
+                    <span class="processing-detail">{detail}</span>
+                {/each}
+            </div>
+        {/if}
     </div>
 {/if}

@@ -260,6 +279,24 @@
         -webkit-text-fill-color: transparent;
         animation: shine 1s linear infinite;
         font-weight: 500;
+        font-size: 0.875rem;
+    }
+
+    .processing-details {
+        display: flex;
+        flex-wrap: wrap;
+        justify-content: center;
+        gap: 0.75rem;
+        margin-top: 0.25rem;
+    }
+
+    .processing-detail {
+        color: var(--muted-foreground);
+        font-size: 0.75rem;
+        padding: 0.125rem 0.5rem;
+        background: var(--muted);
+        border-radius: 0.375rem;
+        font-family: ui-monospace, SFMono-Regular, "SF Mono", Consolas, "Liberation Mono", Menlo, monospace;
     }

     @keyframes shine {
New file (imported as '$lib/hooks/use-processing-state')

Lines changed: 95 additions & 0 deletions
@@ -0,0 +1,95 @@
+import { slotsService } from '$lib/services/slots';
+import type { ApiProcessingState } from '$lib/types/api';
+
+export function useProcessingState() {
+    let processingState = $state<ApiProcessingState | null>(null);
+    let isPolling = $state(false);
+    let unsubscribe: (() => void) | null = null;
+
+    function startMonitoring(): void {
+        if (isPolling) return;
+
+        isPolling = true;
+
+        unsubscribe = slotsService.subscribe((state) => {
+            processingState = state;
+        });
+
+        slotsService.startPolling();
+    }
+
+    function stopMonitoring(): void {
+        if (!isPolling) return;
+
+        isPolling = false;
+        processingState = null;
+
+        if (unsubscribe) {
+            unsubscribe();
+            unsubscribe = null;
+        }
+
+        slotsService.stopPolling();
+    }
+
+    function getProcessingMessage(): string {
+        if (!processingState) {
+            return 'Processing...';
+        }
+
+        switch (processingState.status) {
+            case 'initializing':
+                return 'Initializing...';
+            case 'preparing':
+                return 'Preparing response...';
+            case 'generating':
+                if (processingState.tokensDecoded > 0) {
+                    return `Generating... (${processingState.tokensDecoded} tokens)`;
+                }
+                return 'Generating...';
+            default:
+                return 'Processing...';
+        }
+    }
+
+    function getProcessingDetails(): string[] {
+        if (!processingState) {
+            return [];
+        }
+
+        const details: string[] = [];
+
+        if (processingState.contextUsed > 0) {
+            const contextPercent = Math.round((processingState.contextUsed / processingState.contextTotal) * 100);
+            details.push(`Context: ${processingState.contextUsed}/${processingState.contextTotal} (${contextPercent}%)`);
+        }
+
+        if (processingState.temperature !== 0.8) {
+            details.push(`Temperature: ${processingState.temperature.toFixed(1)}`);
+        }
+
+        if (processingState.topP !== 0.95) {
+            details.push(`Top-p: ${processingState.topP.toFixed(2)}`);
+        }
+
+        if (processingState.speculative) {
+            details.push('Speculative decoding enabled');
+        }
+
+        return details;
+    }
+
+    function shouldShowDetails(): boolean {
+        return processingState !== null && processingState.status !== 'idle';
+    }
+
+    return {
+        get processingState() { return processingState; },
+        get isPolling() { return isPolling; },
+        startMonitoring,
+        stopMonitoring,
+        getProcessingMessage,
+        getProcessingDetails,
+        shouldShowDetails
+    };
+}
New file (imported as '$lib/services/slots')

Lines changed: 122 additions & 0 deletions
@@ -0,0 +1,122 @@
+import type { ApiSlotData, ApiProcessingState } from '$lib/types/api';
+
+export class SlotsService {
+    private baseUrl: string;
+    private pollingInterval: number;
+    private pollingTimer: number | null = null;
+    private callbacks: Set<(state: ApiProcessingState) => void> = new Set();
+
+    constructor(baseUrl = '', pollingInterval = 500) {
+        this.baseUrl = baseUrl;
+        this.pollingInterval = pollingInterval;
+    }
+
+    startPolling(): void {
+        if (this.pollingTimer) {
+            return;
+        }
+
+        this.poll();
+        this.pollingTimer = window.setInterval(() => {
+            this.poll();
+        }, this.pollingInterval);
+    }
+
+    stopPolling(): void {
+        if (this.pollingTimer) {
+            clearInterval(this.pollingTimer);
+            this.pollingTimer = null;
+        }
+    }
+
+    subscribe(callback: (state: ApiProcessingState) => void): () => void {
+        this.callbacks.add(callback);
+        return () => {
+            this.callbacks.delete(callback);
+        };
+    }
+
+    private async poll(): Promise<void> {
+        try {
+            const response = await fetch(`${this.baseUrl}/slots`);
+            if (!response.ok) {
+                console.warn('Failed to fetch slots data:', response.statusText);
+                return;
+            }
+
+            const slots: ApiSlotData[] = await response.json();
+            const processingState = this.parseProcessingState(slots);
+
+            this.callbacks.forEach(callback => {
+                try {
+                    callback(processingState);
+                } catch (error) {
+                    console.error('Error in slots callback:', error);
+                }
+            });
+        } catch (error) {
+            console.warn('Error polling slots:', error);
+        }
+    }
+
+    private parseProcessingState(slots: ApiSlotData[]): ApiProcessingState {
+        const activeSlot = slots.find(slot => slot.id_task !== -1) || slots[0];
+
+        if (!activeSlot) {
+            return {
+                status: 'idle',
+                tokensDecoded: 0,
+                tokensRemaining: 0,
+                contextUsed: 0,
+                contextTotal: 4096,
+                temperature: 0.8,
+                topP: 0.95,
+                speculative: false,
+                hasNextToken: false
+            };
+        }
+
+        let status: ApiProcessingState['status'] = 'idle';
+
+        if (activeSlot.is_processing) {
+            status = 'generating';
+        } else if (activeSlot.next_token.n_decoded === 0 && activeSlot.id_task !== -1) {
+            status = 'initializing';
+        } else if (!activeSlot.next_token.has_next_token && activeSlot.id_task !== -1) {
+            status = 'preparing';
+        }
+
+        // Calculate context usage (estimate based on prompt length and decoded tokens)
+        const promptTokens = Math.floor(activeSlot.prompt.length / 4); // Rough estimate
+        const contextUsed = promptTokens + activeSlot.next_token.n_decoded;
+
+        return {
+            status,
+            tokensDecoded: activeSlot.next_token.n_decoded,
+            tokensRemaining: activeSlot.next_token.n_remain,
+            contextUsed,
+            contextTotal: activeSlot.n_ctx,
+            temperature: activeSlot.params.temperature,
+            topP: activeSlot.params.top_p,
+            speculative: activeSlot.speculative,
+            hasNextToken: activeSlot.next_token.has_next_token
+        };
+    }
+
+    async getCurrentState(): Promise<ApiProcessingState | null> {
+        try {
+            const response = await fetch(`${this.baseUrl}/slots`);
+            if (!response.ok) {
+                return null;
+            }
+
+            const slots: ApiSlotData[] = await response.json();
+            return this.parseProcessingState(slots);
+        } catch (error) {
+            console.warn('Error fetching current slots state:', error);
+            return null;
+        }
+    }
+}
+
+export const slotsService = new SlotsService();
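Beyond the subscribe/startPolling path used by the hook, the service also exposes a one-shot getCurrentState() helper. An illustrative snippet (not part of the commit) of how it could be used:

// Illustrative only: one-off read of the current processing state, without starting the polling loop.
import { slotsService } from '$lib/services/slots';

const state = await slotsService.getCurrentState();
if (state && state.status === 'generating') {
    console.log(`Decoded ${state.tokensDecoded} tokens, ${state.tokensRemaining} remaining`);
}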

tools/server/webui/src/lib/types/api.d.ts

Lines changed: 78 additions & 0 deletions
@@ -148,3 +148,81 @@ export interface ApiChatCompletionResponse {
         };
     }>;
 }
+
+export interface ApiSlotData {
+    id: number;
+    id_task: number;
+    n_ctx: number;
+    speculative: boolean;
+    is_processing: boolean;
+    params: {
+        n_predict: number;
+        seed: number;
+        temperature: number;
+        dynatemp_range: number;
+        dynatemp_exponent: number;
+        top_k: number;
+        top_p: number;
+        min_p: number;
+        top_n_sigma: number;
+        xtc_probability: number;
+        xtc_threshold: number;
+        typical_p: number;
+        repeat_last_n: number;
+        repeat_penalty: number;
+        presence_penalty: number;
+        frequency_penalty: number;
+        dry_multiplier: number;
+        dry_base: number;
+        dry_allowed_length: number;
+        dry_penalty_last_n: number;
+        dry_sequence_breakers: string[];
+        mirostat: number;
+        mirostat_tau: number;
+        mirostat_eta: number;
+        stop: string[];
+        max_tokens: number;
+        n_keep: number;
+        n_discard: number;
+        ignore_eos: boolean;
+        stream: boolean;
+        logit_bias: any[];
+        n_probs: number;
+        min_keep: number;
+        grammar: string;
+        grammar_lazy: boolean;
+        grammar_triggers: any[];
+        preserved_tokens: any[];
+        chat_format: string;
+        reasoning_format: string;
+        reasoning_in_content: boolean;
+        thinking_forced_open: boolean;
+        samplers: string[];
+        'speculative.n_max': number;
+        'speculative.n_min': number;
+        'speculative.p_min': number;
+        timings_per_token: boolean;
+        post_sampling_probs: boolean;
+        lora: any[];
+    };
+    prompt: string;
+    next_token: {
+        has_next_token: boolean;
+        has_new_line: boolean;
+        n_remain: number;
+        n_decoded: number;
+        stopping_word: string;
+    };
+}
+
+export interface ApiProcessingState {
+    status: 'initializing' | 'generating' | 'preparing' | 'idle';
+    tokensDecoded: number;
+    tokensRemaining: number;
+    contextUsed: number;
+    contextTotal: number;
+    temperature: number;
+    topP: number;
+    speculative: boolean;
+    hasNextToken: boolean;
+}
