Skip to content

Commit 4f56f67

Browse files
committed
Updated WEBUI with context data
1 parent 10d9017 commit 4f56f67

File tree

7 files changed

+1083
-317
lines changed

7 files changed

+1083
-317
lines changed

KK2-0905.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,4 @@ MODEL_PATH="/Users/edsilmacstudio/Development/llama.cpp/models/Kimi-K2-Instruct-
1212
--no-warmup \
1313
--host 0.0.0.0 \
1414
--port 3000 \
15-
--log-file "KK2_0905_log_3"
15+
--log-file "KK2_0905_log_4"

tools/server/public/index.html

Lines changed: 605 additions & 0 deletions
Large diffs are not rendered by default.

tools/server/public/index.html.gz

-244 Bytes
Binary file not shown.

tools/server/webui/package-lock.json

Lines changed: 426 additions & 304 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tools/server/webui/src/components/ChatMessage.tsx

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { useMemo, useState } from 'react';
1+
import { useMemo, useEffect, useState } from 'react';
22
import { useAppContext } from '../utils/app.context';
33
import { Message, PendingMessage } from '../utils/types';
44
import { classNames } from '../utils/misc';
@@ -55,6 +55,22 @@ export default function ChatMessage({
5555
const nextSibling = siblingLeafNodeIds[siblingCurrIdx + 1];
5656
const prevSibling = siblingLeafNodeIds[siblingCurrIdx - 1];
5757

58+
const { getConversationTokenTotal, addTokensToConversation } =
59+
useAppContext();
60+
const [hasAddedTokens, setHasAddedTokens] = useState(false);
61+
62+
// Get current conversation token total
63+
const conversationTotal = getConversationTokenTotal(msg.convId);
64+
65+
// Add tokens to running total when timings are available
66+
useEffect(() => {
67+
if (timings && !hasAddedTokens && msg.role === 'assistant') {
68+
const messageTokens = timings.prompt_n + timings.predicted_n;
69+
addTokensToConversation(msg.convId, messageTokens);
70+
setHasAddedTokens(true);
71+
}
72+
}, [timings, hasAddedTokens, msg.convId, msg.role, addTokensToConversation]);
73+
5874
// for reasoning model, we split the message into content and thought
5975
// TODO: implement this as remark/rehype plugin in the future
6076
const { content, thought, isThinking }: SplitMessage = useMemo(() => {
@@ -175,19 +191,22 @@ export default function ChatMessage({
175191
role="button"
176192
className="cursor-pointer font-semibold text-sm opacity-60"
177193
>
178-
Speed: {timings.predicted_per_second.toFixed(1)} t/s
194+
Speed test: {timings.predicted_per_second.toFixed(1)} t/s |
195+
Tokens: {timings.prompt_n + timings.predicted_n} this msg,{' '}
196+
{conversationTotal} total
179197
</div>
180198
<div className="dropdown-content bg-base-100 z-10 w-64 p-2 shadow mt-4">
181-
<b>Prompt</b>
182-
<br />- Tokens: {timings.prompt_n}
183-
<br />- Time: {timings.prompt_ms} ms
184-
<br />- Speed: {timings.prompt_per_second.toFixed(1)} t/s
185-
<br />
186-
<b>Generation</b>
187-
<br />- Tokens: {timings.predicted_n}
188-
<br />- Time: {timings.predicted_ms} ms
189-
<br />- Speed: {timings.predicted_per_second.toFixed(1)} t/s
199+
<b>This Exchange</b>
200+
<br />- Prompt: {timings.prompt_n} tokens
201+
<br />- Generation: {timings.predicted_n} tokens
202+
<br />- Subtotal: {timings.prompt_n +
203+
timings.predicted_n}{' '}
204+
tokens
205+
<br />- Speed test:{' '}
206+
{timings.predicted_per_second.toFixed(1)} t/s
190207
<br />
208+
<b>Conversation Total</b>
209+
<br />- Used: {conversationTotal} tokens
191210
</div>
192211
</div>
193212
)}

tools/server/webui/src/components/Header.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ export default function Header() {
4747
className="grow text-xl font-bold ml-2 truncate"
4848
title={`${fullFile}\nllama.cpp build ${build}`}
4949
>
50-
llama.cpp: {modelName}
50+
llama server.cpp: {modelName}
5151
</div>
5252

5353
{/* action buttons (top right) */}

tools/server/webui/src/utils/app.context.tsx

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ interface AppContextValue {
5252

5353
// props
5454
serverProps: LlamaCppServerProps | null;
55+
56+
// Token tracking
57+
getConversationTokenTotal: (convId: string) => number;
58+
addTokensToConversation: (convId: string, tokens: number) => void;
5559
}
5660

5761
// this callback is used for scrolling to the bottom of the chat and switching to the last node
@@ -93,6 +97,9 @@ export const AppContextProvider = ({
9397
const [config, setConfig] = useState(StorageUtils.getConfig());
9498
const [canvasData, setCanvasData] = useState<CanvasData | null>(null);
9599
const [showSettings, setShowSettings] = useState(false);
100+
const [conversationTokenTotals, setConversationTokenTotals] = useState<
101+
Record<string, number>
102+
>({});
96103

97104
// get server props
98105
useEffect(() => {
@@ -386,6 +393,17 @@ export const AppContextProvider = ({
386393
setConfig(config);
387394
};
388395

396+
const getConversationTokenTotal = (convId: string): number => {
397+
return conversationTokenTotals[convId] || 0;
398+
};
399+
400+
const addTokensToConversation = (convId: string, tokens: number) => {
401+
setConversationTokenTotals((prev) => ({
402+
...prev,
403+
[convId]: (prev[convId] || 0) + tokens,
404+
}));
405+
};
406+
389407
return (
390408
<AppContext.Provider
391409
value={{
@@ -402,6 +420,8 @@ export const AppContextProvider = ({
402420
showSettings,
403421
setShowSettings,
404422
serverProps,
423+
getConversationTokenTotal,
424+
addTokensToConversation,
405425
}}
406426
>
407427
{children}

0 commit comments

Comments (0)