Skip to content

Commit 4f56f67

Browse files
committed
Updated WEBUI with context data
1 parent 10d9017 commit 4f56f67

File tree

7 files changed

+1083
-317
lines changed

7 files changed

+1083
-317
lines changed

KK2-0905.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,4 @@ MODEL_PATH="/Users/edsilmacstudio/Development/llama.cpp/models/Kimi-K2-Instruct-
1212
--no-warmup \
1313
--host 0.0.0.0 \
1414
--port 3000 \
15-
--log-file "KK2_0905_log_3"
15+
--log-file "KK2_0905_log_4"

tools/server/public/index.html

Lines changed: 605 additions & 0 deletions
Large diffs are not rendered by default.

tools/server/public/index.html.gz

-244 Bytes
Binary file not shown.

tools/server/webui/package-lock.json

Lines changed: 426 additions & 304 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tools/server/webui/src/components/ChatMessage.tsx

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { useMemo, useState } from 'react';
1+
import { useMemo, useEffect, useState } from 'react';
22
import { useAppContext } from '../utils/app.context';
33
import { Message, PendingMessage } from '../utils/types';
44
import { classNames } from '../utils/misc';
@@ -55,6 +55,22 @@ export default function ChatMessage({
5555
const nextSibling = siblingLeafNodeIds[siblingCurrIdx + 1];
5656
const prevSibling = siblingLeafNodeIds[siblingCurrIdx - 1];
5757

58+
const { getConversationTokenTotal, addTokensToConversation } =
59+
useAppContext();
60+
const [hasAddedTokens, setHasAddedTokens] = useState(false);
61+
62+
// Get current conversation token total
63+
const conversationTotal = getConversationTokenTotal(msg.convId);
64+
65+
// Add tokens to running total when timings are available
66+
useEffect(() => {
67+
if (timings && !hasAddedTokens && msg.role === 'assistant') {
68+
const messageTokens = timings.prompt_n + timings.predicted_n;
69+
addTokensToConversation(msg.convId, messageTokens);
70+
setHasAddedTokens(true);
71+
}
72+
}, [timings, hasAddedTokens, msg.convId, msg.role, addTokensToConversation]);
73+
5874
// for reasoning model, we split the message into content and thought
5975
// TODO: implement this as remark/rehype plugin in the future
6076
const { content, thought, isThinking }: SplitMessage = useMemo(() => {
@@ -175,19 +191,22 @@ export default function ChatMessage({
175191
role="button"
176192
className="cursor-pointer font-semibold text-sm opacity-60"
177193
>
178-
Speed: {timings.predicted_per_second.toFixed(1)} t/s
194+
Speed test: {timings.predicted_per_second.toFixed(1)} t/s |
195+
Tokens: {timings.prompt_n + timings.predicted_n} this msg,{' '}
196+
{conversationTotal} total
179197
</div>
180198
<div className="dropdown-content bg-base-100 z-10 w-64 p-2 shadow mt-4">
181-
<b>Prompt</b>
182-
<br />- Tokens: {timings.prompt_n}
183-
<br />- Time: {timings.prompt_ms} ms
184-
<br />- Speed: {timings.prompt_per_second.toFixed(1)} t/s
185-
<br />
186-
<b>Generation</b>
187-
<br />- Tokens: {timings.predicted_n}
188-
<br />- Time: {timings.predicted_ms} ms
189-
<br />- Speed: {timings.predicted_per_second.toFixed(1)} t/s
199+
<b>This Exchange</b>
200+
<br />- Prompt: {timings.prompt_n} tokens
201+
<br />- Generation: {timings.predicted_n} tokens
202+
<br />- Subtotal: {timings.prompt_n +
203+
timings.predicted_n}{' '}
204+
tokens
205+
<br />- Speed test:{' '}
206+
{timings.predicted_per_second.toFixed(1)} t/s
190207
<br />
208+
<b>Conversation Total</b>
209+
<br />- Used: {conversationTotal} tokens
191210
</div>
192211
</div>
193212
)}

tools/server/webui/src/components/Header.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ export default function Header() {
4747
className="grow text-xl font-bold ml-2 truncate"
4848
title={`${fullFile}\nllama.cpp build ${build}`}
4949
>
50-
llama.cpp: {modelName}
50+
llama server.cpp: {modelName}
5151
</div>
5252

5353
{/* action buttons (top right) */}

tools/server/webui/src/utils/app.context.tsx

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ interface AppContextValue {
5252

5353
// props
5454
serverProps: LlamaCppServerProps | null;
55+
56+
// Token tracking
57+
getConversationTokenTotal: (convId: string) => number;
58+
addTokensToConversation: (convId: string, tokens: number) => void;
5559
}
5660

5761
// this callback is used for scrolling to the bottom of the chat and switching to the last node
@@ -93,6 +97,9 @@ export const AppContextProvider = ({
9397
const [config, setConfig] = useState(StorageUtils.getConfig());
9498
const [canvasData, setCanvasData] = useState<CanvasData | null>(null);
9599
const [showSettings, setShowSettings] = useState(false);
100+
const [conversationTokenTotals, setConversationTokenTotals] = useState<
101+
Record<string, number>
102+
>({});
96103

97104
// get server props
98105
useEffect(() => {
@@ -386,6 +393,17 @@ export const AppContextProvider = ({
386393
setConfig(config);
387394
};
388395

396+
const getConversationTokenTotal = (convId: string): number => {
397+
return conversationTokenTotals[convId] || 0;
398+
};
399+
400+
const addTokensToConversation = (convId: string, tokens: number) => {
401+
setConversationTokenTotals((prev) => ({
402+
...prev,
403+
[convId]: (prev[convId] || 0) + tokens,
404+
}));
405+
};
406+
389407
return (
390408
<AppContext.Provider
391409
value={{
@@ -402,6 +420,8 @@ export const AppContextProvider = ({
402420
showSettings,
403421
setShowSettings,
404422
serverProps,
423+
getConversationTokenTotal,
424+
addTokensToConversation,
405425
}}
406426
>
407427
{children}

0 commit comments

Comments (0)