Skip to content

Commit c440c9d

Browse files
authored
Merge pull request #28 from andrewginns/merbench-ui-tweaks
Fixes and tweaks to Merbench
2 parents 0844da2 + 3b2c106 commit c440c9d

File tree

7 files changed

+305
-30
lines changed

7 files changed

+305
-30
lines changed

public/_headers

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
Referrer-Policy: strict-origin-when-cross-origin
66
Permissions-Policy: camera=(), microphone=(), geolocation=(), payment=(), usb=(), vr=(), magnetometer=(), gyroscope=(), fullscreen=(self), accelerometer=()
77
Strict-Transport-Security: max-age=31536000; includeSubDomains; preload
8-
Content-Security-Policy: default-src 'self'; script-src 'self' https://cdn.plot.ly https://static.cloudflareinsights.com; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; img-src 'self' data: https: blob:; font-src 'self' https://fonts.gstatic.com data:; connect-src 'self' https://api.github.com https://api.rss2json.com https://cloudflareinsights.com; media-src 'self'; object-src 'none'; base-uri 'self'; form-action 'self'; frame-ancestors 'none'; upgrade-insecure-requests; block-all-mixed-content
8+
Content-Security-Policy: default-src 'self'; script-src 'self' 'unsafe-inline' https://cdn.plot.ly https://static.cloudflareinsights.com; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; img-src 'self' data: https: blob:; font-src 'self' https://fonts.gstatic.com data:; connect-src 'self' https://api.github.com https://api.rss2json.com https://cloudflareinsights.com; media-src 'self'; object-src 'none'; base-uri 'self'; form-action 'self'; frame-ancestors 'none'; upgrade-insecure-requests; block-all-mixed-content
99

1010
/_astro/*
1111
Cache-Control: public, max-age=31536000, immutable

src/components/merbench/CombinedFilters.astro

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -677,11 +677,11 @@ const { difficulties, providers } = Astro.props;
677677
(cb as HTMLInputElement).checked = true;
678678
});
679679

680-
// Update filter count and trigger change event
680+
// Update filter count and trigger change event on all checkboxes
681681
updateFilterCount();
682-
if (checkboxes.length > 0) {
683-
checkboxes[0].dispatchEvent(new Event('change', { bubbles: true }));
684-
}
682+
checkboxes.forEach((cb) => {
683+
cb.dispatchEvent(new Event('change', { bubbles: true }));
684+
});
685685
});
686686
}
687687

@@ -692,11 +692,11 @@ const { difficulties, providers } = Astro.props;
692692
(cb as HTMLInputElement).checked = false;
693693
});
694694

695-
// Update filter count and trigger change event
695+
// Update filter count and trigger change event on all checkboxes
696696
updateFilterCount();
697-
if (checkboxes.length > 0) {
698-
checkboxes[0].dispatchEvent(new Event('change', { bubbles: true }));
699-
}
697+
checkboxes.forEach((cb) => {
698+
cb.dispatchEvent(new Event('change', { bubbles: true }));
699+
});
700700
});
701701
}
702702

src/components/merbench/LeaderboardTable.astro

Lines changed: 85 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,22 +7,47 @@ export interface Props {
77
}
88
99
const { leaderboard } = Astro.props;
10+
11+
// Calculate cost range for progress bar normalization
12+
const costs = leaderboard.map((entry) => entry.Avg_Cost || calculateCost(entry.Avg_Tokens));
13+
const minCost = Math.min(...costs);
14+
const maxCost = Math.max(...costs);
15+
const costRange = maxCost - minCost;
1016
---
1117

1218
<section class="leaderboard-section">
1319
<h2>Model Leaderboard</h2>
1420
<div class="leaderboard-table">
15-
<table>
21+
<table id="leaderboard-table">
1622
<thead>
1723
<tr>
1824
<th>Rank</th>
19-
<th>Model</th>
20-
<th>Success Rate</th>
21-
<th>Avg Cost/Run</th>
22-
<th>Avg Duration</th>
23-
<th>Avg Tokens</th>
24-
<th>Runs</th>
25-
<th>Provider</th>
25+
<th class="sortable" data-sort-key="Model" data-sort-type="string">
26+
Model <span class="sort-indicator"></span>
27+
</th>
28+
<th
29+
class="sortable active"
30+
data-sort-key="Success_Rate"
31+
data-sort-type="number"
32+
data-sort-direction="desc"
33+
>
34+
Success Rate <span class="sort-indicator">↓</span>
35+
</th>
36+
<th class="sortable" data-sort-key="Avg_Cost" data-sort-type="number">
37+
Avg Cost/Run <span class="sort-indicator"></span>
38+
</th>
39+
<th class="sortable" data-sort-key="Avg_Duration" data-sort-type="number">
40+
Avg Duration <span class="sort-indicator"></span>
41+
</th>
42+
<th class="sortable" data-sort-key="Avg_Tokens" data-sort-type="number">
43+
Avg Tokens <span class="sort-indicator"></span>
44+
</th>
45+
<th class="sortable" data-sort-key="Runs" data-sort-type="number">
46+
Runs <span class="sort-indicator"></span>
47+
</th>
48+
<th class="sortable" data-sort-key="Provider" data-sort-type="string">
49+
Provider <span class="sort-indicator"></span>
50+
</th>
2651
</tr>
2752
</thead>
2853
<tbody>
@@ -46,7 +71,17 @@ const { leaderboard } = Astro.props;
4671
<span class="progress-text">{entry.Success_Rate.toFixed(1)}%</span>
4772
</div>
4873
</td>
49-
<td class="cost">${calculateCost(entry.Avg_Tokens).toFixed(4)}</td>
74+
<td class="cost">
75+
<div class="progress-bar">
76+
<div
77+
class="progress-fill progress-fill--cost"
78+
style={`width: ${costRange > 0 ? ((entry.Avg_Cost || calculateCost(entry.Avg_Tokens)) / maxCost) * 100 : 0}%`}
79+
/>
80+
<span class="progress-text">
81+
${(entry.Avg_Cost || calculateCost(entry.Avg_Tokens)).toFixed(4)}
82+
</span>
83+
</div>
84+
</td>
5085
<td class="duration">{entry.Avg_Duration.toFixed(2)}s</td>
5186
<td class="tokens">{entry.Avg_Tokens.toLocaleString()}</td>
5287
<td class="runs">{entry.Runs}</td>
@@ -163,6 +198,42 @@ const { leaderboard } = Astro.props;
163198
letter-spacing: 0.5px;
164199
}
165200

201+
/* Sortable header styles */
202+
.sortable {
203+
cursor: pointer;
204+
user-select: none;
205+
position: relative;
206+
transition: background-color 0.2s ease;
207+
}
208+
209+
.sortable:hover {
210+
background-color: var(--bg-tertiary);
211+
color: var(--text-primary);
212+
}
213+
214+
.sortable.active {
215+
/* No special styling - only the arrow indicator shows active state */
216+
}
217+
218+
.sort-indicator {
219+
font-size: 0.8rem;
220+
opacity: 0.5;
221+
margin-left: 0.25rem;
222+
}
223+
224+
.sortable.active .sort-indicator {
225+
opacity: 1;
226+
color: var(--accent-primary);
227+
}
228+
229+
.sortable:not(.active) .sort-indicator {
230+
opacity: 0;
231+
}
232+
233+
.sortable:hover:not(.active) .sort-indicator {
234+
opacity: 0.3;
235+
}
236+
166237
tbody tr:hover {
167238
background-color: var(--bg-primary);
168239
}
@@ -265,6 +336,11 @@ const { leaderboard } = Astro.props;
265336
background-color: var(--progress-low);
266337
}
267338

339+
/* Cost progress bar - single muted color */
340+
.progress-fill--cost {
341+
background-color: #9ca3af; /* Muted gray for all cost bars */
342+
}
343+
268344
@media (max-width: 768px) {
269345
.leaderboard-section {
270346
padding: 0.5rem;

src/lib/merbench.ts

Lines changed: 76 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import type {
55
FailureAnalysisData,
66
ParetoData,
77
ModelStats,
8+
LeaderboardEntry,
89
} from './merbench-types';
910

1011
// Calculate cost per run (simplified pricing model) - DEPRECATED
@@ -269,6 +270,60 @@ const calculateParetoFrontier = (data: Array<{ cost: number; Success_Rate: numbe
269270
return paretoPoints;
270271
};
271272

273+
// Sorting utilities
274+
// Sort key most recently recorded via setSortState; starts as 'Success_Rate'.
let currentSortKey = 'Success_Rate';
275+
// Sort direction most recently recorded via setSortState; starts as 'desc'.
let currentSortDirection: 'asc' | 'desc' = 'desc';
276+
277+
/**
 * Returns a sorted copy of the leaderboard entries; `data` itself is not mutated.
 *
 * Ordering rules:
 * - Missing values (null, undefined, or NaN) sort first when ascending and
 *   last when descending.
 * - Two numbers are compared numerically.
 * - Anything else is compared as a lower-cased string.
 *
 * @param data - Entries to sort.
 * @param sortKey - Name of the entry field to sort by. `'Avg_Cost'` is special-cased:
 *   a falsy `Avg_Cost` falls back to `calculateCost(entry.Avg_Tokens)`.
 * @param direction - `'asc'` for ascending, `'desc'` for descending.
 * @returns A new array containing the same entries in sorted order.
 */
export const sortLeaderboard = (
  data: LeaderboardEntry[],
  sortKey: string,
  direction: 'asc' | 'desc'
): LeaderboardEntry[] => {
  // +1 for ascending, -1 for descending; every comparison result is scaled by it.
  const sign = direction === 'asc' ? 1 : -1;

  // Resolves the value an entry is ordered by for the requested key.
  const sortValueOf = (entry: LeaderboardEntry): unknown => {
    const raw: unknown =
      sortKey === 'Avg_Cost'
        ? // Falsy-cost fallback (`||`, not `??`) is kept as-is on purpose.
          entry.Avg_Cost || calculateCost(entry.Avg_Tokens)
        : entry[sortKey as keyof LeaderboardEntry];

    // Subtracting NaN yields NaN, which makes the comparator inconsistent and the
    // resulting order implementation-defined. Treat NaN like a missing value so
    // it is ordered deterministically alongside null/undefined.
    return typeof raw === 'number' && Number.isNaN(raw) ? undefined : raw;
  };

  return [...data].sort((a, b) => {
    const aVal = sortValueOf(a);
    const bVal = sortValueOf(b);

    // Missing values: first when ascending, last when descending.
    if (aVal == null && bVal == null) return 0;
    if (aVal == null) return -sign;
    if (bVal == null) return sign;

    // Numeric comparison.
    if (typeof aVal === 'number' && typeof bVal === 'number') {
      return sign * (aVal - bVal);
    }

    // Case-insensitive string comparison for everything else.
    const aStr = String(aVal).toLowerCase();
    const bStr = String(bVal).toLowerCase();

    if (aStr < bStr) return -sign;
    if (aStr > bStr) return sign;
    return 0;
  });
};
316+
317+
/**
 * Records the given sort key and direction in the module-level sort state.
 *
 * @param sortKey - Key of the column being sorted by.
 * @param direction - `'asc'` or `'desc'`.
 */
export const setSortState = (sortKey: string, direction: 'asc' | 'desc'): void => {
  currentSortDirection = direction;
  currentSortKey = sortKey;
};
};
321+
322+
/**
 * Reads the module-level sort state.
 *
 * @returns A fresh object holding the current sort `key` and `direction`.
 */
export const getSortState = () => {
  return { key: currentSortKey, direction: currentSortDirection };
};
326+
272327
// DOM manipulation utilities
273328
export const updateSummaryStats = (filteredData: FilteredData): void => {
274329
const totalRuns = filteredData.rawData.length;
@@ -303,9 +358,20 @@ export const updateLeaderboard = (filteredData: FilteredData): void => {
303358
const tbody = document.querySelector('.leaderboard-table tbody');
304359
if (!tbody) return;
305360

361+
// Calculate cost range for progress bar normalization
362+
const costs = filteredData.leaderboard.map(
363+
(entry) => entry.Avg_Cost || calculateCost(entry.Avg_Tokens)
364+
);
365+
const minCost = Math.min(...costs);
366+
const maxCost = Math.max(...costs);
367+
const costRange = maxCost - minCost;
368+
306369
tbody.innerHTML = filteredData.leaderboard
307-
.map(
308-
(entry, index) => `
370+
.map((entry, index) => {
371+
const currentCost = entry.Avg_Cost || calculateCost(entry.Avg_Tokens);
372+
const costWidth = costRange > 0 ? (currentCost / maxCost) * 100 : 0;
373+
374+
return `
309375
<tr>
310376
<td class="rank">${index + 1}</td>
311377
<td class="model-name">${entry.Model}</td>
@@ -317,14 +383,19 @@ export const updateLeaderboard = (filteredData: FilteredData): void => {
317383
<span class="progress-text">${entry.Success_Rate.toFixed(1)}%</span>
318384
</div>
319385
</td>
320-
<td class="cost">$${(entry.Avg_Cost || calculateCost(entry.Avg_Tokens)).toFixed(4)}</td>
386+
<td class="cost">
387+
<div class="progress-bar">
388+
<div class="progress-fill progress-fill--cost" style="width: ${costWidth}%; background-color: #9ca3af;"></div>
389+
<span class="progress-text">$${currentCost.toFixed(4)}</span>
390+
</div>
391+
</td>
321392
<td class="duration">${entry.Avg_Duration.toFixed(2)}s</td>
322393
<td class="tokens">${entry.Avg_Tokens.toLocaleString()}</td>
323394
<td class="runs">${entry.Runs}</td>
324395
<td class="provider">${entry.Provider}</td>
325396
</tr>
326-
`
327-
)
397+
`;
398+
})
328399
.join('');
329400
};
330401

src/scripts/merbench-filters.ts

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
1-
import {
2-
getFilteredData,
3-
updateSummaryStats,
4-
updateLeaderboard,
5-
showEmptyState,
6-
} from '../lib/merbench';
1+
import { getFilteredData, updateSummaryStats, showEmptyState } from '../lib/merbench';
2+
import { updateLeaderboardData } from './merbench-sorting';
73
import type { RawData, TestGroupData, MerbenchData } from '../lib/merbench-types';
84
import { MerbenchCharts } from './merbench-charts';
95

@@ -317,7 +313,8 @@ export class MerbenchFilters {
317313

318314
private updateUI(filteredData: any): void {
319315
updateSummaryStats(filteredData);
320-
updateLeaderboard(filteredData);
316+
// Use sorting-aware leaderboard update instead of basic update
317+
updateLeaderboardData(filteredData.leaderboard);
321318
}
322319

323320
private showNoDataMessage(): void {

src/scripts/merbench-init-csp.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { MerbenchCharts } from './merbench-charts';
22
import { MerbenchFilters } from './merbench-filters';
3+
import { initializeLeaderboardSorting } from './merbench-sorting';
34
import type { MerbenchData, RawData } from '../lib/merbench-types';
45

56
declare global {
@@ -82,6 +83,9 @@ async function initializeMerbench() {
8283
const filters = new MerbenchFilters(data, charts);
8384
filters.initialize();
8485

86+
// Initialize leaderboard sorting
87+
initializeLeaderboardSorting(originalData.leaderboard);
88+
8589
// Initialize charts with all data
8690
try {
8791
await charts.waitForPlotly();

0 commit comments

Comments
 (0)