Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 52 additions & 59 deletions web/src/groq.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,24 @@
// Edge-native Groq client — fetch-based, no OpenAI SDK dependency
// Edge-native Groq helpers backed by @stackbilt/llm-providers

import { createLLMProviderFactory, type LLMMessage } from '@stackbilt/llm-providers';
import { tokenize, jaccardSimilarity } from './kernel/memory/index.js';
import { cosineSimilarity } from './kernel/memory/semantic.js';
import type { MemoryServiceBinding } from './types.js';

/**
 * Creates an LLM provider factory that is configured with a single Groq
 * provider entry.
 *
 * The factory is created with an empty fallback-rule list and with the
 * circuit-breaker and retry options switched on.
 * NOTE(review): the exact runtime semantics of those options are defined by
 * `@stackbilt/llm-providers`, not by this file — confirm against its docs.
 *
 * @param apiKey  Groq API key handed to the provider configuration.
 * @param baseUrl Base URL handed to the provider configuration.
 * @returns Whatever `createLLMProviderFactory` returns for this configuration.
 */
function buildGroqFactory(apiKey: string, baseUrl: string) {
  const groq = { apiKey, baseUrl };

  return createLLMProviderFactory({
    groq,
    fallbackRules: [],
    enableCircuitBreaker: true,
    enableRetries: true,
  });
}

/**
 * Normalizes a message payload of unknown shape into a plain string so that
 * downstream string operations never receive a non-string value.
 *
 * - strings are returned untouched
 * - `null` / `undefined` become the empty string
 * - objects (arrays included) are serialized with `JSON.stringify`
 * - every remaining value goes through `String()`
 *
 * @param content Raw message content.
 * @returns The content as a string.
 */
function coerceText(content: unknown): string {
  if (content === null || content === undefined) {
    return '';
  }

  switch (typeof content) {
    case 'string':
      return content;
    case 'object':
      // Structured content (e.g. an array of content blocks) is kept as JSON
      // rather than collapsing to "[object Object]".
      return JSON.stringify(content);
    default:
      return String(content);
  }
}

export async function askGroq(
apiKey: string,
Expand All @@ -7,39 +27,21 @@ export async function askGroq(
userPrompt: string,
baseUrl = 'https://api.groq.com',
): Promise<string> {
const response = await fetch(`${baseUrl}/openai/v1/chat/completions`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${apiKey}`,
},
body: JSON.stringify({
try {
const result = await buildGroqFactory(apiKey, baseUrl).generateResponse({
model,
systemPrompt,
temperature: 0.3,
max_tokens: 500,
maxTokens: 500,
messages: [
{ role: 'system', content: systemPrompt },
{ role: 'user', content: userPrompt },
],
}),
});

if (!response.ok) {
const errText = await response.text();
throw new Error(`Groq API error ${response.status}: ${errText}`);
});
return coerceText(result.message);
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
throw new Error(`Groq API error: ${msg}`);
}

const data = await response.json<{
choices: { message: { content: unknown } }[];
usage?: { total_tokens: number };
}>();

const content = data.choices[0]?.message?.content;
if (typeof content === 'string') return content;
if (content == null) return '';
// Some Groq-routed models (notably gpt-oss tool-calling variants) return content
// as an array of content blocks. Coerce so downstream string operations don't crash.
return typeof content === 'object' ? JSON.stringify(content) : String(content);
}

// ─── Logprobs-enabled classification ─────────────────────────
Expand Down Expand Up @@ -130,10 +132,6 @@ export async function askGroqWithLogprobs(
// Jaccard when memoryBinding is unavailable.
// Returns σ metric: 0=all agree, 0.5=partial, 1.0=disagree.

import { tokenize, jaccardSimilarity } from './kernel/memory/index.js';
import { cosineSimilarity } from './kernel/memory/semantic.js';
import type { MemoryServiceBinding } from './types.js';

const PROBE_TIMEOUT_MS = 3_000;
const JACCARD_AGREEMENT_THRESHOLD = 0.5;
const COSINE_AGREEMENT_THRESHOLD = 0.85;
Expand Down Expand Up @@ -210,43 +208,38 @@ export async function askGroqJson<T = unknown>(
baseUrl = 'https://api.groq.com',
options?: { maxTokens?: number; temperature?: number; prefill?: string },
): Promise<{ parsed: T; raw: string; usage?: { prompt_tokens: number; completion_tokens: number } }> {
const messages: Array<{ role: string; content: string }> = [
{ role: 'system', content: systemPrompt },
const messages: LLMMessage[] = [
{ role: 'user', content: userPrompt },
];
// Prefilling: seed the assistant response to steer tone/format
if (options?.prefill) {
messages.push({ role: 'assistant', content: options.prefill });
}

const response = await fetch(`${baseUrl}/openai/v1/chat/completions`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${apiKey}`,
},
body: JSON.stringify({
try {
const result = await buildGroqFactory(apiKey, baseUrl).generateResponse({
model,
systemPrompt,
temperature: options?.temperature ?? 0.2,
max_tokens: options?.maxTokens ?? 2000,
maxTokens: options?.maxTokens ?? 2000,
response_format: { type: 'json_object' },
messages,
}),
});

if (!response.ok) {
const errText = await response.text();
throw new Error(`Groq API error ${response.status}: ${errText}`);
});

const completion = result.message ?? '{}';
// If prefilled, the model continues from the prefill — concatenate for valid JSON
const raw = options?.prefill ? options.prefill + completion : completion;
const parsed = JSON.parse(raw) as T;
return {
parsed,
raw,
usage: {
prompt_tokens: result.usage.inputTokens,
completion_tokens: result.usage.outputTokens,
},
};
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
throw new Error(`Groq API error: ${msg}`);
}

const data = await response.json<{
choices: { message: { content: string } }[];
usage?: { prompt_tokens: number; completion_tokens: number };
}>();

const completion = data.choices[0]?.message?.content ?? '{}';
// If prefilled, the model continues from the prefill — concatenate for valid JSON
const raw = options?.prefill ? options.prefill + completion : completion;
const parsed = JSON.parse(raw) as T;
return { parsed, raw, usage: data.usage };
}
115 changes: 70 additions & 45 deletions web/tests/groq.test.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
// Groq client tests — askGroq, askGroqJson, askGroqWithLogprobs, probeConsistency
// Mocks fetch() to test API interaction without real calls
// Groq helper tests — askGroq, askGroqJson, askGroqWithLogprobs, probeConsistency
// Mocks provider factory and fetch() to test API interaction without real calls

import { describe, it, expect, vi, beforeEach } from 'vitest';

const providerMocks = vi.hoisted(() => ({
createLLMProviderFactory: vi.fn(),
generateResponse: vi.fn(),
}));

vi.mock('@stackbilt/llm-providers', () => ({
createLLMProviderFactory: providerMocks.createLLMProviderFactory,
}));

// Mock tokenize/jaccardSimilarity before importing groq.ts
vi.mock('../src/kernel/memory/index.js', () => ({
tokenize: (text: string) => new Set(text.toLowerCase().split(/\s+/)),
Expand All @@ -19,11 +28,14 @@ vi.stubGlobal('fetch', mockFetch);

const { askGroq, askGroqJson, askGroqWithLogprobs, probeConsistency } = await import('../src/groq.js');

function groqResponse(content: string, usage?: { prompt_tokens: number; completion_tokens: number }) {
return new Response(JSON.stringify({
choices: [{ message: { content } }],
function providerResponse(content: unknown, usage = { inputTokens: 100, outputTokens: 50, totalTokens: 150, cost: 0.001 }) {
return {
message: content,
usage,
}), { status: 200, headers: { 'Content-Type': 'application/json' } });
model: 'llama-test',
provider: 'groq',
responseTime: 10,
};
}

function groqLogprobResponse(content: string, logprobs: Array<{ token: string; logprob: number }>) {
Expand All @@ -36,52 +48,59 @@ function groqLogprobResponse(content: string, logprobs: Array<{ token: string; l
}

describe('askGroq', () => {
beforeEach(() => vi.clearAllMocks());
beforeEach(() => {
vi.clearAllMocks();
providerMocks.createLLMProviderFactory.mockReturnValue({ generateResponse: providerMocks.generateResponse });
});

it('returns content from Groq API', async () => {
mockFetch.mockResolvedValue(groqResponse('Hello!'));
providerMocks.generateResponse.mockResolvedValue(providerResponse('Hello!'));
const result = await askGroq('key', 'model', 'system', 'user');
expect(result).toBe('Hello!');
expect(mockFetch).toHaveBeenCalledOnce();
expect(providerMocks.generateResponse).toHaveBeenCalledOnce();
});

it('sends correct request shape', async () => {
mockFetch.mockResolvedValue(groqResponse('ok'));
providerMocks.generateResponse.mockResolvedValue(providerResponse('ok'));
await askGroq('test-key', 'llama-70b', 'sys prompt', 'user prompt', 'https://custom.api');

const [url, opts] = mockFetch.mock.calls[0];
expect(url).toBe('https://custom.api/openai/v1/chat/completions');
expect(opts.method).toBe('POST');
expect(opts.headers['Authorization']).toBe('Bearer test-key');
const body = JSON.parse(opts.body);
expect(body.model).toBe('llama-70b');
expect(body.messages).toHaveLength(2);
expect(body.messages[0].role).toBe('system');
expect(body.messages[1].role).toBe('user');
expect(providerMocks.createLLMProviderFactory).toHaveBeenCalledWith({
groq: { apiKey: 'test-key', baseUrl: 'https://custom.api' },
fallbackRules: [],
enableCircuitBreaker: true,
enableRetries: true,
});
expect(providerMocks.generateResponse).toHaveBeenCalledWith({
model: 'llama-70b',
systemPrompt: 'sys prompt',
temperature: 0.3,
maxTokens: 500,
messages: [{ role: 'user', content: 'user prompt' }],
});
});

it('throws on API error', async () => {
mockFetch.mockResolvedValue(new Response('rate limited', { status: 429 }));
await expect(askGroq('key', 'model', 'sys', 'user')).rejects.toThrow('Groq API error 429');
providerMocks.generateResponse.mockRejectedValue(new Error('rate limited'));
await expect(askGroq('key', 'model', 'sys', 'user')).rejects.toThrow('Groq API error: rate limited');
});

it('returns empty string when no content', async () => {
mockFetch.mockResolvedValue(new Response(
JSON.stringify({ choices: [{ message: { content: null } }] }),
{ status: 200, headers: { 'Content-Type': 'application/json' } },
));
providerMocks.generateResponse.mockResolvedValue(providerResponse(null));
const result = await askGroq('key', 'model', 'sys', 'user');
expect(result).toBe('');
});
});

describe('askGroqJson', () => {
beforeEach(() => vi.clearAllMocks());
beforeEach(() => {
vi.clearAllMocks();
providerMocks.createLLMProviderFactory.mockReturnValue({ generateResponse: providerMocks.generateResponse });
});

it('parses JSON response', async () => {
mockFetch.mockResolvedValue(groqResponse(
providerMocks.generateResponse.mockResolvedValue(providerResponse(
'{"name":"test","value":42}',
{ prompt_tokens: 100, completion_tokens: 50 },
{ inputTokens: 100, outputTokens: 50, totalTokens: 150, cost: 0.001 },
));
const { parsed, raw, usage } = await askGroqJson<{ name: string; value: number }>(
'key', 'model', 'sys', 'user',
Expand All @@ -93,9 +112,9 @@ describe('askGroqJson', () => {
});

it('handles prefill by concatenating', async () => {
mockFetch.mockResolvedValue(groqResponse(
providerMocks.generateResponse.mockResolvedValue(providerResponse(
'hello","done":true}',
{ prompt_tokens: 50, completion_tokens: 20 },
{ inputTokens: 50, outputTokens: 20, totalTokens: 70, cost: 0.001 },
));
const { parsed } = await askGroqJson<{ greeting: string; done: boolean }>(
'key', 'model', 'sys', 'user', undefined,
Expand All @@ -106,20 +125,24 @@ describe('askGroqJson', () => {
});

it('sends json_object response_format', async () => {
mockFetch.mockResolvedValue(groqResponse('{}'));
providerMocks.generateResponse.mockResolvedValue(providerResponse('{}'));
await askGroqJson('key', 'model', 'sys', 'user');
const body = JSON.parse(mockFetch.mock.calls[0][1].body);
expect(body.response_format).toEqual({ type: 'json_object' });
expect(providerMocks.generateResponse).toHaveBeenCalledWith(expect.objectContaining({
response_format: { type: 'json_object' },
}));
});

it('throws on API error', async () => {
mockFetch.mockResolvedValue(new Response('server error', { status: 500 }));
await expect(askGroqJson('key', 'model', 'sys', 'user')).rejects.toThrow('Groq API error 500');
providerMocks.generateResponse.mockRejectedValue(new Error('server error'));
await expect(askGroqJson('key', 'model', 'sys', 'user')).rejects.toThrow('Groq API error: server error');
});
});

describe('askGroqWithLogprobs', () => {
beforeEach(() => vi.clearAllMocks());
beforeEach(() => {
vi.clearAllMocks();
providerMocks.createLLMProviderFactory.mockReturnValue({ generateResponse: providerMocks.generateResponse });
});

it('parses classification with token confidence', async () => {
mockFetch.mockResolvedValue(groqLogprobResponse(
Expand Down Expand Up @@ -161,30 +184,32 @@ describe('askGroqWithLogprobs', () => {
});

describe('probeConsistency', () => {
beforeEach(() => vi.clearAllMocks());
beforeEach(() => {
vi.clearAllMocks();
providerMocks.createLLMProviderFactory.mockReturnValue({ generateResponse: providerMocks.generateResponse });
});

it('returns sigma=0 when all responses agree', async () => {
// Each call needs a fresh Response (body can only be read once)
mockFetch.mockImplementation(() => Promise.resolve(groqResponse('The answer is 42')));
providerMocks.generateResponse.mockResolvedValue(providerResponse('The answer is 42'));
const result = await probeConsistency('key', 'model', 'sys', 'user');
expect(result.sigma).toBe(0);
expect(result.agreedText).toBe('The answer is 42');
expect(result.responses).toHaveLength(3);
});

it('returns sigma=1.0 when responses completely disagree', async () => {
mockFetch
.mockResolvedValueOnce(groqResponse('alpha beta gamma delta epsilon'))
.mockResolvedValueOnce(groqResponse('one two three four five six seven'))
.mockResolvedValueOnce(groqResponse('red green blue purple orange yellow'));
providerMocks.generateResponse
.mockResolvedValueOnce(providerResponse('alpha beta gamma delta epsilon'))
.mockResolvedValueOnce(providerResponse('one two three four five six seven'))
.mockResolvedValueOnce(providerResponse('red green blue purple orange yellow'));
const result = await probeConsistency('key', 'model', 'sys', 'user');
expect(result.sigma).toBe(1.0);
expect(result.agreedText).toBeNull();
});

it('makes exactly 3 parallel calls', async () => {
mockFetch.mockImplementation(() => Promise.resolve(groqResponse('same')));
providerMocks.generateResponse.mockResolvedValue(providerResponse('same'));
await probeConsistency('key', 'model', 'sys', 'user');
expect(mockFetch).toHaveBeenCalledTimes(3);
expect(providerMocks.generateResponse).toHaveBeenCalledTimes(3);
});
});