Skip to content

Commit 31ea42e

Browse files
VladUZH and claude committed
feat: v0.2.0 — telemetry + cost attribution parity with @sidclaw/sdk@0.1.11
Adds record_telemetry() on SidClaw/AsyncSidClaw, extends record_outcome with 8 optional telemetry fields, and ships sidclaw.cost with MODEL_PRICING + estimate_cost for 13 models. No breaking changes. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 63ac0c8 commit 31ea42e

9 files changed

Lines changed: 237 additions & 2 deletions

File tree

CHANGELOG.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,18 @@
11
# Changelog
22

3+
## 0.2.0 (2026-04-17)
4+
5+
Telemetry + cost attribution parity with `@sidclaw/sdk@0.1.11`.
6+
7+
Added:
8+
- `record_telemetry(trace_id, params)` method on `SidClaw` and `AsyncSidClaw` — sends `PATCH /api/v1/traces/:id/telemetry`. Token usage and cost are accumulated server-side; `outcome_summary` and `model` are set-once (first write wins).
9+
- `RecordTelemetryParams` TypedDict with `tokens_in`, `tokens_out`, `tokens_cache_read`, `model`, `cost_estimate`, `outcome_summary` (all optional).
10+
- Extended `RecordOutcomeParams` with eight optional fields: `outcome_summary`, `error_classification`, `exit_code`, `tokens_in`, `tokens_out`, `tokens_cache_read`, `model`, `cost_estimate`.
11+
- `ErrorClassification` literal type: `'timeout' | 'permission' | 'not_found' | 'runtime'`.
12+
- New module `sidclaw.cost`: `MODEL_PRICING` table for 13 models (Claude 4.x, OpenAI GPT-4o/GPT-4 Turbo/o1, Gemini), `estimate_cost(model, tokens_in, tokens_out, tokens_cache_read, tokens_cache_write)`, `register_model_pricing(model, pricing)` for user overrides.
13+
14+
No breaking changes — all new fields are optional.
15+
316
## 0.1.0 (2026-03-23)
417

518
- Initial release

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "sidclaw"
7-
version = "0.1.2"
7+
version = "0.2.0"
88
description = "Python SDK for SidClaw — governance for AI agents"
99
readme = "README.md"
1010
license = "Apache-2.0"

src/sidclaw/__init__.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,16 @@
1616
ApprovalStatus,
1717
ApprovalStatusResponse,
1818
DataClassification,
19+
ErrorClassification,
1920
EvaluateParams,
2021
EvaluateResponse,
2122
PolicyEffect,
2223
RecordOutcomeParams,
24+
RecordTelemetryParams,
2325
RiskClassification,
2426
WaitForApprovalOptions,
2527
)
28+
from .cost import MODEL_PRICING, ModelPricing, estimate_cost, register_model_pricing
2629
from .webhooks import verify_webhook_signature
2730

2831
__all__ = [
@@ -31,12 +34,14 @@
3134
"EvaluateParams",
3235
"EvaluateResponse",
3336
"RecordOutcomeParams",
37+
"RecordTelemetryParams",
3438
"ApprovalStatusResponse",
3539
"WaitForApprovalOptions",
3640
"DataClassification",
3741
"PolicyEffect",
3842
"ApprovalStatus",
3943
"RiskClassification",
44+
"ErrorClassification",
4045
"SidClawError",
4146
"APIError",
4247
"ActionDeniedError",
@@ -46,5 +51,9 @@
4651
"AuthenticationError",
4752
"PlanLimitError",
4853
"verify_webhook_signature",
54+
"MODEL_PRICING",
55+
"ModelPricing",
56+
"estimate_cost",
57+
"register_model_pricing",
4958
"__version__",
5059
]

src/sidclaw/_client.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
EvaluateParams,
1515
EvaluateResponse,
1616
RecordOutcomeParams,
17+
RecordTelemetryParams,
1718
WaitForApprovalOptions,
1819
)
1920

@@ -114,6 +115,12 @@ def record_outcome(self, trace_id: str, params: RecordOutcomeParams) -> None:
114115
"""Record the outcome of an action after execution."""
115116
self._request("POST", f"/api/v1/traces/{trace_id}/outcome", json=dict(params))
116117

118+
def record_telemetry(self, trace_id: str, params: RecordTelemetryParams) -> None:
119+
"""Attach token usage or cost data to a trace AFTER its outcome has been
120+
recorded. Used for late-arriving LLM telemetry (e.g. from a Stop hook).
121+
"""
122+
self._request("PATCH", f"/api/v1/traces/{trace_id}/telemetry", json=dict(params))
123+
117124

118125
class AsyncSidClaw(BaseClient):
119126
"""Asynchronous SidClaw client."""
@@ -210,3 +217,9 @@ async def wait_for_approval(
210217
async def record_outcome(self, trace_id: str, params: RecordOutcomeParams) -> None:
211218
"""Record the outcome of an action after execution."""
212219
await self._request("POST", f"/api/v1/traces/{trace_id}/outcome", json=dict(params))
220+
221+
async def record_telemetry(self, trace_id: str, params: RecordTelemetryParams) -> None:
222+
"""Attach token usage or cost data to a trace AFTER its outcome has been
223+
recorded. Used for late-arriving LLM telemetry (e.g. from a Stop hook).
224+
"""
225+
await self._request("PATCH", f"/api/v1/traces/{trace_id}/telemetry", json=dict(params))

src/sidclaw/_constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
SDK_VERSION = "0.1.0"
1+
SDK_VERSION = "0.2.0"
22
DEFAULT_BASE_URL = "https://api.sidclaw.com"
33
DEFAULT_MAX_RETRIES = 3
44
DEFAULT_TIMEOUT = 30.0

src/sidclaw/_types.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,32 @@ class EvaluateParams(TypedDict, total=False):
2424
context: dict[str, Any] # optional
2525

2626

27+
ErrorClassification = Literal["timeout", "permission", "not_found", "runtime"]
28+
29+
2730
class RecordOutcomeParams(TypedDict, total=False):
2831
status: Literal["success", "error"] # required
2932
metadata: dict[str, Any] # optional
33+
# Added 2026-04-16 — hooks + cost-attribution telemetry. All optional.
34+
outcome_summary: str
35+
error_classification: ErrorClassification
36+
exit_code: int
37+
tokens_in: int
38+
tokens_out: int
39+
tokens_cache_read: int
40+
model: str
41+
cost_estimate: float
42+
43+
44+
class RecordTelemetryParams(TypedDict, total=False):
45+
"""Late-arriving LLM telemetry attached to a trace after its outcome."""
46+
47+
tokens_in: int
48+
tokens_out: int
49+
tokens_cache_read: int
50+
model: str
51+
cost_estimate: float
52+
outcome_summary: str
3053

3154

3255
class ApprovalDecisionParams(TypedDict):

src/sidclaw/cost.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
"""Per-1M-token pricing for common LLM models.
2+
3+
Keep this table up to date — stale pricing produces bad cost estimates.
4+
Prices are in USD per 1,000,000 tokens.
5+
6+
Used by :func:`estimate_cost` and consumed by the Claude Code Stop hook.
7+
"""
8+
9+
from __future__ import annotations
10+
11+
from typing import TypedDict
12+
13+
14+
class ModelPricing(TypedDict, total=False):
15+
input: float # required
16+
output: float # required
17+
cache_read: float
18+
cache_write: float
19+
20+
21+
MODEL_PRICING: dict[str, ModelPricing] = {
22+
# Anthropic Claude 4.x
23+
"claude-opus-4-7": {"input": 15.0, "output": 75.0, "cache_read": 1.5},
24+
"claude-opus-4-6": {"input": 15.0, "output": 75.0, "cache_read": 1.5},
25+
"claude-sonnet-4-6": {"input": 3.0, "output": 15.0, "cache_read": 0.3},
26+
"claude-sonnet-4-5": {"input": 3.0, "output": 15.0, "cache_read": 0.3},
27+
"claude-haiku-4-5": {"input": 0.8, "output": 4.0, "cache_read": 0.08},
28+
# OpenAI
29+
"gpt-4o": {"input": 2.5, "output": 10.0, "cache_read": 1.25},
30+
"gpt-4o-mini": {"input": 0.15, "output": 0.6, "cache_read": 0.075},
31+
"gpt-4-turbo": {"input": 10.0, "output": 30.0},
32+
"o1-preview": {"input": 15.0, "output": 60.0},
33+
"o1-mini": {"input": 3.0, "output": 12.0},
34+
# Google Gemini
35+
"gemini-2.0-flash": {"input": 0.1, "output": 0.4},
36+
"gemini-1.5-pro": {"input": 1.25, "output": 5.0},
37+
"gemini-1.5-flash": {"input": 0.075, "output": 0.3},
38+
}
39+
40+
41+
def estimate_cost(
42+
model: str,
43+
tokens_in: int,
44+
tokens_out: int,
45+
tokens_cache_read: int = 0,
46+
tokens_cache_write: int = 0,
47+
) -> float:
48+
"""Compute a USD cost estimate for a model call.
49+
50+
Returns 0 if the model is not in the pricing table — prefer emitting 0
51+
over a wildly wrong estimate.
52+
"""
53+
pricing = MODEL_PRICING.get(model) or MODEL_PRICING.get(model.lower())
54+
if not pricing:
55+
return 0.0
56+
57+
input_price = pricing.get("input", 0.0)
58+
output_price = pricing.get("output", 0.0)
59+
cache_read_price = pricing.get("cache_read", 0.0)
60+
cache_write_price = pricing.get("cache_write", input_price)
61+
62+
cost = (
63+
tokens_in * input_price
64+
+ tokens_out * output_price
65+
+ tokens_cache_read * cache_read_price
66+
+ tokens_cache_write * cache_write_price
67+
)
68+
69+
return cost / 1_000_000
70+
71+
72+
def register_model_pricing(model: str, pricing: ModelPricing) -> None:
73+
"""Register or override pricing for a model — useful for fine-tuned variants
74+
or self-hosted models where you know the unit cost.
75+
"""
76+
MODEL_PRICING[model] = pricing
77+
78+
79+
__all__ = ["MODEL_PRICING", "ModelPricing", "estimate_cost", "register_model_pricing"]

tests/test_client.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,73 @@ def test_record_outcome_error(self, client, mock_api):
233233
mock_api.post("/api/v1/traces/trace-1/outcome").mock(return_value=httpx.Response(200, json={}))
234234
client.record_outcome("trace-1", {"status": "error", "metadata": {"error": "Something failed"}})
235235

236+
def test_record_outcome_with_extended_telemetry_fields(self, client, mock_api):
237+
route = mock_api.post("/api/v1/traces/trace-1/outcome").mock(
238+
return_value=httpx.Response(200, json={})
239+
)
240+
client.record_outcome(
241+
"trace-1",
242+
{
243+
"status": "success",
244+
"outcome_summary": "Created 2 files, ran 12 tests",
245+
"error_classification": "runtime",
246+
"exit_code": 0,
247+
"tokens_in": 1000,
248+
"tokens_out": 500,
249+
"tokens_cache_read": 200,
250+
"model": "claude-opus-4-7",
251+
"cost_estimate": 0.045,
252+
},
253+
)
254+
import json as _json
255+
256+
body = _json.loads(route.calls[0].request.content)
257+
assert body["status"] == "success"
258+
assert body["outcome_summary"] == "Created 2 files, ran 12 tests"
259+
assert body["error_classification"] == "runtime"
260+
assert body["exit_code"] == 0
261+
assert body["tokens_in"] == 1000
262+
assert body["tokens_out"] == 500
263+
assert body["tokens_cache_read"] == 200
264+
assert body["model"] == "claude-opus-4-7"
265+
assert body["cost_estimate"] == 0.045
266+
267+
268+
class TestRecordTelemetry:
269+
def test_record_telemetry_sends_patch(self, client, mock_api):
270+
route = mock_api.patch("/api/v1/traces/trace-1/telemetry").mock(
271+
return_value=httpx.Response(200, json={})
272+
)
273+
client.record_telemetry(
274+
"trace-1",
275+
{
276+
"tokens_in": 2000,
277+
"tokens_out": 800,
278+
"tokens_cache_read": 400,
279+
"model": "claude-sonnet-4-6",
280+
"cost_estimate": 0.018,
281+
"outcome_summary": "Refactored auth module",
282+
},
283+
)
284+
assert route.call_count == 1
285+
import json as _json
286+
287+
body = _json.loads(route.calls[0].request.content)
288+
assert body["tokens_in"] == 2000
289+
assert body["model"] == "claude-sonnet-4-6"
290+
assert body["outcome_summary"] == "Refactored auth module"
291+
292+
def test_record_telemetry_accepts_partial_payload(self, client, mock_api):
293+
route = mock_api.patch("/api/v1/traces/trace-1/telemetry").mock(
294+
return_value=httpx.Response(200, json={})
295+
)
296+
client.record_telemetry("trace-1", {"cost_estimate": 0.01})
297+
assert route.call_count == 1
298+
import json as _json
299+
300+
body = _json.loads(route.calls[0].request.content)
301+
assert body == {"cost_estimate": 0.01}
302+
236303

237304
class TestWaitForApproval:
238305
def test_wait_for_approval_approved(self, client, mock_api):

tests/test_cost.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from sidclaw.cost import MODEL_PRICING, estimate_cost, register_model_pricing
2+
3+
4+
class TestEstimateCost:
5+
def test_computes_opus_4_7_cost(self):
6+
cost = estimate_cost(
7+
model="claude-opus-4-7",
8+
tokens_in=1_000_000,
9+
tokens_out=500_000,
10+
)
11+
# 1M * $15 + 0.5M * $75 = $15 + $37.5 = $52.5
12+
assert cost == 52.5
13+
14+
def test_handles_cache_read_discount(self):
15+
cost = estimate_cost(
16+
model="claude-sonnet-4-6",
17+
tokens_in=1_000_000,
18+
tokens_out=100_000,
19+
tokens_cache_read=2_000_000,
20+
)
21+
# 1M * $3 + 0.1M * $15 + 2M * $0.3 = 3 + 1.5 + 0.6 = $5.1
22+
assert abs(cost - 5.1) < 1e-5
23+
24+
def test_returns_zero_for_unknown_models(self):
25+
assert estimate_cost(model="unknown-model", tokens_in=1000, tokens_out=1000) == 0
26+
27+
def test_register_model_pricing_overrides_table(self):
28+
register_model_pricing("custom-model", {"input": 10, "output": 20})
29+
assert MODEL_PRICING["custom-model"] == {"input": 10, "output": 20}
30+
cost = estimate_cost(model="custom-model", tokens_in=1_000_000, tokens_out=1_000_000)
31+
assert abs(cost - 30) < 1e-5

0 commit comments

Comments
 (0)