|
| 1 | +"""Per-1M-token pricing for common LLM models. |
| 2 | +
|
| 3 | +Keep this table up to date — stale pricing produces bad cost estimates. |
| 4 | +Prices are in USD per 1,000,000 tokens. |
| 5 | +
|
| 6 | +Used by :func:`estimate_cost` and consumed by the Claude Code Stop hook. |
| 7 | +""" |
| 8 | + |
| 9 | +from __future__ import annotations |
| 10 | + |
| 11 | +from typing import TypedDict |
| 12 | + |
| 13 | + |
class _ModelPricingRequired(TypedDict):
    """Keys every pricing entry must carry (USD per 1M tokens)."""

    input: float
    output: float


class ModelPricing(_ModelPricingRequired, total=False):
    """Per-1M-token prices for one model.

    ``input`` and ``output`` are required (inherited from the total base
    class); the cache rates are optional — ``estimate_cost`` falls back to
    0.0 for ``cache_read`` and to the input rate for ``cache_write``.
    """

    cache_read: float
    cache_write: float
| 19 | + |
| 20 | + |
MODEL_PRICING: dict[str, ModelPricing] = {
    # Anthropic Claude 4.x
    "claude-opus-4-7": {"input": 15.0, "output": 75.0, "cache_read": 1.5},
    "claude-opus-4-6": {"input": 15.0, "output": 75.0, "cache_read": 1.5},
    "claude-sonnet-4-6": {"input": 3.0, "output": 15.0, "cache_read": 0.3},
    "claude-sonnet-4-5": {"input": 3.0, "output": 15.0, "cache_read": 0.3},
    "claude-haiku-4-5": {"input": 0.8, "output": 4.0, "cache_read": 0.08},
    # OpenAI
    "gpt-4o": {"input": 2.5, "output": 10.0, "cache_read": 1.25},
    "gpt-4o-mini": {"input": 0.15, "output": 0.6, "cache_read": 0.075},
    "gpt-4-turbo": {"input": 10.0, "output": 30.0},
    "o1-preview": {"input": 15.0, "output": 60.0},
    "o1-mini": {"input": 3.0, "output": 12.0},
    # Google Gemini
    "gemini-2.0-flash": {"input": 0.1, "output": 0.4},
    "gemini-1.5-pro": {"input": 1.25, "output": 5.0},
    "gemini-1.5-flash": {"input": 0.075, "output": 0.3},
}


def _lookup_pricing(model: str) -> ModelPricing | None:
    """Resolve *model* to a pricing entry, or None if unknown.

    Resolution order: exact match, case-insensitive match, then the
    longest table key that is a dash-separated prefix of the model id —
    so dated/versioned ids such as ``claude-sonnet-4-5-20250929`` or
    ``gemini-2.0-flash-001`` resolve to their base entry.
    """
    lowered = model.lower()
    for candidate in (model, lowered):
        entry = MODEL_PRICING.get(candidate)
        if entry is not None:
            return entry
    # Prefix fallback: require a "-" boundary so e.g. "gpt-4o" never
    # accidentally matches an unrelated id that merely starts with it.
    prefixes = [key for key in MODEL_PRICING if lowered.startswith(key + "-")]
    if prefixes:
        return MODEL_PRICING[max(prefixes, key=len)]
    return None


def estimate_cost(
    model: str,
    tokens_in: int,
    tokens_out: int,
    tokens_cache_read: int = 0,
    tokens_cache_write: int = 0,
) -> float:
    """Compute a USD cost estimate for a model call.

    Args:
        model: Model identifier. Exact, lowercased, and dated-suffix
            variants of the table keys (e.g. ``claude-sonnet-4-5-20250929``)
            are all recognized.
        tokens_in: Uncached input (prompt) tokens.
        tokens_out: Output (completion) tokens.
        tokens_cache_read: Tokens served from the prompt cache.
        tokens_cache_write: Tokens written to the prompt cache; billed at
            the plain input rate when the entry has no ``cache_write`` price.

    Returns:
        Estimated cost in USD. Returns 0.0 if the model is not in the
        pricing table — prefer emitting 0 over a wildly wrong estimate.
    """
    pricing = _lookup_pricing(model)
    if not pricing:
        return 0.0

    input_price = pricing.get("input", 0.0)
    cost = (
        tokens_in * input_price
        + tokens_out * pricing.get("output", 0.0)
        + tokens_cache_read * pricing.get("cache_read", 0.0)
        # Cache writes fall back to the input rate when unpriced.
        + tokens_cache_write * pricing.get("cache_write", input_price)
    )
    # Prices are quoted per 1M tokens; scale down once at the end.
    return cost / 1_000_000
| 70 | + |
| 71 | + |
def register_model_pricing(model: str, pricing: ModelPricing) -> None:
    """Register or override pricing for a model.

    Useful for fine-tuned variants or self-hosted models where you know
    the unit cost.

    Args:
        model: Model identifier used as the table key.
        pricing: Per-1M-token prices; should include ``input`` and
            ``output`` for meaningful estimates.
    """
    # Store a shallow copy so later mutation of the caller's dict does
    # not silently rewrite the shared pricing table.
    MODEL_PRICING[model] = dict(pricing)
| 77 | + |
| 78 | + |
# Public API of this module — keep in sync when adding top-level helpers.
__all__ = ["MODEL_PRICING", "ModelPricing", "estimate_cost", "register_model_pricing"]
0 commit comments