diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000..3586b97 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,80 @@ +name: Bug report +description: The SDK does something unexpected, crashes, or contradicts its docs. +labels: ["bug"] +body: + - type: markdown + attributes: + value: | + Thanks for the report. Please fill in everything you can — the more + reproducible the bug, the faster it gets fixed. + + **For security issues, please use [Private Vulnerability Reporting](https://github.com/TexasCoding/kalshi-python-sdk/security/advisories/new) instead.** + + - type: input + id: sdk_version + attributes: + label: SDK version + description: Output of `python -c "import kalshi; print(kalshi.__version__)"`. + placeholder: "2.0.0" + validations: + required: true + + - type: input + id: python_version + attributes: + label: Python version + placeholder: "3.12.10" + validations: + required: true + + - type: dropdown + id: environment + attributes: + label: Kalshi environment + options: + - demo + - production + - unknown / both + default: 0 + validations: + required: true + + - type: textarea + id: what_happened + attributes: + label: What happened? + description: What did the SDK do? What did you expect instead? + validations: + required: true + + - type: textarea + id: reproduce + attributes: + label: Minimal reproducer + description: | + A short script we can run to see the bug. Redact any keys / IDs. + Wrap in a Python code fence. + render: python + validations: + required: true + + - type: textarea + id: traceback + attributes: + label: Traceback or relevant log output + description: Full traceback if any. Redact URLs that contain tokens. + render: shell + validations: + required: false + + - type: checkboxes + id: checks + attributes: + label: Pre-flight checks + options: + - label: I searched existing issues for duplicates. + required: true + - label: I'm on the latest published version (`pip install --upgrade kalshi-sdk`), or noted my version above if not. + required: true + - label: This is not a security issue (those go through Private Vulnerability Reporting). + required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..033de13 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,11 @@ +blank_issues_enabled: false +contact_links: + - name: Security vulnerability (private disclosure) + url: https://github.com/TexasCoding/kalshi-python-sdk/security/advisories/new + about: Report a vulnerability privately. Do NOT open a public issue for security issues. + - name: Question / discussion + url: https://github.com/TexasCoding/kalshi-python-sdk/discussions + about: For usage questions and design discussions that aren't bugs or feature requests. + - name: Kalshi API itself + url: https://kalshi.com/support + about: For issues with the Kalshi API (not the SDK), please contact Kalshi support directly. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 0000000..c00d09b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,60 @@ +name: Feature request +description: Suggest a new SDK capability, ergonomics improvement, or API surface change. +labels: ["enhancement"] +body: + - type: markdown + attributes: + value: | + Thanks for the suggestion. Please describe the *user need* first, + then a proposal — solutions are easier to discuss when the + underlying problem is clear. + + - type: textarea + id: problem + attributes: + label: What problem does this solve? + description: | + What are you trying to do that's awkward or impossible today? + Avoid jumping to a specific API design — describe the use case. + validations: + required: true + + - type: textarea + id: proposal + attributes: + label: Proposed solution + description: | + Concrete suggestion if you have one — an API sketch, a code + example of how it would feel to use, etc. + render: python + validations: + required: false + + - type: textarea + id: alternatives + attributes: + label: Alternatives considered + description: What workarounds have you tried? Why aren't they enough? + validations: + required: false + + - type: input + id: spec_link + attributes: + label: Kalshi API reference (if applicable) + description: | + Link to the relevant endpoint in https://docs.kalshi.com or the + OpenAPI / AsyncAPI spec. + placeholder: "https://docs.kalshi.com/openapi.yaml#/markets/get" + validations: + required: false + + - type: checkboxes + id: checks + attributes: + label: Pre-flight checks + options: + - label: I searched existing issues for duplicates or related discussions. + required: true + - label: This is in scope for an SDK (i.e. wraps or improves access to the Kalshi API rather than reimplementing exchange logic). + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..ce8bc12 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,44 @@ + + +## Summary + + + +## Changes + + + +- + +## Test plan + +- [ ] `uv run pytest tests/ --ignore=tests/integration -q` — unit +- [ ] `uv run ruff check .` — lint +- [ ] `uv run mypy kalshi/` — type-check +- [ ] If touching live endpoints: `uv run pytest tests/integration/` against demo +- [ ] If adding a new endpoint: registered in `tests/_contract_support.py::METHOD_ENDPOINT_MAP` + +## Notes for reviewers + + + +## Issue links + + + +Closes # diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 27d23ea..c7fe9c6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -10,10 +10,10 @@ jobs: name: Build sdist and wheel runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: Install uv - uses: astral-sh/setup-uv@v6 + uses: astral-sh/setup-uv@bd01e18f51369d5a26f1651c3cb451d3417e3bba # v6.3.1 - name: Set up Python run: uv python install 3.12 @@ -33,7 +33,7 @@ jobs: - name: Check artifacts run: uv run --with twine twine check dist/* - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: name: dist path: dist/ @@ -49,13 +49,13 @@ jobs: id-token: write # required for PyPI trusted publishing + attestations (OIDC) attestations: write # required to upload sigstore attestations steps: - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 with: name: dist path: dist/ - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0 with: attestations: true @@ -66,11 +66,11 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 # full history so we can extract the tag's CHANGELOG section - - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 with: name: dist path: dist/ @@ -92,7 +92,7 @@ jobs: fi - name: Create GitHub Release - uses: softprops/action-gh-release@v2 + uses: softprops/action-gh-release@c95fe1489396fe8a9eb87c0abf8aa5b2ef267fda # v2.2.1 with: files: dist/* body_path: release_notes.md diff --git a/.gitignore b/.gitignore index 272c1c0..84b0580 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ wheels/ # Virtual environments .venv +.venv.stale* # UV lock uv.lock @@ -35,3 +36,12 @@ specs/asyncapi.yaml # MkDocs build output site/ + +# OS noise +**/.DS_Store + +# Internal planning / audit scratch (kept local, not part of the public repo) +.planning/ + +# Agent worktrees (claude-code's isolation: "worktree" mode) +.claude/worktrees/ diff --git a/.planning/audit-N-architecture.md b/.planning/audit-N-architecture.md deleted file mode 100644 index 8b32399..0000000 --- a/.planning/audit-N-architecture.md +++ /dev/null @@ -1,116 +0,0 @@ -# Wave 5 Audit N — Architecture & Code Quality - -Reviewed at commit `0a3fb23580b3497c1e004d1b79f783b2dda38e24` against the v1.1.0 release. - -## Summary - -The SDK has a clear and consistent layered architecture (transport → resource base → per-resource modules → typed models) and the discipline around contract testing, request-body model construction, and price-decimal type handling is unusually good for a Python SDK. The dual sync/async strategy is executed cleanly with shared pure helpers. The largest weaknesses are in three areas: (1) a top-level `kalshi.__all__` that is silently out of sync with `kalshi.models.__all__` so 23 documented response classes are not importable from the package root, (2) sloppy/aspirational type annotations on count/size fields (declared as `DollarDecimal` but commented as `FixedPointCount`) — semantically misleading even though the parser dedup makes it work, and (3) a few real WebSocket-loop concerns (JSON parsed twice, orderbook messages validated twice, private-method reach-through from `KalshiWebSocket` into `ConnectionManager`, broad `except Exception: continue` in the recv loop). None are critical for v1.1.0 callers but several represent debt worth picking up before v1.2. - -## Findings - -### F-N-01 — Public-API contract broken: 23 model classes missing from top-level `__all__` (severity: high) -**File:** `kalshi/__init__.py:116-225` -**Issue:** `kalshi/models/__init__.py` exports 23 user-facing response/data classes that are NOT re-exported from the package root: `Event`, `EventMetadata`, `MarketMetadata`, `SettlementSource`, `Balance`, `MarketPosition`, `EventPosition`, `PositionsResponse`, `Settlement`, `Trade`, `HistoricalCutoff`, `Announcement`, `DailySchedule`, `Schedule`, `WeeklySchedule`, `ExchangeStatus`, `MaintenanceWindow`, `ForecastPercentilesPoint`, `PercentilePoint`, `AssociatedEvent`, `CreateMarketResponse`, `LookupPoint`, `LookupTickersResponse`. Several of these are return types of public client methods (e.g. `client.exchange.status() -> ExchangeStatus`, `client.markets.list_trades_all() -> Iterator[Trade]`, `client.portfolio.balance() -> Balance`, `client.events.get() -> Event`) so users typing those returns must dig into the `kalshi.models.*` submodules. Empirically confirmed: `from kalshi import Trade` raises `ImportError`, as do `Event` and `Balance`. -**Evidence:** Compared `set(kalshi.__all__)` vs `set(kalshi.models.__all__)`; the diff above lists the 23 missing names. `ImportError: cannot import name 'Trade' from 'kalshi'`. -**Suggested fix:** Either re-export the missing names from `kalshi/__init__.py` (preferred — minimal, additive, no breakage), or document publicly that response models live under `kalshi.models`. The first option matches what `kalshi.models.__all__` is already trying to assert. - -### F-N-02 — Volume/size/count fields annotated as `DollarDecimal` but mean `FixedPointCount` (severity: medium) -**File:** `kalshi/models/markets.py:84-111`, `kalshi/models/markets.py:220-227`, `kalshi/models/orders.py:111-114` -**Issue:** `Market.yes_bid_size`, `yes_ask_size`, `no_bid_size`, `no_ask_size`, `volume`, `volume_24h`, `open_interest`, plus `Candlestick.volume`, `Candlestick.open_interest`, and `Fill.count` are all declared as `DollarDecimal` even though their `validation_alias` is the `_fp` (FixedPointCount) wire name and the surrounding comments explicitly say "FixedPointCount". Today this works only because `_to_decimal_dollars` and `_to_decimal_fp` in `kalshi/types.py:13-27,47-60` are byte-identical implementations. If one parser ever diverges (e.g. to apply a scale factor or different precision rule for one of the families) every size/volume field silently breaks. Beyond the latent bug, the wrong type also misleads anyone reading the model — `DollarDecimal` strongly implies a price. -**Evidence:** `kalshi/models/markets.py:83` comment: `# Size/volume fields (FixedPointCount)` immediately above `yes_bid_size: DollarDecimal | None = ...`. `Fill.count: DollarDecimal` at `kalshi/models/orders.py:111` with alias `count_fp`. `_to_decimal_dollars` (types.py:13) and `_to_decimal_fp` (types.py:47) are duplicate definitions. -**Suggested fix:** Switch the size/volume/count fields to `FixedPointCount`. Either consolidate the two parser functions into one shared helper (since they are identical today) or leave them as named identity functions so future divergence is per-family. - -### F-N-03 — `Order.type` field is dead and contradicts the spec (severity: medium) -**File:** `kalshi/models/orders.py:47` -**Issue:** `Order.type: str | None = None` is a leftover from the pre-v0.8.0 `CreateOrderRequest.type` field that v0.8.0 removed because it was never in the OpenAPI spec. The response `Order` model still carries it, which (a) re-exposes the removed concept, (b) shadows the Python `type` builtin in instances, and (c) lets the spec-drift contract test pass only because `Order` has `extra="allow"`. Either Kalshi actually returns `type` on `Order` (in which case it should be in the spec) or it doesn't (in which case the field is dead and `extra="allow"` would catch any surprise return value anyway). -**Evidence:** `kalshi/models/orders.py:47` — `type: str | None = None` inside `class Order`. The matching cleanup in `CreateOrderRequest` (line 144-147 docstring) explicitly removed `type` from the request side; only the response side still has it. -**Suggested fix:** Verify against demo whether the API ever returns a `type` field on `/portfolio/orders` responses. If not, remove the field. If it does return one, either keep it (and add it to the OpenAPI spec drift exclusions explicitly) or rely on `extra="allow"` so callers can still reach it via `order.model_extra`. - -### F-N-04 — `OrderbookManager.apply_delta` does O(n) linear scan per price level (severity: medium) -**File:** `kalshi/ws/orderbook.py:73-94` -**Issue:** Every orderbook delta does a linear `for i, level in enumerate(levels): if level.price == price` scan over the side's levels, then on insert calls `levels.sort(key=lambda lv: lv.price)`. For a side with N levels each delta is O(N) for the lookup and O(N log N) for the insert. Active markets routinely have 50-100 levels per side and deltas arrive multiple times per second per subscription. The data structure should be price-indexed. -**Evidence:** `kalshi/ws/orderbook.py:75-94` — explicit `existing_idx = -1; for i, level in enumerate(levels): if level.price == price: existing_idx = i; break` followed by `levels.sort(...)` on insert. -**Suggested fix:** Maintain levels as `dict[Decimal, Decimal]` (price → quantity) internally and materialize the sorted `list[OrderbookLevel]` lazily in `get()`, or use `sortedcontainers.SortedDict`. Reduces deltas to O(log N) (insert) and O(1) (update/remove), and the materialized snapshot is still O(N) but only happens on observed read. - -### F-N-05 — WebSocket recv loop parses JSON twice and re-validates orderbook messages (severity: medium) -**File:** `kalshi/ws/client.py:144-179`, `kalshi/ws/dispatch.py:68-95` -**Issue:** In `_recv_loop`, the raw frame is `json.loads`-parsed once at line 147 to inspect `sid`/`seq`/`type` for sequence tracking and orderbook state update, then handed to `await self._dispatcher.dispatch(raw)` at line 179, which immediately calls `json.loads(raw)` again at `dispatch.py:71`. For orderbook snapshots/deltas the message is also Pydantic-validated twice: once at `client.py:171-175` for `OrderbookManager`, and again at `dispatch.py:92` for queue routing. On a busy stream this is real cost (orderbook deltas can arrive 100s/sec on a watched market) and a maintenance hazard — two parsers can drift. -**Evidence:** `client.py:147 data = json.loads(raw)`; `client.py:171 snapshot = OrderbookSnapshotMessage.model_validate(data)`; `dispatch.py:71 data = json.loads(raw)`; `dispatch.py:92 parsed = model_cls.model_validate(data)`. -**Suggested fix:** Either (a) move sequence-tracking and orderbook-state update inside `MessageDispatcher.dispatch` so each frame is parsed and validated once, or (b) change the dispatcher signature to accept the pre-parsed `dict` from the recv loop. Option (a) keeps the loop simpler. - -### F-N-06 — `KalshiWebSocket._recv_loop` reaches into a private method on `ConnectionManager` (severity: medium) -**File:** `kalshi/ws/client.py:196` -**Issue:** After successful reconnect, `KalshiWebSocket._recv_loop` calls `await self._connection._set_state(ConnectionState.STREAMING)`. `_set_state` is name-mangled-private on `ConnectionManager`; the rest of `ConnectionManager` treats state transitions as internal. This is a leaky abstraction — the client should not be poking the manager's state machine — and a refactor of `ConnectionManager` that renames or removes `_set_state` will silently break the streaming state report. The on_state_change callback would also fire from the "wrong" caller. -**Evidence:** `kalshi/ws/client.py:196` — `await self._connection._set_state(ConnectionState.STREAMING)`. -**Suggested fix:** Add a public `mark_streaming()` (or similar) on `ConnectionManager` that performs the same transition, or have `ConnectionManager.reconnect()` perform the `CONNECTED → STREAMING` transition itself after resubscribe so the client doesn't need to. - -### F-N-07 — Recv loop swallows all non-`ConnectionClosed` exceptions with `continue` (severity: medium) -**File:** `kalshi/ws/client.py:204-207` -**Issue:** The recv loop's catch-all `except Exception as e: logger.warning("Error processing message: %s", e); continue` swallows every error short of `ConnectionClosed` — including `json.JSONDecodeError` (caught locally), Pydantic `ValidationError` from the dispatcher's typed parsing, `KalshiBackpressureError` from a full queue with `OverflowStrategy.ERROR`, and any bug in callback code. With backpressure-on-error this is exactly the "swallowed `KalshiBackpressureError`" issue already called out for the recv loop in audit-P, but the same swallowing also hides genuine programming errors. The dispatcher already logs parse/dispatch failures itself (`dispatch.py:88,94`), so this outer handler exists mostly to hide bugs. -**Evidence:** `kalshi/ws/client.py:204-207` — broad `except Exception` with `continue`. -**Suggested fix:** Narrow the outer catch to only the exceptions that genuinely should not kill the loop (e.g. `KalshiBackpressureError` if intentional, parse errors that the dispatcher should already have logged). Let everything else propagate or at minimum re-raise after logging. - -### F-N-08 — Inconsistent bool query-param serialization across resources (severity: medium) -**File:** `kalshi/resources/events.py:29,30,49,139,233`, `kalshi/resources/series.py:30,31,43,101,191` -**Issue:** `kalshi/resources/_base.py:22-32` defines `_bool_param(value)` specifically to preserve the explicit-False case so callers can opt out when a server default flips. `markets.py` and `live_data.py` use it correctly. `events.py` and `series.py` instead use inline `"true" if include_volume else None` for every bool param — which is exactly the pattern the helper docstring warns against, and which makes `include_volume=False` (or `with_milestones=False`) indistinguishable from "not specified". For at least `include_volume` (which affects response shape and is volume-load-bearing) callers cannot turn it off explicitly. -**Evidence:** `events.py:29` — `with_nested_markets="true" if with_nested_markets else None`. `series.py:30` — `include_product_metadata="true" if include_product_metadata else None`. Compare `live_data.py:33` — `include_player_stats=_bool_param(include_player_stats)`. Helper docstring at `_base.py:23-29`. -**Suggested fix:** Replace the inline ternaries with `_bool_param(...)` everywhere. Mechanical and safe; the only behavioural difference is that `False` now becomes `"false"` instead of being dropped — which is the documented intent. - -### F-N-09 — `_to_decimal_dollars` and `_to_decimal_fp` are byte-identical (severity: low) -**File:** `kalshi/types.py:13-27,47-60` -**Issue:** The two parser callables for `DollarDecimal` and `FixedPointCount` have identical bodies modulo docstring. This is fine as documentation-of-intent — keeping them separate makes the wire-shape vs. semantic split explicit at the type level — but it's at odds with F-N-02: today's parsers won't catch the wrong-family-annotated fields because they parse identically. Either commit to "they are intentionally distinct types with the same parser" or merge them. -**Evidence:** `kalshi/types.py:13-27` is identical to `kalshi/types.py:47-60` except for the function name and docstring. -**Suggested fix:** Add a `# Parsers are intentionally identical; keep the names distinct so we can diverge per-family later` comment, then have both `Annotated` aliases delegate to a single shared callable. Reduces a copy-paste hazard without losing the type distinction. - -### F-N-10 — `AsyncKalshiClient.ws` creates a new `KalshiWebSocket` on every access; `KalshiClient` has no `.ws` at all (severity: low) -**File:** `kalshi/async_client.py:121-137`, `kalshi/client.py` (no `ws` property) -**Issue:** Every `client.ws` access on the async client constructs a fresh `KalshiWebSocket(auth=..., config=...)` (line 136-137). If a user writes `async with client.ws.connect() as a, client.ws.connect() as b:` (or just reuses `client.ws` twice in a script), they silently get two independent WS clients — not the singleton they probably expected from object-attribute syntax. The sync `KalshiClient` doesn't expose `ws` at all (consistent with KalshiWebSocket being async-only, but undocumented in the sync client; users who don't read async_client.py won't know where to find WS). -**Evidence:** `kalshi/async_client.py:122-137` — `@property def ws(self) -> KalshiWebSocket` does `from kalshi.ws.client import KalshiWebSocket as _KalshiWebSocket; return _KalshiWebSocket(auth=self._auth, config=self._config)` each call. No `ws` attribute on `KalshiClient` (`client.py`). -**Suggested fix:** Either cache the constructed `KalshiWebSocket` on the client (lazy-initialized in `__init__` and reused), or rename to `client.websocket()` so the construct-on-call behavior is explicit. Document on `KalshiClient` (sync) that WebSocket access requires the async client. -**Uncertainty:** This may be intentional so multiple concurrent sessions are possible from one client. If so, the property name is misleading; a `new_websocket()` factory method would communicate the semantics better. - -### F-N-11 — `Fill.count` typed as `DollarDecimal` while sibling `Order.count` is `FixedPointCount` (severity: low) -**File:** `kalshi/models/orders.py:111`, `kalshi/models/orders.py:56` -**Issue:** Same-shape, same-meaning field annotated differently across sibling models. `Order.count: FixedPointCount` (line 56) is correct; `Fill.count: DollarDecimal` (line 111) is wrong, even though both wire-fields are `count_fp`. Subset of F-N-02 but worth calling out separately because it's a same-file inconsistency a maintainer is likely to copy-paste. -**Evidence:** `kalshi/models/orders.py:56,111` — both have `validation_alias=AliasChoices("count_fp", "count")` but different annotation. -**Suggested fix:** Change to `FixedPointCount` on `Fill.count`. - -### F-N-12 — `OrderbookDeltaPayload.side` is `str` rather than `Literal["yes", "no"]` (severity: low) -**File:** `kalshi/ws/models/orderbook_delta.py:48` -**Issue:** The orderbook manager dispatches on `side == "yes"` vs anything-else (`orderbook.py:72 levels = book.yes if side == "yes" else book.no`). If a future spec or server bug emits a different value (e.g. `"YES"`, `"both"`) the message silently routes to the `no` side and corrupts the book without any validation error. Side values are an enum in every other place in the SDK (`SideLiteral = Literal["yes", "no"]` at `kalshi/models/orders.py:18`). -**Evidence:** `kalshi/ws/models/orderbook_delta.py:48` — `side: str`. `kalshi/ws/orderbook.py:72` — `levels = book.yes if side == "yes" else book.no`. -**Suggested fix:** Tighten to `side: Literal["yes", "no"]`. Pydantic will reject unknown values; the orderbook manager's truthiness fallback becomes safe. - -### F-N-13 — Empty `kalshi/cli/` and `kalshi/cli/widgets/` directories left over from removed source (severity: low) -**File:** `kalshi/cli/`, `kalshi/cli/widgets/` -**Issue:** Both directories exist on disk but contain no `.py` files — only stale `__pycache__/` artifacts from since-removed modules. `git ls-tree HEAD kalshi/cli/` returns empty (not tracked). The directories are harmless but invite confusion: anyone scanning the source tree will see a `cli` package and wonder where it went. If a future contributor `os.walk`s `kalshi/` for packaging or doc purposes the empty dirs may show up unexpectedly. -**Evidence:** `find kalshi/cli -name '*.py'` returns nothing; `ls -la kalshi/cli/` shows only `__pycache__/widgets/`. `git ls-tree HEAD kalshi/cli/` returns empty. -**Suggested fix:** Delete both directories from the working tree and add a `.gitignore` pattern (or rely on the existing one) so stray `__pycache__` doesn't get committed. - -### F-N-14 — Pagination-helper recursion uses unconfigurable safety cap of 1000 pages (severity: low) -**File:** `kalshi/resources/_base.py:147-184` (sync), `:260-291` (async) -**Issue:** `_list_all` hard-codes `max_pages: int = 1000`. The argument exists in the signature but is not exposed to the public `*_all` methods on any resource (none of them forward a `max_pages` kwarg). For long histories — e.g. `markets.list_trades_all` over years, `historical.trades_all` with no `min_ts` — 1000 pages × default page size could exhaust the data well before the user wants it to stop, returning a silent partial. Callers cannot raise the limit without subclassing the resource. -**Evidence:** `_base.py:147` — `max_pages: int = 1000` parameter is plumbed only inside `_list_all`; no resource forwards a `max_pages` parameter from its public `*_all()` method. -**Suggested fix:** Either expose `max_pages` as a kwarg on the public `*_all()` methods (with the same 1000 default), or raise a sentinel error when the cap is hit so users at least learn the iterator terminated artificially. Today the iterator just `break`s with no signal. - -### F-N-15 — `WeeklySchedule` is exported via `kalshi.models.__all__` but has no docstring/usage trace, and `kalshi/__init__.py` skips it (severity: low) -**File:** `kalshi/__init__.py:116-225` (omission), `kalshi/models/__init__.py:43` -**Issue:** Specific instance of F-N-01 worth flagging because the schedule classes (`DailySchedule`, `WeeklySchedule`, `Schedule`, `MaintenanceWindow`) appear to be nested response components for `ExchangeStatus`. They are exported from `kalshi.models` but never used in any resource signature — only embedded as fields. Either they are intentionally internal (in which case they shouldn't be in `kalshi.models.__all__` either) or they are part of the public surface (in which case they belong at top level too). -**Evidence:** `kalshi/models/__init__.py:35-43` exports schedule classes; `grep` for `WeeklySchedule`/`DailySchedule` as a return-type or argument shows only the `Schedule` field inside `ExchangeStatus`. -**Suggested fix:** Decide if these are public or internal. If public, add to `kalshi/__init__.py` `__all__` (folds into F-N-01). If internal, drop from `kalshi.models.__all__`. - -### F-N-16 — Sync resource `list_all` returns `Iterator` via a generator delegate inconsistently (severity: low) -**File:** `kalshi/resources/communications.py:206-222` vs `kalshi/resources/orders.py:389-406` -**Issue:** `CommunicationsResource.list_all_rfqs` uses `yield from self._list_all(...)` (line 222), while `OrdersResource.list_all` directly `return self._list_all(...)` (line 406). Both work because `_list_all` is itself a generator — the `yield from` is purely ceremonial — but the inconsistency is confusing. Worse, the async sibling in `communications.py:412-431` uses `async for ... yield item` while `OrdersResource.list_all` (async, line 714-732) returns the async iterator directly. Both styles work; mixing them in the same codebase is a paper cut. -**Evidence:** `communications.py:222` — `yield from self._list_all(...)`; `orders.py:406` — `return self._list_all(...)`. Async: `communications.py:428-431` uses `async for`/`yield`; `orders.py:732` directly `return`s. -**Suggested fix:** Standardize on `return self._list_all(...)` for sync and `return self._list_all(...)` for async (both directly return iterators). The `async for ... yield` wrapper turns an async generator function into an async-generator-returning function, which is the same shape but with an extra coroutine layer. - -### F-N-17 — `Orderbook.yes`/`Orderbook.no` mutable default `= []` on a Pydantic model field (severity: low) -**File:** `kalshi/models/markets.py:156-157` -**Issue:** `yes: NullableList[OrderbookLevel] = []` and `no: NullableList[OrderbookLevel] = []`. Pydantic v2 deep-copies mutable defaults at model construction so the typical Python "shared mutable default" footgun doesn't fire, but ruff lint rule B006 would still flag it and a future migration off Pydantic (or to a strict mode) reintroduces the hazard. Same pattern in `MarketCandlesticks.candlesticks: NullableList[Candlestick] = []` (line 247). -**Evidence:** `kalshi/models/markets.py:156-157,247`. -**Suggested fix:** Switch to `Field(default_factory=list)`. Behaviourally identical under Pydantic v2 but unambiguous to readers and lint-clean. - ---- - -End of findings. diff --git a/.planning/audit-O-security.md b/.planning/audit-O-security.md deleted file mode 100644 index de6bc17..0000000 --- a/.planning/audit-O-security.md +++ /dev/null @@ -1,333 +0,0 @@ -# Wave 5 Audit O — Security - -Reviewed at commit `0a3fb23580b3497c1e004d1b79f783b2dda38e24` against the v1.1.0 release. - -## Summary - -Overall the SDK is reasonably hardened for a trading SDK: RSA-PSS signing is implemented -correctly with the cryptography library (no homebrew crypto), POST/DELETE are correctly -excluded from retries, no PEM material or signatures are written to logs at any level, -the record/replay fixture format intentionally omits request headers and bodies so -saved fixtures cannot leak the API key ID or RSA signatures, and the PyPI release -workflow uses tag-pinned OIDC trusted publishing with a `version == tag` guard. The -strongest finding below is that the spec-sync workflow holds `contents: write` + -`pull-requests: write` and operates on untrusted upstream content; auto-merge of that -PR (if a maintainer ever enables it) plus a compromise of `docs.kalshi.com` would -amount to a supply-chain hijack. Other findings are defense-in-depth: unverified -scheme on `KALSHI_API_BASE_URL`, broad `except Exception` swallowing in the WS recv -loop, `exc_info=True` in dispatch that can dump frame contents into logs, missing -dependabot, and a couple of CI workflows on cron / `pull_request_target`-adjacent -triggers that still use mutable major-version tags. - -## Findings - -### F-O-01 — Spec-sync workflow ingests upstream YAML with write permissions (severity: high) -**File:** `.github/workflows/spec-sync.yml:9-11,53-54,135-216` -**Threat model:** A network attacker who can MITM or compromise `docs.kalshi.com`, or -a malicious party who can get a crafted change merged upstream into the published -OpenAPI/AsyncAPI YAML, can cause arbitrary YAML to be fed into -`scripts/sync_spec.py` and then into `scripts/generate.py` (datamodel-code-generator, -which writes a Python module the test suite imports). The workflow runs on a weekly -cron with `contents: write` and `pull-requests: write`, then opens a PR. If a -maintainer ever switches that PR to auto-merge, or hand-merges without reading -`_generated/models.py`, attacker-controlled Python lands on `main` — and the -release workflow triggers PyPI publish on tag push, propagating downstream. -**Impact:** Supply-chain compromise of `kalshi-sdk` on PyPI. -**Evidence:** -- `spec-sync.yml:9-11` — `permissions: contents: write, pull-requests: write` -- `spec-sync.yml:53-54` — `uv run python scripts/sync_spec.py` (downloads HTTPS YAML) -- `scripts/sync_spec.py:13-17` — `SPECS = {"openapi.yaml": "https://docs.kalshi.com/openapi.yaml", ...}` with no signature/hash pinning -- `spec-sync.yml:135-137` — `scripts/generate.py` runs `datamodel-code-generator` - on attacker-controlled YAML; resulting Python is consumed by the test step on - line 147-149 and would be reviewed in the PR diff but only as generated code. -**Suggested mitigation:** (a) Pin a published checksum or signature for the spec -files and verify before regen; or (b) keep auto-PR but require human approval -plus an explicit "I read `_generated/models.py`" check; or (c) gate the PR -generation behind a workflow that has only `contents: read` and posts the diff -as an issue comment, never as a writable branch. The existing SHA pinning of -third-party actions in this workflow is good — extend the principle to the -content being merged. - -### F-O-02 — `claude.yml` and `claude-code-review.yml` use mutable major-version tags on broad-permission workflows (severity: medium) -**File:** `.github/workflows/claude.yml:25-26,33,38`, `.github/workflows/claude-code-review.yml:23-24,32` -**Threat model:** `claude.yml` runs on `issue_comment`/`pull_request_review_comment` -events (including from forks via PR review comments) with -`contents: write, pull-requests: write, issues: write, id-token: write` and uses -`actions/checkout@v4` and `anthropics/claude-code-action@v1` — both mutable tags. -A tag retag attack on either action would weaponize a workflow that has full -issue/PR write plus the `CLAUDE_CODE_OAUTH_TOKEN` secret. The spec-sync workflow -already SHA-pins its actions and documents *why* (`spec-sync.yml:22-24`). The -Claude workflows have equivalent or larger blast radius and should follow suit. -**Impact:** If `actions/checkout@v4` or `anthropics/claude-code-action@v1` is -retagged maliciously, attacker code runs with write access to repo contents, -PRs, issues, plus the OAuth token in env. The `id-token: write` permission also -exposes the workflow's OIDC token to any compromised step. -**Evidence:** -- `claude.yml:25-26,33` — `uses: actions/checkout@v4`, `uses: anthropics/claude-code-action@v1` -- `claude.yml:20-24` — `contents: write, pull-requests: write, issues: write, id-token: write` -- `claude-code-review.yml:23-24,32` — same pattern with slightly narrower perms -- `spec-sync.yml:22-24` — comment explicitly noting why this *other* workflow - pins to SHA, demonstrating the team is aware of the threat model. -**Suggested mitigation:** Pin both actions to a specific commit SHA (matching -the spec-sync.yml comment's rationale). Subscribe to release notifications and -bump the SHA in PRs the way `spec-sync.yml` does. - -### F-O-03 — No scheme/host validation on `KALSHI_API_BASE_URL`; HTTP base URL silently accepted (severity: medium) -**File:** `kalshi/client.py:137-138`, `kalshi/async_client.py:153-154`, `kalshi/config.py:18-44` -**Threat model:** Anyone who can write to a process's environment -(`docker run -e ...`, CI variable, `.env` checked into a different repo, -shell history) can set `KALSHI_API_BASE_URL=http://attacker.example/trade-api/v2` -or `https://attacker.example/...`. The SDK accepts the URL with no validation — -no scheme check, no host allowlist, no warning. The RSA-PSS signature is still -computed and sent (with the API key ID header) to the attacker's endpoint, then -the attacker proxies on to the real API. This is not a "process memory access" -class of attack; setting env vars is a normal deployment plane, and operators -mis-configuring `KALSHI_API_BASE_URL` (e.g., copy-paste from a forum) is a real -failure mode. -**Impact:** API key ID leakage (the key ID is not as sensitive as the private -key, but is identifying), live signature capture (each is single-use and bound -to method+path+timestamp, so it doesn't let the attacker forge arbitrary -requests — but it does identify the account and confirm the key is active), -and silent MITM of all trading activity. With `http://`, even passive network -observers downstream of the env-var setter capture everything. -**Evidence:** -- `client.py:137` — `base_url = os.environ.get("KALSHI_API_BASE_URL")` -- `config.py:18-44` — `KalshiConfig` accepts any string; only trailing-slash - normalization happens in `__post_init__`. -- `_base_client.py:91-96` — `httpx.Client(base_url=config.base_url, ...)` - consumes the URL directly. -**Suggested mitigation:** In `KalshiConfig.__post_init__`, reject `base_url` -that doesn't start with `https://` (allow `http://` only for `localhost`/ -`127.0.0.1`/`::1` for testing), and log a `logger.warning` if the host is -neither `api.elections.kalshi.com` nor `demo-api.kalshi.co`. This is one-time -work and doesn't break legitimate proxy use cases (operators can still set -their own HTTPS proxy host). - -### F-O-04 — WebSocket `recv_loop` swallows all exceptions with `except Exception` (severity: medium) -**File:** `kalshi/ws/client.py:204-207` -**Threat model:** A misbehaving (or malicious) server that injects malformed -frames, schema-violating payloads, or oversized strings into the WS stream -will cause any exception during dispatch / orderbook apply / sequence track / -backpressure to be logged at `warning` and silently swallowed. This includes -exceptions raised by callbacks the user registered (`MessageDispatcher.dispatch` -awaits `self._callbacks[sub.channel](parsed)`), which means a callback that -performs auth checks or trade-rejection logic cannot fail-fast — it gets logged -and skipped, leaving the consumer ignorant. There's no rate limiter on this -log line either, so an attacker that can inject 1k+ bad frames/sec will fill -disk with log noise. -**Impact:** Defense-in-depth gap. Combined with `KalshiBackpressureError` being -defined but caught here (it should propagate to the consumer per its docstring), -this masks the security-relevant signal "the queue is overflowing, drop the -stream." Also masks any future cryptographic verification logic someone might -add to messages. -**Evidence:** -- `kalshi/ws/client.py:204-207` — - ```python - except Exception as e: - logger.warning("Error processing message: %s", e) - continue - ``` -- `kalshi/errors.py:80-82` — `KalshiBackpressureError` exists for this exact case - and is supposed to escape to the caller; here it is swallowed. -**Suggested mitigation:** Narrow the except clause: re-raise -`KalshiBackpressureError`, `KalshiSubscriptionError`, and `asyncio.CancelledError`; -log+continue only for `pydantic.ValidationError`, `json.JSONDecodeError`, and -`KeyError`. Add a counter (in-memory, no persistence) and break the loop after -N consecutive failures — that's the canonical fail-fast signal for a corrupted -server stream. - -### F-O-05 — `dispatch.py:94` logs full parse exceptions with `exc_info=True`, dumping frame contents into the log (severity: medium) -**File:** `kalshi/ws/dispatch.py:91-95` -**Threat model:** When a Pydantic model fails to parse a message (e.g., a `fill` -message has an unexpected schema), `logger.warning("Failed to parse %s message", -msg_type, exc_info=True)` dumps the full Pydantic `ValidationError`, which -includes the *input dict* in its repr. For private channels like `fill`, -`user_order`, `market_positions`, that dict contains user-identifiable trade -data: order IDs, sizes in USD, side, ticker, fill prices, and the user's -client_order_id. Operators running with default `WARNING` log level (or any -log shipper that ingests warnings into a SIEM/Splunk/Datadog) will write -financial PII to log infrastructure that's typically lower-trust than the -trading environment. -**Impact:** Trade data leakage into logs and downstream log aggregators. -Distinct from the documented record/replay fixture risk; this affects normal -production runs. -**Evidence:** -- `kalshi/ws/dispatch.py:88,94` — `Unknown message type: %s` (logs only the type - string, OK) and `Failed to parse %s message", msg_type, exc_info=True` (the - exc_info flag is the problem — Pydantic includes the input in its error repr). -- `kalshi/ws/dispatch.py:73` — `logger.warning("Received non-JSON frame: %s", - raw[:100])` — bounded to 100 chars but still includes arbitrary frame content. -**Suggested mitigation:** Drop `exc_info=True` from the parse-failure log, or -replace with a sanitized message like `logger.warning("Failed to parse %s -message (validation error)", msg_type)` and only emit the full traceback under -an opt-in `KALSHI_DEBUG_WS=1` env flag. For the non-JSON frame log, log only -the length and first/last 16 bytes, not 100 chars of payload. - -### F-O-06 — Reconnect path does not refresh signatures, but signature is generated per-connect, so timing OK — informational (severity: informational) -**File:** `kalshi/ws/connection.py:122-172` -**Threat model:** N/A. -**Impact:** None. -**Evidence:** `connection.py:156` re-calls `self._build_auth_headers()` on every -reconnect attempt, which re-signs with a fresh timestamp via -`KalshiAuth.sign_request` (auth.py:175-176, `timestamp_ms = int(time.time() * 1000)`). -This is correct — old timestamps would be rejected by the server. Noting it -because the pattern is easy to get wrong in an auto-reconnect loop. -**Suggested mitigation:** None. Leaving as informational documentation. - -### F-O-07 — No dependabot / no automated dependency CVE scanning (severity: medium) -**File:** `.github/` (no `dependabot.yml`), `pyproject.toml:19-24` -**Threat model:** The SDK depends on `httpx>=0.27,<1`, `pydantic>=2.0,<3`, -`cryptography>=43,<45`, `websockets>=14,<17`. `cryptography` in particular -has had repeated CVEs (2024 OpenSSL bundling, 2023 NULL deref). Without -dependabot or `pip-audit` in CI, a CVE in any of these floors the SDK -indefinitely without prompting a release. `cryptography>=43,<45` excludes -v45+ which means the upper bound is conservative but also stale. -**Impact:** Downstream consumers transitively pull whatever `cryptography` -their resolver picks within `>=43,<45`. If a CVE lands in 43.x or 44.x, no -automation flags it. -**Evidence:** -- `pyproject.toml:19-24` — dependency block, no constraints file, no `pip-audit`. -- `.github/workflows/` — no scanning step, no dependabot config. -**Suggested mitigation:** Add `.github/dependabot.yml` with weekly Python and -GitHub-Actions ecosystems. Add a `uv run pip-audit` step (or `safety check`) -to `ci.yml`. The ws upper bound at `<17` is reasonable; the `cryptography<45` -bound should be re-evaluated since v45 is current. - -### F-O-08 — `Retry-After` numeric parser accepts negative values and `inf` (severity: low) -**File:** `kalshi/_base_client.py:57-66`, `kalshi/_base_client.py:165-167` (sync) and `:278-279` (async) -**Threat model:** A malicious or buggy server sending `Retry-After: -1` or -`Retry-After: 1e308` makes `float(retry_after)` succeed; the code then does -`min(error.retry_after, self._config.retry_max_delay)`. For `-1`, `min(-1, 30)` -is `-1`, and `time.sleep(-1)` is fine on POSIX but unintuitive — effectively -no backoff, so the client busy-loops the server-controlled retry. For `inf`, -`min(inf, 30) == 30`, OK. For `NaN`, `min(nan, 30)` returns `nan`, -`time.sleep(nan)` raises `ValueError` and bubbles up unexpectedly. -**Impact:** Server-controlled retry storm against the SDK user (CPU/network), -or unexpected exception type on `NaN`. The cap is documented in CLAUDE.md as -"prevents server-controlled sleep" but actually only caps the upper end. -**Evidence:** -- `_base_client.py:60-63` — `try: retry_after_val = float(retry_after)` with no - range check. -- `_base_client.py:165-167` — `delay = min(error.retry_after, retry_max_delay)`. -**Suggested mitigation:** Add `if retry_after_val < 0 or not math.isfinite( -retry_after_val): retry_after_val = None`. Five lines, removes the lower-bound -gap and the NaN crash. - -### F-O-09 — `KalshiError` exception messages may include full URLs from httpx (severity: low) -**File:** `kalshi/_base_client.py:136,144,249,257`, `kalshi/ws/connection.py:117-120` -**Threat model:** `KalshiError(f"Request timed out: {e}")` and -`KalshiError(f"HTTP error: {e}")` interpolate the str of the httpx exception, -which typically contains the full URL including any query string. For private -endpoints (e.g., `GET /portfolio/positions?ticker=...`), the query string -itself isn't sensitive, but if a user constructs a URL with a token-like value -in a query param the exception message will land in any uncaught-exception -sink (Sentry, stderr, log aggregator). Same applies to WS: -`KalshiConnectionError(f"WebSocket connection failed: {e}")`. -**Impact:** Low-grade info leakage into logs. Not a path to key extraction -because auth headers are not in the URL. -**Evidence:** -- `_base_client.py:136` — `KalshiError(f"Request timed out: {e}")` -- `_base_client.py:144` — `KalshiError(f"HTTP error: {e}") from e` -- `ws/connection.py:119` — `KalshiConnectionError(f"WebSocket connection failed: {e}")` -**Suggested mitigation:** Strip the URL from `e` before interpolation, or use -a fixed message and rely on the `__cause__` (`raise ... from e`) for the -detail. Most error trackers serialize `__cause__` anyway. - -### F-O-10 — `from_env`/`try_from_env` do not zero out PEM strings after loading (severity: informational) -**File:** `kalshi/auth.py:101-156` -**Threat model:** `KALSHI_PRIVATE_KEY` env var content is read into `pem_string`, -passed to `from_pem`, which `encode("utf-8")`s and hands to -`serialization.load_pem_private_key`. The original string remains in -`os.environ` for the process lifetime. This is standard practice for -env-based secret loading and is not realistically a finding — process-local -secrets in env are the deployment plane this SDK targets. Noting for -completeness. -**Impact:** None beyond the normal env-var threat model. A `/proc//environ` -read would already give the attacker the secret regardless of what the SDK does. -**Evidence:** `kalshi/auth.py:115-126` reads env vars and never unsets them. -**Suggested mitigation:** None recommended. If extreme hardening is wanted, -`os.environ.pop("KALSHI_PRIVATE_KEY", None)` after construction, but that -breaks reload-from-env flows. - -### F-O-11 — `integration-nightly.yml` writes the private key with `chmod 600` but does not shred it on exit (severity: low) -**File:** `.github/workflows/integration-nightly.yml:51-65` -**Threat model:** The workflow writes `secrets.KALSHI_PRIVATE_KEY` to -`$RUNNER_TEMP/kalshi_private_key.pem`. GitHub-hosted runners are ephemeral -and `RUNNER_TEMP` is wiped between jobs, so persistence is not the issue. -However, if a later step in the *same job* is compromised (e.g., a malicious -test or installed package), the PEM is still on disk for the duration of -`pytest`. The PEM is also implicitly exposed to anything that can read -`$RUNNER_TEMP` for the rest of the job. -**Impact:** Low. Self-hosted runners would change the risk model but the -guard `if: github.repository == 'TexasCoding/kalshi-python-sdk'` on -`integration-nightly.yml:11` keeps this off forks. -**Evidence:** `integration-nightly.yml:55-65` writes via `printenv` (good — no -heredoc/shell-expansion risk), `chmod 600`, then exports -`KALSHI_PRIVATE_KEY_PATH`. No cleanup step. -**Suggested mitigation:** Add a final `if: always()` step that `shred -u -"${KALSHI_PRIVATE_KEY_PATH}"` (or `rm -f` since GitHub-hosted is ephemeral). -Mostly cosmetic on hosted runners; matters if you ever flip to self-hosted. - -### F-O-12 — PyPI release: tag-to-version check is good, but no provenance / sigstore signing (severity: informational) -**File:** `.github/workflows/release.yml:21-31,41-58` -**Threat model:** The release workflow has the `version == tag` guard which -prevents the simple "push a tag with a different version" attack. It uses -OIDC trusted publishing (`permissions: id-token: write` + no -`PYPI_API_TOKEN`), which is the right pattern. It does *not* upload sigstore -bundles or PEP 740 attestations alongside the artifacts. -**Impact:** None today. Downstream consumers verifying via sigstore would not -get a signal. Noting because the upcoming default of attestations is -opt-in-now / default-later and the project is otherwise well-set-up to take -advantage. -**Evidence:** -- `release.yml:22-29` — tag/version match guard. -- `release.yml:48-50` — `permissions: id-token: write`. -- `release.yml:57-58` — `pypa/gh-action-pypi-publish@release/v1` — supports - `attestations: true` but the workflow does not set it. -**Suggested mitigation:** Add `with: attestations: true` to the -`gh-action-pypi-publish` step. Free, no maintainer action required. - -### F-O-13 — Recorded fixture format does NOT save request headers/body, so the documented "scrub fixtures" warning is well-supported (severity: informational — clean finding) -**File:** `kalshi/testing/_fixtures.py:48-55,76-81`, `kalshi/testing/__init__.py:11-17` -**Threat model:** N/A — verifying the docstring claim in `testing/__init__.py:11-17` -that recorded fixtures might leak sensitive *response* bodies but not auth -headers. -**Impact:** None — confirmed `_request_to_dict` only persists -`method/url/path/query`, never `request.headers` or `request.content`. The -`KALSHI-ACCESS-SIGNATURE` and `KALSHI-ACCESS-TIMESTAMP` headers cannot end up -in fixtures. The module docstring's `.. warning::` block correctly scopes the -risk to response bodies (balances, positions, PII). -**Evidence:** -- `_fixtures.py:48-55` — `_request_to_dict` returns only - `{method, url, path, query}`. No header serialization anywhere. -- `_fixtures.py:76-81` — `record_pair` uses only `_request_to_dict` / - `_response_to_dict`. -- `__init__.py:11-17` — the existing warning is adequate; the audit prompt - asked us to flag only if it's insufficient. -**Suggested mitigation:** None. This is a clean finding confirming the design -is correct. The team may want to add a unit test that asserts no header keys -are ever present in serialized fixtures, as a regression guard against future -edits. - ---- - -## Categories examined with no findings - -- **Auth signing payload formation** (`auth.py:162-207`) — correct: timestamp + - METHOD + path-only, percent-encoding normalized to uppercase per RFC 3986, - trailing slash stripped (with `/` exception), query parameters stripped. -- **PEM string logging** — verified by `grep -rn "logger\." kalshi/` plus - reading each match. No PEM content is ever logged. The only sensitive-ish - log is the `from_env` warning that doesn't include any key material. -- **POST/DELETE retry exclusion** (`_base_client.py:31-32,155-159`) — correct: - `RETRYABLE_METHODS = {"GET", "HEAD", "OPTIONS"}`, DELETE explicitly excluded - with a comment about cancel idempotency. Retry only fires on transient 5xx / - 429 / timeout *and* method in the safe set. -- **Retry-After upper-bound capping** — capped at `retry_max_delay` correctly; - see F-O-08 for the negative/NaN gap. -- **httpx defaults** — TLS verify on by default (httpx default; not overridden), - no `verify=False` anywhere. -- **`.gitignore` coverage** — `.env`, `*.pem`, `*.key`, `.env.*` all present; - verified `.env` and `.keys/*.pem` are actually ignored. -- **SecretStr on `private_key` API response** (`models/api_keys.py:92`) — used - correctly, ensures `repr()` masks the value. diff --git a/.planning/audit-P-websocket.md b/.planning/audit-P-websocket.md deleted file mode 100644 index d7aea50..0000000 --- a/.planning/audit-P-websocket.md +++ /dev/null @@ -1,441 +0,0 @@ -# Wave 5 Audit P — WebSocket Implementation Deep-Dive - -Reviewed at commit `0a3fb23580b3497c1e004d1b79f783b2dda38e24` against the v1.1.0 release. - -## Summary - -The WebSocket layer is well-structured: connection / subscription / dispatch / -sequence / orderbook concerns are cleanly separated, the durable `client_id` -abstraction over server `sid` is the right idea, and the public API -(`subscribe_*`, `on(...)`, `orderbook(...)`) is ergonomic. There is decent -unit-level coverage of each module and one integration suite against a fake -server. - -Concurrency safety is **partly there but not robust**. The main hot paths -(`_recv_loop` → dispatcher → queue) are single-task and therefore mostly safe, -but several real-world scenarios break the invariants the code assumes: -sid-remapping on reconnect can mis-route messages that are in flight, the -sequence tracker leaks state across reconnects and across sid reassignment, -DROP_OLDEST and ERROR overflow have a hole that lets sequence detection -silently drift, and the `on()` decorator-vs-iterator contract is undefined -for the same channel. - -The most material findings are concentrated around reconnect: subscribe -races during a recv-loop reconnect, dropped messages skipping `last_seq` -update, and `resubscribe_all` aborting on the first failure leaving the -sub_mgr in an inconsistent state. - -## Findings - -### F-P-01 — `resubscribe_all` aborts on first failure, partially-resubscribed sub_mgr (severity: high) -**File:** `kalshi/ws/channels.py:199-228` -**Scenario:** Server reconnect succeeds, then resubscribe of (say) `ticker` -succeeds, then the server returns `error` for the next `subscribe` (e.g. the -private channel is rate-limited mid-reconnect, or `_wait_for_response` times -out on one ack). -**Impact:** `resubscribe_all` raises out of `_recv_loop`'s reconnect -`try` block. The except handler at `client.py:197-203` treats it like -"reconnect failed" — pushes sentinels to *all* queues and exits the loop — -even though the actual socket is up and some subs are healthy. From the -caller's perspective every iterator terminates and they get no error, only -the silent end-of-iteration. -Worse, `self._subscriptions` is left half-mutated: `sub.server_sid` has been -cleared to `None` on every subscription before the loop even starts iterating -(line 209), so successfully-resubscribed entries are fine, but the failing -entry has `server_sid=None` while the iterator is still presumed alive. A -subsequent `unsubscribe(client_id)` will return early at line 155. -**Evidence:** -```python -# channels.py:208-228 -for client_id, sub in old_subs.items(): - sub.server_sid = None # Clear old sid - ... - data = await self._wait_for_response(msg_id) # ← can raise - new_sid = data.get("msg", {}).get("sid") - if new_sid is not None: - sub.server_sid = new_sid - self._sid_to_client[new_sid] = client_id -``` -The `_wait_for_response` helper raises `KalshiSubscriptionError` on timeout -or error response. No try/except, no partial-state cleanup. -**Suggested fix:** Wrap each subscription's resubscribe in its own -try/except. On per-sub failure, log + push a sentinel to that one queue and -remove it from `_subscriptions`, but continue resubscribing the rest. -Optionally surface a structured event via `on_error`. Always restore an -invariant: `sub.server_sid is None` ↔ `client_id not in _subscriptions`. - -### F-P-02 — Dropped messages still update `last_seq`, but ERROR-overflow drops do not (severity: high) -**File:** `kalshi/ws/client.py:142-179` + `kalshi/ws/backpressure.py:45-58` -**Scenario:** Slow consumer on `orderbook_delta` (`ERROR` overflow). Queue -fills, `MessageQueue.put` raises `KalshiBackpressureError`. `_recv_loop` -catches it at line 204-207 as a generic `Exception`, logs, and continues. -**Impact:** The sequence tracker has already incremented `last_seq` for that -message at line 161-163 (it ran before dispatch). The orderbook manager has -**already applied** the delta to the local book at line 173-175. The -consumer's iterator missed the message entirely. The user sees: -1. Their book is silently more up-to-date than the messages they consumed. -2. No gap is detected on the next message — `last_seq` already advanced. -3. The docs say `ERROR` overflow prevents corruption, but the local book - built via `orderbook(ticker)` *is* corrupted from the consumer's - perspective (mutations they never observed). -And in the `DROP_OLDEST` ticker case, the dropped frame's `last_seq` was -likewise advanced — fine for ticker since it's not sequenced, but the same -code path would silently drift if a sequenced channel ever runs DROP_OLDEST -(which docs do say "don't do unless you know"). The drift detection -documentation claim that "missed delta is exactly what sequence-gap detection -catches" is **not true** for the ERROR-overflow path. -**Evidence:** `_recv_loop` at `client.py:142-179` does seq-track + apply -orderbook **before** the dispatcher routes to the queue. By the time the -queue raises, both side-effects are committed. The catch at line 204-207 -swallows it. -**Suggested fix:** Either (a) move seq tracking + orderbook apply *after* -dispatch succeeds, so a backpressure error rolls back the implicit state, or -(b) on `KalshiBackpressureError` for an orderbook subscription, treat it like -a gap: clear the local book and reset the seq tracker for that sid so the -next snapshot rebootstraps. - -### F-P-03 — Reconnect: messages arriving on old `sid`s during sid-remap are silently dropped or mis-routed (severity: high) -**File:** `kalshi/ws/channels.py:205-228` and `kalshi/ws/client.py:188-196` -**Scenario:** Server reissues a stale or recycled `sid`. Suppose -pre-reconnect there were two subs: ticker on sid=1, orderbook on sid=2. -After reconnect the server happens to assign sid=1 to the orderbook channel -(server-side recycling). During `resubscribe_all`, the iteration order over -`old_subs.items()` is dict insertion order; ticker is processed first, -acquires sid=1 (or any new sid). Mid-loop, if any `orderbook_snapshot` or -`orderbook_delta` arrives for the freshly-assigned ticker sid=1 but is -processed *while* the loop is still mid-iteration, it will be routed to the -ticker subscription queue. -**Impact:** Mis-routed messages: an `orderbook_snapshot` lands in a ticker -iterator that will `model_validate` it as a `TickerMessage` and fail (caught -silently by dispatch.py:91-95) — message lost. Even worse, after reconnect -some pre-reconnect snapshot messages may still be flushed by the server with -the *old* sid; those will hit the new mapping and get mis-routed silently. -Note `recv_loop` is paused only during one `subscribe()` call (in -`_do_subscribe`); during `resubscribe_all` the connection is in active recv -state. - -Also: `resubscribe_all` is called *from* `_recv_loop` (client.py:195) without -the `_subscribe_lock`, but at the same time **any user-level -`subscribe_*` call** can hold that lock and try to drive the connection. The -resubscribe path and a concurrent user subscribe interleave their `send`s -and their `wait_for_response` consumers (both reading from the same -`connection.recv()`). -**Evidence:** -```python -# client.py:189-196 (in _recv_loop, NO lock held) -await self._connection.reconnect() -if self._sub_mgr: - ... - await self._sub_mgr.resubscribe_all() -``` -```python -# client.py:241-249 (user-facing subscribe path) -async with self._subscribe_lock: - await self._pause_recv_loop() - try: - sub = await self._sub_mgr.subscribe(...) -``` -**Suggested fix:** Acquire `_subscribe_lock` in the reconnect path before -calling `resubscribe_all`. Clear `_sid_to_client` *before* sending any -resubscribe so any stale message arriving on an old sid hits -`get_subscription_by_sid → None` and is dropped instead of mis-routed -(currently done at line 206 ✓, but the new-mapping window still overlaps -with delivery from in-flight messages on the new socket). Consider draining -any frame in the recv buffer that has a sid not in the just-built mapping. - -### F-P-04 — `_pause_recv_loop` races: messages received but not dispatched are lost (severity: high) -**File:** `kalshi/ws/client.py:129-135` + `137-179` -**Scenario:** `_recv_loop` is mid-iteration, has just `await -self._connection.recv()` returned a frame `raw` and is now in the JSON-parse -/ seq-track / orderbook block. A user calls `subscribe_*`. `_pause_recv_loop` -cancels the task. The cancel hits while we're between `recv()` and -`dispatcher.dispatch(raw)` — possibly while we're awaiting the `seq_tracker.track()` -call (which can await `on_gap` callback). -**Impact:** The frame held in `raw` is silently dropped. For -`orderbook_delta` this is a missed delta; the next frame on the same sid -will be `last_seq + 2` and trigger gap detection — so we **resync**, which -is graceful, but for a `ticker` or `fill` channel the message just vanishes -with no signal. -Worse, if cancellation happens *after* seq-track succeeded (advanced -`last_seq`) but before dispatch, the missed delta will never be re-counted -on the next frame — `last_seq` is set to e.g. 5, next frame is 6, no gap -detected, but the consumer never saw seq=5. -**Evidence:** The recv loop catches `asyncio.CancelledError` at line 181-182 -with `break` — no attempt to dispatch the in-flight frame before exiting. The -`_pause_recv_loop` is called on every user-facing subscribe (line 242). -**Suggested fix:** Pause the recv loop via a flag + asyncio.Event rather -than cancellation. Or: cancel only if no frame is in-flight (track a -"processing" flag and wait for it to clear). The current approach assumes -cancel during `connection.recv()` (idle wait) is safe; it is, but -`_pause_recv_loop` doesn't enforce that timing. - -### F-P-05 — Reconnect drops authentic-but-mid-flight ack frames, deadlocking subscribe (severity: high) -**File:** `kalshi/ws/channels.py:73-101` + `kalshi/ws/client.py:241-249` -**Scenario:** User calls `subscribe_ticker`. `_do_subscribe` pauses the recv -loop, sends the subscribe command, and starts `_wait_for_response(msg_id, -timeout=5.0)` reading frames from `connection.recv()`. The server connection -dies mid-subscribe (network blip). `connection.recv()` raises -`ConnectionClosed`. -**Impact:** `_wait_for_response` does not catch `ConnectionClosed`. It -propagates out of `subscribe()`, out of `_do_subscribe`, and up to the user -as an unhandled `websockets.ConnectionClosed` (not a `KalshiConnectionError`). -The subscription is not in `_subscriptions` (we never reached the assignment -at line 143). The recv loop is still cancelled (paused). The `finally` -restarts the recv loop at line 249, which will then see the closed -connection on its first `recv()` and trigger reconnect → resubscribe_all, -but the user's original subscribe call has already errored. The user retries -and the second subscribe succeeds, but the *first* attempt's sub is lost. -**Evidence:** `_wait_for_response` has no try/except for `ConnectionClosed`; -neither does `_do_subscribe`. The 5s `timeout` is the only escape, which is -fine for "server is slow" but not for "server is gone". -**Suggested fix:** Wrap the subscribe-and-await in `_do_subscribe` with a -catch for `ConnectionClosed` → wait for reconnect to land (via -state-change) then retry, or surface as `KalshiConnectionError` so the user -can retry deterministically. - -### F-P-06 — Auth re-signing on reconnect uses fresh timestamp ✓ (severity: low — confirm) -**File:** `kalshi/ws/connection.py:122-172` -**Scenario:** Reconnect after token-ish auth expiry. -**Impact:** Each reconnect calls `_build_auth_headers()` (line 156) which -calls `self._auth.sign_request(...)` fresh — so a new timestamp/signature is -generated per attempt. **Good**. -**Evidence:** `connection.py:155-163` re-signs on every attempt inside the -retry loop. -**Suggested fix:** None needed; flagging as positive verification because -the audit prompt asked. One adjacent caveat: if the server's clock drifts vs. -client during a long sleep (`retry_max_delay` could be many seconds), and the -RSA-PSS signature is timestamp-bound, the signature timestamp is taken at -sign-time (just before `connect`), so should be safe. - -### F-P-07 — `OrderbookDeltaPayload.delta` typed as `FixedPointCount` (string) but used as `Decimal` (severity: medium) -**File:** `kalshi/ws/models/orderbook_delta.py:42-47` + `kalshi/ws/orderbook.py:68-94` -**Scenario:** Any delta application path. -**Impact:** `OrderbookDeltaPayload.delta` is annotated `FixedPointCount` and -`price` is `DollarDecimal`. The orderbook code treats both as `Decimal` -(`new_qty = existing.quantity + delta`, `levels.append(... quantity=delta)`). -This works at runtime because both `DollarDecimal` and `FixedPointCount` are -`Decimal` subtypes via Pydantic custom types. But the test fixture -(`tests/ws/test_orderbook.py:44-55`) constructs `OrderbookDeltaPayload` -directly with `Decimal("10")` — `FixedPointCount` may accept it, but the -field type annotation as documented in the comment is "fixed-point count -string". This is a type-vs-runtime mismatch that could bite a downstream -user typing on the field annotation. -**Evidence:** `kalshi/types.py` defines `FixedPointCount` (need to verify -it's a `Decimal` alias and not a `str` alias). Quick check would resolve -this; if it's a `Decimal` alias, the docstring on `OrderbookDeltaPayload` is -misleading. If it's a `str` alias, the orderbook arithmetic crashes. -**Suggested fix:** Verify `FixedPointCount` is a Decimal-coercing type; -update the docstring on `OrderbookDeltaPayload` to say "parsed as Decimal" -not "fixed-point count string". - -### F-P-08 — `unsubscribe` mutates state but doesn't push sentinel; iterator hangs (severity: high) -**File:** `kalshi/ws/channels.py:152-168` -**Scenario:** A caller obtains an iterator from `subscribe_ticker`, then -later calls `sub_mgr.unsubscribe(client_id)` (or — for some reason — -exposes this through a higher-level API in the future). The -`SubscriptionManager` deletes the entry and clears `sid_to_client`. But the -caller's iterator is still awaiting `queue.get()`. -**Impact:** The iterator hangs forever because no sentinel is pushed. -Also: `_recv_loop` will silently drop subsequent messages for the -unsubscribed sid (dispatch.py:104-106 → `Message for unknown sid`). There's -no public unsubscribe today on `KalshiWebSocket`, so user-facing impact is -zero — but the `_stop` path *does* push sentinels (client.py:117-119) and -that's the only graceful-shutdown contract. If any future code calls -`sub_mgr.unsubscribe` while the connection is alive, the iterator on that -sub becomes a zombie. -**Evidence:** `channels.py:152-168` `unsubscribe()` does not push a sentinel -into `sub.queue`. -**Suggested fix:** In `unsubscribe`, after the server ack, call -`await sub.queue.put_sentinel()` before removing the entry. Same for -`update_subscription` when removing markets (`delete_markets`) so the -caller's iterator can know the market is gone. - -### F-P-09 — `_handle_seq_gap` clears only the first ticker's orderbook (severity: high) -**File:** `kalshi/ws/client.py:209-223` -**Scenario:** A single `orderbook_delta` subscription with multiple tickers -(e.g. `tickers=["A", "B"]`). The SDK allows this and the server returns one -sid for the multi-ticker sub. A gap is detected on this sid. -**Impact:** Only `tickers[0]` (e.g. `"A"`) is cleared from the orderbook -manager. Ticker `"B"`'s local book is left untouched and now diverges from -server truth. The next snapshot from the server (sent automatically on -resync? Actually — there's no automatic resync logic; the SDK relies on the -**server** to push a fresh snapshot after a gap, which is only guaranteed -on resubscribe). So `B`'s book stays corrupted until reconnect. -**Evidence:** -```python -# client.py:216-223 -sub = self._sub_mgr.get_subscription_by_sid(gap.sid) -if sub and sub.channel == "orderbook_delta": - tickers = sub.params.get("market_tickers", []) - if tickers and self._orderbook_mgr: - self._orderbook_mgr.remove(tickers[0]) # ← only first ticker! -``` -Worse: even if the SDK iterated all tickers and removed them all, the gap -hits a single sid that covers multiple books, but the gap doesn't tell us -*which* ticker had the missed update. Removing all books is the safe option -but is destructive. -**Suggested fix:** Iterate the full `tickers` list, not just `[0]`. Also -trigger an `update_subscription` with `send_initial_snapshot=True` (or -unsubscribe+resubscribe) so the server pushes fresh snapshots for all -affected books. Without that step, the docs' "next snapshot from the server -re-bootstraps it" is wishful thinking. - -### F-P-10 — `OrderbookManager.apply_delta` returns updated book but mutates Pydantic model in place (severity: medium) -**File:** `kalshi/ws/orderbook.py:56-96` -**Scenario:** Any delta application. -**Impact:** `book.yes` and `book.no` are lists on the `Orderbook` Pydantic -model. The code does `levels.pop`, `levels[idx] = ...`, `levels.append`, -`levels.sort` directly on these. If a consumer holds a reference to a -previously-returned `Orderbook` (e.g. snapshot of "state at delta N"), it -sees subsequent mutations leak in. The `_OrderbookIterator.__anext__` -returns `book = self._mgr.get(...)` which is the same shared instance — so -every yielded `Orderbook` is the same object, mutating across iterations. -**Evidence:** -```python -# orderbook.py:72-94 -levels = book.yes if side == "yes" else book.no -... -levels.pop(existing_idx) -levels[existing_idx] = OrderbookLevel(...) -levels.append(...) -levels.sort(...) -``` -The Pydantic model isn't frozen, so this mutation is allowed. -**Suggested fix:** Either (a) document that `Orderbook` yielded by -`orderbook()` is a live view (not a snapshot) and consumers must deep-copy -if they want to keep history, or (b) construct a fresh `Orderbook(ticker=..., -yes=list(new_yes), no=list(new_no))` per delta. Option (b) is -defensive-coding cost but matches the natural expectation. - -### F-P-11 — `on()` decorator + iterator on same channel: iterator silently never receives (severity: medium) -**File:** `kalshi/ws/client.py:384-398` + `kalshi/ws/dispatch.py:108-113` -**Scenario:** User registers `@ws.on("ticker")` AND calls -`await ws.subscribe_ticker(tickers=["T1"])`. -**Impact:** Dispatcher routes every ticker message to the callback (line -109-110) and the iterator's queue stays empty forever. The iterator hangs -on `queue.get()`. The docs do call this out ("Registering a callback for a -channel takes over routing…"), but there's no runtime warning. A user who -adds a callback after-the-fact has their iterator silently stop yielding. -**Evidence:** `dispatch.py:109-113` — the callback branch returns; -`sub.queue.put` is never called. -**Suggested fix:** When `register_callback(channel, ...)` is called and -there exists an active subscription on that channel with a non-empty queue -consumer, log a warning. Or document this as a hard one-or-the-other API. -The current behavior is documented but unsignalled. - -### F-P-12 — `_recv_loop` exception handler too broad: all errors are coerced to "log + continue" (severity: medium) -**File:** `kalshi/ws/client.py:204-207` -**Scenario:** A bug in user callback (raises any exception), a bug in -parsing (KeyError on dispatch), a `KalshiBackpressureError`, a -`KalshiSequenceGapError`, or any other unexpected condition. -**Impact:** All are collapsed into a single `logger.warning("Error -processing message: %s", e)` with `continue`. The user has no signal that -their callback crashed (no traceback shown — `logger.warning` doesn't pass -`exc_info=True`). Debugging is painful. A repeating callback bug would log -hundreds of warnings without indicating where it came from. The dispatcher -already swallows parse errors at `dispatch.py:91-95` with -`exc_info=True`, but the recv-loop-level catch hides everything beyond. -**Evidence:** -```python -# client.py:204-207 -except Exception as e: - logger.warning("Error processing message: %s", e) - continue -``` -**Suggested fix:** Add `exc_info=True` to the warning. Better: catch -`KalshiBackpressureError` and `KalshiSequenceGapError` explicitly and let -other exceptions propagate (or treat them as fatal — log error + push -sentinels + break). - -### F-P-13 — Error envelope on unrecognized `sid` is dropped silently (severity: medium) -**File:** `kalshi/ws/dispatch.py:76-83` -**Scenario:** Server sends an `error` envelope with a `sid` field pointing -to a sid the client doesn't know (e.g., post-unsubscribe race). -**Impact:** The `on_error` handler is called only if `msg_type == "error"` -at the top of dispatch. But control messages skip the sid-lookup branch -entirely, so `on_error` does fire — good. However, the AsyncAPI spec also -allows server-side `error` messages tied to a specific subscription's sid -(`ErrorPayload` carries `market_ticker` / `market_id` but not sid by -default). If the server sends an error with a sid in the envelope but the -type is something other than `"error"` (e.g. a malformed message), it falls -through to the unknown-type log. The asymmetry between top-level -`type=="error"` (handled) vs. error-as-payload-on-a-channel (ignored) isn't -documented. -**Evidence:** `dispatch.py:79-83` only routes to `on_error` when -`msg_type == "error"`. `dispatch.py:86-95` returns silently on unknown -types. -**Suggested fix:** Log unknown types at WARNING (currently warning ✓), and -document that server-side errors on a channel sid are not routed to -`on_error` — only top-level error envelopes are. - -### F-P-14 — `SequenceTracker._last_seq` leaks sids forever (severity: low) -**File:** `kalshi/ws/sequence.py:34, 77-83` -**Scenario:** Long-running session with many sids assigned over time -(reconnects, unsubscribes, re-subscribes — every iteration assigns a fresh -sid because the server uses a monotonic counter, e.g. fake server uses -`_next_sid`). -**Impact:** `_last_seq` grows unbounded. Memory leak. After 24h with many -subscription churns this is non-trivial for high-volume callers. `reset_all` -is called on full reconnect but not on `unsubscribe` (and `unsubscribe` -itself isn't exposed publicly today). -**Evidence:** No call to `seq_tracker.reset(sid)` from `unsubscribe`. The -gap-handler only resets the current sid. `reset_all` is only invoked from -the reconnect branch. -**Suggested fix:** When `SubscriptionManager.unsubscribe` succeeds, call -`self._seq_tracker.reset(server_sid)`. Same when a sid is replaced in -`resubscribe_all` (the old sid's entry is leaked because line 206 clears -`_sid_to_client` but doesn't touch `_seq_tracker._last_seq`). - -### F-P-15 — `MessageQueue._buffer` deque has no `maxlen`, defeating its purpose (severity: low) -**File:** `kalshi/ws/backpressure.py:41` -**Scenario:** Cosmetic / defensive code review. -**Impact:** None functionally — the manual `len(self._buffer) >= -self._maxsize` check before append enforces the bound. But -`collections.deque(maxlen=None)` is the same as no maxlen and only obscures -intent. A reader expects `maxlen=self._maxsize` and naturally trusts the -deque to enforce it. -**Evidence:** -```python -self._buffer: collections.deque[T | object] = collections.deque(maxlen=None) -``` -**Suggested fix:** Either set `maxlen=self._maxsize + 1` (allow one slot -for sentinel) with explicit drop logic, or drop the `maxlen=None` -declaration. Add a comment explaining why manual bounding is used (to fire -the ERROR overflow before append). - -### F-P-16 — `run_forever` returns immediately if no subscribe was made (silent no-op) (severity: low) -**File:** `kalshi/ws/client.py:400-403` -**Scenario:** User registers `@ws.on("ticker")` then `async with ws.connect()` -then immediately `await ws.run_forever()` — without calling `subscribe_*`. -**Impact:** `run_forever` checks `if self._recv_task:` but the recv task is -only started by `_ensure_recv_loop`, which is only called from -`_do_subscribe`. So `_recv_task is None` → `run_forever` returns -immediately. The callback never fires, the user gets no error. The test at -`tests/ws/test_client.py:264-271` even asserts this behavior. This is a -foot-gun: the docs imply "register a callback then `run_forever`" works. -**Evidence:** `client.py:400-403`: -```python -async def run_forever(self) -> None: - if self._recv_task: - await self._recv_task -``` -**Suggested fix:** `run_forever` should call `_ensure_recv_loop()` itself so -that callback-only consumers don't have to issue a dummy subscribe. Or -document that a callback-only path requires at least one `subscribe_*` call -to drive the recv loop. - ---- - -## Coverage gaps in existing tests - -The integration tests cover happy paths. Notably **not** covered: - -- Reconnect during an in-flight `subscribe_*` call. -- Multiple sids assigned to the same channel (multi-ticker orderbook_delta). -- Backpressure overflow on a sequenced channel (orderbook_delta + ERROR + slow consumer). -- `resubscribe_all` partial failure. -- Callback exception inside the recv loop. -- `unsubscribe` while iterator is held. -- Reconnect that exceeds `ws_max_retries` mid-stream (sentinel push). - -These tests would catch most of F-P-01 through F-P-08 above if added. diff --git a/.planning/audit-Q-tests.md b/.planning/audit-Q-tests.md deleted file mode 100644 index b140e99..0000000 --- a/.planning/audit-Q-tests.md +++ /dev/null @@ -1,123 +0,0 @@ -# Wave 5 Audit Q — Test Coverage Gaps - -Reviewed at commit `0a3fb23580b3497c1e004d1b79f783b2dda38e24` against the v1.1.0 release. - -## Summary - -Surface coverage at 1455 tests is genuinely strong: every public resource has a respx-mocked happy path plus an error path, sync and async transports mirror each other, and the contract/drift infrastructure is itself unit-tested (`tests/test_contract_support.py`). The biggest real gaps are concentrated in three areas. First, the **retry/backoff state machine** has good coverage of "does this status retry?" but no test exercises the safety properties that protect callers when the server misbehaves: `Retry-After` cap to `retry_max_delay`, HTTP-date `Retry-After` fallback, or `httpx.TimeoutException` retry on idempotent methods. Second, **pagination edge cases** miss the `max_pages` safety cap (cursor-loop detection is tested, but the bare numeric cap isn't) and `Page.to_dataframe`/`to_polars` only handle flat models — nested-model and Decimal-column behavior beyond the smoke test is unpinned. Third, **WebSocket** routing has a coverage hole around callback+queue collision on the same channel and "reconnect lost a sid mapping" scenarios beyond the happy resubscribe. Smaller issues: `KalshiConfig` trailing-slash stripping isn't tested, `KalshiConfig.extra_headers` is plumbed but never asserted, the `TypeError` fallback in `_to_decimal_dollars`/`_to_decimal_fp` is unreachable in tests, the recorder's non-JSON `body_kind="text"` branch is unexercised (HTML 502 bodies are the realistic case), and `KalshiAuth.from_key_path`'s `PermissionError` + passphrase-protected `TypeError` branches have no tests. - -## Findings - -### F-Q-01 — `Retry-After` is not capped to `retry_max_delay` in any test (severity: high) -**Coverage gap:** `_base_client.py:165-166` and `:278-279` apply `min(error.retry_after, config.retry_max_delay)` so a hostile/misconfigured server returning `Retry-After: 86400` cannot park the SDK for a day. `test_429_rate_limit` only asserts that `retry_after == 2.5` survives the error mapper; no test exercises the transport's clamp inside the retry loop. -**Why it matters:** A regression that drops the `min(...)` (or flips `retry_max_delay` argument order) would silently introduce unbounded sleeps in production. The whole point of `retry_max_delay` is to be a server-untrusted ceiling and that property is unverified. -**Affected code:** `kalshi/_base_client.py:163-179` (sync) and `kalshi/_base_client.py:277-292` (async). -**Suggested test(s):** Mock a `429` with `Retry-After: "9999"` and `retry_max_delay=0.05`, then a `200`. Patch `time.sleep`/`asyncio.sleep` with a recording stub. Assert the recorded delay equals `0.05`, not `9999`. Mirror in both sync and async. - -### F-Q-02 — HTTP-date `Retry-After` fallback to computed backoff is untested (severity: medium) -**Coverage gap:** `_map_error` catches `ValueError` on `float(retry_after)` and sets `retry_after_val = None` — the comment says "HTTP-date format, fall back to computed backoff." No test passes a date string ("Wed, 21 Oct 2026 07:28:00 GMT") and confirms the SDK still retries via `_compute_backoff` rather than crashing or skipping retry. -**Why it matters:** Most upstream gateways (Cloudflare, AWS ALB) emit HTTP-date Retry-After when stalling, not seconds. A regression that re-raised on `ValueError` would convert every rate-limit retry into a hard failure in front of those gateways. -**Affected code:** `kalshi/_base_client.py:57-66`. -**Suggested test(s):** Mock `429` with `Retry-After: "Wed, 21 Oct 2026 07:28:00 GMT"`. Assert: (a) `_map_error` returns `KalshiRateLimitError` with `retry_after is None`, (b) the transport still retries via exponential backoff and eventually gets the 200. - -### F-Q-03 — `httpx.TimeoutException` retry path is unexercised (severity: high) -**Coverage gap:** The transport catches `TimeoutException` separately from `HTTPError` and retries it (only for `RETRYABLE_METHODS`). `_base_client.py:135-142` (sync) and `:248-255` (async) are entirely uncovered. A search for `TimeoutException` in `tests/` finds zero hits. -**Why it matters:** Read timeouts are routine in real usage; the SDK ships a retry behavior for them with no regression test. A refactor that converted the second `except` into `raise` would silently break list/get paths on flaky networks, while POST/DELETE timeouts would suddenly trigger spurious retries (the safety check `method.upper() in RETRYABLE_METHODS` is also untested). -**Affected code:** `kalshi/_base_client.py:135-142`, `:248-255`. -**Suggested test(s):** Use `respx.mock` with `side_effect=httpx.TimeoutException("read timed out")` then a 200 — assert GET retries and succeeds; assert POST raises a wrapped `KalshiError` on the first timeout with `route.call_count == 1`. Mirror sync/async. - -### F-Q-04 — `_list_all` `max_pages` safety cap has no regression test (severity: medium) -**Coverage gap:** Cursor-loop detection is well tested (`tests/test_base_helpers.py:135-209`), but `max_pages: int = 1000` is also a safety net for the "server never repeats a cursor but never returns empty either" case — and no test passes a small `max_pages` to confirm the iterator terminates after exactly N pages. Note that `_list_all` is not currently called anywhere with a non-default `max_pages`, so users can't even pass a smaller value externally, but the cap is still load-bearing. -**Why it matters:** If the cursor-loop check is ever bypassed (e.g., server gives `cursor="A1", "A2", "A3", …` forever), only `max_pages` prevents an infinite generator. A refactor turning the `for _ in range(max_pages)` into `while page.cursor` would compile, ship, and hang. -**Affected code:** `kalshi/resources/_base.py:147-185` (sync), `:260-291` (async). -**Suggested test(s):** Mock an endpoint returning a fresh unique cursor on every call (`f"cursor-{counter}"`). Call `_list_all` with `max_pages=3`. Assert exactly 3 requests fired and 3 items yielded (no exception). Mirror sync/async. - -### F-Q-05 — `KalshiConfig` trailing-slash stripping is untested (severity: medium) -**Coverage gap:** `KalshiConfig.__post_init__` rstrips trailing slashes from `base_url` and `ws_base_url` — the comment says "trailing slash is stripped automatically … to prevent double-slash in auth signing paths." Auth signing strips trailing slash from the *path* (covered by `test_strips_trailing_slash`), but the config-level normalization that prevents the bug at construction time has no test. -**Why it matters:** A user passing `base_url="https://api.elections.kalshi.com/trade-api/v2/"` would otherwise produce double-slash paths and break RSA signing — exactly the failure mode the post-init is supposed to prevent. If `__post_init__` gets removed during a dataclass refactor, the regression slips through. -**Affected code:** `kalshi/config.py:39-44`. -**Suggested test(s):** Construct `KalshiConfig(base_url="https://x/y/")` and `KalshiConfig(ws_base_url="wss://x/y/")`. Assert both end without `/`. Add a transport-level integration test that constructs with a trailing slash and verifies a signed GET still hits `/y/markets` (no `//markets`). - -### F-Q-06 — `KalshiConfig.extra_headers` is plumbed but never asserted (severity: medium) -**Coverage gap:** `KalshiConfig.extra_headers: dict[str, str]` is passed to `httpx.Client(headers=...)` and `httpx.AsyncClient(headers=...)` in `_base_client.py:94` and `:205`. No test verifies that user-supplied extras (e.g., `{"X-Trace-Id": "abc"}`) are actually sent on each request, nor that they don't conflict with the auth headers added per-request. -**Why it matters:** Users who set custom headers for distributed tracing or routing have no regression coverage. A refactor that moved `headers=config.extra_headers` from the client constructor to per-request would silently break them if collision/precedence weren't preserved. -**Affected code:** `kalshi/config.py:35`, `kalshi/_base_client.py:94`, `kalshi/_base_client.py:205`. -**Suggested test(s):** Build a config with `extra_headers={"X-Trace-Id": "trace-1", "User-Agent": "kalshi-sdk/test"}`, fire a respx-mocked GET, and assert both headers were on the wire. Also assert per-request auth headers (`KALSHI-ACCESS-KEY`) are still present (verifies they don't get overwritten). - -### F-Q-07 — Callback + queue collision on the SAME channel is untested (severity: medium) -**Coverage gap:** `test_callback_and_iterator_coexist` (`tests/ws/test_integration.py:422`) covers callback-on-channel-A + iterator-on-channel-B, which is the easy case. The interesting case is: register a callback for `"ticker"` AND call `subscribe_ticker()` to get an iterator. Per `dispatch.py:108-113`, the callback wins and the iterator's queue receives nothing — but this isn't documented and isn't tested. Users who do both will silently see empty iterators. -**Why it matters:** This is a real-world footgun. The dispatcher's "check for callback first" routing means a stale registered callback can silently swallow all messages from a later `async for ... in stream` loop. Either the behavior should be tested as intentional, or a warning should fire — neither happens today. -**Affected code:** `kalshi/ws/dispatch.py:108-113`. -**Suggested test(s):** Register a `@session.on("ticker")` callback, then `await session.subscribe_ticker()`. Push a `ticker` frame. Assert the callback received it and the iterator's queue is empty (or, if behavior changes, vice versa). Pin the current routing decision so a refactor doesn't silently flip it. - -### F-Q-08 — Recorder's `body_kind="text"` branch is unexercised (severity: medium) -**Coverage gap:** `kalshi/testing/_fixtures.py:62-66` falls back to `body_kind = "text"` when `json.loads` raises (`ValueError`/`UnicodeDecodeError`), which is exactly what happens when a Cloudflare/ALB 502 returns `...`. Tests at `tests/test_mock_transport.py` only ever record JSON bodies (`{"exchange_active": True, ...}` etc.). The build_response path for `body_kind="text"` (`_fixtures.py:91-92`) likewise has zero coverage. -**Why it matters:** Recording a real session against the demo or production API will eventually capture an HTML error page. If the text branch silently corrupts the payload (e.g., latin-1 vs utf-8 mismatch, missing content-type preservation), the recording becomes a landmine that explodes the first time someone replays it. The transport claims to handle this; the tests don't prove it. -**Affected code:** `kalshi/testing/_fixtures.py:58-73` and `:84-99`. -**Suggested test(s):** Wire a `_make_stub_transport` that returns `httpx.Response(502, content=b"502 Bad Gateway", headers={"content-type": "text/html"})`. Record once, assert the JSON fixture has `"body_kind": "text"` and the body string round-trips. Replay and assert the client surfaces a `KalshiServerError` with the HTML body in the error message. - -### F-Q-09 — `KalshiAuth.from_key_path` `PermissionError` branch and passphrase-protected key `TypeError` branch are untested (severity: low) -**Coverage gap:** Two specific exception-handler branches in `auth.py` have no test: -- `auth.py:72-73`: `except PermissionError` when reading a key file the user lacks permission for. -- `auth.py:83-87`: `except TypeError` from `load_pem_private_key` when the key is passphrase-protected (the helpful "remove the passphrase" message). -**Why it matters:** Both are user-facing error messages with operational guidance. A refactor that lost the helpful messaging or re-raised the underlying cryptography error would degrade UX for the very users most likely to hit these paths (locked-down production deployments). Both branches are short and trivially testable. -**Affected code:** `kalshi/auth.py:72-73` (PermissionError); `kalshi/auth.py:83-87` (passphrase TypeError). -**Suggested test(s):** For PermissionError: `chmod 000` a tempfile, attempt `from_key_path`, assert the wrapped `KalshiAuthError` contains "Permission denied". For passphrase: generate an RSA key encrypted with `BestAvailableEncryption(b"pw")`, write to disk, call `from_key_path` / `from_pem`, assert error message includes "Passphrase-protected" and the openssl hint. - -### F-Q-10 — `_to_decimal_dollars` / `_to_decimal_fp` `TypeError` fallback is unreachable from tests (severity: low) -**Coverage gap:** `kalshi/types.py:27` raises `TypeError(f"Cannot convert {type(value).__name__} to Decimal")` for unexpected types (e.g., `list`, `dict`, `bool`, `None` when the field isn't `Optional`). Same mirror at `:60` for `FixedPointCount`. Search for that error string in `tests/` returns nothing. -**Why it matters:** Pydantic v2 will usually intercept type mismatches before this validator runs, but a model declared with a `DollarDecimal` field plus an upstream `BeforeValidator` that doesn't strictly type-check could pass a `bool` or `list` through. Without a regression test, the friendly error message could be silently replaced by a less helpful Pydantic stack trace. -**Affected code:** `kalshi/types.py:27`, `:60`. -**Suggested test(s):** Define a tiny test model `class M(BaseModel): x: DollarDecimal` and call `M.model_validate({"x": [1, 2]})`; assert the `ValidationError` wraps `TypeError("Cannot convert list to Decimal")`. Mirror for `FixedPointCount`. - -### F-Q-11 — `Page.to_dataframe` / `to_polars` don't pin nested-model serialization behavior (severity: medium) -**Coverage gap:** `test_page_dataframe.py` uses a flat `_Row` (str, Decimal, int). Real SDK pages return models with nested structures: `Market` has `OrderbookLevel` sub-objects when constructed locally; `Candlestick` has nested `OHLCBar`; `Multivariate` results have lists of dicts. `model_dump(mode="python")` on these produces nested dicts in the DataFrame column, which has DataFrame-engine-specific behavior (pandas: object column with dicts; polars: struct column). Neither is asserted. -**Why it matters:** Users who run `client.markets.list().to_dataframe()` and try to query a nested column will hit subtle behavior they need to know about. If a future refactor flips `mode="python"` to `mode="json"`, nested Decimals become strings and silently break `.sum()` on price columns. The smoke tests don't catch this — they assert Decimal stays Decimal, but only for top-level fields. -**Affected code:** `kalshi/models/common.py:43-54`, `:56-67`. -**Suggested test(s):** Add a `_NestedRow(BaseModel)` with a nested `_Inner` BaseModel field plus a `list[Decimal]` field. Assert `to_dataframe()` produces an object-dtype column containing the nested dict (not a string), `to_polars()` produces a struct/list column, and a top-level Decimal column still has Decimal values (not str). This pins the `mode="python"` contract that's only commented today. - -### F-Q-12 — `is_authenticated` property is asserted on the transport but not on the client facades (severity: low) -**Coverage gap:** `KalshiClient.is_authenticated` and `AsyncKalshiClient.is_authenticated` are public properties (`client.py:119`, `async_client.py:117`) but no test in `test_client.py`/`test_async_client.py` exercises them. Only `tests/integration/test_client_construction.py` uses them, and those tests skip without live credentials. -**Why it matters:** This is a documented public API surface (used in user code like `if client.is_authenticated: client.orders.list()`). A refactor that lost the property (e.g., during a constructor cleanup) wouldn't fail unit CI; it'd only fail the integration suite that runs nightly. -**Affected code:** `kalshi/client.py:119`, `kalshi/async_client.py:117`. -**Suggested test(s):** In `test_client.py`/`test_async_client.py`: assert `KalshiClient(auth=test_auth).is_authenticated is True` and `KalshiClient().is_authenticated is False`. Mirror for async. - -### F-Q-13 — WS timing-dependent assertions use bare `asyncio.sleep` rather than awaiting a deterministic signal (severity: low) -**Coverage gap:** Several WS tests use `await asyncio.sleep(0.1)` / `sleep(0.2)` / `sleep(0.3)` and then assert on a list (e.g., `test_callback_and_iterator_coexist`, `test_on_decorator_registers`, `test_on_error_called`, run_forever tests). On a loaded CI runner with GIL contention, these will be flaky. Counted 7+ instances in `tests/ws/test_client.py` and `tests/ws/test_integration.py`. -**Why it matters:** Slow CI = false-fail. The pattern `sleep then assert` is also pedagogically harmful: it teaches readers that the dispatcher is "eventually consistent" when it's actually deterministic on the recv loop. The fix is to await a deterministic completion event (e.g., an `asyncio.Event` set inside the callback). -**Affected code:** `tests/ws/test_client.py:212, 260, 297`; `tests/ws/test_integration.py:408, 464, 553, 582`. -**Suggested test(s):** Replace the sleep-then-assert pattern with an `asyncio.Event` that the callback `set()`s. `await asyncio.wait_for(event.wait(), timeout=2.0)` instead of `await asyncio.sleep(0.3)`. No flake, faster passes. - -### F-Q-14 — Server-initiated unsubscribe (`type: "unsubscribed"`) routing is unverified for sid cleanup (severity: medium) -**Coverage gap:** `dispatch.py:79-83` treats `"unsubscribed"` as a CONTROL_TYPE — it's logged and dropped, but `SubscriptionManager._sid_to_client` is NOT cleaned up when the server unilaterally unsubscribes a sid (e.g., due to an admin action or session expiry). Subsequent messages with that sid will silently hit `get_subscription_by_sid` → returns `None` → dispatcher logs "Message for unknown sid". No test exercises this. -**Why it matters:** This is a slow leak. If the server unilaterally drops 100 subscriptions over a long-running session, `_sid_to_client` keeps growing because only client-initiated `unsubscribe()` calls clean up (`channels.py:163-164`). The user-facing symptom is stale subscriptions sticking around in `active_subscriptions`. Either the dispatcher should reap on server-initiated unsubscribe, or this should be tested as intentional and documented. -**Affected code:** `kalshi/ws/dispatch.py:79-83`; `kalshi/ws/channels.py:230-235`. -**Suggested test(s):** Subscribe to `ticker`, snapshot `active_subscriptions`. Have the fake server send `{"type": "unsubscribed", "msg": {"sid": }}`. Assert: pin current behavior (subscription stays / leaks) OR assert the manager reaps it. Either way, the test forces a deliberate choice rather than the current silent gap. - -### F-Q-15 — Multiple sids for the same channel on the same connection is untested (severity: low) -**Coverage gap:** The audit prompt called this out specifically. The fake WS server (`tests/ws/conftest.py`) appears to assign sequential sids per subscribe call, but no test subscribes to `orderbook_delta` twice for two different ticker lists and verifies that messages for sid=1 vs sid=2 route to distinct queues. `test_two_channels_on_same_connection` covers two *different* channels (ticker + fill), not two subscriptions to the same channel. -**Why it matters:** Real usage: a user wants two orderbook subscriptions sharded by ticker. If `_sid_to_client` somehow collapsed both onto the second client_id (lookup bug, dict overwrite), one of the queues would silently never receive messages. The bug class is "shared map collision" and is exactly the kind of thing a unit test catches. -**Affected code:** `kalshi/ws/channels.py:103-150` (subscribe), `:230-235` (lookup). -**Suggested test(s):** Subscribe twice to `orderbook_delta` (or `ticker`) with different `market_tickers` params. Capture both `sub.server_sid` values, assert they differ. Push two messages with the two sids, read both iterators with `wait_for`, assert each gets exactly its own message. - -### F-Q-16 — `_join_tickers` test coverage doesn't pin behavior for non-string elements (severity: low) -**Coverage gap:** `_join_tickers` (`resources/_base.py:47-67`) validates empty and comma-containing strings. No test covers what happens when the caller passes `[1, 2, 3]` (ints) or `[True, "ABC"]` (bool) — the function would either coerce silently or raise a less-helpful error at the `"," in elem` step. The function's type signature is `list[str] | tuple[str, ...] | str | None`, so runtime type abuse is the caller's fault, but the failure mode isn't pinned. -**Why it matters:** Mostly a Python typing-hygiene issue. The current code does no `isinstance(elem, str)` check, so a `bool` element would crash at `"," in True` with a `TypeError: argument of type 'bool' is not iterable`. A `1` element would crash similarly. The error message is unhelpful — fix or pin. -**Affected code:** `kalshi/resources/_base.py:47-67`. -**Suggested test(s):** Either add `isinstance(elem, str)` validation and test it, OR pin the current crash behavior with `pytest.raises(TypeError, match="argument of type 'bool'")` so future readers know it's intentionally a duck-type failure. - -### F-Q-17 — `Page` cursor empty-string vs None equivalence is asserted on `has_next`, but not on `_list_all` continuation (severity: low) -**Coverage gap:** `test_has_next_false_empty` (`tests/test_pagination.py:35`) confirms `cursor=""` makes `has_next` False at the model level. But `_list_all` reads `page.cursor` directly: `if not page.cursor: break`. The `_list` factory normalizes cursor via `cursor if cursor else None` (`_base.py:145`). So an empty-string cursor in the response envelope becomes `None`, and `_list_all` terminates. This whole chain works, but no test starts from "server returns `cursor: \"\"`" and verifies `_list_all` stops cleanly after that page. The cursor-loop tests use `cursor: ""` only as the terminator in their HEALTHY case (`test_normal_pagination_does_not_trip`), but they don't explicitly assert "an empty-string cursor must not be treated as a sentinel of 'continue with empty cursor param'". -**Why it matters:** Borderline pinned. If `_list_all` were ever refactored to forward the cursor unconditionally (`current_params["cursor"] = page.cursor` without the `if not page.cursor: break` guard), an empty-string cursor would loop forever, hitting the same first page. Cursor-loop detection would eventually catch it, but only after `max_pages` of "same first page" — wasting bandwidth. A direct test of "cursor: '' → stop" makes the intent explicit. -**Affected code:** `kalshi/resources/_base.py:175-176`, `:282-283`. -**Suggested test(s):** Mock a single-page response with `{"items": [{"id": "a"}], "cursor": ""}` and explicitly assert `route.call_count == 1` after `list(_list_all(...))`. Already similar to existing tests but make the assertion explicit. - -### F-Q-18 — `_recv_loop` "reconnect failed" sentinel-broadcast path is untested (severity: medium) -**Coverage gap:** `ws/client.py:197-203`: when `_connection.reconnect()` raises in the recv loop, the code broadcasts a sentinel to every active queue so iterators don't hang forever, then breaks out of the loop. Search for this branch in tests returns nothing — `test_reconnect_max_retries_exceeded` (`tests/ws/test_connection.py:352`) tests the underlying connection manager's failure, but not the higher-level KalshiWebSocket consumer's "iterator gets sentinel and exits cleanly" promise. -**Why it matters:** If reconnect permanently fails (network partition, expired credentials), users running `async for msg in stream:` need the loop to exit, not hang. This is the "don't deadlock the user" property, and it's the kind of thing that breaks silently on refactor. The path also `break`s out of `_recv_loop` — if that `break` were removed, the loop would tight-loop on the dead connection. -**Affected code:** `kalshi/ws/client.py:197-203`. -**Suggested test(s):** Connect via `KalshiWebSocket`, subscribe to ticker, get an iterator. Cause the fake server to close uncleanly (forcing reconnect) AND configure `ws_max_retries=1` with the fake server permanently rejecting. Assert `async for msg in stream:` exits via `StopAsyncIteration` within a bounded timeout rather than hanging. - ---- - -End of audit Q. 18 findings total. diff --git a/.planning/audit-R-performance.md b/.planning/audit-R-performance.md deleted file mode 100644 index fa41834..0000000 --- a/.planning/audit-R-performance.md +++ /dev/null @@ -1,153 +0,0 @@ -# Wave 5 Audit R — Performance & Async - -Reviewed at commit `0a3fb23580b3497c1e004d1b79f783b2dda38e24` against the v1.1.0 release. - -## Summary - -The hot path is mostly tight. Sync/async are cleanly split — `time.sleep` only appears in `SyncTransport`, `asyncio.sleep` is used in both `AsyncTransport` and `ConnectionManager.reconnect`. The httpx Client/AsyncClient is constructed once per `KalshiClient` and reused (connection pool persists, no per-request DNS). The dispatcher lookup is `O(1)`. The two clearest wins are: (1) `MessageQueue.qsize()` does an `O(n)` linear scan over the deque on every call (~22 µs at 1000 items — measured); (2) `Page.to_dataframe()` does `model_dump(mode='python')` per row, which is unnecessary churn versus letting pandas/polars read attributes directly. The retry backoff math has one subtle issue: jitter is `random.uniform(0, 0.5)` (fixed magnitude, not proportional), so on the first attempt jitter dominates the base delay. There is also a duplicated `json.loads` in the WS recv loop, and `urlparse(base_url).path` is recomputed on every REST request when it could be cached at transport construction. - -## Findings - -### F-R-01 — `MessageQueue.qsize()` is O(n) linear scan (severity: medium) -**File:** `kalshi/ws/backpressure.py:80-82` -**Measurement:** -``` -qsize() with 1000 items: 22.474 us/call -len(buffer): 0.035 us/call -``` -**Impact:** Anyone polling `qsize()` from a monitoring loop or hot consumer pays 22 µs per call at maxsize=1000 — three orders of magnitude more than `len(self._buffer)`. The scan exists only to subtract the sentinel; at most one sentinel ever sits in the deque, and only at shutdown. -**Evidence:** `def qsize(self) -> int: return sum(1 for item in self._buffer if item is not _SENTINEL)` -**Suggested fix:** Track a counter incremented in `put` and decremented in `get`, or just return `len(self._buffer)` and subtract 1 when `self._closed and self._buffer and self._buffer[-1] is _SENTINEL`. Either approach is O(1). - -### F-R-02 — `MessageQueue` uses unbounded `deque(maxlen=None)` (severity: low) -**File:** `kalshi/ws/backpressure.py:41` -**Measurement:** qualitative, not benchmarked -**Impact:** The bound is enforced manually in `put()` via `len(self._buffer) >= self._maxsize`, which is fine — but the `deque` is constructed with `maxlen=None`, meaning if any external caller (or future code path) appends past `maxsize` without going through `put()`, the deque will grow unbounded. The whole point of `maxsize` is partially defeated by the choice. -**Evidence:** `self._buffer: collections.deque[T | object] = collections.deque(maxlen=None)` -**Suggested fix:** Use `collections.deque(maxlen=maxsize+1)` (the +1 reserves a slot for the sentinel). When `put()` would overflow, deque's ring-buffer behavior also enforces the cap as a defense-in-depth. Or just keep `maxlen=None` but document that all writes must go through `put()`. - -### F-R-03 — REST retry jitter is additive, not proportional (severity: low) -**File:** `kalshi/_base_client.py:73-76` -**Measurement:** qualitative, not benchmarked -**Impact:** `delay = config.retry_base_delay * (2**attempt) + random.uniform(0, 0.5)` adds a fixed-magnitude jitter regardless of the current backoff. At `retry_base_delay=0.5` and attempt 0 this gives a delay in `[0.5, 1.0]` — jitter is up to 100% of base. At attempt 5 (delay=16s) jitter is `<3%`. Standard practice (AWS "Full Jitter") is `random.uniform(0, base * 2**attempt)`, which spreads colliding retries evenly. With the current scheme, many clients that all hit a rate limit at the same time will retry within a 500ms window of each other on the first attempt. -**Evidence:** `delay = config.retry_base_delay * (2**attempt) + random.uniform(0, 0.5)` -**Suggested fix:** Replace with `random.uniform(0, config.retry_base_delay * (2**attempt))` (Full Jitter) or `delay/2 + random.uniform(0, delay/2)` (Equal Jitter). Cap with `retry_max_delay` as today. - -### F-R-04 — `urlparse(base_url).path` recomputed on every REST request (severity: low) -**File:** `kalshi/_base_client.py:113` and `:226` -**Measurement:** -``` -urlparse(base).path: 0.364 us/call -cached attr access: 0.013 us/call -``` -**Impact:** ~0.35 µs saved per request. Negligible on its own, but free — the base_url is immutable on `KalshiConfig` (frozen dataclass). -**Evidence:** `sign_path = urlparse(self._config.base_url).path + path` -**Suggested fix:** Cache the parsed path on the transport in `__init__`: `self._base_path = urlparse(config.base_url).path`. Then `sign_path = self._base_path + path`. - -### F-R-05 — Async `_list_all` paginates strictly serially (severity: low) -**File:** `kalshi/resources/_base.py:260-291` -**Measurement:** qualitative, not benchmarked -**Impact:** Cursor pagination is inherently sequential (next cursor comes from prior response), so this is not really a defect — but it's worth calling out that `async for x in client.markets.list_all()` does N round-trips of `RTT + parse` in series. For high-page-count endpoints (`/portfolio/fills` with full history), a parallel-prefetch strategy (kick off next page while caller consumes current page items) could overlap network with consumer processing. The current code yields each item, blocks for cursor fetch, yields the next batch. -**Evidence:** `for _ in range(max_pages): page = await self._list(...); for item in page.items: yield item; ...` -**Suggested fix:** Optional `prefetch=True` mode that schedules the next page fetch as an `asyncio.Task` while the consumer iterates the current page. Adds complexity; only worth doing if users report slow `list_all` over async. - -### F-R-06 — WS recv loop double-parses every JSON frame (severity: low) -**File:** `kalshi/ws/client.py:144-179` and `kalshi/ws/dispatch.py:70-71` -**Measurement:** -``` -json.loads small msg: 0.906 us/call -``` -**Impact:** `_recv_loop` calls `json.loads(raw)` to peek `sid`/`seq`/`type`, then passes `raw` (string) to `dispatcher.dispatch()` which calls `json.loads(raw)` *again*. Wasted ~0.9 µs per message — at 10000 msg/s that's ~0.9% of one CPU. Pure savings, no correctness change. -**Evidence:** -- `kalshi/ws/client.py:147` `data = json.loads(raw)` -- `kalshi/ws/dispatch.py:71` `data = json.loads(raw)` -**Suggested fix:** Have `_recv_loop` pass the already-parsed `data` dict to `dispatcher.dispatch()` (rename/overload signature). Or: do the snapshot/delta peek inside the dispatcher so the parse happens once. - -### F-R-07 — Orderbook `apply_delta` does linear scan over price levels (severity: medium) -**File:** `kalshi/ws/orderbook.py:74-94` -**Measurement:** -``` -apply_delta middle of 99 levels: 1.91 us/call -apply_delta worst case (end): 3.06 us/call -``` -**Impact:** Each delta walks the list to find the matching price. For ~100 levels per side, this is fine (~2-3 µs). For a deep book (thousands of levels — possible on certain Kalshi multivariate markets), this becomes O(n) per delta and the `levels.sort()` on insert is O(n log n). At HFT-style update rates this dominates. -**Evidence:** -```python -for i, level in enumerate(levels): - if level.price == price: ... -# ... -levels.append(...); levels.sort(key=lambda lv: lv.price) -``` -**Suggested fix:** Replace the side-as-list-of-levels with a `dict[Decimal, Decimal]` (price → quantity) internally; convert to sorted `OrderbookLevel` list only when returning from `get()`. O(1) update, O(n log n) only on read. Cleaner code too. - -### F-R-08 — `Page.to_dataframe()` builds intermediate dicts via `model_dump` (severity: medium) -**File:** `kalshi/models/common.py:43-67` -**Measurement:** qualitative — `Orderbook(180 levels) model_dump python: 28.48 us/call` (measured), per-row cost on a flat `Market` row will be smaller but still ~1-5 µs each -**Impact:** For a 5000-row `Page[Market]` the `[item.model_dump(mode='python') for item in self.items]` list comprehension allocates 5000 dicts (each with full keys), then pandas re-parses them into columnar storage. With pandas `DataFrame.from_records(items, columns=...)` or directly building from `model.__dict__` we'd avoid the dict roundtrip. The same applies to polars. -**Evidence:** -```python -records = [item.model_dump(mode="python") for item in self.items] -return pd.DataFrame(records) -``` -**Suggested fix:** For polars: `pl.from_dicts([m.__dict__ for m in self.items])` skips Pydantic's serializer. For pandas: same trick. Need to confirm nested models (e.g. nested settlements) flatten correctly — if not, document the perf tradeoff and ship a `to_dataframe(flatten=False)` knob. - -### F-R-09 — `RecordingTransport` re-reads and re-writes the entire fixture file per request (severity: medium) -**File:** `kalshi/testing/_recorder.py:34-44` and `:64-73` -**Measurement:** qualitative, not benchmarked -**Impact:** For each recorded request the transport does `load_pairs` (read + JSON parse of every prior pair) → append → `save_pairs` (re-serialize all pairs, write through `.tmp` rename). With N pairs to a single endpoint this is O(N²) over the recording session — for `/markets` with 1000 sequential calls, the last call re-parses 999 pairs and re-writes them. Recording is offline so this is bounded, but the file-rewrite-per-request pattern is also a hazard for slow disks. -**Evidence:** `pairs = load_pairs(...); pairs.append(...); save_pairs(...)` — load + save on every call. -**Suggested fix:** Hold an in-memory `dict[(method, path), list[pair]]` for the session, flush on `close()` or every N pairs. The docstring already warns "Recordings are expected to run sequentially," so a session-scoped buffer is safe. - -### F-R-10 — RSA-PSS signature computed on every retry attempt, even when only sleep changes (severity: low) -**File:** `kalshi/_base_client.py:117, 230` -**Measurement:** -``` -RSA-PSS sign: 0.786 ms/op -``` -**Impact:** ~0.8 ms per request for signing — that's the dominant non-network cost in the SDK. The retry loop *intentionally* re-signs each attempt (timestamp changes, prevents replay rejection), so this is correct, not a bug. But on a 5-attempt retry storm (3 retries + 2 burst), 4 ms is spent re-signing; on async this is sync-blocking the event loop (the `cryptography` library releases the GIL but still blocks the calling coroutine). -**Evidence:** `auth_headers = self._auth.sign_request(method.upper(), sign_path) if self._auth else {}` — inside the retry `for` loop. -**Suggested fix:** Document, don't change. Optionally: in `AsyncTransport`, run `sign_request` via `loop.run_in_executor(None, ...)` to keep the event loop responsive when many concurrent requests are signing simultaneously. Only worth it for high-fanout async workloads. - -### F-R-11 — `model_dump(mode='json')` vs `mode='python'` for request bodies is a wash (severity: low) -**File:** `kalshi/resources/orders.py:102, 116, 136, 181, 207` (and similar across all resources) -**Measurement:** -``` -mode=json: 0.87 us -mode=python: 0.86 us -ratio: 1.01x -``` -**Impact:** No measurable difference. The audit prompt suggested this might be a `2-5x` win — measurement says no, at least for `CreateOrderRequest`. The reason is that pydantic v2's Rust core does both modes equivalently fast when fields are plain strings/ints; the `mode='json'` only matters when a serializer differs between Python and JSON (e.g. `datetime` → ISO string). Since httpx accepts the dict and re-serializes to JSON itself, `mode='json'` here is essentially redundant but harmless. -**Evidence:** see benchmark output above -**Suggested fix:** None — keep `mode='json'` for explicit clarity (DollarDecimal serializes to string in both modes via `PlainSerializer`, which httpx needs). - -### F-R-12 — `Order.model_validate` is the per-response hot path; ~3 µs per row (severity: low) -**File:** `kalshi/resources/_base.py:143, 256` (list paths), `kalshi/resources/orders.py:356, 681` (single) -**Measurement:** -``` -Order.model_validate: 3.24 us/call -Order.model_validate_json: 3.75 us/call -``` -**Impact:** On a 1000-order `list_all` paginate, validation costs ~3.2 ms. `model_validate_json` (which would let pydantic-core parse JSON directly from bytes, skipping the intermediate `dict`) is actually slightly slower here because httpx already gave us a parsed dict. The current ordering — `response.json()` → `model.model_validate(...)` — is correct. -**Evidence:** see benchmark output above -**Suggested fix:** None. If we wanted to squeeze more, we'd thread raw bytes from httpx straight into `model_validate_json` and skip httpx's parse. Not worth the API contortion. - -### F-R-13 — `SubscriptionManager._wait_for_response` busy-waits inside `asyncio.wait_for` for non-matching frames (severity: low) -**File:** `kalshi/ws/channels.py:73-101` -**Measurement:** qualitative, not benchmarked -**Impact:** During subscribe, frames that arrive before the subscribe-ack (data frames already in-flight on the wire) are read, parsed, and *discarded*. If a subscribe is issued on a busy channel that's already streaming under a different sid, this can drop many real messages. The recv-loop pause logic in `_do_subscribe` is supposed to prevent this race for the *current* connection, but on `resubscribe_all` after reconnect, the server may already be pushing data for newly-resubscribed channels before we've read all the acks. -**Evidence:** `logger.debug("Discarding non-matching frame during subscribe: type=%s", data.get("type"))` -**Suggested fix:** If `data.get("type")` is a data message (not error/subscribed/ok), route it through the dispatcher rather than discard. Requires `_wait_for_response` to hold a reference to the dispatcher (cycle risk) — alternatively, push such frames onto a "stash" queue that the recv loop drains when it resumes. Correctness/perf tradeoff; current behavior may lose ordered messages on resubscribe. - -### F-R-14 — `SequenceTracker._last_seq` grows unbounded across reconnects (severity: low) -**File:** `kalshi/ws/sequence.py:34, 41-75` -**Measurement:** qualitative, not benchmarked -**Impact:** `reset_all()` is called on reconnect, which clears the dict — so in practice this is bounded by active subscription count. Verified clean. Including as a "checked and OK" so a future reader doesn't worry. -**Evidence:** `self._last_seq: dict[int, int] = {}` cleared via `reset_all()` in `_recv_loop` reconnect branch. -**Suggested fix:** None. - -### F-R-15 — httpx clients are constructed once per `KalshiClient`, no pool tuning (severity: low) -**File:** `kalshi/_base_client.py:91-96, 202-207` -**Measurement:** qualitative, not benchmarked — verified by code read -**Impact:** httpx defaults to 100 max connections, 20 keepalive, no HTTP/2. For a single-client app this is fine. For users doing burst-fanout (e.g. fetching 50 markets in parallel via `asyncio.gather`), 20 keepalive may force connection churn. Also: `http2=True` would let httpx multiplex requests on a single TCP/TLS connection to `api.elections.kalshi.com`, cutting handshake cost — but it also requires `h2` as an install extra and Kalshi's edge needs to support HTTP/2 (very likely, but unverified). -**Evidence:** `httpx.Client(base_url=..., timeout=..., headers=..., transport=...)` — no `limits=` or `http2=` argument. -**Suggested fix:** Expose `KalshiConfig.http2: bool = False` and `KalshiConfig.limits: httpx.Limits | None = None`. Default off for compat; power users can opt in. Document in the config docstring. diff --git a/.planning/v1.1-waves.md b/.planning/v1.1-waves.md deleted file mode 100644 index 4ceef67..0000000 --- a/.planning/v1.1-waves.md +++ /dev/null @@ -1,128 +0,0 @@ -# v1.1 Milestone — Parallel Execution Plan - -Tracking doc for parallel multi-agent execution of the v1.1 milestone. -This is local-only (not committed to repo). See GitHub Issues for source of truth. - -Generated: 2026-05-16 - ---- - -## Wave 1 — Six agents in parallel (no shared files) — ✅ MERGED - -All six PRs merged to `main` via squash on 2026-05-16. Main advanced from `055cd9a` → -`3996473`. Worktrees and local branches cleaned up. Issues #16, #47, #48, #54, #55 -auto-closed via `Closes #N`; #14 stayed open as the documentation umbrella (scaffold -done; content fill tracked at #57). - -| Agent | Issue | Branch | Status | Commit | Result | -|---|---|---|---|---|---| -| A | [#16](https://github.com/TexasCoding/kalshi-python-sdk/issues/16) Weekly spec sync CI | `feat/issue-16-spec-sync-ci` | DONE | `fb4df0f` | `.github/workflows/spec-sync.yml` (+151) — cron Mon 06:00 UTC, uses `scripts/sync_spec.py` + `scripts/generate.py`, opens/updates PR via `peter-evans/create-pull-request@v7` | -| B | [#55](https://github.com/TexasCoding/kalshi-python-sdk/issues/55) Nightly integration CI | `feat/issue-55-integration-ci` | DONE | `8ad6d68` | `.github/workflows/integration-nightly.yml` (+98) — cron 03:00 UTC, secrets-gated, dedupes failure issues by title. **Originally committed to main by accident; relocated to feature branch and main reset.** | -| C | [#54](https://github.com/TexasCoding/kalshi-python-sdk/issues/54) Constructor-variant integration tests | `feat/issue-54-constructor-tests` | DONE | `62b4d2e` | `tests/integration/test_client_construction.py` (+127) — 5 ctor variants, skips cleanly without secrets | -| D | [#48](https://github.com/TexasCoding/kalshi-python-sdk/issues/48) Standardize `list_all` iteration idiom | `chore/issue-48-iteration-idiom` | DONE | `872adb2` | `test_events.py` + `test_multivariate.py` (+12/-9). `test_series.py` had no `list_all` tests. Added `# noqa: SIM113` on manual counters. | -| E | [#47](https://github.com/TexasCoding/kalshi-python-sdk/issues/47) Async `batch_cancel` via shared helper | `fix/issue-47-async-delete-with-body` | DONE | `2ef8b9e` | `kalshi/resources/_base.py` (+28), `kalshi/resources/orders.py` (+2/-7), `tests/test_async_orders.py` (+51). Helper symmetric on Sync/AsyncResource. | -| F | [#14](https://github.com/TexasCoding/kalshi-python-sdk/issues/14) Docs scaffold (MkDocs + mkdocstrings) | `docs/issue-14-mkdocs-scaffold` | DONE | `bda1dae` | 12 files (+473) — `mkdocs.yml`, full Getting Started + Auth pages, stubs elsewhere, `docs.yml` GH Pages workflow, `pyproject.toml` docs extra. **#14 left open**; follow-up issue **#57** created for content fill. | - -### Surprise during execution - -Agent B (#55) wrote its workflow file to the main repo path and committed to `main` instead of its declared worktree. Recovery: - -1. Created `feat/issue-55-integration-ci` at the orphan commit `8ad6d68`. -2. `git reset --hard 055cd9a` on main (commit was local-only, not pushed). -3. Renamed the other five `worktree-agent-*` branches to canonical `feat/`, `fix/`, `chore/`, `docs/` names per the original plan. - -The work itself was correct; only the placement was wrong. - -### File-overlap matrix (Wave 1) - -| | A | B | C | D | E | F | -|-------|---|---|---|---|---|---| -| A #16 | — | ✓ workflows/ different files | ✓ no overlap | ✓ no overlap | ✓ no overlap | ✓ no overlap | -| B #55 | | — | ✓ no overlap | ✓ no overlap | ✓ no overlap | ✓ no overlap | -| C #54 | | | — | ✓ different test files | ✓ no overlap | ✓ no overlap | -| D #48 | | | | — | ✓ no overlap | ✓ no overlap | -| E #47 | | | | | — | ✓ no overlap | -| F #14 | | | | | | — | - -No two agents touch the same file. - ---- - -## Wave 2 — Four agents in parallel (after Wave 1 merges) — ✅ MERGED - -All four PRs merged to `main` via squash on 2026-05-16. Main advanced -`3996473` → `7a540e3`. Issues #12, #13, #49, #51, #52 auto-closed via -`Closes #N`. GitNexus index refreshed (5,563 nodes / 12,059 edges). -Follow-up issue **#68** opened for the `wire_normalization` ExclusionKind split. - -All four branches off `main` at `3996473`. Awaiting integration review (not pushed). - -| Agent | Issue(s) | Branch | Commit | Result | -|---|---|---|---|---| -| G | [#51](https://github.com/TexasCoding/kalshi-python-sdk/issues/51) + [#52](https://github.com/TexasCoding/kalshi-python-sdk/issues/52) | `fix/typed-exclusion-kind-nested-drift` | `5358fbd` | `tests/_contract_support.py` + `tests/test_contracts.py` (+263/−42). Typed `Exclusion.kind: Literal`, all 47 EXCLUSIONS classified, nested-body drift recursion. Kept `TickerPair.extra="allow"` — used in both request and response (LookupPoint echoes provider keys); existing pin test documents the carve-out. | -| H | [#49](https://github.com/TexasCoding/kalshi-python-sdk/issues/49) | `fix/issue-49-auth-guards` | `fbac7a1` | `kalshi/resources/markets.py` + 2 tests (+14/−1). Spec walk found only `markets.orderbook` (sync + async) missing the guard. Flagged: `exchange.user_data_timestamp` has a guard but spec says no auth — left untouched per "don't remove existing guards" rule. Worth a follow-up. | -| I | [#13](https://github.com/TexasCoding/kalshi-python-sdk/issues/13) | `feat/mock-transport-issue-13` | `b3e7db2` | new `kalshi/testing/` module (4 files), extends `KalshiClient`/`AsyncKalshiClient`/`SyncTransport`/`AsyncTransport` with optional `transport=` kwarg. Recording + Replay transports for both sync/async; fingerprint ignores signature/timestamp headers. 9 new tests. Total +632/−14. | -| J | [#12](https://github.com/TexasCoding/kalshi-python-sdk/issues/12) | `feat/issue-12-dataframes` | `6af7cd1` | `kalshi/models/common.py` + `pyproject.toml` + `tests/test_page_dataframe.py` (+199/−1). `Page.to_dataframe()` / `.to_polars()` with lazy imports, optional extras `kalshi-sdk[pandas]` / `[polars]`. 6 new tests. **Note:** agent skipped `gitnexus_impact` on `Page` (purely additive — no signature edits — so zero blast radius for existing callers, but the gate was bypassed). | - -### Wave 2 execution surprises - -- **Agent I's worktree was auto-cleaned**: its branch `feat/mock-transport-issue-13` ended up checked out in the **main repo's** working tree instead of in a worktree. Recovery: `git checkout main` in the main repo. Commit `b3e7db2` is intact. -- **Agent J never committed**: it modified files but left them unstaged in its worktree. I committed manually with `Closes #12` and renamed the branch from the auto-generated `worktree-agent-…` name. Agent J also briefly edited files in the main repo by absolute path before realizing the harness was resetting CWD; it reverted main repo, no damage. -- **Agents H and J both ended up on auto-generated `worktree-agent-…` branch names** — renamed both to canonical `fix/`/`feat/` names manually. - ---- - -## Wave 3 — Sequential (broad signature sweeps; rebase chain) - -| Order | Issue | Branch | Status | -|---|---|---|---| -| 1 | [#56](https://github.com/TexasCoding/kalshi-python-sdk/issues/56) model-first request API | merged `d057c42` (#69) | ✅ | -| 2 | [#50](https://github.com/TexasCoding/kalshi-python-sdk/issues/50) `Literal` enum kwargs | merged `dd72fea` (#70) | ✅ | -| 3 | [#46](https://github.com/TexasCoding/kalshi-python-sdk/issues/46) sync/async dedup | merged `9de0cee` (#71) | ✅ | - ---- - -## Wave 4 — Follow-ups from earlier reviews — ✅ MERGED - -All three PRs merged to `main`. Issues #57, #68, #72 closed. - -| Agent | Issue | Squash commit | -|---|---|---| -| K | [#57](https://github.com/TexasCoding/kalshi-python-sdk/issues/57) MkDocs content guides | `e24c8fe` (#73) | -| L | [#68](https://github.com/TexasCoding/kalshi-python-sdk/issues/68) split `wire_normalization` ExclusionKind | `c224ec8` (#74) | -| M | [#72](https://github.com/TexasCoding/kalshi-python-sdk/issues/72) async `lookup_tickers` guard | `1afa5a2` (#75) | - -## Wave 5 — Post-v1.1.0 audit swarm — ✅ DONE - -Five read-only audits produced **79 findings**, consolidated into **30 GitHub -issues** (#77–#106). Audit reports in `.planning/audit-{N,O,P,Q,R}-*.md`. - -| Agent | Focus | Findings | Issues | -|---|---|---|---| -| N | Architecture & code quality | 17 (1 H, 7 M, 9 L) | bundled into #87–#91 + #106 | -| O | Security audit | 13 (1 H, 5 M, 3 L, 4 info) | #92–#96 | -| P | WebSocket deep-dive | 16 (6 H, 5 M, 4 L) | #77–#86 | -| Q | Test coverage | 18 (2 H, 9 M, 7 L) | #97–#102 | -| R | Performance & async | 15 (4 M, 11 L) | #103–#106 | - -First-attempt agents `octo:droids:octo-code-reviewer`, -`octo:droids:octo-security-auditor`, and `octo:droids:octo-performance-engineer` -all failed (rejected, hung in "let me actually execute" loop, hallucinated a -report respectively). `general-purpose` agents completed reliably. **Don't use -octo droids for audit work in this environment.** - -## Deferred / blocked - -| Issue | Why | Unblocks when | -|---|---|---| -| [#45](https://github.com/TexasCoding/kalshi-python-sdk/issues/45) verify `json={}` prod | Needs production credentials | Prod API key acquired | -| [#53](https://github.com/TexasCoding/kalshi-python-sdk/issues/53) nested `$ref` resolver | Premature — no nested refs in spec yet | Spec introduces nested `$ref` in body schema | - ---- - -## Conventions - -- Each agent commits with `Closes #N` so the issue auto-closes on merge. -- Each agent runs `uv run ruff check .`, `uv run mypy kalshi/`, and `uv run pytest tests/ -v` (scope-appropriate) before declaring done. -- No agent pushes or opens a PR — user reviews local worktree branches before integration. -- Agents do NOT touch files outside their declared scope. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..fdea50e --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,37 @@ +# Code of Conduct + +This project follows the **[Contributor Covenant v2.1](https://www.contributor-covenant.org/version/2/1/code_of_conduct/)**. + +The Contributor Covenant is a widely-adopted code of conduct used across +the Python and broader open-source ecosystem. The canonical text is at +the link above and is the authoritative reference. + +## Scope + +This Code of Conduct applies within all project spaces — the GitHub +repository (issues, pull requests, discussions, code review), the +documentation site, and any other forum maintained by the project. It +also applies when an individual is officially representing the project +in public spaces. + +## Reporting + +To report a Code of Conduct concern, contact a project maintainer +directly via their GitHub profile (DM or the email listed there). Avoid +public issues — they're indexed and can amplify the harm to the +reporter. + +GitHub's Private Vulnerability Reporting is **not** the right channel +for CoC reports: it's wired into security-advisory tooling (CVSS, +advisory drafts, security alerts) and will confuse both reporter and +maintainer workflows. Reserve it for actual vulnerabilities per +[`SECURITY.md`](SECURITY.md). + +Reports are read by maintainers only and handled per the enforcement +guidelines in the linked Covenant. + +## Enforcement + +Maintainers will follow the **Enforcement Guidelines** section of the +linked Covenant when responding to reports. Possible responses range +from a private correction to a permanent ban, scaled to severity. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..b898ac7 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,97 @@ +# Contributing + +Thanks for considering a contribution. This project is a Kalshi API SDK +and benefits from contributions that improve correctness, coverage, +performance, and developer experience. + +## Before you start + +- **Open an issue first** for anything non-trivial (new resource, public + API change, behavior change). A 5-minute discussion saves a 50-line + rewrite at PR time. +- **Search existing issues** — many ideas are already tracked. +- Read `CLAUDE.md` (root) for project conventions. They apply equally to + human and AI contributors. + +## Dev setup + +```bash +git clone https://github.com/TexasCoding/kalshi-python-sdk +cd kalshi-python-sdk +uv sync +uv run pytest tests/ --ignore=tests/integration -v +uv run mypy kalshi/ +uv run ruff check . +``` + +Python 3.12+ required. `uv` is the package manager. + +## Project conventions + +These are enforced by reviewers and CI: + +- **Simplicity first.** Minimum code that solves the problem. No + speculative abstractions or features beyond what was asked. +- **Surgical changes.** Touch only what you need. Don't refactor adjacent + code in the same PR. +- **Type-safe.** `mypy --strict` clean. Public methods are typed end-to-end. +- **No comments referencing the current task or PR number** in + production code. Those belong in the PR description and commit + message — they rot in code as the codebase evolves. +- **`*Request` body models use `extra="forbid"`.** Phantom kwargs fail + at call time, not silently on the wire. +- **Response models use `extra="allow"`.** Enforced by + `tests/test_model_extra_policy.py`. +- **POST and DELETE are never retried.** Duplicate-order risk. + +See `CLAUDE.md` for the full list, including the price/`_dollars` alias +convention, RSA-PSS signing payload format, and the spec-drift test +framework. + +## Tests + +Three suites: + +- **Unit** — `uv run pytest tests/ --ignore=tests/integration -q`. Must + pass on every PR. No network access; uses `respx` to mock httpx. +- **Integration** — `uv run pytest tests/integration/ -q`. Hits the + Kalshi demo API. Requires `KALSHI_KEY_ID` + `KALSHI_PRIVATE_KEY_PATH` + env vars. Skipped automatically if creds are absent. +- **Contract drift** — `tests/test_contracts.py` parametrizes over + every endpoint in `METHOD_ENDPOINT_MAP`. Hard-fails CI if the SDK + signature drifts from the OpenAPI spec without an explicit exclusion. + +If your change adds a new endpoint, register it in +`tests/_contract_support.py` `METHOD_ENDPOINT_MAP`. + +## PR checklist + +- [ ] Tests added or updated (regression test for bug fixes; happy + + error + edge cases for new features). +- [ ] `uv run ruff check .` clean. +- [ ] `uv run mypy kalshi/` clean. +- [ ] `uv run pytest tests/ --ignore=tests/integration -q` passes. +- [ ] PR title is a concise summary; PR body explains the *why*. +- [ ] Commit messages reference issues via `Closes #N` or `Refs #N` so + they auto-close on merge. +- [ ] For breaking changes: `CHANGELOG.md` entry under + `[Unreleased] → Breaking`, plus a note in `docs/migration.md`. + +## Review cycle + +Most PRs receive an automated code review from Claude (the +`claude-review` check). This is **advisory, not required** — it fails +by design on Dependabot PRs and on PRs that modify the review workflow +itself. When it does run, please address the substantive feedback or +reply with a brief justification for skipping it. Maintainers +squash-merge after the required CI checks are green and review is +addressed. + +## Reporting bugs / requesting features + +Use the GitHub issue templates. For security issues, see `SECURITY.md`. + +## License + +By contributing, you agree that your contributions are licensed under +the project's MIT License (see `LICENSE`). diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..8022594 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,75 @@ +# Security Policy + +## Supported versions + +We follow [semantic versioning](https://semver.org/). Security fixes land +on the latest minor release of the most recent major version. Older +majors receive fixes for **critical** issues at maintainer discretion. + +| Version | Supported | +|---------|-----------| +| 2.x | ✅ | +| 1.x | ❌ (please upgrade — see [docs/migration.md](docs/migration.md)) | +| < 1.0 | ❌ | + +## Reporting a vulnerability + +**Please do not open a public issue for security vulnerabilities.** + +Use GitHub's [Private Vulnerability Reporting](https://github.com/TexasCoding/kalshi-python-sdk/security/advisories/new) +to disclose privately. Reports go directly to maintainers and are not +visible until a coordinated disclosure. + +When reporting, please include: + +- A description of the issue and its potential impact. +- Steps to reproduce (minimal proof-of-concept preferred). +- Affected versions and environment (Python version, OS). +- Suggested mitigation, if known. + +### Response timeline + +We aim for: + +- **Acknowledgement** within 72 hours. +- **Triage + severity assessment** within 7 days. +- **Fix or mitigation plan** within 30 days for high/critical issues. + +Reporters who follow this process and request credit will be named in +the CHANGELOG entry for the fix release, unless they prefer anonymity. + +## Scope + +In scope: + +- Credential or PII leakage from `KalshiError` / log output. +- Unsafe deserialization or remote code execution paths. +- Auth-bypass / signing-bypass in `kalshi.auth`. +- Supply-chain compromise vectors in the build, release, or + spec-sync pipelines. + +Out of scope: + +- Vulnerabilities in upstream dependencies — please report those to the + affected project. We track upstream CVEs via Dependabot security + updates and `pip-audit`. +- Issues in the Kalshi API itself — please contact + [Kalshi support](https://kalshi.com/support). +- DoS via legitimate API rate limits (the SDK respects `Retry-After`). + +## Security measures in this repo + +- Secret scanning + push protection enabled on this repository. +- Dependabot version updates (weekly) + security updates (CVE-triggered). +- Nightly `pip-audit` workflow against the resolved dev environment. +- `release.yml` uses [PyPI Trusted Publishers](https://docs.pypi.org/trusted-publishers/) + with [sigstore attestations](https://docs.pypi.org/attestations/) — + no API tokens, no manual upload step. +- Spec-sync workflow runs with `contents: read` only; it cannot push, + open PRs, or execute upstream-derived Python. +- Third-party Actions SHA-pinned in workflows that hold elevated + permissions (release, spec-sync, claude review). +- RSA-PSS request signing with timestamp; signatures rejected if + clock-skew exceeds the API's tolerance. + +If you spot a gap in these controls, please report via the channel above. diff --git a/scripts/audit_demo_feasibility.py b/scripts/audit_demo_feasibility.py deleted file mode 100644 index a2bf6b1..0000000 --- a/scripts/audit_demo_feasibility.py +++ /dev/null @@ -1,243 +0,0 @@ -"""Path B demo-feasibility audit for v0.10-v0.13 endpoints. - -Probes every REST endpoint in the spec that is NOT yet in -``tests/_contract_support.METHOD_ENDPOINT_MAP`` against the Kalshi demo server -and classifies each as ``demo-supported`` / ``demo-501`` / ``auth-gated``. - -Classification rules (applied to the HTTP status returned by demo): - 200/204 → demo-supported (happy path responds) - 400/404/422 → demo-supported (route exists; our probe payload was - intentionally minimal, a validation error is expected) - 401/403 → auth-gated (endpoint exists but demo account lacks - permission; real creds may unlock it) - 405 → method-not-allowed (spec/server disagreement) - 501 → demo-501 (demo refuses to implement this one) - 5xx → transient (retried once; if still failing, flagged) - -Output: a grouped table printed to stdout. Run with ``uv run python -scripts/audit_demo_feasibility.py``. - -This script is read-only against demo: every probe uses either GET/DELETE -with a placeholder ID (which cannot match a real resource) or POST/PUT with -an empty body (which will 400 before creating anything). Nothing is mutated. -""" - -from __future__ import annotations - -import os -import re -import sys -import time -from collections import defaultdict -from pathlib import Path -from typing import Any - -import yaml - -ROOT = Path(__file__).resolve().parent.parent -sys.path.insert(0, str(ROOT)) - -# Bridge .env-style KALSHI_DEMO_* to KALSHI_* before importing the client. -try: - from dotenv import load_dotenv - - load_dotenv(ROOT / ".env") -except ImportError: - pass - -if os.environ.get("KALSHI_DEMO_KEY_ID") and not os.environ.get("KALSHI_KEY_ID"): - os.environ["KALSHI_KEY_ID"] = os.environ["KALSHI_DEMO_KEY_ID"] -if os.environ.get("KALSHI_DEMO_PRIVATE_KEY_PATH") and not os.environ.get( - "KALSHI_PRIVATE_KEY_PATH" -): - os.environ["KALSHI_PRIVATE_KEY_PATH"] = os.environ["KALSHI_DEMO_PRIVATE_KEY_PATH"] -os.environ.setdefault("KALSHI_DEMO", "true") - -from kalshi.client import KalshiClient # noqa: E402 -from kalshi.errors import ( # noqa: E402 - KalshiAuthError, - KalshiError, - KalshiNotFoundError, - KalshiRateLimitError, - KalshiServerError, - KalshiValidationError, -) - -PLACEHOLDER = "AUDIT-NONEXISTENT-ID" - - -def load_spec_endpoints() -> set[tuple[str, str]]: - with open(ROOT / "specs" / "openapi.yaml") as f: - spec = yaml.safe_load(f) - out: set[tuple[str, str]] = set() - for path, methods in spec["paths"].items(): - for m, _op in methods.items(): - if m in ("get", "post", "put", "delete", "patch"): - out.add((m.upper(), path)) - return out - - -def load_covered_endpoints() -> set[tuple[str, str]]: - src = (ROOT / "tests" / "_contract_support.py").read_text() - blocks = re.findall( - r"MethodEndpointEntry\([^)]*?(?:\([^)]*?\)[^)]*?)*\)", src, re.DOTALL - ) - covered: set[tuple[str, str]] = set() - for b in blocks: - m = re.search(r'http_method="([A-Z]+)"', b) - p = re.search( - r'path_template=(?:"([^"]+)"|\(\s*"([^"]+)"\s*\))', b - ) - if m and p: - covered.add((m.group(1), p.group(1) or p.group(2))) - return covered - - -def substitute_path(path: str) -> str: - return re.sub(r"\{[^}]+\}", PLACEHOLDER, path) - - -def classify(status: int) -> str: - if 200 <= status < 300: - return "demo-supported (2xx)" - if status in (400, 422): - return "demo-supported (4xx validation)" - if status == 404: - return "demo-supported (404 — route exists)" - if status in (401, 403): - return "auth-gated" - if status == 405: - return "method-not-allowed (spec/server drift)" - if status == 501: - return "demo-501" - if status == 429: - return "rate-limited" - if 500 <= status < 600: - return f"demo-5xx (status {status})" - return f"unknown-{status}" - - -def probe_one(client: KalshiClient, method: str, path_template: str) -> tuple[int, str]: - """Return (status_code, error_class_name) for one probe.""" - concrete_path = substitute_path(path_template) - body: dict[str, Any] | None = {} if method in ("POST", "PUT", "PATCH") else None - params: dict[str, Any] | None = None - # a handful of GETs require query params up front; send none and accept a 400 - try: - resp = client._transport.request( - method, concrete_path, params=params, json=body - ) - return (resp.status_code, "OK") - except KalshiNotFoundError as e: - return (e.status_code or 404, "KalshiNotFoundError") - except KalshiValidationError as e: - return (e.status_code or 400, "KalshiValidationError") - except KalshiAuthError as e: - return (e.status_code or 401, "KalshiAuthError") - except KalshiRateLimitError as e: - return (e.status_code or 429, "KalshiRateLimitError") - except KalshiServerError as e: - return (e.status_code or 500, "KalshiServerError") - except KalshiError as e: - # Generic KalshiError: check if it carries a status_code - sc = getattr(e, "status_code", None) or 0 - return (sc, type(e).__name__) - - -def phase_for(path: str) -> str: - """Map endpoint to its TODOS phase.""" - if path.startswith("/portfolio/order_groups"): - return "v0.10.0" - if path.startswith("/communications/"): - return "v0.11.0 (RFQ)" - if path.startswith("/portfolio/subaccounts"): - return "v0.11.0 (Subaccounts)" - if path.startswith("/api_keys"): - return "v0.12.0 (API Keys)" - if path.startswith("/markets/candlesticks") or path == "/markets/orderbooks": - return "v0.12.0 (Bulk/Batch)" - if path.startswith("/markets/trades"): - return "v0.12.0 (Bulk/Batch)" - if path.startswith("/milestones"): - return "v0.12.0 (Milestones)" - if path.startswith("/live_data"): - return "v0.12.0 (Milestones/live_data)" - if path.startswith("/fcm/"): - return "v0.13.0 (FCM)" - if path.startswith("/structured_targets"): - return "v0.13.0 (Structured Targets)" - if path.startswith("/search/") or path == "/account/limits": - return "v0.13.0 (Search/Account)" - if path == "/incentive_programs": - return "v0.13.0 (Incentives)" - if path == "/exchange/user_data_timestamp": - return "v0.13.0 (Exchange)" - if path.startswith("/portfolio/summary"): - return "v0.13.0 (Portfolio Summary)" - return "v0.13.0 (other)" - - -def main() -> None: - if not os.environ.get("KALSHI_KEY_ID"): - print("ERROR: KALSHI_KEY_ID not set (check .env)", file=sys.stderr) - sys.exit(1) - - spec = load_spec_endpoints() - covered = load_covered_endpoints() - uncovered = sorted(spec - covered) - print(f"Spec endpoints: {len(spec)}") - print(f"Covered by METHOD_ENDPOINT_MAP: {len(covered)}") - print(f"Uncovered (audit targets): {len(uncovered)}\n") - - client = KalshiClient.from_env() - # Safety gate - if "demo" not in client._config.base_url: - print(f"ABORT: base_url is {client._config.base_url}, refusing to audit", file=sys.stderr) - sys.exit(2) - print(f"Probing {client._config.base_url}\n") - - results: list[tuple[str, str, str, int, str, str]] = [] # phase,method,path,status,class,err - for i, (method, path) in enumerate(uncovered, 1): - status, err = probe_one(client, method, path) - klass = classify(status) - ph = phase_for(path) - results.append((ph, method, path, status, klass, err)) - print(f"[{i:>2}/{len(uncovered)}] {method:<6} {path:<60} → {status} {klass}") - time.sleep(0.1) # gentle on demo - - client.close() - - # Summary grouped by phase - print("\n" + "=" * 96) - print("SUMMARY BY PHASE") - print("=" * 96) - by_phase: dict[str, list[tuple[str, str, int, str, str]]] = defaultdict(list) - for ph, m, p, s, k, e in results: - by_phase[ph].append((m, p, s, k, e)) - - for ph in sorted(by_phase): - print(f"\n## {ph}") - for m, p, s, k, _e in by_phase[ph]: - print(f" {m:<6} {p:<58} {s:>3} {k}") - - # Top-level classification counts - print("\n" + "=" * 96) - print("TOTALS") - print("=" * 96) - counts: dict[str, int] = defaultdict(int) - for _ph, _m, _p, _s, k, _e in results: - # bucket into the three TODOS categories - if k.startswith("demo-supported"): - counts["demo-supported"] += 1 - elif k == "auth-gated": - counts["auth-gated"] += 1 - elif k == "demo-501": - counts["demo-501"] += 1 - else: - counts[f"other ({k})"] += 1 - for k, n in sorted(counts.items(), key=lambda x: -x[1]): - print(f" {k:<40} {n}") - - -if __name__ == "__main__": - main()