Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions .github/workflows/_ci-gate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
# Filter name convention (caller defines these in `filters:` input):
#
# nix -> gates `nix_validate`
# markdown -> gates `markdown_lint` and `file_size`
# markdown -> gates `markdown_lint`, `file_size`, and `token_limits`
# python -> gates `python_security`
#
# Callers may include additional filters; this workflow ignores them. To add
Expand Down Expand Up @@ -85,6 +85,13 @@ on:
description: Enable `File Size` (gated on `nix` OR `markdown` filter)
type: boolean
default: false
token_limits:
description: >-
Enable `Token Limits` (gated on `markdown` filter). Budgets AI-read
docs via tiktoken per .token-limits.yaml; pairs with `file_size`,
which skips token-gated files. No secret required.
type: boolean
default: false
python_security:
description: Enable `Python Security` (gated on `python` filter)
type: boolean
Expand Down Expand Up @@ -167,6 +174,14 @@ jobs:
with:
runner_label: ${{ inputs.runner_label }}

token-limits:
name: Token Limits
needs: changes
if: ${{ inputs.token_limits && needs.changes.outputs.markdown == 'true' }}
uses: dryvist/.github/.github/workflows/_token-limits.yml@main
with:
runner_label: ${{ inputs.runner_label }}

python-security:
name: Python Security
needs: changes
Expand Down Expand Up @@ -218,7 +233,7 @@ jobs:
# ============================================================================
gate:
name: Merge Gate
needs: [changes, watchdog, nix-validate, markdown-lint, file-size, python-security]
needs: [changes, watchdog, nix-validate, markdown-lint, file-size, token-limits, python-security]
if: ${{ always() && !cancelled() }}
runs-on: ${{ inputs.runner_label }}
steps:
Expand All @@ -227,5 +242,5 @@ jobs:
with:
# `watchdog` is always-success-on-completion; treating it as
# allowed-skip lets `alls-green` ignore its result either way.
allowed-skips: nix-validate, markdown-lint, file-size, python-security, watchdog
allowed-skips: nix-validate, markdown-lint, file-size, token-limits, python-security, watchdog
jobs: ${{ toJSON(needs) }}
18 changes: 18 additions & 0 deletions .github/workflows/_file-size.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@
# scan: [.md, .nix] # replaces default scan list
# extended: { limit: 32768, files: [AGENTS] } # additive higher limit
# exempt: [RUNBOOK] # additive to default [CHANGELOG]
#
# Token partition: when a .token-limits.yaml is present, Markdown (.md) docs are
# token-gated (governed by _token-limits.yml) and dropped from this byte gate's
# scan, so every file is checked by exactly one gate. Repos without a
# .token-limits.yaml are unaffected. (.token-limits.yaml should budget all .md —
# e.g. a '*.md' catch-all — or exclude any it intentionally leaves ungated.)
name: _file-size

on:
Expand Down Expand Up @@ -70,6 +76,18 @@ jobs:
[ -n "$cfg_exempt" ] && EXEMPT="$EXEMPT$cfg_exempt "
fi

# When a token gate is active (.token-limits.yaml present), Markdown
# docs are governed by _token-limits.yml. Drop .md from this byte
# gate's scan so each file is checked by exactly one gate.
if [ -f ".token-limits.yaml" ]; then
new_scan=""
for ext in $DEFAULT_SCAN; do
[ "$ext" = ".md" ] && continue
new_scan="$new_scan $ext"
done
DEFAULT_SCAN="$new_scan"
fi

# Build find name arguments from scan extensions
name_args=(); first=true
for ext in $DEFAULT_SCAN; do
Expand Down
58 changes: 58 additions & 0 deletions .github/workflows/_token-limits.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Reusable: Token Limit Check
#
# Token-budgets AI-read docs (the prose files an agent loads for background)
# using the public, open-source `tiktoken` tokenizer — NO API key, NO secret.
# Per-repo config in `.token-limits.yaml` (see scripts/check-token-limits.py).
#
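# Pairs with `_file-size.yml`: this job budgets every file that matches a
# `limits` pattern, while the byte gate drops ALL `.md` files from its scan
# whenever `.token-limits.yaml` exists. `limits` should therefore cover every
# `.md` file (e.g. a `*.md` catch-all); Markdown that matches no pattern is
# checked by neither gate. Repos with no `.token-limits.yaml` get a no-op here
# and keep the byte gate's original behavior.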
name: _token-limits

on:
workflow_call:
inputs:
runner_label:
description: >-
GitHub Actions runner label. Defaults to ubuntu-latest. Pass a
RunsOn label to opt the calling repo into self-hosted runners.
type: string
required: false
default: ubuntu-latest

permissions: {}

concurrency:
group: token-limits-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
  # Single job: check out the caller repo, fetch the shared counter script,
  # install its two Python dependencies, and run it from the caller's root.
  check:
    name: Check
    runs-on: ${{ inputs.runner_label }}
    permissions:
      contents: read
    steps:
      - name: Checkout caller repo
        uses: actions/checkout@v6
        with:
          # No later step pushes or fetches, so keep the token out of
          # the checked-out repo's .git/config.
          persist-credentials: false

      # The script lands in .gh-shared/, a directory the script itself prunes
      # from its walk so it never scans its own checkout.
      - name: Sparse-checkout the shared token-counter from this repo
        uses: actions/checkout@v6
        with:
          repository: dryvist/.github
          ref: main
          path: .gh-shared
          sparse-checkout: scripts/check-token-limits.py
          sparse-checkout-cone-mode: false
          persist-credentials: false

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      # `python3 -m pip` installs into the same interpreter that runs the
      # script below, even on runners with several Pythons on PATH.
      - name: Install tiktoken and PyYAML
        run: python3 -m pip install --quiet tiktoken pyyaml

      - name: Check token limits
        run: python3 .gh-shared/scripts/check-token-limits.py
50 changes: 50 additions & 0 deletions scripts/check-token-limits.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/env python3
"""Fail if a token-gated file exceeds its .token-limits.yaml budget.

Counts locally with the public tiktoken tokenizer: no API key is needed, though
tiktoken fetches the encoding's vocabulary file on first use. First matching
`limits` glob wins (list specific patterns first); `exclude` globs are skipped.
Pairs with the byte file-size gate, which drops .md when this config is present.
"""
import fnmatch
import os
import sys

import tiktoken
import yaml

# Load the per-repo config. A missing file, or a wrongly-shaped top level,
# `limits`, or `exclude`, is treated as "nothing configured".
try:
    # Binary mode lets PyYAML detect the encoding itself instead of depending
    # on the runner's locale; `with` closes the handle promptly.
    with open(".token-limits.yaml", "rb") as fh:
        cfg = yaml.safe_load(fh)
except FileNotFoundError:
    cfg = {}
cfg = cfg if isinstance(cfg, dict) else {}
limits = cfg.get("limits")
limits = limits if isinstance(limits, dict) else {}
exclude = cfg.get("exclude")
# Keep only string globs: fnmatch raises TypeError on anything else
# (for example a bare number in the YAML list).
exclude = [e for e in exclude if isinstance(e, str)] if isinstance(exclude, list) else []
if not limits:
    # No budgets configured: succeed as a no-op.
    sys.exit(0)

# Tokenizer used for every count below.
enc = tiktoken.get_encoding("o200k_base")
# Directory names pruned from the walk; `.gh-shared` is where the workflow
# checks out this script, so the script never scans its own checkout.
SKIP = {".git", "node_modules", "result", ".terraform", ".terragrunt-cache", ".direnv", ".gh-shared"}


def hit(path, name, pat):
    """Return True when glob `pat` matches the relative path or the bare file name.

    Args:
        path: File path relative to the repository root.
        name: Base name of the same file.
        pat: An fnmatch-style glob pattern.
    """
    return any(fnmatch.fnmatch(candidate, pat) for candidate in (path, name))


# Walk the working tree, count tokens for every file that has a budget, and
# emit one GitHub Actions error annotation per file over its budget.
errors = 0
for root, dirs, files in os.walk("."):
    # Prune in place so os.walk never descends into skipped directories.
    dirs[:] = [d for d in dirs if d not in SKIP]
    for name in files:
        full = os.path.join(root, name)
        path = os.path.relpath(full, ".")
        if any(hit(path, name, e) for e in exclude):
            continue
        # First matching pattern wins. Only string patterns with real integer
        # budgets count: a YAML `true`/`false` is a bool (an int subclass in
        # Python), and a non-string key would make fnmatch raise TypeError.
        lim = next(
            (
                v
                for p, v in limits.items()
                if isinstance(p, str)
                and isinstance(v, int)
                and not isinstance(v, bool)
                and hit(path, name, p)
            ),
            None,
        )
        if lim is None:
            continue
        try:
            with open(full, encoding="utf-8") as fh:
                text = fh.read()
        except (UnicodeDecodeError, OSError):
            # Best-effort: unreadable or non-UTF-8 files are not budgeted.
            continue
        # By default tiktoken raises ValueError when the text contains
        # special-token strings such as "<|endoftext|>"; disabling that check
        # encodes them as ordinary text so such docs cannot crash the gate.
        tokens = len(enc.encode(text, disallowed_special=()))
        if tokens > lim:
            print(f"::error file={path}::{path} is {tokens} tokens (exceeds {lim})")
            errors += 1

# Non-zero exit fails the CI job when any file is over budget.
sys.exit(1 if errors else 0)