darkresearch · edgarpavlovsky · Jan 9, 2026 · Jan 9, 2026 · Jan 9, 2026 · Jan 9, 2026
diff --git a/.ai-rules.md b/.ai-rules.md
diff --git a/.claude-plugin/commands/fireteam.md b/.claude-plugin/commands/fireteam.md
@@ -0,0 +1,65 @@
+# /fireteam
+
+Multi-phase autonomous task execution with complexity-based routing.
+
+## Usage
+
+```
+/fireteam <goal>
+```
+
+## Configuration
+
+Set these environment variables to configure fireteam behavior:
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `ANTHROPIC_API_KEY` | (required) | API key for Claude |
+| `FIRETEAM_MAX_ITERATIONS` | (none/infinite) | Maximum loop iterations. Leave unset for infinite. |
+| `FIRETEAM_LOG_LEVEL` | INFO | Logging verbosity (DEBUG, INFO, WARNING, ERROR) |
+
+## Examples
+
+```
+/fireteam Fix the authentication bug in auth.py
+/fireteam Refactor the user module to use dependency injection
+/fireteam Add comprehensive tests for the payment service
+```
+
+## How It Works
+
+1. **Complexity Estimation**: Analyzes your goal and estimates complexity (TRIVIAL, SIMPLE, MODERATE, COMPLEX)
+2. **Mode Selection**: Routes to the appropriate execution strategy:
+   - TRIVIAL/SIMPLE → SINGLE_TURN (one-shot execution)
+   - MODERATE → Execute → Review loop until >95% complete
+   - COMPLEX → Plan → Execute → 3 parallel reviews, looping until at least 2 of the 3 reviews report >95% complete
+3. **Loop Until Complete**: The MODERATE and FULL (used for COMPLEX goals) modes loop continuously until the task is complete or the iteration limit (`FIRETEAM_MAX_ITERATIONS` / `max_iterations`) is reached, if one is set
+
+## Configuration via Code
+
+When using fireteam as a library:
+
+```python
+from fireteam import execute, ExecutionMode
+
+# Infinite iterations (default)
+result = await execute(
+    project_dir="/path/to/project",
+    goal="Implement feature X",
+)
+
+# Limited iterations
+result = await execute(
+    project_dir="/path/to/project",
+    goal="Implement feature X",
+    max_iterations=10,  # Stop after 10 iterations if not complete
+)
+
+# Force a specific mode
+result = await execute(
+    project_dir="/path/to/project",
+    goal="Implement feature X",
+    mode=ExecutionMode.FULL,
+    max_iterations=5,
+)
+```
diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json
@@ -0,0 +1,7 @@
+{
+  "name": "fireteam",
+  "version": "0.1.0",
+  "description": "Multi-phase autonomous task execution with complexity estimation, planning, execution, and review",
+  "commands": ["commands/fireteam.md"],
+  "hooks": "hooks/hooks.json"
+}
diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md
@@ -0,0 +1,43 @@
+# Fireteam Agent Principles
+
+These principles are automatically loaded by the Claude Agent SDK and guide all fireteam operations.
+
+## Testing
+
+- Write tests as you implement (not as an afterthought)
+- Run tests after every code change
+- Don't consider a task complete until tests pass
+- If tests fail, fix them before moving on
+
+## Quality Gates
+
+- All CI checks must pass locally before completion
+- Run linting, type checking, and tests before considering work done
+- If any quality check fails, address it immediately
+
+## Progress Checkpoints
+
+- After significant progress, step back and reassess
+- Ask yourself: How are we doing? What's left? Is this more complex than expected?
+- Update your todo list to reflect current understanding
+- If the task has grown beyond the original estimate, flag it for re-evaluation
+
+## Escalation
+
+- If stuck after 3 attempts on the same issue, consider a different approach
+- If a task turns out to be more complex than estimated, communicate this
+- Don't silently struggle - surface blockers early
+
+## Code Quality
+
+- Write clean, readable code with clear intent
+- Follow existing patterns in the codebase
+- Add comments only where the logic isn't self-evident
+- Don't over-engineer - solve the problem at hand
+
+## Minimal Changes
+
+- Make the smallest change that solves the problem
+- Don't refactor unrelated code
+- Don't add features that weren't requested
+- Keep diffs focused and reviewable
diff --git a/.cursorrules b/.cursorrules
diff --git a/.env.example b/.env.example
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -4,88 +4,76 @@ on:
   pull_request:
     branches: [ main ]
   push:
-    branches: [ main ]  # Only run on direct pushes to main
+    branches: [ main ]
 
 jobs:
   fast-tests:
     name: Fast Tests (Unit + Lightweight)
     runs-on: ubuntu-latest
-    
+
     steps:
     - uses: actions/checkout@v4
-    
+
     - name: Set up Python 3.12
       uses: actions/setup-python@v5
       with:
         python-version: '3.12'
-    
+
     - name: Install uv
       run: curl -LsSf https://astral.sh/uv/install.sh | sh
-    
+
     - name: Create virtual environment
       run: uv venv
-    
+
     - name: Install dependencies
       run: |
         source .venv/bin/activate
-        uv pip install -r requirements.txt
-    
-    - name: Run all fast tests
+        uv pip install -e ".[dev]"
+
+    - name: Run unit tests
       run: |
         source .venv/bin/activate
-        pytest tests/ -m "not slow and not e2e and not integration" -v --tb=short
+        pytest tests/ -v --tb=short
 
   e2e-tests:
     name: End-to-End Tests (API)
     runs-on: ubuntu-latest
-    timeout-minutes: 20  # Fail fast if tests hang
-    # Run on main branch and e/* branches for testing
+    timeout-minutes: 20
     if: |
-      github.ref == 'refs/heads/main' || 
+      github.ref == 'refs/heads/main' ||
       startsWith(github.ref, 'refs/heads/e/') ||
       startsWith(github.head_ref, 'e/')
-    
+
     steps:
     - uses: actions/checkout@v4
-
-    - name: Set up Node.js
-      uses: actions/setup-node@v4
-      with:
-        node-version: '20'
-
-    - name: Install Claude CLI
-      run: |
-        npm install -g @anthropic-ai/claude-code
-        echo "Claude CLI installed at: $(which claude)"
-        claude --version
-
+
     - name: Set up Python 3.12
       uses: actions/setup-python@v5
       with:
         python-version: '3.12'
-    
+
     - name: Install uv
       run: curl -LsSf https://astral.sh/uv/install.sh | sh
-    
+
     - name: Create virtual environment
       run: uv venv
-    
+
     - name: Install dependencies
       run: |
         source .venv/bin/activate
-        uv pip install -r requirements.txt
-    
-    - name: Run E2E tests
-      timeout-minutes: 15  # Per-step timeout
+        uv pip install -e ".[dev]"
+
+    - name: Run integration tests
+      timeout-minutes: 15
       env:
         ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-        PYTHONUNBUFFERED: "1"  # Force immediate output
+        PYTHONUNBUFFERED: "1"
       run: |
         source .venv/bin/activate
-        echo "Starting e2e tests at $(date)"
-        pytest tests/ -m "e2e" -v --tb=short -s --log-cli-level=INFO
-        echo "E2E tests completed at $(date)"
-    
+        echo "Starting integration tests at $(date)"
+        pytest tests/ --run-integration -v --tb=short -s
+        echo "Integration tests completed at $(date)"
+
     - name: Upload logs on failure
       if: failure()
       uses: actions/upload-artifact@v4
@@ -95,48 +83,3 @@ jobs:
           /tmp/fireteam-test-*/
           tests/**/*.log
         retention-days: 7
-
-  integration-tests:
-    name: Terminal-bench Integration
-    runs-on: ubuntu-latest
-    # Temporarily disabled - needs debugging
-    if: false
-
-    steps:
-    - uses: actions/checkout@v4
-
-    - name: Set up Python 3.12
-      uses: actions/setup-python@v5
-      with:
-        python-version: '3.12'
-
-    - name: Set up Docker
-      uses: docker/setup-buildx-action@v3
-
-    - name: Install uv
-      run: curl -LsSf https://astral.sh/uv/install.sh | sh
-
-    - name: Install terminal-bench
-      run: uv tool install terminal-bench
-
-    - name: Create virtual environment
-      run: uv venv
-
-    - name: Install dependencies
-      run: |
-        source .venv/bin/activate
-        uv pip install -r requirements.txt
-
-    - name: Install Fireteam adapter
-      run: |
-        source .venv/bin/activate
-        cd benchmark
-        uv pip install -e .
-
-    - name: Run terminal-bench integration test
-      env:
-        ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
-      run: |
-        source .venv/bin/activate
-        pytest tests/ -m "integration" -v --tb=short
-