tensorzero
diff --git a/.github/workflows/ci-failure-diagnosis.yml b/.github/workflows/ci-failure-diagnosis.yml
Lines changed: 1 addition & 8 deletions
diff --git a/.github/workflows/provide-pull-request-feedback.yml b/.github/workflows/provide-pull-request-feedback.yml
Lines changed: 8 additions & 31 deletions
diff --git a/.gitignore b/.gitignore
Lines changed: 12 additions & 0 deletions
diff --git a/.prettierignore b/.prettierignore
Lines changed: 7 additions & 0 deletions
diff --git a/README.md b/README.md
Lines changed: 105 additions & 0 deletions
@@ -6,12 +6,6 @@ on:
     types:
       - completed
 
-# Workflow-level permissions (needed for Tailscale action to work)
-permissions:
-  contents: write
-  pull-requests: write
-  actions: read
-
 jobs:
   generate-patch:
     if: ${{ github.event.workflow_run.conclusion == 'failure' }}
@@ -49,14 +43,13 @@ jobs:
 
       - name: Generate patch
         id: generate
-        uses: tensorzero/experimental-ci-bot/generate-pr-patch@viraj/pr-only
+        uses: tensorzero/experimental-ci-bot/generate-pr-patch@main
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         with:
           token: ${{ secrets.GITHUB_TOKEN }}
           mode: patch-only
           tensorzero-base-url: http://localhost:3000
-          tensorzero-diff-patched-successfully-metric-name: tensorzero_github_ci_bot_diff_patched_successfully
           output-artifacts-dir: debug-logs
           clickhouse-url: ${{ secrets.CI_BOT_CLICKHOUSE_URL }}
           clickhouse-table: GitHubBotPullRequestToInferenceMap
 
@@ -14,42 +14,19 @@ jobs:
     runs-on: ubuntu-latest
     name: Create PR Feedback
     steps:
-      - name: Checkout TensorZero config file
-        uses: actions/checkout@v5
+      - name: Connect to Tailscale
+        uses: tailscale/github-action@v4
         with:
-          repository: tensorzero/experimental-ci-bot
-          sparse-checkout: |
-            tensorzero
-
-      - name: Start TensorZero gateway
-        run: |
-          docker pull tensorzero/gateway:latest
-          docker run -d --rm \
-            --name tensorzero-gateway \
-            -e TENSORZERO_CLICKHOUSE_URL=${{ secrets.CI_BOT_CLICKHOUSE_URL }} \
-            -e OPENAI_API_KEY=${{ secrets.CI_BOT_OPENAI_API_KEY }} \
-            -p 3000:3000 \
-            --volume ./tensorzero:/action-config \
-            tensorzero/gateway:latest --config-file /action-config/tensorzero.toml
-
-          for _i in {1..100}; do
-            curl -fsS http://localhost:3000/health && exit 0
-            sleep 3
-          done
-          echo "Gateway never became ready" >&2
-          exit 1
+          oauth-client-id: ${{ secrets.CI_BOT_TS_OAUTH_CLIENT_ID }}
+          oauth-secret: ${{ secrets.CI_BOT_TS_OAUTH_CLIENT_SECRET }}
+          tags: tag:ci
 
       - name: Send PR Feedback
         # Tracks the main branch of tensorzero/experimental-ci-bot.
-        uses: tensorzero/experimental-ci-bot/create-pr-feedback@viraj/pr-only
+        uses: tensorzero/experimental-ci-bot/create-pr-feedback@main
         with:
-          tensorzero-base-url: http://localhost:3000
+          tensorzero-base-url: http://ci-bot-gateway:3000
           # TODO: Switch to tensorzero_github_ci_bot_agent_pr_merged for episode-level feedback when agent creates PRs
-          tensorzero-pr-merged-metric-name: tensorzero_github_ci_bot_pr_merged
+          tensorzero-pr-merged-metric-name: ci_fix_pr_merged_agent
           clickhouse-url: ${{ secrets.CI_BOT_CLICKHOUSE_URL }}
           clickhouse-table: GitHubBotPullRequestToInferenceMap
-
-      - name: Stop TensorZero gateway
-        if: always()
-        run: docker stop tensorzero-gateway
-        continue-on-error: true
@@ -1,6 +1,7 @@
 # Dependency directory
 node_modules
 
+
 # Rest pulled from https://github.com/github/gitignore/blob/master/Node.gitignore
 # Logs
 logs
@@ -100,3 +101,14 @@ __tests__/runner/*
 # IDE files
 .idea
 *.code-workspace
+
+# Python
+.venv
+venv
+env
+__pycache__
+*.py[cod]
+*$py.class
+*.so
+.Python
+.envrc
@@ -3,3 +3,10 @@
 dist/
 node_modules/
 coverage/
+
+# Python
+.venv/
+venv/
+env/
+__pycache__/
+*.pyc
@@ -6,6 +6,111 @@
 [![CodeQL](https://github.com/actions/typescript-action/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/actions/typescript-action/actions/workflows/codeql-analysis.yml)
 [![Coverage](./badges/coverage.svg)](./badges/coverage.svg)
 
+## Running Locally
+
+You can run the mini-swe-agent locally against a pull request to test it
+before deploying it to GitHub Actions.
+
+### Prerequisites
+
+1. Install dependencies:
+
+   ```bash
+   npm install
+   npm run bundle  # Build the CLI
+   ```
+
+1. Set up required environment variables:
+
+   ```bash
+   # GitHub authentication (choose one):
+   export GITHUB_TOKEN=$(gh auth token)  # If using gh CLI
+   # OR
+   export GITHUB_TOKEN=ghp_your_token_here
+
+   # Model API keys (at least one required):
+   export ANTHROPIC_API_KEY=your_anthropic_key
+   # OR
+   export OPENAI_API_KEY=your_openai_key
+   ```
+
+### Usage
+
+#### Dry Run (Local Testing)
+
+Test the agent without creating PRs or comments on GitHub:
+
+```bash
+npm run cli -- --repo owner/repo --pr 123 --dry-run
+```
+
+This will:
+
+- Clone the PR repository
+- Run the mini-swe-agent to analyze and fix issues
+- Display the generated patch locally
+- Leave GitHub untouched (no PRs or comments are created)
+
+#### Live Mode (Create PRs/Comments)
+
+Run the agent and create actual PRs or inline comments on GitHub:
+
+```bash
+npm run cli -- --repo owner/repo --pr 456
+```
+
+This will:
+
+- Clone the PR repository
+- Run the mini-swe-agent
+- Create a follow-up PR or post inline comments based on the agent's decision
+
+#### With CI Failure Context
+
+If you have a specific workflow run that failed, you can provide its ID:
+
+```bash
+npm run cli -- --repo owner/repo --pr 789 --workflow-run-id 12345
+```
+
+### CLI Options
+
+```text
+-r, --repo <owner/repo>          Repository in "owner/repo" format
+-p, --pr <number>                Pull request number (required)
+-d, --dry-run                    Show patch locally without PRs/comments
+-t, --token <token>              GitHub token (default: $GITHUB_TOKEN or gh CLI)
+-w, --workflow-run-id <id>       Workflow run ID for failure logs
+-o, --output-dir <path>          Directory for debug artifacts
+--clickhouse-url <url>           ClickHouse URL for tracking
+--clickhouse-table <name>        ClickHouse table name
+-c, --cost-limit <dollars>       Cost limit (default: 3.0)
+--timeout <minutes>              Timeout in minutes (default: 30)
+-h, --help                       Show help message
+```
+
+### Examples
+
+```bash
+# Dry run on a public repository
+npm run cli -- --repo tensorzero/tensorzero --pr 100 --dry-run
+
+# Run on your own repository with custom settings
+export GITHUB_TOKEN=$(gh auth token)
+npm run cli -- \
+  --repo myorg/myrepo \
+  --pr 42 \
+  --cost-limit 5.0 \
+  --timeout 45 \
+  --output-dir ./debug-output
+
+# Analyze a specific failed workflow run
+npm run cli -- \
+  --repo owner/repo \
+  --pr 123 \
+  --workflow-run-id 9876543210
+```
+
 ## Developing
 
 - `npm install`