braintrustdata · Olmo Maldonado (ibolmo) · Mar 6, 2026 · Mar 6, 2026 · Mar 6, 2026 · Mar 6, 2026
diff --git a/.envrc b/.envrc
@@ -0,0 +1 @@
+dotenv_if_exists
diff --git a/.github/workflows/check-dist.yml b/.github/workflows/check-dist.yml
@@ -38,7 +38,7 @@ jobs:
 
       - uses: pnpm/action-setup@v3
         with:
-          version: 8
+          version: 10
 
       - name: Get pnpm store directory
         shell: bash

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -29,7 +29,7 @@ jobs:
 
       - uses: pnpm/action-setup@v3
         with:
-          version: 8
+          version: 10
 
       - name: Get pnpm store directory
         shell: bash

diff --git a/.github/workflows/eval-py-uv.yml b/.github/workflows/eval-py-uv.yml
@@ -35,6 +35,8 @@ jobs:
           cd test-eval-py
           uv lock --check
           uv sync --no-dev
+          echo "VIRTUAL_ENV=$(pwd)/.venv" >> $GITHUB_ENV
+          echo "$(pwd)/.venv/bin" >> $GITHUB_PATH
 
       - name: Run Evals
         uses: ./

diff --git a/.github/workflows/eval-single.yaml b/.github/workflows/eval-single.yaml
@@ -30,7 +30,7 @@ jobs:
 
       - uses: pnpm/action-setup@v3
         with:
-          version: 8
+          version: 10
 
       - name: Get pnpm store directory
         shell: bash

diff --git a/.github/workflows/eval.yml b/.github/workflows/eval.yml
@@ -30,7 +30,7 @@ jobs:
 
       - uses: pnpm/action-setup@v3
         with:
-          version: 8
+          version: 10
 
       - name: Get pnpm store directory
         shell: bash
@@ -55,7 +55,6 @@ jobs:
         with:
           api_key: ${{ secrets.BRAINTRUST_API_KEY }}
           root: test-eval
-          runtime: node
 
       # - name: Start terminal session
       #   uses: mxschmitt/action-tmate@v3

diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml
@@ -33,7 +33,7 @@ jobs:
 
       - uses: pnpm/action-setup@v3
         with:
-          version: 8
+          version: 10
 
       - name: Get pnpm store directory
         shell: bash

diff --git a/.husky/pre-commit b/.husky/pre-commit
@@ -0,0 +1,6 @@
+cd eval
+npm run format:check
+npm run lint
+npm run package
+cd ..
+git add eval/dist/
diff --git a/README.md b/README.md
@@ -1,88 +1,159 @@
 # Braintrust eval action
 
-This project enables you to run [Braintrust evals](braintrust.dev) as part of
-your CI/CD workflow in Github, using
-[Github actions](https://github.com/features/actions). To use this action,
-simply include the following step in an action file:
+This project enables you to run [Braintrust evals](https://braintrust.dev) as
+part of your CI/CD workflow in GitHub, using
+[GitHub Actions](https://github.com/features/actions). To use this action,
+include the following step in an action file:
 
 ```yaml
 - name: Run Evals
   uses: braintrustdata/eval-action@v1
   with:
     api_key: ${{ secrets.BRAINTRUST_API_KEY }}
-    runtime: node
 ```
 
-You can configure the following variables:
+## Inputs
 
-- `api_key`: Your
+- `api_key` (**required**): Your
   [Braintrust API key](https://www.braintrust.dev/app/settings/api-keys).
-- `root`: The root directory containing your evals (defaults to `'.'`). The root
-  directory must either have `node` or `python` configured.
-- `paths`: Specific paths, relative to the root, containing evals you'd like to
-  run.
-- `runtime`: Either `node` or `python`
-- `package_manager`: Either `npm`, `pnpm`, or `yarn` for a `node` runtime, or
-  `pip` or `uv` for a `python` runtime.
-- `use_proxy`: Either `true` or `false`. If set, `OPENAI_BASE_URL` will be set
-  to `https://braintrustproxy.com/v1`, which will automatically cache repetitive
-  LLM calls and run your evals faster. Defaults to `true`.
-- `terminate_on_failure`: Either `true` or `false`. If set to `true`, the
-  evaluation process will stop when an error occurs. Defaults to `false`.
-
-## Full example
+- `root`: The root directory containing your evals (defaults to `'.'`).
+- `paths`: Specific paths or glob patterns, relative to the root, to include.
+  Defaults to `'.'` (auto-discovers `*.eval.ts`, `*.eval.js`, `eval_*.py`,
+  etc.).
+- `runner`: The eval runner binary to use (e.g. `tsx`, `vite-node`, `bun`,
+  `python3`). When omitted, `bt` auto-detects from file extensions.
+- `use_proxy`: If `true`, sets `OPENAI_BASE_URL` to the Braintrust proxy to
+  cache LLM calls. Defaults to `true`.
+- `terminate_on_failure`: If `true`, stops on the first eval error. Defaults to
+  `false`.
+- `bt_version`: Version of the `bt` CLI to install. Leave empty for the latest
+  stable release. Pass a semver (e.g. `'0.2.0'`) to pin a version, or a release
+  tag (e.g. `'canary-add-glob-support'`) to install a canary build.
+
+> [!NOTE]
+> The `runtime` and `package_manager` inputs are deprecated and have no effect.
+> Use `runner` instead.
+
+## Full examples
+
+### Node.js (pnpm)
 
 ```yaml
-name: Run pnpm evals
+name: Run evals
 
-on:
-  push:
-    # Uncomment to run only when files in the 'evals' directory change
-    # - paths:
-    #     - "evals/**"
+on: push
 
 permissions:
   pull-requests: write
   contents: read
 
 jobs:
   eval:
-    name: Run evals
     runs-on: ubuntu-latest
-
     steps:
-      - name: Checkout
-        id: checkout
-        uses: actions/checkout@v4
+      - uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
-      - name: Setup Node.js
-        id: setup-node
-        uses: actions/setup-node@v4
+      - uses: actions/setup-node@v4
         with:
           node-version: 20
 
       - uses: pnpm/action-setup@v3
         with:
-          version: 8
+          version: 10
 
-      - name: Install Dependencies
-        id: install
+      - name: Install dependencies
         run: pnpm install
+        working-directory: my_eval_dir
+
+      - name: Run Evals
+        uses: braintrustdata/eval-action@v1
+        with:
+          api_key: ${{ secrets.BRAINTRUST_API_KEY }}
+          root: my_eval_dir
+```
+
+### Python (pip)
+
+```yaml
+name: Run evals
+
+on: push
+
+permissions:
+  pull-requests: write
+  contents: read
+
+jobs:
+  eval:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install dependencies
+        run: pip install -r requirements.txt
+        working-directory: my_eval_dir
+
+      - name: Run Evals
+        uses: braintrustdata/eval-action@v1
+        with:
+          api_key: ${{ secrets.BRAINTRUST_API_KEY }}
+          root: my_eval_dir
+          runner: python3
+```
+
+### Python (uv)
+
+```yaml
+name: Run evals
+
+on: push
+
+permissions:
+  pull-requests: write
+  contents: read
+
+jobs:
+  eval:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - uses: astral-sh/setup-uv@v5
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install dependencies
+        run: |
+          uv sync --no-dev
+          echo "VIRTUAL_ENV=$(pwd)/.venv" >> "$GITHUB_ENV"
+          echo "$(pwd)/.venv/bin" >> "$GITHUB_PATH"
+        working-directory: my_eval_dir
 
       - name: Run Evals
         uses: braintrustdata/eval-action@v1
         with:
           api_key: ${{ secrets.BRAINTRUST_API_KEY }}
-          runtime: node
           root: my_eval_dir
+          runner: python3
 ```
 
-> [!IMPORTANT] You must specify `permissions` for the action to leave comments
-> on your PR. Without these permissions, you'll see Github API errors.
+> [!IMPORTANT]
+> You must specify `permissions` for the action to leave comments on your PR.
+> Without these permissions, you'll see GitHub API errors.
 
-To see examples of fully configured templates, see the `examples` directory:
+For more fully configured templates, see the `examples` directory:
 
 - [`node with npm`](examples/node/npm.yml)
 - [`node with pnpm`](examples/node/pnpm.yml)
@@ -91,10 +162,11 @@ To see examples of fully configured templates, see the `examples` directory:
 
 ## How it works
 
-The action runs `braintrust eval` and collects experiment results, which are
-posted as a comment in the PR alongside a link to Braintrust. For example:
+The action installs the [`bt` CLI](https://github.com/braintrustdata/bt) and
+runs `bt eval`, collecting experiment results and posting them as a PR comment
+with a link to Braintrust. For example:
 
-### Example braintrust eval report
+### Example eval report
 
 **[Say Hi Bot (HEAD-1714341466)](https://www.braintrustdata.com/app/braintrustdata.com/p/Say%20Hi%20Bot/experiments/HEAD-1714341466)**
 

diff --git a/action.yml b/action.yml
@@ -21,12 +21,35 @@ inputs:
     required: false
     default: "."
   runtime:
-    description: "The runtime to use for evals. Valid values: node, python."
-    required: true
+    description:
+      "Deprecated. Use 'runner' instead. The runtime to use for evals. Valid
+      values: node, python."
+    required: false
+    default: ""
   package_manager:
     description:
-      "The package manager to use for evals. Valid values: npm, pnpm, yarn, pip,
-      or uv depending on the runtime."
+      "Deprecated. Use 'runner' instead. The package manager to use for evals."
+    required: false
+    default: ""
+  runner:
+    description:
+      "The eval runner binary to use (e.g. tsx, vite-node, ts-node, bun,
+      python3). When omitted, bt auto-detects from file extensions. Overrides
+      the deprecated 'runtime' and 'package_manager' inputs."
+    required: false
+    default: ""
+  filter:
+    description:
+      "Comma-separated filter expression(s) used to select which evaluators to
+      run."
+    required: false
+    default: ""
+  bt_version:
+    description:
+      "Version of the bt CLI to install. Leave empty for the latest stable
+      release. Pass a semver (e.g. '0.2.0') to pin a stable version. Pass a full
+      release tag (e.g. 'canary-add-glob-support') to install a canary build.
+      Note: bt eval is currently Unix (Linux/macOS) only."
     required: false
     default: ""
   use_proxy:
@@ -41,6 +64,51 @@ inputs:
       'true' or 'false'."
     required: false
     default: "false"
+  verbose:
+    description:
+      "Show verbose evaluator errors and stderr output. Set to 'true' or
+      'false'."
+    required: false
+    default: "false"
+  no_send_logs:
+    description:
+      "Run evals locally without sending logs to Braintrust. Set to 'true' or
+      'false'."
+    required: false
+    default: "false"
+  num_workers:
+    description:
+      "Number of worker threads for Python eval execution. Leave empty to use
+      the bt default."
+    required: false
+    default: ""
+  project:
+    description: "Override the active Braintrust project name."
+    required: false
+    default: ""
+  org:
+    description: "Override the active Braintrust organization name."
+    required: false
+    default: ""
+  api_url:
+    description:
+      "Override the Braintrust API URL (e.g. for self-hosted deployments)."
+    required: false
+    default: ""
+  app_url:
+    description:
+      "Override the Braintrust app URL (e.g. for self-hosted deployments)."
+    required: false
+    default: ""
+  no_color:
+    description: "Disable ANSI color output from bt. Set to 'true' or 'false'."
+    required: false
+    default: "false"
+  quiet:
+    description:
+      "Suppress non-essential output from bt. Set to 'true' or 'false'."
+    required: false
+    default: "false"
   github_token:
     description: "Your GitHub token"
     required: true

diff --git a/eval/.eslintrc.yml b/eval/.eslintrc.yml
@@ -11,7 +11,7 @@ ignorePatterns:
   - "!.*"
   - "**/node_modules/.*"
   - "**/dist/**"
-  - "**/coverage/.*"
+  - "**/coverage/**"
   - "*.json"
 
 parser: "@typescript-eslint/parser"

diff --git a/eval/dist/index.js b/eval/dist/index.js