From 320334b0ef0eb7bb85372fa362a003eb34d41e4e Mon Sep 17 00:00:00 2001 From: Stephen Belanger Date: Wed, 11 Mar 2026 23:28:54 +0800 Subject: [PATCH 1/6] Added: Rails engine for remote eval server Exposes the same eval dev server endpoints (health, list, eval) as a mountable Rails engine so Rails apps can embed the server without a separate Rack process. - Extract Services::List and Services::Eval from Rack handlers so business logic is shared between Rack and Rails adapters - Add Braintrust::Contrib::Rails::Engine (ActionController::API, ActionController::Live for SSE streaming, isolate_namespace) - Reuse existing auth strategies and CORS middleware unchanged - Add rails-server appraisal (actionpack ~> 8.0 + rack-test) - Add service unit tests and Rails engine/controller integration tests Usage: # config/initializers/braintrust_server.rb Braintrust::Contrib::Rails::Engine.configure do |config| config.evaluators = { "my-eval" => MyEval.new } config.auth = :clerk_token end # config/routes.rb mount Braintrust::Contrib::Rails::Engine, at: "/braintrust" Co-Authored-By: Claude Sonnet 4.6 --- Appraisals | 9 + examples/contrib/rails/eval.rb | 50 ++++ gemfiles/rails_server.gemfile | 14 + .../contrib/rails/application_controller.rb | 24 ++ lib/braintrust/contrib/rails/engine.rb | 71 +++++ .../contrib/rails/eval_controller.rb | 44 ++++ .../contrib/rails/health_controller.rb | 13 + .../contrib/rails/list_controller.rb | 14 + lib/braintrust/contrib/rails/routes.rb | 8 + lib/braintrust/server/handlers/eval.rb | 176 +------------ lib/braintrust/server/handlers/list.rb | 44 +--- lib/braintrust/server/rack.rb | 2 + lib/braintrust/server/rails.rb | 20 ++ .../server/services/eval_service.rb | 209 +++++++++++++++ .../server/services/list_service.rb | 59 +++++ test/braintrust/contrib/rails/engine_test.rb | 102 +++++++ .../contrib/rails/eval_controller_test.rb | 168 ++++++++++++ .../contrib/rails/health_controller_test.rb | 39 +++ .../contrib/rails/list_controller_test.rb | 
99 +++++++ test/braintrust/server/handlers/eval_test.rb | 2 +- .../server/services/eval_service_test.rb | 249 ++++++++++++++++++ .../server/services/list_service_test.rb | 87 ++++++ test/support/rails_server_helper.rb | 61 +++++ test/test_helper.rb | 2 + 24 files changed, 1356 insertions(+), 210 deletions(-) create mode 100644 examples/contrib/rails/eval.rb create mode 100644 gemfiles/rails_server.gemfile create mode 100644 lib/braintrust/contrib/rails/application_controller.rb create mode 100644 lib/braintrust/contrib/rails/engine.rb create mode 100644 lib/braintrust/contrib/rails/eval_controller.rb create mode 100644 lib/braintrust/contrib/rails/health_controller.rb create mode 100644 lib/braintrust/contrib/rails/list_controller.rb create mode 100644 lib/braintrust/contrib/rails/routes.rb create mode 100644 lib/braintrust/server/rails.rb create mode 100644 lib/braintrust/server/services/eval_service.rb create mode 100644 lib/braintrust/server/services/list_service.rb create mode 100644 test/braintrust/contrib/rails/engine_test.rb create mode 100644 test/braintrust/contrib/rails/eval_controller_test.rb create mode 100644 test/braintrust/contrib/rails/health_controller_test.rb create mode 100644 test/braintrust/contrib/rails/list_controller_test.rb create mode 100644 test/braintrust/server/services/eval_service_test.rb create mode 100644 test/braintrust/server/services/list_service_test.rb create mode 100644 test/support/rails_server_helper.rb diff --git a/Appraisals b/Appraisals index f0232d34..272581d7 100644 --- a/Appraisals +++ b/Appraisals @@ -99,3 +99,12 @@ appraise "rails" do gem "activesupport", "~> 8.0" gem "railties", "~> 8.0" end + +# Rails engine testing for the eval server engine +appraise "rails-server" do + gem "actionpack", "~> 8.0" + gem "railties", "~> 8.0" + gem "activesupport", "~> 8.0" + gem "rack", "~> 3.0" + gem "rack-test", "~> 2.1" +end diff --git a/examples/contrib/rails/eval.rb b/examples/contrib/rails/eval.rb new file mode 100644 index 
00000000..ca1f16bf --- /dev/null +++ b/examples/contrib/rails/eval.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +# Braintrust Rails Engine — mount example +# +# This file shows how to mount the Braintrust eval server as a Rails engine. +# The engine exposes the same endpoints as the standalone Rack server: +# GET /braintrust/ — health check +# GET /braintrust/list — list registered evaluators +# POST /braintrust/list — list registered evaluators +# POST /braintrust/eval — run an evaluation (SSE stream) +# +# Requirements: +# gem 'actionpack', '~> 8.0' +# gem 'railties', '~> 8.0' +# gem 'activesupport', '~> 8.0' + +# --------------------------------------------------------------------------- +# config/initializers/braintrust_server.rb +# --------------------------------------------------------------------------- + +require "braintrust/server/rails" + +Braintrust::Contrib::Rails::Engine.configure do |config| + # Register your evaluators by name. The Braintrust UI will discover them + # via GET /braintrust/list and let you run them via POST /braintrust/eval. + config.evaluators = { + "my-classifier" => Braintrust::Eval::Evaluator.new( + task: ->(input) { classify(input) }, + scorers: [ + Braintrust::Eval.scorer("accuracy") { |_input, expected, output| + (output == expected) ? 1.0 : 0.0 + } + ] + ) + } + + # Auth strategy: :clerk_token (default) validates Braintrust session tokens. + # Use :none for local development without authentication. 
+ config.auth = :clerk_token +end + +# --------------------------------------------------------------------------- +# config/routes.rb +# --------------------------------------------------------------------------- + +# Rails.application.routes.draw do +# mount Braintrust::Contrib::Rails::Engine, at: "/braintrust" +# end + +puts "Braintrust Rails Engine example — see comments for usage" diff --git a/gemfiles/rails_server.gemfile b/gemfiles/rails_server.gemfile new file mode 100644 index 00000000..0ab0584a --- /dev/null +++ b/gemfiles/rails_server.gemfile @@ -0,0 +1,14 @@ +# This file was generated by Appraisal + +source "https://rubygems.org" + +gem "minitest-reporters", "~> 1.6" +gem "minitest-stub-const", "~> 0.6" +gem "climate_control", "~> 1.2" +gem "actionpack", "~> 8.0" +gem "railties", "~> 8.0" +gem "activesupport", "~> 8.0" +gem "rack", "~> 3.0" +gem "rack-test", "~> 2.1" + +gemspec path: "../" diff --git a/lib/braintrust/contrib/rails/application_controller.rb b/lib/braintrust/contrib/rails/application_controller.rb new file mode 100644 index 00000000..12ecef76 --- /dev/null +++ b/lib/braintrust/contrib/rails/application_controller.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module Rails + class ApplicationController < ActionController::API + before_action :authenticate! + + private + + def authenticate! 
+ auth_result = Engine.auth_strategy.authenticate(request.env) + unless auth_result + render json: {"error" => "Unauthorized"}, status: :unauthorized + return + end + + request.env["braintrust.auth"] = auth_result + @braintrust_auth = auth_result + end + end + end + end +end diff --git a/lib/braintrust/contrib/rails/engine.rb b/lib/braintrust/contrib/rails/engine.rb new file mode 100644 index 00000000..85759ae8 --- /dev/null +++ b/lib/braintrust/contrib/rails/engine.rb @@ -0,0 +1,71 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module Rails + class Engine < ::Rails::Engine + isolate_namespace Braintrust::Contrib::Rails + + config.evaluators = {} + config.auth = :clerk_token + + # Register the engine's routes file so Rails loads it during initialization. + paths["config/routes.rb"] << File.expand_path("routes.rb", __dir__) + + initializer "braintrust.server.cors" do |app| + app.middleware.use Braintrust::Server::Middleware::Cors + end + + # Class-level helpers that read from engine config. + + def self.evaluators + config.evaluators + end + + def self.auth_strategy + @auth_strategy ||= resolve_auth(config.auth) + end + + def self.list_service + @list_service ||= Server::Services::List.new(config.evaluators) + end + + # Long-lived so the state cache persists across requests. + def self.eval_service + @eval_service ||= Server::Services::Eval.new(config.evaluators) + end + + # Reset memoized services (useful in tests when config changes). + def self.reset_services! + @auth_strategy = nil + @list_service = nil + @eval_service = nil + end + + def self.configure + yield config + reset_services! + end + + def self.resolve_auth(auth) + case auth + when :none + Server::Auth::NoAuth.new + when :clerk_token + Server::Auth::ClerkToken.new + when Symbol, String + raise ArgumentError, "Unknown auth strategy #{auth.inspect}. Expected :none, :clerk_token, or an auth object." 
+ else + auth + end + end + private_class_method :resolve_auth + end + end + end +end + +require_relative "application_controller" +require_relative "health_controller" +require_relative "list_controller" +require_relative "eval_controller" diff --git a/lib/braintrust/contrib/rails/eval_controller.rb b/lib/braintrust/contrib/rails/eval_controller.rb new file mode 100644 index 00000000..11e3e417 --- /dev/null +++ b/lib/braintrust/contrib/rails/eval_controller.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module Rails + class EvalController < ApplicationController + include ActionController::Live + + def create + body = parse_body + unless body + render json: {"error" => "Invalid JSON body"}, status: :bad_request + return + end + + result = Engine.eval_service.validate(body) + if result[:error] + render json: {"error" => result[:error]}, status: result[:status] + return + end + + response.headers["Content-Type"] = "text/event-stream" + response.headers["Cache-Control"] = "no-cache" + response.headers["Connection"] = "keep-alive" + + sse = Server::SSEWriter.new { |chunk| response.stream.write(chunk) } + Engine.eval_service.stream(result, auth: @braintrust_auth, sse: sse) + ensure + response.stream.close + end + + private + + def parse_body + body = request.body.read + return nil if body.nil? || body.empty? 
+ JSON.parse(body) + rescue JSON::ParserError + nil + end + end + end + end +end diff --git a/lib/braintrust/contrib/rails/health_controller.rb b/lib/braintrust/contrib/rails/health_controller.rb new file mode 100644 index 00000000..9c362f26 --- /dev/null +++ b/lib/braintrust/contrib/rails/health_controller.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module Rails + class HealthController < ApplicationController + def show + render json: {"status" => "ok"} + end + end + end + end +end diff --git a/lib/braintrust/contrib/rails/list_controller.rb b/lib/braintrust/contrib/rails/list_controller.rb new file mode 100644 index 00000000..3e9737c4 --- /dev/null +++ b/lib/braintrust/contrib/rails/list_controller.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module Rails + class ListController < ApplicationController + def show + result = Engine.list_service.call + render json: result + end + end + end + end +end diff --git a/lib/braintrust/contrib/rails/routes.rb b/lib/braintrust/contrib/rails/routes.rb new file mode 100644 index 00000000..6db2650b --- /dev/null +++ b/lib/braintrust/contrib/rails/routes.rb @@ -0,0 +1,8 @@ +# frozen_string_literal: true + +Braintrust::Contrib::Rails::Engine.routes.draw do + get "/", to: "health#show" + get "/list", to: "list#show" + post "/list", to: "list#show" + post "/eval", to: "eval#create" +end diff --git a/lib/braintrust/server/handlers/eval.rb b/lib/braintrust/server/handlers/eval.rb index 4283afcf..ec866808 100644 --- a/lib/braintrust/server/handlers/eval.rb +++ b/lib/braintrust/server/handlers/eval.rb @@ -10,38 +10,15 @@ module Handlers class Eval def initialize(evaluators) @evaluators = evaluators + @service = Services::Eval.new(evaluators) end def call(env) body = parse_body(env) return error_response(400, "Invalid JSON body") unless body - name = body["name"] - return error_response(400, "Missing required field: name") unless name - - 
evaluator = @evaluators[name] - return error_response(404, "Evaluator '#{name}' not found") unless evaluator - - data = body["data"] - return error_response(400, "Missing required field: data") unless data - - # Validate exactly one data source - data_sources = ["data", "dataset_name", "dataset_id"].count { |k| data.key?(k) } - return error_response(400, "Exactly one data source required") if data_sources != 1 - - experiment_name = body["experiment_name"] - - # Resolve data source - cases, dataset = resolve_data_source(data) - - # Resolve remote scorers from request - remote_scorer_ids = resolve_remote_scorers(body["scores"]) - - # Resolve parent span context - parent = resolve_parent(body["parent"]) - - # Build state from auth context (if present) - state = build_state(env) + result = @service.validate(body) + return error_response(result[:status], result[:error]) if result[:error] # The protocol-rack adapter (used by Falcon and any server built on # protocol-http) buffers `each`-based bodies through an Enumerable path. @@ -50,64 +27,7 @@ def call(env) body_class = env.key?("protocol.http.request") ? SSEStreamBody : SSEBody sse_body = body_class.new do |sse| - # Only pass project/experiment params when state is available - run_opts = { - on_progress: ->(progress_data) { - # Build remote eval protocol events from generic progress data. - # Runner provides: id, data/error, scores (optional), origin (optional). - # Protocol requires: id, object_type, origin, name, format, output_type, event, data. 
- base = { - "object_type" => "task", - "name" => name, - "format" => "code", - "output_type" => "completion" - } - base["id"] = progress_data["id"] if progress_data["id"] - base["origin"] = progress_data["origin"] if progress_data["origin"] - - if progress_data.key?("error") - sse.event("progress", JSON.dump(base.merge("event" => "error", "data" => progress_data["error"]))) - else - sse.event("progress", JSON.dump(base.merge("event" => "json_delta", "data" => JSON.dump(progress_data["data"])))) - end - - # Signal per-cell completion so the UI exits "Streaming..." state - # and updates the progress bar immediately. - sse.event("progress", JSON.dump(base.merge("event" => "done", "data" => ""))) - }, - quiet: true - } - run_opts[:parent] = parent if parent - run_opts[:scorers] = remote_scorer_ids if remote_scorer_ids - run_opts[:dataset] = dataset if dataset - - if state - run_opts[:state] = state - run_opts[:experiment] = experiment_name if experiment_name - run_opts[:project_id] = body["project_id"] if body["project_id"] - end - - result = evaluator.run(cases, **run_opts) - - # Flush buffered OTLP spans before sending completion events. - # The BatchSpanProcessor exports every ~5s; fast evals can finish - # before a single export fires, causing the UI to see no results. - Braintrust::Trace.flush_spans - - # Build summary from result scores - averaged_scores = {} - result.scorer_stats.each do |scorer_name, stats| - averaged_scores[scorer_name] = stats.score_mean - end - - sse.event("summary", JSON.dump({ - "scores" => averaged_scores, - "experiment_name" => experiment_name, - "experiment_id" => result.experiment_id, - "project_id" => result.project_id - })) - - sse.event("done", "") + @service.stream(result, auth: env["braintrust.auth"], sse: sse) end [200, {"content-type" => "text/event-stream", "cache-control" => "no-cache", "connection" => "keep-alive"}, sse_body] @@ -115,90 +35,6 @@ def call(env) private - # Resolve data source from the data field. 
- # Returns [cases, dataset] where exactly one is non-nil. - def resolve_data_source(data) - if data.key?("data") - cases = data["data"].map do |d| - {input: d["input"], expected: d["expected"]} - end - [cases, nil] - elsif data.key?("dataset_id") - [nil, Braintrust::Dataset::ID.new(id: data["dataset_id"])] - elsif data.key?("dataset_name") - dataset_opts = {name: data["dataset_name"]} - dataset_opts[:project] = data["project_name"] if data["project_name"] - [nil, dataset_opts] - else - [nil, nil] - end - end - - # Map request scores array to Scorer::ID structs. - # The UI sends function_id as a nested object: {"function_id": "uuid"}. - def resolve_remote_scorers(scores) - return nil if scores.nil? || scores.empty? - scores.map do |s| - func_id = s["function_id"] - func_id = func_id["function_id"] if func_id.is_a?(Hash) - Braintrust::Scorer::ID.new( - function_id: func_id, - version: s["version"] - ) - end - end - - # Map request parent to symbol-keyed Hash. - # Hardcode playground_id to match Java SDK behavior. - # Also extracts generation from propagated_event for span_attributes. - def resolve_parent(parent) - return nil unless parent.is_a?(Hash) - object_id = parent["object_id"] - return nil unless object_id - - generation = parent.dig("propagated_event", "span_attributes", "generation") - - result = {object_type: "playground_id", object_id: object_id} - result[:generation] = generation if generation - result - end - - # Build State from auth context set by Auth middleware. - # Returns nil when no auth context is present (e.g. NoAuth strategy). - # Uses an LRU-style cache (max 64 entries) keyed by [api_key, app_url, org_name]. 
- def build_state(env) - auth = env["braintrust.auth"] - return nil unless auth.is_a?(Hash) - - cache_key = [auth["api_key"], auth["app_url"], auth["org_name"]] - - @state_mutex ||= Mutex.new - @state_cache ||= {} - - @state_mutex.synchronize do - cached = @state_cache[cache_key] - return cached if cached - - state = Braintrust::State.new( - api_key: auth["api_key"], - org_id: auth["org_id"], - org_name: auth["org_name"], - app_url: auth["app_url"], - api_url: auth["api_url"], - enable_tracing: false - ) - - # Evict oldest entry if cache is full - if @state_cache.size >= 64 - oldest_key = @state_cache.keys.first - @state_cache.delete(oldest_key) - end - - @state_cache[cache_key] = state - state - end - end - def parse_body(env) body = env["rack.input"]&.read return nil if body.nil? || body.empty? @@ -211,6 +47,10 @@ def error_response(status, message) [status, {"content-type" => "application/json"}, [JSON.dump({"error" => message})]] end + + def build_state(env) + @service.build_state(env["braintrust.auth"]) + end end end end diff --git a/lib/braintrust/server/handlers/list.rb b/lib/braintrust/server/handlers/list.rb index 0a908116..2e58d893 100644 --- a/lib/braintrust/server/handlers/list.rb +++ b/lib/braintrust/server/handlers/list.rb @@ -23,50 +23,12 @@ module Handlers class List def initialize(evaluators) @evaluators = evaluators + @service = Services::List.new(evaluators) end def call(_env) - result = {} - @evaluators.each do |name, evaluator| - scores = (evaluator.scorers || []).each_with_index.map do |scorer, i| - scorer_name = scorer.respond_to?(:name) ? scorer.name : "score_#{i}" - {"name" => scorer_name} - end - entry = {"scores" => scores} - params = serialize_parameters(evaluator.parameters) - entry["parameters"] = params if params - result[name] = entry - end - - [200, {"content-type" => "application/json"}, - [JSON.dump(result)]] - end - - private - - # Convert user-defined parameters to the dev server protocol format. 
- # Wraps in a staticParameters container with "data" typed entries. - def serialize_parameters(parameters) - return nil unless parameters && !parameters.empty? - - schema = {} - parameters.each do |name, spec| - spec = spec.transform_keys(&:to_s) if spec.is_a?(Hash) - if spec.is_a?(Hash) - schema[name.to_s] = { - "type" => "data", - "schema" => {"type" => spec["type"] || "string"}, - "default" => spec["default"], - "description" => spec["description"] - } - end - end - - { - "type" => "braintrust.staticParameters", - "schema" => schema, - "source" => nil - } + result = @service.call + [200, {"content-type" => "application/json"}, [JSON.dump(result)]] end end end diff --git a/lib/braintrust/server/rack.rb b/lib/braintrust/server/rack.rb index d397d75d..154d8443 100644 --- a/lib/braintrust/server/rack.rb +++ b/lib/braintrust/server/rack.rb @@ -15,6 +15,8 @@ require_relative "auth/clerk_token" require_relative "middleware/cors" require_relative "middleware/auth" +require_relative "services/list_service" +require_relative "services/eval_service" require_relative "handlers/health" require_relative "handlers/list" require_relative "handlers/eval" diff --git a/lib/braintrust/server/rails.rb b/lib/braintrust/server/rails.rb new file mode 100644 index 00000000..75ddbf77 --- /dev/null +++ b/lib/braintrust/server/rails.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +begin + require "action_controller" + require "rails/engine" +rescue LoadError + raise LoadError, + "Rails (actionpack + railties) is required for the Braintrust Rails server engine. " \ + "Add `gem 'rails'` or `gem 'actionpack'` and `gem 'railties'` to your Gemfile." 
+end + +require "json" +require_relative "../eval" +require_relative "sse" +require_relative "auth/no_auth" +require_relative "auth/clerk_token" +require_relative "middleware/cors" +require_relative "services/list_service" +require_relative "services/eval_service" +require_relative "../contrib/rails/engine" diff --git a/lib/braintrust/server/services/eval_service.rb b/lib/braintrust/server/services/eval_service.rb new file mode 100644 index 00000000..32b5e3bb --- /dev/null +++ b/lib/braintrust/server/services/eval_service.rb @@ -0,0 +1,209 @@ +# frozen_string_literal: true + +require "json" + +module Braintrust + module Server + module Services + # Framework-agnostic service for running evaluations and streaming SSE results. + # Must be long-lived (not per-request) to preserve the @state_cache across requests. + class Eval + def initialize(evaluators) + @evaluators = evaluators + @state_mutex = Mutex.new + @state_cache = {} + end + + # Validates request body. Returns: + # {error: String, status: Integer} on failure + # {evaluator:, name:, cases:, dataset:, ...} on success + def validate(body) + name = body["name"] + return {error: "Missing required field: name", status: 400} unless name + + evaluator = @evaluators[name] + return {error: "Evaluator '#{name}' not found", status: 404} unless evaluator + + data = body["data"] + return {error: "Missing required field: data", status: 400} unless data + + data_sources = ["data", "dataset_name", "dataset_id"].count { |k| data.key?(k) } + return {error: "Exactly one data source required", status: 400} if data_sources != 1 + + cases, dataset = resolve_data_source(data) + + { + evaluator: evaluator, + name: name, + cases: cases, + dataset: dataset, + experiment_name: body["experiment_name"], + remote_scorer_ids: resolve_remote_scorers(body["scores"]), + parent: resolve_parent(body["parent"]), + project_id: body["project_id"] + } + end + + # Runs the validated eval and streams SSE events via the sse writer. 
+ # +validated+ is the hash returned by #validate. + # +auth+ is the auth context hash (or nil/true for no-auth). + # +sse+ is an SSEWriter instance. + def stream(validated, auth:, sse:) + name = validated[:name] + evaluator = validated[:evaluator] + cases = validated[:cases] + dataset = validated[:dataset] + experiment_name = validated[:experiment_name] + remote_scorer_ids = validated[:remote_scorer_ids] + parent = validated[:parent] + project_id = validated[:project_id] + + state = build_state(auth) + + # Only pass project/experiment params when state is available + run_opts = { + on_progress: ->(progress_data) { + # Build remote eval protocol events from generic progress data. + # Runner provides: id, data/error, scores (optional), origin (optional). + # Protocol requires: id, object_type, origin, name, format, output_type, event, data. + base = { + "object_type" => "task", + "name" => name, + "format" => "code", + "output_type" => "completion" + } + base["id"] = progress_data["id"] if progress_data["id"] + base["origin"] = progress_data["origin"] if progress_data["origin"] + + if progress_data.key?("error") + sse.event("progress", JSON.dump(base.merge("event" => "error", "data" => progress_data["error"]))) + else + sse.event("progress", JSON.dump(base.merge("event" => "json_delta", "data" => JSON.dump(progress_data["data"])))) + end + + # Signal per-cell completion so the UI exits "Streaming..." state + # and updates the progress bar immediately. + sse.event("progress", JSON.dump(base.merge("event" => "done", "data" => ""))) + }, + quiet: true + } + run_opts[:parent] = parent if parent + run_opts[:scorers] = remote_scorer_ids if remote_scorer_ids + run_opts[:dataset] = dataset if dataset + + if state + run_opts[:state] = state + run_opts[:experiment] = experiment_name if experiment_name + run_opts[:project_id] = project_id if project_id + end + + result = evaluator.run(cases, **run_opts) + + # Flush buffered OTLP spans before sending completion events. 
+ # The BatchSpanProcessor exports every ~5s; fast evals can finish + # before a single export fires, causing the UI to see no results. + Braintrust::Trace.flush_spans + + # Build summary from result scores + averaged_scores = {} + result.scorer_stats.each do |scorer_name, stats| + averaged_scores[scorer_name] = stats.score_mean + end + + sse.event("summary", JSON.dump({ + "scores" => averaged_scores, + "experiment_name" => experiment_name, + "experiment_id" => result.experiment_id, + "project_id" => result.project_id + })) + + sse.event("done", "") + end + + # Build State from auth context hash. + # Returns nil when auth is not a Hash (e.g. NoAuth returns true). + # Uses an LRU-style cache (max 64 entries) keyed by [api_key, app_url, org_name]. + def build_state(auth) + return nil unless auth.is_a?(Hash) + + cache_key = [auth["api_key"], auth["app_url"], auth["org_name"]] + + @state_mutex ||= Mutex.new + @state_cache ||= {} + + @state_mutex.synchronize do + cached = @state_cache[cache_key] + return cached if cached + + state = Braintrust::State.new( + api_key: auth["api_key"], + org_id: auth["org_id"], + org_name: auth["org_name"], + app_url: auth["app_url"], + api_url: auth["api_url"], + enable_tracing: false + ) + + if @state_cache.size >= 64 + oldest_key = @state_cache.keys.first + @state_cache.delete(oldest_key) + end + + @state_cache[cache_key] = state + state + end + end + + private + + # Resolve data source from the data field. + # Returns [cases, dataset] where exactly one is non-nil. 
+ def resolve_data_source(data) + if data.key?("data") + cases = data["data"].map do |d| + {input: d["input"], expected: d["expected"]} + end + [cases, nil] + elsif data.key?("dataset_id") + [nil, Braintrust::Dataset::ID.new(id: data["dataset_id"])] + elsif data.key?("dataset_name") + dataset_opts = {name: data["dataset_name"]} + dataset_opts[:project] = data["project_name"] if data["project_name"] + [nil, dataset_opts] + else + [nil, nil] + end + end + + # Map request scores array to Scorer::ID structs. + # The UI sends function_id as a nested object: {"function_id": "uuid"}. + def resolve_remote_scorers(scores) + return nil if scores.nil? || scores.empty? + scores.map do |s| + func_id = s["function_id"] + func_id = func_id["function_id"] if func_id.is_a?(Hash) + Braintrust::Scorer::ID.new( + function_id: func_id, + version: s["version"] + ) + end + end + + # Map request parent to symbol-keyed Hash. + # Hardcode playground_id to match Java SDK behavior. + # Also extracts generation from propagated_event for span_attributes. + def resolve_parent(parent) + return nil unless parent.is_a?(Hash) + object_id = parent["object_id"] + return nil unless object_id + + generation = parent.dig("propagated_event", "span_attributes", "generation") + + result = {object_type: "playground_id", object_id: object_id} + result[:generation] = generation if generation + result + end + end + end + end +end diff --git a/lib/braintrust/server/services/list_service.rb b/lib/braintrust/server/services/list_service.rb new file mode 100644 index 00000000..1fd302f2 --- /dev/null +++ b/lib/braintrust/server/services/list_service.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +require "json" + +module Braintrust + module Server + module Services + # Framework-agnostic service for listing evaluators. + # Returns a plain Hash (not a Rack triplet) suitable for JSON.dump. 
+ class List + def initialize(evaluators) + @evaluators = evaluators + end + + def call + result = {} + @evaluators.each do |name, evaluator| + scores = (evaluator.scorers || []).each_with_index.map do |scorer, i| + scorer_name = scorer.respond_to?(:name) ? scorer.name : "score_#{i}" + {"name" => scorer_name} + end + entry = {"scores" => scores} + params = serialize_parameters(evaluator.parameters) + entry["parameters"] = params if params + result[name] = entry + end + result + end + + private + + # Convert user-defined parameters to the dev server protocol format. + # Wraps in a staticParameters container with "data" typed entries. + def serialize_parameters(parameters) + return nil unless parameters && !parameters.empty? + + schema = {} + parameters.each do |name, spec| + spec = spec.transform_keys(&:to_s) if spec.is_a?(Hash) + if spec.is_a?(Hash) + schema[name.to_s] = { + "type" => "data", + "schema" => {"type" => spec["type"] || "string"}, + "default" => spec["default"], + "description" => spec["description"] + } + end + end + + { + "type" => "braintrust.staticParameters", + "schema" => schema, + "source" => nil + } + end + end + end + end +end diff --git a/test/braintrust/contrib/rails/engine_test.rb b/test/braintrust/contrib/rails/engine_test.rb new file mode 100644 index 00000000..57f1e73c --- /dev/null +++ b/test/braintrust/contrib/rails/engine_test.rb @@ -0,0 +1,102 @@ +# frozen_string_literal: true + +require "test_helper" + +module Braintrust + module Contrib + module Rails + class EngineTest < Minitest::Test + def setup + skip_unless_rails_server! + reset_engine! + end + + def test_evaluators_returns_config_value + evaluator = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + Engine.config.evaluators = {"my-eval" => evaluator} + assert_same evaluator, Engine.evaluators["my-eval"] + end + + def test_auth_strategy_returns_no_auth_for_none + Engine.config.auth = :none + Engine.reset_services! 
+ assert_instance_of Braintrust::Server::Auth::NoAuth, Engine.auth_strategy + end + + def test_auth_strategy_returns_clerk_token_by_default + Engine.config.auth = :clerk_token + Engine.reset_services! + assert_instance_of Braintrust::Server::Auth::ClerkToken, Engine.auth_strategy + end + + def test_auth_strategy_accepts_custom_object + custom = Braintrust::Server::Auth::NoAuth.new + Engine.config.auth = custom + Engine.reset_services! + assert_same custom, Engine.auth_strategy + end + + def test_auth_strategy_raises_for_unknown_symbol + Engine.config.auth = :jwt + Engine.reset_services! + assert_raises(ArgumentError) { Engine.auth_strategy } + end + + def test_eval_service_returns_eval_instance + assert_instance_of Braintrust::Server::Services::Eval, Engine.eval_service + end + + def test_list_service_returns_list_instance + assert_instance_of Braintrust::Server::Services::List, Engine.list_service + end + + def test_eval_service_is_memoized + svc1 = Engine.eval_service + svc2 = Engine.eval_service + assert_same svc1, svc2 + end + + def test_reset_services_clears_memoized_instances + svc1 = Engine.eval_service + Engine.reset_services! + svc2 = Engine.eval_service + refute_same svc1, svc2 + end + + def test_configure_yields_config_and_resets_services + svc_before = Engine.eval_service + evaluator = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + + Engine.configure do |config| + config.evaluators = {"configured-eval" => evaluator} + config.auth = :none + end + + assert_same evaluator, Engine.evaluators["configured-eval"] + assert_instance_of Braintrust::Server::Auth::NoAuth, Engine.auth_strategy + refute_same svc_before, Engine.eval_service + end + + def test_cors_middleware_is_in_middleware_stack + stack = BraintrustRailsTestApp.middleware + middleware_classes = stack.map { |m| + begin + m.klass + rescue + m + end + } + assert middleware_classes.any? 
{ |klass| + klass == Braintrust::Server::Middleware::Cors + }, "CORS middleware should be in the stack" + end + + def test_engine_has_expected_routes + routes = Engine.routes.routes.map { |r| "#{r.verb} #{r.path.spec}" } + assert routes.any? { |r| r.include?("/list") }, "Should have /list route" + assert routes.any? { |r| r.include?("/eval") }, "Should have /eval route" + end + end + end + end +end diff --git a/test/braintrust/contrib/rails/eval_controller_test.rb b/test/braintrust/contrib/rails/eval_controller_test.rb new file mode 100644 index 00000000..98e029f2 --- /dev/null +++ b/test/braintrust/contrib/rails/eval_controller_test.rb @@ -0,0 +1,168 @@ +# frozen_string_literal: true + +require "test_helper" +require "json" + +module Braintrust + module Contrib + module Rails + class EvalControllerTest < Minitest::Test + include ::Rack::Test::Methods if defined?(::Rack::Test::Methods) + + def setup + skip_unless_rails_server! + @evaluators = {} + @rig = setup_otel_test_rig + reset_engine!(evaluators: @evaluators, auth: :none) + end + + def app + rails_engine_app + end + + def test_streams_sse_events_for_inline_data + @evaluators["upcase-eval"] = test_evaluator(task: ->(input) { input.to_s.upcase }) + reset_engine!(evaluators: @evaluators, auth: :none) + + post_json "/eval", { + name: "upcase-eval", + data: { + data: [ + {input: "hello", expected: "HELLO"}, + {input: "world", expected: "WORLD"} + ] + }, + experiment_name: "test-experiment", + project_id: "proj-123" + } + + assert_equal 200, last_response.status + assert_match "text/event-stream", last_response.content_type + + events = parse_sse_events(last_response.body) + progress_events = events.select { |e| e[:event] == "progress" } + assert_equal 4, progress_events.length # 2 per case + + summary_events = events.select { |e| e[:event] == "summary" } + assert_equal 1, summary_events.length + + assert_equal "done", events.last[:event] + end + + def test_progress_events_contain_output + @evaluators["upcase-eval"] 
= test_evaluator(task: ->(input) { input.to_s.upcase }) + reset_engine!(evaluators: @evaluators, auth: :none) + + post_json "/eval", { + name: "upcase-eval", + data: {data: [{input: "hello", expected: "HELLO"}]}, + experiment_name: "test-experiment", + project_id: "proj-123" + } + + events = parse_sse_events(last_response.body) + progress = events.find { |e| e[:event] == "progress" } + data = JSON.parse(progress[:data]) + + assert_equal "HELLO", JSON.parse(data["data"]) + end + + def test_summary_event_contains_scores + scorer = Braintrust::Eval.scorer("exact") { |_i, e, o| (o == e) ? 1.0 : 0.0 } + @evaluators["scored-eval"] = test_evaluator( + task: ->(input) { input.to_s.upcase }, + scorers: [scorer] + ) + reset_engine!(evaluators: @evaluators, auth: :none) + + post_json "/eval", { + name: "scored-eval", + data: {data: [{input: "hello", expected: "HELLO"}]}, + experiment_name: "test-experiment", + project_id: "proj-123" + } + + events = parse_sse_events(last_response.body) + summary = events.find { |e| e[:event] == "summary" } + data = JSON.parse(summary[:data]) + + assert data.key?("scores") + assert data.key?("experiment_name") + end + + def test_error_still_emits_progress_and_done + @evaluators["failing-eval"] = test_evaluator(task: ->(_input) { raise "task exploded" }) + reset_engine!(evaluators: @evaluators, auth: :none) + + post_json "/eval", { + name: "failing-eval", + data: {data: [{input: "hello"}]}, + experiment_name: "test-experiment", + project_id: "proj-123" + } + + events = parse_sse_events(last_response.body) + assert events.any? 
{ |e| e[:event] == "progress" || e[:event] == "error" } + assert_equal "done", events.last[:event] + end + + def test_404_for_unknown_evaluator + post_json "/eval", { + name: "nonexistent", + data: {data: [{input: "hello"}]}, + experiment_name: "test-experiment", + project_id: "proj-123" + } + + assert_equal 404, last_response.status + body = JSON.parse(last_response.body) + assert_match(/not found/i, body["error"]) + end + + def test_400_for_missing_name + post_json "/eval", { + data: {data: [{input: "hello"}]} + } + + assert_equal 400, last_response.status + end + + def test_400_for_missing_data + @evaluators["test-eval"] = test_evaluator(task: ->(input) { input }) + reset_engine!(evaluators: @evaluators, auth: :none) + + post_json "/eval", {name: "test-eval"} + + assert_equal 400, last_response.status + end + + def test_400_for_invalid_json_body + post "/eval", "not-json", {"CONTENT_TYPE" => "application/json"} + + assert_equal 400, last_response.status + end + + def test_returns_401_when_auth_fails + reset_engine!(evaluators: @evaluators, auth: :clerk_token) + + post_json "/eval", { + name: "test-eval", + data: {data: [{input: "hello"}]} + } + + assert_equal 401, last_response.status + end + + private + + def test_evaluator(**kwargs) + Test::Support::EvalHelper::TestEvaluator.new(tracer_provider: @rig.tracer_provider, **kwargs) + end + + def post_json(path, body) + post path, JSON.generate(body), {"CONTENT_TYPE" => "application/json"} + end + end + end + end +end diff --git a/test/braintrust/contrib/rails/health_controller_test.rb b/test/braintrust/contrib/rails/health_controller_test.rb new file mode 100644 index 00000000..e5ec7b27 --- /dev/null +++ b/test/braintrust/contrib/rails/health_controller_test.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +require "test_helper" +require "json" + +module Braintrust + module Contrib + module Rails + class HealthControllerTest < Minitest::Test + include ::Rack::Test::Methods if defined?(::Rack::Test::Methods) + + 
def setup + skip_unless_rails_server! + reset_engine!(auth: :none) + end + + def app + rails_engine_app + end + + def test_get_root_returns_200 + get "/" + assert_equal 200, last_response.status + end + + def test_get_root_returns_json_content_type + get "/" + assert_match "application/json", last_response.content_type + end + + def test_get_root_returns_status_ok + get "/" + body = JSON.parse(last_response.body) + assert_equal "ok", body["status"] + end + end + end + end +end diff --git a/test/braintrust/contrib/rails/list_controller_test.rb b/test/braintrust/contrib/rails/list_controller_test.rb new file mode 100644 index 00000000..fc0f56a0 --- /dev/null +++ b/test/braintrust/contrib/rails/list_controller_test.rb @@ -0,0 +1,99 @@ +# frozen_string_literal: true + +require "test_helper" +require "json" + +module Braintrust + module Contrib + module Rails + class ListControllerTest < Minitest::Test + include ::Rack::Test::Methods if defined?(::Rack::Test::Methods) + + def setup + skip_unless_rails_server! 
+ @evaluators = {} + reset_engine!(evaluators: @evaluators, auth: :none) + end + + def app + rails_engine_app + end + + def test_get_list_returns_200 + get "/list" + assert_equal 200, last_response.status + end + + def test_post_list_returns_200 + post "/list" + assert_equal 200, last_response.status + end + + def test_returns_empty_hash_when_no_evaluators + get "/list" + body = JSON.parse(last_response.body) + assert_equal({}, body) + end + + def test_returns_evaluators_keyed_by_name + @evaluators["food-classifier"] = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + @evaluators["text-summarizer"] = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + reset_engine!(evaluators: @evaluators, auth: :none) + + get "/list" + body = JSON.parse(last_response.body) + assert body.key?("food-classifier") + assert body.key?("text-summarizer") + end + + def test_includes_scorer_names + @evaluators["scored-eval"] = Braintrust::Eval::Evaluator.new( + task: ->(input) { input }, + scorers: [ + Braintrust::Eval.scorer("exact_match") { |_i, e, o| (o == e) ? 
1.0 : 0.0 }, + Braintrust::Eval.scorer("length_check") { |_i, _e, _o| 1.0 } + ] + ) + reset_engine!(evaluators: @evaluators, auth: :none) + + get "/list" + body = JSON.parse(last_response.body) + score_names = body["scored-eval"]["scores"].map { |s| s["name"] } + assert_equal ["exact_match", "length_check"], score_names + end + + def test_includes_parameters_in_static_container + @evaluators["param-eval"] = Braintrust::Eval::Evaluator.new( + task: ->(input) { input }, + parameters: {"temperature" => {type: "number", default: 0.7, description: "LLM temperature"}} + ) + reset_engine!(evaluators: @evaluators, auth: :none) + + get "/list" + body = JSON.parse(last_response.body) + params = body["param-eval"]["parameters"] + assert_equal "braintrust.staticParameters", params["type"] + assert_equal 0.7, params["schema"]["temperature"]["default"] + end + + def test_omits_parameters_when_none_defined + @evaluators["no-params"] = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + reset_engine!(evaluators: @evaluators, auth: :none) + + get "/list" + body = JSON.parse(last_response.body) + refute body["no-params"].key?("parameters") + end + + def test_returns_401_when_auth_fails + # Use clerk_token auth — no auth header means failure + reset_engine!(evaluators: @evaluators, auth: :clerk_token) + + # WebMock blocks real HTTP, so clerk token validation will fail + get "/list" + assert_equal 401, last_response.status + end + end + end + end +end diff --git a/test/braintrust/server/handlers/eval_test.rb b/test/braintrust/server/handlers/eval_test.rb index 45473ea3..dce8a868 100644 --- a/test/braintrust/server/handlers/eval_test.rb +++ b/test/braintrust/server/handlers/eval_test.rb @@ -406,7 +406,7 @@ def test_build_state_evicts_oldest_when_cache_full end # First entry (key-0) should have been evicted - cache = h.instance_variable_get(:@state_cache) + cache = h.instance_variable_get(:@service).instance_variable_get(:@state_cache) assert_equal 64, cache.size, "Cache should 
not exceed 64 entries" refute cache.key?(["key-0", "https://app.example.com", "org-0"]), diff --git a/test/braintrust/server/services/eval_service_test.rb b/test/braintrust/server/services/eval_service_test.rb new file mode 100644 index 00000000..38235b35 --- /dev/null +++ b/test/braintrust/server/services/eval_service_test.rb @@ -0,0 +1,249 @@ +# frozen_string_literal: true + +require "test_helper" +require "json" + +# Unit tests for Services::Eval — runs without any framework (no appraisal needed). +module Braintrust + module Server + module Services + class EvalTest < Minitest::Test + def setup + skip_unless_server! + @evaluators = {} + @rig = setup_otel_test_rig + end + + def service + Eval.new(@evaluators) + end + + # --- validate --- + + def test_validate_returns_error_for_missing_name + result = service.validate({}) + assert_equal 400, result[:status] + assert_match(/name/, result[:error]) + end + + def test_validate_returns_error_for_unknown_evaluator + result = service.validate({"name" => "nonexistent", "data" => {"data" => []}}) + assert_equal 404, result[:status] + assert_match(/not found/i, result[:error]) + end + + def test_validate_returns_error_for_missing_data + @evaluators["test-eval"] = test_evaluator(task: ->(input) { input }) + result = service.validate({"name" => "test-eval"}) + assert_equal 400, result[:status] + assert_match(/data/, result[:error]) + end + + def test_validate_returns_error_for_multiple_data_sources + @evaluators["test-eval"] = test_evaluator(task: ->(input) { input }) + result = service.validate({ + "name" => "test-eval", + "data" => {"data" => [{"input" => "x"}], "dataset_name" => "ds"} + }) + assert_equal 400, result[:status] + end + + def test_validate_returns_valid_hash_on_success + @evaluators["my-eval"] = test_evaluator(task: ->(input) { input }) + result = service.validate({ + "name" => "my-eval", + "data" => {"data" => [{"input" => "hello", "expected" => "hello"}]}, + "experiment_name" => "exp-1", + "project_id" => 
"proj-1" + }) + + refute result.key?(:error) + assert_equal "my-eval", result[:name] + assert_equal @evaluators["my-eval"], result[:evaluator] + assert_equal [{input: "hello", expected: "hello"}], result[:cases] + assert_equal "exp-1", result[:experiment_name] + assert_equal "proj-1", result[:project_id] + end + + def test_validate_accepts_dataset_id + @evaluators["test-eval"] = test_evaluator(task: ->(input) { input }) + result = service.validate({ + "name" => "test-eval", + "data" => {"dataset_id" => "ds-123"} + }) + + refute result.key?(:error) + assert_nil result[:cases] + assert_instance_of Braintrust::Dataset::ID, result[:dataset] + end + + def test_validate_accepts_dataset_name + @evaluators["test-eval"] = test_evaluator(task: ->(input) { input }) + result = service.validate({ + "name" => "test-eval", + "data" => {"dataset_name" => "my-dataset", "project_name" => "my-project"} + }) + + refute result.key?(:error) + assert_nil result[:cases] + assert_equal({name: "my-dataset", project: "my-project"}, result[:dataset]) + end + + # --- stream --- + + def test_stream_emits_progress_and_done_events + @evaluators["upcase-eval"] = test_evaluator(task: ->(input) { input.to_s.upcase }) + s = service + validated = s.validate({ + "name" => "upcase-eval", + "data" => {"data" => [{"input" => "hello"}, {"input" => "world"}]}, + "experiment_name" => "exp" + }) + + events = collect_streamed_events(s, validated) + + progress = events.select { |e| e[:event] == "progress" } + assert_equal 4, progress.length # 2 per case: json_delta + done + assert_equal "done", events.last[:event] + end + + def test_stream_emits_summary_with_scores + scorer = Braintrust::Eval.scorer("exact") { |_i, e, o| (o == e) ? 
1.0 : 0.0 } + @evaluators["scored-eval"] = test_evaluator( + task: ->(input) { input.to_s.upcase }, + scorers: [scorer] + ) + s = service + validated = s.validate({ + "name" => "scored-eval", + "data" => {"data" => [{"input" => "hello", "expected" => "HELLO"}]}, + "experiment_name" => "my-exp" + }) + + events = collect_streamed_events(s, validated) + summary = events.find { |e| e[:event] == "summary" } + data = JSON.parse(summary[:data]) + + assert data.key?("scores") + assert_equal "my-exp", data["experiment_name"] + end + + def test_stream_emits_error_progress_on_task_failure + @evaluators["failing-eval"] = test_evaluator(task: ->(_input) { raise "boom" }) + s = service + validated = s.validate({ + "name" => "failing-eval", + "data" => {"data" => [{"input" => "x"}]}, + "experiment_name" => "exp" + }) + + events = collect_streamed_events(s, validated) + progress = events.find { |e| e[:event] == "progress" } + data = JSON.parse(progress[:data]) + + assert_equal "error", data["event"] + assert_match(/boom/, data["data"]) + assert_equal "done", events.last[:event] + end + + def test_stream_does_not_pass_state_when_auth_is_not_hash + received_opts = nil + spy = test_evaluator( + task: ->(input) { input }, + scorers: [Braintrust::Eval.scorer("s") { |_i, _e, _o| 1.0 }] + ) + spy.define_singleton_method(:run) do |cases, **opts| + received_opts = opts + Braintrust::Eval::Result.new( + experiment_id: nil, experiment_name: nil, + project_id: nil, project_name: nil, + permalink: nil, scores: {}, errors: [], duration: 0.01 + ) + end + + @evaluators["spy-eval"] = spy + s = service + validated = s.validate({ + "name" => "spy-eval", + "data" => {"data" => [{"input" => "x"}]}, + "experiment_name" => "exp" + }) + + collect_streamed_events(s, validated, auth: true) # NoAuth returns true + + assert_nil received_opts[:state] + end + + # --- build_state --- + + def test_build_state_returns_nil_for_non_hash_auth + assert_nil service.build_state(nil) + assert_nil 
service.build_state(true) + assert_nil service.build_state("string") + end + + def test_build_state_caches_by_auth_key + s = service + auth = { + "api_key" => "key-1", + "org_id" => "org-1", + "org_name" => "org", + "app_url" => "https://app.example.com", + "api_url" => "https://api.example.com" + } + + state1 = s.build_state(auth) + state2 = s.build_state(auth) + + assert_same state1, state2 + end + + def test_build_state_returns_different_state_for_different_keys + s = service + auth_a = {"api_key" => "key-a", "org_id" => "org-a", "org_name" => "org-a", + "app_url" => "https://a.example.com", "api_url" => "https://a.example.com"} + auth_b = {"api_key" => "key-b", "org_id" => "org-b", "org_name" => "org-b", + "app_url" => "https://b.example.com", "api_url" => "https://b.example.com"} + + state_a = s.build_state(auth_a) + state_b = s.build_state(auth_b) + + refute_same state_a, state_b + end + + def test_build_state_evicts_oldest_when_cache_full + s = service + + 65.times do |i| + auth = { + "api_key" => "key-#{i}", + "org_id" => "org-#{i}", + "org_name" => "org-#{i}", + "app_url" => "https://app.example.com", + "api_url" => "https://api.example.com" + } + s.build_state(auth) + end + + cache = s.instance_variable_get(:@state_cache) + assert_equal 64, cache.size + refute cache.key?(["key-0", "https://app.example.com", "org-0"]), + "Oldest entry should have been evicted" + end + + private + + def test_evaluator(**kwargs) + Test::Support::EvalHelper::TestEvaluator.new(tracer_provider: @rig.tracer_provider, **kwargs) + end + + def collect_streamed_events(svc, validated, auth: nil) + chunks = [] + sse = Braintrust::Server::SSEWriter.new { |chunk| chunks << chunk } + svc.stream(validated, auth: auth, sse: sse) + parse_sse_events(chunks.join) + end + end + end + end +end diff --git a/test/braintrust/server/services/list_service_test.rb b/test/braintrust/server/services/list_service_test.rb new file mode 100644 index 00000000..ad6de82a --- /dev/null +++ 
b/test/braintrust/server/services/list_service_test.rb @@ -0,0 +1,87 @@ +# frozen_string_literal: true + +require "test_helper" +require "json" + +# Unit tests for Services::List — runs without any framework (no appraisal needed). +module Braintrust + module Server + module Services + class ListTest < Minitest::Test + def setup + skip_unless_server! + @evaluators = {} + end + + def service + List.new(@evaluators) + end + + def test_returns_empty_hash_when_no_evaluators + result = service.call + assert_equal({}, result) + end + + def test_returns_evaluators_keyed_by_name + @evaluators["eval-a"] = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + @evaluators["eval-b"] = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + + result = service.call + assert result.key?("eval-a") + assert result.key?("eval-b") + end + + def test_includes_scorer_names + @evaluators["scored"] = Braintrust::Eval::Evaluator.new( + task: ->(input) { input }, + scorers: [ + Braintrust::Eval.scorer("accuracy") { |_i, _e, _o| 1.0 }, + Braintrust::Eval.scorer("relevance") { |_i, _e, _o| 0.5 } + ] + ) + + result = service.call + score_names = result["scored"]["scores"].map { |s| s["name"] } + assert_equal ["accuracy", "relevance"], score_names + end + + def test_empty_scores_when_no_scorers + @evaluators["no-scores"] = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + + result = service.call + assert_equal [], result["no-scores"]["scores"] + end + + def test_includes_parameters_in_static_container + @evaluators["param-eval"] = Braintrust::Eval::Evaluator.new( + task: ->(input) { input }, + parameters: {"temperature" => {type: "number", default: 0.7, description: "LLM temperature"}} + ) + + result = service.call + params = result["param-eval"]["parameters"] + assert_equal "braintrust.staticParameters", params["type"] + assert_nil params["source"] + assert_equal 0.7, params["schema"]["temperature"]["default"] + assert_equal "number", 
params["schema"]["temperature"]["schema"]["type"] + end + + def test_omits_parameters_when_none_defined + @evaluators["no-params"] = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + + result = service.call + refute result["no-params"].key?("parameters") + end + + def test_result_is_json_serializable + @evaluators["my-eval"] = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + + result = service.call + json = JSON.dump(result) + parsed = JSON.parse(json) + assert parsed.key?("my-eval") + end + end + end + end +end diff --git a/test/support/rails_server_helper.rb b/test/support/rails_server_helper.rb new file mode 100644 index 00000000..a567361a --- /dev/null +++ b/test/support/rails_server_helper.rb @@ -0,0 +1,61 @@ +# Try to load Rails engine dependencies. +RAILS_SERVER_AVAILABLE = begin + require "rack/test" + require "action_controller" + require "action_dispatch" + require "rails" + require "braintrust/server/rails" + true +rescue LoadError + false +end + +if RAILS_SERVER_AVAILABLE + # Create a minimal Rails application for engine integration tests. + # Guard against being required multiple times. + unless defined?(BraintrustRailsTestApp) + class BraintrustRailsTestApp < Rails::Application + config.eager_load = false + config.secret_key_base = "braintrust-rails-test-secret-key-abc123456789" + config.logger = ::Logger.new(nil) + config.log_level = :fatal + + # Allow any host in tests (Rack::Test uses "example.org" by default) + config.hosts.clear + + routes.draw do + mount Braintrust::Contrib::Rails::Engine, at: "/" + end + + initialize! + end + end +end + +module Test + module Support + module RailsServerHelper + def skip_unless_rails_server! + skip "Rails not available (run with: bundle exec appraisal rails-server rake test)" unless RAILS_SERVER_AVAILABLE + end + + # The engine itself as a Rack app — use for controller integration tests. + # Faster and more direct than routing through a full Rails application. 
+ def rails_engine_app + Braintrust::Contrib::Rails::Engine + end + + # The full test Rails application (mounts the engine at /). + # Use only when you need to verify middleware stack or mounted routing. + def rails_app + BraintrustRailsTestApp + end + + def reset_engine!(evaluators: {}, auth: :none) + Braintrust::Contrib::Rails::Engine.config.evaluators = evaluators + Braintrust::Contrib::Rails::Engine.config.auth = auth + Braintrust::Contrib::Rails::Engine.reset_services! + end + end + end +end diff --git a/test/test_helper.rb b/test/test_helper.rb index 287e465d..189225bb 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -77,6 +77,7 @@ require_relative "support/provider_helper" require_relative "support/eval_helper" require_relative "support/server_helper" +require_relative "support/rails_server_helper" require_relative "support/tracing_helper" # Include helper in all test cases @@ -89,6 +90,7 @@ class Minitest::Test include ::Test::Support::MockHelper include ::Test::Support::ProviderHelper include ::Test::Support::ServerHelper + include ::Test::Support::RailsServerHelper include ::Test::Support::TracingHelper # Use Minitest hooks to clear global state after every test From f4482255a422f89f8fd3ba09a022e314abd24285 Mon Sep 17 00:00:00 2001 From: Stephen Belanger Date: Wed, 18 Mar 2026 20:51:13 +0800 Subject: [PATCH 2/6] Address Rails engine review feedback --- README.md | 46 +++++++++++++++- examples/README.md | 1 + .../contrib/rails/application_controller.rb | 8 +++ lib/braintrust/contrib/rails/engine.rb | 21 +++----- .../contrib/rails/eval_controller.rb | 12 +---- .../server/services/eval_service.rb | 7 ++- .../server/services/list_service.rb | 7 ++- test/braintrust/contrib/rails/engine_test.rb | 54 ++++++++++++++----- test/support/rails_server_helper.rb | 8 +-- 9 files changed, 120 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index f41cd421..551e19b8 100644 --- a/README.md +++ b/README.md @@ -392,7 +392,11 @@ See 
[trace_scoring.rb](./examples/eval/trace_scoring.rb) for a full example. ### Dev Server -Run evaluations from the Braintrust web UI against code in your own application. Define evaluators, pass them to the dev server, and start serving: +Run evaluations from the Braintrust web UI against code in your own application. + +**Run as a Rack app** + +Define evaluators, pass them to the dev server, and start serving: ```ruby # eval_server.ru @@ -422,6 +426,44 @@ run Braintrust::Server::Rack.app( bundle exec rackup eval_server.ru -p 8300 -o 0.0.0.0 ``` +**Run as a Rails engine** + +Use the Rails engine when your evaluators live inside an existing Rails app and you want to mount the Braintrust endpoints into that application. + +```ruby +# config/initializers/braintrust_server.rb +require "braintrust/server/rails" + +Braintrust::Contrib::Rails::Engine.configure do |config| + config.evaluators = { + "food-classifier" => Braintrust::Eval::Evaluator.new( + task: ->(input:) { FoodClassifier.classify(input) }, + scorers: [ + Braintrust::Scorer.new("exact_match") { |expected:, output:| output == expected ? 1.0 : 0.0 } + ] + ) + } + + # Default is :clerk_token. Use :none for local development. 
+ config.auth = :none +end +``` + +```ruby +# config/routes.rb +Rails.application.routes.draw do + mount Braintrust::Contrib::Rails::Engine, at: "/braintrust" +end +``` + +Mounted at `/braintrust`, the engine exposes: + +- `GET /braintrust/` for the health check +- `GET /braintrust/list` and `POST /braintrust/list` to enumerate evaluators +- `POST /braintrust/eval` to run an evaluation and stream SSE results + +See example: [contrib/rails/eval.rb](./examples/contrib/rails/eval.rb) + **Custom evaluators** Evaluators can also be defined as subclasses: @@ -456,7 +498,7 @@ gem "rack" gem "puma" # recommended ``` -See example: [server/eval.ru](./examples/server/eval.ru) +See examples: [server/eval.ru](./examples/server/eval.ru), [contrib/rails/eval.rb](./examples/contrib/rails/eval.rb) ## Documentation diff --git a/examples/README.md b/examples/README.md index 87d5d04d..0ba0f3a7 100644 --- a/examples/README.md +++ b/examples/README.md @@ -33,6 +33,7 @@ BRAINTRUST_DEBUG=true ruby examples/login/login_basic.rb ### Dev Server Examples - **`server/eval.ru`**: Set up a dev server for remote evals — define evaluators (subclass or inline) and serve them via a Rack app. Start with: `bundle exec appraisal server rackup examples/server/eval.ru -p 8300 -o 0.0.0.0` +- **`contrib/rails/eval.rb`**: Mount the dev server as a Rails engine and configure evaluators via `Braintrust::Contrib::Rails::Engine.configure` ## Coming Soon diff --git a/lib/braintrust/contrib/rails/application_controller.rb b/lib/braintrust/contrib/rails/application_controller.rb index 12ecef76..09d2cf2c 100644 --- a/lib/braintrust/contrib/rails/application_controller.rb +++ b/lib/braintrust/contrib/rails/application_controller.rb @@ -18,6 +18,14 @@ def authenticate! request.env["braintrust.auth"] = auth_result @braintrust_auth = auth_result end + + def parse_json_body + body = request.body.read + return nil if body.nil? || body.empty? 
+ JSON.parse(body) + rescue JSON::ParserError + nil + end end end end diff --git a/lib/braintrust/contrib/rails/engine.rb b/lib/braintrust/contrib/rails/engine.rb index 85759ae8..de9ef14b 100644 --- a/lib/braintrust/contrib/rails/engine.rb +++ b/lib/braintrust/contrib/rails/engine.rb @@ -23,28 +23,23 @@ def self.evaluators end def self.auth_strategy - @auth_strategy ||= resolve_auth(config.auth) + resolve_auth(config.auth) end def self.list_service - @list_service ||= Server::Services::List.new(config.evaluators) + Server::Services::List.new(-> { config.evaluators }) end # Long-lived so the state cache persists across requests. def self.eval_service - @eval_service ||= Server::Services::Eval.new(config.evaluators) + @eval_service ||= Server::Services::Eval.new(-> { config.evaluators }) end - # Reset memoized services (useful in tests when config changes). - def self.reset_services! - @auth_strategy = nil - @list_service = nil - @eval_service = nil - end - - def self.configure - yield config - reset_services! + # Support the explicit `|config|` style used by this integration while + # still delegating zero-arity DSL blocks to Rails' native implementation. + def self.configure(&block) + return super(&block) if block&.arity == 0 + yield config if block end def self.resolve_auth(auth) diff --git a/lib/braintrust/contrib/rails/eval_controller.rb b/lib/braintrust/contrib/rails/eval_controller.rb index 11e3e417..0d04b479 100644 --- a/lib/braintrust/contrib/rails/eval_controller.rb +++ b/lib/braintrust/contrib/rails/eval_controller.rb @@ -7,7 +7,7 @@ class EvalController < ApplicationController include ActionController::Live def create - body = parse_body + body = parse_json_body unless body render json: {"error" => "Invalid JSON body"}, status: :bad_request return @@ -28,16 +28,6 @@ def create ensure response.stream.close end - - private - - def parse_body - body = request.body.read - return nil if body.nil? || body.empty? 
- JSON.parse(body) - rescue JSON::ParserError - nil - end end end end diff --git a/lib/braintrust/server/services/eval_service.rb b/lib/braintrust/server/services/eval_service.rb index 32b5e3bb..406d7c80 100644 --- a/lib/braintrust/server/services/eval_service.rb +++ b/lib/braintrust/server/services/eval_service.rb @@ -21,7 +21,7 @@ def validate(body) name = body["name"] return {error: "Missing required field: name", status: 400} unless name - evaluator = @evaluators[name] + evaluator = current_evaluators[name] return {error: "Evaluator '#{name}' not found", status: 404} unless evaluator data = body["data"] @@ -156,6 +156,11 @@ def build_state(auth) private + def current_evaluators + return @evaluators.call if @evaluators.respond_to?(:call) + @evaluators + end + # Resolve data source from the data field. # Returns [cases, dataset] where exactly one is non-nil. def resolve_data_source(data) diff --git a/lib/braintrust/server/services/list_service.rb b/lib/braintrust/server/services/list_service.rb index 1fd302f2..06bd7add 100644 --- a/lib/braintrust/server/services/list_service.rb +++ b/lib/braintrust/server/services/list_service.rb @@ -14,7 +14,7 @@ def initialize(evaluators) def call result = {} - @evaluators.each do |name, evaluator| + current_evaluators.each do |name, evaluator| scores = (evaluator.scorers || []).each_with_index.map do |scorer, i| scorer_name = scorer.respond_to?(:name) ? scorer.name : "score_#{i}" {"name" => scorer_name} @@ -29,6 +29,11 @@ def call private + def current_evaluators + return @evaluators.call if @evaluators.respond_to?(:call) + @evaluators + end + # Convert user-defined parameters to the dev server protocol format. # Wraps in a staticParameters container with "data" typed entries. 
def serialize_parameters(parameters) diff --git a/test/braintrust/contrib/rails/engine_test.rb b/test/braintrust/contrib/rails/engine_test.rb index 57f1e73c..830b0b4d 100644 --- a/test/braintrust/contrib/rails/engine_test.rb +++ b/test/braintrust/contrib/rails/engine_test.rb @@ -19,29 +19,62 @@ def test_evaluators_returns_config_value def test_auth_strategy_returns_no_auth_for_none Engine.config.auth = :none - Engine.reset_services! assert_instance_of Braintrust::Server::Auth::NoAuth, Engine.auth_strategy end def test_auth_strategy_returns_clerk_token_by_default Engine.config.auth = :clerk_token - Engine.reset_services! assert_instance_of Braintrust::Server::Auth::ClerkToken, Engine.auth_strategy end def test_auth_strategy_accepts_custom_object custom = Braintrust::Server::Auth::NoAuth.new Engine.config.auth = custom - Engine.reset_services! assert_same custom, Engine.auth_strategy end def test_auth_strategy_raises_for_unknown_symbol Engine.config.auth = :jwt - Engine.reset_services! assert_raises(ArgumentError) { Engine.auth_strategy } end + def test_auth_strategy_raises_for_unknown_string + Engine.config.auth = "jwt" + assert_raises(ArgumentError) { Engine.auth_strategy } + end + + def test_auth_strategy_reflects_config_changes_without_manual_reset + Engine.config.auth = :none + assert_instance_of Braintrust::Server::Auth::NoAuth, Engine.auth_strategy + + Engine.config.auth = :clerk_token + assert_instance_of Braintrust::Server::Auth::ClerkToken, Engine.auth_strategy + end + + def test_list_service_uses_latest_evaluators_without_manual_reset + first = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + second = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + + Engine.config.evaluators = {"first" => first} + assert_equal ["first"], Engine.list_service.call.keys + + Engine.config.evaluators = {"second" => second} + assert_equal ["second"], Engine.list_service.call.keys + end + + def test_eval_service_uses_latest_evaluators_without_manual_reset 
+ first = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + second = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + payload = {"data" => {"data" => [{"input" => "hello"}]}} + + Engine.config.evaluators = {"first" => first} + service = Engine.eval_service + assert_same first, service.validate(payload.merge("name" => "first"))[:evaluator] + + Engine.config.evaluators = {"second" => second} + assert_same second, service.validate(payload.merge("name" => "second"))[:evaluator] + end + def test_eval_service_returns_eval_instance assert_instance_of Braintrust::Server::Services::Eval, Engine.eval_service end @@ -56,16 +89,10 @@ def test_eval_service_is_memoized assert_same svc1, svc2 end - def test_reset_services_clears_memoized_instances - svc1 = Engine.eval_service - Engine.reset_services! - svc2 = Engine.eval_service - refute_same svc1, svc2 - end - - def test_configure_yields_config_and_resets_services + def test_configure_yields_config_without_resetting_eval_service svc_before = Engine.eval_service evaluator = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + payload = {"name" => "configured-eval", "data" => {"data" => [{"input" => "hello"}]}} Engine.configure do |config| config.evaluators = {"configured-eval" => evaluator} @@ -74,7 +101,8 @@ def test_configure_yields_config_and_resets_services assert_same evaluator, Engine.evaluators["configured-eval"] assert_instance_of Braintrust::Server::Auth::NoAuth, Engine.auth_strategy - refute_same svc_before, Engine.eval_service + assert_same svc_before, Engine.eval_service + assert_same evaluator, Engine.eval_service.validate(payload)[:evaluator] end def test_cors_middleware_is_in_middleware_stack diff --git a/test/support/rails_server_helper.rb b/test/support/rails_server_helper.rb index a567361a..878f3783 100644 --- a/test/support/rails_server_helper.rb +++ b/test/support/rails_server_helper.rb @@ -52,9 +52,11 @@ def rails_app end def reset_engine!(evaluators: {}, auth: :none) - 
Braintrust::Contrib::Rails::Engine.config.evaluators = evaluators - Braintrust::Contrib::Rails::Engine.config.auth = auth - Braintrust::Contrib::Rails::Engine.reset_services! + engine = Braintrust::Contrib::Rails::Engine + engine.config.evaluators = evaluators + engine.config.auth = auth + # Clear the long-lived eval service so cached state does not leak across tests. + engine.instance_variable_set(:@eval_service, nil) end end end From 98c49e9e6749b11f753c3d981e5cdf65c0d55980 Mon Sep 17 00:00:00 2001 From: Stephen Belanger Date: Wed, 18 Mar 2026 20:56:16 +0800 Subject: [PATCH 3/6] Fix Rails engine lint --- lib/braintrust/contrib/rails/engine.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/braintrust/contrib/rails/engine.rb b/lib/braintrust/contrib/rails/engine.rb index de9ef14b..ce4bf2d7 100644 --- a/lib/braintrust/contrib/rails/engine.rb +++ b/lib/braintrust/contrib/rails/engine.rb @@ -38,7 +38,7 @@ def self.eval_service # Support the explicit `|config|` style used by this integration while # still delegating zero-arity DSL blocks to Rails' native implementation. 
def self.configure(&block) - return super(&block) if block&.arity == 0 + return super if block&.arity == 0 yield config if block end From 2a1e0e288594694d244f254f0974d5e6787eef4c Mon Sep 17 00:00:00 2001 From: Stephen Belanger Date: Thu, 19 Mar 2026 19:55:41 +0800 Subject: [PATCH 4/6] Add Rails eval server generator --- README.md | 42 +++++++------ examples/README.md | 2 +- examples/contrib/rails/eval.rb | 59 +++++++++++-------- .../eval_server/eval_server_generator.rb | 36 +++++++++++ .../templates/braintrust_server.rb.tt | 19 ++++++ .../rails/eval_server_generator_test.rb | 50 ++++++++++++++++ 6 files changed, 162 insertions(+), 46 deletions(-) create mode 100644 lib/generators/braintrust/eval_server/eval_server_generator.rb create mode 100644 lib/generators/braintrust/eval_server/templates/braintrust_server.rb.tt create mode 100644 test/braintrust/contrib/rails/eval_server_generator_test.rb diff --git a/README.md b/README.md index 551e19b8..57035b44 100644 --- a/README.md +++ b/README.md @@ -428,27 +428,29 @@ bundle exec rackup eval_server.ru -p 8300 -o 0.0.0.0 **Run as a Rails engine** -Use the Rails engine when your evaluators live inside an existing Rails app and you want to mount the Braintrust endpoints into that application. +Use the Rails engine when your evaluators live inside an existing Rails app and you want to mount the Braintrust eval server into that application. + +Define each evaluator in its own file, for example under `app/evaluators/`: ```ruby -# config/initializers/braintrust_server.rb -require "braintrust/server/rails" - -Braintrust::Contrib::Rails::Engine.configure do |config| - config.evaluators = { - "food-classifier" => Braintrust::Eval::Evaluator.new( - task: ->(input:) { FoodClassifier.classify(input) }, - scorers: [ - Braintrust::Scorer.new("exact_match") { |expected:, output:| output == expected ? 
1.0 : 0.0 } - ] - ) - } +# app/evaluators/food_classifier.rb +class FoodClassifier < Braintrust::Eval::Evaluator + def task + ->(input:) { classify(input) } + end - # Default is :clerk_token. Use :none for local development. - config.auth = :none + def scorers + [Braintrust::Scorer.new("exact_match") { |expected:, output:| output == expected ? 1.0 : 0.0 }] + end end ``` +Then generate the Braintrust initializer: + +```bash +bin/rails generate braintrust:eval_server +``` + ```ruby # config/routes.rb Rails.application.routes.draw do @@ -456,11 +458,13 @@ Rails.application.routes.draw do end ``` -Mounted at `/braintrust`, the engine exposes: +The generator writes `config/initializers/braintrust_server.rb`, where you can review or customize the slug-to-evaluator mapping it discovers from `app/evaluators/**/*.rb` and `evaluators/**/*.rb`. + +**Developing locally** + +If you want to skip authentication on incoming eval requests while developing locally, set `config.auth = :none` in `config/initializers/braintrust_server.rb`. -- `GET /braintrust/` for the health check -- `GET /braintrust/list` and `POST /braintrust/list` to enumerate evaluators -- `POST /braintrust/eval` to run an evaluation and stream SSE results +That only disables authentication on requests into your Rails app. Any outgoing Braintrust API calls still require normal Braintrust credentials such as `BRAINTRUST_API_KEY`. See example: [contrib/rails/eval.rb](./examples/contrib/rails/eval.rb) diff --git a/examples/README.md b/examples/README.md index 0ba0f3a7..29adb290 100644 --- a/examples/README.md +++ b/examples/README.md @@ -33,7 +33,7 @@ BRAINTRUST_DEBUG=true ruby examples/login/login_basic.rb ### Dev Server Examples - **`server/eval.ru`**: Set up a dev server for remote evals — define evaluators (subclass or inline) and serve them via a Rack app. 
Start with: `bundle exec appraisal server rackup examples/server/eval.ru -p 8300 -o 0.0.0.0` -- **`contrib/rails/eval.rb`**: Mount the dev server as a Rails engine and configure evaluators via `Braintrust::Contrib::Rails::Engine.configure` +- **`contrib/rails/eval.rb`**: Mount the dev server as a Rails engine, define evaluator classes under `app/evaluators/`, and generate `config/initializers/braintrust_server.rb` with `bin/rails generate braintrust:eval_server` ## Coming Soon diff --git a/examples/contrib/rails/eval.rb b/examples/contrib/rails/eval.rb index ca1f16bf..7e7bef41 100644 --- a/examples/contrib/rails/eval.rb +++ b/examples/contrib/rails/eval.rb @@ -2,12 +2,11 @@ # Braintrust Rails Engine — mount example # -# This file shows how to mount the Braintrust eval server as a Rails engine. -# The engine exposes the same endpoints as the standalone Rack server: -# GET /braintrust/ — health check -# GET /braintrust/list — list registered evaluators -# POST /braintrust/list — list registered evaluators -# POST /braintrust/eval — run an evaluation (SSE stream) +# This file shows one conventional setup for the Braintrust eval server in Rails: +# 1. Define evaluator classes under app/evaluators/ +# 2. Generate the initializer with: +# bin/rails generate braintrust:eval_server +# 3. Mount the engine in config/routes.rb # # Requirements: # gem 'actionpack', '~> 8.0' @@ -15,30 +14,38 @@ # gem 'activesupport', '~> 8.0' # --------------------------------------------------------------------------- -# config/initializers/braintrust_server.rb +# app/evaluators/my_classifier.rb # --------------------------------------------------------------------------- -require "braintrust/server/rails" - -Braintrust::Contrib::Rails::Engine.configure do |config| - # Register your evaluators by name. The Braintrust UI will discover them - # via GET /braintrust/list and let you run them via POST /braintrust/eval. 
- config.evaluators = { - "my-classifier" => Braintrust::Eval::Evaluator.new( - task: ->(input) { classify(input) }, - scorers: [ - Braintrust::Eval.scorer("accuracy") { |_input, expected, output| - (output == expected) ? 1.0 : 0.0 - } - ] - ) - } - - # Auth strategy: :clerk_token (default) validates Braintrust session tokens. - # Use :none for local development without authentication. - config.auth = :clerk_token +class MyClassifier < Braintrust::Eval::Evaluator + def task + ->(input:) { classify(input) } + end + + def scorers + [Braintrust::Scorer.new("accuracy") { |expected:, output:| (output == expected) ? 1.0 : 0.0 }] + end end +# --------------------------------------------------------------------------- +# config/initializers/braintrust_server.rb +# --------------------------------------------------------------------------- + +# Generated by: bin/rails generate braintrust:eval_server +# +# require "braintrust/server/rails" +# +# Braintrust::Contrib::Rails::Engine.configure do |config| +# config.evaluators = { +# "my-classifier" => MyClassifier.new +# } +# +# # Default is :clerk_token. Use :none only for local development without +# # incoming request authentication. Outgoing Braintrust API calls still need +# # normal Braintrust credentials. 
+# config.auth = :clerk_token +# end + # --------------------------------------------------------------------------- # config/routes.rb # --------------------------------------------------------------------------- diff --git a/lib/generators/braintrust/eval_server/eval_server_generator.rb b/lib/generators/braintrust/eval_server/eval_server_generator.rb new file mode 100644 index 00000000..16216766 --- /dev/null +++ b/lib/generators/braintrust/eval_server/eval_server_generator.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +require "rails/generators" + +module Braintrust + module Generators + class EvalServerGenerator < ::Rails::Generators::Base + source_root File.expand_path("templates", __dir__) + + def create_initializer + @evaluators = discovered_evaluators + template "braintrust_server.rb.tt", "config/initializers/braintrust_server.rb" + end + + private + + def discovered_evaluators + evaluator_roots.flat_map do |root| + Dir[File.join(destination_root, root, "**/*.rb")].sort.map do |file| + relative_path = file.delete_prefix("#{File.join(destination_root, root)}/").sub(/\.rb\z/, "") + { + class_name: relative_path.split("/").map(&:camelize).join("::"), + slug: relative_path.tr("/", "-").tr("_", "-") + } + end + end + end + + def evaluator_roots + %w[app/evaluators evaluators].select do |root| + Dir.exist?(File.join(destination_root, root)) + end + end + end + end +end diff --git a/lib/generators/braintrust/eval_server/templates/braintrust_server.rb.tt b/lib/generators/braintrust/eval_server/templates/braintrust_server.rb.tt new file mode 100644 index 00000000..dd65daff --- /dev/null +++ b/lib/generators/braintrust/eval_server/templates/braintrust_server.rb.tt @@ -0,0 +1,19 @@ +require "braintrust/server/rails" + +Braintrust::Contrib::Rails::Engine.configure do |config| + config.evaluators = { +<% if @evaluators.empty? 
-%> + # Add evaluator instances here, for example: + # "food-classifier" => FoodClassifier.new +<% else -%> +<% @evaluators.each_with_index do |evaluator, index| -%> + "<%= evaluator[:slug] %>" => <%= evaluator[:class_name] %>.new<%= "," unless index == @evaluators.length - 1 %> +<% end -%> +<% end -%> + } + + # Default is :clerk_token. Use :none only when developing locally without + # incoming request authentication; outgoing Braintrust API calls still need + # a valid Braintrust API key. + config.auth = :clerk_token +end diff --git a/test/braintrust/contrib/rails/eval_server_generator_test.rb b/test/braintrust/contrib/rails/eval_server_generator_test.rb new file mode 100644 index 00000000..46f560de --- /dev/null +++ b/test/braintrust/contrib/rails/eval_server_generator_test.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +require "test_helper" + +if RAILS_SERVER_AVAILABLE + require "rails/generators/test_case" + require "generators/braintrust/eval_server/eval_server_generator" + + module Braintrust + module Contrib + module Rails + class EvalServerGeneratorTest < ::Rails::Generators::TestCase + tests ::Braintrust::Generators::EvalServerGenerator + destination File.expand_path("../../../../tmp/eval_server_generator", __dir__) + setup :prepare_destination + + def test_generates_initializer_from_app_evaluators + FileUtils.mkdir_p(File.join(destination_root, "app/evaluators")) + File.write( + File.join(destination_root, "app/evaluators/food_classifier.rb"), + <<~RUBY + class FoodClassifier < Braintrust::Eval::Evaluator + end + RUBY + ) + + run_generator + + assert_file "config/initializers/braintrust_server.rb" do |contents| + assert_includes contents, "require \"braintrust/server/rails\"" + assert_includes contents, "\"food-classifier\" => FoodClassifier.new" + end + end + end + end + end + end +else + module Braintrust + module Contrib + module Rails + class EvalServerGeneratorTest < Minitest::Test + def test_skips_without_rails + skip "Rails not available 
(run with: bundle exec appraisal rails-server rake test)" + end + end + end + end + end +end From 763799cacf7ec49b7eb87685624cecd6dff6f896 Mon Sep 17 00:00:00 2001 From: Stephen Belanger Date: Thu, 19 Mar 2026 22:27:58 +0800 Subject: [PATCH 5/6] Namespace Rails eval server generator --- .../eval_server/eval_server_generator.rb | 41 +++++++++++++++++++ .../templates/braintrust_server.rb.tt | 0 .../eval_server/eval_server_generator.rb | 36 ---------------- .../rails/eval_server_generator_test.rb | 4 +- 4 files changed, 43 insertions(+), 38 deletions(-) create mode 100644 lib/generators/braintrust/contrib/rails/eval_server/eval_server_generator.rb rename lib/generators/braintrust/{ => contrib/rails}/eval_server/templates/braintrust_server.rb.tt (100%) delete mode 100644 lib/generators/braintrust/eval_server/eval_server_generator.rb diff --git a/lib/generators/braintrust/contrib/rails/eval_server/eval_server_generator.rb b/lib/generators/braintrust/contrib/rails/eval_server/eval_server_generator.rb new file mode 100644 index 00000000..1df544a9 --- /dev/null +++ b/lib/generators/braintrust/contrib/rails/eval_server/eval_server_generator.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +require "rails/generators" + +module Braintrust + module Contrib + module Rails + module Generators + class EvalServerGenerator < ::Rails::Generators::Base + namespace "braintrust:eval_server" + source_root File.expand_path("templates", __dir__) + + def create_initializer + @evaluators = discovered_evaluators + template "braintrust_server.rb.tt", "config/initializers/braintrust_server.rb" + end + + private + + def discovered_evaluators + evaluator_roots.flat_map do |root| + Dir[File.join(destination_root, root, "**/*.rb")].sort.map do |file| + relative_path = file.delete_prefix("#{File.join(destination_root, root)}/").sub(/\.rb\z/, "") + { + class_name: relative_path.split("/").map(&:camelize).join("::"), + slug: relative_path.tr("/", "-").tr("_", "-") + } + end + end + end + + def 
evaluator_roots + %w[app/evaluators evaluators].select do |root| + Dir.exist?(File.join(destination_root, root)) + end + end + end + end + end + end +end diff --git a/lib/generators/braintrust/eval_server/templates/braintrust_server.rb.tt b/lib/generators/braintrust/contrib/rails/eval_server/templates/braintrust_server.rb.tt similarity index 100% rename from lib/generators/braintrust/eval_server/templates/braintrust_server.rb.tt rename to lib/generators/braintrust/contrib/rails/eval_server/templates/braintrust_server.rb.tt diff --git a/lib/generators/braintrust/eval_server/eval_server_generator.rb b/lib/generators/braintrust/eval_server/eval_server_generator.rb deleted file mode 100644 index 16216766..00000000 --- a/lib/generators/braintrust/eval_server/eval_server_generator.rb +++ /dev/null @@ -1,36 +0,0 @@ -# frozen_string_literal: true - -require "rails/generators" - -module Braintrust - module Generators - class EvalServerGenerator < ::Rails::Generators::Base - source_root File.expand_path("templates", __dir__) - - def create_initializer - @evaluators = discovered_evaluators - template "braintrust_server.rb.tt", "config/initializers/braintrust_server.rb" - end - - private - - def discovered_evaluators - evaluator_roots.flat_map do |root| - Dir[File.join(destination_root, root, "**/*.rb")].sort.map do |file| - relative_path = file.delete_prefix("#{File.join(destination_root, root)}/").sub(/\.rb\z/, "") - { - class_name: relative_path.split("/").map(&:camelize).join("::"), - slug: relative_path.tr("/", "-").tr("_", "-") - } - end - end - end - - def evaluator_roots - %w[app/evaluators evaluators].select do |root| - Dir.exist?(File.join(destination_root, root)) - end - end - end - end -end diff --git a/test/braintrust/contrib/rails/eval_server_generator_test.rb b/test/braintrust/contrib/rails/eval_server_generator_test.rb index 46f560de..ff680f21 100644 --- a/test/braintrust/contrib/rails/eval_server_generator_test.rb +++ 
b/test/braintrust/contrib/rails/eval_server_generator_test.rb @@ -4,13 +4,13 @@ if RAILS_SERVER_AVAILABLE require "rails/generators/test_case" - require "generators/braintrust/eval_server/eval_server_generator" + require "generators/braintrust/contrib/rails/eval_server/eval_server_generator" module Braintrust module Contrib module Rails class EvalServerGeneratorTest < ::Rails::Generators::TestCase - tests ::Braintrust::Generators::EvalServerGenerator + tests ::Braintrust::Contrib::Rails::Generators::EvalServerGenerator destination File.expand_path("../../../../tmp/eval_server_generator", __dir__) setup :prepare_destination From d7a84240f839d25347adbd9c402e370b046610dc Mon Sep 17 00:00:00 2001 From: David Elner Date: Thu, 19 Mar 2026 19:03:35 +0000 Subject: [PATCH 6/6] Changed: Move some files around for namespacing server --- README.md | 63 ++++--- examples/README.md | 2 +- examples/contrib/rails/eval.rb | 10 +- .../contrib/rails/application_controller.rb | 32 ---- lib/braintrust/contrib/rails/engine.rb | 66 ------- .../contrib/rails/eval_controller.rb | 34 ---- .../contrib/rails/health_controller.rb | 13 -- .../contrib/rails/list_controller.rb | 14 -- lib/braintrust/contrib/rails/server.rb | 20 ++ .../rails/server/application_controller.rb | 34 ++++ lib/braintrust/contrib/rails/server/engine.rb | 72 ++++++++ .../contrib/rails/server/eval_controller.rb | 36 ++++ .../contrib/rails/server/generator.rb | 43 +++++ .../contrib/rails/server/health_controller.rb | 15 ++ .../contrib/rails/server/list_controller.rb | 16 ++ .../contrib/rails/{ => server}/routes.rb | 2 +- .../rails/server/templates/initializer.rb.tt} | 4 +- lib/braintrust/server/rails.rb | 20 -- .../eval_server/eval_server_generator.rb | 41 ----- test/braintrust/contrib/rails/engine_test.rb | 130 ------------- .../contrib/rails/eval_controller_test.rb | 168 ----------------- .../rails/eval_server_generator_test.rb | 50 ----- .../contrib/rails/health_controller_test.rb | 39 ---- 
.../contrib/rails/list_controller_test.rb | 99 ---------- .../contrib/rails/rails_server_helper.rb | 67 +++++++ .../contrib/rails/server/engine_test.rb | 135 ++++++++++++++ .../rails/server/eval_controller_test.rb | 172 ++++++++++++++++++ .../contrib/rails/server/generator_test.rb | 56 ++++++ .../rails/server/health_controller_test.rb | 43 +++++ .../rails/server/list_controller_test.rb | 103 +++++++++++ test/support/rails_server_helper.rb | 63 ------- test/test_helper.rb | 2 - 32 files changed, 856 insertions(+), 808 deletions(-) delete mode 100644 lib/braintrust/contrib/rails/application_controller.rb delete mode 100644 lib/braintrust/contrib/rails/engine.rb delete mode 100644 lib/braintrust/contrib/rails/eval_controller.rb delete mode 100644 lib/braintrust/contrib/rails/health_controller.rb delete mode 100644 lib/braintrust/contrib/rails/list_controller.rb create mode 100644 lib/braintrust/contrib/rails/server.rb create mode 100644 lib/braintrust/contrib/rails/server/application_controller.rb create mode 100644 lib/braintrust/contrib/rails/server/engine.rb create mode 100644 lib/braintrust/contrib/rails/server/eval_controller.rb create mode 100644 lib/braintrust/contrib/rails/server/generator.rb create mode 100644 lib/braintrust/contrib/rails/server/health_controller.rb create mode 100644 lib/braintrust/contrib/rails/server/list_controller.rb rename lib/braintrust/contrib/rails/{ => server}/routes.rb (73%) rename lib/{generators/braintrust/contrib/rails/eval_server/templates/braintrust_server.rb.tt => braintrust/contrib/rails/server/templates/initializer.rb.tt} (84%) delete mode 100644 lib/braintrust/server/rails.rb delete mode 100644 lib/generators/braintrust/contrib/rails/eval_server/eval_server_generator.rb delete mode 100644 test/braintrust/contrib/rails/engine_test.rb delete mode 100644 test/braintrust/contrib/rails/eval_controller_test.rb delete mode 100644 test/braintrust/contrib/rails/eval_server_generator_test.rb delete mode 100644 
test/braintrust/contrib/rails/health_controller_test.rb delete mode 100644 test/braintrust/contrib/rails/list_controller_test.rb create mode 100644 test/braintrust/contrib/rails/rails_server_helper.rb create mode 100644 test/braintrust/contrib/rails/server/engine_test.rb create mode 100644 test/braintrust/contrib/rails/server/eval_controller_test.rb create mode 100644 test/braintrust/contrib/rails/server/generator_test.rb create mode 100644 test/braintrust/contrib/rails/server/health_controller_test.rb create mode 100644 test/braintrust/contrib/rails/server/list_controller_test.rb delete mode 100644 test/support/rails_server_helper.rb diff --git a/README.md b/README.md index 57035b44..07de9947 100644 --- a/README.md +++ b/README.md @@ -394,7 +394,7 @@ See [trace_scoring.rb](./examples/eval/trace_scoring.rb) for a full example. Run evaluations from the Braintrust web UI against code in your own application. -**Run as a Rack app** +#### Run as a Rack app Define evaluators, pass them to the dev server, and start serving: @@ -422,11 +422,38 @@ run Braintrust::Server::Rack.app( ) ``` +Add your Rack server to your Gemfile: + +```ruby +gem "rack" +gem "puma" # recommended +``` + +Then start the server: + ```bash bundle exec rackup eval_server.ru -p 8300 -o 0.0.0.0 ``` -**Run as a Rails engine** +See example: [server/eval.ru](./examples/server/eval.ru) + +**Custom evaluators** + +Evaluators can also be defined as subclasses: + +```ruby +class FoodClassifier < Braintrust::Eval::Evaluator + def task + ->(input:) { classify(input) } + end + + def scorers + [Braintrust::Scorer.new("exact_match") { |expected:, output:| output == expected ? 1.0 : 0.0 }] + end +end +``` + +#### Run as a Rails engine Use the Rails engine when your evaluators live inside an existing Rails app and you want to mount the Braintrust eval server into that application. 
@@ -460,29 +487,16 @@ end The generator writes `config/initializers/braintrust_server.rb`, where you can review or customize the slug-to-evaluator mapping it discovers from `app/evaluators/**/*.rb` and `evaluators/**/*.rb`. -**Developing locally** - -If you want to skip authentication on incoming eval requests while developing locally, set `config.auth = :none` in `config/initializers/braintrust_server.rb`. - -That only disables authentication on requests into your Rails app. Any outgoing Braintrust API calls still require normal Braintrust credentials such as `BRAINTRUST_API_KEY`. - See example: [contrib/rails/eval.rb](./examples/contrib/rails/eval.rb) -**Custom evaluators** +**Developing locally** -Evaluators can also be defined as subclasses: +If you want to skip authentication on incoming eval requests while developing locally: -```ruby -class FoodClassifier < Braintrust::Eval::Evaluator - def task - ->(input:) { classify(input) } - end +- **For Rack**: Pass `auth: :none` to `Braintrust::Server::Rack.app(...)` +- **For Rails**: Set `config.auth = :none` in `config/initializers/braintrust_server.rb` - def scorers - [Braintrust::Scorer.new("exact_match") { |expected:, output:| output == expected ? 1.0 : 0.0 }] - end -end -``` +*NOTE: Setting `:none` only disables authentication on incoming requests into your server; executing evals still requires a `BRAINTRUST_API_KEY` to fetch resources.* **Supported web servers** @@ -495,14 +509,7 @@ The dev server requires the `rack` gem and a Rack-compatible web server. | [Passenger](https://www.phusionpassenger.com/) | 6.x | | | [WEBrick](https://github.com/ruby/webrick) | Not supported | Does not support server-sent events.
| -Add your chosen server to your Gemfile: - -```ruby -gem "rack" -gem "puma" # recommended -``` - -See examples: [server/eval.ru](./examples/server/eval.ru), [contrib/rails/eval.rb](./examples/contrib/rails/eval.rb) +See example: [server/eval.ru](./examples/server/eval.ru) ## Documentation diff --git a/examples/README.md b/examples/README.md index 29adb290..6c1f6f24 100644 --- a/examples/README.md +++ b/examples/README.md @@ -33,7 +33,7 @@ BRAINTRUST_DEBUG=true ruby examples/login/login_basic.rb ### Dev Server Examples - **`server/eval.ru`**: Set up a dev server for remote evals — define evaluators (subclass or inline) and serve them via a Rack app. Start with: `bundle exec appraisal server rackup examples/server/eval.ru -p 8300 -o 0.0.0.0` -- **`contrib/rails/eval.rb`**: Mount the dev server as a Rails engine, define evaluator classes under `app/evaluators/`, and generate `config/initializers/braintrust_server.rb` with `bin/rails generate braintrust:eval_server` +- **`contrib/rails/eval.rb`**: Mount the dev server as a Rails engine, define evaluator classes under `app/evaluators/`, and generate `config/initializers/braintrust_server.rb` with `bin/rails generate braintrust:server` ## Coming Soon diff --git a/examples/contrib/rails/eval.rb b/examples/contrib/rails/eval.rb index 7e7bef41..b6884c09 100644 --- a/examples/contrib/rails/eval.rb +++ b/examples/contrib/rails/eval.rb @@ -5,7 +5,7 @@ # This file shows one conventional setup for the Braintrust eval server in Rails: # 1. Define evaluator classes under app/evaluators/ # 2. Generate the initializer with: -# bin/rails generate braintrust:eval_server +# bin/rails generate braintrust:server # 3.
Mount the engine in config/routes.rb # # Requirements: @@ -31,11 +31,11 @@ def scorers # config/initializers/braintrust_server.rb # --------------------------------------------------------------------------- -# Generated by: bin/rails generate braintrust:eval_server +# Generated by: bin/rails generate braintrust:server # -# require "braintrust/server/rails" +# require "braintrust/contrib/rails/server" # -# Braintrust::Contrib::Rails::Engine.configure do |config| +# Braintrust::Contrib::Rails::Server::Engine.configure do |config| # config.evaluators = { # "my-classifier" => MyClassifier.new # } @@ -51,7 +51,7 @@ def scorers # --------------------------------------------------------------------------- # Rails.application.routes.draw do -# mount Braintrust::Contrib::Rails::Engine, at: "/braintrust" +# mount Braintrust::Contrib::Rails::Server::Engine, at: "/braintrust" # end puts "Braintrust Rails Engine example — see comments for usage" diff --git a/lib/braintrust/contrib/rails/application_controller.rb b/lib/braintrust/contrib/rails/application_controller.rb deleted file mode 100644 index 09d2cf2c..00000000 --- a/lib/braintrust/contrib/rails/application_controller.rb +++ /dev/null @@ -1,32 +0,0 @@ -# frozen_string_literal: true - -module Braintrust - module Contrib - module Rails - class ApplicationController < ActionController::API - before_action :authenticate! - - private - - def authenticate! - auth_result = Engine.auth_strategy.authenticate(request.env) - unless auth_result - render json: {"error" => "Unauthorized"}, status: :unauthorized - return - end - - request.env["braintrust.auth"] = auth_result - @braintrust_auth = auth_result - end - - def parse_json_body - body = request.body.read - return nil if body.nil? || body.empty? 
- JSON.parse(body) - rescue JSON::ParserError - nil - end - end - end - end -end diff --git a/lib/braintrust/contrib/rails/engine.rb b/lib/braintrust/contrib/rails/engine.rb deleted file mode 100644 index ce4bf2d7..00000000 --- a/lib/braintrust/contrib/rails/engine.rb +++ /dev/null @@ -1,66 +0,0 @@ -# frozen_string_literal: true - -module Braintrust - module Contrib - module Rails - class Engine < ::Rails::Engine - isolate_namespace Braintrust::Contrib::Rails - - config.evaluators = {} - config.auth = :clerk_token - - # Register the engine's routes file so Rails loads it during initialization. - paths["config/routes.rb"] << File.expand_path("routes.rb", __dir__) - - initializer "braintrust.server.cors" do |app| - app.middleware.use Braintrust::Server::Middleware::Cors - end - - # Class-level helpers that read from engine config. - - def self.evaluators - config.evaluators - end - - def self.auth_strategy - resolve_auth(config.auth) - end - - def self.list_service - Server::Services::List.new(-> { config.evaluators }) - end - - # Long-lived so the state cache persists across requests. - def self.eval_service - @eval_service ||= Server::Services::Eval.new(-> { config.evaluators }) - end - - # Support the explicit `|config|` style used by this integration while - # still delegating zero-arity DSL blocks to Rails' native implementation. - def self.configure(&block) - return super if block&.arity == 0 - yield config if block - end - - def self.resolve_auth(auth) - case auth - when :none - Server::Auth::NoAuth.new - when :clerk_token - Server::Auth::ClerkToken.new - when Symbol, String - raise ArgumentError, "Unknown auth strategy #{auth.inspect}. Expected :none, :clerk_token, or an auth object." 
- else - auth - end - end - private_class_method :resolve_auth - end - end - end -end - -require_relative "application_controller" -require_relative "health_controller" -require_relative "list_controller" -require_relative "eval_controller" diff --git a/lib/braintrust/contrib/rails/eval_controller.rb b/lib/braintrust/contrib/rails/eval_controller.rb deleted file mode 100644 index 0d04b479..00000000 --- a/lib/braintrust/contrib/rails/eval_controller.rb +++ /dev/null @@ -1,34 +0,0 @@ -# frozen_string_literal: true - -module Braintrust - module Contrib - module Rails - class EvalController < ApplicationController - include ActionController::Live - - def create - body = parse_json_body - unless body - render json: {"error" => "Invalid JSON body"}, status: :bad_request - return - end - - result = Engine.eval_service.validate(body) - if result[:error] - render json: {"error" => result[:error]}, status: result[:status] - return - end - - response.headers["Content-Type"] = "text/event-stream" - response.headers["Cache-Control"] = "no-cache" - response.headers["Connection"] = "keep-alive" - - sse = Server::SSEWriter.new { |chunk| response.stream.write(chunk) } - Engine.eval_service.stream(result, auth: @braintrust_auth, sse: sse) - ensure - response.stream.close - end - end - end - end -end diff --git a/lib/braintrust/contrib/rails/health_controller.rb b/lib/braintrust/contrib/rails/health_controller.rb deleted file mode 100644 index 9c362f26..00000000 --- a/lib/braintrust/contrib/rails/health_controller.rb +++ /dev/null @@ -1,13 +0,0 @@ -# frozen_string_literal: true - -module Braintrust - module Contrib - module Rails - class HealthController < ApplicationController - def show - render json: {"status" => "ok"} - end - end - end - end -end diff --git a/lib/braintrust/contrib/rails/list_controller.rb b/lib/braintrust/contrib/rails/list_controller.rb deleted file mode 100644 index 3e9737c4..00000000 --- a/lib/braintrust/contrib/rails/list_controller.rb +++ /dev/null @@ -1,14 
+0,0 @@ -# frozen_string_literal: true - -module Braintrust - module Contrib - module Rails - class ListController < ApplicationController - def show - result = Engine.list_service.call - render json: result - end - end - end - end -end diff --git a/lib/braintrust/contrib/rails/server.rb b/lib/braintrust/contrib/rails/server.rb new file mode 100644 index 00000000..558852c5 --- /dev/null +++ b/lib/braintrust/contrib/rails/server.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +begin + require "action_controller" + require "rails/engine" +rescue LoadError + raise LoadError, + "Rails (actionpack + railties) is required for the Braintrust Rails server engine. " \ + "Add `gem 'rails'` or `gem 'actionpack'` and `gem 'railties'` to your Gemfile." +end + +require "json" +require_relative "../../eval" +require_relative "../../server/sse" +require_relative "../../server/auth/no_auth" +require_relative "../../server/auth/clerk_token" +require_relative "../../server/middleware/cors" +require_relative "../../server/services/list_service" +require_relative "../../server/services/eval_service" +require_relative "server/engine" diff --git a/lib/braintrust/contrib/rails/server/application_controller.rb b/lib/braintrust/contrib/rails/server/application_controller.rb new file mode 100644 index 00000000..74da4411 --- /dev/null +++ b/lib/braintrust/contrib/rails/server/application_controller.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module Rails + module Server + class ApplicationController < ActionController::API + before_action :authenticate! + + private + + def authenticate! + auth_result = Engine.auth_strategy.authenticate(request.env) + unless auth_result + render json: {"error" => "Unauthorized"}, status: :unauthorized + return + end + + request.env["braintrust.auth"] = auth_result + @braintrust_auth = auth_result + end + + def parse_json_body + body = request.body.read + return nil if body.nil? || body.empty? 
+ JSON.parse(body) + rescue JSON::ParserError + nil + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/rails/server/engine.rb b/lib/braintrust/contrib/rails/server/engine.rb new file mode 100644 index 00000000..28905503 --- /dev/null +++ b/lib/braintrust/contrib/rails/server/engine.rb @@ -0,0 +1,72 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module Rails + module Server + class Engine < ::Rails::Engine + isolate_namespace Braintrust::Contrib::Rails::Server + + config.evaluators = {} + config.auth = :clerk_token + + # Register the engine's routes file so Rails loads it during initialization. + paths["config/routes.rb"] << File.expand_path("routes.rb", __dir__) + + initializer "braintrust.server.cors" do |app| + app.middleware.use Braintrust::Server::Middleware::Cors + end + + # Class-level helpers that read from engine config. + + def self.evaluators + config.evaluators + end + + def self.auth_strategy + resolve_auth(config.auth) + end + + def self.list_service + Braintrust::Server::Services::List.new(-> { config.evaluators }) + end + + # Long-lived so the state cache persists across requests. + def self.eval_service + @eval_service ||= Braintrust::Server::Services::Eval.new(-> { config.evaluators }) + end + + # Support the explicit `|config|` style used by this integration while + # still delegating zero-arity DSL blocks to Rails' native implementation. + def self.configure(&block) + return super if block&.arity == 0 + yield config if block + end + + def self.resolve_auth(auth) + case auth + when :none + Braintrust::Server::Auth::NoAuth.new + when :clerk_token + Braintrust::Server::Auth::ClerkToken.new + when Symbol, String + raise ArgumentError, "Unknown auth strategy #{auth.inspect}. Expected :none, :clerk_token, or an auth object." 
+ else + auth + end + end + private_class_method :resolve_auth + + generators do + require "braintrust/contrib/rails/server/generator" + end + end + end + end + end +end + +require_relative "application_controller" +require_relative "health_controller" +require_relative "list_controller" +require_relative "eval_controller" diff --git a/lib/braintrust/contrib/rails/server/eval_controller.rb b/lib/braintrust/contrib/rails/server/eval_controller.rb new file mode 100644 index 00000000..ccd4c380 --- /dev/null +++ b/lib/braintrust/contrib/rails/server/eval_controller.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module Rails + module Server + class EvalController < ApplicationController + include ActionController::Live + + def create + body = parse_json_body + unless body + render json: {"error" => "Invalid JSON body"}, status: :bad_request + return + end + + result = Engine.eval_service.validate(body) + if result[:error] + render json: {"error" => result[:error]}, status: result[:status] + return + end + + response.headers["Content-Type"] = "text/event-stream" + response.headers["Cache-Control"] = "no-cache" + response.headers["Connection"] = "keep-alive" + + sse = Braintrust::Server::SSEWriter.new { |chunk| response.stream.write(chunk) } + Engine.eval_service.stream(result, auth: @braintrust_auth, sse: sse) + ensure + response.stream.close + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/rails/server/generator.rb b/lib/braintrust/contrib/rails/server/generator.rb new file mode 100644 index 00000000..37801eb3 --- /dev/null +++ b/lib/braintrust/contrib/rails/server/generator.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +require "rails/generators" + +module Braintrust + module Contrib + module Rails + module Server + module Generators + class ServerGenerator < ::Rails::Generators::Base + namespace "braintrust:server" + source_root File.expand_path("templates", __dir__) + + def create_initializer + 
@evaluators = discovered_evaluators + template "initializer.rb.tt", "config/initializers/braintrust_server.rb" + end + + private + + def discovered_evaluators + evaluator_roots.flat_map do |root| + Dir[File.join(destination_root, root, "**/*.rb")].sort.map do |file| + relative_path = file.delete_prefix("#{File.join(destination_root, root)}/").sub(/\.rb\z/, "") + { + class_name: relative_path.split("/").map(&:camelize).join("::"), + slug: relative_path.tr("/", "-").tr("_", "-") + } + end + end + end + + def evaluator_roots + %w[app/evaluators evaluators].select do |root| + Dir.exist?(File.join(destination_root, root)) + end + end + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/rails/server/health_controller.rb b/lib/braintrust/contrib/rails/server/health_controller.rb new file mode 100644 index 00000000..361f8433 --- /dev/null +++ b/lib/braintrust/contrib/rails/server/health_controller.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module Rails + module Server + class HealthController < ApplicationController + def show + render json: {"status" => "ok"} + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/rails/server/list_controller.rb b/lib/braintrust/contrib/rails/server/list_controller.rb new file mode 100644 index 00000000..1b661151 --- /dev/null +++ b/lib/braintrust/contrib/rails/server/list_controller.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +module Braintrust + module Contrib + module Rails + module Server + class ListController < ApplicationController + def show + result = Engine.list_service.call + render json: result + end + end + end + end + end +end diff --git a/lib/braintrust/contrib/rails/routes.rb b/lib/braintrust/contrib/rails/server/routes.rb similarity index 73% rename from lib/braintrust/contrib/rails/routes.rb rename to lib/braintrust/contrib/rails/server/routes.rb index 6db2650b..5e3043cd 100644 --- a/lib/braintrust/contrib/rails/routes.rb +++ 
b/lib/braintrust/contrib/rails/server/routes.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -Braintrust::Contrib::Rails::Engine.routes.draw do +Braintrust::Contrib::Rails::Server::Engine.routes.draw do get "/", to: "health#show" get "/list", to: "list#show" post "/list", to: "list#show" diff --git a/lib/generators/braintrust/contrib/rails/eval_server/templates/braintrust_server.rb.tt b/lib/braintrust/contrib/rails/server/templates/initializer.rb.tt similarity index 84% rename from lib/generators/braintrust/contrib/rails/eval_server/templates/braintrust_server.rb.tt rename to lib/braintrust/contrib/rails/server/templates/initializer.rb.tt index dd65daff..afe292e5 100644 --- a/lib/generators/braintrust/contrib/rails/eval_server/templates/braintrust_server.rb.tt +++ b/lib/braintrust/contrib/rails/server/templates/initializer.rb.tt @@ -1,6 +1,6 @@ -require "braintrust/server/rails" +require "braintrust/contrib/rails/server" -Braintrust::Contrib::Rails::Engine.configure do |config| +Braintrust::Contrib::Rails::Server::Engine.configure do |config| config.evaluators = { <% if @evaluators.empty? -%> # Add evaluator instances here, for example: diff --git a/lib/braintrust/server/rails.rb b/lib/braintrust/server/rails.rb deleted file mode 100644 index 75ddbf77..00000000 --- a/lib/braintrust/server/rails.rb +++ /dev/null @@ -1,20 +0,0 @@ -# frozen_string_literal: true - -begin - require "action_controller" - require "rails/engine" -rescue LoadError - raise LoadError, - "Rails (actionpack + railties) is required for the Braintrust Rails server engine. " \ - "Add `gem 'rails'` or `gem 'actionpack'` and `gem 'railties'` to your Gemfile." 
-end - -require "json" -require_relative "../eval" -require_relative "sse" -require_relative "auth/no_auth" -require_relative "auth/clerk_token" -require_relative "middleware/cors" -require_relative "services/list_service" -require_relative "services/eval_service" -require_relative "../contrib/rails/engine" diff --git a/lib/generators/braintrust/contrib/rails/eval_server/eval_server_generator.rb b/lib/generators/braintrust/contrib/rails/eval_server/eval_server_generator.rb deleted file mode 100644 index 1df544a9..00000000 --- a/lib/generators/braintrust/contrib/rails/eval_server/eval_server_generator.rb +++ /dev/null @@ -1,41 +0,0 @@ -# frozen_string_literal: true - -require "rails/generators" - -module Braintrust - module Contrib - module Rails - module Generators - class EvalServerGenerator < ::Rails::Generators::Base - namespace "braintrust:eval_server" - source_root File.expand_path("templates", __dir__) - - def create_initializer - @evaluators = discovered_evaluators - template "braintrust_server.rb.tt", "config/initializers/braintrust_server.rb" - end - - private - - def discovered_evaluators - evaluator_roots.flat_map do |root| - Dir[File.join(destination_root, root, "**/*.rb")].sort.map do |file| - relative_path = file.delete_prefix("#{File.join(destination_root, root)}/").sub(/\.rb\z/, "") - { - class_name: relative_path.split("/").map(&:camelize).join("::"), - slug: relative_path.tr("/", "-").tr("_", "-") - } - end - end - end - - def evaluator_roots - %w[app/evaluators evaluators].select do |root| - Dir.exist?(File.join(destination_root, root)) - end - end - end - end - end - end -end diff --git a/test/braintrust/contrib/rails/engine_test.rb b/test/braintrust/contrib/rails/engine_test.rb deleted file mode 100644 index 830b0b4d..00000000 --- a/test/braintrust/contrib/rails/engine_test.rb +++ /dev/null @@ -1,130 +0,0 @@ -# frozen_string_literal: true - -require "test_helper" - -module Braintrust - module Contrib - module Rails - class EngineTest < 
Minitest::Test - def setup - skip_unless_rails_server! - reset_engine! - end - - def test_evaluators_returns_config_value - evaluator = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) - Engine.config.evaluators = {"my-eval" => evaluator} - assert_same evaluator, Engine.evaluators["my-eval"] - end - - def test_auth_strategy_returns_no_auth_for_none - Engine.config.auth = :none - assert_instance_of Braintrust::Server::Auth::NoAuth, Engine.auth_strategy - end - - def test_auth_strategy_returns_clerk_token_by_default - Engine.config.auth = :clerk_token - assert_instance_of Braintrust::Server::Auth::ClerkToken, Engine.auth_strategy - end - - def test_auth_strategy_accepts_custom_object - custom = Braintrust::Server::Auth::NoAuth.new - Engine.config.auth = custom - assert_same custom, Engine.auth_strategy - end - - def test_auth_strategy_raises_for_unknown_symbol - Engine.config.auth = :jwt - assert_raises(ArgumentError) { Engine.auth_strategy } - end - - def test_auth_strategy_raises_for_unknown_string - Engine.config.auth = "jwt" - assert_raises(ArgumentError) { Engine.auth_strategy } - end - - def test_auth_strategy_reflects_config_changes_without_manual_reset - Engine.config.auth = :none - assert_instance_of Braintrust::Server::Auth::NoAuth, Engine.auth_strategy - - Engine.config.auth = :clerk_token - assert_instance_of Braintrust::Server::Auth::ClerkToken, Engine.auth_strategy - end - - def test_list_service_uses_latest_evaluators_without_manual_reset - first = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) - second = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) - - Engine.config.evaluators = {"first" => first} - assert_equal ["first"], Engine.list_service.call.keys - - Engine.config.evaluators = {"second" => second} - assert_equal ["second"], Engine.list_service.call.keys - end - - def test_eval_service_uses_latest_evaluators_without_manual_reset - first = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) - second = 
Braintrust::Eval::Evaluator.new(task: ->(input) { input }) - payload = {"data" => {"data" => [{"input" => "hello"}]}} - - Engine.config.evaluators = {"first" => first} - service = Engine.eval_service - assert_same first, service.validate(payload.merge("name" => "first"))[:evaluator] - - Engine.config.evaluators = {"second" => second} - assert_same second, service.validate(payload.merge("name" => "second"))[:evaluator] - end - - def test_eval_service_returns_eval_instance - assert_instance_of Braintrust::Server::Services::Eval, Engine.eval_service - end - - def test_list_service_returns_list_instance - assert_instance_of Braintrust::Server::Services::List, Engine.list_service - end - - def test_eval_service_is_memoized - svc1 = Engine.eval_service - svc2 = Engine.eval_service - assert_same svc1, svc2 - end - - def test_configure_yields_config_without_resetting_eval_service - svc_before = Engine.eval_service - evaluator = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) - payload = {"name" => "configured-eval", "data" => {"data" => [{"input" => "hello"}]}} - - Engine.configure do |config| - config.evaluators = {"configured-eval" => evaluator} - config.auth = :none - end - - assert_same evaluator, Engine.evaluators["configured-eval"] - assert_instance_of Braintrust::Server::Auth::NoAuth, Engine.auth_strategy - assert_same svc_before, Engine.eval_service - assert_same evaluator, Engine.eval_service.validate(payload)[:evaluator] - end - - def test_cors_middleware_is_in_middleware_stack - stack = BraintrustRailsTestApp.middleware - middleware_classes = stack.map { |m| - begin - m.klass - rescue - m - end - } - assert middleware_classes.any? { |klass| - klass == Braintrust::Server::Middleware::Cors - }, "CORS middleware should be in the stack" - end - - def test_engine_has_expected_routes - routes = Engine.routes.routes.map { |r| "#{r.verb} #{r.path.spec}" } - assert routes.any? { |r| r.include?("/list") }, "Should have /list route" - assert routes.any? 
{ |r| r.include?("/eval") }, "Should have /eval route" - end - end - end - end -end diff --git a/test/braintrust/contrib/rails/eval_controller_test.rb b/test/braintrust/contrib/rails/eval_controller_test.rb deleted file mode 100644 index 98e029f2..00000000 --- a/test/braintrust/contrib/rails/eval_controller_test.rb +++ /dev/null @@ -1,168 +0,0 @@ -# frozen_string_literal: true - -require "test_helper" -require "json" - -module Braintrust - module Contrib - module Rails - class EvalControllerTest < Minitest::Test - include ::Rack::Test::Methods if defined?(::Rack::Test::Methods) - - def setup - skip_unless_rails_server! - @evaluators = {} - @rig = setup_otel_test_rig - reset_engine!(evaluators: @evaluators, auth: :none) - end - - def app - rails_engine_app - end - - def test_streams_sse_events_for_inline_data - @evaluators["upcase-eval"] = test_evaluator(task: ->(input) { input.to_s.upcase }) - reset_engine!(evaluators: @evaluators, auth: :none) - - post_json "/eval", { - name: "upcase-eval", - data: { - data: [ - {input: "hello", expected: "HELLO"}, - {input: "world", expected: "WORLD"} - ] - }, - experiment_name: "test-experiment", - project_id: "proj-123" - } - - assert_equal 200, last_response.status - assert_match "text/event-stream", last_response.content_type - - events = parse_sse_events(last_response.body) - progress_events = events.select { |e| e[:event] == "progress" } - assert_equal 4, progress_events.length # 2 per case - - summary_events = events.select { |e| e[:event] == "summary" } - assert_equal 1, summary_events.length - - assert_equal "done", events.last[:event] - end - - def test_progress_events_contain_output - @evaluators["upcase-eval"] = test_evaluator(task: ->(input) { input.to_s.upcase }) - reset_engine!(evaluators: @evaluators, auth: :none) - - post_json "/eval", { - name: "upcase-eval", - data: {data: [{input: "hello", expected: "HELLO"}]}, - experiment_name: "test-experiment", - project_id: "proj-123" - } - - events = 
parse_sse_events(last_response.body) - progress = events.find { |e| e[:event] == "progress" } - data = JSON.parse(progress[:data]) - - assert_equal "HELLO", JSON.parse(data["data"]) - end - - def test_summary_event_contains_scores - scorer = Braintrust::Eval.scorer("exact") { |_i, e, o| (o == e) ? 1.0 : 0.0 } - @evaluators["scored-eval"] = test_evaluator( - task: ->(input) { input.to_s.upcase }, - scorers: [scorer] - ) - reset_engine!(evaluators: @evaluators, auth: :none) - - post_json "/eval", { - name: "scored-eval", - data: {data: [{input: "hello", expected: "HELLO"}]}, - experiment_name: "test-experiment", - project_id: "proj-123" - } - - events = parse_sse_events(last_response.body) - summary = events.find { |e| e[:event] == "summary" } - data = JSON.parse(summary[:data]) - - assert data.key?("scores") - assert data.key?("experiment_name") - end - - def test_error_still_emits_progress_and_done - @evaluators["failing-eval"] = test_evaluator(task: ->(_input) { raise "task exploded" }) - reset_engine!(evaluators: @evaluators, auth: :none) - - post_json "/eval", { - name: "failing-eval", - data: {data: [{input: "hello"}]}, - experiment_name: "test-experiment", - project_id: "proj-123" - } - - events = parse_sse_events(last_response.body) - assert events.any? 
{ |e| e[:event] == "progress" || e[:event] == "error" } - assert_equal "done", events.last[:event] - end - - def test_404_for_unknown_evaluator - post_json "/eval", { - name: "nonexistent", - data: {data: [{input: "hello"}]}, - experiment_name: "test-experiment", - project_id: "proj-123" - } - - assert_equal 404, last_response.status - body = JSON.parse(last_response.body) - assert_match(/not found/i, body["error"]) - end - - def test_400_for_missing_name - post_json "/eval", { - data: {data: [{input: "hello"}]} - } - - assert_equal 400, last_response.status - end - - def test_400_for_missing_data - @evaluators["test-eval"] = test_evaluator(task: ->(input) { input }) - reset_engine!(evaluators: @evaluators, auth: :none) - - post_json "/eval", {name: "test-eval"} - - assert_equal 400, last_response.status - end - - def test_400_for_invalid_json_body - post "/eval", "not-json", {"CONTENT_TYPE" => "application/json"} - - assert_equal 400, last_response.status - end - - def test_returns_401_when_auth_fails - reset_engine!(evaluators: @evaluators, auth: :clerk_token) - - post_json "/eval", { - name: "test-eval", - data: {data: [{input: "hello"}]} - } - - assert_equal 401, last_response.status - end - - private - - def test_evaluator(**kwargs) - Test::Support::EvalHelper::TestEvaluator.new(tracer_provider: @rig.tracer_provider, **kwargs) - end - - def post_json(path, body) - post path, JSON.generate(body), {"CONTENT_TYPE" => "application/json"} - end - end - end - end -end diff --git a/test/braintrust/contrib/rails/eval_server_generator_test.rb b/test/braintrust/contrib/rails/eval_server_generator_test.rb deleted file mode 100644 index ff680f21..00000000 --- a/test/braintrust/contrib/rails/eval_server_generator_test.rb +++ /dev/null @@ -1,50 +0,0 @@ -# frozen_string_literal: true - -require "test_helper" - -if RAILS_SERVER_AVAILABLE - require "rails/generators/test_case" - require "generators/braintrust/contrib/rails/eval_server/eval_server_generator" - - module 
Braintrust - module Contrib - module Rails - class EvalServerGeneratorTest < ::Rails::Generators::TestCase - tests ::Braintrust::Contrib::Rails::Generators::EvalServerGenerator - destination File.expand_path("../../../../tmp/eval_server_generator", __dir__) - setup :prepare_destination - - def test_generates_initializer_from_app_evaluators - FileUtils.mkdir_p(File.join(destination_root, "app/evaluators")) - File.write( - File.join(destination_root, "app/evaluators/food_classifier.rb"), - <<~RUBY - class FoodClassifier < Braintrust::Eval::Evaluator - end - RUBY - ) - - run_generator - - assert_file "config/initializers/braintrust_server.rb" do |contents| - assert_includes contents, "require \"braintrust/server/rails\"" - assert_includes contents, "\"food-classifier\" => FoodClassifier.new" - end - end - end - end - end - end -else - module Braintrust - module Contrib - module Rails - class EvalServerGeneratorTest < Minitest::Test - def test_skips_without_rails - skip "Rails not available (run with: bundle exec appraisal rails-server rake test)" - end - end - end - end - end -end diff --git a/test/braintrust/contrib/rails/health_controller_test.rb b/test/braintrust/contrib/rails/health_controller_test.rb deleted file mode 100644 index e5ec7b27..00000000 --- a/test/braintrust/contrib/rails/health_controller_test.rb +++ /dev/null @@ -1,39 +0,0 @@ -# frozen_string_literal: true - -require "test_helper" -require "json" - -module Braintrust - module Contrib - module Rails - class HealthControllerTest < Minitest::Test - include ::Rack::Test::Methods if defined?(::Rack::Test::Methods) - - def setup - skip_unless_rails_server! 
- reset_engine!(auth: :none) - end - - def app - rails_engine_app - end - - def test_get_root_returns_200 - get "/" - assert_equal 200, last_response.status - end - - def test_get_root_returns_json_content_type - get "/" - assert_match "application/json", last_response.content_type - end - - def test_get_root_returns_status_ok - get "/" - body = JSON.parse(last_response.body) - assert_equal "ok", body["status"] - end - end - end - end -end diff --git a/test/braintrust/contrib/rails/list_controller_test.rb b/test/braintrust/contrib/rails/list_controller_test.rb deleted file mode 100644 index fc0f56a0..00000000 --- a/test/braintrust/contrib/rails/list_controller_test.rb +++ /dev/null @@ -1,99 +0,0 @@ -# frozen_string_literal: true - -require "test_helper" -require "json" - -module Braintrust - module Contrib - module Rails - class ListControllerTest < Minitest::Test - include ::Rack::Test::Methods if defined?(::Rack::Test::Methods) - - def setup - skip_unless_rails_server! - @evaluators = {} - reset_engine!(evaluators: @evaluators, auth: :none) - end - - def app - rails_engine_app - end - - def test_get_list_returns_200 - get "/list" - assert_equal 200, last_response.status - end - - def test_post_list_returns_200 - post "/list" - assert_equal 200, last_response.status - end - - def test_returns_empty_hash_when_no_evaluators - get "/list" - body = JSON.parse(last_response.body) - assert_equal({}, body) - end - - def test_returns_evaluators_keyed_by_name - @evaluators["food-classifier"] = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) - @evaluators["text-summarizer"] = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) - reset_engine!(evaluators: @evaluators, auth: :none) - - get "/list" - body = JSON.parse(last_response.body) - assert body.key?("food-classifier") - assert body.key?("text-summarizer") - end - - def test_includes_scorer_names - @evaluators["scored-eval"] = Braintrust::Eval::Evaluator.new( - task: ->(input) { input }, - scorers: [ - 
Braintrust::Eval.scorer("exact_match") { |_i, e, o| (o == e) ? 1.0 : 0.0 }, - Braintrust::Eval.scorer("length_check") { |_i, _e, _o| 1.0 } - ] - ) - reset_engine!(evaluators: @evaluators, auth: :none) - - get "/list" - body = JSON.parse(last_response.body) - score_names = body["scored-eval"]["scores"].map { |s| s["name"] } - assert_equal ["exact_match", "length_check"], score_names - end - - def test_includes_parameters_in_static_container - @evaluators["param-eval"] = Braintrust::Eval::Evaluator.new( - task: ->(input) { input }, - parameters: {"temperature" => {type: "number", default: 0.7, description: "LLM temperature"}} - ) - reset_engine!(evaluators: @evaluators, auth: :none) - - get "/list" - body = JSON.parse(last_response.body) - params = body["param-eval"]["parameters"] - assert_equal "braintrust.staticParameters", params["type"] - assert_equal 0.7, params["schema"]["temperature"]["default"] - end - - def test_omits_parameters_when_none_defined - @evaluators["no-params"] = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) - reset_engine!(evaluators: @evaluators, auth: :none) - - get "/list" - body = JSON.parse(last_response.body) - refute body["no-params"].key?("parameters") - end - - def test_returns_401_when_auth_fails - # Use clerk_token auth — no auth header means failure - reset_engine!(evaluators: @evaluators, auth: :clerk_token) - - # WebMock blocks real HTTP, so clerk token validation will fail - get "/list" - assert_equal 401, last_response.status - end - end - end - end -end diff --git a/test/braintrust/contrib/rails/rails_server_helper.rb b/test/braintrust/contrib/rails/rails_server_helper.rb new file mode 100644 index 00000000..951ea513 --- /dev/null +++ b/test/braintrust/contrib/rails/rails_server_helper.rb @@ -0,0 +1,67 @@ +# frozen_string_literal: true + +# Try to load Rails engine dependencies. 
+RAILS_SERVER_AVAILABLE = begin + require "rack/test" + require "action_controller" + require "action_dispatch" + require "rails" + require "braintrust/contrib/rails/server" + true +rescue LoadError + false +end + +if RAILS_SERVER_AVAILABLE + # Create a minimal Rails application for engine integration tests. + # Guard against being required multiple times. + unless defined?(BraintrustRailsTestApp) + class BraintrustRailsTestApp < Rails::Application + config.eager_load = false + config.secret_key_base = "braintrust-rails-test-secret-key-abc123456789" + config.logger = ::Logger.new(nil) + config.log_level = :fatal + + # Allow any host in tests (Rack::Test uses "example.org" by default) + config.hosts.clear + + routes.draw do + mount Braintrust::Contrib::Rails::Server::Engine, at: "/" + end + + initialize! + end + end +end + +module Braintrust + module Contrib + module Rails + module ServerHelper + def skip_unless_rails_server! + skip "Rails not available (run with: bundle exec appraisal rails-server rake test)" unless RAILS_SERVER_AVAILABLE + end + + # The engine itself as a Rack app — use for controller integration tests. + # Faster and more direct than routing through a full Rails application. + def rails_engine_app + Braintrust::Contrib::Rails::Server::Engine + end + + # The full test Rails application (mounts the engine at /). + # Use only when you need to verify middleware stack or mounted routing. + def rails_app + BraintrustRailsTestApp + end + + def reset_engine!(evaluators: {}, auth: :none) + engine = Braintrust::Contrib::Rails::Server::Engine + engine.config.evaluators = evaluators + engine.config.auth = auth + # Clear the long-lived eval service so cached state does not leak across tests. 
+ engine.instance_variable_set(:@eval_service, nil) + end + end + end + end +end diff --git a/test/braintrust/contrib/rails/server/engine_test.rb b/test/braintrust/contrib/rails/server/engine_test.rb new file mode 100644 index 00000000..2456606d --- /dev/null +++ b/test/braintrust/contrib/rails/server/engine_test.rb @@ -0,0 +1,135 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "../rails_server_helper" + +module Braintrust + module Contrib + module Rails + module Server + class EngineTest < Minitest::Test + include Braintrust::Contrib::Rails::ServerHelper + + def setup + skip_unless_rails_server! + reset_engine! + end + + def test_evaluators_returns_config_value + evaluator = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + Engine.config.evaluators = {"my-eval" => evaluator} + assert_same evaluator, Engine.evaluators["my-eval"] + end + + def test_auth_strategy_returns_no_auth_for_none + Engine.config.auth = :none + assert_instance_of Braintrust::Server::Auth::NoAuth, Engine.auth_strategy + end + + def test_auth_strategy_returns_clerk_token_by_default + Engine.config.auth = :clerk_token + assert_instance_of Braintrust::Server::Auth::ClerkToken, Engine.auth_strategy + end + + def test_auth_strategy_accepts_custom_object + custom = Braintrust::Server::Auth::NoAuth.new + Engine.config.auth = custom + assert_same custom, Engine.auth_strategy + end + + def test_auth_strategy_raises_for_unknown_symbol + Engine.config.auth = :jwt + assert_raises(ArgumentError) { Engine.auth_strategy } + end + + def test_auth_strategy_raises_for_unknown_string + Engine.config.auth = "jwt" + assert_raises(ArgumentError) { Engine.auth_strategy } + end + + def test_auth_strategy_reflects_config_changes_without_manual_reset + Engine.config.auth = :none + assert_instance_of Braintrust::Server::Auth::NoAuth, Engine.auth_strategy + + Engine.config.auth = :clerk_token + assert_instance_of Braintrust::Server::Auth::ClerkToken, Engine.auth_strategy + end + + 
def test_list_service_uses_latest_evaluators_without_manual_reset + first = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + second = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + + Engine.config.evaluators = {"first" => first} + assert_equal ["first"], Engine.list_service.call.keys + + Engine.config.evaluators = {"second" => second} + assert_equal ["second"], Engine.list_service.call.keys + end + + def test_eval_service_uses_latest_evaluators_without_manual_reset + first = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + second = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + payload = {"data" => {"data" => [{"input" => "hello"}]}} + + Engine.config.evaluators = {"first" => first} + service = Engine.eval_service + assert_same first, service.validate(payload.merge("name" => "first"))[:evaluator] + + Engine.config.evaluators = {"second" => second} + assert_same second, service.validate(payload.merge("name" => "second"))[:evaluator] + end + + def test_eval_service_returns_eval_instance + assert_instance_of Braintrust::Server::Services::Eval, Engine.eval_service + end + + def test_list_service_returns_list_instance + assert_instance_of Braintrust::Server::Services::List, Engine.list_service + end + + def test_eval_service_is_memoized + svc1 = Engine.eval_service + svc2 = Engine.eval_service + assert_same svc1, svc2 + end + + def test_configure_yields_config_without_resetting_eval_service + svc_before = Engine.eval_service + evaluator = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + payload = {"name" => "configured-eval", "data" => {"data" => [{"input" => "hello"}]}} + + Engine.configure do |config| + config.evaluators = {"configured-eval" => evaluator} + config.auth = :none + end + + assert_same evaluator, Engine.evaluators["configured-eval"] + assert_instance_of Braintrust::Server::Auth::NoAuth, Engine.auth_strategy + assert_same svc_before, Engine.eval_service + assert_same evaluator, 
Engine.eval_service.validate(payload)[:evaluator] + end + + def test_cors_middleware_is_in_middleware_stack + stack = BraintrustRailsTestApp.middleware + middleware_classes = stack.map { |m| + begin + m.klass + rescue + m + end + } + assert middleware_classes.any? { |klass| + klass == Braintrust::Server::Middleware::Cors + }, "CORS middleware should be in the stack" + end + + def test_engine_has_expected_routes + routes = Engine.routes.routes.map { |r| "#{r.verb} #{r.path.spec}" } + assert routes.any? { |r| r.include?("/list") }, "Should have /list route" + assert routes.any? { |r| r.include?("/eval") }, "Should have /eval route" + end + end + end + end + end +end diff --git a/test/braintrust/contrib/rails/server/eval_controller_test.rb b/test/braintrust/contrib/rails/server/eval_controller_test.rb new file mode 100644 index 00000000..8eaaa54e --- /dev/null +++ b/test/braintrust/contrib/rails/server/eval_controller_test.rb @@ -0,0 +1,172 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "../rails_server_helper" +require "json" + +module Braintrust + module Contrib + module Rails + module Server + class EvalControllerTest < Minitest::Test + include Braintrust::Contrib::Rails::ServerHelper + include ::Rack::Test::Methods if defined?(::Rack::Test::Methods) + + def setup + skip_unless_rails_server! 
+ @evaluators = {} + @rig = setup_otel_test_rig + reset_engine!(evaluators: @evaluators, auth: :none) + end + + def app + rails_engine_app + end + + def test_streams_sse_events_for_inline_data + @evaluators["upcase-eval"] = test_evaluator(task: ->(input) { input.to_s.upcase }) + reset_engine!(evaluators: @evaluators, auth: :none) + + post_json "/eval", { + name: "upcase-eval", + data: { + data: [ + {input: "hello", expected: "HELLO"}, + {input: "world", expected: "WORLD"} + ] + }, + experiment_name: "test-experiment", + project_id: "proj-123" + } + + assert_equal 200, last_response.status + assert_match "text/event-stream", last_response.content_type + + events = parse_sse_events(last_response.body) + progress_events = events.select { |e| e[:event] == "progress" } + assert_equal 4, progress_events.length # 2 per case + + summary_events = events.select { |e| e[:event] == "summary" } + assert_equal 1, summary_events.length + + assert_equal "done", events.last[:event] + end + + def test_progress_events_contain_output + @evaluators["upcase-eval"] = test_evaluator(task: ->(input) { input.to_s.upcase }) + reset_engine!(evaluators: @evaluators, auth: :none) + + post_json "/eval", { + name: "upcase-eval", + data: {data: [{input: "hello", expected: "HELLO"}]}, + experiment_name: "test-experiment", + project_id: "proj-123" + } + + events = parse_sse_events(last_response.body) + progress = events.find { |e| e[:event] == "progress" } + data = JSON.parse(progress[:data]) + + assert_equal "HELLO", JSON.parse(data["data"]) + end + + def test_summary_event_contains_scores + scorer = Braintrust::Eval.scorer("exact") { |_i, e, o| (o == e) ? 
1.0 : 0.0 } + @evaluators["scored-eval"] = test_evaluator( + task: ->(input) { input.to_s.upcase }, + scorers: [scorer] + ) + reset_engine!(evaluators: @evaluators, auth: :none) + + post_json "/eval", { + name: "scored-eval", + data: {data: [{input: "hello", expected: "HELLO"}]}, + experiment_name: "test-experiment", + project_id: "proj-123" + } + + events = parse_sse_events(last_response.body) + summary = events.find { |e| e[:event] == "summary" } + data = JSON.parse(summary[:data]) + + assert data.key?("scores") + assert data.key?("experiment_name") + end + + def test_error_still_emits_progress_and_done + @evaluators["failing-eval"] = test_evaluator(task: ->(_input) { raise "task exploded" }) + reset_engine!(evaluators: @evaluators, auth: :none) + + post_json "/eval", { + name: "failing-eval", + data: {data: [{input: "hello"}]}, + experiment_name: "test-experiment", + project_id: "proj-123" + } + + events = parse_sse_events(last_response.body) + assert events.any? { |e| e[:event] == "progress" || e[:event] == "error" } + assert_equal "done", events.last[:event] + end + + def test_404_for_unknown_evaluator + post_json "/eval", { + name: "nonexistent", + data: {data: [{input: "hello"}]}, + experiment_name: "test-experiment", + project_id: "proj-123" + } + + assert_equal 404, last_response.status + body = JSON.parse(last_response.body) + assert_match(/not found/i, body["error"]) + end + + def test_400_for_missing_name + post_json "/eval", { + data: {data: [{input: "hello"}]} + } + + assert_equal 400, last_response.status + end + + def test_400_for_missing_data + @evaluators["test-eval"] = test_evaluator(task: ->(input) { input }) + reset_engine!(evaluators: @evaluators, auth: :none) + + post_json "/eval", {name: "test-eval"} + + assert_equal 400, last_response.status + end + + def test_400_for_invalid_json_body + post "/eval", "not-json", {"CONTENT_TYPE" => "application/json"} + + assert_equal 400, last_response.status + end + + def test_returns_401_when_auth_fails 
+ reset_engine!(evaluators: @evaluators, auth: :clerk_token) + + post_json "/eval", { + name: "test-eval", + data: {data: [{input: "hello"}]} + } + + assert_equal 401, last_response.status + end + + private + + def test_evaluator(**kwargs) + Test::Support::EvalHelper::TestEvaluator.new(tracer_provider: @rig.tracer_provider, **kwargs) + end + + def post_json(path, body) + post path, JSON.generate(body), {"CONTENT_TYPE" => "application/json"} + end + end + end + end + end +end diff --git a/test/braintrust/contrib/rails/server/generator_test.rb b/test/braintrust/contrib/rails/server/generator_test.rb new file mode 100644 index 00000000..bedd2598 --- /dev/null +++ b/test/braintrust/contrib/rails/server/generator_test.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "../rails_server_helper" + +if RAILS_SERVER_AVAILABLE + require "rails/generators/test_case" + require "braintrust/contrib/rails/server/generator" + + module Braintrust + module Contrib + module Rails + module Server + class GeneratorTest < ::Rails::Generators::TestCase + tests ::Braintrust::Contrib::Rails::Server::Generators::ServerGenerator + destination File.expand_path("../../../../tmp/server_generator", __dir__) + setup :prepare_destination + + def test_generates_initializer_from_app_evaluators + FileUtils.mkdir_p(File.join(destination_root, "app/evaluators")) + File.write( + File.join(destination_root, "app/evaluators/food_classifier.rb"), + <<~RUBY + class FoodClassifier < Braintrust::Eval::Evaluator + end + RUBY + ) + + run_generator + + assert_file "config/initializers/braintrust_server.rb" do |contents| + assert_includes contents, "require \"braintrust/contrib/rails/server\"" + assert_includes contents, "Braintrust::Contrib::Rails::Server::Engine.configure" + assert_includes contents, "\"food-classifier\" => FoodClassifier.new" + end + end + end + end + end + end + end +else + module Braintrust + module Contrib + module Rails + module Server + class 
GeneratorTest < Minitest::Test + def test_skips_without_rails + skip "Rails not available (run with: bundle exec appraisal rails-server rake test)" + end + end + end + end + end + end +end diff --git a/test/braintrust/contrib/rails/server/health_controller_test.rb b/test/braintrust/contrib/rails/server/health_controller_test.rb new file mode 100644 index 00000000..192bbfc5 --- /dev/null +++ b/test/braintrust/contrib/rails/server/health_controller_test.rb @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "../rails_server_helper" +require "json" + +module Braintrust + module Contrib + module Rails + module Server + class HealthControllerTest < Minitest::Test + include Braintrust::Contrib::Rails::ServerHelper + include ::Rack::Test::Methods if defined?(::Rack::Test::Methods) + + def setup + skip_unless_rails_server! + reset_engine!(auth: :none) + end + + def app + rails_engine_app + end + + def test_get_root_returns_200 + get "/" + assert_equal 200, last_response.status + end + + def test_get_root_returns_json_content_type + get "/" + assert_match "application/json", last_response.content_type + end + + def test_get_root_returns_status_ok + get "/" + body = JSON.parse(last_response.body) + assert_equal "ok", body["status"] + end + end + end + end + end +end diff --git a/test/braintrust/contrib/rails/server/list_controller_test.rb b/test/braintrust/contrib/rails/server/list_controller_test.rb new file mode 100644 index 00000000..15f3fa0f --- /dev/null +++ b/test/braintrust/contrib/rails/server/list_controller_test.rb @@ -0,0 +1,103 @@ +# frozen_string_literal: true + +require "test_helper" +require_relative "../rails_server_helper" +require "json" + +module Braintrust + module Contrib + module Rails + module Server + class ListControllerTest < Minitest::Test + include Braintrust::Contrib::Rails::ServerHelper + include ::Rack::Test::Methods if defined?(::Rack::Test::Methods) + + def setup + skip_unless_rails_server! 
+ @evaluators = {} + reset_engine!(evaluators: @evaluators, auth: :none) + end + + def app + rails_engine_app + end + + def test_get_list_returns_200 + get "/list" + assert_equal 200, last_response.status + end + + def test_post_list_returns_200 + post "/list" + assert_equal 200, last_response.status + end + + def test_returns_empty_hash_when_no_evaluators + get "/list" + body = JSON.parse(last_response.body) + assert_equal({}, body) + end + + def test_returns_evaluators_keyed_by_name + @evaluators["food-classifier"] = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + @evaluators["text-summarizer"] = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + reset_engine!(evaluators: @evaluators, auth: :none) + + get "/list" + body = JSON.parse(last_response.body) + assert body.key?("food-classifier") + assert body.key?("text-summarizer") + end + + def test_includes_scorer_names + @evaluators["scored-eval"] = Braintrust::Eval::Evaluator.new( + task: ->(input) { input }, + scorers: [ + Braintrust::Eval.scorer("exact_match") { |_i, e, o| (o == e) ? 
1.0 : 0.0 }, + Braintrust::Eval.scorer("length_check") { |_i, _e, _o| 1.0 } + ] + ) + reset_engine!(evaluators: @evaluators, auth: :none) + + get "/list" + body = JSON.parse(last_response.body) + score_names = body["scored-eval"]["scores"].map { |s| s["name"] } + assert_equal ["exact_match", "length_check"], score_names + end + + def test_includes_parameters_in_static_container + @evaluators["param-eval"] = Braintrust::Eval::Evaluator.new( + task: ->(input) { input }, + parameters: {"temperature" => {type: "number", default: 0.7, description: "LLM temperature"}} + ) + reset_engine!(evaluators: @evaluators, auth: :none) + + get "/list" + body = JSON.parse(last_response.body) + params = body["param-eval"]["parameters"] + assert_equal "braintrust.staticParameters", params["type"] + assert_equal 0.7, params["schema"]["temperature"]["default"] + end + + def test_omits_parameters_when_none_defined + @evaluators["no-params"] = Braintrust::Eval::Evaluator.new(task: ->(input) { input }) + reset_engine!(evaluators: @evaluators, auth: :none) + + get "/list" + body = JSON.parse(last_response.body) + refute body["no-params"].key?("parameters") + end + + def test_returns_401_when_auth_fails + # Use clerk_token auth — no auth header means failure + reset_engine!(evaluators: @evaluators, auth: :clerk_token) + + # WebMock blocks real HTTP, so clerk token validation will fail + get "/list" + assert_equal 401, last_response.status + end + end + end + end + end +end diff --git a/test/support/rails_server_helper.rb b/test/support/rails_server_helper.rb deleted file mode 100644 index 878f3783..00000000 --- a/test/support/rails_server_helper.rb +++ /dev/null @@ -1,63 +0,0 @@ -# Try to load Rails engine dependencies. 
-RAILS_SERVER_AVAILABLE = begin - require "rack/test" - require "action_controller" - require "action_dispatch" - require "rails" - require "braintrust/server/rails" - true -rescue LoadError - false -end - -if RAILS_SERVER_AVAILABLE - # Create a minimal Rails application for engine integration tests. - # Guard against being required multiple times. - unless defined?(BraintrustRailsTestApp) - class BraintrustRailsTestApp < Rails::Application - config.eager_load = false - config.secret_key_base = "braintrust-rails-test-secret-key-abc123456789" - config.logger = ::Logger.new(nil) - config.log_level = :fatal - - # Allow any host in tests (Rack::Test uses "example.org" by default) - config.hosts.clear - - routes.draw do - mount Braintrust::Contrib::Rails::Engine, at: "/" - end - - initialize! - end - end -end - -module Test - module Support - module RailsServerHelper - def skip_unless_rails_server! - skip "Rails not available (run with: bundle exec appraisal rails-server rake test)" unless RAILS_SERVER_AVAILABLE - end - - # The engine itself as a Rack app — use for controller integration tests. - # Faster and more direct than routing through a full Rails application. - def rails_engine_app - Braintrust::Contrib::Rails::Engine - end - - # The full test Rails application (mounts the engine at /). - # Use only when you need to verify middleware stack or mounted routing. - def rails_app - BraintrustRailsTestApp - end - - def reset_engine!(evaluators: {}, auth: :none) - engine = Braintrust::Contrib::Rails::Engine - engine.config.evaluators = evaluators - engine.config.auth = auth - # Clear the long-lived eval service so cached state does not leak across tests. 
- engine.instance_variable_set(:@eval_service, nil) - end - end - end -end diff --git a/test/test_helper.rb b/test/test_helper.rb index 189225bb..287e465d 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -77,7 +77,6 @@ require_relative "support/provider_helper" require_relative "support/eval_helper" require_relative "support/server_helper" -require_relative "support/rails_server_helper" require_relative "support/tracing_helper" # Include helper in all test cases @@ -90,7 +89,6 @@ class Minitest::Test include ::Test::Support::MockHelper include ::Test::Support::ProviderHelper include ::Test::Support::ServerHelper - include ::Test::Support::RailsServerHelper include ::Test::Support::TracingHelper # Use Minitest hooks to clear global state after every test