From 9473d099256ec6fd8ba0d6a48f49e13b31b24052 Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:47:56 +0100 Subject: [PATCH 01/64] feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- examples/quickstart/docker-compose.yml | 48 ++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 examples/quickstart/docker-compose.yml diff --git a/examples/quickstart/docker-compose.yml b/examples/quickstart/docker-compose.yml new file mode 100644 index 0000000..0b5affa --- /dev/null +++ b/examples/quickstart/docker-compose.yml @@ -0,0 +1,48 @@ +# Fairvisor Edge — Quickstart stack (standalone mode) +# +# Usage: +# docker compose up -d +# curl -s http://localhost:8080/readyz # health check +# curl -s -X POST http://localhost:8080/openai/v1/chat/completions \ +# -H "Authorization: Bearer demo-client-jwt.demo-payload.demo-sig:sk-fake-upstream-key" \ +# -H "Content-Type: application/json" \ +# -d @fixtures/normal_request.json # expect 200 +# curl -s -X POST http://localhost:8080/openai/v1/chat/completions \ +# -H "Authorization: Bearer demo-client-jwt.demo-payload.demo-sig:sk-fake-upstream-key" \ +# -H "Content-Type: application/json" \ +# -d @fixtures/over_limit_request.json # expect 429 +# +# This file is also the base for the e2e-smoke CI check. +# CI extends it via tests/e2e/docker-compose.test.yml; do not diverge the +# service name, port, or volume contract without updating CI as well. 
+ +services: + edge: + image: ghcr.io/fairvisor/fairvisor-edge:latest + ports: + - "8080:8080" + environment: + FAIRVISOR_CONFIG_FILE: /etc/fairvisor/policy.json + FAIRVISOR_MODE: wrapper + FAIRVISOR_SHARED_DICT_SIZE: 32m + FAIRVISOR_LOG_LEVEL: info + FAIRVISOR_WORKER_PROCESSES: "1" + volumes: + - ./policy.json:/etc/fairvisor/policy.json:ro + healthcheck: + test: ["CMD", "curl", "-sf", "http://127.0.0.1:8080/readyz"] + interval: 2s + timeout: 2s + retries: 15 + start_period: 5s + + mock_llm: + image: nginx:1.27-alpine + volumes: + - ./mock-llm.conf:/etc/nginx/nginx.conf:ro + healthcheck: + test: ["CMD", "wget", "-q", "-O", "-", "http://127.0.0.1:80/"] + interval: 2s + timeout: 2s + retries: 10 + start_period: 5s From c608efce8c579c8841286365014e5c58eecb81d4 Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:47:57 +0100 Subject: [PATCH 02/64] feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- examples/quickstart/policy.json | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 examples/quickstart/policy.json diff --git a/examples/quickstart/policy.json b/examples/quickstart/policy.json new file mode 100644 index 0000000..f5520aa --- /dev/null +++ b/examples/quickstart/policy.json @@ -0,0 +1,31 @@ +{ + "bundle_version": 1, + "issued_at": "2026-01-01T00:00:00Z", + "expires_at": "2030-01-01T00:00:00Z", + "policies": [ + { + "id": "quickstart-tpm-policy", + "spec": { + "selector": { + "pathPrefix": "/openai/", + "methods": ["POST"] + }, + "mode": "enforce", + "rules": [ + { + "name": "tpm-limit", + "limit_keys": ["jwt:sub"], + "algorithm": "token_bucket_llm", + "algorithm_config": { + "tokens_per_minute": 100, + "tokens_per_day": 1000, + "burst_tokens": 100, + "default_max_completion_tokens": 50 + } + } + ] + } + } + ], + "kill_switches": [] +} From 046ac3ba7fbd1f53e8f412aa63aed12241901c67 Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:47:57 +0100 Subject: [PATCH 03/64] 
feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- examples/quickstart/mock-llm.conf | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 examples/quickstart/mock-llm.conf diff --git a/examples/quickstart/mock-llm.conf b/examples/quickstart/mock-llm.conf new file mode 100644 index 0000000..26603ab --- /dev/null +++ b/examples/quickstart/mock-llm.conf @@ -0,0 +1,10 @@ +events {} +http { + server { + listen 80; + location / { + default_type application/json; + return 200 '{"id":"chatcmpl-qs","object":"chat.completion","choices":[{"index":0,"message":{"role":"assistant","content":"Hello from the mock backend!"},"finish_reason":"stop"}],"usage":{"prompt_tokens":10,"completion_tokens":8,"total_tokens":18}}'; + } + } +} From 7b4cbbedad973de8891c6ecfb1628c471c728f26 Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:47:58 +0100 Subject: [PATCH 04/64] feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- examples/quickstart/README.md | 97 +++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 examples/quickstart/README.md diff --git a/examples/quickstart/README.md b/examples/quickstart/README.md new file mode 100644 index 0000000..a23fc78 --- /dev/null +++ b/examples/quickstart/README.md @@ -0,0 +1,97 @@ +# Fairvisor Edge — Quickstart + +Go from `git clone` to working policy enforcement in one step. 
+ +## Prerequisites + +- Docker with Compose V2 (`docker compose version`) +- Port 8080 free on localhost + +## Start + +```bash +docker compose up -d +``` + +Wait for the edge service to report healthy: + +```bash +docker compose ps +# edge should show "healthy" +``` + +## Verify enforcement + +**Allowed request** — should return `200`: + +```bash +curl -s -X POST http://localhost:8080/openai/v1/chat/completions \ + -H "Authorization: Bearer demo-client-jwt.demo-payload.demo-sig:sk-fake-key" \ + -H "Content-Type: application/json" \ + -d @../../fixtures/normal_request.json +``` + +Expected response matches `../../fixtures/allow_response.json`. + +**Over-limit request** — should return `429`: + +```bash +curl -s -X POST http://localhost:8080/openai/v1/chat/completions \ + -H "Authorization: Bearer demo-client-jwt.demo-payload.demo-sig:sk-fake-key" \ + -H "Content-Type: application/json" \ + -d @../../fixtures/over_limit_request.json +``` + +Expected response body matches `../../fixtures/reject_tpm_exceeded.json`. +The response will also include: +- `X-Fairvisor-Reason: tpm_exceeded` +- `Retry-After: 60` +- `RateLimit-Limit: 100` +- `RateLimit-Remaining: 0` + +## Wrapper mode and auth + +This quickstart runs in `FAIRVISOR_MODE=wrapper`. The composite Bearer token format is: + +``` +Authorization: Bearer CLIENT_JWT:UPSTREAM_KEY +``` + +- `CLIENT_JWT` — a signed JWT identifying the calling client/tenant (used for policy enforcement) +- `UPSTREAM_KEY` — the real upstream API key forwarded to the provider (e.g. `sk-...` for OpenAI) + +Fairvisor strips the composite header and injects the correct provider auth before forwarding. The upstream key is **never returned to the caller** — see `../../fixtures/allow_response.json` for proof (no `Authorization`, `x-api-key`, or `x-goog-api-key` headers in the response). 
+ +## Provider-prefixed paths + +Wrapper mode routes by path prefix: + +| Path prefix | Upstream | Auth header | +|---|---|---| +| `/openai/v1/...` | `https://api.openai.com/v1/...` | `Authorization: Bearer UPSTREAM_KEY` | +| `/anthropic/v1/...` | `https://api.anthropic.com/v1/...` | `x-api-key: UPSTREAM_KEY` | +| `/gemini/v1beta/...` | `https://generativelanguage.googleapis.com/v1beta/...` | `x-goog-api-key: UPSTREAM_KEY` | + +## Anthropic example + +```bash +curl -s -X POST http://localhost:8080/anthropic/v1/messages \ + -H "Authorization: Bearer demo-client-jwt.demo-payload.demo-sig:sk-ant-fake-key" \ + -H "Content-Type: application/json" \ + -d @../../fixtures/anthropic_normal_request.json +``` + +A rejected Anthropic request returns an Anthropic-native error body — see `../../fixtures/reject_anthropic.json`. + +## Teardown + +```bash +docker compose down +``` + +## Next steps + +- See `../recipes/` for team budgets, runaway agent guard, and provider failover scenarios +- See `../../fixtures/` for all sample request/response artifacts +- See [fairvisor/benchmark](https://github.com/fairvisor/benchmark) for performance benchmarks +- See [docs/install/](../../docs/install/) for Kubernetes, VM, and SaaS deployment options From a0551ffdbec47ebe82da9db01ae00edde616cdc8 Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:47:59 +0100 Subject: [PATCH 05/64] feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- examples/recipes/team-budgets/policy.json | 47 +++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 examples/recipes/team-budgets/policy.json diff --git a/examples/recipes/team-budgets/policy.json b/examples/recipes/team-budgets/policy.json new file mode 100644 index 0000000..d361a30 --- /dev/null +++ b/examples/recipes/team-budgets/policy.json @@ -0,0 +1,47 @@ +{ + "bundle_version": 1, + "issued_at": "2026-01-01T00:00:00Z", + "expires_at": "2030-01-01T00:00:00Z", + "policies": [ + { + "id": 
"team-token-budget", + "spec": { + "selector": { + "pathPrefix": "/openai/", + "methods": ["POST"] + }, + "mode": "enforce", + "rules": [ + { + "name": "per-team-tpm", + "limit_keys": ["jwt:team_id"], + "algorithm": "token_bucket_llm", + "algorithm_config": { + "tokens_per_minute": 120000, + "tokens_per_day": 2000000, + "burst_tokens": 120000, + "default_max_completion_tokens": 1024 + } + }, + { + "name": "per-team-cost-budget", + "limit_keys": ["jwt:team_id"], + "algorithm": "cost_based", + "algorithm_config": { + "budget": 50000, + "period": "30d", + "cost_key": "fixed", + "fixed_cost": 1, + "staged_actions": [ + { "threshold_percent": 80, "action": "warn" }, + { "threshold_percent": 95, "action": "throttle", "delay_ms": 500 }, + { "threshold_percent": 100, "action": "reject" } + ] + } + } + ] + } + } + ], + "kill_switches": [] +} From a36312e84f5ad4b82d6ee71a96ce5dc88ff8efba Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:47:59 +0100 Subject: [PATCH 06/64] feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- examples/recipes/team-budgets/README.md | 45 +++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 examples/recipes/team-budgets/README.md diff --git a/examples/recipes/team-budgets/README.md b/examples/recipes/team-budgets/README.md new file mode 100644 index 0000000..54c1551 --- /dev/null +++ b/examples/recipes/team-budgets/README.md @@ -0,0 +1,45 @@ +# Recipe: Team Budgets + +Enforce per-team token and cost limits using JWT claims. + +## How it works + +Each request carries a JWT with a `team_id` claim. Fairvisor uses this as +the bucket key for two independent rules: + +1. **TPM/TPD limit** — token-rate enforcement per minute and per day +2. 
**Monthly cost budget** — cumulative cost cap with staged warn/throttle/reject + +## Deploy + +```bash +# Copy policy to your edge config path +cp policy.json /etc/fairvisor/policy.json + +# Or use with docker compose (standalone mode): +FAIRVISOR_CONFIG_FILE=./policy.json FAIRVISOR_MODE=wrapper docker compose up -d +``` + +## JWT shape expected + +```json +{ + "sub": "user-123", + "team_id": "engineering", + "plan": "pro", + "exp": 9999999999 +} +``` + +## Staged actions at cost budget thresholds + +| Threshold | Action | +|---|---| +| 80% | Warn (allow, log, emit business event) | +| 95% | Throttle (allow with 500 ms delay) | +| 100% | Reject (429, `budget_exceeded`) | + +## Related fixtures + +- `../../../fixtures/reject_tpd_exceeded.json` — TPD reject body +- `../../../fixtures/reject_tpm_exceeded.json` — TPM reject body From d70e2c4b5de167beae91c056dc5185017caed207 Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:48:00 +0100 Subject: [PATCH 07/64] feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- .../recipes/runaway-agent-guard/policy.json | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 examples/recipes/runaway-agent-guard/policy.json diff --git a/examples/recipes/runaway-agent-guard/policy.json b/examples/recipes/runaway-agent-guard/policy.json new file mode 100644 index 0000000..2715b60 --- /dev/null +++ b/examples/recipes/runaway-agent-guard/policy.json @@ -0,0 +1,40 @@ +{ + "bundle_version": 1, + "issued_at": "2026-01-01T00:00:00Z", + "expires_at": "2030-01-01T00:00:00Z", + "policies": [ + { + "id": "runaway-agent-guard", + "spec": { + "selector": { + "pathPrefix": "/", + "methods": ["POST"] + }, + "mode": "enforce", + "rules": [ + { + "name": "loop-detection", + "limit_keys": ["jwt:agent_id"], + "algorithm": "loop_detector", + "algorithm_config": { + "window_seconds": 60, + "max_requests": 30, + "cooldown_seconds": 120 + } + }, + { + "name": "agent-tpm-guard", + "limit_keys": 
["jwt:agent_id"], + "algorithm": "token_bucket_llm", + "algorithm_config": { + "tokens_per_minute": 50000, + "burst_tokens": 50000, + "default_max_completion_tokens": 512 + } + } + ] + } + } + ], + "kill_switches": [] +} From 489a28ab04e9de94ff4e46695c6e7328d2265623 Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:48:01 +0100 Subject: [PATCH 08/64] feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- .../recipes/runaway-agent-guard/README.md | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 examples/recipes/runaway-agent-guard/README.md diff --git a/examples/recipes/runaway-agent-guard/README.md b/examples/recipes/runaway-agent-guard/README.md new file mode 100644 index 0000000..7b34491 --- /dev/null +++ b/examples/recipes/runaway-agent-guard/README.md @@ -0,0 +1,50 @@ +# Recipe: Runaway Agent Guard + +Stop runaway agentic workflows before they exhaust your token budget or +billing limit. + +## Problem + +Autonomous agents (LangChain, AutoGPT, custom loops) can enter retry storms +or infinite planning loops. Without enforcement, a single runaway agent +can consume thousands of dollars of API budget in minutes. + +## How it works + +Two rules cooperate: + +1. **Loop detector** — counts requests per `agent_id` in a sliding window. + If the agent fires more than 30 requests in 60 seconds, it trips a + 120-second cooldown. This catches tight retry loops. + +2. **TPM guard** — caps tokens per minute per agent. A burst-heavy agent + that passes the loop check still cannot drain the token pool. 
+ +## Deploy + +```bash +cp policy.json /etc/fairvisor/policy.json +``` + +## JWT shape expected + +```json +{ + "sub": "user-456", + "agent_id": "autoagent-prod-7", + "exp": 9999999999 +} +``` + +## Kill switch for incidents + +If an agent causes an incident, flip a kill switch without restarting edge: + +```bash +# Via CLI +fairvisor kill-switch enable agent-id=autoagent-prod-7 + +# Or update the policy bundle with a kill_switch entry and hot-reload +``` + +See `../../../docs/cookbook/kill-switch-incident-response.md` for the full incident playbook. From 53a70356f89fc37b3f29d487f2200a9957d2c770 Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:48:02 +0100 Subject: [PATCH 09/64] feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- .../recipes/provider-failover/policy.json | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 examples/recipes/provider-failover/policy.json diff --git a/examples/recipes/provider-failover/policy.json b/examples/recipes/provider-failover/policy.json new file mode 100644 index 0000000..5974c77 --- /dev/null +++ b/examples/recipes/provider-failover/policy.json @@ -0,0 +1,62 @@ +{ + "bundle_version": 1, + "issued_at": "2026-01-01T00:00:00Z", + "expires_at": "2030-01-01T00:00:00Z", + "policies": [ + { + "id": "provider-failover-primary", + "spec": { + "selector": { + "pathPrefix": "/openai/", + "methods": ["POST"] + }, + "mode": "enforce", + "rules": [ + { + "name": "openai-tpm", + "limit_keys": ["jwt:org_id"], + "algorithm": "token_bucket_llm", + "algorithm_config": { + "tokens_per_minute": 200000, + "burst_tokens": 200000, + "default_max_completion_tokens": 2048 + } + }, + { + "name": "openai-circuit-breaker", + "limit_keys": ["jwt:org_id"], + "algorithm": "circuit_breaker", + "algorithm_config": { + "spend_window_seconds": 300, + "spend_threshold": 100000, + "cooldown_seconds": 600 + } + } + ] + } + }, + { + "id": "provider-failover-fallback", + "spec": { + "selector": { + 
"pathPrefix": "/anthropic/", + "methods": ["POST"] + }, + "mode": "enforce", + "rules": [ + { + "name": "anthropic-tpm", + "limit_keys": ["jwt:org_id"], + "algorithm": "token_bucket_llm", + "algorithm_config": { + "tokens_per_minute": 100000, + "burst_tokens": 100000, + "default_max_completion_tokens": 2048 + } + } + ] + } + } + ], + "kill_switches": [] +} From 3b078d09d99083e1c5914e4b7615c69ee6e7f495 Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:48:03 +0100 Subject: [PATCH 10/64] feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- examples/recipes/provider-failover/README.md | 52 ++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 examples/recipes/provider-failover/README.md diff --git a/examples/recipes/provider-failover/README.md b/examples/recipes/provider-failover/README.md new file mode 100644 index 0000000..520226c --- /dev/null +++ b/examples/recipes/provider-failover/README.md @@ -0,0 +1,52 @@ +# Recipe: Provider Failover / Edge Control + +Run two provider paths under independent policy budgets. When the primary +provider (OpenAI) trips a circuit breaker, your client-side router can +switch to the fallback (Anthropic) — both paths enforced by the same edge. + +## How it works + +- `/openai/v1/...` — enforced by an OpenAI TPM limit + a spend-based circuit breaker +- `/anthropic/v1/...` — enforced by an Anthropic TPM limit + +The circuit breaker on the OpenAI path auto-trips when cumulative spend +exceeds the threshold in a 5-minute window, then auto-resets after 10 minutes. +Your application can detect the 429 with `X-Fairvisor-Reason: circuit_breaker_open` +and switch to the Anthropic path without any Fairvisor configuration change. 
+ +## Deploy + +```bash +cp policy.json /etc/fairvisor/policy.json +``` + +## Client-side failover pattern + +```python +import httpx + +EDGE = "http://localhost:8080" +AUTH = "Bearer my-client-jwt.payload.sig:sk-my-upstream-key" + +def chat(messages, provider="openai"): + resp = httpx.post( + f"{EDGE}/{provider}/v1/chat/completions", + headers={"Authorization": AUTH, "Content-Type": "application/json"}, + json={"model": "gpt-4o", "messages": messages}, + ) + if resp.status_code == 429: + reason = resp.headers.get("X-Fairvisor-Reason", "") + if reason == "circuit_breaker_open" and provider == "openai": + return chat(messages, provider="anthropic") + resp.raise_for_status() + return resp.json() +``` + +## Auth note + +The composite `CLIENT_JWT:UPSTREAM_KEY` format is the same for all providers. +Fairvisor injects the correct provider-native auth header: +- OpenAI: `Authorization: Bearer UPSTREAM_KEY` +- Anthropic: `x-api-key: UPSTREAM_KEY` + +The upstream key is stripped from responses — it never reaches your client. 
From 4b4d24922be8e1406f38f94d1f4172dfc639c000 Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:48:04 +0100 Subject: [PATCH 11/64] feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- fixtures/reject_tpm_exceeded.json | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 fixtures/reject_tpm_exceeded.json diff --git a/fixtures/reject_tpm_exceeded.json b/fixtures/reject_tpm_exceeded.json new file mode 100644 index 0000000..26f45d0 --- /dev/null +++ b/fixtures/reject_tpm_exceeded.json @@ -0,0 +1,17 @@ +{ + "_comment": "429 body returned when the per-minute token budget is exhausted.", + "_headers": { + "X-Fairvisor-Reason": "tpm_exceeded", + "Retry-After": "60", + "RateLimit-Limit": "120000", + "RateLimit-Remaining": "0", + "RateLimit-Reset": "", + "Content-Type": "application/json" + }, + "error": { + "type": "rate_limit_error", + "code": "tpm_exceeded", + "message": "Token budget exceeded for this tenant.", + "param": null + } +} From 70ed186f9604d7ccc6cabb6e8d42ecbb5ae2e7d8 Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:48:05 +0100 Subject: [PATCH 12/64] feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- fixtures/reject_tpd_exceeded.json | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 fixtures/reject_tpd_exceeded.json diff --git a/fixtures/reject_tpd_exceeded.json b/fixtures/reject_tpd_exceeded.json new file mode 100644 index 0000000..8d2bcdb --- /dev/null +++ b/fixtures/reject_tpd_exceeded.json @@ -0,0 +1,16 @@ +{ + "_comment": "429 body returned when the per-day token budget is exhausted.", + "_headers": { + "X-Fairvisor-Reason": "tpd_exceeded", + "Retry-After": "86400", + "RateLimit-Limit": "2000000", + "RateLimit-Remaining": "0", + "Content-Type": "application/json" + }, + "error": { + "type": "rate_limit_error", + "code": "tpd_exceeded", + "message": "Token budget exceeded for this tenant.", + "param": null + } +} From 
e03dfcc0ada593af89c2306b7eaa2777f238d3f1 Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:48:06 +0100 Subject: [PATCH 13/64] feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- fixtures/reject_prompt_too_large.json | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 fixtures/reject_prompt_too_large.json diff --git a/fixtures/reject_prompt_too_large.json b/fixtures/reject_prompt_too_large.json new file mode 100644 index 0000000..9c4cf8c --- /dev/null +++ b/fixtures/reject_prompt_too_large.json @@ -0,0 +1,13 @@ +{ + "_comment": "429 body returned when the request exceeds max_prompt_tokens.", + "_headers": { + "X-Fairvisor-Reason": "prompt_too_large", + "Content-Type": "application/json" + }, + "error": { + "type": "rate_limit_error", + "code": "prompt_too_large", + "message": "Request prompt exceeds the maximum allowed token count for this policy.", + "param": null + } +} From b538176d31b3e001fa17a4e7e17c1d50dac88d4e Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:48:07 +0100 Subject: [PATCH 14/64] feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- fixtures/allow_response.json | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 fixtures/allow_response.json diff --git a/fixtures/allow_response.json b/fixtures/allow_response.json new file mode 100644 index 0000000..7cc0312 --- /dev/null +++ b/fixtures/allow_response.json @@ -0,0 +1,28 @@ +{ + "_comment": "Sample 200 response for an allowed request in wrapper mode. 
Note: no Authorization, x-api-key, or x-goog-api-key headers — upstream auth is stripped on the response side.", + "_status": 200, + "_headers": { + "Content-Type": "application/json", + "X-Fairvisor-Reason": null, + "Authorization": null, + "x-api-key": null, + "x-goog-api-key": null + }, + "id": "chatcmpl-example", + "object": "chat.completion", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello! How can I help you today?" + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 9, + "total_tokens": 19 + } +} From f021fd66e98e14318834043bca602fa630d8cda5 Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:48:08 +0100 Subject: [PATCH 15/64] feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- fixtures/normal_request.json | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 fixtures/normal_request.json diff --git a/fixtures/normal_request.json b/fixtures/normal_request.json new file mode 100644 index 0000000..049a4e4 --- /dev/null +++ b/fixtures/normal_request.json @@ -0,0 +1,10 @@ +{ + "model": "gpt-4o-mini", + "messages": [ + { + "role": "user", + "content": "Say hello in one sentence." + } + ], + "max_tokens": 20 +} From 3b54d3ff21dd3f9ff9f2f0bc97b6f17f59ea4156 Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:48:08 +0100 Subject: [PATCH 16/64] feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- fixtures/over_limit_request.json | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 fixtures/over_limit_request.json diff --git a/fixtures/over_limit_request.json b/fixtures/over_limit_request.json new file mode 100644 index 0000000..b3b554f --- /dev/null +++ b/fixtures/over_limit_request.json @@ -0,0 +1,10 @@ +{ + "model": "gpt-4o", + "messages": [ + { + "role": "user", + "content": "Say hello in one sentence." 
+ } + ], + "max_tokens": 200000 +} From e1dd56da63c818b8705c312d724905d5b0f37be2 Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:48:09 +0100 Subject: [PATCH 17/64] feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- fixtures/reject_openai.json | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 fixtures/reject_openai.json diff --git a/fixtures/reject_openai.json b/fixtures/reject_openai.json new file mode 100644 index 0000000..eabd023 --- /dev/null +++ b/fixtures/reject_openai.json @@ -0,0 +1,14 @@ +{ + "_comment": "OpenAI-native 429 reject body. Used for /openai/* paths and OpenAI-compatible providers.", + "_headers": { + "X-Fairvisor-Reason": "tpm_exceeded", + "Retry-After": "60", + "Content-Type": "application/json" + }, + "error": { + "type": "rate_limit_error", + "code": "tpm_exceeded", + "message": "Token budget exceeded for this tenant.", + "param": null + } +} From 800c4f9cc9a03ad436558254cc791d740d35dbb1 Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:48:10 +0100 Subject: [PATCH 18/64] feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- fixtures/reject_anthropic.json | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 fixtures/reject_anthropic.json diff --git a/fixtures/reject_anthropic.json b/fixtures/reject_anthropic.json new file mode 100644 index 0000000..bdf468f --- /dev/null +++ b/fixtures/reject_anthropic.json @@ -0,0 +1,13 @@ +{ + "_comment": "Anthropic-native 429 reject body. Used for /anthropic/* paths.", + "_headers": { + "X-Fairvisor-Reason": "tpm_exceeded", + "Retry-After": "60", + "Content-Type": "application/json" + }, + "type": "error", + "error": { + "type": "rate_limit_error", + "message": "Token budget exceeded for this tenant." 
+ } +} From f13e64111623eae652a232c207f61624c0311e2a Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:48:10 +0100 Subject: [PATCH 19/64] feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- fixtures/reject_gemini.json | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 fixtures/reject_gemini.json diff --git a/fixtures/reject_gemini.json b/fixtures/reject_gemini.json new file mode 100644 index 0000000..f0df901 --- /dev/null +++ b/fixtures/reject_gemini.json @@ -0,0 +1,13 @@ +{ + "_comment": "Gemini-native 429 reject body. Used for /gemini/* paths.", + "_headers": { + "X-Fairvisor-Reason": "tpm_exceeded", + "Retry-After": "60", + "Content-Type": "application/json" + }, + "error": { + "code": 429, + "message": "Token budget exceeded for this tenant.", + "status": "RESOURCE_EXHAUSTED" + } +} From fbcf12d5b906454aa1d8c570e957acd5621be676 Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:48:11 +0100 Subject: [PATCH 20/64] feat(quickstart): add runnable quickstart, recipes, and fixtures (issue #32) --- fixtures/anthropic_normal_request.json | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 fixtures/anthropic_normal_request.json diff --git a/fixtures/anthropic_normal_request.json b/fixtures/anthropic_normal_request.json new file mode 100644 index 0000000..bcffdbf --- /dev/null +++ b/fixtures/anthropic_normal_request.json @@ -0,0 +1,10 @@ +{ + "model": "claude-3-5-haiku-20241022", + "max_tokens": 20, + "messages": [ + { + "role": "user", + "content": "Say hello in one sentence." 
+ } + ] +} From a93b37735b0b80e8ed392bf111b0aabaaa15c835 Mon Sep 17 00:00:00 2001 From: Lev Date: Tue, 17 Mar 2026 11:49:41 +0100 Subject: [PATCH 21/64] docs(readme): add quickstart pointer, update project layout, fix benchmark link (issue #32) --- README.md | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 07bc768..c267fa7 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,12 @@ Fairvisor integrates *alongside* Kong, nginx, and Envoy — it is not a replacem ## Quick start +> **Runnable quickstart:** `examples/quickstart/` — `docker compose up -d` and run your first enforce/reject test in under a minute. See [`examples/quickstart/README.md`](examples/quickstart/README.md). +> +> **Recipes:** `examples/recipes/` — deployable team budgets, runaway agent guard, and provider failover examples. +> +> **Sample artifacts:** `fixtures/` — canonical request/response fixtures for enforce, reject (TPM, TPD, prompt-too-large), and provider-native error bodies (OpenAI, Anthropic, Gemini). + ### 1. Create a policy ```bash @@ -304,7 +310,7 @@ Policies are versioned JSON — commit them to Git, review changes in PRs, roll **No external datastore.** All enforcement state lives in in-process shared memory (`ngx.shared.dict`). No Redis, no Postgres, no network round-trips in the decision path. -> Reproduce: `git clone https://github.com/fairvisor/benchmark && cd benchmark && ./run-all.sh` +> Reproduce: see [fairvisor/benchmark](https://github.com/fairvisor/benchmark) — the canonical benchmark source of truth for Fairvisor Edge performance numbers. 
## Deployment @@ -348,14 +354,16 @@ If the SaaS is unreachable, the edge keeps enforcing with the last-known policy ## Project layout ``` -src/fairvisor/ runtime modules (OpenResty/LuaJIT) -cli/ command-line tooling -spec/ unit and integration tests (busted) -tests/e2e/ Docker-based E2E tests (pytest) -examples/ sample policy bundles -helm/ Helm chart -docker/ Docker artifacts -docs/ reference documentation +src/fairvisor/ runtime modules (OpenResty/LuaJIT) +cli/ command-line tooling +spec/ unit and integration tests (busted) +tests/e2e/ Docker-based E2E tests (pytest) +examples/quickstart/ runnable quickstart (docker compose up -d) +examples/recipes/ deployable policy recipes (team budgets, agent guard, failover) +fixtures/ canonical request/response sample artifacts +helm/ Helm chart +docker/ Docker artifacts +docs/ reference documentation ``` ## Contributing @@ -376,3 +384,4 @@ pytest tests/e2e -v # E2E (requires Docker) --- **Docs:** [docs.fairvisor.com](https://docs.fairvisor.com/docs/) · **Website:** [fairvisor.com](https://fairvisor.com) · **Quickstart:** [5 minutes to enforcement](https://docs.fairvisor.com/docs/quickstart/) + From a4ad21cdab0121ae56b760b086a7aa8b20f5aaca Mon Sep 17 00:00:00 2001 From: Lev Date: Wed, 18 Mar 2026 09:50:48 +0100 Subject: [PATCH 22/64] =?UTF-8?q?fix(quickstart):=20fix=20review=20issues?= =?UTF-8?q?=20=E2=80=94=20reverse=5Fproxy=20mode,=20correct=20config=20sha?= =?UTF-8?q?pes=20(issue=20#32)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/quickstart/docker-compose.yml | 28 +++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/examples/quickstart/docker-compose.yml b/examples/quickstart/docker-compose.yml index 0b5affa..870812d 100644 --- a/examples/quickstart/docker-compose.yml +++ b/examples/quickstart/docker-compose.yml @@ -1,16 +1,22 @@ -# Fairvisor Edge — Quickstart stack (standalone mode) +# Fairvisor Edge — Quickstart stack 
(standalone + reverse proxy mode) # # Usage: # docker compose up -d -# curl -s http://localhost:8080/readyz # health check -# curl -s -X POST http://localhost:8080/openai/v1/chat/completions \ -# -H "Authorization: Bearer demo-client-jwt.demo-payload.demo-sig:sk-fake-upstream-key" \ +# curl -s http://localhost:8080/readyz # health check +# curl -s -X POST http://localhost:8080/v1/chat/completions \ # -H "Content-Type: application/json" \ -# -d @fixtures/normal_request.json # expect 200 -# curl -s -X POST http://localhost:8080/openai/v1/chat/completions \ -# -H "Authorization: Bearer demo-client-jwt.demo-payload.demo-sig:sk-fake-upstream-key" \ +# -d @../../fixtures/normal_request.json # expect 200 +# curl -s -X POST http://localhost:8080/v1/chat/completions \ # -H "Content-Type: application/json" \ -# -d @fixtures/over_limit_request.json # expect 429 +# -d @../../fixtures/over_limit_request.json # expect 429 +# +# This stack runs in FAIRVISOR_MODE=reverse_proxy — requests to /v1/* are +# enforced by policy then forwarded to the local mock LLM backend. +# No real API keys required. +# +# Wrapper mode (routing by provider prefix, real upstream keys) is documented +# in README.md under "Wrapper mode". It requires real provider credentials and +# cannot be demonstrated with this mock stack. # # This file is also the base for the e2e-smoke CI check. 
# CI extends it via tests/e2e/docker-compose.test.yml; do not diverge the @@ -23,12 +29,16 @@ services: - "8080:8080" environment: FAIRVISOR_CONFIG_FILE: /etc/fairvisor/policy.json - FAIRVISOR_MODE: wrapper + FAIRVISOR_MODE: reverse_proxy + FAIRVISOR_BACKEND_URL: http://mock_llm:80 FAIRVISOR_SHARED_DICT_SIZE: 32m FAIRVISOR_LOG_LEVEL: info FAIRVISOR_WORKER_PROCESSES: "1" volumes: - ./policy.json:/etc/fairvisor/policy.json:ro + depends_on: + mock_llm: + condition: service_healthy healthcheck: test: ["CMD", "curl", "-sf", "http://127.0.0.1:8080/readyz"] interval: 2s From 411c6c7feb65f135358e7963cb03cbe5b8d6de81 Mon Sep 17 00:00:00 2001 From: Lev Date: Wed, 18 Mar 2026 09:50:54 +0100 Subject: [PATCH 23/64] =?UTF-8?q?fix(quickstart):=20fix=20review=20issues?= =?UTF-8?q?=20=E2=80=94=20reverse=5Fproxy=20mode,=20correct=20config=20sha?= =?UTF-8?q?pes=20(issue=20#32)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/quickstart/policy.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/quickstart/policy.json b/examples/quickstart/policy.json index f5520aa..fb9b375 100644 --- a/examples/quickstart/policy.json +++ b/examples/quickstart/policy.json @@ -7,20 +7,20 @@ "id": "quickstart-tpm-policy", "spec": { "selector": { - "pathPrefix": "/openai/", + "pathPrefix": "/v1/", "methods": ["POST"] }, "mode": "enforce", "rules": [ { "name": "tpm-limit", - "limit_keys": ["jwt:sub"], + "limit_keys": ["ip:address"], "algorithm": "token_bucket_llm", "algorithm_config": { "tokens_per_minute": 100, "tokens_per_day": 1000, "burst_tokens": 100, - "default_max_completion_tokens": 50 + "default_max_completion": 50 } } ] From 288c9c75acdbe252aea4ff994b11cd12b0163e35 Mon Sep 17 00:00:00 2001 From: Lev Date: Wed, 18 Mar 2026 09:50:59 +0100 Subject: [PATCH 24/64] =?UTF-8?q?fix(quickstart):=20fix=20review=20issues?= =?UTF-8?q?=20=E2=80=94=20reverse=5Fproxy=20mode,=20correct=20config=20sha?= 
=?UTF-8?q?pes=20(issue=20#32)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/quickstart/README.md | 63 ++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/examples/quickstart/README.md b/examples/quickstart/README.md index a23fc78..3acb943 100644 --- a/examples/quickstart/README.md +++ b/examples/quickstart/README.md @@ -22,66 +22,77 @@ docker compose ps ## Verify enforcement +This quickstart runs in `FAIRVISOR_MODE=reverse_proxy`. Requests to `/v1/*` +are enforced by the TPM policy and forwarded to a local mock LLM backend. +No real API keys are required. + **Allowed request** — should return `200`: ```bash -curl -s -X POST http://localhost:8080/openai/v1/chat/completions \ - -H "Authorization: Bearer demo-client-jwt.demo-payload.demo-sig:sk-fake-key" \ +curl -s -X POST http://localhost:8080/v1/chat/completions \ -H "Content-Type: application/json" \ -d @../../fixtures/normal_request.json ``` -Expected response matches `../../fixtures/allow_response.json`. +Expected response body shape matches `../../fixtures/allow_response.json`. **Over-limit request** — should return `429`: ```bash -curl -s -X POST http://localhost:8080/openai/v1/chat/completions \ - -H "Authorization: Bearer demo-client-jwt.demo-payload.demo-sig:sk-fake-key" \ +curl -s -X POST http://localhost:8080/v1/chat/completions \ -H "Content-Type: application/json" \ -d @../../fixtures/over_limit_request.json ``` -Expected response body matches `../../fixtures/reject_tpm_exceeded.json`. +Expected response body shape: `../../fixtures/reject_tpm_exceeded.json`. 
The response will also include: - `X-Fairvisor-Reason: tpm_exceeded` - `Retry-After: 60` -- `RateLimit-Limit: 100` +- `RateLimit-Limit: 100` (matches the quickstart policy `tokens_per_minute`) - `RateLimit-Remaining: 0` -## Wrapper mode and auth +## How the policy works + +The quickstart policy (`policy.json`) enforces a TPM limit keyed on `ip:address`: + +- `tokens_per_minute: 100` — allows roughly 2 small requests per minute +- `tokens_per_day: 1000` — daily cap +- `default_max_completion: 50` — pessimistic reservation per request when `max_tokens` is not set + +Sending `over_limit_request.json` (which sets `max_tokens: 200000`) immediately +exceeds the 100-token per-minute budget and triggers a `429`. + +## Wrapper mode (real provider routing) + +Wrapper mode routes requests to real upstream providers using provider-prefixed paths +and a composite Bearer token. It requires real provider API keys and cannot be +demonstrated with this mock stack. -This quickstart runs in `FAIRVISOR_MODE=wrapper`. The composite Bearer token format is: +**Path and auth format:** ``` +POST /openai/v1/chat/completions Authorization: Bearer CLIENT_JWT:UPSTREAM_KEY ``` -- `CLIENT_JWT` — a signed JWT identifying the calling client/tenant (used for policy enforcement) -- `UPSTREAM_KEY` — the real upstream API key forwarded to the provider (e.g. `sk-...` for OpenAI) +Where: +- `CLIENT_JWT` — signed JWT identifying the calling client/tenant (used for policy enforcement) +- `UPSTREAM_KEY` — real upstream API key forwarded to the provider (e.g. `sk-...` for OpenAI) -Fairvisor strips the composite header and injects the correct provider auth before forwarding. The upstream key is **never returned to the caller** — see `../../fixtures/allow_response.json` for proof (no `Authorization`, `x-api-key`, or `x-goog-api-key` headers in the response). 
+Fairvisor strips the composite header, injects the correct provider auth before forwarding, +and **never returns upstream auth headers to the caller** +(see `../../fixtures/allow_response.json`). -## Provider-prefixed paths +**Provider-prefixed paths:** -Wrapper mode routes by path prefix: - -| Path prefix | Upstream | Auth header | +| Path prefix | Upstream | Auth header injected | |---|---|---| | `/openai/v1/...` | `https://api.openai.com/v1/...` | `Authorization: Bearer UPSTREAM_KEY` | | `/anthropic/v1/...` | `https://api.anthropic.com/v1/...` | `x-api-key: UPSTREAM_KEY` | | `/gemini/v1beta/...` | `https://generativelanguage.googleapis.com/v1beta/...` | `x-goog-api-key: UPSTREAM_KEY` | -## Anthropic example - -```bash -curl -s -X POST http://localhost:8080/anthropic/v1/messages \ - -H "Authorization: Bearer demo-client-jwt.demo-payload.demo-sig:sk-ant-fake-key" \ - -H "Content-Type: application/json" \ - -d @../../fixtures/anthropic_normal_request.json -``` - -A rejected Anthropic request returns an Anthropic-native error body — see `../../fixtures/reject_anthropic.json`. +To run in wrapper mode, change the compose env to `FAIRVISOR_MODE: wrapper` and +supply real credentials in the `Authorization` header. 
## Teardown From 80365c9b2102493728eb94cd63bfb113f05a7bdb Mon Sep 17 00:00:00 2001 From: Lev Date: Wed, 18 Mar 2026 09:51:06 +0100 Subject: [PATCH 25/64] =?UTF-8?q?fix(quickstart):=20fix=20review=20issues?= =?UTF-8?q?=20=E2=80=94=20reverse=5Fproxy=20mode,=20correct=20config=20sha?= =?UTF-8?q?pes=20(issue=20#32)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/recipes/team-budgets/policy.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/recipes/team-budgets/policy.json b/examples/recipes/team-budgets/policy.json index d361a30..87d7c63 100644 --- a/examples/recipes/team-budgets/policy.json +++ b/examples/recipes/team-budgets/policy.json @@ -20,7 +20,7 @@ "tokens_per_minute": 120000, "tokens_per_day": 2000000, "burst_tokens": 120000, - "default_max_completion_tokens": 1024 + "default_max_completion": 1024 } }, { From e7785995c1f0987b6cf9d368c48b766443bb6f1f Mon Sep 17 00:00:00 2001 From: Lev Date: Wed, 18 Mar 2026 09:51:12 +0100 Subject: [PATCH 26/64] =?UTF-8?q?fix(quickstart):=20fix=20review=20issues?= =?UTF-8?q?=20=E2=80=94=20reverse=5Fproxy=20mode,=20correct=20config=20sha?= =?UTF-8?q?pes=20(issue=20#32)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/recipes/runaway-agent-guard/policy.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/recipes/runaway-agent-guard/policy.json b/examples/recipes/runaway-agent-guard/policy.json index 2715b60..38de248 100644 --- a/examples/recipes/runaway-agent-guard/policy.json +++ b/examples/recipes/runaway-agent-guard/policy.json @@ -29,7 +29,7 @@ "algorithm_config": { "tokens_per_minute": 50000, "burst_tokens": 50000, - "default_max_completion_tokens": 512 + "default_max_completion": 512 } } ] From 399cd93e2a2b3bedc10e0e9d62908b59722e7a47 Mon Sep 17 00:00:00 2001 From: Lev Date: Wed, 18 Mar 2026 09:51:18 +0100 Subject: [PATCH 27/64] 
=?UTF-8?q?fix(quickstart):=20fix=20review=20issues?= =?UTF-8?q?=20=E2=80=94=20reverse=5Fproxy=20mode,=20correct=20config=20sha?= =?UTF-8?q?pes=20(issue=20#32)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../recipes/provider-failover/policy.json | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/examples/recipes/provider-failover/policy.json b/examples/recipes/provider-failover/policy.json index 5974c77..03533dd 100644 --- a/examples/recipes/provider-failover/policy.json +++ b/examples/recipes/provider-failover/policy.json @@ -19,20 +19,17 @@ "algorithm_config": { "tokens_per_minute": 200000, "burst_tokens": 200000, - "default_max_completion_tokens": 2048 - } - }, - { - "name": "openai-circuit-breaker", - "limit_keys": ["jwt:org_id"], - "algorithm": "circuit_breaker", - "algorithm_config": { - "spend_window_seconds": 300, - "spend_threshold": 100000, - "cooldown_seconds": 600 + "default_max_completion": 2048 } } - ] + ], + "circuit_breaker": { + "enabled": true, + "spend_rate_threshold_per_minute": 10000, + "action": "reject", + "alert": true, + "auto_reset_after_minutes": 10 + } } }, { @@ -51,7 +48,7 @@ "algorithm_config": { "tokens_per_minute": 100000, "burst_tokens": 100000, - "default_max_completion_tokens": 2048 + "default_max_completion": 2048 } } ] From e327a0cf7a3e6cf00bf02a596f02f3508120573d Mon Sep 17 00:00:00 2001 From: Lev Date: Wed, 18 Mar 2026 09:51:25 +0100 Subject: [PATCH 28/64] =?UTF-8?q?fix(quickstart):=20fix=20review=20issues?= =?UTF-8?q?=20=E2=80=94=20reverse=5Fproxy=20mode,=20correct=20config=20sha?= =?UTF-8?q?pes=20(issue=20#32)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fixtures/reject_tpm_exceeded.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fixtures/reject_tpm_exceeded.json b/fixtures/reject_tpm_exceeded.json index 26f45d0..0805778 100644 --- 
a/fixtures/reject_tpm_exceeded.json +++ b/fixtures/reject_tpm_exceeded.json @@ -1,9 +1,9 @@ { - "_comment": "429 body returned when the per-minute token budget is exhausted.", + "_comment": "Illustrative 429 body returned when the per-minute token budget is exhausted. RateLimit-Limit reflects the policy's tokens_per_minute value.", "_headers": { "X-Fairvisor-Reason": "tpm_exceeded", "Retry-After": "60", - "RateLimit-Limit": "120000", + "RateLimit-Limit": "", "RateLimit-Remaining": "0", "RateLimit-Reset": "", "Content-Type": "application/json" From ee2ab171e13536df91aa570085111bb78f4a25da Mon Sep 17 00:00:00 2001 From: Lev Date: Wed, 18 Mar 2026 09:51:31 +0100 Subject: [PATCH 29/64] =?UTF-8?q?fix(quickstart):=20fix=20review=20issues?= =?UTF-8?q?=20=E2=80=94=20reverse=5Fproxy=20mode,=20correct=20config=20sha?= =?UTF-8?q?pes=20(issue=20#32)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fixtures/reject_tpd_exceeded.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fixtures/reject_tpd_exceeded.json b/fixtures/reject_tpd_exceeded.json index 8d2bcdb..83cb2ea 100644 --- a/fixtures/reject_tpd_exceeded.json +++ b/fixtures/reject_tpd_exceeded.json @@ -1,9 +1,9 @@ { - "_comment": "429 body returned when the per-day token budget is exhausted.", + "_comment": "Illustrative 429 body returned when the per-day token budget is exhausted. RateLimit-Limit reflects the policy's tokens_per_day value.", "_headers": { "X-Fairvisor-Reason": "tpd_exceeded", - "Retry-After": "86400", - "RateLimit-Limit": "2000000", + "Retry-After": "", + "RateLimit-Limit": "", "RateLimit-Remaining": "0", "Content-Type": "application/json" }, From 348789e37d4cfdf35d6778ff23e6518fbe753dec Mon Sep 17 00:00:00 2001 From: Claude Agent Date: Wed, 18 Mar 2026 13:58:50 +0000 Subject: [PATCH 30/64] refactor: replace provider-failover recipe with circuit-breaker Failover between providers is a client-side pattern, not a Fairvisor feature. 
Replace with a focused circuit-breaker recipe that demonstrates the actual cost-spike auto-shutdown capability: - spend_rate_threshold_per_minute triggers full traffic block - auto_reset_after_minutes provides hands-free cooldown - per-org TPM limit continues to run in parallel --- examples/recipes/circuit-breaker/README.md | 43 +++++++++++++++ .../policy.json | 34 +++--------- examples/recipes/provider-failover/README.md | 52 ------------------- 3 files changed, 49 insertions(+), 80 deletions(-) create mode 100644 examples/recipes/circuit-breaker/README.md rename examples/recipes/{provider-failover => circuit-breaker}/policy.json (51%) delete mode 100644 examples/recipes/provider-failover/README.md diff --git a/examples/recipes/circuit-breaker/README.md b/examples/recipes/circuit-breaker/README.md new file mode 100644 index 0000000..ad1227e --- /dev/null +++ b/examples/recipes/circuit-breaker/README.md @@ -0,0 +1,43 @@ +# Recipe: Circuit Breaker — Cost Spike Auto-Shutdown + +Automatically block all LLM traffic when the aggregate token spend rate +exceeds a budget threshold, then self-reset after a cooldown period. 
+ +## How it works + +- Normal traffic: per-org TPM limit enforced (`100 000 tokens/min`) +- Spike detection: if the rolling spend rate hits `500 000 tokens/min` + the circuit breaker opens and **all requests return `429`** with + `X-Fairvisor-Reason: circuit_breaker_open` +- Auto-reset: after 10 minutes without breaker-triggering load, the + circuit resets automatically — no manual intervention needed +- `alert: true` logs the trip event to the Fairvisor audit log + +## Deploy + +```bash +cp policy.json /etc/fairvisor/policy.json +``` + +## Expected behaviour + +```bash +# Normal request — passes +curl -s -o /dev/null -w "%{http_code}" \ + -H "Authorization: Bearer CLIENT_JWT:UPSTREAM_KEY" \ + http://localhost:8080/v1/chat/completions \ + -d '{"model":"gpt-4o","messages":[{"role":"user","content":"hi"}]}' +# → 200 + +# After spend spike trips the breaker: +# → 429 X-Fairvisor-Reason: circuit_breaker_open +# Retry-After: 600 +``` + +## Tuning + +| Field | Description | +|---|---| +| `spend_rate_threshold_per_minute` | Tokens/min rolling spend that opens the breaker | +| `auto_reset_after_minutes` | Cooldown before automatic reset (0 = manual only) | +| `tokens_per_minute` | Per-org steady-state limit (independent of breaker) | diff --git a/examples/recipes/provider-failover/policy.json b/examples/recipes/circuit-breaker/policy.json similarity index 51% rename from examples/recipes/provider-failover/policy.json rename to examples/recipes/circuit-breaker/policy.json index 03533dd..7d58c8d 100644 --- a/examples/recipes/provider-failover/policy.json +++ b/examples/recipes/circuit-breaker/policy.json @@ -4,55 +4,33 @@ "expires_at": "2030-01-01T00:00:00Z", "policies": [ { - "id": "provider-failover-primary", + "id": "cost-spike-guard", "spec": { "selector": { - "pathPrefix": "/openai/", + "pathPrefix": "/v1/", "methods": ["POST"] }, "mode": "enforce", "rules": [ { - "name": "openai-tpm", + "name": "per-org-tpm", "limit_keys": ["jwt:org_id"], "algorithm": "token_bucket_llm", 
"algorithm_config": { - "tokens_per_minute": 200000, - "burst_tokens": 200000, + "tokens_per_minute": 100000, + "burst_tokens": 100000, "default_max_completion": 2048 } } ], "circuit_breaker": { "enabled": true, - "spend_rate_threshold_per_minute": 10000, + "spend_rate_threshold_per_minute": 500000, "action": "reject", "alert": true, "auto_reset_after_minutes": 10 } } - }, - { - "id": "provider-failover-fallback", - "spec": { - "selector": { - "pathPrefix": "/anthropic/", - "methods": ["POST"] - }, - "mode": "enforce", - "rules": [ - { - "name": "anthropic-tpm", - "limit_keys": ["jwt:org_id"], - "algorithm": "token_bucket_llm", - "algorithm_config": { - "tokens_per_minute": 100000, - "burst_tokens": 100000, - "default_max_completion": 2048 - } - } - ] - } } ], "kill_switches": [] diff --git a/examples/recipes/provider-failover/README.md b/examples/recipes/provider-failover/README.md deleted file mode 100644 index 520226c..0000000 --- a/examples/recipes/provider-failover/README.md +++ /dev/null @@ -1,52 +0,0 @@ -# Recipe: Provider Failover / Edge Control - -Run two provider paths under independent policy budgets. When the primary -provider (OpenAI) trips a circuit breaker, your client-side router can -switch to the fallback (Anthropic) — both paths enforced by the same edge. - -## How it works - -- `/openai/v1/...` — enforced by an OpenAI TPM limit + a spend-based circuit breaker -- `/anthropic/v1/...` — enforced by an Anthropic TPM limit - -The circuit breaker on the OpenAI path auto-trips when cumulative spend -exceeds the threshold in a 5-minute window, then auto-resets after 10 minutes. -Your application can detect the 429 with `X-Fairvisor-Reason: circuit_breaker_open` -and switch to the Anthropic path without any Fairvisor configuration change. 
- -## Deploy - -```bash -cp policy.json /etc/fairvisor/policy.json -``` - -## Client-side failover pattern - -```python -import httpx - -EDGE = "http://localhost:8080" -AUTH = "Bearer my-client-jwt.payload.sig:sk-my-upstream-key" - -def chat(messages, provider="openai"): - resp = httpx.post( - f"{EDGE}/{provider}/v1/chat/completions", - headers={"Authorization": AUTH, "Content-Type": "application/json"}, - json={"model": "gpt-4o", "messages": messages}, - ) - if resp.status_code == 429: - reason = resp.headers.get("X-Fairvisor-Reason", "") - if reason == "circuit_breaker_open" and provider == "openai": - return chat(messages, provider="anthropic") - resp.raise_for_status() - return resp.json() -``` - -## Auth note - -The composite `CLIENT_JWT:UPSTREAM_KEY` format is the same for all providers. -Fairvisor injects the correct provider-native auth header: -- OpenAI: `Authorization: Bearer UPSTREAM_KEY` -- Anthropic: `x-api-key: UPSTREAM_KEY` - -The upstream key is stripped from responses — it never reaches your client. From 7a13c7ffa7b8e3018c321955d8bea3b7ca06fc73 Mon Sep 17 00:00:00 2001 From: Lev Date: Thu, 19 Mar 2026 13:08:06 +0100 Subject: [PATCH 31/64] docs: add wrapper mode to README + integration links in comparison section Refs #17 #19 from job discussion --- README.md | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index ed06a20..bf6bda0 100644 --- a/README.md +++ b/README.md @@ -84,13 +84,13 @@ If you have an existing gateway, the question is whether Fairvisor adds anything **If nginx `limit_req` is enough for you**, use it. It has zero overhead and is the right tool for simple per-IP global throttling. Fairvisor becomes relevant when you need per-tenant awareness, JWT-claim-based bucketing, or cost/token tracking that `limit_req` has no model for. 
-**If you are already running Kong**, the built-in rate limiting plugin stores counters in Redis or Postgres — every decision is a network call. Fairvisor can run alongside Kong as an `auth_request` decision service with no external state. +**If you are already running Kong**, the built-in rate limiting plugin stores counters in Redis or Postgres — every decision is a network call. Fairvisor can run alongside Kong as an `auth_request` decision service with no external state. See [Kong / Traefik integration →](https://docs.fairvisor.com/docs/gateway/) -**If you are running Envoy**, the [global rate limit service](https://github.com/envoyproxy/ratelimit) requires deploying a separate Redis-backed service with its own config language. Fairvisor is one container, one JSON file, and integrates via `ext_authz` in the same position. +**If you are running Envoy**, the [global rate limit service](https://github.com/envoyproxy/ratelimit) requires deploying a separate Redis-backed service with its own config language. Fairvisor is one container, one JSON file, and integrates via `ext_authz` in the same position. See [Envoy ext_authz integration →](https://docs.fairvisor.com/docs/gateway/envoy/) **If you are on Cloudflare or Akamai**, per-JWT-claim limits, LLM token budgets, and cost caps are not in the platform's model. If your limits are tenant-aware or cost-aware, you need something that runs in your own stack. -Fairvisor integrates *alongside* Kong, nginx, and Envoy — it is not a replacement. See [docs/gateway-integration.md](docs/gateway-integration.md) for integration patterns. +Fairvisor integrates *alongside* Kong, nginx, and Envoy — it is not a replacement. See [nginx auth_request →](https://docs.fairvisor.com/docs/gateway/nginx/) · [Envoy ext_authz →](https://docs.fairvisor.com/docs/gateway/envoy/) · [Kong / Traefik →](https://docs.fairvisor.com/docs/gateway/) for integration patterns. 
## Quick start @@ -194,7 +194,9 @@ Works with OpenAI, Anthropic, Azure OpenAI, Mistral, and any OpenAI-compatible e **Reverse proxy mode** — Fairvisor sits inline. Traffic arrives at Fairvisor directly, gets evaluated, and is proxied to the upstream if allowed. No separate gateway needed. -Both modes use the same policy bundle and return the same rejection headers. +**Wrapper mode** — Fairvisor acts as a transparent LLM proxy. Clients send requests to Fairvisor's OpenAI-compatible endpoint (`/openai/v1/chat/completions`, `/anthropic/v1/messages`, `/gemini/v1/generateContent`). Fairvisor enforces token budgets and cost limits, strips the client auth header, injects the upstream API key, and forwards the request. No changes needed in the client — swap the base URL and you're done. + +All three modes use the same policy bundle and return the same rejection headers. When a request is rejected: @@ -245,7 +247,25 @@ Headers follow [RFC 9333 RateLimit Fields](https://www.rfc-editor.org/rfc/rfc933 reject ──► 429 + RFC 9333 headers ``` -Both modes use the same policy bundle and produce the same rejection headers. +**Wrapper mode** (transparent LLM proxy — swap base URL, no client changes): + +``` + Client ──► Fairvisor Edge POST /openai/v1/chat/completions + (wrapper) Authorization: Bearer CLIENT_JWT:UPSTREAM_KEY + │ + │ 1. Validate CLIENT_JWT, extract org_id / user_id claims + │ 2. Enforce token budget (TPM / TPD / cost) + │ 3. Strip Authorization header, inject upstream API key + │ 4. Forward to https://api.openai.com (or Anthropic/Gemini) + │ 5. Count response tokens, refund unused reservation + │ + allow ──► upstream response (Authorization header stripped from reply) + reject ──► 429 + X-Fairvisor-Reason: tpm_exceeded / budget_exhausted +``` + +Supported upstream paths out of the box: `/openai/*`, `/anthropic/*`, `/gemini/*`. + +All three modes use the same policy bundle and produce the same rejection headers. 
## Enforcement capabilities From 480a409c68ff16d6076cc86662d2d197d4e771a3 Mon Sep 17 00:00:00 2001 From: Lev Date: Thu, 19 Mar 2026 13:20:59 +0100 Subject: [PATCH 32/64] docs: rewrite LLM token budget section to showcase wrapper mode --- README.md | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index bf6bda0..c193cee 100644 --- a/README.md +++ b/README.md @@ -162,11 +162,15 @@ curl -s -w "\nHTTP %{http_code}\n" \ ## LLM token budget in 30 seconds +The fastest path is **wrapper mode**: Fairvisor sits in front of the LLM API, enforces budgets, and strips the upstream key from the client. No gateway changes needed — just point your client at Fairvisor instead of OpenAI. + +**1. Policy** — one rule, per-org TPM + daily cap: + ```json { "id": "llm-budget", "spec": { - "selector": { "pathPrefix": "/v1/chat" }, + "selector": { "pathPrefix": "/openai/v1/chat" }, "mode": "enforce", "rules": [ { @@ -184,9 +188,27 @@ curl -s -w "\nHTTP %{http_code}\n" \ } ``` -Each organization (from the JWT `org_id` claim) gets its own independent 60k TPM / 1.2M TPD budget. Requests over the limit return a `429` with an OpenAI-compatible error body — no client changes needed. +**2. Call the API** — token format `Bearer CLIENT_JWT:UPSTREAM_KEY`: + +```bash +curl https://your-fairvisor-host/openai/v1/chat/completions -H "Authorization: Bearer eyJhbGc...:sk-proj-..." -H "Content-Type: application/json" -d '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"Hello"}]}' +``` + +Fairvisor validates the JWT, extracts `org_id`, charges tokens against the budget, strips the `Authorization` header, and forwards with the upstream key. The upstream never sees the client JWT. + +When the budget is exhausted: + +```http +HTTP/1.1 429 Too Many Requests +X-Fairvisor-Reason: tpm_exceeded +Retry-After: 12 +RateLimit-Limit: 60000 +RateLimit-Remaining: 0 +``` + +Each organization gets its own independent 60k TPM / 1.2M TPD budget. 
Works with OpenAI, Anthropic, Azure OpenAI, Mistral, and any OpenAI-compatible endpoint. -Works with OpenAI, Anthropic, Azure OpenAI, Mistral, and any OpenAI-compatible endpoint. +> **Decision service / reverse proxy mode:** if you already have a gateway, use `selector: { "pathPrefix": "/v1/chat" }` and call `POST /v1/decision` from your existing `auth_request` or `ext_authz` hook instead. ## How a request flows From c297873449fe8bf7263c957330bf4b7325c4eb05 Mon Sep 17 00:00:00 2001 From: Lev Date: Thu, 19 Mar 2026 13:43:04 +0100 Subject: [PATCH 33/64] docs: wrapper mode selector pathPrefix "/" covers all providers --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c193cee..75f48c0 100644 --- a/README.md +++ b/README.md @@ -170,7 +170,7 @@ The fastest path is **wrapper mode**: Fairvisor sits in front of the LLM API, en { "id": "llm-budget", "spec": { - "selector": { "pathPrefix": "/openai/v1/chat" }, + "selector": { "pathPrefix": "/" }, "mode": "enforce", "rules": [ { @@ -208,6 +208,8 @@ RateLimit-Remaining: 0 Each organization gets its own independent 60k TPM / 1.2M TPD budget. Works with OpenAI, Anthropic, Azure OpenAI, Mistral, and any OpenAI-compatible endpoint. +The selector matches the incoming wrapper path. Use `pathPrefix: "/"` to cover all providers, or `pathPrefix: "/openai"` to limit to one provider only. + > **Decision service / reverse proxy mode:** if you already have a gateway, use `selector: { "pathPrefix": "/v1/chat" }` and call `POST /v1/decision` from your existing `auth_request` or `ext_authz` hook instead. 
## How a request flows From 989cc041b8d366c2605cf01f222e6dec0e2bee87 Mon Sep 17 00:00:00 2001 From: Lev Date: Thu, 19 Mar 2026 20:40:34 +0100 Subject: [PATCH 34/64] docs: replace ASCII architecture diagrams with Mermaid sequence diagrams --- README.md | 98 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 56 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index 75f48c0..69f6c53 100644 --- a/README.md +++ b/README.md @@ -238,56 +238,70 @@ Headers follow [RFC 9333 RateLimit Fields](https://www.rfc-editor.org/rfc/rfc933 ### Architecture -**Decision service mode** (sidecar — your gateway calls `/v1/decision`, handles forwarding itself): - -``` - Client ──► Your gateway (nginx / Envoy / Kong) - │ - │ POST /v1/decision - │ (auth_request / ext_authz) - ▼ - ┌─────────────────────┐ - │ Fairvisor Edge │ - │ decision_service │ - │ │ - │ rule_engine │ - │ ngx.shared.dict │ ◄── no Redis, no network - └──────────┬──────────┘ - │ - 204 allow │ 429 reject - ▼ - gateway proxies or returns rejection +**Decision service mode** — sidecar: your gateway calls `/v1/decision`, handles forwarding itself. + +```mermaid +sequenceDiagram + participant C as Client + participant G as Your Gateway
<br/>(nginx / Envoy / Kong) + participant F as Fairvisor Edge
<br/>decision_service + participant U as Upstream service + + C->>G: Request + G->>F: POST /v1/decision
<br/>(auth_request / ext_authz) + alt allow + F-->>G: 204 No Content + G->>U: Forward request + U-->>G: Response + G-->>C: Response + else reject + F-->>G: 429 + RateLimit headers + G-->>C: 429 Too Many Requests + end ``` -**Reverse proxy mode** (inline — Fairvisor handles proxying): +No Redis, no external state — all counters live in `ngx.shared.dict`. -``` - Client ──► Fairvisor Edge (reverse_proxy) - │ - │ access.lua → rule_engine - │ ngx.shared.dict - │ - allow ──► upstream service - reject ──► 429 + RFC 9333 headers -``` +**Reverse proxy mode** — inline: Fairvisor handles both enforcement and proxying. -**Wrapper mode** (transparent LLM proxy — swap base URL, no client changes): +```mermaid +sequenceDiagram + participant C as Client + participant F as Fairvisor Edge
<br/>reverse_proxy + participant U as Upstream service + C->>F: Request + alt allow + F->>U: Forward request + U-->>F: Response + F-->>C: Response + else reject + F-->>C: 429 + RFC 9333 headers + end ``` - Client ──► Fairvisor Edge POST /openai/v1/chat/completions - (wrapper) Authorization: Bearer CLIENT_JWT:UPSTREAM_KEY - │ - │ 1. Validate CLIENT_JWT, extract org_id / user_id claims - │ 2. Enforce token budget (TPM / TPD / cost) - │ 3. Strip Authorization header, inject upstream API key - │ 4. Forward to https://api.openai.com (or Anthropic/Gemini) - │ 5. Count response tokens, refund unused reservation - │ - allow ──► upstream response (Authorization header stripped from reply) - reject ──► 429 + X-Fairvisor-Reason: tpm_exceeded / budget_exhausted + +**Wrapper mode** — transparent LLM proxy: swap the base URL, no other client changes needed. + +```mermaid +sequenceDiagram + participant C as Client + participant F as Fairvisor Edge
<br/>wrapper + participant U as Upstream LLM
<br/>(OpenAI / Anthropic / Gemini) + + C->>F: POST /openai/v1/chat/completions
<br/>Authorization: Bearer CLIENT_JWT:UPSTREAM_KEY + F->>F: 1. Validate JWT · extract org_id claims + F->>F: 2. Enforce TPM / TPD / cost budget + alt budget ok + F->>U: POST /v1/chat/completions
Authorization: Bearer UPSTREAM_KEY + U-->>F: 200 OK + token usage + F->>F: 3. Count tokens · refund unused reservation + F-->>C: 200 OK (Authorization stripped from reply) + else budget exceeded + F-->>C: 429 X-Fairvisor-Reason: tpm_exceeded + end ``` -Supported upstream paths out of the box: `/openai/*`, `/anthropic/*`, `/gemini/*`. +Supported upstream paths: `/openai/*`, `/anthropic/*`, `/gemini/*`, `/grok/*`. All three modes use the same policy bundle and produce the same rejection headers. From a2bb9dad92085dacbe180518580f5cb662626814 Mon Sep 17 00:00:00 2001 From: Lev Date: Thu, 19 Mar 2026 20:42:41 +0100 Subject: [PATCH 35/64] =?UTF-8?q?docs:=20fix=20JWT=20wording=20=E2=80=94?= =?UTF-8?q?=20Fairvisor=20parses=20claims,=20does=20not=20validate=20signa?= =?UTF-8?q?ture?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 69f6c53..4c4b663 100644 --- a/README.md +++ b/README.md @@ -194,7 +194,7 @@ The fastest path is **wrapper mode**: Fairvisor sits in front of the LLM API, en curl https://your-fairvisor-host/openai/v1/chat/completions -H "Authorization: Bearer eyJhbGc...:sk-proj-..." -H "Content-Type: application/json" -d '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"Hello"}]}' ``` -Fairvisor validates the JWT, extracts `org_id`, charges tokens against the budget, strips the `Authorization` header, and forwards with the upstream key. The upstream never sees the client JWT. +Fairvisor parses the JWT claims (no signature validation — the JWT is trusted as-is), extracts `org_id`, charges tokens against the budget, strips the `Authorization` header, and forwards with the upstream key. The upstream never sees the client JWT. When the budget is exhausted: @@ -289,7 +289,7 @@ sequenceDiagram participant U as Upstream LLM
<br/>(OpenAI / Anthropic / Gemini) C->>F: POST /openai/v1/chat/completions
<br/>Authorization: Bearer CLIENT_JWT:UPSTREAM_KEY - F->>F: 1. Validate JWT · extract org_id claims + F->>F: 1. Parse JWT claims (org_id, user_id) F->>F: 2. Enforce TPM / TPD / cost budget alt budget ok F->>U: POST /v1/chat/completions
Authorization: Bearer UPSTREAM_KEY From 4759b72ff8430c16b1e9cc790ff76cd3283284aa Mon Sep 17 00:00:00 2001 From: Lev Date: Thu, 19 Mar 2026 20:45:13 +0100 Subject: [PATCH 36/64] trim README: remove benchmark methodology, Contributing section; fix recipe name --- README.md | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/README.md b/README.md index 4c4b663..d739b01 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,6 @@ - [CLI](#cli) - [SaaS control plane (optional)](#saas-control-plane-optional) - [Project layout](#project-layout) -- [Contributing](#contributing) - [License](#license) --- @@ -96,7 +95,7 @@ Fairvisor integrates *alongside* Kong, nginx, and Envoy — it is not a replacem > **Runnable quickstart:** `examples/quickstart/` — `docker compose up -d` and run your first enforce/reject test in under a minute. See [`examples/quickstart/README.md`](examples/quickstart/README.md). > -> **Recipes:** `examples/recipes/` — deployable team budgets, runaway agent guard, and provider failover examples. +> **Recipes:** `examples/recipes/` — deployable team budgets, runaway agent guard, and circuit-breaker examples. > > **Sample artifacts:** `fixtures/` — canonical request/response fixtures for enforce, reject (TPM, TPD, prompt-too-large), and provider-native error bodies (OpenAI, Anthropic, Gemini). 
@@ -338,17 +337,6 @@ Policies are versioned JSON — commit them to Git, review changes in PRs, roll ## Performance -### Benchmark methodology (March 2026) - -- **Hosts:** 2 × AWS `c7i.xlarge` (4 vCPU, 8 GiB each), cluster placement group, eu-central-1 -- **OS:** Ubuntu 24.04 LTS -- **Runtime:** OpenResty 1.29.2.1, Fairvisor latest `main` (no Docker) -- **Load tool:** `k6` v0.54.0, `constant-arrival-rate`, 10,000 RPS for 60s, 10s warmup -- **Benchmark script:** `run-all.sh` from `fairvisor/benchmark` -- **Topology:** two-host — Fairvisor and k6 on separate machines (VPC private network) -- **Decision endpoint contract:** `POST /v1/decision` with `X-Original-Method` and `X-Original-URI` -- **Note:** reverse proxy numbers include policy evaluation and upstream proxy hop to backend nginx. - ### Latest measured latency @ 10,000 RPS | Percentile | Decision service | Reverse proxy | Raw nginx (baseline) | @@ -426,17 +414,6 @@ docker/ Docker artifacts docs/ reference documentation ``` -## Contributing - -See [CONTRIBUTING.md](CONTRIBUTING.md). Bug reports, issues, and pull requests welcome. - -Run the test suite: - -```bash -busted spec # unit + integration -pytest tests/e2e -v # E2E (requires Docker) -``` - ## License [Mozilla Public License 2.0](LICENSE) From 5fac6e22c2286342b3cb547035c5bb095073c903 Mon Sep 17 00:00:00 2001 From: Lev Date: Sun, 22 Mar 2026 09:58:16 +0100 Subject: [PATCH 37/64] README: rework tagline, add hook + mode selector, drop Policy as code + ngx.shared.dict refs --- README.md | 56 ++++++++----------------------------------------------- 1 file changed, 8 insertions(+), 48 deletions(-) diff --git a/README.md b/README.md index d739b01..2da0365 100644 --- a/README.md +++ b/README.md @@ -6,15 +6,7 @@

FAIRVISOR

-

Turn API limits into enforceable business policy.

- -

- Every API that charges per token, serves paying tenants, or runs agentic pipelines needs
- enforceable limits — not just rate-limit middleware bolted on as an afterthought.
-
- Open-source edge enforcement engine for rate limits, quotas, and cost budgets.
- Runs standalone or with a SaaS control plane for team governance. -

+

Stop one tenant from burning everyone's LLM budget.

License: MPL-2.0 @@ -34,13 +26,11 @@ ## Table of Contents -- [What is Fairvisor?](#what-is-fairvisor) - [Why not nginx / Kong / Envoy?](#why-not-nginx--kong--envoy) - [Quick start](#quick-start) - [LLM token budget in 30 seconds](#llm-token-budget-in-30-seconds) - [How a request flows](#how-a-request-flows) - [Enforcement capabilities](#enforcement-capabilities) -- [Policy as code](#policy-as-code) - [Performance](#performance) - [Deployment](#deployment) - [CLI](#cli) @@ -50,21 +40,7 @@ --- -## What is Fairvisor? - -Fairvisor Edge is a **policy enforcement layer** that sits between your API gateway and your upstream services. Every request is evaluated against a declarative JSON policy bundle and receives a deterministic allow or reject verdict — with machine-readable rejection headers and sub-millisecond latency. - -It is **not** a reverse proxy replacement. It is **not** a WAF. It is a dedicated, composable enforcement point for: - -- **Rate limits and quotas** — per route, per tenant, per JWT claim, per API key -- **Cost budgets** — cumulative spend caps per org, team, or endpoint -- **LLM token limits** — TPM/TPD budgets with pre-request reservation and post-response refund -- **Kill switches** — instant traffic blocking per descriptor, no restart required -- **Shadow mode** — dry-run enforcement against real traffic before going live -- **Loop detection** — stops runaway agentic workflows at the edge -- **Circuit breaker** — auto-trips on spend spikes, auto-resets after cooldown - -All controls are defined in one versioned policy bundle. Policies hot-reload without restarting the process. +Every LLM call costs tokens — and when multiple teams, customers, or agents share the same API credentials, a single bad actor can exhaust the budget for everyone. Fairvisor is a lightweight enforcement engine that sits in front of your LLM API and gives each tenant, team, or user their own token budget. 
No Redis, no separate rate-limit service — one container, one JSON policy file, sub-millisecond enforcement overhead. ## Why not nginx / Kong / Envoy? @@ -93,6 +69,11 @@ Fairvisor integrates *alongside* Kong, nginx, and Envoy — it is not a replacem ## Quick start +> **Which mode is right for you?** +> - **Wrapper** — your app calls OpenAI / Anthropic / Gemini directly → point your client at Fairvisor instead, no other code changes needed. +> - **Reverse proxy** — you have a single upstream service → Fairvisor sits in front and enforces before forwarding. +> - **Decision service** — you already run nginx, Envoy, or Kong → call `POST /v1/decision` from `auth_request` / `ext_authz`. + > **Runnable quickstart:** `examples/quickstart/` — `docker compose up -d` and run your first enforce/reject test in under a minute. See [`examples/quickstart/README.md`](examples/quickstart/README.md). > > **Recipes:** `examples/recipes/` — deployable team budgets, runaway agent guard, and circuit-breaker examples. @@ -259,8 +240,6 @@ sequenceDiagram end ``` -No Redis, no external state — all counters live in `ngx.shared.dict`. - **Reverse proxy mode** — inline: Fairvisor handles both enforcement and proxying. ```mermaid @@ -318,23 +297,6 @@ All three modes use the same policy bundle and produce the same rejection header Identity keys can be **JWT claims** (`jwt:org_id`, `jwt:plan`), **HTTP headers** (`header:x-api-key`), or **IP attributes** (`ip:addr`, `ip:country`). Combine multiple keys per rule for compound matching. 
-## Policy as code - -Define policies in JSON, validate against the schema, test in shadow mode, then promote: - -```bash -# Validate bundle structure and rule semantics -fairvisor validate ./policies.json - -# Replay real traffic without blocking anything -fairvisor test --dry-run - -# Apply a new bundle (hot-reload, no restart) -fairvisor connect --push ./policies.json -``` - -Policies are versioned JSON — commit them to Git, review changes in PRs, roll back with confidence. - ## Performance ### Latest measured latency @ 10,000 RPS @@ -355,8 +317,6 @@ Policies are versioned JSON — commit them to Git, review changes in PRs, roll | Simple rate limit (1 rule) | 195,000 | | Complex policy (5 rules, JWT parsing, loop detection) | 195,000 | -**No external datastore.** All enforcement state lives in in-process shared memory (`ngx.shared.dict`). No Redis, no Postgres, no network round-trips in the decision path. - Reproduce: see [fairvisor/benchmark](https://github.com/fairvisor/benchmark) — the canonical benchmark source of truth for Fairvisor Edge performance numbers. @@ -407,7 +367,7 @@ cli/ command-line tooling spec/ unit and integration tests (busted) tests/e2e/ Docker-based E2E tests (pytest) examples/quickstart/ runnable quickstart (docker compose up -d) -examples/recipes/ deployable policy recipes (team budgets, agent guard, failover) +examples/recipes/ deployable policy recipes (team budgets, agent guard, circuit breaker) fixtures/ canonical request/response sample artifacts helm/ Helm chart docker/ Docker artifacts From b4e2b4d6bf32790bcf6b5008de6a3960c63cd016 Mon Sep 17 00:00:00 2001 From: Lev Date: Sun, 22 Mar 2026 10:11:29 +0100 Subject: [PATCH 38/64] README: remove hero latency/RPS line --- README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/README.md b/README.md index 2da0365..153ad46 100644 --- a/README.md +++ b/README.md @@ -18,10 +18,6 @@ Docs

-

- Latency: < 70 µs enforcement overhead · 195k RPS max throughput · No external state (no Redis / DB) -

- --- ## Table of Contents From c75e2a554e47b2c5b760a76d7029d7f8b56f6981 Mon Sep 17 00:00:00 2001 From: Lev Date: Sun, 22 Mar 2026 10:16:09 +0100 Subject: [PATCH 39/64] =?UTF-8?q?README:=20broaden=20hook=20paragraph=20?= =?UTF-8?q?=E2=80=94=20not=20LLM-only?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 153ad46..44b9d4e 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ --- -Every LLM call costs tokens — and when multiple teams, customers, or agents share the same API credentials, a single bad actor can exhaust the budget for everyone. Fairvisor is a lightweight enforcement engine that sits in front of your LLM API and gives each tenant, team, or user their own token budget. No Redis, no separate rate-limit service — one container, one JSON policy file, sub-millisecond enforcement overhead. +When multiple tenants, agents, or services share an API, one misbehaving caller can exhaust the budget for everyone — whether that's LLM tokens, API credits, or request quotas. Fairvisor is a lightweight enforcement engine that gives each tenant isolated limits at the edge: token budgets, cost caps, rate limits, and kill switches — keyed on JWT claims, API keys, or IP. One container, one JSON policy file, no Redis. ## Why not nginx / Kong / Envoy? 
From 51d45ae767e3442fcf175f22e370fe847b278296 Mon Sep 17 00:00:00 2001 From: Lev Date: Sun, 22 Mar 2026 10:57:19 +0100 Subject: [PATCH 40/64] docs: add 'Why we built this' section and ToC entry --- README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README.md b/README.md index 44b9d4e..48cac2b 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ ## Table of Contents +- [Why we built this](#why-we-built-this) - [Why not nginx / Kong / Envoy?](#why-not-nginx--kong--envoy) - [Quick start](#quick-start) - [LLM token budget in 30 seconds](#llm-token-budget-in-30-seconds) @@ -38,6 +39,20 @@ When multiple tenants, agents, or services share an API, one misbehaving caller can exhaust the budget for everyone — whether that's LLM tokens, API credits, or request quotas. Fairvisor is a lightweight enforcement engine that gives each tenant isolated limits at the edge: token budgets, cost caps, rate limits, and kill switches — keyed on JWT claims, API keys, or IP. One container, one JSON policy file, no Redis. +## Why we built this + +API gateways count requests. LLM providers bill by the token. + +When you serve multiple tenants — customers, teams, or agentic pipelines — that gap becomes a real problem. One runaway agent can consume a month's token budget overnight. Your gateway sees one request per second; your invoice shows 3 million tokens. + +We needed something that: +- Understood token budgets, not just request counts +- Could key limits on JWT claims (`org_id`, `plan`, `user_id`), not just IPs +- Added no external state — no Redis, no network round-trip in the hot path +- Could plug into nginx or Envoy *or* run standalone as a transparent LLM proxy + +We couldn't find it, so we built Fairvisor. + ## Why not nginx / Kong / Envoy? If you have an existing gateway, the question is whether Fairvisor adds anything you can't get from the plugin ecosystem already installed. 
Here is the honest comparison: From 0a9d1ed224ee8b8abd4eba72570379238496f67e Mon Sep 17 00:00:00 2001 From: Lev Date: Sun, 22 Mar 2026 13:35:53 +0100 Subject: [PATCH 41/64] =?UTF-8?q?docs:=20README=20final=20polish=20?= =?UTF-8?q?=E2=80=94=20tagline,=20quickstart=20restructure,=20badge=20fix,?= =?UTF-8?q?=20ip:address,=20curl=20formatting,=20Architecture=20ToC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 62 +++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 48cac2b..d267450 100644 --- a/README.md +++ b/README.md @@ -6,12 +6,12 @@

FAIRVISOR

-

Stop one tenant from burning everyone's LLM budget.

+

Stop one tenant from exhausting everyone's budget.

License: MPL-2.0 Latest release - CI + CI Lua coverage GHCR image Platforms: linux/amd64 · linux/arm64 @@ -27,6 +27,7 @@ - [Quick start](#quick-start) - [LLM token budget in 30 seconds](#llm-token-budget-in-30-seconds) - [How a request flows](#how-a-request-flows) + - [Architecture](#architecture) - [Enforcement capabilities](#enforcement-capabilities) - [Performance](#performance) - [Deployment](#deployment) @@ -81,17 +82,30 @@ Fairvisor integrates *alongside* Kong, nginx, and Envoy — it is not a replacem ## Quick start > **Which mode is right for you?** -> - **Wrapper** — your app calls OpenAI / Anthropic / Gemini directly → point your client at Fairvisor instead, no other code changes needed. +> - **Wrapper** — your app calls OpenAI / Anthropic / Gemini directly → point your client at Fairvisor instead, no other code changes needed. *Fastest to try.* > - **Reverse proxy** — you have a single upstream service → Fairvisor sits in front and enforces before forwarding. > - **Decision service** — you already run nginx, Envoy, or Kong → call `POST /v1/decision` from `auth_request` / `ext_authz`. -> **Runnable quickstart:** `examples/quickstart/` — `docker compose up -d` and run your first enforce/reject test in under a minute. See [`examples/quickstart/README.md`](examples/quickstart/README.md). -> -> **Recipes:** `examples/recipes/` — deployable team budgets, runaway agent guard, and circuit-breaker examples. -> -> **Sample artifacts:** `fixtures/` — canonical request/response fixtures for enforce, reject (TPM, TPD, prompt-too-large), and provider-native error bodies (OpenAI, Anthropic, Gemini). +### Fastest path -### 1. Create a policy +```bash +git clone https://github.com/fairvisor/edge.git +cd examples/quickstart +docker compose up -d +``` + +Run your first enforce/reject test in under a minute — full walkthrough in [`examples/quickstart/README.md`](examples/quickstart/README.md). 
+ +**Recipes:** `examples/recipes/` — team budgets, runaway agent guard, circuit-breaker. + +**Sample artifacts:** `fixtures/` — canonical enforce/reject fixtures (OpenAI, Anthropic, Gemini). + +### Minimal decision\_service example + +

+Expand — manual setup with a single docker run + +**1. Create a policy** ```bash mkdir fairvisor-demo && cd fairvisor-demo @@ -124,7 +138,7 @@ mkdir fairvisor-demo && cd fairvisor-demo } ``` -### 2. Run the edge +**2. Run the edge** ```bash docker run -d \ @@ -133,22 +147,35 @@ docker run -d \ -v "$(pwd)/policy.json:/etc/fairvisor/policy.json:ro" \ -e FAIRVISOR_CONFIG_FILE=/etc/fairvisor/policy.json \ -e FAIRVISOR_MODE=decision_service \ - ghcr.io/fairvisor/fairvisor-edge:v0.1.0 + ghcr.io/fairvisor/fairvisor-edge:latest ``` -### 3. Verify +**3. Verify** ```bash curl -sf http://localhost:8080/readyz # {"status":"ok"} -curl -s -w "\nHTTP %{http_code}\n" \ +# Allowed request → HTTP 204 +curl -s -o /dev/null -w "HTTP %{http_code}\n" \ -H "X-Original-Method: GET" \ -H "X-Original-URI: /api/data" \ -H "X-Forwarded-For: 10.0.0.1" \ http://localhost:8080/v1/decision + +# Rejected request — exhaust the burst (>10 requests) +for i in $(seq 1 12); do + curl -s -o /dev/null -w "HTTP %{http_code}\n" \ + -H "X-Original-Method: GET" \ + -H "X-Original-URI: /api/data" \ + -H "X-Forwarded-For: 10.0.0.1" \ + http://localhost:8080/v1/decision +done +# last requests → HTTP 429 X-Fairvisor-Reason: rate_limit_exceeded ``` +
+ > Full walkthrough: [docs.fairvisor.com/docs/quickstart](https://docs.fairvisor.com/docs/quickstart/) ## LLM token budget in 30 seconds @@ -182,7 +209,10 @@ The fastest path is **wrapper mode**: Fairvisor sits in front of the LLM API, en **2. Call the API** — token format `Bearer :`: ```bash -curl https://your-fairvisor-host/openai/v1/chat/completions -H "Authorization: Bearer eyJhbGc...:sk-proj-..." -H "Content-Type: application/json" -d '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"Hello"}]}' +curl https://your-fairvisor-host/openai/v1/chat/completions \ + -H "Authorization: Bearer eyJhbGc...:sk-proj-..." \ + -H "Content-Type: application/json" \ + -d '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"Hello"}]}' ``` Fairvisor parses the JWT claims (no signature validation — the JWT is trusted as-is), extracts `org_id`, charges tokens against the budget, strips the `Authorization` header, and forwards with the upstream key. The upstream never sees the client JWT. 
@@ -298,7 +328,7 @@ All three modes use the same policy bundle and produce the same rejection header | If you need to… | Algorithm | Typical identity keys | Reject reason | |---|---|---|---| -| Cap request frequency | `token_bucket` | `jwt:user_id`, `header:x-api-key`, `ip:addr` | `rate_limit_exceeded` | +| Cap request frequency | `token_bucket` | `jwt:user_id`, `header:x-api-key`, `ip:address` | `rate_limit_exceeded` | | Cap cumulative spend | `cost_based` | `jwt:org_id`, `jwt:plan` | `budget_exhausted` | | Cap LLM tokens (TPM/TPD) | `token_bucket_llm` | `jwt:org_id`, `jwt:user_id` | `tpm_exceeded`, `tpd_exceeded` | | Instantly block a segment | kill switch | any descriptor | `kill_switch_active` | @@ -306,7 +336,7 @@ All three modes use the same policy bundle and produce the same rejection header | Stop runaway agent loops | loop detection | request fingerprint | `loop_detected` | | Clamp spend spikes | circuit breaker | global or policy scope | `circuit_breaker_open` | -Identity keys can be **JWT claims** (`jwt:org_id`, `jwt:plan`), **HTTP headers** (`header:x-api-key`), or **IP attributes** (`ip:addr`, `ip:country`). Combine multiple keys per rule for compound matching. +Identity keys can be **JWT claims** (`jwt:org_id`, `jwt:plan`), **HTTP headers** (`header:x-api-key`), or **IP attributes** (`ip:address`, `ip:country`). Combine multiple keys per rule for compound matching. 
## Performance From c7d13454645096d891de3a3b9c20257a64cc01f3 Mon Sep 17 00:00:00 2001 From: Lev Date: Sun, 22 Mar 2026 14:37:06 +0100 Subject: [PATCH 42/64] fix(recipes): replace unsupported loop_detector rule with spec-level loop_detection config --- .../recipes/runaway-agent-guard/policy.json | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/examples/recipes/runaway-agent-guard/policy.json b/examples/recipes/runaway-agent-guard/policy.json index 38de248..b3facab 100644 --- a/examples/recipes/runaway-agent-guard/policy.json +++ b/examples/recipes/runaway-agent-guard/policy.json @@ -8,23 +8,24 @@ "spec": { "selector": { "pathPrefix": "/", - "methods": ["POST"] + "methods": [ + "POST" + ] }, "mode": "enforce", + "loop_detection": { + "enabled": true, + "window_seconds": 60, + "threshold_identical_requests": 30, + "action": "reject", + "similarity": "exact" + }, "rules": [ - { - "name": "loop-detection", - "limit_keys": ["jwt:agent_id"], - "algorithm": "loop_detector", - "algorithm_config": { - "window_seconds": 60, - "max_requests": 30, - "cooldown_seconds": 120 - } - }, { "name": "agent-tpm-guard", - "limit_keys": ["jwt:agent_id"], + "limit_keys": [ + "jwt:agent_id" + ], "algorithm": "token_bucket_llm", "algorithm_config": { "tokens_per_minute": 50000, From a7f935d0ddffa00091db4f6af1ee1d0064995d60 Mon Sep 17 00:00:00 2001 From: Lev Date: Sun, 22 Mar 2026 14:48:28 +0100 Subject: [PATCH 43/64] =?UTF-8?q?fix(recipes):=20change=20cost=5Fbased=20p?= =?UTF-8?q?eriod=20"30d"=20=E2=86=92=20"7d"=20(only=205m/1h/1d/7d=20are=20?= =?UTF-8?q?valid)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/recipes/team-budgets/policy.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/recipes/team-budgets/policy.json b/examples/recipes/team-budgets/policy.json index 87d7c63..7a87d2c 100644 --- a/examples/recipes/team-budgets/policy.json +++ 
b/examples/recipes/team-budgets/policy.json @@ -29,7 +29,7 @@ "algorithm": "cost_based", "algorithm_config": { "budget": 50000, - "period": "30d", + "period": "7d", "cost_key": "fixed", "fixed_cost": 1, "staged_actions": [ From 8566c15e52bd551ad6a5538a177eb028bfbbe640 Mon Sep 17 00:00:00 2001 From: Lev Date: Sun, 22 Mar 2026 17:35:20 +0100 Subject: [PATCH 44/64] docs: rewrite 'no external state' bullet to focus on request latency --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d267450..5b01bdd 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ When you serve multiple tenants — customers, teams, or agentic pipelines — t We needed something that: - Understood token budgets, not just request counts - Could key limits on JWT claims (`org_id`, `plan`, `user_id`), not just IPs -- Added no external state — no Redis, no network round-trip in the hot path +- Kept every request fast — no Redis round-trip, no extra network call in the hot path - Could plug into nginx or Envoy *or* run standalone as a transparent LLM proxy We couldn't find it, so we built Fairvisor. From 1e9ffc69eaeba78b2a1589643e023f755bd1e789 Mon Sep 17 00:00:00 2001 From: Lev Date: Sun, 22 Mar 2026 17:53:04 +0100 Subject: [PATCH 45/64] =?UTF-8?q?docs:=20reword=20'does=20not=20replace'?= =?UTF-8?q?=20=E2=80=94=20Fairvisor=20can=20run=20standalone=20or=20alongs?= =?UTF-8?q?ide=20existing=20gateways?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5b01bdd..0811f7d 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ If you have an existing gateway, the question is whether Fairvisor adds anything **If you are on Cloudflare or Akamai**, per-JWT-claim limits, LLM token budgets, and cost caps are not in the platform's model. 
If your limits are tenant-aware or cost-aware, you need something that runs in your own stack. -Fairvisor integrates *alongside* Kong, nginx, and Envoy — it is not a replacement. See [nginx auth_request →](https://docs.fairvisor.com/docs/gateway/nginx/) · [Envoy ext_authz →](https://docs.fairvisor.com/docs/gateway/envoy/) · [Kong / Traefik →](https://docs.fairvisor.com/docs/gateway/) for integration patterns. +Fairvisor can run alongside Kong, nginx, and Envoy — or as a standalone reverse proxy if you don't need a separate gateway. See [nginx auth_request →](https://docs.fairvisor.com/docs/gateway/nginx/) · [Envoy ext_authz →](https://docs.fairvisor.com/docs/gateway/envoy/) · [Kong / Traefik →](https://docs.fairvisor.com/docs/gateway/) for integration patterns. ## Quick start @@ -372,7 +372,7 @@ Reproduce: see [fairvisor/benchmark](https://github.com/fairvisor/benchmark) — | Envoy `ext_authz` | [docs/gateway/envoy](https://docs.fairvisor.com/docs/gateway/envoy/) | | Kong / Traefik | [docs/gateway](https://docs.fairvisor.com/docs/gateway/) | -Fairvisor integrates **alongside** Kong, nginx, Envoy, and Traefik — it does not replace them. +Fairvisor works alongside Kong, nginx, Envoy, and Traefik — or runs standalone as a reverse proxy when you don't need a separate gateway. 
## CLI From 96de2a1593e067d9abb140cbfcca8fae20350eae Mon Sep 17 00:00:00 2001 From: Codex Date: Sun, 22 Mar 2026 17:06:50 +0000 Subject: [PATCH 46/64] docs(readme): fix quickstart clone path --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 0811f7d..5c586f6 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ Fairvisor can run alongside Kong, nginx, and Envoy — or as a standalone revers ```bash git clone https://github.com/fairvisor/edge.git -cd examples/quickstart +cd edge/examples/quickstart docker compose up -d ``` @@ -422,4 +422,3 @@ docs/ reference documentation --- **Docs:** [docs.fairvisor.com](https://docs.fairvisor.com/docs/) · **Website:** [fairvisor.com](https://fairvisor.com) · **Quickstart:** [5 minutes to enforcement](https://docs.fairvisor.com/docs/quickstart/) - From a4b166305671b2c538033d1f5f8d10ce5a1a351e Mon Sep 17 00:00:00 2001 From: Codex Date: Sun, 22 Mar 2026 17:20:54 +0000 Subject: [PATCH 47/64] fix(quickstart): build local image instead of ghcr --- examples/quickstart/README.md | 3 +++ examples/quickstart/docker-compose.yml | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/examples/quickstart/README.md b/examples/quickstart/README.md index 3acb943..375fa34 100644 --- a/examples/quickstart/README.md +++ b/examples/quickstart/README.md @@ -13,6 +13,9 @@ Go from `git clone` to working policy enforcement in one step. docker compose up -d ``` +The first run builds the `edge` image locally from `docker/Dockerfile`, so no +GHCR login is required. + Wait for the edge service to report healthy: ```bash diff --git a/examples/quickstart/docker-compose.yml b/examples/quickstart/docker-compose.yml index 870812d..092b7a5 100644 --- a/examples/quickstart/docker-compose.yml +++ b/examples/quickstart/docker-compose.yml @@ -24,7 +24,9 @@ services: edge: - image: ghcr.io/fairvisor/fairvisor-edge:latest + build: + context: ../.. 
+ dockerfile: docker/Dockerfile ports: - "8080:8080" environment: From 4a6a52cf76364467d143bee8965eeca247871a50 Mon Sep 17 00:00:00 2001 From: Codex Date: Sun, 22 Mar 2026 17:24:17 +0000 Subject: [PATCH 48/64] docs(quickstart): rename compose service to fairvisor --- examples/quickstart/README.md | 6 +++--- examples/quickstart/docker-compose.yml | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/examples/quickstart/README.md b/examples/quickstart/README.md index 375fa34..d9b637f 100644 --- a/examples/quickstart/README.md +++ b/examples/quickstart/README.md @@ -13,14 +13,14 @@ Go from `git clone` to working policy enforcement in one step. docker compose up -d ``` -The first run builds the `edge` image locally from `docker/Dockerfile`, so no +The first run builds the `fairvisor` image locally from `docker/Dockerfile`, so no GHCR login is required. -Wait for the edge service to report healthy: +Wait for the `fairvisor` service to report healthy: ```bash docker compose ps -# edge should show "healthy" +# fairvisor should show "healthy" ``` ## Verify enforcement diff --git a/examples/quickstart/docker-compose.yml b/examples/quickstart/docker-compose.yml index 092b7a5..2efc128 100644 --- a/examples/quickstart/docker-compose.yml +++ b/examples/quickstart/docker-compose.yml @@ -19,11 +19,10 @@ # cannot be demonstrated with this mock stack. # # This file is also the base for the e2e-smoke CI check. -# CI extends it via tests/e2e/docker-compose.test.yml; do not diverge the -# service name, port, or volume contract without updating CI as well. +# CI expects the same port and volume contract; update CI too if those change. services: - edge: + fairvisor: build: context: ../.. 
dockerfile: docker/Dockerfile From a739b5bc28f28016b064445a73625d382fedd61c Mon Sep 17 00:00:00 2001 From: Codex Date: Sun, 22 Mar 2026 17:26:14 +0000 Subject: [PATCH 49/64] chore(quickstart): reduce readyz healthcheck frequency --- examples/quickstart/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/quickstart/docker-compose.yml b/examples/quickstart/docker-compose.yml index 2efc128..51cacf6 100644 --- a/examples/quickstart/docker-compose.yml +++ b/examples/quickstart/docker-compose.yml @@ -42,7 +42,7 @@ services: condition: service_healthy healthcheck: test: ["CMD", "curl", "-sf", "http://127.0.0.1:8080/readyz"] - interval: 2s + interval: 2m timeout: 2s retries: 15 start_period: 5s From 3ba11ceee66fef8bdb8ea0c45840249c32fb6ed0 Mon Sep 17 00:00:00 2001 From: Codex Date: Sun, 22 Mar 2026 17:29:27 +0000 Subject: [PATCH 50/64] chore(quickstart): remove mock_llm healthcheck --- examples/quickstart/docker-compose.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/examples/quickstart/docker-compose.yml b/examples/quickstart/docker-compose.yml index 51cacf6..60a9950 100644 --- a/examples/quickstart/docker-compose.yml +++ b/examples/quickstart/docker-compose.yml @@ -51,9 +51,3 @@ services: image: nginx:1.27-alpine volumes: - ./mock-llm.conf:/etc/nginx/nginx.conf:ro - healthcheck: - test: ["CMD", "wget", "-q", "-O", "-", "http://127.0.0.1:80/"] - interval: 2s - timeout: 2s - retries: 10 - start_period: 5s From 78dc693d9117568b48bdc79b1a3e6632221e1c52 Mon Sep 17 00:00:00 2001 From: Codex Date: Sun, 22 Mar 2026 17:30:46 +0000 Subject: [PATCH 51/64] chore(quickstart): restore mock_llm healthcheck at low frequency --- examples/quickstart/docker-compose.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/examples/quickstart/docker-compose.yml b/examples/quickstart/docker-compose.yml index 60a9950..59a2487 100644 --- a/examples/quickstart/docker-compose.yml +++ b/examples/quickstart/docker-compose.yml @@ -51,3 +51,9 @@ 
services: image: nginx:1.27-alpine volumes: - ./mock-llm.conf:/etc/nginx/nginx.conf:ro + healthcheck: + test: ["CMD", "wget", "-q", "-O", "-", "http://127.0.0.1:80/"] + interval: 2m + timeout: 2s + retries: 10 + start_period: 5s From cfd558e5efceafc96f6942472d81a9164d42ce60 Mon Sep 17 00:00:00 2001 From: Codex Date: Sun, 22 Mar 2026 17:34:31 +0000 Subject: [PATCH 52/64] fix(docker): add maxminddb dev lib and tune map hashes --- docker/Dockerfile | 1 + docker/nginx.conf.template | 2 ++ 2 files changed, 3 insertions(+) diff --git a/docker/Dockerfile b/docker/Dockerfile index d1812e7..3c608c4 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -5,6 +5,7 @@ RUN apt-get update && apt-get upgrade -y --no-install-recommends \ gettext-base \ python3 \ libmaxminddb0 \ + libmaxminddb-dev \ mmdb-bin \ && rm -rf /var/lib/apt/lists/* diff --git a/docker/nginx.conf.template b/docker/nginx.conf.template index c0d4184..7786b79 100644 --- a/docker/nginx.conf.template +++ b/docker/nginx.conf.template @@ -25,6 +25,8 @@ worker_shutdown_timeout 35s; http { resolver 127.0.0.11 ipv6=off valid=30s; resolver_timeout 2s; + map_hash_max_size 4096; + map_hash_bucket_size 64; geo $is_tor_exit { default 0; From 111d14d6977a1630324a481ff4aad562f8fb5ee1 Mon Sep 17 00:00:00 2001 From: Codex Date: Sun, 22 Mar 2026 18:11:39 +0000 Subject: [PATCH 53/64] fix(nginx): increase map hash size for quickstart --- docker/nginx.conf.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/nginx.conf.template b/docker/nginx.conf.template index 7786b79..c5c77df 100644 --- a/docker/nginx.conf.template +++ b/docker/nginx.conf.template @@ -25,7 +25,7 @@ worker_shutdown_timeout 35s; http { resolver 127.0.0.11 ipv6=off valid=30s; resolver_timeout 2s; - map_hash_max_size 4096; + map_hash_max_size 16384; map_hash_bucket_size 64; geo $is_tor_exit { From a03320419e39b94c774b355b40f550a8dfe582a8 Mon Sep 17 00:00:00 2001 From: Lev Date: Sun, 22 Mar 2026 19:30:24 +0100 Subject: [PATCH 54/64] 
fix(nginx): set map_hash_max_size 131072 to cover all 85k ASN entries --- docker/nginx.conf.template | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docker/nginx.conf.template b/docker/nginx.conf.template index c5c77df..866981c 100644 --- a/docker/nginx.conf.template +++ b/docker/nginx.conf.template @@ -25,7 +25,7 @@ worker_shutdown_timeout 35s; http { resolver 127.0.0.11 ipv6=off valid=30s; resolver_timeout 2s; - map_hash_max_size 16384; + map_hash_max_size 131072; map_hash_bucket_size 64; geo $is_tor_exit { @@ -53,7 +53,8 @@ http { location = /livez { default_type text/plain; - return 200 "ok\n"; + return 200 "ok +"; } location = /readyz { @@ -104,7 +105,8 @@ http { } default_type text/plain; - return 404 "not found\n"; + return 404 "not found +"; } } } From 7772219a4c2f003d9f48345a4ebb116a828c7155 Mon Sep 17 00:00:00 2001 From: Lev Date: Sun, 22 Mar 2026 20:01:35 +0100 Subject: [PATCH 55/64] =?UTF-8?q?fix(nginx):=20map=5Fhash=5Fmax=5Fsize=202?= =?UTF-8?q?62144=20=E2=80=94=20need=20~3x=20entries=20for=20collision-free?= =?UTF-8?q?=20hash=20(85k=20ASNs)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker/nginx.conf.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/nginx.conf.template b/docker/nginx.conf.template index 866981c..c745a65 100644 --- a/docker/nginx.conf.template +++ b/docker/nginx.conf.template @@ -25,7 +25,7 @@ worker_shutdown_timeout 35s; http { resolver 127.0.0.11 ipv6=off valid=30s; resolver_timeout 2s; - map_hash_max_size 131072; + map_hash_max_size 262144; map_hash_bucket_size 64; geo $is_tor_exit { From 8a7377705afda51968c5272ee30d26c646a87587 Mon Sep 17 00:00:00 2001 From: Lev Date: Sun, 22 Mar 2026 21:32:07 +0100 Subject: [PATCH 56/64] fix(cli): add bin/fairvisor-cli with corrected -I path for cli.* modules --- bin/fairvisor-cli | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 bin/fairvisor-cli diff --git 
a/bin/fairvisor-cli b/bin/fairvisor-cli new file mode 100644 index 0000000..1b1a490 --- /dev/null +++ b/bin/fairvisor-cli @@ -0,0 +1,7 @@ +#!/bin/bash +# Requires: OpenResty 'resty' in PATH (e.g. openresty package or OPENRESTY_HOME) + +SCRIPT_DIR="$(cd "$(dirname "$0")/.." && pwd)" + +exec resty -I "${SCRIPT_DIR}/src" -I "${SCRIPT_DIR}" \ + "${SCRIPT_DIR}/cli/main.lua" "$@" From 60927038d6376b36b226a4b10e13fb5c946bc0c5 Mon Sep 17 00:00:00 2001 From: Lev Date: Sun, 22 Mar 2026 21:32:31 +0100 Subject: [PATCH 57/64] =?UTF-8?q?fix(cli):=20Dockerfile.cli=20=E2=86=92=20?= =?UTF-8?q?bin/fairvisor-cli,=20ENTRYPOINT=20updated?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docker/Dockerfile.cli | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile.cli b/docker/Dockerfile.cli index bf52315..97f263f 100644 --- a/docker/Dockerfile.cli +++ b/docker/Dockerfile.cli @@ -9,8 +9,8 @@ WORKDIR /opt/fairvisor COPY src /opt/fairvisor/src COPY cli /opt/fairvisor/cli -COPY bin/fairvisor /opt/fairvisor/bin/fairvisor +COPY bin/fairvisor-cli /opt/fairvisor/bin/fairvisor-cli -RUN chmod +x /opt/fairvisor/bin/fairvisor +RUN chmod +x /opt/fairvisor/bin/fairvisor-cli -ENTRYPOINT ["/opt/fairvisor/bin/fairvisor"] +ENTRYPOINT ["/opt/fairvisor/bin/fairvisor-cli"] From fac3632bc36bb895aca85c373b2775ef94e37676 Mon Sep 17 00:00:00 2001 From: Lev Date: Sun, 22 Mar 2026 21:32:31 +0100 Subject: [PATCH 58/64] =?UTF-8?q?docs(cli):=20rename=20fairvisor=20?= =?UTF-8?q?=E2=86=92=20fairvisor-cli,=20fix=20resty=20-I=20path=20in=20doc?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cli/README.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/cli/README.md b/cli/README.md index bd18187..cd3cbb8 100644 --- a/cli/README.md +++ b/cli/README.md @@ -12,40 +12,40 @@ Command-line tool for scaffolding policies, validating 
configs, dry-run testing, From the repo root: ```bash -./bin/fairvisor [options] +./bin/fairvisor-cli [options] ``` Or with `resty` directly (e.g. from another directory, adjusting `-I` paths): ```bash -resty -I /path/to/fv-oss/src -I /path/to/fv-oss/cli /path/to/fv-oss/cli/main.lua [options] +resty -I /path/to/fv-oss/src -I /path/to/fv-oss /path/to/fv-oss/cli/main.lua [options] ``` -`bin/fairvisor` sets `-I` to the repo's `src` and `cli` so that `require("cli.commands.init")` and `require("fairvisor.bundle_loader")` resolve correctly. +`bin/fairvisor-cli` sets `-I` to the repo's `src` and root (for `cli.*` modules) so that `require("cli.commands.init")` and `require("fairvisor.bundle_loader")` resolve correctly. ## Commands | Command | Description | |--------|-------------| -| `fairvisor init [--template=api\|llm\|webhook]` | Generate `policy.json` and `edge.env.example` in the current directory. | -| `fairvisor validate ` | Validate policy JSON; exit 0 if valid, non-zero with errors otherwise. | -| `fairvisor test [--requests=] [--format=table\|json]` | Dry-run mock requests through the rule engine. | -| `fairvisor connect [--token=TOKEN] [--url=URL] [--output=PATH]` | Write credentials, verify SaaS connection, optionally download initial bundle. | -| `fairvisor status [--edge-url=URL] [--format=table\|json]` | Show policy version, SaaS connection, counters. | -| `fairvisor logs [--action=ACTION] [--reason=REASON]` | Stream structured logs with optional filters. | -| `fairvisor version` | Print CLI version. | -| `fairvisor help` | Print command list and usage. | +| `fairvisor-cli init [--template=api\|llm\|webhook]` | Generate `policy.json` and `edge.env.example` in the current directory. | +| `fairvisor-cli validate ` | Validate policy JSON; exit 0 if valid, non-zero with errors otherwise. | +| `fairvisor-cli test [--requests=] [--format=table\|json]` | Dry-run mock requests through the rule engine. 
| +| `fairvisor-cli connect [--token=TOKEN] [--url=URL] [--output=PATH]` | Write credentials, verify SaaS connection, optionally download initial bundle. | +| `fairvisor-cli status [--edge-url=URL] [--format=table\|json]` | Show policy version, SaaS connection, counters. | +| `fairvisor-cli logs [--action=ACTION] [--reason=REASON]` | Stream structured logs with optional filters. | +| `fairvisor-cli version` | Print CLI version. | +| `fairvisor-cli help` | Print command list and usage. | ## Examples ```bash -fairvisor init -fairvisor init --template=llm -fairvisor validate policy.json -fairvisor test policy.json -fairvisor connect --token=eyJ... -fairvisor version -fairvisor help +fairvisor-cli init +fairvisor-cli init --template=llm +fairvisor-cli validate policy.json +fairvisor-cli test policy.json +fairvisor-cli connect --token=eyJ... +fairvisor-cli version +fairvisor-cli help ``` ## Tests From f2059fc4728681b05c10ef3aa80c3176cbe89309 Mon Sep 17 00:00:00 2001 From: Lev Date: Sun, 22 Mar 2026 21:45:19 +0100 Subject: [PATCH 59/64] =?UTF-8?q?docs(readme):=20fairvisor=20=E2=86=92=20f?= =?UTF-8?q?airvisor-cli=20in=20CLI=20section?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 5c586f6..2f5c0b0 100644 --- a/README.md +++ b/README.md @@ -377,12 +377,12 @@ Fairvisor works alongside Kong, nginx, Envoy, and Traefik — or runs standalone ## CLI ```bash -fairvisor init --template=api # scaffold a policy bundle -fairvisor validate policy.json # validate before deploying -fairvisor test --dry-run # shadow-mode replay -fairvisor status # edge health and loaded bundle info -fairvisor logs # tail rejection events -fairvisor connect # connect to SaaS control plane +fairvisor-cli init --template=api # scaffold a policy bundle +fairvisor-cli validate policy.json # validate before deploying +fairvisor-cli test --dry-run # 
shadow-mode replay +fairvisor-cli status # edge health and loaded bundle info +fairvisor-cli logs # tail rejection events +fairvisor-cli connect # connect to SaaS control plane ``` ## SaaS control plane (optional) From ade005ae88a29e10b921501d926d129c5566add4 Mon Sep 17 00:00:00 2001 From: Codex Date: Mon, 23 Mar 2026 11:40:48 +0000 Subject: [PATCH 60/64] feat(limiter): improve token count accuracy and max_completion_tokens support - Fix _simple_word_estimate to properly skip false-positive "content" key matches (e.g. when key appears inside a string value) by verifying the value separator (: "...") before counting characters - Add _extract_max_tokens() to parse max_tokens / max_completion_tokens from raw request body when request_context.max_tokens is not set, enabling accurate budget reservation for OpenAI-compatible payloads Co-Authored-By: Claude Sonnet 4.6 --- src/fairvisor/llm_limiter.lua | 49 ++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/src/fairvisor/llm_limiter.lua b/src/fairvisor/llm_limiter.lua index 3b29bc0..d184bc7 100644 --- a/src/fairvisor/llm_limiter.lua +++ b/src/fairvisor/llm_limiter.lua @@ -189,23 +189,32 @@ local function _simple_word_estimate(request_context) local array_end = array_start and string_find(body, "]", array_start, true) if array_start and array_end then local segment = string_sub(body, array_start, array_end) - local marker = "\"content\":\"" - local marker_len = #marker local position = 1 local char_count = 0 while true do - local start_pos = string_find(segment, marker, position, true) - if not start_pos then + -- Find "content" key + local key_start = string_find(segment, "\"content\"", position, true) + if not key_start then break end - local content_start = start_pos + marker_len - local content_end = string_find(segment, "\"", content_start, true) - if not content_end then - break + + -- Look for value start: : "..." 
+ -- pattern: ^%s*:%s*" + local val_marker_start, val_marker_end = string_find(segment, "^%s*:%s*\"", key_start + 9) + + if not val_marker_start then + -- False positive (e.g. key was in a string), skip it + position = key_start + 7 + else + local content_start = val_marker_end + 1 + local content_end = string_find(segment, "\"", content_start, true) + if not content_end then + break + end + char_count = char_count + (content_end - content_start) + position = content_end + 1 end - char_count = char_count + (content_end - content_start) - position = content_end + 1 end return ceil(char_count / 4) @@ -215,6 +224,21 @@ local function _simple_word_estimate(request_context) return ceil(#body / 4) end +local function _extract_max_tokens(body) + if type(body) ~= "string" or body == "" then + return nil + end + -- simple regex scan for "max_tokens": 123 + -- or "max_completion_tokens": 123 + local s, e, val = string_find(body, '"max_tokens"%s*:%s*(%d+)') + if val then return tonumber(val) end + + s, e, val = string_find(body, '"max_completion_tokens"%s*:%s*(%d+)') + if val then return tonumber(val) end + + return nil +end + local function _check_tpd_budget(dict, key, config, cost, now) local ttl = _seconds_until_midnight_utc(now) local new_total, incr_err = dict:incr(key, cost, 0, ttl) @@ -365,6 +389,11 @@ function _M.check(dict, key, config, request_context, now) local max_completion = config.default_max_completion if request_context and type(request_context.max_tokens) == "number" and request_context.max_tokens > 0 then max_completion = request_context.max_tokens + elseif request_context and request_context.body then + local extracted = _extract_max_tokens(request_context.body) + if extracted and extracted > 0 then + max_completion = extracted + end end if config.max_completion_tokens and max_completion > config.max_completion_tokens then max_completion = config.max_completion_tokens From 96fbdc11d21230b9fa9b7b7a04b89d6cf30be983 Mon Sep 17 00:00:00 2001 From: Codex Date: 
Mon, 23 Mar 2026 11:51:00 +0000 Subject: [PATCH 61/64] =?UTF-8?q?fix(limiter):=20fix=20luacheck=20warnings?= =?UTF-8?q?=20=E2=80=94=20trailing=20whitespace=20and=20unused=20vars?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove trailing whitespace on blank lines (lines 201, 205) - Replace unused s, e variables with _ in _extract_max_tokens() Co-Authored-By: Claude Sonnet 4.6 --- src/fairvisor/llm_limiter.lua | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fairvisor/llm_limiter.lua b/src/fairvisor/llm_limiter.lua index d184bc7..ab3d3d4 100644 --- a/src/fairvisor/llm_limiter.lua +++ b/src/fairvisor/llm_limiter.lua @@ -198,11 +198,11 @@ local function _simple_word_estimate(request_context) if not key_start then break end - + -- Look for value start: : "..." -- pattern: ^%s*:%s*" local val_marker_start, val_marker_end = string_find(segment, "^%s*:%s*\"", key_start + 9) - + if not val_marker_start then -- False positive (e.g. 
key was in a string), skip it position = key_start + 7 @@ -230,10 +230,10 @@ local function _extract_max_tokens(body) end -- simple regex scan for "max_tokens": 123 -- or "max_completion_tokens": 123 - local s, e, val = string_find(body, '"max_tokens"%s*:%s*(%d+)') + local _, _, val = string_find(body, '"max_tokens"%s*:%s*(%d+)') if val then return tonumber(val) end - s, e, val = string_find(body, '"max_completion_tokens"%s*:%s*(%d+)') + _, _, val = string_find(body, '"max_completion_tokens"%s*:%s*(%d+)') if val then return tonumber(val) end return nil From 843a26c2f2c90b2e4226d265cba561ada0706d0d Mon Sep 17 00:00:00 2001 From: Codex Date: Mon, 23 Mar 2026 11:53:32 +0000 Subject: [PATCH 62/64] test(limiter): add scenarios for max_completion_tokens and improved JSON parsing --- spec/unit/features/llm_limiter.feature | 38 ++++++++++++++++++++++++++ spec/unit/llm_limiter_spec.lua | 5 ++++ 2 files changed, 43 insertions(+) diff --git a/spec/unit/features/llm_limiter.feature b/spec/unit/features/llm_limiter.feature index e9d3828..5519044 100644 --- a/spec/unit/features/llm_limiter.feature +++ b/spec/unit/features/llm_limiter.feature @@ -190,3 +190,41 @@ Feature: LLM limiter module behavior And the llm limiter config is validated When I build error response for reason "tpm_exceeded" Then error response has OpenAI rate limit shape + + Rule: New features — max_completion_tokens and improved JSON parsing + Scenario: max_completion_tokens is extracted from body when max_tokens is missing + Given the nginx mock environment is reset + And a valid llm limiter config with tokens_per_minute 10000 + And the config has default_max_completion 1000 + And the llm limiter config is validated + And the request body is '{"messages":[{"role":"user","content":"hello"}],"max_completion_tokens":2000}' + When I run llm check at now 1700000000 + Then check is allowed + And reserved equals estimated_total 2002 + + Scenario: improved JSON parsing handles spaces and false positives + Given the 
nginx mock environment is reset + And a valid llm limiter config with tokens_per_minute 10000 + And the config uses estimator "simple_word" + And the llm limiter config is validated + And the request body is '{"messages":[{"role":"user", "content" : "12345678"}]}' + When I estimate prompt tokens + Then prompt estimate equals 2 + + Scenario: simple_word parsing multiple messages + Given the nginx mock environment is reset + And a valid llm limiter config with tokens_per_minute 10000 + And the config uses estimator "simple_word" + And the llm limiter config is validated + And the request body is '{"messages":[{"role":"user","content":"hello"},{"role":"assistant","content":"world!"}]}' + When I estimate prompt tokens + Then prompt estimate equals 3 + + Scenario: simple_word fallback when no messages key + Given the nginx mock environment is reset + And a valid llm limiter config with tokens_per_minute 10000 + And the config uses estimator "simple_word" + And the llm limiter config is validated + And the request body is '{"input":"test"}' + When I estimate prompt tokens + Then prompt estimate equals 4 diff --git a/spec/unit/llm_limiter_spec.lua b/spec/unit/llm_limiter_spec.lua index aecf8e1..362b3ae 100644 --- a/spec/unit/llm_limiter_spec.lua +++ b/spec/unit/llm_limiter_spec.lua @@ -129,6 +129,11 @@ runner:given("^the request body is empty$", function(ctx) ctx.request_context.body = "" end) +runner:given("^the request body is '([^']+)'$", function(ctx, body) + ctx.request_context = ctx.request_context or {} + ctx.request_context.body = body +end) + runner:given("^the request body has (%d+) prompt characters in messages$", function(ctx, chars) local char_count = tonumber(chars) local content = string.rep("a", char_count) From 64b3b5824d92ebbaa5192aa6a8c211f837174f47 Mon Sep 17 00:00:00 2001 From: Codex Date: Mon, 23 Mar 2026 12:01:21 +0000 Subject: [PATCH 63/64] test(limiter): cover max_tokens body field and default fallback paths MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add two scenarios to exercise the uncovered branches in _extract_max_tokens: - body contains "max_tokens" field → uses it (covers return tonumber(val) branch) - body has no max_tokens/max_completion_tokens → falls back to default_max_completion (covers return nil branch) Fixes coverage regression (91.61% < 91.63% threshold). Co-Authored-By: Claude Sonnet 4.6 --- spec/unit/features/llm_limiter.feature | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/spec/unit/features/llm_limiter.feature b/spec/unit/features/llm_limiter.feature index 5519044..d33e4b1 100644 --- a/spec/unit/features/llm_limiter.feature +++ b/spec/unit/features/llm_limiter.feature @@ -228,3 +228,23 @@ Feature: LLM limiter module behavior And the request body is '{"input":"test"}' When I estimate prompt tokens Then prompt estimate equals 4 + + Scenario: max_tokens field in body is used when request_context.max_tokens is absent + Given the nginx mock environment is reset + And a valid llm limiter config with tokens_per_minute 10000 + And the config has default_max_completion 1000 + And the llm limiter config is validated + And the request body is '{"messages":[{"role":"user","content":"hello"}],"max_tokens":500}' + When I run llm check at now 1700000000 + Then check is allowed + And reserved equals estimated_total 502 + + Scenario: body with no max_tokens field falls back to default_max_completion + Given the nginx mock environment is reset + And a valid llm limiter config with tokens_per_minute 10000 + And the config has default_max_completion 800 + And the llm limiter config is validated + And the request body is '{"messages":[{"role":"user","content":"hi"}]}' + When I run llm check at now 1700000000 + Then check is allowed + And reserved equals estimated_total 801 From fa72ccd38903eaa1b3cb18a4ffa07e41d1190a10 Mon Sep 17 00:00:00 2001 From: Codex Date: Thu, 26 Mar 2026 12:24:35 +0000 Subject: [PATCH 64/64] docs: update 
README subtitle to sharper tagline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace "Turn API limits into enforceable business policy." with "The LLM rate limiter your multi-tenant product was missing." — more direct, audience-specific, and immediately recognisable for the developer reading on GitHub. Co-Authored-By: Claude Sonnet 4.6 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2f5c0b0..1c68b7f 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@

FAIRVISOR

-

Stop one tenant from exhausting everyone's budget.

+

The LLM rate limiter your multi-tenant product was missing.

License: MPL-2.0