diff --git a/src/cli.ts b/src/cli.ts index cc07bfd..5822222 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -60,6 +60,7 @@ import { registerEvalCommand } from './commands/eval.js'; import { registerCognitionCommand } from './commands/cognition.js'; import { registerCatalogCommands } from './commands/catalog.js'; import { registerReleaseCommands } from './commands/release-check.js'; +import { registerObservabilityCommands } from './commands/observability.js'; // All other command handlers are lazy-loaded via dynamic import() inside // action handlers. Only the invoked command's dependencies are loaded, @@ -1051,6 +1052,7 @@ registerCognitionCommand(program); // IDP — service catalog, scorecards, release checks registerCatalogCommands(program); registerReleaseCommands(program); +registerObservabilityCommands(program); // Providers command - show LLM CLI availability for multi-LLM support program diff --git a/src/commands/observability.ts b/src/commands/observability.ts new file mode 100644 index 0000000..07b1803 --- /dev/null +++ b/src/commands/observability.ts @@ -0,0 +1,97 @@ +/** + * squads obs — observability commands + * + * squads obs history Execution history with tokens/cost + * squads obs cost Spend summary by squad and model + */ + +import { Command } from 'commander'; +import { queryExecutions, calculateCostSummary } from '../lib/observability.js'; +import { colors, bold, RESET, writeLine } from '../lib/terminal.js'; + +export function registerObservabilityCommands(program: Command): void { + const obs = program + .command('obs') + .description('Observability — execution history, token costs, and trends'); + + obs + .command('history') + .description('Show execution history with tokens and cost') + .option('-s, --squad ', 'Filter by squad') + .option('-a, --agent ', 'Filter by agent') + .option('-n, --limit ', 'Number of records', '20') + .option('--since ', 'Since date (ISO or relative: 1d, 7d, 30d)') + .option('--json', 'Output as JSON') + .action((opts) => { + let since = opts.since; + if (since && /^\d+d$/.test(since)) { + const days = parseInt(since, 10); + since = new Date(Date.now() - days * 24 * 60 * 60 * 1000).toISOString(); + } + + const records = queryExecutions({ + squad: opts.squad, agent: opts.agent, since, limit: parseInt(opts.limit, 10), + }); + + if (records.length === 0) { + writeLine(`\n ${colors.dim}No executions found. Run \`squads run \` to generate data.${RESET}\n`); + return; + } + + if (opts.json) { console.log(JSON.stringify(records, null, 2)); return; } + + writeLine(`\n ${bold}Execution History${RESET} (${records.length} records)\n`); + + for (const r of records) { + const icon = r.status === 'completed' ? `${colors.green}pass${RESET}` + : r.status === 'failed' ? `${colors.red}fail${RESET}` : `${colors.yellow}timeout${RESET}`; + const dur = r.duration_ms > 60000 ? `${Math.round(r.duration_ms / 60000)}m` : `${Math.round(r.duration_ms / 1000)}s`; + const cost = r.cost_usd > 0 ? `$${r.cost_usd.toFixed(3)}` : '$—'; + const tok = (r.input_tokens + r.output_tokens) > 0 ? `${(r.input_tokens + r.output_tokens).toLocaleString()} tok` : '— tok'; + const date = r.ts.slice(0, 16).replace('T', ' '); + + writeLine(` ${icon} ${bold}${r.squad}/${r.agent}${RESET} ${colors.dim}${date} ${dur} ${tok} ${cost} ${r.model}${RESET}`); + if (r.error) writeLine(` ${colors.red}${r.error.slice(0, 80)}${RESET}`); + } + writeLine(); + }); + + obs + .command('cost') + .description('Show token spend summary') + .option('-p, --period ', 'Time period: today, 7d, 30d, all', '7d') + .option('--json', 'Output as JSON') + .action((opts) => { + const summary = calculateCostSummary(opts.period); + + if (summary.total_runs === 0) { + writeLine(`\n ${colors.dim}No executions in the last ${opts.period}.${RESET}\n`); + return; + } + + if (opts.json) { console.log(JSON.stringify(summary, null, 2)); return; } + + writeLine(`\n ${bold}Cost Summary${RESET} (${summary.period})`); + writeLine(`\n Total: ${bold}$${summary.total_cost.toFixed(2)}${RESET} across ${summary.total_runs} runs`); + writeLine(` Tokens: ${summary.total_input_tokens.toLocaleString()} in / ${summary.total_output_tokens.toLocaleString()} out\n`); + + const squads = Object.entries(summary.by_squad).sort((a, b) => b[1].cost - a[1].cost); + if (squads.length > 0) { + writeLine(` ${colors.cyan}By Squad${RESET}`); + for (const [name, data] of squads) { + const bar = '█'.repeat(Math.max(1, Math.round(data.cost / (summary.total_cost || 1) * 20))); + writeLine(` ${name.padEnd(20)} ${colors.dim}${bar}${RESET} $${data.cost.toFixed(2)} (${data.runs} runs, avg $${data.avg_cost.toFixed(3)})`); + } + writeLine(); + } + + const models = Object.entries(summary.by_model).sort((a, b) => b[1].cost - a[1].cost); + if (models.length > 0) { + writeLine(` ${colors.cyan}By Model${RESET}`); + for (const [name, data] of models) { + writeLine(` ${name.padEnd(30)} $${data.cost.toFixed(2)} (${data.runs} runs)`); + } + writeLine(); + } + }); +} diff --git a/src/lib/execution-engine.ts b/src/lib/execution-engine.ts index fd2d269..57292b2 100644 --- a/src/lib/execution-engine.ts +++ b/src/lib/execution-engine.ts @@ -28,6 +28,7 @@ import { registerContextWithBridge, updateExecutionStatus, } from './execution-log.js'; +import { logObservability, captureSessionUsage, type ObservabilityRecord } from './observability.js'; import { findMemoryDir } from './memory.js'; import { detectProviderFromModel } from './providers.js'; import { getBridgeUrl } from './env-config.js'; @@ -460,9 +461,32 @@ export function executeForeground(config: { claude.on('close', async (code) => { const durationMs = Date.now() - config.startMs; + // Capture token usage from Claude Code's session JSONL + const sessionUsage = captureSessionUsage(config.startMs); + + const obsRecord: ObservabilityRecord = { + ts: new Date().toISOString(), + id: config.execContext.executionId, + squad: config.squadName, + agent: config.agentName, + provider: config.provider || 'anthropic', + model: sessionUsage?.model || config.agentEnv.SQUADS_MODEL || 'unknown', + trigger: (config.execContext.trigger || 'manual') as ObservabilityRecord['trigger'], + status: code === 0 ? 'completed' : 'failed', + duration_ms: durationMs, + input_tokens: sessionUsage?.input_tokens || 0, + output_tokens: sessionUsage?.output_tokens || 0, + cache_read_tokens: sessionUsage?.cache_read_tokens || 0, + cache_write_tokens: sessionUsage?.cache_write_tokens || 0, + cost_usd: sessionUsage?.cost_usd || 0, + context_tokens: 0, + error: code !== 0 ? `Claude exited with code ${code}` : undefined, + }; + logObservability(obsRecord); + if (code === 0) { updateExecutionStatus(config.squadName, config.agentName, config.execContext.executionId, 'completed', { - outcome: 'Session completed successfully', + outcome: `Session completed (${sessionUsage?.input_tokens || 0} in / ${sessionUsage?.output_tokens || 0} out, $${(sessionUsage?.cost_usd || 0).toFixed(3)})`, durationMs, }); @@ -486,6 +510,22 @@ export function executeForeground(config: { claude.on('error', (err) => { const durationMs = Date.now() - config.startMs; + + logObservability({ + ts: new Date().toISOString(), + id: config.execContext.executionId, + squad: config.squadName, + agent: config.agentName, + provider: config.provider || 'anthropic', + model: 'unknown', + trigger: (config.execContext.trigger || 'manual') as ObservabilityRecord['trigger'], + status: 'failed', + duration_ms: durationMs, + input_tokens: 0, output_tokens: 0, cache_read_tokens: 0, cache_write_tokens: 0, + cost_usd: 0, context_tokens: 0, + error: String(err), + }); + updateExecutionStatus(config.squadName, config.agentName, config.execContext.executionId, 'failed', { error: String(err), durationMs, diff --git a/src/lib/observability.ts b/src/lib/observability.ts new file mode 100644 index 0000000..76f6df8 --- /dev/null +++ b/src/lib/observability.ts @@ -0,0 +1,278 @@ +/** + * Local observability — execution logging to JSONL with token capture. + * + * Every squads run appends one record to .agents/observability/executions.jsonl. + * Token/cost data is captured from Claude Code's session JSONL files after run. + * + * No external dependencies. Git-tracked. Readable by agents and humans. + */ + +import { existsSync, readFileSync, appendFileSync, mkdirSync, readdirSync, statSync } from 'fs'; +import { join, dirname } from 'path'; +import { findProjectRoot } from './squad-parser.js'; + +// ── Types ──────────────────────────────────────────────────────────── + +export interface ObservabilityRecord { + ts: string; + id: string; + squad: string; + agent: string; + provider: string; + model: string; + trigger: 'manual' | 'scheduled' | 'event' | 'smart'; + status: 'completed' | 'failed' | 'timeout'; + duration_ms: number; + input_tokens: number; + output_tokens: number; + cache_read_tokens: number; + cache_write_tokens: number; + cost_usd: number; + context_tokens: number; + error?: string; + task?: string; +} + +export interface QueryOptions { + squad?: string; + agent?: string; + status?: string; + since?: string; + limit?: number; +} + +export interface CostSummary { + period: string; + total_cost: number; + total_runs: number; + total_input_tokens: number; + total_output_tokens: number; + by_squad: Record; + by_model: Record; +} + +// ── Model Pricing (per 1M tokens) ──────────────────────────────────── + +const MODEL_PRICING: Record = { + 'claude-opus-4-6': { input: 15.0, output: 75.0, cache_read: 1.5, cache_write: 18.75 }, + 'claude-opus-4-5-20251101': { input: 15.0, output: 75.0, cache_read: 1.5, cache_write: 18.75 }, + 'claude-sonnet-4-6': { input: 3.0, output: 15.0, cache_read: 0.3, cache_write: 3.75 }, + 'claude-sonnet-4-5-20250514': { input: 3.0, output: 15.0, cache_read: 0.3, cache_write: 3.75 }, + 'claude-sonnet-4-20250514': { input: 3.0, output: 15.0, cache_read: 0.3, cache_write: 3.75 }, + 'claude-haiku-4-5-20251001': { input: 0.80, output: 4.0, cache_read: 0.08, cache_write: 1.0 }, + 'default': { input: 3.0, output: 15.0, cache_read: 0.3, cache_write: 3.75 }, +}; + +// ── Paths ──────────────────────────────────────────────────────────── + +function getObservabilityDir(): string | null { + const root = findProjectRoot(); + if (!root) return null; + return join(root, '.agents', 'observability'); +} + +function getLogPath(): string | null { + const dir = getObservabilityDir(); + if (!dir) return null; + return join(dir, 'executions.jsonl'); +} + +// ── Claude Code Session Parsing ────────────────────────────────────── + +interface SessionUsage { + model: string; + input_tokens: number; + output_tokens: number; + cache_read_tokens: number; + cache_write_tokens: number; + cost_usd: number; + messages: number; +} + +/** + * Find the most recently modified Claude Code session JSONL file. + * Claude Code writes sessions to ~/.claude/projects//*.jsonl + */ +function findRecentSessionFile(afterTimestamp: number): string | null { + const home = process.env.HOME || ''; + const projectsDir = join(home, '.claude', 'projects'); + if (!existsSync(projectsDir)) return null; + + let newest: { path: string; mtime: number } | null = null; + + try { + for (const projDir of readdirSync(projectsDir)) { + const projPath = join(projectsDir, projDir); + try { + if (!statSync(projPath).isDirectory()) continue; + } catch { continue; } + + for (const file of readdirSync(projPath)) { + if (!file.endsWith('.jsonl')) continue; + const filePath = join(projPath, file); + try { + const mtime = statSync(filePath).mtimeMs; + // Only consider files modified after the run started + if (mtime > afterTimestamp && (!newest || mtime > newest.mtime)) { + newest = { path: filePath, mtime }; + } + } catch { continue; } + } + } + } catch { /* projects dir read error */ } + + return newest?.path || null; +} + +/** + * Parse a Claude Code session JSONL file and extract usage totals. + */ +function parseSessionUsage(sessionPath: string): SessionUsage | null { + try { + const content = readFileSync(sessionPath, 'utf-8'); + const lines = content.split('\n').filter(Boolean); + + const usage: SessionUsage = { + model: 'unknown', + input_tokens: 0, + output_tokens: 0, + cache_read_tokens: 0, + cache_write_tokens: 0, + cost_usd: 0, + messages: 0, + }; + + for (const line of lines) { + try { + const record = JSON.parse(line); + + if (record.type === 'assistant') { + const msg = record.message || {}; + const u = msg.usage || {}; + + if (u.input_tokens || u.output_tokens) { + usage.messages++; + usage.input_tokens += u.input_tokens || 0; + usage.output_tokens += u.output_tokens || 0; + usage.cache_read_tokens += u.cache_read_input_tokens || 0; + usage.cache_write_tokens += u.cache_creation_input_tokens || 0; + } + + if (!usage.model || usage.model === 'unknown') { + usage.model = msg.model || 'unknown'; + } + } + + // Capture cost if directly available + if (record.costUSD) { + usage.cost_usd += record.costUSD; + } + } catch { /* skip malformed lines */ } + } + + if (usage.messages === 0) return null; + + // Calculate cost from tokens if not directly available + if (usage.cost_usd === 0) { + const pricing = MODEL_PRICING[usage.model] || MODEL_PRICING['default']; + usage.cost_usd = ( + (usage.input_tokens / 1_000_000) * pricing.input + + (usage.output_tokens / 1_000_000) * pricing.output + + (usage.cache_read_tokens / 1_000_000) * pricing.cache_read + + (usage.cache_write_tokens / 1_000_000) * pricing.cache_write + ); + } + + return usage; + } catch { + return null; + } +} + +/** + * Capture usage from the most recent Claude Code session. + * Call this after a foreground run completes. + */ +export function captureSessionUsage(runStartedAt: number): SessionUsage | null { + const sessionFile = findRecentSessionFile(runStartedAt); + if (!sessionFile) return null; + return parseSessionUsage(sessionFile); +} + +// ── Write ──────────────────────────────────────────────────────────── + +export function logObservability(record: ObservabilityRecord): void { + const logPath = getLogPath(); + if (!logPath) return; + + const dir = dirname(logPath); + if (!existsSync(dir)) { + mkdirSync(dir, { recursive: true }); + } + + appendFileSync(logPath, JSON.stringify(record) + '\n'); +} + +// ── Read ───────────────────────────────────────────────────────────── + +export function queryExecutions(opts: QueryOptions = {}): ObservabilityRecord[] { + const logPath = getLogPath(); + if (!logPath || !existsSync(logPath)) return []; + + const lines = readFileSync(logPath, 'utf-8').trim().split('\n').filter(Boolean); + let records: ObservabilityRecord[] = []; + + for (const line of lines) { + try { records.push(JSON.parse(line)); } catch { /* skip */ } + } + + if (opts.squad) records = records.filter(r => r.squad === opts.squad); + if (opts.agent) records = records.filter(r => r.agent === opts.agent); + if (opts.status) records = records.filter(r => r.status === opts.status); + if (opts.since) { + const since = new Date(opts.since).getTime(); + records = records.filter(r => new Date(r.ts).getTime() >= since); + } + + records.sort((a, b) => new Date(b.ts).getTime() - new Date(a.ts).getTime()); + if (opts.limit) records = records.slice(0, opts.limit); + + return records; +} + +export function calculateCostSummary(period: 'today' | '7d' | '30d' | 'all' = '7d'): CostSummary { + const now = Date.now(); + const cutoffs: Record = { + 'today': now - 24 * 60 * 60 * 1000, + '7d': now - 7 * 24 * 60 * 60 * 1000, + '30d': now - 30 * 24 * 60 * 60 * 1000, + 'all': 0, + }; + + const since = new Date(cutoffs[period] || cutoffs['7d']).toISOString(); + const records = queryExecutions({ since }); + + const bySquad: Record = {}; + const byModel: Record = {}; + let totalCost = 0, totalInput = 0, totalOutput = 0; + + for (const r of records) { + totalCost += r.cost_usd; + totalInput += r.input_tokens; + totalOutput += r.output_tokens; + + if (!bySquad[r.squad]) bySquad[r.squad] = { cost: 0, runs: 0, avg_cost: 0 }; + bySquad[r.squad].cost += r.cost_usd; + bySquad[r.squad].runs += 1; + + if (!byModel[r.model]) byModel[r.model] = { cost: 0, runs: 0 }; + byModel[r.model].cost += r.cost_usd; + byModel[r.model].runs += 1; + } + + for (const squad of Object.values(bySquad)) { + squad.avg_cost = squad.runs > 0 ? squad.cost / squad.runs : 0; + } + + return { period, total_cost: totalCost, total_runs: records.length, total_input_tokens: totalInput, total_output_tokens: totalOutput, by_squad: bySquad, by_model: byModel }; +}