Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ import { registerEvalCommand } from './commands/eval.js';
import { registerCognitionCommand } from './commands/cognition.js';
import { registerCatalogCommands } from './commands/catalog.js';
import { registerReleaseCommands } from './commands/release-check.js';
import { registerObservabilityCommands } from './commands/observability.js';

// All other command handlers are lazy-loaded via dynamic import() inside
// action handlers. Only the invoked command's dependencies are loaded,
Expand Down Expand Up @@ -1051,6 +1052,7 @@ registerCognitionCommand(program);
// IDP — service catalog, scorecards, release checks
registerCatalogCommands(program);
registerReleaseCommands(program);
registerObservabilityCommands(program);

// Providers command - show LLM CLI availability for multi-LLM support
program
Expand Down
97 changes: 97 additions & 0 deletions src/commands/observability.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/**
* squads obs — observability commands
*
*   squads obs history   Execution history with tokens/cost
*   squads obs cost      Spend summary by squad and model
*/

import { Command } from 'commander';
import { queryExecutions, calculateCostSummary } from '../lib/observability.js';
import { colors, bold, RESET, writeLine } from '../lib/terminal.js';

/** Fallback for `obs history --limit`; mirrors the option's declared default of '20'. */
const DEFAULT_HISTORY_LIMIT = 20;

/** Milliseconds in one day, used to expand relative `--since` values such as `7d`. */
const MS_PER_DAY = 24 * 60 * 60 * 1000;

/**
 * Expand a relative `--since` value (`<N>d`) into an ISO timestamp N days before now.
 * Any other value (including `undefined`) is returned unchanged.
 */
function resolveSince(since: string | undefined): string | undefined {
  if (since && /^\d+d$/.test(since)) {
    const days = parseInt(since, 10);
    return new Date(Date.now() - days * MS_PER_DAY).toISOString();
  }
  return since;
}

/**
 * Parse the `--limit` option. Non-numeric or negative input falls back to
 * DEFAULT_HISTORY_LIMIT so that NaN or a negative count is never passed on.
 */
function parseHistoryLimit(raw: string | undefined): number {
  const parsed = Number.parseInt(raw ?? '', 10);
  return Number.isNaN(parsed) || parsed < 0 ? DEFAULT_HISTORY_LIMIT : parsed;
}

/**
 * Register the `obs` command group on the given commander program.
 *
 * Subcommands:
 *  - `obs history`: lists execution records returned by `queryExecutions`,
 *    filtered by squad, agent, and since-date, capped by `--limit`.
 *  - `obs cost`: prints the summary returned by `calculateCostSummary` for
 *    the requested period, broken down by squad and by model.
 *
 * With `--json`, both subcommands always emit JSON on stdout, including when
 * there is no data, so the output stays machine-parseable.
 *
 * @param program Root commander program to attach the `obs` command to.
 */
export function registerObservabilityCommands(program: Command): void {
  const obs = program
    .command('obs')
    .description('Observability — execution history, token costs, and trends');

  obs
    .command('history')
    .description('Show execution history with tokens and cost')
    .option('-s, --squad <squad>', 'Filter by squad')
    .option('-a, --agent <agent>', 'Filter by agent')
    .option('-n, --limit <n>', 'Number of records', '20')
    .option('--since <date>', 'Since date (ISO or relative: 1d, 7d, 30d)')
    .option('--json', 'Output as JSON')
    .action((opts) => {
      const records = queryExecutions({
        squad: opts.squad,
        agent: opts.agent,
        since: resolveSince(opts.since),
        limit: parseHistoryLimit(opts.limit),
      });

      // JSON mode is checked first: an empty result must still be valid JSON (`[]`).
      if (opts.json) {
        console.log(JSON.stringify(records, null, 2));
        return;
      }

      if (records.length === 0) {
        writeLine(`\n ${colors.dim}No executions found. Run \`squads run <squad>\` to generate data.${RESET}\n`);
        return;
      }

      writeLine(`\n ${bold}Execution History${RESET} (${records.length} records)\n`);

      for (const r of records) {
        // Any status other than completed/failed is rendered as "timeout".
        const icon = r.status === 'completed' ? `${colors.green}pass${RESET}`
          : r.status === 'failed' ? `${colors.red}fail${RESET}` : `${colors.yellow}timeout${RESET}`;
        // Durations above one minute are shown in whole minutes, otherwise in seconds.
        const dur = r.duration_ms > 60000
          ? `${Math.round(r.duration_ms / 60000)}m`
          : `${Math.round(r.duration_ms / 1000)}s`;
        const cost = r.cost_usd > 0 ? `$${r.cost_usd.toFixed(3)}` : '$—';
        const totalTokens = r.input_tokens + r.output_tokens;
        const tok = totalTokens > 0 ? `${totalTokens.toLocaleString()} tok` : '— tok';
        // Keep "YYYY-MM-DDTHH:MM" from the timestamp and swap the 'T' for a space.
        const date = r.ts.slice(0, 16).replace('T', ' ');

        writeLine(` ${icon} ${bold}${r.squad}/${r.agent}${RESET} ${colors.dim}${date} ${dur} ${tok} ${cost} ${r.model}${RESET}`);
        if (r.error) writeLine(` ${colors.red}${r.error.slice(0, 80)}${RESET}`);
      }
      writeLine();
    });

  obs
    .command('cost')
    .description('Show token spend summary')
    .option('-p, --period <period>', 'Time period: today, 7d, 30d, all', '7d')
    .option('--json', 'Output as JSON')
    .action((opts) => {
      const summary = calculateCostSummary(opts.period);

      // JSON mode is checked first: a zero-run summary must still be emitted as JSON.
      if (opts.json) {
        console.log(JSON.stringify(summary, null, 2));
        return;
      }

      if (summary.total_runs === 0) {
        writeLine(`\n ${colors.dim}No executions in the last ${opts.period}.${RESET}\n`);
        return;
      }

      writeLine(`\n ${bold}Cost Summary${RESET} (${summary.period})`);
      writeLine(`\n Total: ${bold}$${summary.total_cost.toFixed(2)}${RESET} across ${summary.total_runs} runs`);
      writeLine(` Tokens: ${summary.total_input_tokens.toLocaleString()} in / ${summary.total_output_tokens.toLocaleString()} out\n`);

      // Most expensive squads first.
      const squads = Object.entries(summary.by_squad).sort((a, b) => b[1].cost - a[1].cost);
      if (squads.length > 0) {
        writeLine(` ${colors.cyan}By Squad${RESET}`);
        for (const [name, data] of squads) {
          // Bar is the squad's share of total cost on a 20-cell scale, at least one cell;
          // `|| 1` avoids dividing by zero when the total cost is 0.
          const bar = '█'.repeat(Math.max(1, Math.round(data.cost / (summary.total_cost || 1) * 20)));
          writeLine(` ${name.padEnd(20)} ${colors.dim}${bar}${RESET} $${data.cost.toFixed(2)} (${data.runs} runs, avg $${data.avg_cost.toFixed(3)})`);
        }
        writeLine();
      }

      // Most expensive models first.
      const models = Object.entries(summary.by_model).sort((a, b) => b[1].cost - a[1].cost);
      if (models.length > 0) {
        writeLine(` ${colors.cyan}By Model${RESET}`);
        for (const [name, data] of models) {
          writeLine(` ${name.padEnd(30)} $${data.cost.toFixed(2)} (${data.runs} runs)`);
        }
        writeLine();
      }
    });
}
42 changes: 41 additions & 1 deletion src/lib/execution-engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import {
registerContextWithBridge,
updateExecutionStatus,
} from './execution-log.js';
import { logObservability, captureSessionUsage, type ObservabilityRecord } from './observability.js';
import { findMemoryDir } from './memory.js';
import { detectProviderFromModel } from './providers.js';
import { getBridgeUrl } from './env-config.js';
Expand Down Expand Up @@ -460,9 +461,32 @@ export function executeForeground(config: {
claude.on('close', async (code) => {
const durationMs = Date.now() - config.startMs;

// Capture token usage from Claude Code's session JSONL
const sessionUsage = captureSessionUsage(config.startMs);

const obsRecord: ObservabilityRecord = {
ts: new Date().toISOString(),
id: config.execContext.executionId,
squad: config.squadName,
agent: config.agentName,
provider: config.provider || 'anthropic',
model: sessionUsage?.model || config.agentEnv.SQUADS_MODEL || 'unknown',
trigger: (config.execContext.trigger || 'manual') as ObservabilityRecord['trigger'],
status: code === 0 ? 'completed' : 'failed',
duration_ms: durationMs,
input_tokens: sessionUsage?.input_tokens || 0,
output_tokens: sessionUsage?.output_tokens || 0,
cache_read_tokens: sessionUsage?.cache_read_tokens || 0,
cache_write_tokens: sessionUsage?.cache_write_tokens || 0,
cost_usd: sessionUsage?.cost_usd || 0,
context_tokens: 0,
error: code !== 0 ? `Claude exited with code ${code}` : undefined,
};
logObservability(obsRecord);

if (code === 0) {
updateExecutionStatus(config.squadName, config.agentName, config.execContext.executionId, 'completed', {
outcome: 'Session completed successfully',
outcome: `Session completed (${sessionUsage?.input_tokens || 0} in / ${sessionUsage?.output_tokens || 0} out, $${(sessionUsage?.cost_usd || 0).toFixed(3)})`,
durationMs,
});

Expand All @@ -486,6 +510,22 @@ export function executeForeground(config: {

claude.on('error', (err) => {
const durationMs = Date.now() - config.startMs;

logObservability({
ts: new Date().toISOString(),
id: config.execContext.executionId,
squad: config.squadName,
agent: config.agentName,
provider: config.provider || 'anthropic',
model: 'unknown',
trigger: (config.execContext.trigger || 'manual') as ObservabilityRecord['trigger'],
status: 'failed',
duration_ms: durationMs,
input_tokens: 0, output_tokens: 0, cache_read_tokens: 0, cache_write_tokens: 0,
cost_usd: 0, context_tokens: 0,
error: String(err),
});
Comment on lines +514 to +527

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

There's significant code duplication between this on('error') handler and the on('close') handler (lines 467-484) for creating the ObservabilityRecord. This can make maintenance harder, as changes might need to be applied in two places.

Consider creating a helper function to construct and log the ObservabilityRecord. This function could take parameters like status, error, and session usage to handle both success and failure cases, reducing duplication and centralizing the logic.


updateExecutionStatus(config.squadName, config.agentName, config.execContext.executionId, 'failed', {
error: String(err),
durationMs,
Expand Down
Loading
Loading