Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 165 additions & 3 deletions apps/cli/src/commands/results/serve.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

import { existsSync, readFileSync, writeFileSync } from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { command, number, option, optional, positional, string } from 'cmd-ts';

import type { EvaluationResult } from '@agentv/core';
Expand Down Expand Up @@ -141,12 +142,22 @@ export function createApp(
resultDir: string,
cwd?: string,
sourceFile?: string,
options?: { studioDir?: string | false },
): Hono {
const searchDir = cwd ?? resultDir;
const app = new Hono();

// Dashboard HTML
// Dashboard HTML — serve Studio SPA if available, otherwise inline HTML.
// Pass studioDir: false to disable SPA serving (used in tests).
const studioDistPath =
options?.studioDir === false ? undefined : (options?.studioDir ?? resolveStudioDistDir());
// Root route: prefer the built Studio SPA entry point when one is present on
// disk; otherwise fall back to the inline dashboard HTML.
app.get('/', (c) => {
  const spaEntry = studioDistPath ? path.join(studioDistPath, 'index.html') : undefined;
  if (spaEntry !== undefined && existsSync(spaEntry)) {
    return c.html(readFileSync(spaEntry, 'utf8'));
  }
  return c.html(generateServeHtml(results, sourceFile));
});

Expand Down Expand Up @@ -238,9 +249,160 @@ export function createApp(
return c.json(existing);
});

// ── New Studio API endpoints ──────────────────────────────────────────

// Per-category summary for a single run. Results are bucketed by `eval_set`,
// falling back to `target`, then to the literal bucket name 'default'.
app.get('/api/runs/:filename/categories', (c) => {
  const requested = c.req.param('filename');
  const run = listResultFiles(searchDir).find((candidate) => candidate.filename === requested);
  if (!run) {
    return c.json({ error: 'Run not found' }, 404);
  }
  try {
    const tallies = new Map<string, { total: number; passed: number; scoreSum: number }>();
    for (const result of patchTestIds(loadManifestResults(run.path))) {
      const bucket = result.eval_set ?? result.target ?? 'default';
      let tally = tallies.get(bucket);
      if (tally === undefined) {
        // First result seen for this bucket: register it so Map order follows first appearance.
        tally = { total: 0, passed: 0, scoreSum: 0 };
        tallies.set(bucket, tally);
      }
      tally.total += 1;
      // A result counts as passed only when its score is at least 1.
      if (result.score >= 1) {
        tally.passed += 1;
      }
      tally.scoreSum += result.score;
    }
    const categories = Array.from(tallies, ([name, { total, passed, scoreSum }]) => ({
      name,
      total,
      passed,
      failed: total - passed,
      avg_score: total > 0 ? scoreSum / total : 0,
    }));
    return c.json({ categories });
  } catch {
    // Any failure while loading or aggregating the run is reported as a generic 500.
    return c.json({ error: 'Failed to load categories' }, 500);
  }
});

// Single eval lookup: returns the result whose `testId` equals the `:evalId`
// route parameter within the run identified by `:filename`.
app.get('/api/runs/:filename/evals/:evalId', (c) => {
  const requestedRun = c.req.param('filename');
  const requestedEval = c.req.param('evalId');
  const run = listResultFiles(searchDir).find((candidate) => candidate.filename === requestedRun);
  if (!run) {
    return c.json({ error: 'Run not found' }, 404);
  }
  try {
    const match = patchTestIds(loadManifestResults(run.path)).find(
      (candidate) => candidate.testId === requestedEval,
    );
    return match ? c.json({ eval: match }) : c.json({ error: 'Eval not found' }, 404);
  } catch {
    // Loading the run's results failed; report a generic 500.
    return c.json({ error: 'Failed to load eval' }, 500);
  }
});

// Cross-run index (used for the leaderboard): one entry per result file with
// its summary metadata plus the summed `costUsd` of its results.
app.get('/api/index', (c) => {
  const entries = listResultFiles(searchDir).map((run) => {
    let totalCostUsd = 0;
    try {
      let runningTotal = 0;
      for (const result of patchTestIds(loadManifestResults(run.path))) {
        runningTotal += result.costUsd ?? 0;
      }
      // Assign only after the whole run was summed so a failure leaves the cost at 0.
      totalCostUsd = runningTotal;
    } catch {
      // ignore load errors for aggregate
    }
    return {
      run_filename: run.filename,
      test_count: run.testCount,
      pass_rate: run.passRate,
      avg_score: run.avgScore,
      total_cost_usd: totalCostUsd,
      timestamp: run.timestamp,
    };
  });
  return c.json({ entries });
});

// ── Static file serving for Studio SPA ────────────────────────────────

if (studioDistPath) {
  // Only files located under <studioDist>/assets may be served by the asset route.
  const assetsRoot = path.resolve(studioDistPath, 'assets');
  const indexPath = path.join(studioDistPath, 'index.html');

  // Extension → Content-Type table, built once instead of on every request.
  const mimeTypes: Record<string, string> = {
    '.js': 'application/javascript',
    '.mjs': 'application/javascript',
    '.css': 'text/css',
    '.html': 'text/html',
    '.json': 'application/json',
    '.map': 'application/json',
    '.svg': 'image/svg+xml',
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.gif': 'image/gif',
    '.webp': 'image/webp',
    '.ico': 'image/x-icon',
    '.woff2': 'font/woff2',
    '.woff': 'font/woff',
    '.ttf': 'font/ttf',
  };

  // Serve static assets from studio dist
  app.get('/assets/*', (c) => {
    // The request path is untrusted input. After joining it onto the dist
    // directory, verify that the resolved file is still inside the assets
    // directory so `..` segments (however they were encoded in the URL) cannot
    // be used to read arbitrary files from disk.
    const filePath = path.resolve(path.join(studioDistPath, c.req.path));
    const relativeToAssets = path.relative(assetsRoot, filePath);
    const outsideAssets =
      relativeToAssets === '' ||
      relativeToAssets === '..' ||
      relativeToAssets.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relativeToAssets);
    if (outsideAssets) {
      return c.notFound();
    }

    try {
      // Reading directly (instead of existsSync + read) means a missing file,
      // a directory, or an unreadable path all yield a 404 rather than an
      // unhandled exception.
      const content = readFileSync(filePath);
      const contentType =
        mimeTypes[path.extname(filePath).toLowerCase()] ?? 'application/octet-stream';
      return new Response(content, {
        headers: {
          'Content-Type': contentType,
          'Cache-Control': 'public, max-age=31536000, immutable',
        },
      });
    } catch {
      return c.notFound();
    }
  });

  // SPA fallback: serve index.html for any non-API route that isn't matched
  app.get('*', (c) => {
    if (c.req.path.startsWith('/api/')) {
      return c.json({ error: 'Not found' }, 404);
    }
    if (existsSync(indexPath)) {
      return c.html(readFileSync(indexPath, 'utf8'));
    }
    return c.notFound();
  });
}

return app;
}

/**
 * Locate the built Studio SPA directory on disk.
 *
 * Candidate locations are tried in order, relative to the directory of this
 * module, and the first one that contains an `index.html` wins. They cover:
 * - Running from TypeScript source (`bun apps/cli/src/cli.ts`)
 * - Running from built dist (`bun apps/cli/dist/cli.js`)
 * - Published npm package (studio bundled inside `dist/studio/`)
 *
 * @returns The absolute path of the first matching directory, or `undefined`
 *          when none of the candidates contains an `index.html`.
 */
function resolveStudioDistDir(): string | undefined {
  // `__dirname` is used when it is defined; otherwise the directory is derived
  // from `import.meta.url`.
  let moduleDir: string;
  if (typeof __dirname === 'undefined') {
    moduleDir = path.dirname(fileURLToPath(import.meta.url));
  } else {
    moduleDir = __dirname;
  }

  const relativeCandidates = [
    // From src/commands/results/ → sibling apps/studio/dist
    '../../../../studio/dist',
    // From dist/ → sibling apps/studio/dist (monorepo dev)
    '../../studio/dist',
    // Bundled inside CLI dist (published package)
    '../studio',
    // From dist/ in monorepo root context
    '../../../apps/studio/dist',
  ];

  // An existing index.html implies the directory itself exists, so a single
  // check per candidate is sufficient.
  return relativeCandidates
    .map((relative) => path.resolve(moduleDir, relative))
    .find((dir) => existsSync(path.join(dir, 'index.html')));
}

/**
* Strip heavy fields (requests, trace) from results for JSON API responses.
* Mirrors the logic used in generateServeHtml for the embedded DATA.
Expand Down Expand Up @@ -952,8 +1114,8 @@ const SERVE_SCRIPT = `
// ── CLI command ──────────────────────────────────────────────────────────

export const resultsServeCommand = command({
name: 'serve',
description: 'Start a local HTTP server to review evaluation results',
name: 'studio',
description: 'Start AgentV Studio — a local dashboard for reviewing evaluation results',
args: {
source: positional({
type: optional(string),
Expand Down
4 changes: 3 additions & 1 deletion apps/cli/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ export const app = subcommands({
pipeline: pipelineCommand,
results: resultsCommand,
self: selfCommand,
serve: resultsServeCommand,
studio: resultsServeCommand,
serve: resultsServeCommand, // hidden alias for backward compatibility
trace: traceCommand,
transpile: transpileCommand,
trim: trimCommand,
Expand Down Expand Up @@ -61,6 +62,7 @@ const TOP_LEVEL_COMMANDS = new Set([
'results',
'self',
'serve',
'studio',
'trace',
'transpile',
'trim',
Expand Down
19 changes: 11 additions & 8 deletions apps/cli/test/commands/results/serve.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,13 @@ describe('serve app', () => {
rmSync(tempDir, { recursive: true, force: true });
});

/** Disable SPA serving in tests so inline HTML dashboard assertions pass */
const noStudio = { studioDir: false as const };

function makeApp() {
const content = toJsonl(RESULT_A, RESULT_B);
const results = loadResults(content);
return createApp(results, tempDir);
return createApp(results, tempDir, undefined, undefined, noStudio);
}

// ── GET / ──────────────────────────────────────────────────────────────
Expand Down Expand Up @@ -272,7 +275,7 @@ describe('serve app', () => {

describe('empty state', () => {
it('serves dashboard HTML with empty results', async () => {
const app = createApp([], tempDir);
const app = createApp([], tempDir, undefined, undefined, noStudio);
const res = await app.request('/');
expect(res.status).toBe(200);
const html = await res.text();
Expand All @@ -282,7 +285,7 @@ describe('serve app', () => {
});

it('serves feedback API with empty results', async () => {
const app = createApp([], tempDir);
const app = createApp([], tempDir, undefined, undefined, noStudio);
const res = await app.request('/api/feedback');
expect(res.status).toBe(200);
const data = await res.json();
Expand All @@ -294,7 +297,7 @@ describe('serve app', () => {

describe('GET /api/runs', () => {
it('returns empty runs list for temp directory', async () => {
const app = createApp([], tempDir);
const app = createApp([], tempDir, undefined, undefined, noStudio);
const res = await app.request('/api/runs');
expect(res.status).toBe(200);
const data = (await res.json()) as { runs: unknown[] };
Expand All @@ -306,7 +309,7 @@ describe('serve app', () => {

describe('GET /api/runs/:filename', () => {
it('returns 404 for nonexistent run', async () => {
const app = createApp([], tempDir);
const app = createApp([], tempDir, undefined, undefined, noStudio);
const res = await app.request('/api/runs/nonexistent');
expect(res.status).toBe(404);
const data = (await res.json()) as { error: string };
Expand All @@ -319,7 +322,7 @@ describe('serve app', () => {
const filename = 'eval_2026-03-25T10-00-00-000Z.jsonl';
writeFileSync(path.join(runsDir, filename), toJsonl(RESULT_A, RESULT_B));

const app = createApp([], tempDir, tempDir);
const app = createApp([], tempDir, tempDir, undefined, noStudio);
const res = await app.request(`/api/runs/${filename}`);
expect(res.status).toBe(200);
const data = (await res.json()) as { results: { testId: string }[]; source: string };
Expand All @@ -343,15 +346,15 @@ describe('serve app', () => {
it('embeds INITIAL_SOURCE when sourceFile is provided', async () => {
const content = toJsonl(RESULT_A, RESULT_B);
const results = loadResults(content);
const app = createApp(results, tempDir, tempDir, '/some/path/results-2026.jsonl');
const app = createApp(results, tempDir, tempDir, '/some/path/results-2026.jsonl', noStudio);
const res = await app.request('/');
const html = await res.text();
expect(html).toContain('INITIAL_SOURCE');
expect(html).toContain('results-2026.jsonl');
});

it('sets INITIAL_SOURCE to null when no sourceFile', async () => {
const app = createApp([], tempDir);
const app = createApp([], tempDir, undefined, undefined, noStudio);
const res = await app.request('/');
const html = await res.text();
expect(html).toContain('INITIAL_SOURCE = null');
Expand Down
13 changes: 12 additions & 1 deletion apps/cli/tsup.config.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { cpSync, rmSync } from 'node:fs';
import { cpSync, existsSync, rmSync } from 'node:fs';
import path from 'node:path';
import { defineConfig } from 'tsup';

Expand Down Expand Up @@ -45,5 +45,16 @@ export default defineConfig({
});

console.log('✓ Template files copied to dist/templates');

// Bundle the Studio build (produced by apps/studio) into the CLI's dist folder
// when it exists. Both paths are relative to the current working directory.
const studioBuildDir = path.resolve('..', 'studio', 'dist');
const bundledStudioDir = path.join('dist', 'studio');
if (!existsSync(studioBuildDir)) {
  // Nothing to bundle: report the path that was checked and carry on.
  console.log('⚠ Studio dist not found at', studioBuildDir, '— skipping');
} else {
  // Remove any previous copy first so stale files do not linger.
  rmSync(bundledStudioDir, { recursive: true, force: true });
  cpSync(studioBuildDir, bundledStudioDir, { recursive: true });
  console.log('✓ Studio dist copied to dist/studio');
}
},
});
12 changes: 12 additions & 0 deletions apps/studio/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!doctype html>
<html lang="en" class="dark">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>AgentV Studio</title>
</head>
<body class="bg-gray-950 text-gray-100">
<div id="root"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>
28 changes: 28 additions & 0 deletions apps/studio/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"name": "@agentv/studio",
"version": "0.0.1",
"private": true,
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc -b && vite build",
"preview": "vite preview"
},
"dependencies": {
"@monaco-editor/react": "^4.7.0",
"@tanstack/react-query": "^5.75.5",
"@tanstack/react-router": "^1.120.3",
"react": "^19.1.0",
"react-dom": "^19.1.0"
},
"devDependencies": {
"@tailwindcss/vite": "^4.1.7",
"@tanstack/router-plugin": "^1.120.3",
"@types/react": "^19.1.4",
"@types/react-dom": "^19.1.5",
"@vitejs/plugin-react": "^4.5.2",
"tailwindcss": "^4.1.7",
"typescript": "^5.8.3",
"vite": "^6.3.5"
}
}
Loading
Loading