From 4900affaed5d423b085b030f98c34d81c171537d Mon Sep 17 00:00:00 2001 From: Christopher Date: Sat, 28 Mar 2026 13:06:59 +0000 Subject: [PATCH] chore: remove unnecessary backward-compat shims for eval_set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit eval_set was never user-facing in YAML — drop deprecated aliases from Zod schema, manifest reader, artifact writer, pipeline commands, trace stats, and example scripts. Co-Authored-By: Claude Opus 4.6 (1M context) --- apps/cli/src/commands/eval/artifact-writer.ts | 3 +-- apps/cli/src/commands/pipeline/bench.ts | 2 +- apps/cli/src/commands/pipeline/grade.ts | 2 +- apps/cli/src/commands/results/manifest.ts | 3 +-- apps/cli/src/commands/trace/stats.ts | 1 - .../scripts/score-grader-benchmark.ts | 3 +-- packages/core/src/evaluation/validation/eval-file.schema.ts | 1 - .../skills/agentv-eval-writer/references/eval-schema.json | 6 ------ 8 files changed, 5 insertions(+), 16 deletions(-) diff --git a/apps/cli/src/commands/eval/artifact-writer.ts b/apps/cli/src/commands/eval/artifact-writer.ts index 1abf8068..3089a5c6 100644 --- a/apps/cli/src/commands/eval/artifact-writer.ts +++ b/apps/cli/src/commands/eval/artifact-writer.ts @@ -463,8 +463,7 @@ function safeTargetId(target: string | undefined): string { } function getDataset(result: EvaluationResult): string | undefined { - const record = result as EvaluationResult & { eval_set?: string; evalSet?: string }; - return result.dataset ?? record.eval_set ?? record.evalSet; + return result.dataset; } function buildArtifactSubdir(result: EvaluationResult): string { diff --git a/apps/cli/src/commands/pipeline/bench.ts b/apps/cli/src/commands/pipeline/bench.ts index 1fc50d53..1c829dc7 100644 --- a/apps/cli/src/commands/pipeline/bench.ts +++ b/apps/cli/src/commands/pipeline/bench.ts @@ -45,7 +45,7 @@ export const evalBenchCommand = command({ const manifest = JSON.parse(await readFile(join(exportDir, 'manifest.json'), 'utf8')); const testIds: string[] = manifest.test_ids; const targetName: string = manifest.target?.name ?? 'unknown'; - const evalSet: string = manifest.dataset ?? manifest.eval_set ?? ''; + const evalSet: string = manifest.dataset ?? ''; const experiment: string | undefined = manifest.experiment; const safeEvalSet = evalSet ? evalSet.replace(/[\/\\:*?"<>|]/g, '_') : ''; diff --git a/apps/cli/src/commands/pipeline/grade.ts b/apps/cli/src/commands/pipeline/grade.ts index cb12430f..c491c9e9 100644 --- a/apps/cli/src/commands/pipeline/grade.ts +++ b/apps/cli/src/commands/pipeline/grade.ts @@ -40,7 +40,7 @@ export const evalGradeCommand = command({ const manifestPath = join(exportDir, 'manifest.json'); const manifest = JSON.parse(await readFile(manifestPath, 'utf8')); const testIds: string[] = manifest.test_ids; - const evalSet: string = manifest.dataset ?? manifest.eval_set ?? ''; + const evalSet: string = manifest.dataset ?? ''; const safeEvalSet = evalSet ? evalSet.replace(/[\/\\:*?"<>|]/g, '_') : ''; let totalGraders = 0; diff --git a/apps/cli/src/commands/results/manifest.ts b/apps/cli/src/commands/results/manifest.ts index f4865b80..fe642d36 100644 --- a/apps/cli/src/commands/results/manifest.ts +++ b/apps/cli/src/commands/results/manifest.ts @@ -12,7 +12,6 @@ export interface ResultManifestRecord { readonly test_id?: string; readonly eval_id?: string; readonly dataset?: string; - readonly eval_set?: string; // deprecated alias for dataset readonly experiment?: string; readonly target?: string; readonly score: number; @@ -125,7 +124,7 @@ function hydrateManifestRecord(baseDir: string, record: ResultManifestRecord): E return { timestamp: record.timestamp, testId, - dataset: record.dataset ?? record.eval_set, + dataset: record.dataset, target: record.target, score: record.score, executionStatus: record.execution_status, diff --git a/apps/cli/src/commands/trace/stats.ts b/apps/cli/src/commands/trace/stats.ts index dba30ba1..6a88d10d 100644 --- a/apps/cli/src/commands/trace/stats.ts +++ b/apps/cli/src/commands/trace/stats.ts @@ -110,7 +110,6 @@ function groupResults(results: RawResult[], groupBy?: string): GroupedResults[] key = result.target ?? 'unknown'; break; case 'dataset': - case 'eval-set': key = result.dataset ?? 'unknown'; break; case 'test-id': diff --git a/examples/showcase/offline-grader-benchmark/scripts/score-grader-benchmark.ts b/examples/showcase/offline-grader-benchmark/scripts/score-grader-benchmark.ts index a3d50aa7..25d11bc2 100644 --- a/examples/showcase/offline-grader-benchmark/scripts/score-grader-benchmark.ts +++ b/examples/showcase/offline-grader-benchmark/scripts/score-grader-benchmark.ts @@ -17,7 +17,6 @@ type EvalResult = { timestamp?: string; test_id?: string; dataset?: string; - eval_set?: string; // backward compat target?: string; input?: string; output_text?: string; @@ -222,7 +221,7 @@ for (const line of rawResults) { const output = { timestamp: result.timestamp, test_id: result.test_id, - dataset: result.dataset ?? result.eval_set, + dataset: result.dataset, target: labelOverride ?? result.target ?? labelFromPath(resultsPath), input: result.input, output_text: result.output_text, diff --git a/packages/core/src/evaluation/validation/eval-file.schema.ts b/packages/core/src/evaluation/validation/eval-file.schema.ts index d859a2d6..365aa569 100644 --- a/packages/core/src/evaluation/validation/eval-file.schema.ts +++ b/packages/core/src/evaluation/validation/eval-file.schema.ts @@ -350,7 +350,6 @@ const EvalTestSchema = z.object({ metadata: z.record(z.unknown()).optional(), conversation_id: z.string().optional(), dataset: z.string().optional(), - eval_set: z.string().optional(), // deprecated alias for dataset note: z.string().optional(), }); diff --git a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json index 840b9e81..809cc893 100644 --- a/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json +++ b/plugins/agentv-dev/skills/agentv-eval-writer/references/eval-schema.json @@ -6390,9 +6390,6 @@ "dataset": { "type": "string" }, - "eval_set": { - "type": "string" - }, "note": { "type": "string" } @@ -12704,9 +12701,6 @@ "dataset": { "type": "string" }, - "eval_set": { - "type": "string" - }, "note": { "type": "string" }