From d88b02021a24d536c4706a28fa4d91dfda15be28 Mon Sep 17 00:00:00 2001
From: System Two
Date: Thu, 9 Apr 2026 06:30:01 +0000
Subject: [PATCH] chore(develop): 048-deep-eval-validate-parse

---
 src/cli/commands/eval/validate.rs |  23 ++++++
 tests/cli/eval_tests.rs           | 120 ++++++++++++++++++++++++++++++
 2 files changed, 143 insertions(+)

diff --git a/src/cli/commands/eval/validate.rs b/src/cli/commands/eval/validate.rs
index 2ac2f34..d46d743 100644
--- a/src/cli/commands/eval/validate.rs
+++ b/src/cli/commands/eval/validate.rs
@@ -5,7 +5,9 @@
 use crate::cli::error::{CliError, CliResult};
 use aikit_sdk::{is_agent_available, is_runnable, runnable_agents};
 use clap::Args;
 use fastskill::core::project::resolve_project_file;
+use fastskill::eval::checks::load_checks;
 use fastskill::eval::config::resolve_eval_config;
+use fastskill::eval::suite::load_suite;
 use fastskill::OutputFormat;
 use std::env;
@@ -67,6 +69,23 @@ pub async fn execute_validate(args: ValidateArgs) -> CliResult<()> {
     let eval_config = resolve_eval_config(&resolution.path, &project_root)
         .map_err(|e| CliError::Config(e.to_string()))?;
 
+    // Parse and validate prompts CSV
+    let suite =
+        load_suite(&eval_config.prompts_path).map_err(|e| CliError::Config(e.to_string()))?;
+    let case_count = suite.cases.len();
+
+    // Parse and validate checks TOML if configured and the file exists
+    let check_count = if let Some(ref checks_path) = eval_config.checks_path {
+        if checks_path.exists() {
+            let checks = load_checks(checks_path).map_err(|e| CliError::Config(e.to_string()))?;
+            checks.len()
+        } else {
+            0
+        }
+    } else {
+        0
+    };
+
     // Check agent availability if --agent was specified
     if let Some(ref agent_key) = args.agent {
         let available = is_agent_available(agent_key);
@@ -92,6 +111,8 @@ pub async fn execute_validate(args: ValidateArgs) -> CliResult<()> {
             "timeout_seconds": eval_config.timeout_seconds,
             "fail_on_missing_agent": eval_config.fail_on_missing_agent,
             "project_root": eval_config.project_root,
+            "case_count": case_count,
+            "check_count": check_count,
         });
         println!(
             "{}",
@@ -100,8 +121,10 @@ pub async fn execute_validate(args: ValidateArgs) -> CliResult<()> {
     } else {
         println!("eval configuration: valid");
         println!(" prompts: {}", eval_config.prompts_path.display());
+        println!(" cases: {}", case_count);
         if let Some(ref checks) = eval_config.checks_path {
             println!(" checks: {}", checks.display());
+            println!(" check count: {}", check_count);
         }
         println!(" timeout: {}s", eval_config.timeout_seconds);
         println!(
diff --git a/tests/cli/eval_tests.rs b/tests/cli/eval_tests.rs
index f98a08f..4ecea72 100644
--- a/tests/cli/eval_tests.rs
+++ b/tests/cli/eval_tests.rs
@@ -169,6 +169,126 @@ fn test_eval_validate_with_eval_config() {
     );
 }
 
+#[test]
+fn test_eval_validate_invalid_csv_missing_column() {
+    use std::fs;
+    use tempfile::TempDir;
+
+    let dir = TempDir::new().unwrap();
+
+    let evals_dir = dir.path().join("evals");
+    fs::create_dir_all(&evals_dir).unwrap();
+    // CSV missing required 'should_trigger' column
+    fs::write(
+        evals_dir.join("prompts.csv"),
+        "id,prompt\ntest-1,\"Test prompt\"\n",
+    )
+    .unwrap();
+
+    fs::write(dir.path().join("SKILL.md"), "# Test Skill\n").unwrap();
+    fs::write(
+        dir.path().join("skill-project.toml"),
+        "[metadata]\nid = \"test-skill\"\n\n[tool.fastskill.eval]\nprompts = \"evals/prompts.csv\"\ntimeout_seconds = 300\nfail_on_missing_agent = false\n",
+    )
+    .unwrap();
+
+    let result = run_fastskill_command(&["eval", "validate"], Some(dir.path()));
+    assert!(
+        !result.success,
+        "Expected eval validate to fail due to missing CSV column"
+    );
+    let combined = format!("{}{}", result.stdout, result.stderr);
+    assert!(
+        combined.contains("EVAL_INVALID_CSV") || combined.contains("should_trigger"),
+        "Expected EVAL_INVALID_CSV error, got: {}",
+        combined
+    );
+}
+
+#[test]
+fn test_eval_validate_invalid_checks_toml() {
+    use std::fs;
+    use tempfile::TempDir;
+
+    let dir = TempDir::new().unwrap();
+
+    let evals_dir = dir.path().join("evals");
+    fs::create_dir_all(&evals_dir).unwrap();
+    fs::write(
+        evals_dir.join("prompts.csv"),
+        "id,prompt,should_trigger,tags,workspace_subdir\ntest-1,\"Test prompt\",true,\"basic\",\n",
+    )
+    .unwrap();
+    // Invalid TOML syntax
+    fs::write(
+        evals_dir.join("checks.toml"),
+        "[[check]\nname = broken toml {\n",
+    )
+    .unwrap();
+
+    fs::write(dir.path().join("SKILL.md"), "# Test Skill\n").unwrap();
+    fs::write(
+        dir.path().join("skill-project.toml"),
+        "[metadata]\nid = \"test-skill\"\n\n[tool.fastskill.eval]\nprompts = \"evals/prompts.csv\"\nchecks = \"evals/checks.toml\"\ntimeout_seconds = 300\nfail_on_missing_agent = false\n",
+    )
+    .unwrap();
+
+    let result = run_fastskill_command(&["eval", "validate"], Some(dir.path()));
+    assert!(
+        !result.success,
+        "Expected eval validate to fail due to invalid checks TOML"
+    );
+    let combined = format!("{}{}", result.stdout, result.stderr);
+    assert!(
+        combined.contains("EVAL_CHECKS_INVALID")
+            || combined.contains("TOML")
+            || combined.contains("toml"),
+        "Expected EVAL_CHECKS_INVALID error, got: {}",
+        combined
+    );
+}
+
+#[test]
+fn test_eval_validate_with_counts_in_json_output() {
+    use std::fs;
+    use tempfile::TempDir;
+
+    let dir = TempDir::new().unwrap();
+
+    let evals_dir = dir.path().join("evals");
+    fs::create_dir_all(&evals_dir).unwrap();
+    fs::write(
+        evals_dir.join("prompts.csv"),
+        "id,prompt,should_trigger,tags,workspace_subdir\ntest-1,\"Test prompt\",true,\"basic\",\ntest-2,\"Another prompt\",false,\"\",\n",
+    )
+    .unwrap();
+    fs::write(
+        evals_dir.join("checks.toml"),
+        "[[check]]\nname = \"trigger_expectation\"\npattern = \"fastskill\"\nexpected = true\n",
+    )
+    .unwrap();
+
+    fs::write(dir.path().join("SKILL.md"), "# Test Skill\n").unwrap();
+    fs::write(
+        dir.path().join("skill-project.toml"),
+        "[metadata]\nid = \"test-skill\"\n\n[tool.fastskill.eval]\nprompts = \"evals/prompts.csv\"\nchecks = \"evals/checks.toml\"\ntimeout_seconds = 300\nfail_on_missing_agent = false\n",
+    )
+    .unwrap();
+
+    let result = run_fastskill_command(&["eval", "validate", "--json"], Some(dir.path()));
+    assert!(
+        result.success,
+        "Expected eval validate to succeed, got stdout: {}, stderr: {}",
+        result.stdout, result.stderr
+    );
+
+    let json_start = result.stdout.find('{').unwrap();
+    let output: serde_json::Value = serde_json::from_str(&result.stdout[json_start..]).unwrap();
+    assert_eq!(output["valid"], true);
+    assert_eq!(output["case_count"], 2);
+    assert_eq!(output["check_count"], 1);
+}
+
 #[test]
 fn test_eval_report_requires_run_dir() {
     let result = run_fastskill_command(&["eval", "report"], None);