diff --git a/dev-suite/src/orchestrator.py b/dev-suite/src/orchestrator.py index 8f786d5..201f7f4 100644 --- a/dev-suite/src/orchestrator.py +++ b/dev-suite/src/orchestrator.py @@ -41,6 +41,7 @@ from .sandbox.e2b_runner import E2BRunner, SandboxResult from .sandbox.validation_commands import ( ValidationPlan, + ValidationStrategy, format_validation_summary, get_validation_plan, ) @@ -561,7 +562,8 @@ def qa_node(state: GraphState, config: RunnableConfig | None = None) -> dict: return {"failure_report": failure_report, "status": status, "tokens_used": tokens_used, "retry_count": new_retry_count, "trace": trace, "memory_writes": memory_writes, "tool_calls_log": tool_calls_log} -def _run_sandbox_validation(commands, template, generated_code, parsed_files=None, timeout=120): +def _run_sandbox_tests(commands, template, parsed_files=None, timeout=120): + """Run TEST_SUITE validation: sequential commands in sandbox.""" api_key = os.getenv("E2B_API_KEY") if not api_key: return None @@ -569,8 +571,19 @@ def _run_sandbox_validation(commands, template, generated_code, parsed_files=Non project_files = None if parsed_files: project_files = {pf["path"]: pf["content"] for pf in parsed_files} - compound_cmd = " && ".join(commands) - return runner.run_tests(test_command=compound_cmd, project_files=project_files, timeout=timeout, template=template) + return runner.run_tests(commands=commands, project_files=project_files, timeout=timeout, template=template) + + +def _run_sandbox_script(script_file, template, parsed_files=None, timeout=30): + """Run SCRIPT_EXEC validation: execute a single script.""" + api_key = os.getenv("E2B_API_KEY") + if not api_key: + return None + runner = E2BRunner(api_key=api_key, default_timeout=timeout) + project_files = None + if parsed_files: + project_files = {pf["path"]: pf["content"] for pf in parsed_files} + return runner.run_script(script_file=script_file, project_files=project_files, timeout=timeout, template=template) def sandbox_validate_node(state: GraphState) -> dict: @@ -582,21 +595,64 @@ def sandbox_validate_node(state: GraphState) -> dict: return {"sandbox_result": None, "trace": trace} plan = get_validation_plan(blueprint.target_files) trace.append(f"sandbox_validate: {plan.description}") - if not plan.commands: + + if plan.strategy == ValidationStrategy.SKIP: trace.append("sandbox_validate: no code validation needed -- skipping") return {"sandbox_result": None, "trace": trace} - template_label = plan.template or "default" - trace.append(f"sandbox_validate: template={template_label}, commands={len(plan.commands)}") - generated_code = state.get("generated_code", "") + parsed_files = state.get("parsed_files", []) - if parsed_files: - trace.append(f"sandbox_validate: loading {len(parsed_files)} files into sandbox") + + # CR fix: guard against empty parsed_files before dispatching to sandbox. + # apply_code_node can leave parsed_files=[] on parse/path-validation failures. + # Running SCRIPT_EXEC/TEST_SUITE/LINT_ONLY with no files causes spurious errors. + if plan.strategy in { + ValidationStrategy.SCRIPT_EXEC, + ValidationStrategy.TEST_SUITE, + ValidationStrategy.LINT_ONLY, + }: + if not parsed_files: + trace.append("sandbox_validate: no parsed files available -- skipping") + return {"sandbox_result": None, "trace": trace} + + trace.append(f"sandbox_validate: loading {len(parsed_files)} files into sandbox") + + template_label = plan.template or "default" + trace.append(f"sandbox_validate: strategy={plan.strategy.value}, template={template_label}") + try: - result = _run_sandbox_validation(commands=plan.commands, template=plan.template, generated_code=generated_code, parsed_files=parsed_files if parsed_files else None) + result = None + if plan.strategy == ValidationStrategy.SCRIPT_EXEC: + trace.append(f"sandbox_validate: executing script {plan.script_file}") + result = _run_sandbox_script( + script_file=plan.script_file, + template=plan.template, + parsed_files=parsed_files if parsed_files else None, + ) + # CR fix: fold LINT_ONLY into TEST_SUITE -- both run commands + # sequentially via _run_sandbox_tests(). LINT_ONLY was falling + # through to the skip branch instead of executing plan.commands. + elif plan.strategy in {ValidationStrategy.TEST_SUITE, ValidationStrategy.LINT_ONLY}: + trace.append( + f"sandbox_validate: running {len(plan.commands)} command(s) sequentially" + ) + result = _run_sandbox_tests( + commands=plan.commands, + template=plan.template, + parsed_files=parsed_files if parsed_files else None, + ) + else: + trace.append(f"sandbox_validate: unhandled strategy {plan.strategy.value} -- skipping") + return {"sandbox_result": None, "trace": trace} + if result is None: logger.warning("[SANDBOX] E2B_API_KEY not configured -- sandbox validation SKIPPED.") trace.append("sandbox_validate: WARNING -- E2B_API_KEY not configured, sandbox validation skipped") return {"sandbox_result": None, "trace": trace} + + if result.warnings: + for w in result.warnings: + trace.append(f"sandbox_validate: WARNING -- {w}") + trace.append(f"sandbox_validate: exit_code={result.exit_code}, passed={result.tests_passed}, failed={result.tests_failed}") logger.info("[SANDBOX] Validation complete: exit=%d, passed=%s, failed=%s", result.exit_code, result.tests_passed, result.tests_failed) return {"sandbox_result": result, "trace": trace} diff --git a/dev-suite/src/sandbox/e2b_runner.py b/dev-suite/src/sandbox/e2b_runner.py index be5a217..7ae409f 100644 --- a/dev-suite/src/sandbox/e2b_runner.py +++ b/dev-suite/src/sandbox/e2b_runner.py @@ -11,6 +11,9 @@ - Default: bare Python (e2b-code-interpreter base image) - Fullstack: Python + Node.js 22 + pnpm + SvelteKit toolchain - Templates configured via E2B_TEMPLATE_DEFAULT / E2B_TEMPLATE_FULLSTACK env vars + +Issue #95: Added validation_skipped/warnings fields to SandboxResult, +sequential run_tests() execution, and run_script() for simple scripts. """ import os @@ -47,6 +50,8 @@ class SandboxResult(BaseModel): output_summary: str = "" files_modified: list[str] = [] timed_out: bool = False + validation_skipped: bool = False + warnings: list[str] = [] # -- Secret Patterns for Scanning -- @@ -173,16 +178,7 @@ def _extract_execution_error(error_obj) -> str: # -- Template Configuration -- def _get_template_id(template: str | None) -> str | None: - """Resolve a template name to an E2B template ID. - - Template names map to environment variables: - - "fullstack" -> E2B_TEMPLATE_FULLSTACK - - "default" -> E2B_TEMPLATE_DEFAULT - - None -> E2B_TEMPLATE_DEFAULT (if set), else bare E2B default - - If the value looks like a raw template ID (no env var mapping), - it's used directly. - """ + """Resolve a template name to an E2B template ID.""" if template is None: return os.getenv("E2B_TEMPLATE_DEFAULT") or None @@ -210,12 +206,7 @@ def _get_template_id(template: str | None) -> str | None: def select_template_for_files(target_files: list[str]) -> str | None: - """Select the appropriate sandbox template based on target file types. - - Returns: - Template name ("fullstack" or None for default Python). - Returns None if no files are provided or all files are Python-only. - """ + """Select the appropriate sandbox template based on target file types.""" if not target_files: return None @@ -234,24 +225,14 @@ def select_template_for_files(target_files: list[str]) -> str | None: class E2BRunner: """Execute code in E2B sandboxes with structured output. - The API key is read from the E2B_API_KEY environment variable. - Make sure it's set in your .env file or shell environment. - - Correct usage per E2B SDK v2 (from official docs): - Sandbox.create() is the factory method - never call Sandbox() directly. - Usage: runner = E2BRunner() result = runner.run("print('hello')") - print(result.output_summary) - - # With a custom template: - result = runner.run("console.log('hi')", template="fullstack") + result = runner.run_script("hello.py", project_files={"hello.py": "print('hi')"}) + result = runner.run_tests(commands=["pytest tests/"], project_files={...}) """ def __init__(self, api_key: str | None = None, default_timeout: int = 30): - # E2B SDK reads from E2B_API_KEY env var automatically. - # If an explicit key is passed, set it in the environment. if api_key: os.environ["E2B_API_KEY"] = api_key self._default_timeout = default_timeout @@ -264,49 +245,28 @@ def run( timeout: int | None = None, template: str | None = None, ) -> SandboxResult: - """Execute code in a sandbox and return structured results. - - Args: - code: Python code to execute. - profile: Security profile (LOCKED or PERMISSIVE). - env_vars: Environment variables to inject (secrets for LOCKED only). - timeout: Execution timeout in seconds. - template: Sandbox template name or ID. - "fullstack" -> E2B_TEMPLATE_FULLSTACK env var. - None -> E2B_TEMPLATE_DEFAULT or bare E2B default. - Any other string -> used as raw template ID. - - Returns: - SandboxResult with structured output. Never raw stdout/stderr. - """ + """Execute code in a sandbox and return structured results.""" timeout = timeout or self._default_timeout - # Enforce: PERMISSIVE profile gets zero secrets if profile == SandboxProfile.PERMISSIVE: env_vars = None - # Resolve template name to ID template_id = _get_template_id(template) try: - # E2B v2 SDK: Sandbox.create() is the public factory method. - # Using it as a context manager auto-kills the sandbox on exit. create_kwargs = {} if template_id: create_kwargs["template"] = template_id with Sandbox.create(**create_kwargs) as sbx: - # Inject env vars if provided if env_vars: env_setup = "\n".join( f"import os; os.environ['{k}'] = '{v}'" for k, v in env_vars.items() ) sbx.run_code(env_setup) - # Execute the actual code execution = sbx.run_code(code) - # Collect raw output raw_stdout = "" raw_stderr = "" for log in execution.logs.stdout: @@ -316,26 +276,20 @@ def run( combined = raw_stdout + raw_stderr - # Layer 1: Structured output wrapper test_info = _parse_test_output(combined) - # Extract errors from both stdout/stderr AND execution.error errors = _extract_errors(combined) if execution.error: exec_err = _extract_execution_error(execution.error) if exec_err and exec_err not in errors: errors.insert(0, exec_err) - # Layer 2: Secret scanning safe_summary = _scan_for_secrets(combined) - # If stdout/stderr was empty but we have an execution error, - # include the error in the summary if not safe_summary.strip() and execution.error: exec_err = _extract_execution_error(execution.error) safe_summary = _scan_for_secrets(exec_err) - # Truncate summary to prevent context bloat if len(safe_summary) > 2000: safe_summary = safe_summary[:2000] + "\n... [truncated]" @@ -364,38 +318,136 @@ def run( def run_tests( self, - test_command: str, + commands: list[str], project_files: dict[str, str] | None = None, env_vars: dict[str, str] | None = None, timeout: int = 60, template: str | None = None, ) -> SandboxResult: - """Run a test suite in the sandbox. - - Args: - test_command: Shell command to run tests (e.g., 'pytest tests/ -v'). - project_files: Dict of {filepath: content} to write before testing. - env_vars: Secrets to inject. - timeout: Test timeout in seconds. - template: Sandbox template name or ID (e.g., "fullstack" for Node.js tasks). + """Run validation commands sequentially in the sandbox. + + Each command runs independently -- a missing tool (e.g., ruff) + does not prevent subsequent commands (e.g., pytest) from running. + Results are aggregated across all commands. + """ + import base64 as _b64 + import json as _json + + setup_lines = [] + if project_files: + setup_lines.append("import os, base64") + for fpath, content in project_files.items(): + b64 = _b64.b64encode(content.encode("utf-8")).decode("ascii") + setup_lines.append(f"os.makedirs(os.path.dirname('{fpath}') or '.', exist_ok=True)") + setup_lines.append(f"open('{fpath}', 'w').write(base64.b64decode('{b64}').decode('utf-8'))") + + commands_json = _json.dumps(commands) + run_code = f""" +import subprocess, json + +commands = {commands_json} +results = [] +for cmd in commands: + try: + r = subprocess.run( + cmd, shell=True, capture_output=True, text=True, timeout={timeout}, + ) + results.append({{"cmd": cmd, "rc": r.returncode, "out": r.stdout, "err": r.stderr}}) + except subprocess.TimeoutExpired: + results.append({{"cmd": cmd, "rc": -1, "out": "", "err": "TIMEOUT"}}) + +for r in results: + print(r["out"]) + if r["err"] and r["err"] != "TIMEOUT": + print(r["err"]) + +print("__RESULTS__" + json.dumps([{{"cmd": r["cmd"], "rc": r["rc"]}} for r in results])) +""" + + setup_code = "\n".join(setup_lines) + "\n" if setup_lines else "" + full_code = setup_code + run_code + + # CR fix: scale outer timeout to account for sequential commands + num_commands = max(len(commands), 1) + overall_timeout = timeout * num_commands + 30 + + result = self.run( + full_code, + profile=SandboxProfile.LOCKED, + env_vars=env_vars, + timeout=overall_timeout, + template=template, + ) + + warnings = list(result.warnings) + summary = result.output_summary + + # CR fix: aggregate exit_code from __RESULTS__ trailer instead of + # using the wrapper process exit code. A child command failure (e.g., + # ruff finding lint errors) must propagate even if the wrapper exits 0. + aggregate_exit_code = result.exit_code + + if "[WARN]" in summary: + for line in summary.split("\n"): + if "[WARN]" in line: + warnings.append(line.strip()) + + if "__RESULTS__" in summary: + try: + json_part = summary.split("__RESULTS__")[-1].strip() + cmd_results = _json.loads(json_part) + # Aggregate: first non-zero rc, or 0 if all passed + for cr in cmd_results: + rc = cr.get("rc", 0) + if rc == 127: + warnings.append(f"Command not found: {cr['cmd'][:60]}") + # CR fix: use `if` not `elif` so rc=127 both warns + # AND fails the aggregate. A missing tool (pnpm, tsc) + # should not silently pass validation. + if rc != 0 and aggregate_exit_code == 0: + aggregate_exit_code = rc + summary = summary.split("__RESULTS__")[0].strip() + except (ValueError, IndexError): + pass + + return SandboxResult( + exit_code=aggregate_exit_code, + tests_passed=result.tests_passed, + tests_failed=result.tests_failed, + errors=result.errors, + output_summary=summary, + files_modified=result.files_modified, + timed_out=result.timed_out, + validation_skipped=False, + warnings=warnings, + ) + + def run_script( + self, + script_file: str, + project_files: dict[str, str] | None = None, + env_vars: dict[str, str] | None = None, + timeout: int = 30, + template: str | None = None, + ) -> SandboxResult: + """Execute a script and check exit code + stdout. + + For simple scripts that don't have a test suite. """ - # Build code that writes files then runs tests. - # File content is base64-encoded to avoid escaping issues with - # triple-quoted strings, embedded quotes, and multiline content. - setup_code = "" + import base64 as _b64 + + setup_lines = [] if project_files: - import base64 as _b64 - setup_code += "import os, base64\n" + setup_lines.append("import os, base64") for fpath, content in project_files.items(): b64 = _b64.b64encode(content.encode("utf-8")).decode("ascii") - setup_code += f"os.makedirs(os.path.dirname('{fpath}') or '.', exist_ok=True)\n" - setup_code += f"open('{fpath}', 'w').write(base64.b64decode('{b64}').decode('utf-8'))\n" + setup_lines.append(f"os.makedirs(os.path.dirname('{fpath}') or '.', exist_ok=True)") + setup_lines.append(f"open('{fpath}', 'w').write(base64.b64decode('{b64}').decode('utf-8'))") run_code = f""" import subprocess result = subprocess.run( - {test_command!r}, - shell=True, + ["python", {script_file!r}], capture_output=True, text=True, timeout={timeout}, @@ -403,13 +455,41 @@ def run_tests( print(result.stdout) if result.stderr: print(result.stderr) +print(f"__EXIT_CODE__{{result.returncode}}") """ + setup_code = "\n".join(setup_lines) + "\n" if setup_lines else "" full_code = setup_code + run_code - return self.run( + + result = self.run( full_code, profile=SandboxProfile.LOCKED, env_vars=env_vars, timeout=timeout + 10, template=template, ) + + summary = result.output_summary + script_exit = 0 + if "__EXIT_CODE__" in summary: + try: + code_str = summary.split("__EXIT_CODE__")[-1].strip() + script_exit = int(code_str) + summary = summary.split("__EXIT_CODE__")[0].strip() + except (ValueError, IndexError): + pass + + passed = 1 if script_exit == 0 and not result.errors else 0 + failed = 0 if passed else 1 + + return SandboxResult( + exit_code=script_exit if not result.errors else 1, + tests_passed=passed, + tests_failed=failed, + errors=result.errors, + output_summary=summary, + files_modified=result.files_modified, + timed_out=result.timed_out, + validation_skipped=False, + warnings=[], + ) diff --git a/dev-suite/src/sandbox/validation_commands.py b/dev-suite/src/sandbox/validation_commands.py index 7b9ea56..99feb63 100644 --- a/dev-suite/src/sandbox/validation_commands.py +++ b/dev-suite/src/sandbox/validation_commands.py @@ -2,14 +2,21 @@ Maps file types from Blueprint target_files to: - The appropriate sandbox template (default Python vs fullstack) + - The validation strategy (TEST_SUITE, SCRIPT_EXEC, LINT_ONLY, SKIP) - The validation commands QA should run -This module is pure functions with no side effects — easy to test +This module is pure functions with no side effects -- easy to test and extend as new file types are supported. + +Issue #95: Added ValidationStrategy enum to differentiate between +"run tests", "just execute the script", and "lint only" paths. +Previously, all Python tasks got ruff + pytest even when no test +suite existed, causing misleading "passed" results. """ import os from dataclasses import dataclass, field +from enum import Enum from .e2b_runner import select_template_for_files @@ -19,29 +26,62 @@ FRONTEND_EXTENSIONS = {".svelte", ".ts", ".tsx", ".js", ".jsx", ".css", ".vue"} PYTHON_EXTENSIONS = {".py", ".pyi"} +# CR fix: .pyi stubs are type annotations only -- not executable. +# Use this for _get_primary_script() selection so stubs are never +# chosen as the script to run via SCRIPT_EXEC. +EXECUTABLE_PYTHON_EXTENSIONS = {".py"} + + +class ValidationStrategy(str, Enum): + """How to validate the generated code in the sandbox. + + TEST_SUITE: Full validation -- lint + test runner. Used when test + files are present in target_files. + SCRIPT_EXEC: Run the primary script and check exit code + stdout. + Used for single-file scripts with no test suite. + LINT_ONLY: Syntax/lint check only, no execution. Used for + non-executable code (e.g., .pyi stubs, config generators). + SKIP: No validation needed (non-code files). + """ + TEST_SUITE = "test_suite" + SCRIPT_EXEC = "script_exec" + LINT_ONLY = "lint_only" + SKIP = "skip" + @dataclass class ValidationPlan: """What to validate and how. Attributes: + strategy: The validation approach to use. template: Sandbox template name ("fullstack" or None for default). - commands: Shell commands to run in the sandbox. + commands: Shell commands to run in the sandbox (for TEST_SUITE/LINT_ONLY). + script_file: Primary file to execute (for SCRIPT_EXEC). description: Human-readable summary for logging/QA context. """ + strategy: ValidationStrategy = ValidationStrategy.SKIP template: str | None = None commands: list[str] = field(default_factory=list) + script_file: str | None = None description: str = "" # -- Command Sets -- -PYTHON_COMMANDS = [ - "ruff check . --select=E,F,W --no-fix", - "python -m pytest tests/ -v --tb=short 2>&1 || true", +# CR fix: if/then/else preserves ruff exit code; A && B || C masks failures +PYTHON_LINT_COMMANDS = [ + "if command -v ruff >/dev/null 2>&1; then ruff check . --select=E,F,W --no-fix; else echo '[WARN] ruff not available -- lint skipped'; fi", ] +# CR fix: drop hardcoded tests/ -- let pytest auto-discover +PYTHON_TEST_COMMANDS = [ + "python -m pytest -v --tb=short", +] + +PYTHON_COMMANDS = PYTHON_LINT_COMMANDS + PYTHON_TEST_COMMANDS + FRONTEND_COMMANDS = [ "cd /home/user/dashboard && pnpm check 2>&1 || true", "cd /home/user/dashboard && pnpm exec tsc --noEmit 2>&1 || true", @@ -51,20 +91,96 @@ class ValidationPlan: FULLSTACK_COMMANDS = FRONTEND_COMMANDS + PYTHON_COMMANDS +# CR fix: boundary-based test detection instead of substring matching +def _is_test_file(filepath: str) -> bool: + """Check if a single file looks like a test file using boundary matching. + + Matches on basename boundaries to avoid false positives like + 'latest_utils.py' or 'contest_helper.py'. + """ + lower = filepath.lower().replace("\\", "/") + parts = lower.split("/") + basename = parts[-1] if parts else lower + + # Directory component is "tests" (exact match) + if "tests" in parts[:-1]: + return True + + # Basename starts with "test_" + if basename.startswith("test_"): + return True + + # Basename ends with "_test.py" or "_test.pyi" + name_no_ext = os.path.splitext(basename)[0] + if name_no_ext.endswith("_test"): + return True + + # Exact matches + if basename in ("tests.py", "conftest.py"): + return True + + # JS/TS test patterns: .test.js, .test.ts, .spec.js, .spec.ts + if ".test." in basename or ".spec." in basename: + return True + + return False + + +def _has_test_files(target_files: list[str]) -> bool: + """Check if any target files look like test files or test directories.""" + return any(_is_test_file(f) for f in target_files) + + +def _get_primary_script(target_files: list[str]) -> str | None: + """Find the primary executable script from target files. + + Returns the first .py file (not .pyi stubs) that doesn't look like + a test file, or the first .py file if all look like tests, or None. + + CR fix: .pyi stubs are excluded -- they are type annotations only + and cannot be meaningfully executed. + """ + py_files = [ + f for f in target_files + if os.path.splitext(f.lower())[1] in EXECUTABLE_PYTHON_EXTENSIONS + ] + if not py_files: + return None + + # Prefer non-test files + for f in py_files: + if not _is_test_file(f): + return f + + # Fallback to first .py file + return py_files[0] + + def get_validation_plan(target_files: list[str]) -> ValidationPlan: """Determine the validation plan based on target file types. + Strategy selection: + - No files -> SKIP + - Non-code files only -> SKIP + - Frontend files -> TEST_SUITE (pnpm check + tsc) + - Python files with test files -> TEST_SUITE (ruff + pytest) + - Python files without test files: + - Has executable .py files -> SCRIPT_EXEC (run the script) + - Only .pyi stubs -> LINT_ONLY (ruff check only) + - Mixed frontend + Python -> TEST_SUITE (fullstack) + Args: target_files: List of file paths from the Blueprint. Returns: - ValidationPlan with template, commands, and description. + ValidationPlan with strategy, template, commands, and description. """ if not target_files: return ValidationPlan( + strategy=ValidationStrategy.SKIP, template=None, commands=[], - description="No target files \u2014 skipping validation", + description="No target files -- skipping validation", ) has_frontend = False @@ -80,37 +196,79 @@ def get_validation_plan(target_files: list[str]) -> ValidationPlan: # Pick template template = select_template_for_files(target_files) - # Pick commands + # Pick strategy and commands if has_frontend and has_python: - commands = list(FULLSTACK_COMMANDS) - description = ( - f"Full-stack validation: {len(target_files)} files " - f"(frontend + Python) using fullstack template" - ) - elif has_frontend: - commands = list(FRONTEND_COMMANDS) - description = ( - f"Frontend validation: {len(target_files)} files " - f"using fullstack template (pnpm check + tsc)" - ) - elif has_python: - commands = list(PYTHON_COMMANDS) - description = ( - f"Python validation: {len(target_files)} files " - f"using default template (ruff + pytest)" + # Full-stack: frontend checks + Python (lint always, pytest only if tests exist) + # CR fix: unconditionally using FULLSTACK_COMMANDS reintroduces pytest + # even when no test files are present, causing "no tests collected" failures. + python_cmds = PYTHON_COMMANDS if _has_test_files(target_files) else PYTHON_LINT_COMMANDS + return ValidationPlan( + strategy=ValidationStrategy.TEST_SUITE, + template=template, + commands=list(FRONTEND_COMMANDS + python_cmds), + description=( + f"Full-stack validation: {len(target_files)} files " + f"(frontend + Python) using fullstack template" + ), ) - else: - # Non-code files (JSON, YAML, SQL, etc.) \u2014 no validation - commands = [] - description = ( - f"No code validation needed for {len(target_files)} files " - f"(non-code file types)" + + if has_frontend: + return ValidationPlan( + strategy=ValidationStrategy.TEST_SUITE, + template=template, + commands=list(FRONTEND_COMMANDS), + description=( + f"Frontend validation: {len(target_files)} files " + f"using fullstack template (pnpm check + tsc)" + ), ) + if has_python: + has_tests = _has_test_files(target_files) + if has_tests: + return ValidationPlan( + strategy=ValidationStrategy.TEST_SUITE, + template=template, + commands=list(PYTHON_COMMANDS), + description=( + f"Python validation: {len(target_files)} files " + f"using default template (ruff + pytest)" + ), + ) + else: + primary = _get_primary_script(target_files) + # CR fix: if no executable .py files found (e.g., .pyi stubs + # only), fall back to lint-only instead of trying to execute + if primary is None: + return ValidationPlan( + strategy=ValidationStrategy.LINT_ONLY, + template=template, + commands=list(PYTHON_LINT_COMMANDS), + description=( + f"Lint-only validation: {len(target_files)} files " + f"using default template (no executable .py files)" + ), + ) + return ValidationPlan( + strategy=ValidationStrategy.SCRIPT_EXEC, + template=template, + commands=[], + script_file=primary, + description=( + f"Script execution: {len(target_files)} files " + f"using default template (run + check exit code)" + ), + ) + + # Non-code files (JSON, YAML, SQL, etc.) return ValidationPlan( - template=template, - commands=commands, - description=description, + strategy=ValidationStrategy.SKIP, + template=None, + commands=[], + description=( + f"No code validation needed for {len(target_files)} files " + f"(non-code file types)" + ), ) @@ -119,6 +277,15 @@ def format_validation_summary(plan: ValidationPlan) -> str: Used in the QA prompt to describe what sandbox validation was attempted. """ + if plan.strategy == ValidationStrategy.SKIP: + return plan.description + + if plan.strategy == ValidationStrategy.SCRIPT_EXEC: + lines = [plan.description] + if plan.script_file: + lines.append(f" Executing: python {plan.script_file}") + return "\n".join(lines) + if not plan.commands: return plan.description diff --git a/dev-suite/tests/test_sandbox_validate.py b/dev-suite/tests/test_sandbox_validate.py index f0e01c5..e5241cd 100644 --- a/dev-suite/tests/test_sandbox_validate.py +++ b/dev-suite/tests/test_sandbox_validate.py @@ -1,15 +1,18 @@ """Tests for the sandbox_validate orchestrator node. These tests validate the sandbox_validate node behavior with mocked -E2B runner — no real sandbox needed. Tests cover: +E2B runner -- no real sandbox needed. Tests cover: + - Strategy-based dispatch (TEST_SUITE, SCRIPT_EXEC, SKIP, LINT_ONLY) - Template selection from blueprint target_files - - Validation command execution - Graceful skip when E2B_API_KEY is missing - SandboxResult stored in graph state - - QA prompt includes sandbox results + - Warnings surfaced in trace + - Empty parsed_files guard + +Issue #95: Updated for ValidationStrategy dispatch. """ -from unittest.mock import MagicMock, patch +from unittest.mock import patch import pytest @@ -23,7 +26,12 @@ def _make_state(target_files: list[str], **overrides) -> GraphState: - """Helper to create a GraphState with a blueprint.""" + """Helper to create a GraphState with a blueprint. + + By default, populates parsed_files from target_files so the + empty-files guard doesn't skip sandbox dispatch. Tests that + need empty parsed_files should pass parsed_files=[] explicitly. + """ bp = Blueprint( task_id="test-task", target_files=target_files, @@ -31,6 +39,11 @@ def _make_state(target_files: list[str], **overrides) -> GraphState: constraints=["constraint1"], acceptance_criteria=["criterion1"], ) + # Default: populate parsed_files with dummy content for each target file + default_parsed = [ + {"path": f, "content": f"# stub content for {f}"} + for f in target_files + ] state: GraphState = { "task_description": "test task", "blueprint": bp, @@ -43,19 +56,121 @@ def _make_state(target_files: list[str], **overrides) -> GraphState: "memory_context": [], "memory_writes": [], "trace": [], + "parsed_files": default_parsed, + "tool_calls_log": [], } state.update(overrides) return state +class TestSandboxValidateStrategy: + """Tests for strategy-based dispatch in sandbox_validate_node.""" + + def test_script_exec_for_single_python_file(self): + """Single .py file without tests should use SCRIPT_EXEC.""" + state = _make_state(["hello_world.py"]) + + with patch("src.orchestrator._run_sandbox_script") as mock_script: + mock_script.return_value = SandboxResult( + exit_code=0, + tests_passed=1, + tests_failed=0, + output_summary="Hello, World!", + ) + result = sandbox_validate_node(state) + + assert result["sandbox_result"] is not None + assert result["sandbox_result"].exit_code == 0 + assert result["sandbox_result"].tests_passed == 1 + mock_script.assert_called_once() + call_kwargs = mock_script.call_args[1] + assert call_kwargs["script_file"] == "hello_world.py" + + def test_test_suite_for_python_with_tests(self): + """Python files with test files should use TEST_SUITE.""" + state = _make_state(["src/main.py", "tests/test_main.py"]) + + with patch("src.orchestrator._run_sandbox_tests") as mock_tests: + mock_tests.return_value = SandboxResult( + exit_code=0, + tests_passed=5, + tests_failed=0, + output_summary="5 passed in 1.2s", + ) + result = sandbox_validate_node(state) + + assert result["sandbox_result"] is not None + assert result["sandbox_result"].tests_passed == 5 + mock_tests.assert_called_once() + call_kwargs = mock_tests.call_args[1] + assert call_kwargs["template"] is None + + def test_test_suite_for_frontend_files(self): + """Frontend files should use TEST_SUITE with fullstack template.""" + state = _make_state(["dashboard/src/lib/Widget.svelte"]) + + with patch("src.orchestrator._run_sandbox_tests") as mock_tests: + mock_tests.return_value = SandboxResult( + exit_code=0, + tests_passed=1, + tests_failed=0, + output_summary="svelte-check found 0 errors", + ) + result = sandbox_validate_node(state) + + assert result["sandbox_result"] is not None + call_kwargs = mock_tests.call_args[1] + assert call_kwargs["template"] == "fullstack" + + def test_skip_for_non_code_files(self): + """Non-code files should return None (no sandbox run).""" + state = _make_state(["schema.sql", "config.yaml"]) + + result = sandbox_validate_node(state) + + assert result["sandbox_result"] is None + assert any("skip" in t.lower() for t in result["trace"]) + + def test_empty_parsed_files_skips_sandbox(self): + """CR fix: empty parsed_files should skip sandbox dispatch.""" + state = _make_state( + ["src/main.py", "tests/test_main.py"], + parsed_files=[], + ) + + result = sandbox_validate_node(state) + + assert result["sandbox_result"] is None + assert any("no parsed files" in t.lower() for t in result["trace"]) + + def test_lint_only_for_pyi_stubs(self): + """CR fix: .pyi-only files should dispatch via LINT_ONLY -> _run_sandbox_tests.""" + state = _make_state(["src/types.pyi"]) + + with patch("src.orchestrator._run_sandbox_tests") as mock_tests: + mock_tests.return_value = SandboxResult( + exit_code=0, + tests_passed=None, + tests_failed=None, + output_summary="ruff check passed", + ) + result = sandbox_validate_node(state) + + assert result["sandbox_result"] is not None + assert result["sandbox_result"].exit_code == 0 + mock_tests.assert_called_once() + trace_text = " ".join(result.get("trace", [])) + assert "lint_only" in trace_text.lower() or "command(s) sequentially" in trace_text.lower() + + class TestSandboxValidateNode: - """Tests for the sandbox_validate_node function.""" + """Core sandbox_validate_node tests.""" def test_python_task_uses_default_template(self): """Python-only tasks should use the default (None) template.""" state = _make_state(["src/main.py", "tests/test_main.py"]) - with patch("src.orchestrator._run_sandbox_validation") as mock_run: + with patch("src.orchestrator._run_sandbox_tests") as mock_run: mock_run.return_value = SandboxResult( exit_code=0, tests_passed=5, @@ -67,15 +182,14 @@ def test_python_task_uses_default_template(self): assert result["sandbox_result"] is not None assert result["sandbox_result"].exit_code == 0 assert result["sandbox_result"].tests_passed == 5 - # Check it was called with None template (default) - call_args = mock_run.call_args - assert call_args[1]["template"] is None + call_kwargs = mock_run.call_args[1] + assert call_kwargs["template"] is None def test_svelte_task_uses_fullstack_template(self): """Svelte tasks should use the fullstack template.""" state = _make_state(["dashboard/src/lib/Widget.svelte"]) - with patch("src.orchestrator._run_sandbox_validation") as mock_run: + with patch("src.orchestrator._run_sandbox_tests") as mock_run: mock_run.return_value = SandboxResult( exit_code=0, tests_passed=1, @@ -85,14 +199,14 @@ def test_svelte_task_uses_fullstack_template(self): result = sandbox_validate_node(state) assert result["sandbox_result"] is not None - call_args = mock_run.call_args - assert call_args[1]["template"] == "fullstack" + call_kwargs = mock_run.call_args[1] + assert call_kwargs["template"] == "fullstack" def test_mixed_task_uses_fullstack_template(self): """Mixed Python+frontend tasks should use fullstack.""" state = _make_state(["src/api.py", "dashboard/src/App.svelte"]) - with patch("src.orchestrator._run_sandbox_validation") as mock_run: + with patch("src.orchestrator._run_sandbox_tests") as mock_run: mock_run.return_value = SandboxResult( exit_code=0, tests_passed=6, @@ -101,19 +215,18 @@ def test_mixed_task_uses_fullstack_template(self): ) result = sandbox_validate_node(state) - call_args = mock_run.call_args - assert call_args[1]["template"] == "fullstack" + call_kwargs = mock_run.call_args[1] + assert call_kwargs["template"] == "fullstack" def test_graceful_skip_no_api_key(self): - """Should warn and skip when E2B_API_KEY is missing.""" - state = _make_state(["src/main.py"]) + """Should return None when E2B_API_KEY is missing.""" + state = _make_state(["src/main.py", "tests/test_main.py"]) - with patch("src.orchestrator._run_sandbox_validation") as mock_run: - mock_run.return_value = None # Signals skip + with patch("src.orchestrator._run_sandbox_tests") as mock_run: + mock_run.return_value = None result = sandbox_validate_node(state) assert result["sandbox_result"] is None - assert any("skip" in t.lower() or "warn" in t.lower() for t in result["trace"]) def test_no_blueprint_skips(self): """Should skip validation when there's no blueprint.""" @@ -125,20 +238,11 @@ def test_no_blueprint_skips(self): assert result["sandbox_result"] is None assert any("no blueprint" in t.lower() for t in result["trace"]) - def test_non_code_files_skip_validation(self): - """Non-code files (JSON, YAML, SQL) should skip sandbox validation.""" - state = _make_state(["schema.sql", "config.yaml"]) - - result = sandbox_validate_node(state) - - assert result["sandbox_result"] is None - assert any("no code validation" in t.lower() or "skip" in t.lower() for t in result["trace"]) - def test_sandbox_error_doesnt_crash(self): """Sandbox errors should be captured, not raise exceptions.""" - state = _make_state(["src/main.py"]) + state = _make_state(["src/main.py", "tests/test_main.py"]) - with patch("src.orchestrator._run_sandbox_validation") as mock_run: + with patch("src.orchestrator._run_sandbox_tests") as mock_run: mock_run.side_effect = Exception("E2B connection failed") result = sandbox_validate_node(state) @@ -146,10 +250,10 @@ def test_sandbox_error_doesnt_crash(self): assert any("error" in t.lower() or "failed" in t.lower() for t in result["trace"]) def test_failed_validation_still_continues(self): - """Sandbox validation failure should not block QA — it adds context.""" - state = _make_state(["src/main.py"]) + """Sandbox validation failure should not block QA -- it adds context.""" + state = _make_state(["src/main.py", "tests/test_main.py"]) - with patch("src.orchestrator._run_sandbox_validation") as mock_run: + with patch("src.orchestrator._run_sandbox_tests") as mock_run: mock_run.return_value = SandboxResult( exit_code=1, tests_passed=3, @@ -159,29 +263,41 @@ def test_failed_validation_still_continues(self): ) result = sandbox_validate_node(state) - # Node should still return the result even though tests failed assert result["sandbox_result"] is not None assert result["sandbox_result"].tests_failed == 2 - # Status should not change — QA decides pass/fail - assert "status" not in result or result.get("status") == WorkflowStatus.REVIEWING - def test_trace_includes_validation_plan(self): - """Trace should include which template and commands were selected.""" - state = _make_state(["dashboard/src/App.svelte"]) + def test_warnings_surfaced_in_trace(self): + """Sandbox warnings should appear in the trace log.""" + state = _make_state(["src/main.py", "tests/test_main.py"]) - with patch("src.orchestrator._run_sandbox_validation") as mock_run: + with patch("src.orchestrator._run_sandbox_tests") as mock_run: mock_run.return_value = SandboxResult( exit_code=0, tests_passed=1, tests_failed=0, - output_summary="svelte-check found 0 errors", + output_summary="1 passed", + warnings=["[WARN] ruff not available -- lint skipped"], + ) + result = sandbox_validate_node(state) + + trace_text = " ".join(result.get("trace", [])) + assert "ruff" in trace_text.lower() + + def test_trace_includes_strategy(self): + """Trace should include which strategy was selected.""" + state = _make_state(["hello_world.py"]) + + with patch("src.orchestrator._run_sandbox_script") as mock_script: + mock_script.return_value = SandboxResult( + exit_code=0, + tests_passed=1, + tests_failed=0, + output_summary="Hello, World!", ) result = sandbox_validate_node(state) - trace = result.get("trace", []) - trace_text = " ".join(trace).lower() - assert "sandbox_validate" in trace_text - assert "fullstack" in trace_text or "frontend" in trace_text + trace_text = " ".join(result.get("trace", [])) + assert "script_exec" in trace_text.lower() class TestSandboxValidateGraphIntegration: @@ -193,7 +309,6 @@ def test_graph_has_sandbox_validate_node(self): graph = build_graph() compiled = graph.compile() - # LangGraph stores node names — check sandbox_validate exists node_names = set(compiled.get_graph().nodes.keys()) assert "sandbox_validate" in node_names diff --git a/dev-suite/tests/test_validation_commands.py b/dev-suite/tests/test_validation_commands.py index fb95850..7204760 100644 --- a/dev-suite/tests/test_validation_commands.py +++ b/dev-suite/tests/test_validation_commands.py @@ -1,14 +1,19 @@ """Tests for validation command mapping. -Validates that file types are correctly mapped to sandbox templates -and validation commands. +Validates that file types are correctly mapped to sandbox templates, +validation strategies, and validation commands. + +Issue #95: Added tests for ValidationStrategy selection -- +SCRIPT_EXEC for simple scripts, TEST_SUITE when tests present. """ from src.sandbox.validation_commands import ( FRONTEND_COMMANDS, FULLSTACK_COMMANDS, PYTHON_COMMANDS, + PYTHON_LINT_COMMANDS, ValidationPlan, + ValidationStrategy, format_validation_summary, get_validation_plan, ) @@ -17,143 +22,294 @@ class TestGetValidationPlan: """Tests for get_validation_plan().""" - # -- Python files -- + # -- Python files with tests (TEST_SUITE) -- - def test_python_only_files(self): + def test_python_with_test_files(self): plan = get_validation_plan(["src/main.py", "tests/test_main.py"]) + assert plan.strategy == ValidationStrategy.TEST_SUITE assert plan.template is None assert plan.commands == PYTHON_COMMANDS assert "Python" in plan.description - def test_pyi_stub_file(self): - plan = get_validation_plan(["src/types.pyi"]) - assert plan.template is None + def test_python_with_test_prefix(self): + plan = get_validation_plan(["src/utils.py", "test_utils.py"]) + assert plan.strategy == ValidationStrategy.TEST_SUITE + assert plan.commands == PYTHON_COMMANDS + + def test_python_with_test_suffix(self): + plan = get_validation_plan(["src/auth.py", "src/auth_test.py"]) + assert plan.strategy == ValidationStrategy.TEST_SUITE + + def test_pyi_stub_with_tests(self): + plan = get_validation_plan(["src/types.pyi", "tests/test_types.py"]) + assert plan.strategy == ValidationStrategy.TEST_SUITE assert plan.commands == PYTHON_COMMANDS - # -- Frontend files -- + # -- Python files without tests (SCRIPT_EXEC) -- + + def test_single_python_script(self): + plan = get_validation_plan(["hello_world.py"]) + assert plan.strategy == ValidationStrategy.SCRIPT_EXEC + assert plan.script_file == "hello_world.py" + assert plan.commands == [] + assert "Script execution" in plan.description + + def test_python_module_no_tests(self): + plan = get_validation_plan(["src/utils.py"]) + assert plan.strategy == ValidationStrategy.SCRIPT_EXEC + assert plan.script_file == "src/utils.py" + + def test_multiple_python_no_tests(self): + plan = get_validation_plan(["src/main.py", "src/helpers.py"]) + assert plan.strategy == ValidationStrategy.SCRIPT_EXEC + assert plan.script_file == "src/main.py" + + # -- .pyi stub files (LINT_ONLY -- not executable) -- + + def test_pyi_only_gets_lint_only(self): + """CR fix: .pyi stubs are not executable -- route to LINT_ONLY.""" + plan = get_validation_plan(["src/types.pyi"]) + assert plan.strategy == ValidationStrategy.LINT_ONLY + assert plan.script_file is None + assert plan.commands == PYTHON_LINT_COMMANDS + assert "Lint-only" in plan.description + + def test_pyi_with_py_gets_script_exec(self): + """When .pyi and .py coexist without tests, .py is the script.""" + plan = get_validation_plan(["src/types.pyi", "src/main.py"]) + assert plan.strategy == ValidationStrategy.SCRIPT_EXEC + assert plan.script_file == "src/main.py" + + # -- Frontend files (TEST_SUITE) -- def test_svelte_files(self): plan = get_validation_plan(["dashboard/src/lib/Widget.svelte"]) + assert plan.strategy == ValidationStrategy.TEST_SUITE assert plan.template == "fullstack" assert plan.commands == FRONTEND_COMMANDS assert "Frontend" in plan.description def test_typescript_files(self): plan = get_validation_plan(["dashboard/src/routes/+page.ts"]) + assert plan.strategy == ValidationStrategy.TEST_SUITE assert plan.template == "fullstack" assert plan.commands == FRONTEND_COMMANDS def test_javascript_files(self): plan = get_validation_plan(["src/utils.js"]) + assert plan.strategy == ValidationStrategy.TEST_SUITE assert plan.template == "fullstack" - assert plan.commands == FRONTEND_COMMANDS def test_jsx_files(self): plan = get_validation_plan(["src/Component.jsx"]) + assert plan.strategy == ValidationStrategy.TEST_SUITE assert plan.template == "fullstack" - assert plan.commands == FRONTEND_COMMANDS def test_tsx_files(self): plan = get_validation_plan(["src/Component.tsx"]) + assert plan.strategy == ValidationStrategy.TEST_SUITE assert plan.template == "fullstack" - assert plan.commands == FRONTEND_COMMANDS def test_css_files(self): plan = get_validation_plan(["dashboard/src/app.css"]) + assert plan.strategy == ValidationStrategy.TEST_SUITE assert plan.template == "fullstack" - assert plan.commands == FRONTEND_COMMANDS def test_vue_files(self): plan = get_validation_plan(["src/App.vue"]) + assert plan.strategy == ValidationStrategy.TEST_SUITE assert plan.template == "fullstack" - assert plan.commands == FRONTEND_COMMANDS - # -- Mixed files -- + # -- Mixed files (TEST_SUITE fullstack) -- def test_mixed_python_and_frontend(self): files = [ "dev-suite/src/api/main.py", "dashboard/src/lib/Widget.svelte", + "tests/test_api.py", ] plan = get_validation_plan(files) + assert plan.strategy == ValidationStrategy.TEST_SUITE assert plan.template == "fullstack" assert plan.commands == FULLSTACK_COMMANDS assert "Full-stack" in plan.description def test_mixed_python_and_typescript(self): - files = ["src/server.py", "dashboard/src/routes/+page.ts"] + files = ["src/server.py", "dashboard/src/routes/+page.ts", "tests/test_server.py"] plan = get_validation_plan(files) + assert plan.strategy == ValidationStrategy.TEST_SUITE assert plan.template == "fullstack" assert plan.commands == FULLSTACK_COMMANDS - # -- Non-code files -- + def test_mixed_frontend_and_python_no_tests(self): + """CR fix: mixed without tests should use frontend + lint only (no pytest).""" + files = ["src/api.py", "dashboard/src/App.svelte"] + plan = get_validation_plan(files) + assert plan.strategy == ValidationStrategy.TEST_SUITE + assert plan.template == "fullstack" + assert plan.commands == FRONTEND_COMMANDS + PYTHON_LINT_COMMANDS + assert "Full-stack" in plan.description + + # -- Non-code files (SKIP) -- def test_non_code_files(self): files = ["data.json", "config.yaml", "schema.sql"] plan = get_validation_plan(files) + assert plan.strategy == ValidationStrategy.SKIP assert plan.template is None assert plan.commands == [] assert "non-code" in plan.description.lower() def test_markdown_files(self): plan = get_validation_plan(["README.md", "CHANGELOG.md"]) + assert plan.strategy == ValidationStrategy.SKIP assert plan.commands == [] def test_makefile(self): plan = get_validation_plan(["Makefile"]) + assert plan.strategy == ValidationStrategy.SKIP assert plan.commands == [] # -- Edge cases -- def test_empty_files_list(self): plan = get_validation_plan([]) + assert plan.strategy == ValidationStrategy.SKIP assert plan.template is None assert plan.commands == [] assert "skipping" in plan.description.lower() def test_mixed_code_and_non_code(self): - """Non-code files alongside Python should still trigger Python validation.""" + """Non-code files alongside Python without tests -> SCRIPT_EXEC.""" files = ["src/main.py", "config.yaml"] plan = get_validation_plan(files) + assert plan.strategy == ValidationStrategy.SCRIPT_EXEC + assert plan.script_file == "src/main.py" + + def test_mixed_code_and_non_code_with_tests(self): + """Non-code files alongside Python with tests -> TEST_SUITE.""" + files = ["src/main.py", "tests/test_main.py", "config.yaml"] + plan = get_validation_plan(files) + assert plan.strategy == ValidationStrategy.TEST_SUITE assert plan.commands == PYTHON_COMMANDS def test_mixed_frontend_and_non_code(self): """Non-code files alongside frontend should still trigger frontend validation.""" files = ["dashboard/src/App.svelte", "README.md"] plan = get_validation_plan(files) + assert plan.strategy == ValidationStrategy.TEST_SUITE assert plan.template == "fullstack" assert plan.commands == FRONTEND_COMMANDS + # -- Script file selection -- + + def test_test_file_presence_triggers_test_suite(self): + """CR fix: renamed -- presence of test file triggers TEST_SUITE, not SCRIPT_EXEC.""" + files = ["src/main.py", "test_main.py"] + plan = get_validation_plan(files) + assert plan.strategy == ValidationStrategy.TEST_SUITE + + def test_script_file_selects_first_py_file(self): + """CR fix: when multiple non-test .py files exist, script_file is the first one.""" + files = ["src/secondary.py", "src/main.py"] + plan = get_validation_plan(files) + assert plan.strategy == ValidationStrategy.SCRIPT_EXEC + assert plan.script_file == "src/secondary.py" + + def test_script_file_single_file(self): + plan = get_validation_plan(["utils.py"]) + assert plan.script_file == "utils.py" + + # -- Boundary matching: false positive avoidance (CR fix) -- + + def test_latest_utils_not_detected_as_test(self): + """'latest_utils.py' contains 'test_' as substring but is not a test file.""" + plan = get_validation_plan(["src/latest_utils.py"]) + assert plan.strategy == ValidationStrategy.SCRIPT_EXEC + + def test_contest_helper_not_detected_as_test(self): + """'contest_helper.py' contains 'test' as substring but is not a test file.""" + plan = get_validation_plan(["src/contest_helper.py"]) + assert plan.strategy == ValidationStrategy.SCRIPT_EXEC + + def test_attestation_not_detected_as_test(self): + """'attestation.py' contains 'test' but is not a test file.""" + plan = get_validation_plan(["src/attestation.py"]) + assert plan.strategy == ValidationStrategy.SCRIPT_EXEC + + def test_real_test_prefix_still_detected(self): + """'test_utils.py' should still be detected as a test file.""" + plan = get_validation_plan(["src/main.py", "test_utils.py"]) + assert plan.strategy == ValidationStrategy.TEST_SUITE + + def test_real_test_suffix_still_detected(self): + """'auth_test.py' should still be detected as a test file.""" + plan = get_validation_plan(["src/auth.py", "auth_test.py"]) + assert plan.strategy == ValidationStrategy.TEST_SUITE + + def test_tests_directory_still_detected(self): + """Files under tests/ directory should still be detected.""" + plan = get_validation_plan(["src/main.py", "tests/test_main.py"]) + assert plan.strategy == ValidationStrategy.TEST_SUITE + class TestValidationPlan: """Tests for the ValidationPlan dataclass.""" def test_default_values(self): plan = ValidationPlan() + assert plan.strategy == ValidationStrategy.SKIP assert plan.template is None assert plan.commands == [] + assert plan.script_file is None assert plan.description == "" def test_custom_values(self): plan = ValidationPlan( + strategy=ValidationStrategy.TEST_SUITE, template="fullstack", commands=["pnpm check"], description="Frontend validation", ) + assert plan.strategy == ValidationStrategy.TEST_SUITE assert plan.template == "fullstack" assert plan.commands == ["pnpm check"] + def test_script_exec_plan(self): + plan = ValidationPlan( + strategy=ValidationStrategy.SCRIPT_EXEC, + script_file="hello.py", + description="Run hello.py", + ) + assert plan.strategy == ValidationStrategy.SCRIPT_EXEC + assert plan.script_file == "hello.py" + assert plan.commands == [] + class TestFormatValidationSummary: """Tests for format_validation_summary().""" - def test_no_commands(self): - plan = ValidationPlan(description="No validation needed") + def test_skip_strategy(self): + plan = ValidationPlan( + strategy=ValidationStrategy.SKIP, + description="No validation needed", + ) result = format_validation_summary(plan) assert result == "No validation needed" - def test_with_commands(self): + def test_script_exec_strategy(self): + plan = ValidationPlan( + strategy=ValidationStrategy.SCRIPT_EXEC, + script_file="hello.py", + description="Script execution: 1 files", + ) + result = format_validation_summary(plan) + assert "Script execution" in result + assert "python hello.py" in result + + def test_test_suite_with_commands(self): plan = ValidationPlan( + strategy=ValidationStrategy.TEST_SUITE, description="Python validation: 2 files", commands=["ruff check .", "pytest tests/"], ) @@ -164,6 +320,7 @@ def test_with_commands(self): def test_commands_indented(self): plan = ValidationPlan( + strategy=ValidationStrategy.TEST_SUITE, description="Test", commands=["cmd1"], )