From a7a6a49f1f9a66ff0f63adfc809a1e64c0657492 Mon Sep 17 00:00:00 2001 From: Jonathan Haas Date: Wed, 11 Mar 2026 12:12:21 -0700 Subject: [PATCH] fix: 3 bugs found via mutation testing + 55 adversarial tests Bugs fixed: - compression: zero token budget and all-deletion-only diffs fell through to Stage 3/4 instead of returning early when no diffs fit the budget - llm_response: JSON parser silently dropped findings when LLMs returned line numbers as strings ("42") instead of integers (42) New tests cover mutation testing gaps and adversarial edge cases across compression, triage, verification, and LLM response parsing. Co-Authored-By: Claude Opus 4.6 --- src/parsing/llm_response.rs | 195 +++++++++++++++++++++++++++++++++- src/review/compression.rs | 201 +++++++++++++++++++++++++++++++++++- src/review/triage.rs | 150 +++++++++++++++++++++++++++ src/review/verification.rs | 106 +++++++++++++++++++ 4 files changed, 646 insertions(+), 6 deletions(-) diff --git a/src/parsing/llm_response.rs b/src/parsing/llm_response.rs index 434f42a..4cf26b1 100644 --- a/src/parsing/llm_response.rs +++ b/src/parsing/llm_response.rs @@ -246,12 +246,15 @@ fn parse_json_format(content: &str, file_path: &Path) -> Vec().ok())) + })?; let text = item .get("issue") .or_else(|| item.get("description")) @@ -703,4 +706,188 @@ let data = &input; assert_eq!(comments.len(), 1); assert!(comments[0].code_suggestion.is_some()); } + + // ── Mutation-testing gap fills ───────────────────────────────────── + + #[test] + fn parse_primary_skips_code_fence_markers() { + // ``` markers themselves are skipped (not parsed as comments) + let input = "```rust\n```"; + let file_path = PathBuf::from("src/lib.rs"); + let comments = parse_llm_response(input, &file_path).unwrap(); + assert!(comments.is_empty()); + } + + #[test] + fn parse_primary_skips_heading_lines() { + let input = "# Code Review\nLine 10: Real issue"; + let file_path = PathBuf::from("src/lib.rs"); + let comments = parse_llm_response(input, &file_path).unwrap(); + // Heading line skipped, but real Line 10 comment caught + assert_eq!(comments.len(), 1); + assert_eq!(comments[0].line_number, 10); + } + + #[test] + fn parse_primary_skips_preamble_lines() { + let input = "Here are the issues I found:\nLine 5: Missing check"; + let file_path = PathBuf::from("src/lib.rs"); + let comments = parse_llm_response(input, &file_path).unwrap(); + assert_eq!(comments.len(), 1); + assert_eq!(comments[0].line_number, 5); + } + + #[test] + fn parse_json_in_code_block_strategy() { + // Test that extract_json_from_code_block specifically works + let input = "Here are findings:\n```json\n[{\"line\": 7, \"issue\": \"Off by one\"}]\n```"; + let file_path = PathBuf::from("src/lib.rs"); + let comments = parse_llm_response(input, &file_path).unwrap(); + assert_eq!(comments.len(), 1); + assert_eq!(comments[0].line_number, 7); + assert!(comments[0].content.contains("Off by one")); + } + + #[test] + fn parse_json_bare_array_strategy() { + // Test find_json_array with text before/after + let input = "Issues found: [{\"line\": 3, \"issue\": \"Bug\"}] end."; + let file_path = PathBuf::from("src/lib.rs"); + let comments = parse_llm_response(input, &file_path).unwrap(); + assert_eq!(comments.len(), 1); + assert_eq!(comments[0].line_number, 3); + } + + // ── Adversarial edge cases ────────────────────────────────────────── + + #[test] + fn parse_line_zero_not_panicking() { + // Line 0 is technically invalid but should not panic + let input = "Line 0: Edge case at line zero."; + let file_path = PathBuf::from("src/lib.rs"); + let comments = parse_llm_response(input, &file_path).unwrap(); + assert_eq!(comments.len(), 1); + assert_eq!(comments[0].line_number, 0); + } + + #[test] + fn parse_huge_line_number_no_overflow() { + let input = "Line 999999999999: Absurd line number."; + let file_path = PathBuf::from("src/lib.rs"); + // Should either parse successfully or return empty, not panic + let result = parse_llm_response(input, &file_path); + assert!(result.is_ok() || result.is_err()); + } + + #[test] + fn parse_unicode_content_no_panic() { + let input = "Line 10: 漏洞 — SQL注入风险 🔴"; + let file_path = PathBuf::from("src/lib.rs"); + let comments = parse_llm_response(input, &file_path).unwrap(); + assert_eq!(comments.len(), 1); + assert!(comments[0].content.contains("漏洞")); + } + + #[test] + fn parse_numbered_list_with_no_line_number_in_path() { + // Numbered list where file path is missing the colon-number + let input = "1. **src/lib.rs** - Missing null check"; + let file_path = PathBuf::from("src/lib.rs"); + let comments = parse_llm_response(input, &file_path).unwrap(); + // Should not extract a comment with a bogus line number + for c in &comments { + assert!(c.line_number > 0 || comments.is_empty()); + } + } + + #[test] + fn parse_json_with_nested_brackets() { + // JSON with nested arrays/objects should not confuse the bracket finder + let input = + r#"[{"line": 10, "issue": "Bug with [array] access", "details": {"nested": [1,2,3]}}]"#; + let file_path = PathBuf::from("src/lib.rs"); + let comments = parse_llm_response(input, &file_path).unwrap(); + assert_eq!(comments.len(), 1); + assert_eq!(comments[0].line_number, 10); + } + + #[test] + fn parse_json_with_line_number_as_string() { + // Some LLMs return line numbers as strings — should be handled + let input = r#"[{"line": "42", "issue": "Bug"}]"#; + let file_path = PathBuf::from("src/lib.rs"); + let comments = parse_llm_response(input, &file_path).unwrap(); + assert_eq!(comments.len(), 1); + assert_eq!(comments[0].line_number, 42); + } + + #[test] + fn parse_malformed_json_no_panic() { + let input = r#"[{"line": 10, "issue": "unclosed string}]"#; + let file_path = PathBuf::from("src/lib.rs"); + let comments = parse_llm_response(input, &file_path).unwrap(); + assert!(comments.is_empty()); + } + + #[test] + fn parse_mixed_strategies_first_wins() { + // Input matches both primary AND numbered list — primary should win + let input = "Line 10: Primary format.\n1. **src/lib.rs:20** - Numbered format."; + let file_path = PathBuf::from("src/lib.rs"); + let comments = parse_llm_response(input, &file_path).unwrap(); + // Primary parser should have caught Line 10, so we get that + assert_eq!(comments[0].line_number, 10); + } + + #[test] + fn parse_code_suggestion_without_preceding_comment() { + // <<>>SUGGESTED + let input = "Line 5: Issue here.\n<<>() + ); + } } diff --git a/src/review/compression.rs b/src/review/compression.rs index dcaf604..1c07a8e 100644 --- a/src/review/compression.rs +++ b/src/review/compression.rs @@ -234,7 +234,19 @@ pub fn compress_diffs( } } - if skipped.is_empty() && !included.is_empty() { + if included.is_empty() { + // Nothing fit after compression (all deletion-only or budget exhausted) + skipped.sort(); + let summary = build_skipped_summary(diffs, &skipped); + return CompressionResult { + strategy: CompressionStrategy::Compressed, + batches: vec![], + skipped_indices: skipped, + skipped_summary: summary, + }; + } + + if skipped.is_empty() { // Everything fit after compression included.sort(); return CompressionResult { @@ -248,7 +260,7 @@ pub fn compress_diffs( }; } - if !skipped.is_empty() && !included.is_empty() && max_calls <= 1 { + if max_calls <= 1 { // Can't split further — return compressed result with skipped files included.sort(); skipped.sort(); @@ -698,4 +710,189 @@ mod tests { "File should be either clipped or skipped" ); } + + // ── Mutation-testing gap fills ───────────────────────────────────── + + #[test] + fn test_estimate_tokens_includes_overhead() { + // Even an empty hunk should have overhead from file path + hunk header + let diff = make_diff("a.rs", vec![make_hunk(vec![])]); + let tokens = estimate_diff_tokens(&diff); + assert!(tokens > 0, "Empty hunk should still have overhead tokens"); + } + + #[test] + fn test_estimate_tokens_content_proportional() { + // Adding more content should always increase token count + let small = make_simple_diff("a.rs", 100); + let medium = make_simple_diff("a.rs", 500); + let large = make_simple_diff("a.rs", 2000); + let t_small = estimate_diff_tokens(&small); + let t_medium = estimate_diff_tokens(&medium); + let t_large = estimate_diff_tokens(&large); + assert!( + t_small < t_medium, + "small={} >= medium={}", + t_small, + t_medium + ); + assert!( + t_medium < t_large, + "medium={} >= large={}", + t_medium, + t_large + ); + } + + #[test] + fn test_clip_diff_reduces_size() { + let diff = make_diff( + "file.rs", + vec![ + make_hunk(vec![make_line(ChangeType::Added, &"a".repeat(500))]), + make_hunk(vec![make_line(ChangeType::Added, &"b".repeat(500))]), + make_hunk(vec![make_line(ChangeType::Added, &"c".repeat(500))]), + ], + ); + let original_tokens = estimate_diff_tokens(&diff); + let clipped = clip_diff(&diff, 600).unwrap(); // budget fits ~1 hunk + let clipped_tokens = estimate_diff_tokens(&clipped); + assert!( + clipped_tokens < original_tokens, + "Clipped ({}) should be smaller than original ({})", + clipped_tokens, + original_tokens + ); + } + + #[test] + fn test_compress_stage2_budget_accounting() { + // Verify that included files actually fit within the budget + let diffs = vec![ + make_simple_diff("small.rs", 200), + make_simple_diff("medium.rs", 500), + make_simple_diff("large.rs", 2000), + ]; + let budget = estimate_diff_tokens(&diffs[0]) + estimate_diff_tokens(&diffs[1]) + 10; + let result = compress_diffs(&diffs, budget, 1); + for batch in &result.batches { + assert!( + batch.estimated_tokens <= budget, + "Batch tokens {} exceeds budget {}", + batch.estimated_tokens, + budget + ); + } + } + + #[test] + fn test_skipped_summary_out_of_bounds_index() { + let diffs = vec![make_simple_diff("a.rs", 100)]; + // Index 99 is out of bounds — should not panic + let summary = build_skipped_summary(&diffs, &[99]); + // Out-of-bounds index is silently ignored + assert!(summary.is_empty()); + } + + // ── Adversarial edge cases ────────────────────────────────────────── + + #[test] + fn test_zero_token_budget() { + let diffs = vec![make_simple_diff("a.rs", 100)]; + // Budget of 0 — should not panic, everything skipped or clipped + let result = compress_diffs(&diffs, 0, 5); + // With 0 budget nothing should fit in Full/Compressed, but the algorithm + // should still produce some output without panicking + assert!( + result.batches.is_empty() || result.batches.iter().all(|b| b.diff_indices.is_empty()), + "Nothing should fit in zero budget" + ); + } + + #[test] + fn test_max_calls_zero_treated_as_one() { + let diffs = vec![make_simple_diff("a.rs", 100)]; + // max_calls=0 should be clamped to 1, not panic + let result = compress_diffs(&diffs, 10000, 0); + assert_eq!(result.strategy, CompressionStrategy::Full); + } + + #[test] + fn test_diff_with_empty_hunks() { + let diff = make_diff("empty.rs", vec![]); + let tokens = estimate_diff_tokens(&diff); + assert!(tokens < 50, "Empty diff should have minimal tokens"); + let result = compress_diffs(&[diff], 10000, 5); + assert_eq!(result.strategy, CompressionStrategy::Full); + } + + #[test] + fn test_all_diffs_deletion_only_compressed_away() { + // Every diff has only deletion hunks — compress_diff returns None for all. + // Use a budget smaller than total tokens to force Stage 2 (where compression happens). + let diffs = vec![ + make_diff( + "a.rs", + vec![make_hunk(vec![make_line( + ChangeType::Removed, + &"x".repeat(200), + )])], + ), + make_diff( + "b.rs", + vec![make_hunk(vec![make_line( + ChangeType::Removed, + &"y".repeat(200), + )])], + ), + ]; + let total = diffs.iter().map(|d| estimate_diff_tokens(d)).sum::(); + // Budget smaller than total forces Stage 2, where deletion-only hunks get removed + let result = compress_diffs(&diffs, total / 2, 5); + // All files should be skipped since they're deletion-only after compression + assert_eq!( + result.skipped_indices.len(), + 2, + "Both deletion-only diffs should be skipped, got {:?}", + result + ); + } + + #[test] + fn test_no_duplicate_indices_across_batches() { + let diffs: Vec<_> = (0..8) + .map(|i| make_simple_diff(&format!("f{}.rs", i), 2000)) + .collect(); + let single_tokens = estimate_diff_tokens(&diffs[0]); + let result = compress_diffs(&diffs, single_tokens * 2, 4); + + let mut seen = std::collections::HashSet::new(); + for batch in &result.batches { + for &idx in &batch.diff_indices { + assert!(seen.insert(idx), "Duplicate index {} across batches", idx); + } + } + for &idx in &result.skipped_indices { + assert!( + seen.insert(idx), + "Index {} appears in both batches and skipped", + idx + ); + } + } + + #[test] + fn test_budget_of_one_token() { + let diffs = vec![make_simple_diff("a.rs", 100), make_simple_diff("b.rs", 100)]; + // Budget so tiny nothing should fit via compressed + let result = compress_diffs(&diffs, 1, 1); + // Should not panic; files end up skipped or in a clipped batch + let total_accounted = result + .batches + .iter() + .map(|b| b.diff_indices.len()) + .sum::() + + result.skipped_indices.len(); + assert_eq!(total_accounted, 2, "All files must be accounted for"); + } } diff --git a/src/review/triage.rs b/src/review/triage.rs index 605c7d3..c6004c3 100644 --- a/src/review/triage.rs +++ b/src/review/triage.rs @@ -812,4 +812,154 @@ mod tests { }); assert_eq!(triage_diff(&diff), TriageResult::NeedsReview); } + + // ── TriageResult method tests (caught by mutation testing) ───────── + + #[test] + fn test_should_skip_returns_true_for_skip_variants() { + assert!(TriageResult::SkipLockFile.should_skip()); + assert!(TriageResult::SkipWhitespaceOnly.should_skip()); + assert!(TriageResult::SkipDeletionOnly.should_skip()); + assert!(TriageResult::SkipGenerated.should_skip()); + assert!(TriageResult::SkipCommentOnly.should_skip()); + } + + #[test] + fn test_should_skip_returns_false_for_needs_review() { + assert!(!TriageResult::NeedsReview.should_skip()); + } + + #[test] + fn test_reason_returns_nonempty_strings() { + assert!(!TriageResult::NeedsReview.reason().is_empty()); + assert!(!TriageResult::SkipLockFile.reason().is_empty()); + assert!(!TriageResult::SkipWhitespaceOnly.reason().is_empty()); + assert!(!TriageResult::SkipDeletionOnly.reason().is_empty()); + assert!(!TriageResult::SkipGenerated.reason().is_empty()); + assert!(!TriageResult::SkipCommentOnly.reason().is_empty()); + } + + #[test] + fn test_reason_differs_per_variant() { + let reasons: Vec<&str> = vec![ + TriageResult::NeedsReview.reason(), + TriageResult::SkipLockFile.reason(), + TriageResult::SkipWhitespaceOnly.reason(), + TriageResult::SkipDeletionOnly.reason(), + TriageResult::SkipGenerated.reason(), + TriageResult::SkipCommentOnly.reason(), + ]; + // All should be unique + let unique: std::collections::HashSet<&str> = reasons.iter().copied().collect(); + assert_eq!( + unique.len(), + reasons.len(), + "Reason strings should be unique" + ); + } + + // ── Adversarial edge cases ────────────────────────────────────────── + + #[test] + fn test_whitespace_only_unequal_line_counts() { + // More added lines than removed — not whitespace-only + let diff = make_diff( + "src/lib.rs", + vec![ + make_line(1, ChangeType::Removed, " let x = 1;"), + make_line(1, ChangeType::Added, " let x = 1;"), + make_line(2, ChangeType::Added, " let y = 2;"), + ], + ); + assert_eq!(triage_diff(&diff), TriageResult::NeedsReview); + } + + #[test] + fn test_whitespace_only_content_differs() { + // Same whitespace but different content — not whitespace-only + let diff = make_diff( + "src/lib.rs", + vec![ + make_line(1, ChangeType::Removed, "let x = 1;"), + make_line(1, ChangeType::Added, "let x = 2;"), + ], + ); + assert_eq!(triage_diff(&diff), TriageResult::NeedsReview); + } + + #[test] + fn test_comment_hash_without_space_is_not_comment() { + // "#tag" — not a comment prefix since we changed to "# " + let diff = make_diff( + "src/lib.rs", + vec![make_line(1, ChangeType::Added, "#tag: important")], + ); + assert_eq!(triage_diff(&diff), TriageResult::NeedsReview); + } + + #[test] + fn test_python_comment_with_space_is_comment() { + let diff = make_diff( + "src/app.py", + vec![make_line( + 1, + ChangeType::Added, + "# this is a python comment", + )], + ); + assert_eq!(triage_diff(&diff), TriageResult::SkipCommentOnly); + } + + #[test] + fn test_unicode_file_path() { + let diff = make_diff( + "src/données/模型.rs", + vec![make_line(1, ChangeType::Added, "let x = 1;")], + ); + // Should not panic, should be NeedsReview + assert_eq!(triage_diff(&diff), TriageResult::NeedsReview); + } + + #[test] + fn test_file_with_only_context_lines_and_no_changes() { + let diff = make_diff( + "src/lib.rs", + vec![ + make_line(1, ChangeType::Context, "fn main() {}"), + make_line(2, ChangeType::Context, "fn helper() {}"), + ], + ); + // All context, no actual changes → NeedsReview (default) + assert_eq!(triage_diff(&diff), TriageResult::NeedsReview); + } + + #[test] + fn test_generated_file_with_dots_in_path() { + // .g. should match but .git/ should not + let diff = make_diff( + ".github/workflows/ci.yml", + vec![make_line(1, ChangeType::Added, "name: CI")], + ); + assert_eq!(triage_diff(&diff), TriageResult::NeedsReview); + } + + #[test] + fn test_empty_string_file_path() { + let diff = make_diff("", vec![make_line(1, ChangeType::Added, "new code")]); + // Should not panic + assert_eq!(triage_diff(&diff), TriageResult::NeedsReview); + } + + #[test] + fn test_all_blank_lines_is_comment_only() { + // Blank lines count as "comment" in is_comment_line + let diff = make_diff( + "src/lib.rs", + vec![ + make_line(1, ChangeType::Added, ""), + make_line(2, ChangeType::Added, " "), + ], + ); + assert_eq!(triage_diff(&diff), TriageResult::SkipCommentOnly); + } } diff --git a/src/review/verification.rs b/src/review/verification.rs index bdaaad9..78ee7ad 100644 --- a/src/review/verification.rs +++ b/src/review/verification.rs @@ -390,4 +390,110 @@ mod tests { fn test_is_auto_zero_import_order() { assert!(is_auto_zero("import order should be alphabetical")); } + + // ── Mutation-testing gap fills ───────────────────────────────────── + + #[test] + fn test_safe_utf8_prefix_short_string() { + let result = safe_utf8_prefix("hello", 100); + assert_eq!(result, "hello"); + } + + #[test] + fn test_safe_utf8_prefix_exact_boundary() { + let result = safe_utf8_prefix("hello", 5); + assert_eq!(result, "hello"); + } + + #[test] + fn test_safe_utf8_prefix_truncates() { + let result = safe_utf8_prefix("hello world", 5); + assert_eq!(result, "hello"); + } + + #[test] + fn test_safe_utf8_prefix_multibyte() { + // "é" is 2 bytes. "éé" = 4 bytes. Truncating at 3 should give "é" (2 bytes). + let result = safe_utf8_prefix("éé", 3); + assert_eq!(result, "é"); + } + + #[test] + fn test_safe_utf8_prefix_emoji() { + // "😀" is 4 bytes. Truncating at 2 should give empty since we can't split the emoji. + let result = safe_utf8_prefix("😀hello", 2); + assert!(result.is_empty() || result.len() <= 2); + } + + #[test] + fn test_safe_utf8_prefix_empty() { + let result = safe_utf8_prefix("", 100); + assert_eq!(result, ""); + } + + // ── Adversarial edge cases ────────────────────────────────────────── + + #[test] + fn test_parse_verification_response_duplicate_findings() { + // LLM returns two results for the same finding + let comments = vec![make_comment("c1", "issue", 10)]; + let response = "FINDING 1: score=9 accurate=true reason=First\nFINDING 1: score=3 accurate=false reason=Second"; + let results = parse_verification_response(response, &comments); + // Both should be captured (first one wins in filter since find() returns first) + let c1_results: Vec<_> = results.iter().filter(|r| r.comment_id == "c1").collect(); + assert!( + c1_results.len() >= 1, + "Should have at least one result for c1" + ); + } + + #[test] + fn test_parse_verification_extra_whitespace() { + let comments = vec![make_comment("c1", "issue", 10)]; + let response = "FINDING 1 : score = 8 accurate = true reason = Valid bug"; + let results = parse_verification_response(response, &comments); + // The regex uses \s+ so extra spaces should be handled + assert_eq!(results.len(), 1); + assert_eq!(results[0].score, 8); + } + + #[test] + fn test_parse_verification_response_with_surrounding_text() { + let comments = vec![make_comment("c1", "issue", 10)]; + let response = "Here are my verification results:\n\nFINDING 1: score=7 accurate=true reason=Valid\n\nOverall the code looks good."; + let results = parse_verification_response(response, &comments); + assert_eq!(results.len(), 1); + assert_eq!(results[0].score, 7); + } + + #[test] + fn test_is_auto_zero_case_sensitivity() { + // Auto-zero should be case-insensitive + assert!(is_auto_zero("MISSING DOCSTRING")); + assert!(is_auto_zero("Type Annotation missing")); + assert!(is_auto_zero("IMPORT ORDER")); + } + + #[test] + fn test_is_auto_zero_partial_match_false_positive() { + // "import" appears in "important" but "import order" does not + assert!(!is_auto_zero("This is an important security fix")); + // "type hint" appears in "cryptotype hinting" — substring match + // This IS a known limitation of substring matching + assert!(!is_auto_zero("The cryptographic module is broken")); + } + + #[test] + fn test_build_verification_prompt_empty_comments() { + let prompt = build_verification_prompt(&[], "some diff"); + assert!(prompt.contains("## Code Diff")); + assert!(prompt.contains("## Findings to Verify")); + } + + #[test] + fn test_build_verification_prompt_empty_diff() { + let comments = vec![make_comment("c1", "issue", 10)]; + let prompt = build_verification_prompt(&comments, ""); + assert!(prompt.contains("Finding 1")); + } }