diff --git a/src/adapters/anthropic.rs b/src/adapters/anthropic.rs index a74c3dc..1a65fde 100644 --- a/src/adapters/anthropic.rs +++ b/src/adapters/anthropic.rs @@ -121,14 +121,18 @@ impl LLMAdapter for AnthropicAdapter { .content .first() .map(|c| { - // Verify it's a text content type if c.content_type == "text" { - c.text.clone() + Ok(c.text.clone()) } else { - format!("Unsupported content type: {}", c.content_type) + Err(anyhow::anyhow!( + "Unsupported content type: {}", + c.content_type + )) } }) - .unwrap_or_default(); + .ok_or_else(|| { + anyhow::anyhow!("Anthropic returned empty content array — no content generated") + })??; Ok(LLMResponse { content, @@ -292,7 +296,7 @@ mod tests { } #[tokio::test] - async fn test_unsupported_content_type() { + async fn test_unsupported_content_type_returns_error() { let mut server = mockito::Server::new_async().await; let _mock = server .mock("POST", "/messages") @@ -311,17 +315,20 @@ mod tests { let adapter = AnthropicAdapter::new(test_config(&server.url())).unwrap(); let result = adapter.complete(test_request()).await; - assert!(result.is_ok()); - let response = result.unwrap(); assert!( - response.content.contains("Unsupported content type"), - "Expected 'Unsupported content type' in response, got: {}", - response.content + result.is_err(), + "Unsupported content type should return an error" + ); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("Unsupported content type"), + "Error should mention unsupported type, got: {}", + err ); } #[tokio::test] - async fn test_empty_content_array() { + async fn test_empty_content_array_returns_error() { let mut server = mockito::Server::new_async().await; let _mock = server .mock("POST", "/messages") @@ -340,9 +347,16 @@ mod tests { let adapter = AnthropicAdapter::new(test_config(&server.url())).unwrap(); let result = adapter.complete(test_request()).await; - assert!(result.is_ok()); - let response = result.unwrap(); - assert_eq!(response.content, ""); + assert!( + result.is_err(), + "Empty content array should return an error, not silently succeed" + ); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("empty content"), + "Error should mention empty content: {}", + err + ); } #[test] diff --git a/src/adapters/openai.rs b/src/adapters/openai.rs index 927c593..8b6495a 100644 --- a/src/adapters/openai.rs +++ b/src/adapters/openai.rs @@ -196,7 +196,9 @@ impl OpenAIAdapter { .choices .first() .map(|c| c.message.content.clone()) - .unwrap_or_default(); + .ok_or_else(|| { + anyhow::anyhow!("OpenAI returned empty choices array — no content generated") + })?; Ok(LLMResponse { content, @@ -416,7 +418,7 @@ mod tests { } #[tokio::test] - async fn test_empty_choices_returns_empty_content() { + async fn test_empty_choices_returns_error() { let mut server = mockito::Server::new_async().await; let _mock = server .mock("POST", "/chat/completions") @@ -435,9 +437,16 @@ mod tests { let adapter = OpenAIAdapter::new(test_config(&server.url())).unwrap(); let result = adapter.complete(test_request()).await; - assert!(result.is_ok()); - let response = result.unwrap(); - assert_eq!(response.content, ""); + assert!( + result.is_err(), + "Empty choices should return an error, not silently succeed" + ); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("empty choices"), + "Error should mention empty choices: {}", + err + ); } #[test] diff --git a/src/config.rs b/src/config.rs index f63ea9e..18c9217 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1061,7 +1061,7 @@ impl Config { } fn default_model() -> String { - "claude-sonnet-4-6".to_string() + "claude-opus-4-6".to_string() } fn default_temperature() -> f32 { @@ -1764,4 +1764,16 @@ temperature: 0.3 assert!(config.model_embedding.is_none()); assert!(config.fallback_models.is_empty()); } + + #[test] + fn test_default_model_is_frontier() { + // Per CLAUDE.md: "Always use frontier models (Opus) for AI-powered features + // — never default to Sonnet or smaller" + let model = default_model(); + assert!( + model.contains("opus"), + "Default model should be Opus (frontier), got: {}", + model + ); + } } diff --git a/src/core/comment.rs b/src/core/comment.rs index 74ef90c..205233c 100644 --- a/src/core/comment.rs +++ b/src/core/comment.rs @@ -449,12 +449,23 @@ impl CommentSynthesizer { } fn deduplicate_comments(comments: &mut Vec) { + let severity_rank = |s: &Severity| match s { + Severity::Error => 0, + Severity::Warning => 1, + Severity::Info => 2, + Severity::Suggestion => 3, + }; + + // Sort by file/line/content, then by severity (highest first) comments.sort_by(|a, b| { a.file_path .cmp(&b.file_path) .then(a.line_number.cmp(&b.line_number)) .then(a.content.cmp(&b.content)) + .then(severity_rank(&a.severity).cmp(&severity_rank(&b.severity))) }); + // dedup_by keeps the first element (b) of consecutive duplicates, + // which is the highest severity due to our sort order comments.dedup_by(|a, b| { a.file_path == b.file_path && a.line_number == b.line_number && a.content == b.content }); @@ -548,6 +559,49 @@ pub struct RawComment { mod tests { use super::*; + #[test] + fn test_deduplicate_preserves_highest_severity() { + // Regression: dedup_by keeps the first element of a consecutive pair, + // but doesn't consider severity. If Warning comes before Error + // (due to stable sort on file/line/content), the Error is dropped. + let raw_comments = vec![ + RawComment { + file_path: PathBuf::from("src/lib.rs"), + line_number: 10, + content: "Missing null check".to_string(), + rule_id: None, + suggestion: None, + severity: Some(Severity::Warning), + category: Some(Category::Bug), + confidence: Some(0.8), + fix_effort: None, + tags: Vec::new(), + code_suggestion: None, + }, + RawComment { + file_path: PathBuf::from("src/lib.rs"), + line_number: 10, + content: "Missing null check".to_string(), + rule_id: None, + suggestion: None, + severity: Some(Severity::Error), + category: Some(Category::Bug), + confidence: Some(0.9), + fix_effort: None, + tags: Vec::new(), + code_suggestion: None, + }, + ]; + + let comments = CommentSynthesizer::synthesize(raw_comments).unwrap(); + assert_eq!(comments.len(), 1, "Should deduplicate to one comment"); + assert_eq!( + comments[0].severity, + Severity::Error, + "Should keep the higher severity (Error), not the lower (Warning)" + ); + } + #[test] fn test_severity_display() { assert_eq!(Severity::Error.to_string(), "Error"); diff --git a/src/core/composable_pipeline.rs b/src/core/composable_pipeline.rs index 0735245..322fe07 100644 --- a/src/core/composable_pipeline.rs +++ b/src/core/composable_pipeline.rs @@ -211,12 +211,21 @@ impl PipelineStage for DeduplicateStage { } fn execute(&self, ctx: &mut PipelineContext) -> Result<()> { + use crate::core::comment::Severity; + let severity_rank = |s: &Severity| match s { + Severity::Error => 0, + Severity::Warning => 1, + Severity::Info => 2, + Severity::Suggestion => 3, + }; ctx.comments.sort_by(|a, b| { a.file_path .cmp(&b.file_path) .then(a.line_number.cmp(&b.line_number)) .then(a.content.cmp(&b.content)) + .then(severity_rank(&a.severity).cmp(&severity_rank(&b.severity))) }); + // dedup_by keeps b (the earlier element); our sort puts highest severity first ctx.comments.dedup_by(|a, b| { a.file_path == b.file_path && a.line_number == b.line_number && a.content == b.content }); @@ -688,6 +697,28 @@ mod tests { assert_eq!(ctx.comments.len(), 2); } + // Regression: DeduplicateStage must keep the highest severity when deduplicating + #[test] + fn test_deduplicate_preserves_highest_severity() { + let mut ctx = PipelineContext::new(); + let mut info = make_comment("test.rs", 10, "duplicate issue", 0.7); + info.severity = Severity::Info; + let mut error = make_comment("test.rs", 10, "duplicate issue", 0.7); + error.severity = Severity::Error; + // Insert lower severity first to ensure sort fixes order + ctx.comments.push(info); + ctx.comments.push(error); + + let stage = DeduplicateStage; + stage.execute(&mut ctx).unwrap(); + assert_eq!(ctx.comments.len(), 1); + assert_eq!( + ctx.comments[0].severity, + Severity::Error, + "DeduplicateStage should keep the highest severity" + ); + } + // Regression: SortBySeverityStage must sort Error > Warning > Info #[test] fn test_sort_by_severity_order() { diff --git a/src/core/context.rs b/src/core/context.rs index faea834..62ab295 100644 --- a/src/core/context.rs +++ b/src/core/context.rs @@ -15,7 +15,7 @@ pub struct LLMContextChunk { pub line_range: Option<(usize, usize)>, } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub enum ContextType { FileContent, Definition, diff --git a/src/core/diff_parser.rs b/src/core/diff_parser.rs index c461663..cb87b98 100644 --- a/src/core/diff_parser.rs +++ b/src/core/diff_parser.rs @@ -181,8 +181,8 @@ impl DiffParser { new_content: Some(new_content.to_string()), hunks, is_binary: false, - is_deleted: false, - is_new: false, + is_deleted: new_content.is_empty() && !old_content.is_empty(), + is_new: old_content.is_empty() && !new_content.is_empty(), }) } @@ -368,16 +368,17 @@ impl DiffParser { *i += 1; continue; } - if line.is_empty() { - *i += 1; - continue; - } - - let (change_type, content) = match line.chars().next() { - Some('+') => (ChangeType::Added, &line[1..]), - Some('-') => (ChangeType::Removed, &line[1..]), - Some(' ') => (ChangeType::Context, &line[1..]), - _ => (ChangeType::Context, line), + let (change_type, content) = if line.is_empty() { + // Some diff tools emit truly empty lines for empty context lines + // (omitting the leading space). Treat as context to keep line numbers in sync. + (ChangeType::Context, "") + } else { + match line.chars().next() { + Some('+') => (ChangeType::Added, &line[1..]), + Some('-') => (ChangeType::Removed, &line[1..]), + Some(' ') => (ChangeType::Context, &line[1..]), + _ => (ChangeType::Context, line), + } }; let diff_line = match change_type { @@ -540,4 +541,62 @@ index 0000000..f735c20\n\ assert!(diffs[0].is_new); assert!(!diffs[0].is_deleted); } + + #[test] + fn test_parse_hunk_empty_lines_not_skipped() { + // Regression: empty lines in diff body were previously skipped. + // An empty line (no leading space) in some diff tools represents an empty + // context line. The parser should treat it as context, not skip it. + let diff_text = "\ +diff --git a/test.txt b/test.txt\n\ +index abc..def 100644\n\ +--- a/test.txt\n\ ++++ b/test.txt\n\ +@@ -1,4 +1,4 @@\n\ + line1\n\ +\n\ +-old_line3\n\ ++new_line3\n"; + // The empty line (between "line1" and "-old_line3") should be treated as + // context line 2. Without it, line numbers after the empty line are wrong. + let diffs = DiffParser::parse_unified_diff(diff_text).unwrap(); + assert_eq!(diffs.len(), 1); + let hunk = &diffs[0].hunks[0]; + + // Find the removed line — it should be on line 3, not line 2 + let removed = hunk + .changes + .iter() + .find(|c| c.change_type == ChangeType::Removed) + .expect("Should have a removed line"); + assert_eq!( + removed.old_line_no, + Some(3), + "Removed line should be on old line 3 (after the empty context line 2), got {:?}", + removed.old_line_no + ); + } + + #[test] + fn test_parse_text_diff_sets_is_new_for_new_file() { + // Regression: parse_text_diff must set is_new when old_content is empty + let diff = + DiffParser::parse_text_diff("", "new content\n", PathBuf::from("new_file.rs")).unwrap(); + assert!( + diff.is_new, + "parse_text_diff with empty old content should set is_new=true" + ); + } + + #[test] + fn test_parse_text_diff_sets_is_deleted_for_deleted_file() { + // Regression: parse_text_diff must set is_deleted when new_content is empty + let diff = + DiffParser::parse_text_diff("old content\n", "", PathBuf::from("deleted_file.rs")) + .unwrap(); + assert!( + diff.is_deleted, + "parse_text_diff with empty new content should set is_deleted=true" + ); + } } diff --git a/src/core/function_chunker.rs b/src/core/function_chunker.rs index 7f72a86..6c4c41e 100644 --- a/src/core/function_chunker.rs +++ b/src/core/function_chunker.rs @@ -255,16 +255,19 @@ fn find_function_end(lines: &[&str], start: usize, language: &str) -> usize { let mut depth = 0i32; let mut found_open = false; for (i, line) in lines.iter().enumerate().skip(start) { - let mut in_string = false; + let mut in_double_quote = false; + let mut in_single_quote = false; let mut escaped = false; for ch in line.chars() { if escaped { escaped = false; - } else if in_string && ch == '\\' { + } else if (in_double_quote || in_single_quote) && ch == '\\' { escaped = true; - } else if ch == '"' { - in_string = !in_string; - } else if !in_string { + } else if ch == '"' && !in_single_quote { + in_double_quote = !in_double_quote; + } else if ch == '\'' && !in_double_quote && language != "rs" { + in_single_quote = !in_single_quote; + } else if !in_double_quote && !in_single_quote { if ch == '{' { depth += 1; found_open = true; @@ -911,6 +914,44 @@ fn next_func() { assert_eq!(result, Some(2)); } + #[test] + fn test_find_function_end_single_quoted_string_with_brace() { + // Regression: single-quoted strings must be tracked so `'{'` isn't a real brace + let lines: Vec<&str> = vec![ + "function foo() {", // line 0: depth = 1 + " let s = '{';", // line 1: depth should stay 1, but goes to 2 + " console.log(s);", // line 2 + "}", // line 3: depth goes to 1 (not 0!), so foo "never ends" + "", // line 4 + "function bar() {", // line 5: depth goes to 2 + " return 1;", // line 6 + "}", // line 7: depth goes to 1, and foo is "found" here (wrong!) + ]; + let end = find_function_end(&lines, 0, "js"); + assert_eq!( + end, 3, + "foo() should end at line 3, not bleed into bar() due to untracked single-quoted brace" + ); + } + + #[test] + fn test_find_function_end_rust_lifetime_not_string() { + // Regression: Rust lifetime annotations ('a, 'static) must not toggle + // in_single_quote, which would cause the opening brace to be ignored. + let lines: Vec<&str> = vec![ + "pub fn new(name: &'static str) -> Self {", // line 0: has lifetime ' + " Self { name }", // line 1 + "}", // line 2 + "", + "fn other() {", // line 4 + ]; + let end = find_function_end(&lines, 0, "rs"); + assert_eq!( + end, 2, + "Rust lifetime annotation should not break brace tracking" + ); + } + #[test] fn test_find_function_end_escaped_backslash_in_string() { // A string containing "\\" (escaped backslash) on the same line as braces. diff --git a/src/core/interactive.rs b/src/core/interactive.rs index 280c16e..4e3595a 100644 --- a/src/core/interactive.rs +++ b/src/core/interactive.rs @@ -246,6 +246,188 @@ Interactive commands respect these configurations."# } } +#[cfg(test)] +mod tests { + use super::*; + + // === InteractiveCommand::parse tests === + + #[test] + fn test_parse_review_command() { + let cmd = InteractiveCommand::parse("@diffscope review").unwrap(); + assert_eq!(cmd.command, CommandType::Review); + assert!(cmd.args.is_empty()); + assert!(cmd.context.is_none()); + } + + #[test] + fn test_parse_review_with_focus() { + let cmd = InteractiveCommand::parse("@diffscope review security performance").unwrap(); + assert_eq!(cmd.command, CommandType::Review); + assert_eq!(cmd.args, vec!["security", "performance"]); + } + + #[test] + fn test_parse_ignore_command() { + let cmd = InteractiveCommand::parse("@diffscope ignore src/generated/").unwrap(); + assert_eq!(cmd.command, CommandType::Ignore); + assert_eq!(cmd.args, vec!["src/generated/"]); + } + + #[test] + fn test_parse_explain_command() { + let cmd = InteractiveCommand::parse("@diffscope explain").unwrap(); + assert_eq!(cmd.command, CommandType::Explain); + } + + #[test] + fn test_parse_generate_tests() { + let cmd = InteractiveCommand::parse("@diffscope generate tests").unwrap(); + assert_eq!(cmd.command, CommandType::Generate); + assert_eq!(cmd.args, vec!["tests"]); + } + + #[test] + fn test_parse_help() { + let cmd = InteractiveCommand::parse("@diffscope help").unwrap(); + assert_eq!(cmd.command, CommandType::Help); + } + + #[test] + fn test_parse_config() { + let cmd = InteractiveCommand::parse("@diffscope config").unwrap(); + assert_eq!(cmd.command, CommandType::Config); + } + + #[test] + fn test_parse_case_insensitive() { + let cmd = InteractiveCommand::parse("@diffscope REVIEW").unwrap(); + assert_eq!(cmd.command, CommandType::Review); + + let cmd = InteractiveCommand::parse("@diffscope Help").unwrap(); + assert_eq!(cmd.command, CommandType::Help); + } + + #[test] + fn test_parse_unknown_command_returns_none() { + assert!(InteractiveCommand::parse("@diffscope foobar").is_none()); + } + + #[test] + fn test_parse_no_at_mention_returns_none() { + assert!(InteractiveCommand::parse("just a regular comment").is_none()); + } + + #[test] + fn test_parse_empty_string_returns_none() { + assert!(InteractiveCommand::parse("").is_none()); + } + + #[test] + fn test_parse_at_mention_only_returns_none() { + assert!(InteractiveCommand::parse("@diffscope").is_none()); + } + + #[test] + fn test_parse_embedded_in_text() { + // Command embedded in longer comment + let cmd = + InteractiveCommand::parse("Hey team, can you @diffscope review this PR?").unwrap(); + assert_eq!(cmd.command, CommandType::Review); + } + + #[test] + fn test_parse_extra_whitespace() { + let cmd = InteractiveCommand::parse("@diffscope review security").unwrap(); + assert_eq!(cmd.command, CommandType::Review); + assert_eq!(cmd.args, vec!["security"]); + } + + #[test] + fn test_parse_different_at_mention_ignored() { + assert!(InteractiveCommand::parse("@someone review").is_none()); + } + + // === execute_ignore tests === + + #[test] + fn test_ignore_no_args() { + let cmd = InteractiveCommand { + command: CommandType::Ignore, + args: vec![], + context: None, + }; + let result = cmd.execute_ignore().unwrap(); + assert!(result.contains("specify what to ignore")); + } + + #[test] + fn test_ignore_with_patterns() { + let cmd = InteractiveCommand { + command: CommandType::Ignore, + args: vec!["src/generated/".to_string(), "*.test.js".to_string()], + context: None, + }; + let result = cmd.execute_ignore().unwrap(); + assert!(result.contains("src/generated/")); + assert!(result.contains("*.test.js")); + } + + // === help_text / config tests === + + #[test] + fn test_help_text_contains_commands() { + let help = InteractiveCommand::help_text(); + assert!(help.contains("@diffscope review")); + assert!(help.contains("@diffscope ignore")); + assert!(help.contains("@diffscope explain")); + assert!(help.contains("@diffscope generate")); + assert!(help.contains("@diffscope help")); + assert!(help.contains("@diffscope config")); + } + + #[test] + fn test_config_info_contains_yaml() { + let info = InteractiveCommand::get_config_info(); + assert!(info.contains(".diffscope.yml")); + assert!(info.contains("exclude_patterns")); + } + + // === InteractiveProcessor tests === + + #[test] + fn test_processor_new_empty() { + let processor = InteractiveProcessor::new(); + assert!(!processor.should_ignore("any/path")); + } + + #[test] + fn test_processor_add_and_check_pattern() { + let mut processor = InteractiveProcessor::new(); + processor.add_ignore_pattern("src/generated/"); + assert!(processor.should_ignore("src/generated/types.rs")); + assert!(!processor.should_ignore("src/main.rs")); + } + + #[test] + fn test_processor_glob_pattern() { + let mut processor = InteractiveProcessor::new(); + processor.add_ignore_pattern("*.test.js"); + assert!(processor.should_ignore("foo.test.js")); + assert!(!processor.should_ignore("foo.js")); + } + + #[test] + fn test_processor_multiple_patterns() { + let mut processor = InteractiveProcessor::new(); + processor.add_ignore_pattern("vendor/"); + processor.add_ignore_pattern("*.generated.*"); + assert!(processor.should_ignore("vendor/lib.js")); + assert!(processor.should_ignore("types.generated.ts")); + assert!(!processor.should_ignore("src/main.rs")); + } +} + /// Manages per-session ignore patterns from @diffscope ignore commands. /// Will be wired into the review pipeline's triage filter. #[allow(dead_code)] diff --git a/src/main.rs b/src/main.rs index 3405cb1..b981969 100644 --- a/src/main.rs +++ b/src/main.rs @@ -304,32 +304,44 @@ async fn main() -> Result<()> { let _otel_guard: Option = { let otel_enabled = std::env::var("OTEL_EXPORTER_OTLP_ENDPOINT").is_ok(); if otel_enabled { - let exporter = opentelemetry_otlp::SpanExporter::builder() + match opentelemetry_otlp::SpanExporter::builder() .with_tonic() .build() - .expect("Failed to build OTLP span exporter"); - - let tracer_provider = opentelemetry_sdk::trace::TracerProvider::builder() - .with_batch_exporter(exporter, opentelemetry_sdk::runtime::Tokio) - .with_resource(opentelemetry_sdk::Resource::new(vec![ - opentelemetry::KeyValue::new("service.name", "diffscope"), - ])) - .build(); - - opentelemetry::global::set_tracer_provider(tracer_provider.clone()); - - let otel_layer = - tracing_opentelemetry::layer().with_tracer(tracer_provider.tracer("diffscope")); - - let subscriber = tracing_subscriber::fmt::Subscriber::builder() - .with_env_filter(filter) - .finish() - .with(otel_layer); - - tracing::subscriber::set_global_default(subscriber) - .expect("Failed to set tracing subscriber"); - - Some(tracer_provider) + { + Ok(exporter) => { + let tracer_provider = opentelemetry_sdk::trace::TracerProvider::builder() + .with_batch_exporter(exporter, opentelemetry_sdk::runtime::Tokio) + .with_resource(opentelemetry_sdk::Resource::new(vec![ + opentelemetry::KeyValue::new("service.name", "diffscope"), + ])) + .build(); + + opentelemetry::global::set_tracer_provider(tracer_provider.clone()); + + let otel_layer = tracing_opentelemetry::layer() + .with_tracer(tracer_provider.tracer("diffscope")); + + let subscriber = tracing_subscriber::fmt::Subscriber::builder() + .with_env_filter(filter) + .finish() + .with(otel_layer); + + if let Err(e) = tracing::subscriber::set_global_default(subscriber) { + eprintln!("Warning: failed to set OTEL tracing subscriber: {}", e); + // Already initialized by another thread or test — not fatal + } + + Some(tracer_provider) + } + Err(e) => { + eprintln!( + "Warning: OTEL_EXPORTER_OTLP_ENDPOINT set but exporter failed to initialize: {}. Continuing without OpenTelemetry.", + e + ); + tracing_subscriber::fmt().with_env_filter(filter).init(); + None + } + } } else { tracing_subscriber::fmt().with_env_filter(filter).init(); None diff --git a/src/plugins/builtin/duplicate_filter.rs b/src/plugins/builtin/duplicate_filter.rs index e4a6550..b7aca1c 100644 --- a/src/plugins/builtin/duplicate_filter.rs +++ b/src/plugins/builtin/duplicate_filter.rs @@ -33,3 +33,102 @@ impl PostProcessor for DuplicateFilter { Ok(comments) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::comment::{Category, FixEffort, Severity}; + use std::path::PathBuf; + + fn make_comment(file: &str, line: usize, content: &str) -> Comment { + Comment { + id: format!("c-{}-{}", file, line), + file_path: PathBuf::from(file), + line_number: line, + content: content.to_string(), + rule_id: None, + severity: Severity::Warning, + category: Category::Style, + suggestion: None, + confidence: 0.8, + code_suggestion: None, + tags: vec![], + fix_effort: FixEffort::Low, + feedback: None, + } + } + + #[test] + fn test_duplicate_filter_id() { + let filter = DuplicateFilter::new(); + assert_eq!(filter.id(), "duplicate_filter"); + } + + #[tokio::test] + async fn test_removes_exact_duplicates() { + let filter = DuplicateFilter::new(); + let comments = vec![ + make_comment("a.rs", 10, "fix this"), + make_comment("a.rs", 10, "fix this"), + make_comment("a.rs", 10, "fix this"), + ]; + let result = filter.run(comments, "/repo").await.unwrap(); + assert_eq!(result.len(), 1); + } + + #[tokio::test] + async fn test_keeps_different_lines() { + let filter = DuplicateFilter::new(); + let comments = vec![ + make_comment("a.rs", 10, "fix this"), + make_comment("a.rs", 20, "fix this"), + ]; + let result = filter.run(comments, "/repo").await.unwrap(); + assert_eq!(result.len(), 2); + } + + #[tokio::test] + async fn test_keeps_different_content() { + let filter = DuplicateFilter::new(); + let comments = vec![ + make_comment("a.rs", 10, "fix this"), + make_comment("a.rs", 10, "fix that"), + ]; + let result = filter.run(comments, "/repo").await.unwrap(); + assert_eq!(result.len(), 2); + } + + #[tokio::test] + async fn test_keeps_different_files() { + let filter = DuplicateFilter::new(); + let comments = vec![ + make_comment("a.rs", 10, "fix this"), + make_comment("b.rs", 10, "fix this"), + ]; + let result = filter.run(comments, "/repo").await.unwrap(); + assert_eq!(result.len(), 2); + } + + #[tokio::test] + async fn test_empty_input() { + let filter = DuplicateFilter::new(); + let result = filter.run(vec![], "/repo").await.unwrap(); + assert!(result.is_empty()); + } + + #[tokio::test] + async fn test_preserves_order() { + let filter = DuplicateFilter::new(); + let comments = vec![ + make_comment("c.rs", 1, "third"), + make_comment("a.rs", 1, "first"), + make_comment("b.rs", 1, "second"), + make_comment("a.rs", 1, "first"), // duplicate + ]; + let result = filter.run(comments, "/repo").await.unwrap(); + assert_eq!(result.len(), 3); + assert_eq!(result[0].content, "third"); + assert_eq!(result[1].content, "first"); + assert_eq!(result[2].content, "second"); + } +} diff --git a/src/plugins/builtin/eslint.rs b/src/plugins/builtin/eslint.rs index b376884..1713075 100644 --- a/src/plugins/builtin/eslint.rs +++ b/src/plugins/builtin/eslint.rs @@ -3,7 +3,6 @@ use crate::plugins::PreAnalyzer; use anyhow::Result; use async_trait::async_trait; use std::path::PathBuf; -use std::process::Command; pub struct EslintAnalyzer; @@ -13,6 +12,8 @@ impl EslintAnalyzer { } } +const JS_EXTENSIONS: &[&str] = &[".js", ".ts", ".jsx", ".tsx"]; + #[async_trait] impl PreAnalyzer for EslintAnalyzer { fn id(&self) -> &str { @@ -20,28 +21,35 @@ impl PreAnalyzer for EslintAnalyzer { } async fn run(&self, diff: &UnifiedDiff, repo_path: &str) -> Result> { - if !diff.file_path.to_string_lossy().ends_with(".js") - && !diff.file_path.to_string_lossy().ends_with(".ts") - && !diff.file_path.to_string_lossy().ends_with(".jsx") - && !diff.file_path.to_string_lossy().ends_with(".tsx") - { + let path_str = diff.file_path.to_string_lossy(); + if !JS_EXTENSIONS.iter().any(|ext| path_str.ends_with(ext)) { return Ok(Vec::new()); } let file_path = PathBuf::from(repo_path).join(&diff.file_path); + let file_arg = file_path.to_string_lossy().to_string(); + let diff_file_path = diff.file_path.clone(); - let output = Command::new("eslint") - .arg("--format=json") - .arg("--no-eslintrc") - .arg(file_path.to_string_lossy().as_ref()) - .output(); + let timeout = std::time::Duration::from_secs(30); + let result = tokio::time::timeout( + timeout, + tokio::task::spawn_blocking(move || { + use std::process::Command; + Command::new("eslint") + .arg("--format=json") + .arg("--no-eslintrc") + .arg(&file_arg) + .output() + }), + ) + .await; - match output { - Ok(output) => { + match result { + Ok(Ok(Ok(output))) => { let stdout = String::from_utf8_lossy(&output.stdout); if !stdout.trim().is_empty() { Ok(vec![LLMContextChunk { - file_path: diff.file_path.clone(), + file_path: diff_file_path, content: format!("ESLint analysis:\n{}", stdout), context_type: ContextType::Documentation, line_range: None, @@ -50,7 +58,68 @@ impl PreAnalyzer for EslintAnalyzer { Ok(Vec::new()) } } - Err(_) => Ok(Vec::new()), + _ => Ok(Vec::new()), } } } + +#[cfg(test)] +mod tests { + use super::*; + + fn make_diff(file_path: &str) -> UnifiedDiff { + UnifiedDiff { + old_content: None, + new_content: None, + file_path: PathBuf::from(file_path), + is_new: false, + is_deleted: false, + is_binary: false, + hunks: vec![], + } + } + + #[test] + fn test_eslint_analyzer_id() { + let analyzer = EslintAnalyzer::new(); + assert_eq!(analyzer.id(), "eslint"); + } + + #[tokio::test] + async fn test_eslint_skips_non_js_files() { + let analyzer = EslintAnalyzer::new(); + for ext in &[".rs", ".py", ".go", ".java", ".rb", ".css", ".html"] { + let diff = make_diff(&format!("file{}", ext)); + let result = analyzer.run(&diff, "/tmp").await.unwrap(); + assert!(result.is_empty(), "Should skip {}", ext); + } + } + + #[test] + fn test_js_extensions_filter() { + // Verify JS_EXTENSIONS contains all expected extensions + for ext in &[".js", ".ts", ".jsx", ".tsx"] { + assert!( + JS_EXTENSIONS.contains(ext), + "JS_EXTENSIONS should contain {}", + ext + ); + } + // And rejects non-JS extensions + for ext in &[".rs", ".py", ".go"] { + assert!( + !JS_EXTENSIONS.contains(ext), + "JS_EXTENSIONS should not contain {}", + ext + ); + } + } + + #[tokio::test] + async fn test_eslint_handles_missing_binary() { + let analyzer = EslintAnalyzer::new(); + let diff = make_diff("test.js"); + let result = analyzer.run(&diff, "/nonexistent/repo").await; + assert!(result.is_ok()); + } +} diff --git a/src/plugins/builtin/semgrep.rs b/src/plugins/builtin/semgrep.rs index bb605e9..7775c9e 100644 --- a/src/plugins/builtin/semgrep.rs +++ b/src/plugins/builtin/semgrep.rs @@ -3,7 +3,9 @@ use crate::plugins::PreAnalyzer; use anyhow::Result; use async_trait::async_trait; use std::path::PathBuf; -use std::process::Command; + +/// Timeout for semgrep execution in seconds. +const SEMGREP_TIMEOUT_SECS: u64 = 30; pub struct SemgrepAnalyzer; @@ -21,20 +23,32 @@ impl PreAnalyzer for SemgrepAnalyzer { async fn run(&self, diff: &UnifiedDiff, repo_path: &str) -> Result> { let file_path = PathBuf::from(repo_path).join(&diff.file_path); + let file_arg = file_path.to_string_lossy().to_string(); + let diff_file_path = diff.file_path.clone(); - let output = Command::new("semgrep") - .arg("--config=auto") - .arg("--json") - .arg("--quiet") - .arg(file_path.to_string_lossy().as_ref()) - .output(); + let timeout = std::time::Duration::from_secs(SEMGREP_TIMEOUT_SECS); + let result = tokio::time::timeout( + timeout, + tokio::task::spawn_blocking(move || { + use std::process::Command; + Command::new("semgrep") + .arg("--config=auto") + .arg("--json") + .arg("--quiet") + .arg("--timeout") + .arg(SEMGREP_TIMEOUT_SECS.to_string()) + .arg(&file_arg) + .output() + }), + ) + .await; - match output { - Ok(output) => { + match result { + Ok(Ok(Ok(output))) => { let stdout = String::from_utf8_lossy(&output.stdout); if !stdout.trim().is_empty() { Ok(vec![LLMContextChunk { - file_path: diff.file_path.clone(), + file_path: diff_file_path, content: format!("Semgrep analysis:\n{}", stdout), context_type: ContextType::Documentation, line_range: None, @@ -43,7 +57,50 @@ impl PreAnalyzer for SemgrepAnalyzer { Ok(Vec::new()) } } - Err(_) => Ok(Vec::new()), + _ => Ok(Vec::new()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_diff(file_path: &str) -> UnifiedDiff { + UnifiedDiff { + old_content: None, + new_content: None, + file_path: PathBuf::from(file_path), + is_new: false, + is_deleted: false, + is_binary: false, + hunks: vec![], } } + + #[test] + fn test_semgrep_analyzer_id() { + let analyzer = SemgrepAnalyzer::new(); + assert_eq!(analyzer.id(), "semgrep"); + } + + #[tokio::test] + async fn test_semgrep_handles_missing_binary() { + // When semgrep is not installed, should return empty vec (not error) + let analyzer = SemgrepAnalyzer::new(); + let diff = make_diff("nonexistent_file.py"); + let result = analyzer.run(&diff, "/nonexistent/repo").await; + assert!(result.is_ok()); + // Either empty (semgrep not found) or has output (semgrep found but file missing) + } + + #[tokio::test] + async fn test_semgrep_timeout_returns_empty() { + // When semgrep times out, should return empty vec (not error) + let analyzer = SemgrepAnalyzer::new(); + let diff = make_diff("test.py"); + // This will either timeout or fail to find semgrep — both should return Ok + let result = analyzer.run(&diff, "/nonexistent/repo").await; + assert!(result.is_ok()); + } } diff --git a/src/review/context_helpers.rs b/src/review/context_helpers.rs index 5b65e92..64a466a 100644 --- a/src/review/context_helpers.rs +++ b/src/review/context_helpers.rs @@ -56,13 +56,30 @@ pub fn resolve_pattern_repositories( resolved } +/// Check whether a source string looks like a git URL and uses an allowed scheme. +/// Accepts `https://`, `ssh://`, `git@` (SSH shorthand), and `http://` (with `.git` suffix). +fn is_safe_git_url(source: &str) -> bool { + source.starts_with("https://") + || source.starts_with("ssh://") + || source.starts_with("git@") + || (source.starts_with("http://") && source.ends_with(".git")) +} + fn is_git_source(source: &str) -> bool { - source.contains("://") || source.starts_with("git@") || source.ends_with(".git") + is_safe_git_url(source) } fn prepare_pattern_repository_checkout(source: &str) -> Option { use std::process::Command; + if !is_safe_git_url(source) { + warn!( + "Rejecting pattern repository '{}': only https://, ssh://, and git@ URLs are allowed", + source + ); + return None; + } + let home_dir = dirs::home_dir()?; let cache_root = home_dir.join(".diffscope").join("pattern_repositories"); if std::fs::create_dir_all(&cache_root).is_err() { @@ -476,4 +493,71 @@ mod tests { assert_eq!(result.len(), 1); assert!(result[0].content.starts_with("Active review rules.")); } + + // === URL validation tests === + + #[test] + fn test_is_git_source_https() { + assert!(is_git_source("https://github.com/org/repo.git")); + assert!(is_git_source("https://github.com/org/repo")); + } + + #[test] + fn test_is_git_source_ssh() { + assert!(is_git_source("git@github.com:org/repo.git")); + } + + #[test] + fn test_is_git_source_http_with_git_suffix() { + assert!(is_git_source("http://example.com/repo.git")); + } + + #[test] + fn test_is_git_source_rejects_local_paths() { + assert!(!is_git_source("/tmp/evil")); + assert!(!is_git_source("../relative/path")); + assert!(!is_git_source("file:///etc/passwd")); + } + + #[test] + fn test_is_git_source_rejects_other_schemes() { + assert!(!is_git_source("ftp://example.com/repo.git")); + } + + #[test] + fn test_is_git_source_accepts_ssh() { + assert!(is_git_source("ssh://example.com/repo")); + } + + #[test] + fn test_is_safe_git_url_allows_https() { + assert!(is_safe_git_url("https://github.com/org/repo")); + assert!(is_safe_git_url("https://gitlab.com/org/repo.git")); + } + + #[test] + fn test_is_safe_git_url_allows_ssh() { + assert!(is_safe_git_url("git@github.com:org/repo.git")); + assert!(is_safe_git_url("ssh://example.com/repo")); + assert!(is_safe_git_url("ssh://git@gitlab.internal/org/rules.git")); + } + + #[test] + fn test_is_safe_git_url_rejects_file_urls() { + assert!(!is_safe_git_url("file:///etc/passwd")); + assert!(!is_safe_git_url("/tmp/evil")); + assert!(!is_safe_git_url("../traversal")); + } + + #[test] + fn test_is_safe_git_url_rejects_arbitrary_schemes() { + assert!(!is_safe_git_url("ftp://example.com/repo")); + assert!(!is_safe_git_url("gopher://example.com/repo")); + } + + #[test] + fn test_is_safe_git_url_rejects_http_without_git_suffix() { + // Plain http without .git suffix is not safe enough + assert!(!is_safe_git_url("http://example.com/repo")); + } } diff --git a/src/review/triage.rs b/src/review/triage.rs index c6004c3..9780a8c 100644 --- a/src/review/triage.rs +++ b/src/review/triage.rs @@ -45,8 +45,8 @@ const COMMENT_PREFIXES: &[&str] = &["//", "# ", "/*", "*/", "* ", "--", "